Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/intern
diff options
context:
space:
mode:
Diffstat (limited to 'intern')
-rw-r--r--intern/cycles/CMakeLists.txt1
-rw-r--r--intern/cycles/blender/CMakeLists.txt11
-rw-r--r--intern/cycles/blender/addon/engine.py3
-rw-r--r--intern/cycles/blender/addon/properties.py125
-rw-r--r--intern/cycles/blender/addon/ui.py132
-rw-r--r--intern/cycles/blender/blender_camera.cpp9
-rw-r--r--intern/cycles/blender/blender_curves.cpp681
-rw-r--r--intern/cycles/blender/blender_device.cpp50
-rw-r--r--intern/cycles/blender/blender_geometry.cpp24
-rw-r--r--intern/cycles/blender/blender_mesh.cpp76
-rw-r--r--intern/cycles/blender/blender_object.cpp4
-rw-r--r--intern/cycles/blender/blender_python.cpp11
-rw-r--r--intern/cycles/blender/blender_session.cpp75
-rw-r--r--intern/cycles/blender/blender_shader.cpp8
-rw-r--r--intern/cycles/blender/blender_sync.cpp141
-rw-r--r--intern/cycles/blender/blender_sync.h33
-rw-r--r--intern/cycles/blender/blender_viewport.cpp16
-rw-r--r--intern/cycles/blender/blender_viewport.h4
-rw-r--r--intern/cycles/blender/blender_volume.cpp57
-rw-r--r--intern/cycles/bvh/CMakeLists.txt4
-rw-r--r--intern/cycles/bvh/bvh.cpp72
-rw-r--r--intern/cycles/bvh/bvh.h2
-rw-r--r--intern/cycles/bvh/bvh4.cpp447
-rw-r--r--intern/cycles/bvh/bvh4.h88
-rw-r--r--intern/cycles/bvh/bvh8.cpp541
-rw-r--r--intern/cycles/bvh/bvh8.h99
-rw-r--r--intern/cycles/bvh/bvh_build.cpp169
-rw-r--r--intern/cycles/bvh/bvh_build.h15
-rw-r--r--intern/cycles/bvh/bvh_embree.cpp161
-rw-r--r--intern/cycles/bvh/bvh_embree.h2
-rw-r--r--intern/cycles/bvh/bvh_optix.cpp13
-rw-r--r--intern/cycles/bvh/bvh_params.h2
-rw-r--r--intern/cycles/bvh/bvh_sort.cpp15
-rw-r--r--intern/cycles/bvh/bvh_split.cpp10
-rw-r--r--intern/cycles/bvh/bvh_split.h10
-rw-r--r--intern/cycles/bvh/bvh_unaligned.cpp6
-rw-r--r--intern/cycles/device/CMakeLists.txt12
-rw-r--r--intern/cycles/device/cuda/device_cuda.h33
-rw-r--r--intern/cycles/device/cuda/device_cuda_impl.cpp73
-rw-r--r--intern/cycles/device/device.cpp55
-rw-r--r--intern/cycles/device/device.h11
-rw-r--r--intern/cycles/device/device_cpu.cpp177
-rw-r--r--intern/cycles/device/device_cuda.cpp1
-rw-r--r--intern/cycles/device/device_denoising.cpp10
-rw-r--r--intern/cycles/device/device_denoising.h2
-rw-r--r--intern/cycles/device/device_multi.cpp4
-rw-r--r--intern/cycles/device/device_network.cpp1
-rw-r--r--intern/cycles/device/device_opencl.cpp1
-rw-r--r--intern/cycles/device/device_optix.cpp51
-rw-r--r--intern/cycles/device/device_split_kernel.cpp18
-rw-r--r--intern/cycles/device/device_split_kernel.h4
-rw-r--r--intern/cycles/device/device_task.cpp4
-rw-r--r--intern/cycles/device/device_task.h54
-rw-r--r--intern/cycles/device/opencl/device_opencl.h18
-rw-r--r--intern/cycles/device/opencl/device_opencl_impl.cpp103
-rw-r--r--intern/cycles/kernel/CMakeLists.txt14
-rw-r--r--intern/cycles/kernel/bvh/bvh.h98
-rw-r--r--intern/cycles/kernel/bvh/bvh_local.h63
-rw-r--r--intern/cycles/kernel/bvh/bvh_nodes.h145
-rw-r--r--intern/cycles/kernel/bvh/bvh_shadow_all.h146
-rw-r--r--intern/cycles/kernel/bvh/bvh_traversal.h154
-rw-r--r--intern/cycles/kernel/bvh/bvh_types.h7
-rw-r--r--intern/cycles/kernel/bvh/bvh_volume.h108
-rw-r--r--intern/cycles/kernel/bvh/bvh_volume_all.h136
-rw-r--r--intern/cycles/kernel/bvh/obvh_local.h398
-rw-r--r--intern/cycles/kernel/bvh/obvh_nodes.h410
-rw-r--r--intern/cycles/kernel/bvh/obvh_shadow_all.h664
-rw-r--r--intern/cycles/kernel/bvh/obvh_traversal.h557
-rw-r--r--intern/cycles/kernel/bvh/obvh_volume.h480
-rw-r--r--intern/cycles/kernel/bvh/obvh_volume_all.h551
-rw-r--r--intern/cycles/kernel/bvh/qbvh_local.h291
-rw-r--r--intern/cycles/kernel/bvh/qbvh_nodes.h329
-rw-r--r--intern/cycles/kernel/bvh/qbvh_shadow_all.h453
-rw-r--r--intern/cycles/kernel/bvh/qbvh_traversal.h420
-rw-r--r--intern/cycles/kernel/bvh/qbvh_volume.h367
-rw-r--r--intern/cycles/kernel/bvh/qbvh_volume_all.h444
-rw-r--r--intern/cycles/kernel/closure/bsdf.h48
-rw-r--r--intern/cycles/kernel/closure/bsdf_hair_principled.h7
-rw-r--r--intern/cycles/kernel/geom/geom_curve.h91
-rw-r--r--intern/cycles/kernel/geom/geom_curve_intersect.h1354
-rw-r--r--intern/cycles/kernel/geom/geom_motion_curve.h112
-rw-r--r--intern/cycles/kernel/geom/geom_object.h80
-rw-r--r--intern/cycles/kernel/geom/geom_primitive.h5
-rw-r--r--intern/cycles/kernel/geom/geom_triangle.h21
-rw-r--r--intern/cycles/kernel/geom/geom_triangle_intersect.h427
-rw-r--r--intern/cycles/kernel/kernel_emission.h4
-rw-r--r--intern/cycles/kernel/kernel_light.h508
-rw-r--r--intern/cycles/kernel/kernel_light_background.h448
-rw-r--r--intern/cycles/kernel/kernel_light_common.h159
-rw-r--r--intern/cycles/kernel/kernel_montecarlo.h10
-rw-r--r--intern/cycles/kernel/kernel_shader.h41
-rw-r--r--intern/cycles/kernel/kernel_types.h87
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel.cpp10
-rw-r--r--intern/cycles/kernel/kernels/optix/kernel_optix.cu26
-rw-r--r--intern/cycles/kernel/osl/CMakeLists.txt9
-rw-r--r--intern/cycles/kernel/osl/osl_closures.cpp79
-rw-r--r--intern/cycles/kernel/osl/osl_closures.h2
-rw-r--r--intern/cycles/kernel/shaders/node_sky_texture.osl123
-rw-r--r--intern/cycles/kernel/svm/svm_closure.h50
-rw-r--r--intern/cycles/kernel/svm/svm_geometry.h2
-rw-r--r--intern/cycles/kernel/svm/svm_noise.h4
-rw-r--r--intern/cycles/kernel/svm/svm_sky.h301
-rw-r--r--intern/cycles/kernel/svm/svm_types.h2
-rw-r--r--intern/cycles/render/CMakeLists.txt2
-rw-r--r--intern/cycles/render/curves.cpp110
-rw-r--r--intern/cycles/render/curves.h62
-rw-r--r--intern/cycles/render/denoising.cpp6
-rw-r--r--intern/cycles/render/geometry.cpp41
-rw-r--r--intern/cycles/render/hair.cpp1
-rw-r--r--intern/cycles/render/hair.h1
-rw-r--r--intern/cycles/render/image.cpp1
-rw-r--r--intern/cycles/render/image.h2
-rw-r--r--intern/cycles/render/image_sky.cpp91
-rw-r--r--intern/cycles/render/image_sky.h49
-rw-r--r--intern/cycles/render/integrator.cpp1
-rw-r--r--intern/cycles/render/light.cpp140
-rw-r--r--intern/cycles/render/nodes.cpp199
-rw-r--r--intern/cycles/render/nodes.h9
-rw-r--r--intern/cycles/render/object.cpp82
-rw-r--r--intern/cycles/render/scene.cpp13
-rw-r--r--intern/cycles/render/scene.h12
-rw-r--r--intern/cycles/render/session.cpp46
-rw-r--r--intern/cycles/render/session.h14
-rw-r--r--intern/cycles/render/shader.cpp1
-rw-r--r--intern/cycles/render/svm.cpp3
-rw-r--r--intern/cycles/test/render_graph_finalize_test.cpp4
-rw-r--r--intern/cycles/util/CMakeLists.txt5
-rw-r--r--intern/cycles/util/util_debug.cpp15
-rw-r--r--intern/cycles/util/util_debug.h6
-rw-r--r--intern/cycles/util/util_math_fast.h8
-rw-r--r--intern/cycles/util/util_openimagedenoise.h39
-rw-r--r--intern/cycles/util/util_sky_model.h24
-rw-r--r--intern/cycles/util/util_sky_nishita.cpp371
-rw-r--r--intern/cycles/util/util_task.cpp430
-rw-r--r--intern/cycles/util/util_task.h102
-rw-r--r--intern/cycles/util/util_tbb.h39
-rw-r--r--intern/cycles/util/util_version.h2
-rw-r--r--intern/ghost/intern/GHOST_IXrGraphicsBinding.h3
-rw-r--r--intern/ghost/intern/GHOST_XrGraphicsBinding.cpp35
-rw-r--r--intern/ghost/intern/GHOST_XrSession.cpp11
-rw-r--r--intern/ghost/intern/GHOST_XrSwapchain.cpp7
-rw-r--r--intern/ghost/intern/GHOST_XrSwapchain.h3
-rw-r--r--intern/guardedalloc/MEM_guardedalloc.h3
-rw-r--r--intern/libmv/libmv/multiview/projection_test.cc8
-rw-r--r--intern/libmv/libmv/simple_pipeline/bundle.cc17
-rw-r--r--intern/mantaflow/extern/manta_fluid_API.h24
-rw-r--r--intern/mantaflow/intern/MANTA_main.cpp1609
-rw-r--r--intern/mantaflow/intern/MANTA_main.h29
-rw-r--r--intern/mantaflow/intern/manta_fluid_API.cpp47
-rw-r--r--intern/mantaflow/intern/strings/fluid_script.h155
-rw-r--r--intern/mantaflow/intern/strings/liquid_script.h109
-rw-r--r--intern/mantaflow/intern/strings/smoke_script.h45
-rw-r--r--intern/quadriflow/quadriflow_capi.cpp2
-rw-r--r--intern/rigidbody/RBI_api.h2
154 files changed, 4477 insertions, 13557 deletions
diff --git a/intern/cycles/CMakeLists.txt b/intern/cycles/CMakeLists.txt
index 121c8bdad6e..e5a5e9773d3 100644
--- a/intern/cycles/CMakeLists.txt
+++ b/intern/cycles/CMakeLists.txt
@@ -286,6 +286,7 @@ include_directories(
${OPENEXR_INCLUDE_DIR}
${OPENEXR_INCLUDE_DIRS}
${PUGIXML_INCLUDE_DIR}
+ ${TBB_INCLUDE_DIRS}
)
if(CYCLES_STANDALONE_REPOSITORY)
diff --git a/intern/cycles/blender/CMakeLists.txt b/intern/cycles/blender/CMakeLists.txt
index 496e8e9310b..2316800e21e 100644
--- a/intern/cycles/blender/CMakeLists.txt
+++ b/intern/cycles/blender/CMakeLists.txt
@@ -92,10 +92,6 @@ if(WITH_MOD_FLUID)
add_definitions(-DWITH_FLUID)
endif()
-if(WITH_NEW_OBJECT_TYPES)
- add_definitions(-DWITH_NEW_OBJECT_TYPES)
-endif()
-
if(WITH_OPENVDB)
add_definitions(-DWITH_OPENVDB ${OPENVDB_DEFINITIONS})
list(APPEND INC_SYS
@@ -106,6 +102,13 @@ if(WITH_OPENVDB)
)
endif()
+if(WITH_OPENIMAGEDENOISE)
+ add_definitions(-DWITH_OPENIMAGEDENOISE)
+ list(APPEND INC_SYS
+ ${OPENIMAGEDENOISE_INCLUDE_DIRS}
+ )
+endif()
+
blender_add_lib(bf_intern_cycles "${SRC}" "${INC}" "${INC_SYS}" "${LIB}")
# avoid link failure with clang 3.4 debug
diff --git a/intern/cycles/blender/addon/engine.py b/intern/cycles/blender/addon/engine.py
index e7ea5e7a1f6..7566ca28dd7 100644
--- a/intern/cycles/blender/addon/engine.py
+++ b/intern/cycles/blender/addon/engine.py
@@ -179,7 +179,8 @@ def reset(engine, data, depsgraph):
import _cycles
import bpy
- if bpy.app.debug_value == 256:
+ prefs = bpy.context.preferences
+ if prefs.experimental.use_cycles_debug and prefs.view.show_developer_ui:
_cycles.debug_flags_update(depsgraph.scene.as_pointer())
else:
_cycles.debug_flags_reset()
diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py
index 1635afab210..840efb65d96 100644
--- a/intern/cycles/blender/addon/properties.py
+++ b/intern/cycles/blender/addon/properties.py
@@ -55,8 +55,7 @@ enum_displacement_methods = (
enum_bvh_layouts = (
('BVH2', "BVH2", "", 1),
- ('BVH4', "BVH4", "", 2),
- ('BVH8', "BVH8", "", 4),
+ ('EMBREE', "Embree", "", 4),
)
enum_bvh_types = (
@@ -78,20 +77,9 @@ enum_panorama_types = (
('MIRRORBALL', "Mirror Ball", "Uses the mirror ball mapping"),
)
-enum_curve_primitives = (
- ('TRIANGLES', "Triangles", "Create triangle geometry around strands"),
- ('LINE_SEGMENTS', "Line Segments", "Use line segment primitives"),
- ('CURVE_SEGMENTS', "Curve Segments", "Use segmented cardinal curve primitives"),
-)
-
-enum_triangle_curves = (
- ('CAMERA_TRIANGLES', "Planes", "Create individual triangles forming planes that face camera"),
- ('TESSELLATED_TRIANGLES', "Tessellated", "Create mesh surrounding each strand"),
-)
-
enum_curve_shape = (
- ('RIBBONS', "Ribbons", "Ignore thickness of each strand"),
- ('THICK', "Thick", "Use thickness of strand when rendering"),
+ ('RIBBONS', "Rounded Ribbons", "Render hair as flat ribbon with rounded normals, for fast rendering"),
+ ('THICK', "3D Curves", "Render hair as 3D curve, for accurate results when viewing hair close up"),
)
enum_tile_order = (
@@ -194,10 +182,36 @@ enum_aov_types = (
('COLOR', "Color", "Write a Color pass", 1),
)
-enum_viewport_denoising = (
- ('NONE', "None", "Disable viewport denoising", 0),
- ('OPTIX', "OptiX AI-Accelerated", "Use the OptiX denoiser running on the GPU (requires at least one compatible OptiX device)", 1),
-)
+def enum_openimagedenoise_denoiser(self, context):
+ if _cycles.with_openimagedenoise:
+ return [('OPENIMAGEDENOISE', "OpenImageDenoise", "Use Intel OpenImageDenoise AI denoiser running on the CPU", 4)]
+ return []
+
+def enum_optix_denoiser(self, context):
+ if not context or bool(context.preferences.addons[__package__].preferences.get_devices_for_type('OPTIX')):
+ return [('OPTIX', "OptiX", "Use the OptiX AI denoiser with GPU acceleration, only available on NVIDIA GPUs", 2)]
+ return []
+
+def enum_preview_denoiser(self, context):
+ optix_items = enum_optix_denoiser(self, context)
+ oidn_items = enum_openimagedenoise_denoiser(self, context)
+
+ if len(optix_items):
+ auto_label = "Fastest (Optix)"
+ elif len(oidn_items):
+ auto_label = "Fastest (OpenImageDenoise)"
+ else:
+ auto_label = "None"
+
+ items = [('AUTO', auto_label, "Use the fastest available denoiser for viewport rendering", 0)]
+ items += optix_items
+ items += oidn_items
+ return items
+
+def enum_denoiser(self, context):
+ items = [('NLM', "NLM", "Cycles native non-local means denoiser, running on any compute device", 1)]
+ items += enum_optix_denoiser(self, context)
+ return items
enum_denoising_optix_input_passes = (
('RGB', "Color", "Use only color as input", 1),
@@ -236,11 +250,29 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
description="Pause all viewport preview renders",
default=False,
)
- preview_denoising: EnumProperty(
- name="Viewport Denoising",
- description="Denoise the image after each preview update with the selected denoiser engine",
- items=enum_viewport_denoising,
- default='NONE',
+
+ use_denoising: BoolProperty(
+ name="Use Denoising",
+ description="Denoise the rendered image",
+ default=False,
+ )
+ use_preview_denoising: BoolProperty(
+ name="Use Viewport Denoising",
+ description="Denoise the image in the 3D viewport",
+ default=False,
+ )
+
+ denoiser: EnumProperty(
+ name="Denoiser",
+ description="Denoise the image with the selected denoiser",
+ items=enum_denoiser,
+ default=1,
+ )
+ preview_denoiser: EnumProperty(
+ name="Viewport Denoiser",
+ description="Denoise the image after each preview update with the selected denoiser",
+ items=enum_preview_denoiser,
+ default=0,
)
use_square_samples: BoolProperty(
@@ -256,7 +288,7 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
default=128,
)
preview_samples: IntProperty(
- name="Preview Samples",
+ name="Viewport Samples",
description="Number of samples to render in the viewport, unlimited if 0",
min=0, max=(1 << 24),
default=32,
@@ -476,7 +508,7 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
subtype='PIXEL'
)
preview_dicing_rate: FloatProperty(
- name="Preview Dicing Rate",
+ name="Viewport Dicing Rate",
description="Size of a micropolygon in pixels during preview render",
min=0.1, max=1000.0, soft_min=0.5,
default=8.0,
@@ -629,11 +661,6 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
items=enum_bvh_types,
default='DYNAMIC_BVH',
)
- use_bvh_embree: BoolProperty(
- name="Use Embree",
- description="Use Embree as ray accelerator",
- default=False,
- )
debug_use_spatial_splits: BoolProperty(
name="Use Spatial Splits",
description="Use BVH spatial splits: longer builder time, faster render",
@@ -786,7 +813,7 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
debug_bvh_layout: EnumProperty(
name="BVH Layout",
items=enum_bvh_layouts,
- default='BVH8',
+ default='EMBREE',
)
debug_use_cpu_split_kernel: BoolProperty(name="Split Kernel", default=False)
@@ -1241,39 +1268,17 @@ class CyclesObjectSettings(bpy.types.PropertyGroup):
class CyclesCurveRenderSettings(bpy.types.PropertyGroup):
- primitive: EnumProperty(
- name="Primitive",
- description="Type of primitive used for hair rendering",
- items=enum_curve_primitives,
- default='LINE_SEGMENTS',
- )
shape: EnumProperty(
name="Shape",
description="Form of hair",
items=enum_curve_shape,
- default='THICK',
- )
- cull_backfacing: BoolProperty(
- name="Cull Back-faces",
- description="Do not test the back-face of each strand",
- default=True,
- )
- use_curves: BoolProperty(
- name="Use Cycles Hair Rendering",
- description="Activate Cycles hair rendering for particle system",
- default=True,
- )
- resolution: IntProperty(
- name="Resolution",
- description="Resolution of generated mesh",
- min=3, max=64,
- default=3,
+ default='RIBBONS',
)
subdivisions: IntProperty(
name="Subdivisions",
description="Number of subdivisions used in Cardinal curve intersection (power of 2)",
min=0, max=24,
- default=4,
+ default=2,
)
@classmethod
@@ -1369,7 +1374,7 @@ class CyclesRenderLayerSettings(bpy.types.PropertyGroup):
use_denoising: BoolProperty(
name="Use Denoising",
description="Denoise the rendered image",
- default=False,
+ default=True,
update=update_render_passes,
)
denoising_diffuse_direct: BoolProperty(
@@ -1439,12 +1444,6 @@ class CyclesRenderLayerSettings(bpy.types.PropertyGroup):
default=0,
)
- use_optix_denoising: BoolProperty(
- name="OptiX AI-Accelerated",
- description="Use the OptiX denoiser to denoise the rendered image",
- default=False,
- update=update_render_passes,
- )
denoising_optix_input_passes: EnumProperty(
name="Input Passes",
description="Passes handed over to the OptiX denoiser (this can have different effects on the denoised image)",
diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py
index 78a44881743..b049d0bf2b4 100644
--- a/intern/cycles/blender/addon/ui.py
+++ b/intern/cycles/blender/addon/ui.py
@@ -112,10 +112,6 @@ def show_device_active(context):
return True
return context.preferences.addons[__package__].preferences.has_active_device()
-def show_optix_denoising(context):
- # OptiX AI denoiser can be used when at least one device supports OptiX
- return bool(context.preferences.addons[__package__].preferences.get_devices_for_type('OPTIX'))
-
def draw_samples_info(layout, context):
cscene = context.scene.cycles
@@ -190,11 +186,6 @@ class CYCLES_RENDER_PT_sampling(CyclesButtonsPanel, Panel):
col.prop(cscene, "aa_samples", text="Render")
col.prop(cscene, "preview_aa_samples", text="Viewport")
- # Viewport denoising is currently only supported with OptiX
- if show_optix_denoising(context):
- col = layout.column()
- col.prop(cscene, "preview_denoising")
-
if not use_branched_path(context):
draw_samples_info(layout, context)
@@ -256,6 +247,39 @@ class CYCLES_RENDER_PT_sampling_adaptive(CyclesButtonsPanel, Panel):
col.prop(cscene, "adaptive_threshold", text="Noise Threshold")
col.prop(cscene, "adaptive_min_samples", text="Min Samples")
+
+class CYCLES_RENDER_PT_sampling_denoising(CyclesButtonsPanel, Panel):
+ bl_label = "Denoising"
+ bl_parent_id = "CYCLES_RENDER_PT_sampling"
+ bl_options = {'DEFAULT_CLOSED'}
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ scene = context.scene
+ cscene = scene.cycles
+
+ heading = layout.column(align=True, heading="Render")
+ row = heading.row(align=True)
+ row.prop(cscene, "use_denoising", text="")
+ sub = row.row()
+ sub.active = cscene.use_denoising
+ sub.prop(cscene, "denoiser", text="")
+
+ heading = layout.column(align=False, heading="Viewport")
+ row = heading.row(align=True)
+ row.prop(cscene, "use_preview_denoising", text="")
+ sub = row.row()
+ sub.active = cscene.use_preview_denoising
+ sub.prop(cscene, "preview_denoiser", text="")
+
+ sub = heading.row(align=True)
+ sub.active = cscene.use_preview_denoising
+ sub.prop(cscene, "preview_denoising_start_sample", text="Start Sample")
+
+
class CYCLES_RENDER_PT_sampling_advanced(CyclesButtonsPanel, Panel):
bl_label = "Advanced"
bl_parent_id = "CYCLES_RENDER_PT_sampling"
@@ -387,13 +411,6 @@ class CYCLES_RENDER_PT_hair(CyclesButtonsPanel, Panel):
bl_label = "Hair"
bl_options = {'DEFAULT_CLOSED'}
- def draw_header(self, context):
- layout = self.layout
- scene = context.scene
- ccscene = scene.cycles_curves
-
- layout.prop(ccscene, "use_curves", text="")
-
def draw(self, context):
layout = self.layout
layout.use_property_split = True
@@ -402,18 +419,10 @@ class CYCLES_RENDER_PT_hair(CyclesButtonsPanel, Panel):
scene = context.scene
ccscene = scene.cycles_curves
- layout.active = ccscene.use_curves
-
col = layout.column()
col.prop(ccscene, "shape", text="Shape")
- if not (ccscene.primitive in {'CURVE_SEGMENTS', 'LINE_SEGMENTS'} and ccscene.shape == 'RIBBONS'):
- col.prop(ccscene, "cull_backfacing", text="Cull back-faces")
- col.prop(ccscene, "primitive", text="Primitive")
-
- if ccscene.primitive == 'TRIANGLES' and ccscene.shape == 'THICK':
- col.prop(ccscene, "resolution", text="Resolution")
- elif ccscene.primitive == 'CURVE_SEGMENTS':
- col.prop(ccscene, "subdivisions", text="Curve subdivisions")
+ if ccscene.shape == 'RIBBONS':
+ col.prop(ccscene, "subdivisions", text="Curve Subdivisions")
class CYCLES_RENDER_PT_volumes(CyclesButtonsPanel, Panel):
@@ -693,16 +702,20 @@ class CYCLES_RENDER_PT_performance_acceleration_structure(CyclesButtonsPanel, Pa
col = layout.column()
- if _cycles.with_embree:
- row = col.row()
- row.active = use_cpu(context)
- row.prop(cscene, "use_bvh_embree")
+ use_embree = False
+ if use_cpu(context):
+ use_embree = _cycles.with_embree
+ if not use_embree:
+ sub = col.column(align=True)
+ sub.label(text="Cycles built without Embree support")
+ sub.label(text="CPU raytracing performance will be poor")
+
col.prop(cscene, "debug_use_spatial_splits")
sub = col.column()
- sub.active = not cscene.use_bvh_embree or not _cycles.with_embree
+ sub.active = not use_embree
sub.prop(cscene, "debug_use_hair_bvh")
sub = col.column()
- sub.active = not cscene.debug_use_spatial_splits and not cscene.use_bvh_embree
+ sub.active = not cscene.debug_use_spatial_splits and not use_embree
sub.prop(cscene, "debug_bvh_time_steps")
@@ -741,11 +754,6 @@ class CYCLES_RENDER_PT_performance_viewport(CyclesButtonsPanel, Panel):
col.prop(rd, "preview_pixel_size", text="Pixel Size")
col.prop(cscene, "preview_start_resolution", text="Start Pixels")
- if show_optix_denoising(context):
- sub = col.row(align=True)
- sub.active = cscene.preview_denoising != 'NONE'
- sub.prop(cscene, "preview_denoising_start_sample", text="Denoising Start Sample")
-
class CYCLES_RENDER_PT_filter(CyclesButtonsPanel, Panel):
bl_label = "Filter"
@@ -968,12 +976,17 @@ class CYCLES_RENDER_PT_denoising(CyclesButtonsPanel, Panel):
bl_context = "view_layer"
bl_options = {'DEFAULT_CLOSED'}
+ @classmethod
+ def poll(cls, context):
+ cscene = context.scene.cycles
+ return CyclesButtonsPanel.poll(context) and cscene.use_denoising
+
def draw_header(self, context):
scene = context.scene
view_layer = context.view_layer
cycles_view_layer = view_layer.cycles
- layout = self.layout
+ layout = self.layout
layout.prop(cycles_view_layer, "use_denoising", text="")
def draw(self, context):
@@ -984,18 +997,17 @@ class CYCLES_RENDER_PT_denoising(CyclesButtonsPanel, Panel):
scene = context.scene
view_layer = context.view_layer
cycles_view_layer = view_layer.cycles
+ denoiser = scene.cycles.denoiser
- layout.active = cycles_view_layer.use_denoising
+ layout.active = denoiser != 'NONE' and cycles_view_layer.use_denoising
col = layout.column()
- if show_optix_denoising(context):
- col.prop(cycles_view_layer, "use_optix_denoising")
- col.separator(factor=2.0)
-
- if cycles_view_layer.use_optix_denoising:
- col.prop(cycles_view_layer, "denoising_optix_input_passes")
- return
+ if denoiser == 'OPTIX':
+ col.prop(cycles_view_layer, "denoising_optix_input_passes")
+ return
+ elif denoiser == 'OPENIMAGEDENOISE':
+ return
col.prop(cycles_view_layer, "denoising_radius", text="Radius")
@@ -1190,6 +1202,7 @@ class CYCLES_OBJECT_PT_motion_blur(CyclesButtonsPanel, Panel):
def draw(self, context):
layout = self.layout
+ layout.use_property_split = True
rd = context.scene.render
# scene = context.scene
@@ -1199,10 +1212,10 @@ class CYCLES_OBJECT_PT_motion_blur(CyclesButtonsPanel, Panel):
layout.active = (rd.use_motion_blur and cob.use_motion_blur)
- row = layout.row()
+ col = layout.column()
+ col.prop(cob, "motion_steps", text="Steps")
if ob.type != 'CAMERA':
- row.prop(cob, "use_deform_motion", text="Deformation")
- row.prop(cob, "motion_steps", text="Steps")
+ col.prop(cob, "use_deform_motion", text="Deformation")
def has_geometry_visibility(ob):
@@ -1575,17 +1588,18 @@ class CYCLES_WORLD_PT_ray_visibility(CyclesButtonsPanel, Panel):
def draw(self, context):
layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
world = context.world
visibility = world.cycles_visibility
- flow = layout.column_flow()
-
- flow.prop(visibility, "camera")
- flow.prop(visibility, "diffuse")
- flow.prop(visibility, "glossy")
- flow.prop(visibility, "transmission")
- flow.prop(visibility, "scatter")
+ col = layout.column()
+ col.prop(visibility, "camera")
+ col.prop(visibility, "diffuse")
+ col.prop(visibility, "glossy")
+ col.prop(visibility, "transmission")
+ col.prop(visibility, "scatter")
class CYCLES_WORLD_PT_settings(CyclesButtonsPanel, Panel):
@@ -1975,7 +1989,10 @@ class CYCLES_RENDER_PT_debug(CyclesButtonsPanel, Panel):
@classmethod
def poll(cls, context):
- return CyclesButtonsPanel.poll(context) and bpy.app.debug_value == 256
+ prefs = bpy.context.preferences
+ return (CyclesButtonsPanel.poll(context)
+ and prefs.experimental.use_cycles_debug
+ and prefs.view.show_developer_ui)
def draw(self, context):
layout = self.layout
@@ -2248,6 +2265,7 @@ classes = (
CYCLES_RENDER_PT_sampling,
CYCLES_RENDER_PT_sampling_sub_samples,
CYCLES_RENDER_PT_sampling_adaptive,
+ CYCLES_RENDER_PT_sampling_denoising,
CYCLES_RENDER_PT_sampling_advanced,
CYCLES_RENDER_PT_light_paths,
CYCLES_RENDER_PT_light_paths_max_bounces,
diff --git a/intern/cycles/blender/blender_camera.cpp b/intern/cycles/blender/blender_camera.cpp
index 40a1a2c2edc..011678a7a65 100644
--- a/intern/cycles/blender/blender_camera.cpp
+++ b/intern/cycles/blender/blender_camera.cpp
@@ -867,13 +867,13 @@ void BlenderSync::sync_view(BL::SpaceView3D &b_v3d,
}
}
-BufferParams BlenderSync::get_buffer_params(BL::Scene &b_scene,
- BL::RenderSettings &b_render,
+BufferParams BlenderSync::get_buffer_params(BL::RenderSettings &b_render,
BL::SpaceView3D &b_v3d,
BL::RegionView3D &b_rv3d,
Camera *cam,
int width,
- int height)
+ int height,
+ const bool use_denoiser)
{
BufferParams params;
bool use_border = false;
@@ -907,8 +907,7 @@ BufferParams BlenderSync::get_buffer_params(BL::Scene &b_scene,
PassType display_pass = update_viewport_display_passes(b_v3d, params.passes);
/* Can only denoise the combined image pass */
- params.denoising_data_pass = display_pass == PASS_COMBINED &&
- update_viewport_display_denoising(b_v3d, b_scene);
+ params.denoising_data_pass = display_pass == PASS_COMBINED && use_denoiser;
return params;
}
diff --git a/intern/cycles/blender/blender_curves.cpp b/intern/cycles/blender/blender_curves.cpp
index 847a43c5f34..82c99631a89 100644
--- a/intern/cycles/blender/blender_curves.cpp
+++ b/intern/cycles/blender/blender_curves.cpp
@@ -18,7 +18,6 @@
#include "render/camera.h"
#include "render/curves.h"
#include "render/hair.h"
-#include "render/mesh.h"
#include "render/object.h"
#include "render/scene.h"
@@ -39,27 +38,6 @@ ParticleCurveData::~ParticleCurveData()
{
}
-static void interp_weights(float t, float data[4])
-{
- /* Cardinal curve interpolation */
- float t2 = t * t;
- float t3 = t2 * t;
- float fc = 0.71f;
-
- data[0] = -fc * t3 + 2.0f * fc * t2 - fc * t;
- data[1] = (2.0f - fc) * t3 + (fc - 3.0f) * t2 + 1.0f;
- data[2] = (fc - 2.0f) * t3 + (3.0f - 2.0f * fc) * t2 + fc * t;
- data[3] = fc * t3 - fc * t2;
-}
-
-static void curveinterp_v3_v3v3v3v3(
- float3 *p, float3 *v1, float3 *v2, float3 *v3, float3 *v4, const float w[4])
-{
- p->x = v1->x * w[0] + v2->x * w[1] + v3->x * w[2] + v4->x * w[3];
- p->y = v1->y * w[0] + v2->y * w[1] + v3->y * w[2] + v4->y * w[3];
- p->z = v1->z * w[0] + v2->z * w[1] + v3->z * w[2] + v4->z * w[3];
-}
-
static float shaperadius(float shape, float root, float tip, float time)
{
assert(time >= 0.0f);
@@ -77,43 +55,13 @@ static float shaperadius(float shape, float root, float tip, float time)
/* curve functions */
-static void InterpolateKeySegments(
- int seg, int segno, int key, int curve, float3 *keyloc, float *time, ParticleCurveData *CData)
-{
- float3 ckey_loc1 = CData->curvekey_co[key];
- float3 ckey_loc2 = ckey_loc1;
- float3 ckey_loc3 = CData->curvekey_co[key + 1];
- float3 ckey_loc4 = ckey_loc3;
-
- if (key > CData->curve_firstkey[curve])
- ckey_loc1 = CData->curvekey_co[key - 1];
-
- if (key < CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 2)
- ckey_loc4 = CData->curvekey_co[key + 2];
-
- float time1 = CData->curvekey_time[key] / CData->curve_length[curve];
- float time2 = CData->curvekey_time[key + 1] / CData->curve_length[curve];
-
- float dfra = (time2 - time1) / (float)segno;
-
- if (time)
- *time = (dfra * seg) + time1;
-
- float t[4];
-
- interp_weights((float)seg / (float)segno, t);
-
- if (keyloc)
- curveinterp_v3_v3v3v3v3(keyloc, &ckey_loc1, &ckey_loc2, &ckey_loc3, &ckey_loc4, t);
-}
-
static bool ObtainCacheParticleData(
- Geometry *geom, BL::Mesh *b_mesh, BL::Object *b_ob, ParticleCurveData *CData, bool background)
+ Hair *hair, BL::Mesh *b_mesh, BL::Object *b_ob, ParticleCurveData *CData, bool background)
{
int curvenum = 0;
int keyno = 0;
- if (!(geom && b_mesh && b_ob && CData))
+ if (!(hair && b_mesh && b_ob && CData))
return false;
Transform tfm = get_transform(b_ob->matrix_world());
@@ -129,7 +77,7 @@ static bool ObtainCacheParticleData(
if ((b_part.render_type() == BL::ParticleSettings::render_type_PATH) &&
(b_part.type() == BL::ParticleSettings::type_HAIR)) {
- int shader = clamp(b_part.material() - 1, 0, geom->used_shaders.size() - 1);
+ int shader = clamp(b_part.material() - 1, 0, hair->used_shaders.size() - 1);
int display_step = background ? b_part.render_step() : b_part.display_step();
int totparts = b_psys.particles.length();
int totchild = background ? b_psys.child_particles.length() :
@@ -203,14 +151,14 @@ static bool ObtainCacheParticleData(
return true;
}
-static bool ObtainCacheParticleUV(Geometry *geom,
+static bool ObtainCacheParticleUV(Hair *hair,
BL::Mesh *b_mesh,
BL::Object *b_ob,
ParticleCurveData *CData,
bool background,
int uv_num)
{
- if (!(geom && b_mesh && b_ob && CData))
+ if (!(hair && b_mesh && b_ob && CData))
return false;
CData->curve_uv.clear();
@@ -266,14 +214,14 @@ static bool ObtainCacheParticleUV(Geometry *geom,
return true;
}
-static bool ObtainCacheParticleVcol(Geometry *geom,
+static bool ObtainCacheParticleVcol(Hair *hair,
BL::Mesh *b_mesh,
BL::Object *b_ob,
ParticleCurveData *CData,
bool background,
int vcol_num)
{
- if (!(geom && b_mesh && b_ob && CData))
+ if (!(hair && b_mesh && b_ob && CData))
return false;
CData->curve_vcol.clear();
@@ -314,7 +262,7 @@ static bool ObtainCacheParticleVcol(Geometry *geom,
BL::Mesh::vertex_colors_iterator l;
b_mesh->vertex_colors.begin(l);
- float3 vcol = make_float3(0.0f, 0.0f, 0.0f);
+ float4 vcol = make_float4(0.0f, 0.0f, 0.0f, 1.0f);
if (b_mesh->vertex_colors.length())
b_psys.mcol_on_emitter(psmd, *b_pa, pa_no, vcol_num, &vcol.x);
CData->curve_vcol.push_back_slow(vcol);
@@ -329,272 +277,6 @@ static bool ObtainCacheParticleVcol(Geometry *geom,
return true;
}
-static void ExportCurveTrianglePlanes(Mesh *mesh,
- ParticleCurveData *CData,
- float3 RotCam,
- bool is_ortho)
-{
- int vertexno = mesh->verts.size();
- int vertexindex = vertexno;
- int numverts = 0, numtris = 0;
-
- /* compute and reserve size of arrays */
- for (int sys = 0; sys < CData->psys_firstcurve.size(); sys++) {
- for (int curve = CData->psys_firstcurve[sys];
- curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys];
- curve++) {
- numverts += 2 + (CData->curve_keynum[curve] - 1) * 2;
- numtris += (CData->curve_keynum[curve] - 1) * 2;
- }
- }
-
- mesh->reserve_mesh(mesh->verts.size() + numverts, mesh->num_triangles() + numtris);
-
- /* actually export */
- for (int sys = 0; sys < CData->psys_firstcurve.size(); sys++) {
- for (int curve = CData->psys_firstcurve[sys];
- curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys];
- curve++) {
- float3 xbasis;
- float3 v1;
- float time = 0.0f;
- float3 ickey_loc = CData->curvekey_co[CData->curve_firstkey[curve]];
- float radius = shaperadius(
- CData->psys_shape[sys], CData->psys_rootradius[sys], CData->psys_tipradius[sys], 0.0f);
- v1 = CData->curvekey_co[CData->curve_firstkey[curve] + 1] -
- CData->curvekey_co[CData->curve_firstkey[curve]];
- if (is_ortho)
- xbasis = normalize(cross(RotCam, v1));
- else
- xbasis = normalize(cross(RotCam - ickey_loc, v1));
- float3 ickey_loc_shfl = ickey_loc - radius * xbasis;
- float3 ickey_loc_shfr = ickey_loc + radius * xbasis;
- mesh->add_vertex(ickey_loc_shfl);
- mesh->add_vertex(ickey_loc_shfr);
- vertexindex += 2;
-
- for (int curvekey = CData->curve_firstkey[curve] + 1;
- curvekey < CData->curve_firstkey[curve] + CData->curve_keynum[curve];
- curvekey++) {
- ickey_loc = CData->curvekey_co[curvekey];
-
- if (curvekey == CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1)
- v1 = CData->curvekey_co[curvekey] -
- CData->curvekey_co[max(curvekey - 1, CData->curve_firstkey[curve])];
- else
- v1 = CData->curvekey_co[curvekey + 1] - CData->curvekey_co[curvekey - 1];
-
- time = CData->curvekey_time[curvekey] / CData->curve_length[curve];
- radius = shaperadius(
- CData->psys_shape[sys], CData->psys_rootradius[sys], CData->psys_tipradius[sys], time);
-
- if (curvekey == CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1)
- radius = shaperadius(CData->psys_shape[sys],
- CData->psys_rootradius[sys],
- CData->psys_tipradius[sys],
- 0.95f);
-
- if (CData->psys_closetip[sys] &&
- (curvekey == CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1))
- radius = shaperadius(CData->psys_shape[sys], CData->psys_rootradius[sys], 0.0f, 0.95f);
-
- if (is_ortho)
- xbasis = normalize(cross(RotCam, v1));
- else
- xbasis = normalize(cross(RotCam - ickey_loc, v1));
- float3 ickey_loc_shfl = ickey_loc - radius * xbasis;
- float3 ickey_loc_shfr = ickey_loc + radius * xbasis;
- mesh->add_vertex(ickey_loc_shfl);
- mesh->add_vertex(ickey_loc_shfr);
- mesh->add_triangle(
- vertexindex - 2, vertexindex, vertexindex - 1, CData->psys_shader[sys], true);
- mesh->add_triangle(
- vertexindex + 1, vertexindex - 1, vertexindex, CData->psys_shader[sys], true);
- vertexindex += 2;
- }
- }
- }
-
- mesh->resize_mesh(mesh->verts.size(), mesh->num_triangles());
- mesh->attributes.remove(ATTR_STD_VERTEX_NORMAL);
- mesh->attributes.remove(ATTR_STD_FACE_NORMAL);
- mesh->add_face_normals();
- mesh->add_vertex_normals();
- mesh->attributes.remove(ATTR_STD_FACE_NORMAL);
-
- /* texture coords still needed */
-}
-
-static void ExportCurveTriangleGeometry(Mesh *mesh, ParticleCurveData *CData, int resolution)
-{
- int vertexno = mesh->verts.size();
- int vertexindex = vertexno;
- int numverts = 0, numtris = 0;
-
- /* compute and reserve size of arrays */
- for (int sys = 0; sys < CData->psys_firstcurve.size(); sys++) {
- for (int curve = CData->psys_firstcurve[sys];
- curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys];
- curve++) {
- numverts += (CData->curve_keynum[curve] - 1) * resolution + resolution;
- numtris += (CData->curve_keynum[curve] - 1) * 2 * resolution;
- }
- }
-
- mesh->reserve_mesh(mesh->verts.size() + numverts, mesh->num_triangles() + numtris);
-
- /* actually export */
- for (int sys = 0; sys < CData->psys_firstcurve.size(); sys++) {
- for (int curve = CData->psys_firstcurve[sys];
- curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys];
- curve++) {
- float3 firstxbasis = cross(make_float3(1.0f, 0.0f, 0.0f),
- CData->curvekey_co[CData->curve_firstkey[curve] + 1] -
- CData->curvekey_co[CData->curve_firstkey[curve]]);
- if (!is_zero(firstxbasis))
- firstxbasis = normalize(firstxbasis);
- else
- firstxbasis = normalize(cross(make_float3(0.0f, 1.0f, 0.0f),
- CData->curvekey_co[CData->curve_firstkey[curve] + 1] -
- CData->curvekey_co[CData->curve_firstkey[curve]]));
-
- for (int curvekey = CData->curve_firstkey[curve];
- curvekey < CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1;
- curvekey++) {
- float3 xbasis = firstxbasis;
- float3 v1;
- float3 v2;
-
- if (curvekey == CData->curve_firstkey[curve]) {
- v1 = CData->curvekey_co[min(
- curvekey + 2, CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1)] -
- CData->curvekey_co[curvekey + 1];
- v2 = CData->curvekey_co[curvekey + 1] - CData->curvekey_co[curvekey];
- }
- else if (curvekey == CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1) {
- v1 = CData->curvekey_co[curvekey] - CData->curvekey_co[curvekey - 1];
- v2 = CData->curvekey_co[curvekey - 1] -
- CData->curvekey_co[max(curvekey - 2, CData->curve_firstkey[curve])];
- }
- else {
- v1 = CData->curvekey_co[curvekey + 1] - CData->curvekey_co[curvekey];
- v2 = CData->curvekey_co[curvekey] - CData->curvekey_co[curvekey - 1];
- }
-
- xbasis = cross(v1, v2);
-
- if (len_squared(xbasis) >= 0.05f * len_squared(v1) * len_squared(v2)) {
- firstxbasis = normalize(xbasis);
- break;
- }
- }
-
- for (int curvekey = CData->curve_firstkey[curve];
- curvekey < CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1;
- curvekey++) {
- int subv = 1;
- float3 xbasis;
- float3 ybasis;
- float3 v1;
- float3 v2;
-
- if (curvekey == CData->curve_firstkey[curve]) {
- subv = 0;
- v1 = CData->curvekey_co[min(
- curvekey + 2, CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1)] -
- CData->curvekey_co[curvekey + 1];
- v2 = CData->curvekey_co[curvekey + 1] - CData->curvekey_co[curvekey];
- }
- else if (curvekey == CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1) {
- v1 = CData->curvekey_co[curvekey] - CData->curvekey_co[curvekey - 1];
- v2 = CData->curvekey_co[curvekey - 1] -
- CData->curvekey_co[max(curvekey - 2, CData->curve_firstkey[curve])];
- }
- else {
- v1 = CData->curvekey_co[curvekey + 1] - CData->curvekey_co[curvekey];
- v2 = CData->curvekey_co[curvekey] - CData->curvekey_co[curvekey - 1];
- }
-
- xbasis = cross(v1, v2);
-
- if (len_squared(xbasis) >= 0.05f * len_squared(v1) * len_squared(v2)) {
- xbasis = normalize(xbasis);
- firstxbasis = xbasis;
- }
- else
- xbasis = firstxbasis;
-
- ybasis = normalize(cross(xbasis, v2));
-
- for (; subv <= 1; subv++) {
- float3 ickey_loc = make_float3(0.0f, 0.0f, 0.0f);
- float time = 0.0f;
-
- InterpolateKeySegments(subv, 1, curvekey, curve, &ickey_loc, &time, CData);
-
- float radius = shaperadius(CData->psys_shape[sys],
- CData->psys_rootradius[sys],
- CData->psys_tipradius[sys],
- time);
-
- if ((curvekey == CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 2) &&
- (subv == 1))
- radius = shaperadius(CData->psys_shape[sys],
- CData->psys_rootradius[sys],
- CData->psys_tipradius[sys],
- 0.95f);
-
- if (CData->psys_closetip[sys] && (subv == 1) &&
- (curvekey == CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 2))
- radius = shaperadius(CData->psys_shape[sys], CData->psys_rootradius[sys], 0.0f, 0.95f);
-
- float angle = M_2PI_F / (float)resolution;
- for (int section = 0; section < resolution; section++) {
- float3 ickey_loc_shf = ickey_loc + radius * (cosf(angle * section) * xbasis +
- sinf(angle * section) * ybasis);
- mesh->add_vertex(ickey_loc_shf);
- }
-
- if (subv != 0) {
- for (int section = 0; section < resolution - 1; section++) {
- mesh->add_triangle(vertexindex - resolution + section,
- vertexindex + section,
- vertexindex - resolution + section + 1,
- CData->psys_shader[sys],
- true);
- mesh->add_triangle(vertexindex + section + 1,
- vertexindex - resolution + section + 1,
- vertexindex + section,
- CData->psys_shader[sys],
- true);
- }
- mesh->add_triangle(vertexindex - 1,
- vertexindex + resolution - 1,
- vertexindex - resolution,
- CData->psys_shader[sys],
- true);
- mesh->add_triangle(vertexindex,
- vertexindex - resolution,
- vertexindex + resolution - 1,
- CData->psys_shader[sys],
- true);
- }
- vertexindex += resolution;
- }
- }
- }
- }
-
- mesh->resize_mesh(mesh->verts.size(), mesh->num_triangles());
- mesh->attributes.remove(ATTR_STD_VERTEX_NORMAL);
- mesh->attributes.remove(ATTR_STD_FACE_NORMAL);
- mesh->add_face_normals();
- mesh->add_vertex_normals();
- mesh->attributes.remove(ATTR_STD_FACE_NORMAL);
-
- /* texture coords still needed */
-}
-
static void ExportCurveSegments(Scene *scene, Hair *hair, ParticleCurveData *CData)
{
int num_keys = 0;
@@ -823,154 +505,8 @@ static void ExportCurveSegmentsMotion(Hair *hair, ParticleCurveData *CData, int
}
}
-static void ExportCurveTriangleUV(ParticleCurveData *CData, int resol, float2 *uvdata)
-{
- if (uvdata == NULL)
- return;
- int vertexindex = 0;
-
- for (int sys = 0; sys < CData->psys_firstcurve.size(); sys++) {
- for (int curve = CData->psys_firstcurve[sys];
- curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys];
- curve++) {
- for (int curvekey = CData->curve_firstkey[curve];
- curvekey < CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1;
- curvekey++) {
- for (int section = 0; section < resol; section++) {
- uvdata[vertexindex] = CData->curve_uv[curve];
- vertexindex++;
- uvdata[vertexindex] = CData->curve_uv[curve];
- vertexindex++;
- uvdata[vertexindex] = CData->curve_uv[curve];
- vertexindex++;
- uvdata[vertexindex] = CData->curve_uv[curve];
- vertexindex++;
- uvdata[vertexindex] = CData->curve_uv[curve];
- vertexindex++;
- uvdata[vertexindex] = CData->curve_uv[curve];
- vertexindex++;
- }
- }
- }
- }
-}
-
-static void ExportCurveTriangleVcol(ParticleCurveData *CData, int resol, uchar4 *cdata)
-{
- if (cdata == NULL)
- return;
-
- int vertexindex = 0;
-
- for (int sys = 0; sys < CData->psys_firstcurve.size(); sys++) {
- for (int curve = CData->psys_firstcurve[sys];
- curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys];
- curve++) {
- for (int curvekey = CData->curve_firstkey[curve];
- curvekey < CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1;
- curvekey++) {
- for (int section = 0; section < resol; section++) {
- /* Encode vertex color using the sRGB curve. */
- cdata[vertexindex] = color_float_to_byte(
- color_srgb_to_linear_v3(CData->curve_vcol[curve]));
- vertexindex++;
- cdata[vertexindex] = color_float_to_byte(
- color_srgb_to_linear_v3(CData->curve_vcol[curve]));
- vertexindex++;
- cdata[vertexindex] = color_float_to_byte(
- color_srgb_to_linear_v3(CData->curve_vcol[curve]));
- vertexindex++;
- cdata[vertexindex] = color_float_to_byte(
- color_srgb_to_linear_v3(CData->curve_vcol[curve]));
- vertexindex++;
- cdata[vertexindex] = color_float_to_byte(
- color_srgb_to_linear_v3(CData->curve_vcol[curve]));
- vertexindex++;
- cdata[vertexindex] = color_float_to_byte(
- color_srgb_to_linear_v3(CData->curve_vcol[curve]));
- vertexindex++;
- }
- }
- }
- }
-}
-
/* Hair Curve Sync */
-void BlenderSync::sync_curve_settings(BL::Depsgraph &b_depsgraph)
-{
- PointerRNA csscene = RNA_pointer_get(&b_scene.ptr, "cycles_curves");
-
- CurveSystemManager *curve_system_manager = scene->curve_system_manager;
- CurveSystemManager prev_curve_system_manager = *curve_system_manager;
-
- curve_system_manager->use_curves = get_boolean(csscene, "use_curves");
-
- curve_system_manager->primitive = (CurvePrimitiveType)get_enum(
- csscene, "primitive", CURVE_NUM_PRIMITIVE_TYPES, CURVE_LINE_SEGMENTS);
- curve_system_manager->curve_shape = (CurveShapeType)get_enum(
- csscene, "shape", CURVE_NUM_SHAPE_TYPES, CURVE_THICK);
- curve_system_manager->resolution = get_int(csscene, "resolution");
- curve_system_manager->subdivisions = get_int(csscene, "subdivisions");
- curve_system_manager->use_backfacing = !get_boolean(csscene, "cull_backfacing");
-
- /* Triangles */
- if (curve_system_manager->primitive == CURVE_TRIANGLES) {
- /* camera facing planes */
- if (curve_system_manager->curve_shape == CURVE_RIBBON) {
- curve_system_manager->triangle_method = CURVE_CAMERA_TRIANGLES;
- curve_system_manager->resolution = 1;
- }
- else if (curve_system_manager->curve_shape == CURVE_THICK) {
- curve_system_manager->triangle_method = CURVE_TESSELATED_TRIANGLES;
- }
- }
- /* Line Segments */
- else if (curve_system_manager->primitive == CURVE_LINE_SEGMENTS) {
- if (curve_system_manager->curve_shape == CURVE_RIBBON) {
- /* tangent shading */
- curve_system_manager->line_method = CURVE_UNCORRECTED;
- curve_system_manager->use_encasing = true;
- curve_system_manager->use_backfacing = false;
- curve_system_manager->use_tangent_normal_geometry = true;
- }
- else if (curve_system_manager->curve_shape == CURVE_THICK) {
- curve_system_manager->line_method = CURVE_ACCURATE;
- curve_system_manager->use_encasing = false;
- curve_system_manager->use_tangent_normal_geometry = false;
- }
- }
- /* Curve Segments */
- else if (curve_system_manager->primitive == CURVE_SEGMENTS) {
- if (curve_system_manager->curve_shape == CURVE_RIBBON) {
- curve_system_manager->primitive = CURVE_RIBBONS;
- curve_system_manager->use_backfacing = false;
- }
- }
-
- if (curve_system_manager->modified_mesh(prev_curve_system_manager)) {
- BL::Depsgraph::objects_iterator b_ob;
-
- for (b_depsgraph.objects.begin(b_ob); b_ob != b_data.objects.end(); ++b_ob) {
- if (object_is_mesh(*b_ob)) {
- BL::Object::particle_systems_iterator b_psys;
- for (b_ob->particle_systems.begin(b_psys); b_psys != b_ob->particle_systems.end();
- ++b_psys) {
- if ((b_psys->settings().render_type() == BL::ParticleSettings::render_type_PATH) &&
- (b_psys->settings().type() == BL::ParticleSettings::type_HAIR)) {
- BL::ID key = BKE_object_is_modified(*b_ob) ? *b_ob : b_ob->data();
- geometry_map.set_recalc(key);
- object_map.set_recalc(*b_ob);
- }
- }
- }
- }
- }
-
- if (curve_system_manager->modified(prev_curve_system_manager))
- curve_system_manager->tag_update(scene);
-}
-
bool BlenderSync::object_has_particle_hair(BL::Object b_ob)
{
/* Test if the object has a particle modifier with hair. */
@@ -994,78 +530,38 @@ bool BlenderSync::object_has_particle_hair(BL::Object b_ob)
/* Old particle hair. */
void BlenderSync::sync_particle_hair(
- Geometry *geom, BL::Mesh &b_mesh, BL::Object &b_ob, bool motion, int motion_step)
+ Hair *hair, BL::Mesh &b_mesh, BL::Object &b_ob, bool motion, int motion_step)
{
- Hair *hair = (geom->type == Geometry::HAIR) ? static_cast<Hair *>(geom) : NULL;
- Mesh *mesh = (geom->type == Geometry::MESH) ? static_cast<Mesh *>(geom) : NULL;
-
/* obtain general settings */
if (b_ob.mode() == b_ob.mode_PARTICLE_EDIT || b_ob.mode() == b_ob.mode_EDIT) {
return;
}
- const int triangle_method = scene->curve_system_manager->triangle_method;
- const int resolution = scene->curve_system_manager->resolution;
- int used_res = 1;
-
/* extract particle hair data - should be combined with connecting to mesh later*/
ParticleCurveData CData;
- ObtainCacheParticleData(geom, &b_mesh, &b_ob, &CData, !preview);
-
- /* add hair geometry to mesh */
- if (mesh) {
- if (triangle_method == CURVE_CAMERA_TRIANGLES) {
- /* obtain camera parameters */
- float3 RotCam;
- Camera *camera = scene->camera;
- Transform &ctfm = camera->matrix;
- if (camera->type == CAMERA_ORTHOGRAPHIC) {
- RotCam = -make_float3(ctfm.x.z, ctfm.y.z, ctfm.z.z);
- }
- else {
- Transform tfm = get_transform(b_ob.matrix_world());
- Transform itfm = transform_quick_inverse(tfm);
- RotCam = transform_point(&itfm, make_float3(ctfm.x.w, ctfm.y.w, ctfm.z.w));
- }
- bool is_ortho = camera->type == CAMERA_ORTHOGRAPHIC;
- ExportCurveTrianglePlanes(mesh, &CData, RotCam, is_ortho);
- }
- else {
- ExportCurveTriangleGeometry(mesh, &CData, resolution);
- used_res = resolution;
- }
- }
- else {
- if (motion)
- ExportCurveSegmentsMotion(hair, &CData, motion_step);
- else
- ExportCurveSegments(scene, hair, &CData);
- }
+ ObtainCacheParticleData(hair, &b_mesh, &b_ob, &CData, !preview);
+
+ /* add hair geometry */
+ if (motion)
+ ExportCurveSegmentsMotion(hair, &CData, motion_step);
+ else
+ ExportCurveSegments(scene, hair, &CData);
/* generated coordinates from first key. we should ideally get this from
* blender to handle deforming objects */
if (!motion) {
- if (geom->need_attribute(scene, ATTR_STD_GENERATED)) {
+ if (hair->need_attribute(scene, ATTR_STD_GENERATED)) {
float3 loc, size;
mesh_texture_space(b_mesh, loc, size);
- if (mesh) {
- Attribute *attr_generated = mesh->attributes.add(ATTR_STD_GENERATED);
- float3 *generated = attr_generated->data_float3();
-
- for (size_t i = 0; i < mesh->verts.size(); i++)
- generated[i] = mesh->verts[i] * size - loc;
- }
- else {
- Attribute *attr_generated = hair->attributes.add(ATTR_STD_GENERATED);
- float3 *generated = attr_generated->data_float3();
+ Attribute *attr_generated = hair->attributes.add(ATTR_STD_GENERATED);
+ float3 *generated = attr_generated->data_float3();
- for (size_t i = 0; i < hair->num_curves(); i++) {
- float3 co = hair->curve_keys[hair->get_curve(i).first_key];
- generated[i] = co * size - loc;
- }
+ for (size_t i = 0; i < hair->num_curves(); i++) {
+ float3 co = hair->curve_keys[hair->get_curve(i).first_key];
+ generated[i] = co * size - loc;
}
}
}
@@ -1076,32 +572,22 @@ void BlenderSync::sync_particle_hair(
int vcol_num = 0;
for (b_mesh.vertex_colors.begin(l); l != b_mesh.vertex_colors.end(); ++l, vcol_num++) {
- if (!geom->need_attribute(scene, ustring(l->name().c_str())))
+ if (!hair->need_attribute(scene, ustring(l->name().c_str())))
continue;
- ObtainCacheParticleVcol(geom, &b_mesh, &b_ob, &CData, !preview, vcol_num);
+ ObtainCacheParticleVcol(hair, &b_mesh, &b_ob, &CData, !preview, vcol_num);
- if (mesh) {
- Attribute *attr_vcol = mesh->attributes.add(
- ustring(l->name().c_str()), TypeDesc::TypeColor, ATTR_ELEMENT_CORNER_BYTE);
+ Attribute *attr_vcol = hair->attributes.add(
+ ustring(l->name().c_str()), TypeRGBA, ATTR_ELEMENT_CURVE);
- uchar4 *cdata = attr_vcol->data_uchar4();
+ float4 *fdata = attr_vcol->data_float4();
- ExportCurveTriangleVcol(&CData, used_res, cdata);
- }
- else {
- Attribute *attr_vcol = hair->attributes.add(
- ustring(l->name().c_str()), TypeDesc::TypeColor, ATTR_ELEMENT_CURVE);
-
- float3 *fdata = attr_vcol->data_float3();
+ if (fdata) {
+ size_t i = 0;
- if (fdata) {
- size_t i = 0;
-
- /* Encode vertex color using the sRGB curve. */
- for (size_t curve = 0; curve < CData.curve_vcol.size(); curve++) {
- fdata[i++] = color_srgb_to_linear_v3(CData.curve_vcol[curve]);
- }
+ /* Encode vertex color using the sRGB curve. */
+ for (size_t curve = 0; curve < CData.curve_vcol.size(); curve++) {
+ fdata[i++] = color_srgb_to_linear_v4(CData.curve_vcol[curve]);
}
}
}
@@ -1118,35 +604,23 @@ void BlenderSync::sync_particle_hair(
ustring name = ustring(l->name().c_str());
/* UV map */
- if (geom->need_attribute(scene, name) || geom->need_attribute(scene, std)) {
+ if (hair->need_attribute(scene, name) || hair->need_attribute(scene, std)) {
Attribute *attr_uv;
- ObtainCacheParticleUV(geom, &b_mesh, &b_ob, &CData, !preview, uv_num);
+ ObtainCacheParticleUV(hair, &b_mesh, &b_ob, &CData, !preview, uv_num);
- if (mesh) {
- if (active_render)
- attr_uv = mesh->attributes.add(std, name);
- else
- attr_uv = mesh->attributes.add(name, TypeFloat2, ATTR_ELEMENT_CORNER);
-
- float2 *uv = attr_uv->data_float2();
-
- ExportCurveTriangleUV(&CData, used_res, uv);
- }
- else {
- if (active_render)
- attr_uv = hair->attributes.add(std, name);
- else
- attr_uv = hair->attributes.add(name, TypeFloat2, ATTR_ELEMENT_CURVE);
+ if (active_render)
+ attr_uv = hair->attributes.add(std, name);
+ else
+ attr_uv = hair->attributes.add(name, TypeFloat2, ATTR_ELEMENT_CURVE);
- float2 *uv = attr_uv->data_float2();
+ float2 *uv = attr_uv->data_float2();
- if (uv) {
- size_t i = 0;
+ if (uv) {
+ size_t i = 0;
- for (size_t curve = 0; curve < CData.curve_uv.size(); curve++) {
- uv[i++] = CData.curve_uv[curve];
- }
+ for (size_t curve = 0; curve < CData.curve_uv.size(); curve++) {
+ uv[i++] = CData.curve_uv[curve];
}
}
}
@@ -1154,7 +628,6 @@ void BlenderSync::sync_particle_hair(
}
}
-#ifdef WITH_NEW_OBJECT_TYPES
static float4 hair_point_as_float4(BL::HairPoint b_point)
{
float4 mP = float3_to_float4(get_float3(b_point.co()));
@@ -1320,12 +793,10 @@ static void export_hair_curves_motion(Hair *hair, BL::Hair b_hair, int motion_st
export_hair_motion_validate_attribute(hair, motion_step, num_motion_keys, have_motion);
}
}
-#endif /* WITH_NEW_OBJECT_TYPES */
/* Hair object. */
void BlenderSync::sync_hair(Hair *hair, BL::Object &b_ob, bool motion, int motion_step)
{
-#ifdef WITH_NEW_OBJECT_TYPES
/* Convert Blender hair to Cycles curves. */
BL::Hair b_hair(b_ob.data());
if (motion) {
@@ -1334,97 +805,70 @@ void BlenderSync::sync_hair(Hair *hair, BL::Object &b_ob, bool motion, int motio
else {
export_hair_curves(scene, hair, b_hair);
}
-#else
- (void)hair;
- (void)b_ob;
- (void)motion;
- (void)motion_step;
-#endif /* WITH_NEW_OBJECT_TYPES */
}
void BlenderSync::sync_hair(BL::Depsgraph b_depsgraph,
BL::Object b_ob,
- Geometry *geom,
+ Hair *hair,
const vector<Shader *> &used_shaders)
{
- Hair *hair = (geom->type == Geometry::HAIR) ? static_cast<Hair *>(geom) : NULL;
- Mesh *mesh = (geom->type == Geometry::MESH) ? static_cast<Mesh *>(geom) : NULL;
-
/* Compares curve_keys rather than strands in order to handle quick hair
* adjustments in dynamic BVH - other methods could probably do this better. */
array<float3> oldcurve_keys;
array<float> oldcurve_radius;
- array<int> oldtriangles;
- if (hair) {
- oldcurve_keys.steal_data(hair->curve_keys);
- oldcurve_radius.steal_data(hair->curve_radius);
- }
- else {
- oldtriangles.steal_data(mesh->triangles);
- }
+ oldcurve_keys.steal_data(hair->curve_keys);
+ oldcurve_radius.steal_data(hair->curve_radius);
- geom->clear();
- geom->used_shaders = used_shaders;
+ hair->clear();
+ hair->used_shaders = used_shaders;
- if (view_layer.use_hair && scene->curve_system_manager->use_curves) {
-#ifdef WITH_NEW_OBJECT_TYPES
+ if (view_layer.use_hair) {
if (b_ob.type() == BL::Object::type_HAIR) {
/* Hair object. */
sync_hair(hair, b_ob, false);
- assert(mesh == NULL);
}
- else
-#endif
- {
+ else {
/* Particle hair. */
- bool need_undeformed = geom->need_attribute(scene, ATTR_STD_GENERATED);
+ bool need_undeformed = hair->need_attribute(scene, ATTR_STD_GENERATED);
BL::Mesh b_mesh = object_to_mesh(
b_data, b_ob, b_depsgraph, need_undeformed, Mesh::SUBDIVISION_NONE);
if (b_mesh) {
- sync_particle_hair(geom, b_mesh, b_ob, false);
+ sync_particle_hair(hair, b_mesh, b_ob, false);
free_object_to_mesh(b_data, b_ob, b_mesh);
}
}
}
/* tag update */
- const bool rebuild = (hair && ((oldcurve_keys != hair->curve_keys) ||
- (oldcurve_radius != hair->curve_radius))) ||
- (mesh && (oldtriangles != mesh->triangles));
+ const bool rebuild = ((oldcurve_keys != hair->curve_keys) ||
+ (oldcurve_radius != hair->curve_radius));
- geom->tag_update(scene, rebuild);
+ hair->tag_update(scene, rebuild);
}
void BlenderSync::sync_hair_motion(BL::Depsgraph b_depsgraph,
BL::Object b_ob,
- Geometry *geom,
+ Hair *hair,
int motion_step)
{
- Hair *hair = (geom->type == Geometry::HAIR) ? static_cast<Hair *>(geom) : NULL;
- Mesh *mesh = (geom->type == Geometry::MESH) ? static_cast<Mesh *>(geom) : NULL;
-
/* Skip if nothing exported. */
- if ((hair && hair->num_keys() == 0) || (mesh && mesh->verts.size() == 0)) {
+ if (hair->num_keys() == 0) {
return;
}
/* Export deformed coordinates. */
if (ccl::BKE_object_is_deform_modified(b_ob, b_scene, preview)) {
-#ifdef WITH_NEW_OBJECT_TYPES
if (b_ob.type() == BL::Object::type_HAIR) {
/* Hair object. */
sync_hair(hair, b_ob, true, motion_step);
- assert(mesh == NULL);
return;
}
- else
-#endif
- {
+ else {
/* Particle hair. */
BL::Mesh b_mesh = object_to_mesh(b_data, b_ob, b_depsgraph, false, Mesh::SUBDIVISION_NONE);
if (b_mesh) {
- sync_particle_hair(geom, b_mesh, b_ob, true, motion_step);
+ sync_particle_hair(hair, b_mesh, b_ob, true, motion_step);
free_object_to_mesh(b_data, b_ob, b_mesh);
return;
}
@@ -1432,12 +876,7 @@ void BlenderSync::sync_hair_motion(BL::Depsgraph b_depsgraph,
}
/* No deformation on this frame, copy coordinates if other frames did have it. */
- if (hair) {
- hair->copy_center_to_motion_step(motion_step);
- }
- else {
- mesh->copy_center_to_motion_step(motion_step);
- }
+ hair->copy_center_to_motion_step(motion_step);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/blender/blender_device.cpp b/intern/cycles/blender/blender_device.cpp
index ac52948806c..fb9ab9e8c97 100644
--- a/intern/cycles/blender/blender_device.cpp
+++ b/intern/cycles/blender/blender_device.cpp
@@ -21,13 +21,6 @@
CCL_NAMESPACE_BEGIN
-enum DenoiserType {
- DENOISER_NONE = 0,
- DENOISER_OPTIX = 1,
-
- DENOISER_NUM
-};
-
enum ComputeDevice {
COMPUTE_DEVICE_CPU = 0,
COMPUTE_DEVICE_CUDA = 1,
@@ -120,49 +113,6 @@ DeviceInfo blender_device_info(BL::Preferences &b_preferences, BL::Scene &b_scen
}
}
- /* Ensure there is an OptiX device when using the OptiX denoiser. */
- bool use_optix_denoising = get_enum(cscene, "preview_denoising", DENOISER_NUM, DENOISER_NONE) ==
- DENOISER_OPTIX &&
- !background;
- BL::Scene::view_layers_iterator b_view_layer;
- for (b_scene.view_layers.begin(b_view_layer); b_view_layer != b_scene.view_layers.end();
- ++b_view_layer) {
- PointerRNA crl = RNA_pointer_get(&b_view_layer->ptr, "cycles");
- if (get_boolean(crl, "use_optix_denoising")) {
- use_optix_denoising = true;
- }
- }
-
- if (use_optix_denoising && device.type != DEVICE_OPTIX) {
- vector<DeviceInfo> optix_devices = Device::available_devices(DEVICE_MASK_OPTIX);
- if (!optix_devices.empty()) {
- /* Convert to a special multi device with separate denoising devices. */
- if (device.multi_devices.empty()) {
- device.multi_devices.push_back(device);
- }
-
- /* Try to use the same physical devices for denoising. */
- for (const DeviceInfo &cuda_device : device.multi_devices) {
- if (cuda_device.type == DEVICE_CUDA) {
- for (const DeviceInfo &optix_device : optix_devices) {
- if (cuda_device.num == optix_device.num) {
- device.id += optix_device.id;
- device.denoising_devices.push_back(optix_device);
- break;
- }
- }
- }
- }
-
- if (device.denoising_devices.empty()) {
- /* Simply use the first available OptiX device. */
- const DeviceInfo optix_device = optix_devices.front();
- device.id += optix_device.id; /* Uniquely identify this special multi device. */
- device.denoising_devices.push_back(optix_device);
- }
- }
- }
-
return device;
}
diff --git a/intern/cycles/blender/blender_geometry.cpp b/intern/cycles/blender/blender_geometry.cpp
index 7ca35cff961..f7e4623024d 100644
--- a/intern/cycles/blender/blender_geometry.cpp
+++ b/intern/cycles/blender/blender_geometry.cpp
@@ -40,17 +40,9 @@ Geometry *BlenderSync::sync_geometry(BL::Depsgraph &b_depsgraph,
BL::Material material_override = view_layer.material_override;
Shader *default_shader = (b_ob.type() == BL::Object::type_VOLUME) ? scene->default_volume :
scene->default_surface;
-#ifdef WITH_NEW_OBJECT_TYPES
- Geometry::Type geom_type = ((b_ob.type() == BL::Object::type_HAIR || use_particle_hair) &&
- (scene->curve_system_manager->primitive != CURVE_TRIANGLES)) ?
+ Geometry::Type geom_type = (b_ob.type() == BL::Object::type_HAIR || use_particle_hair) ?
Geometry::HAIR :
Geometry::MESH;
-#else
- Geometry::Type geom_type = ((use_particle_hair) &&
- (scene->curve_system_manager->primitive != CURVE_TRIANGLES)) ?
- Geometry::HAIR :
- Geometry::MESH;
-#endif
/* Find shader indices. */
vector<Shader *> used_shaders;
@@ -129,12 +121,9 @@ Geometry *BlenderSync::sync_geometry(BL::Depsgraph &b_depsgraph,
geom->name = ustring(b_ob_data.name().c_str());
-#ifdef WITH_NEW_OBJECT_TYPES
if (b_ob.type() == BL::Object::type_HAIR || use_particle_hair) {
-#else
- if (use_particle_hair) {
-#endif
- sync_hair(b_depsgraph, b_ob, geom, used_shaders);
+ Hair *hair = static_cast<Hair *>(geom);
+ sync_hair(b_depsgraph, b_ob, hair, used_shaders);
}
else if (b_ob.type() == BL::Object::type_VOLUME || object_fluid_gas_domain_find(b_ob)) {
Mesh *mesh = static_cast<Mesh *>(geom);
@@ -173,12 +162,9 @@ void BlenderSync::sync_geometry_motion(BL::Depsgraph &b_depsgraph,
return;
}
-#ifdef WITH_NEW_OBJECT_TYPES
if (b_ob.type() == BL::Object::type_HAIR || use_particle_hair) {
-#else
- if (use_particle_hair) {
-#endif
- sync_hair_motion(b_depsgraph, b_ob, geom, motion_step);
+ Hair *hair = static_cast<Hair *>(geom);
+ sync_hair_motion(b_depsgraph, b_ob, hair, motion_step);
}
else if (b_ob.type() == BL::Object::type_VOLUME || object_fluid_gas_domain_find(b_ob)) {
/* No volume motion blur support yet. */
diff --git a/intern/cycles/blender/blender_mesh.cpp b/intern/cycles/blender/blender_mesh.cpp
index a6f380a9ae7..49407799fcd 100644
--- a/intern/cycles/blender/blender_mesh.cpp
+++ b/intern/cycles/blender/blender_mesh.cpp
@@ -278,25 +278,59 @@ static void mikk_compute_tangents(
genTangSpaceDefault(&context);
}
+/* Create sculpt vertex color attributes. */
+static void attr_create_sculpt_vertex_color(Scene *scene,
+ Mesh *mesh,
+ BL::Mesh &b_mesh,
+ bool subdivision)
+{
+ BL::Mesh::sculpt_vertex_colors_iterator l;
+
+ for (b_mesh.sculpt_vertex_colors.begin(l); l != b_mesh.sculpt_vertex_colors.end(); ++l) {
+ const bool active_render = l->active_render();
+ AttributeStandard vcol_std = (active_render) ? ATTR_STD_VERTEX_COLOR : ATTR_STD_NONE;
+ ustring vcol_name = ustring(l->name().c_str());
+
+ const bool need_vcol = mesh->need_attribute(scene, vcol_name) ||
+ mesh->need_attribute(scene, vcol_std);
+
+ if (!need_vcol) {
+ continue;
+ }
+
+ AttributeSet &attributes = (subdivision) ? mesh->subd_attributes : mesh->attributes;
+ Attribute *vcol_attr = attributes.add(vcol_name, TypeRGBA, ATTR_ELEMENT_VERTEX);
+ vcol_attr->std = vcol_std;
+
+ float4 *cdata = vcol_attr->data_float4();
+ int numverts = b_mesh.vertices.length();
+
+ for (int i = 0; i < numverts; i++) {
+ *(cdata++) = get_float4(l->data[i].color());
+ }
+ }
+}
+
/* Create vertex color attributes. */
static void attr_create_vertex_color(Scene *scene, Mesh *mesh, BL::Mesh &b_mesh, bool subdivision)
{
- if (subdivision) {
- BL::Mesh::vertex_colors_iterator l;
+ BL::Mesh::vertex_colors_iterator l;
- for (b_mesh.vertex_colors.begin(l); l != b_mesh.vertex_colors.end(); ++l) {
- const bool active_render = l->active_render();
- AttributeStandard vcol_std = (active_render) ? ATTR_STD_VERTEX_COLOR : ATTR_STD_NONE;
- ustring vcol_name = ustring(l->name().c_str());
+ for (b_mesh.vertex_colors.begin(l); l != b_mesh.vertex_colors.end(); ++l) {
+ const bool active_render = l->active_render();
+ AttributeStandard vcol_std = (active_render) ? ATTR_STD_VERTEX_COLOR : ATTR_STD_NONE;
+ ustring vcol_name = ustring(l->name().c_str());
- const bool need_vcol = mesh->need_attribute(scene, vcol_name) ||
- mesh->need_attribute(scene, vcol_std);
+ const bool need_vcol = mesh->need_attribute(scene, vcol_name) ||
+ mesh->need_attribute(scene, vcol_std);
- if (!need_vcol) {
- continue;
- }
+ if (!need_vcol) {
+ continue;
+ }
- Attribute *vcol_attr = NULL;
+ Attribute *vcol_attr = NULL;
+
+ if (subdivision) {
if (active_render) {
vcol_attr = mesh->subd_attributes.add(vcol_std, vcol_name);
}
@@ -316,22 +350,7 @@ static void attr_create_vertex_color(Scene *scene, Mesh *mesh, BL::Mesh &b_mesh,
}
}
}
- }
- else {
- BL::Mesh::vertex_colors_iterator l;
- for (b_mesh.vertex_colors.begin(l); l != b_mesh.vertex_colors.end(); ++l) {
- const bool active_render = l->active_render();
- AttributeStandard vcol_std = (active_render) ? ATTR_STD_VERTEX_COLOR : ATTR_STD_NONE;
- ustring vcol_name = ustring(l->name().c_str());
-
- const bool need_vcol = mesh->need_attribute(scene, vcol_name) ||
- mesh->need_attribute(scene, vcol_std);
-
- if (!need_vcol) {
- continue;
- }
-
- Attribute *vcol_attr = NULL;
+ else {
if (active_render) {
vcol_attr = mesh->attributes.add(vcol_std, vcol_name);
}
@@ -828,6 +847,7 @@ static void create_mesh(Scene *scene,
*/
attr_create_pointiness(scene, mesh, b_mesh, subdivision);
attr_create_vertex_color(scene, mesh, b_mesh, subdivision);
+ attr_create_sculpt_vertex_color(scene, mesh, b_mesh, subdivision);
attr_create_random_per_island(scene, mesh, b_mesh, subdivision);
if (subdivision) {
diff --git a/intern/cycles/blender/blender_object.cpp b/intern/cycles/blender/blender_object.cpp
index c28586d0f63..d3a37563ef4 100644
--- a/intern/cycles/blender/blender_object.cpp
+++ b/intern/cycles/blender/blender_object.cpp
@@ -69,11 +69,7 @@ bool BlenderSync::object_is_mesh(BL::Object &b_ob)
BL::Object::type_enum type = b_ob.type();
-#ifdef WITH_NEW_OBJECT_TYPES
if (type == BL::Object::type_VOLUME || type == BL::Object::type_HAIR) {
-#else
- if (type == BL::Object::type_VOLUME) {
-#endif
/* Will be exported attached to mesh. */
return true;
}
diff --git a/intern/cycles/blender/blender_python.cpp b/intern/cycles/blender/blender_python.cpp
index 0be19dbffd1..3e595c3ee52 100644
--- a/intern/cycles/blender/blender_python.cpp
+++ b/intern/cycles/blender/blender_python.cpp
@@ -31,8 +31,10 @@
#include "util/util_logging.h"
#include "util/util_md5.h"
#include "util/util_opengl.h"
+#include "util/util_openimagedenoise.h"
#include "util/util_path.h"
#include "util/util_string.h"
+#include "util/util_task.h"
#include "util/util_types.h"
#ifdef WITH_OSL
@@ -1075,5 +1077,14 @@ void *CCL_python_module_init()
Py_INCREF(Py_False);
#endif /* WITH_EMBREE */
+ if (ccl::openimagedenoise_supported()) {
+ PyModule_AddObject(mod, "with_openimagedenoise", Py_True);
+ Py_INCREF(Py_True);
+ }
+ else {
+ PyModule_AddObject(mod, "with_openimagedenoise", Py_False);
+ Py_INCREF(Py_False);
+ }
+
return (void *)mod;
}
diff --git a/intern/cycles/blender/blender_session.cpp b/intern/cycles/blender/blender_session.cpp
index dbe87ce2b13..391a1b8f473 100644
--- a/intern/cycles/blender/blender_session.cpp
+++ b/intern/cycles/blender/blender_session.cpp
@@ -158,7 +158,7 @@ void BlenderSession::create_session()
/* set buffer parameters */
BufferParams buffer_params = BlenderSync::get_buffer_params(
- b_scene, b_render, b_v3d, b_rv3d, scene->camera, width, height);
+ b_render, b_v3d, b_rv3d, scene->camera, width, height, session_params.denoising.use);
session->reset(buffer_params, session_params.samples);
b_engine.use_highlight_tiles(session_params.progressive_refine == false);
@@ -239,8 +239,13 @@ void BlenderSession::reset_session(BL::BlendData &b_data, BL::Depsgraph &b_depsg
BL::SpaceView3D b_null_space_view3d(PointerRNA_NULL);
BL::RegionView3D b_null_region_view3d(PointerRNA_NULL);
- BufferParams buffer_params = BlenderSync::get_buffer_params(
- b_scene, b_render, b_null_space_view3d, b_null_region_view3d, scene->camera, width, height);
+ BufferParams buffer_params = BlenderSync::get_buffer_params(b_render,
+ b_null_space_view3d,
+ b_null_region_view3d,
+ scene->camera,
+ width,
+ height,
+ session_params.denoising.use);
session->reset(buffer_params, session_params.samples);
b_engine.use_highlight_tiles(session_params.progressive_refine == false);
@@ -468,14 +473,13 @@ void BlenderSession::render(BL::Depsgraph &b_depsgraph_)
session->update_render_tile_cb = function_bind(
&BlenderSession::update_render_tile, this, _1, _2);
+ BL::ViewLayer b_view_layer = b_depsgraph.view_layer_eval();
+
/* get buffer parameters */
SessionParams session_params = BlenderSync::get_session_params(
- b_engine, b_userpref, b_scene, background);
+ b_engine, b_userpref, b_scene, background, b_view_layer);
BufferParams buffer_params = BlenderSync::get_buffer_params(
- b_scene, b_render, b_v3d, b_rv3d, scene->camera, width, height);
-
- /* render each layer */
- BL::ViewLayer b_view_layer = b_depsgraph.view_layer_eval();
+ b_render, b_v3d, b_rv3d, scene->camera, width, height, session_params.denoising.use);
/* temporary render result to find needed passes and views */
BL::RenderResult b_rr = begin_render_result(
@@ -485,35 +489,26 @@ void BlenderSession::render(BL::Depsgraph &b_depsgraph_)
BL::RenderLayer b_rlay = *b_single_rlay;
b_rlay_name = b_view_layer.name();
- /* add passes */
- vector<Pass> passes = sync->sync_render_passes(
- b_rlay, b_view_layer, session_params.adaptive_sampling);
- buffer_params.passes = passes;
+ /* Update denoising parameters. */
+ session->set_denoising(session_params.denoising);
- PointerRNA crl = RNA_pointer_get(&b_view_layer.ptr, "cycles");
- bool use_denoising = get_boolean(crl, "use_denoising");
- bool use_optix_denoising = get_boolean(crl, "use_optix_denoising");
- bool write_denoising_passes = get_boolean(crl, "denoising_store_passes");
+ bool use_denoising = session_params.denoising.use;
+ bool store_denoising_passes = session_params.denoising.store_passes;
- buffer_params.denoising_data_pass = use_denoising || write_denoising_passes;
+ buffer_params.denoising_data_pass = use_denoising || store_denoising_passes;
buffer_params.denoising_clean_pass = (scene->film->denoising_flags & DENOISING_CLEAN_ALL_PASSES);
- buffer_params.denoising_prefiltered_pass = write_denoising_passes && !use_optix_denoising;
-
- session->params.run_denoising = use_denoising || write_denoising_passes;
- session->params.full_denoising = use_denoising && !use_optix_denoising;
- session->params.optix_denoising = use_denoising && use_optix_denoising;
- session->params.write_denoising_passes = write_denoising_passes && !use_optix_denoising;
- session->params.denoising.radius = get_int(crl, "denoising_radius");
- session->params.denoising.strength = get_float(crl, "denoising_strength");
- session->params.denoising.feature_strength = get_float(crl, "denoising_feature_strength");
- session->params.denoising.relative_pca = get_boolean(crl, "denoising_relative_pca");
- session->params.denoising.optix_input_passes = get_enum(crl, "denoising_optix_input_passes");
- session->tile_manager.schedule_denoising = session->params.run_denoising;
+ buffer_params.denoising_prefiltered_pass = store_denoising_passes &&
+ session_params.denoising.type == DENOISER_NLM;
scene->film->denoising_data_pass = buffer_params.denoising_data_pass;
scene->film->denoising_clean_pass = buffer_params.denoising_clean_pass;
scene->film->denoising_prefiltered_pass = buffer_params.denoising_prefiltered_pass;
+ /* Add passes */
+ vector<Pass> passes = sync->sync_render_passes(
+ b_rlay, b_view_layer, session_params.adaptive_sampling, session_params.denoising);
+ buffer_params.passes = passes;
+
scene->film->pass_alpha_threshold = b_view_layer.pass_alpha_threshold();
scene->film->tag_passes_update(scene, passes);
scene->film->tag_update(scene);
@@ -798,7 +793,7 @@ void BlenderSession::synchronize(BL::Depsgraph &b_depsgraph_)
/* increase samples, but never decrease */
session->set_samples(session_params.samples);
- session->set_denoising_start_sample(session_params.denoising_start_sample);
+ session->set_denoising_start_sample(session_params.denoising.start_sample);
session->set_pause(session_pause);
/* copy recalc flags, outside of mutex so we can decide to do the real
@@ -831,21 +826,17 @@ void BlenderSession::synchronize(BL::Depsgraph &b_depsgraph_)
/* get buffer parameters */
BufferParams buffer_params = BlenderSync::get_buffer_params(
- b_scene, b_render, b_v3d, b_rv3d, scene->camera, width, height);
+ b_render, b_v3d, b_rv3d, scene->camera, width, height, session_params.denoising.use);
- if (session_params.device.type != DEVICE_OPTIX &&
- session_params.device.denoising_devices.empty()) {
- /* cannot use OptiX denoising when it is not supported by the device. */
- buffer_params.denoising_data_pass = false;
- }
- else {
- session->set_denoising(buffer_params.denoising_data_pass, true);
+ if (!buffer_params.denoising_data_pass) {
+ session_params.denoising.use = false;
}
+ session->set_denoising(session_params.denoising);
+
+ /* Update film if denoising data was enabled or disabled. */
if (scene->film->denoising_data_pass != buffer_params.denoising_data_pass) {
scene->film->denoising_data_pass = buffer_params.denoising_data_pass;
-
- /* Force a scene and session reset below. */
scene->film->tag_update(scene);
}
@@ -917,7 +908,7 @@ bool BlenderSession::draw(int w, int h)
SessionParams session_params = BlenderSync::get_session_params(
b_engine, b_userpref, b_scene, background);
BufferParams buffer_params = BlenderSync::get_buffer_params(
- b_scene, b_render, b_v3d, b_rv3d, scene->camera, width, height);
+ b_render, b_v3d, b_rv3d, scene->camera, width, height, session_params.denoising.use);
bool session_pause = BlenderSync::get_session_pause(b_scene, background);
if (session_pause == false) {
@@ -935,7 +926,7 @@ bool BlenderSession::draw(int w, int h)
/* draw */
BufferParams buffer_params = BlenderSync::get_buffer_params(
- b_scene, b_render, b_v3d, b_rv3d, scene->camera, width, height);
+ b_render, b_v3d, b_rv3d, scene->camera, width, height, session->params.denoising.use);
DeviceDrawParams draw_params;
if (session->params.display_buffer_linear) {
diff --git a/intern/cycles/blender/blender_shader.cpp b/intern/cycles/blender/blender_shader.cpp
index f207d8ae07f..19d2730dc93 100644
--- a/intern/cycles/blender/blender_shader.cpp
+++ b/intern/cycles/blender/blender_shader.cpp
@@ -813,6 +813,14 @@ static ShaderNode *add_node(Scene *scene,
sky->sun_direction = normalize(get_float3(b_sky_node.sun_direction()));
sky->turbidity = b_sky_node.turbidity();
sky->ground_albedo = b_sky_node.ground_albedo();
+ sky->sun_disc = b_sky_node.sun_disc();
+ sky->sun_size = b_sky_node.sun_size();
+ sky->sun_elevation = b_sky_node.sun_elevation();
+ sky->sun_rotation = b_sky_node.sun_rotation();
+ sky->altitude = b_sky_node.altitude();
+ sky->air_density = b_sky_node.air_density();
+ sky->dust_density = b_sky_node.dust_density();
+ sky->ozone_density = b_sky_node.ozone_density();
BL::TexMapping b_texture_mapping(b_sky_node.texture_mapping());
get_tex_mapping(&sky->tex_mapping, b_texture_mapping);
node = sky;
diff --git a/intern/cycles/blender/blender_sync.cpp b/intern/cycles/blender/blender_sync.cpp
index 09813dc8c05..bf065cc5492 100644
--- a/intern/cycles/blender/blender_sync.cpp
+++ b/intern/cycles/blender/blender_sync.cpp
@@ -38,6 +38,7 @@
#include "util/util_foreach.h"
#include "util/util_hash.h"
#include "util/util_opengl.h"
+#include "util/util_openimagedenoise.h"
CCL_NAMESPACE_BEGIN
@@ -212,7 +213,6 @@ void BlenderSync::sync_data(BL::RenderSettings &b_render,
sync_film(b_v3d);
sync_shaders(b_depsgraph, b_v3d);
sync_images();
- sync_curve_settings(b_depsgraph);
geometry_synced.clear(); /* use for objects and motion sync */
@@ -538,7 +538,8 @@ int BlenderSync::get_denoising_pass(BL::RenderPass &b_pass)
vector<Pass> BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay,
BL::ViewLayer &b_view_layer,
- bool adaptive_sampling)
+ bool adaptive_sampling,
+ const DenoiseParams &denoising)
{
vector<Pass> passes;
@@ -555,16 +556,13 @@ vector<Pass> BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay,
Pass::add(pass_type, passes, b_pass.name().c_str());
}
- PointerRNA crp = RNA_pointer_get(&b_view_layer.ptr, "cycles");
- bool use_denoising = get_boolean(crp, "use_denoising");
- bool use_optix_denoising = get_boolean(crp, "use_optix_denoising");
- bool write_denoising_passes = get_boolean(crp, "denoising_store_passes");
+ PointerRNA crl = RNA_pointer_get(&b_view_layer.ptr, "cycles");
scene->film->denoising_flags = 0;
- if (use_denoising || write_denoising_passes) {
- if (!use_optix_denoising) {
+ if (denoising.use || denoising.store_passes) {
+ if (denoising.type == DENOISER_NLM) {
#define MAP_OPTION(name, flag) \
- if (!get_boolean(crp, name)) \
+ if (!get_boolean(crl, name)) \
scene->film->denoising_flags |= flag;
MAP_OPTION("denoising_diffuse_direct", DENOISING_CLEAN_DIFFUSE_DIR);
MAP_OPTION("denoising_diffuse_indirect", DENOISING_CLEAN_DIFFUSE_IND);
@@ -577,11 +575,11 @@ vector<Pass> BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay,
b_engine.add_pass("Noisy Image", 4, "RGBA", b_view_layer.name().c_str());
}
- if (write_denoising_passes) {
+ if (denoising.store_passes) {
b_engine.add_pass("Denoising Normal", 3, "XYZ", b_view_layer.name().c_str());
b_engine.add_pass("Denoising Albedo", 3, "RGB", b_view_layer.name().c_str());
b_engine.add_pass("Denoising Depth", 1, "Z", b_view_layer.name().c_str());
- if (!use_optix_denoising) {
+ if (denoising.type == DENOISER_NLM) {
b_engine.add_pass("Denoising Shadowing", 1, "X", b_view_layer.name().c_str());
b_engine.add_pass("Denoising Variance", 3, "RGB", b_view_layer.name().c_str());
b_engine.add_pass("Denoising Intensity", 1, "X", b_view_layer.name().c_str());
@@ -593,46 +591,46 @@ vector<Pass> BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay,
}
#ifdef __KERNEL_DEBUG__
- if (get_boolean(crp, "pass_debug_bvh_traversed_nodes")) {
+ if (get_boolean(crl, "pass_debug_bvh_traversed_nodes")) {
b_engine.add_pass("Debug BVH Traversed Nodes", 1, "X", b_view_layer.name().c_str());
Pass::add(PASS_BVH_TRAVERSED_NODES, passes, "Debug BVH Traversed Nodes");
}
- if (get_boolean(crp, "pass_debug_bvh_traversed_instances")) {
+ if (get_boolean(crl, "pass_debug_bvh_traversed_instances")) {
b_engine.add_pass("Debug BVH Traversed Instances", 1, "X", b_view_layer.name().c_str());
Pass::add(PASS_BVH_TRAVERSED_INSTANCES, passes, "Debug BVH Traversed Instances");
}
- if (get_boolean(crp, "pass_debug_bvh_intersections")) {
+ if (get_boolean(crl, "pass_debug_bvh_intersections")) {
b_engine.add_pass("Debug BVH Intersections", 1, "X", b_view_layer.name().c_str());
Pass::add(PASS_BVH_INTERSECTIONS, passes, "Debug BVH Intersections");
}
- if (get_boolean(crp, "pass_debug_ray_bounces")) {
+ if (get_boolean(crl, "pass_debug_ray_bounces")) {
b_engine.add_pass("Debug Ray Bounces", 1, "X", b_view_layer.name().c_str());
Pass::add(PASS_RAY_BOUNCES, passes, "Debug Ray Bounces");
}
#endif
- if (get_boolean(crp, "pass_debug_render_time")) {
+ if (get_boolean(crl, "pass_debug_render_time")) {
b_engine.add_pass("Debug Render Time", 1, "X", b_view_layer.name().c_str());
Pass::add(PASS_RENDER_TIME, passes, "Debug Render Time");
}
- if (get_boolean(crp, "pass_debug_sample_count")) {
+ if (get_boolean(crl, "pass_debug_sample_count")) {
b_engine.add_pass("Debug Sample Count", 1, "X", b_view_layer.name().c_str());
Pass::add(PASS_SAMPLE_COUNT, passes, "Debug Sample Count");
}
- if (get_boolean(crp, "use_pass_volume_direct")) {
+ if (get_boolean(crl, "use_pass_volume_direct")) {
b_engine.add_pass("VolumeDir", 3, "RGB", b_view_layer.name().c_str());
Pass::add(PASS_VOLUME_DIRECT, passes, "VolumeDir");
}
- if (get_boolean(crp, "use_pass_volume_indirect")) {
+ if (get_boolean(crl, "use_pass_volume_indirect")) {
b_engine.add_pass("VolumeInd", 3, "RGB", b_view_layer.name().c_str());
Pass::add(PASS_VOLUME_INDIRECT, passes, "VolumeInd");
}
/* Cryptomatte stores two ID/weight pairs per RGBA layer.
* User facing parameter is the number of pairs. */
- int crypto_depth = divide_up(min(16, get_int(crp, "pass_crypto_depth")), 2);
+ int crypto_depth = divide_up(min(16, get_int(crl, "pass_crypto_depth")), 2);
scene->film->cryptomatte_depth = crypto_depth;
scene->film->cryptomatte_passes = CRYPT_NONE;
- if (get_boolean(crp, "use_pass_crypto_object")) {
+ if (get_boolean(crl, "use_pass_crypto_object")) {
for (int i = 0; i < crypto_depth; i++) {
string passname = cryptomatte_prefix + string_printf("Object%02d", i);
b_engine.add_pass(passname.c_str(), 4, "RGBA", b_view_layer.name().c_str());
@@ -641,7 +639,7 @@ vector<Pass> BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay,
scene->film->cryptomatte_passes = (CryptomatteType)(scene->film->cryptomatte_passes |
CRYPT_OBJECT);
}
- if (get_boolean(crp, "use_pass_crypto_material")) {
+ if (get_boolean(crl, "use_pass_crypto_material")) {
for (int i = 0; i < crypto_depth; i++) {
string passname = cryptomatte_prefix + string_printf("Material%02d", i);
b_engine.add_pass(passname.c_str(), 4, "RGBA", b_view_layer.name().c_str());
@@ -650,7 +648,7 @@ vector<Pass> BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay,
scene->film->cryptomatte_passes = (CryptomatteType)(scene->film->cryptomatte_passes |
CRYPT_MATERIAL);
}
- if (get_boolean(crp, "use_pass_crypto_asset")) {
+ if (get_boolean(crl, "use_pass_crypto_asset")) {
for (int i = 0; i < crypto_depth; i++) {
string passname = cryptomatte_prefix + string_printf("Asset%02d", i);
b_engine.add_pass(passname.c_str(), 4, "RGBA", b_view_layer.name().c_str());
@@ -659,19 +657,19 @@ vector<Pass> BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay,
scene->film->cryptomatte_passes = (CryptomatteType)(scene->film->cryptomatte_passes |
CRYPT_ASSET);
}
- if (get_boolean(crp, "pass_crypto_accurate") && scene->film->cryptomatte_passes != CRYPT_NONE) {
+ if (get_boolean(crl, "pass_crypto_accurate") && scene->film->cryptomatte_passes != CRYPT_NONE) {
scene->film->cryptomatte_passes = (CryptomatteType)(scene->film->cryptomatte_passes |
CRYPT_ACCURATE);
}
if (adaptive_sampling) {
Pass::add(PASS_ADAPTIVE_AUX_BUFFER, passes);
- if (!get_boolean(crp, "pass_debug_sample_count")) {
+ if (!get_boolean(crl, "pass_debug_sample_count")) {
Pass::add(PASS_SAMPLE_COUNT, passes);
}
}
- RNA_BEGIN (&crp, b_aov, "aovs") {
+ RNA_BEGIN (&crl, b_aov, "aovs") {
bool is_color = (get_enum(b_aov, "type") == 1);
string name = get_string(b_aov, "name");
@@ -732,6 +730,11 @@ SceneParams BlenderSync::get_scene_params(BL::Scene &b_scene, bool background)
params.use_bvh_unaligned_nodes = RNA_boolean_get(&cscene, "debug_use_hair_bvh");
params.num_bvh_time_steps = RNA_int_get(&cscene, "debug_bvh_time_steps");
+ PointerRNA csscene = RNA_pointer_get(&b_scene.ptr, "cycles_curves");
+ params.hair_subdivisions = get_int(csscene, "subdivisions");
+ params.hair_shape = (CurveShapeType)get_enum(
+ csscene, "shape", CURVE_NUM_SHAPE_TYPES, CURVE_THICK);
+
if (background && params.shadingsystem != SHADINGSYSTEM_OSL)
params.persistent_data = r.use_persistent_data();
else
@@ -751,20 +754,7 @@ SceneParams BlenderSync::get_scene_params(BL::Scene &b_scene, bool background)
params.texture_limit = 0;
}
- /* TODO(sergey): Once OSL supports per-microarchitecture optimization get
- * rid of this.
- */
- if (params.shadingsystem == SHADINGSYSTEM_OSL) {
- params.bvh_layout = BVH_LAYOUT_BVH4;
- }
- else {
- params.bvh_layout = DebugFlags().cpu.bvh_layout;
- }
-
-#ifdef WITH_EMBREE
- params.bvh_layout = RNA_boolean_get(&cscene, "use_bvh_embree") ? BVH_LAYOUT_EMBREE :
- params.bvh_layout;
-#endif
+ params.bvh_layout = DebugFlags().cpu.bvh_layout;
params.background = background;
@@ -782,7 +772,8 @@ bool BlenderSync::get_session_pause(BL::Scene &b_scene, bool background)
SessionParams BlenderSync::get_session_params(BL::RenderEngine &b_engine,
BL::Preferences &b_preferences,
BL::Scene &b_scene,
- bool background)
+ bool background,
+ BL::ViewLayer b_view_layer)
{
SessionParams params;
PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
@@ -860,9 +851,22 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine &b_engine,
params.tile_order = TILE_BOTTOM_TO_TOP;
}
- /* other parameters */
+ /* Denoising */
+ params.denoising = get_denoise_params(b_scene, b_view_layer, background);
+
+ if (params.denoising.use) {
+ /* Add additional denoising devices if we are rendering and denoising
+ * with different devices. */
+ params.device.add_denoising_devices(params.denoising.type);
+
+ /* Check if denoiser is supported by device. */
+ if (!(params.device.denoisers & params.denoising.type)) {
+ params.denoising.use = false;
+ }
+ }
+
+ /* Viewport Performance */
params.start_resolution = get_int(cscene, "preview_start_resolution");
- params.denoising_start_sample = get_int(cscene, "preview_denoising_start_sample");
params.pixel_size = b_engine.get_preview_pixel_size(b_scene);
/* other parameters */
@@ -915,4 +919,55 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine &b_engine,
return params;
}
+DenoiseParams BlenderSync::get_denoise_params(BL::Scene &b_scene,
+ BL::ViewLayer &b_view_layer,
+ bool background)
+{ /* Collect the denoising settings for a session into a DenoiseParams.
+   *
+   * b_scene: scene whose "cycles" RNA settings provide the scene-wide options.
+   * b_view_layer: optional view layer; it is only consulted in the background
+   *   branch and only when it tests as valid (non-null).
+   * background: selects the final-render branch (true) or the viewport
+   *   branch (false).
+   *
+   * Returns the filled-in parameters. Fields that are not assigned in the
+   * branch taken keep whatever a default-constructed DenoiseParams holds. */
+ DenoiseParams denoising;
+ PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
+
+ if (background) {
+ /* Final Render Denoising: the scene-level switch and denoiser type come
+  * first, the view layer may then veto and supplies the tuning values.
+  * NOTE(review): the DENOISER_NUM / DENOISER_NONE arguments are presumably
+  * the number of enum values and the fallback value -- confirm against
+  * get_enum(). */
+ denoising.use = get_boolean(cscene, "use_denoising");
+ denoising.type = (DenoiserType)get_enum(cscene, "denoiser", DENOISER_NUM, DENOISER_NONE);
+
+ if (b_view_layer) { /* Skipped entirely when no valid view layer was passed. */
+ PointerRNA clayer = RNA_pointer_get(&b_view_layer.ptr, "cycles");
+ if (!get_boolean(clayer, "use_denoising")) {
+ denoising.use = false; /* The view layer can only disable, never enable. */
+ }
+
+ denoising.radius = get_int(clayer, "denoising_radius");
+ denoising.strength = get_float(clayer, "denoising_strength");
+ denoising.feature_strength = get_float(clayer, "denoising_feature_strength");
+ denoising.relative_pca = get_boolean(clayer, "denoising_relative_pca");
+ denoising.optix_input_passes = get_enum(clayer, "denoising_optix_input_passes");
+
+ denoising.store_passes = get_boolean(clayer, "denoising_store_passes");
+ }
+ }
+ else {
+ /* Viewport Denoising: only scene-level preview settings are read here,
+  * the view layer is not consulted. */
+ denoising.use = get_boolean(cscene, "use_preview_denoising");
+ denoising.type = (DenoiserType)get_enum(
+ cscene, "preview_denoiser", DENOISER_NUM, DENOISER_NONE);
+ denoising.start_sample = get_int(cscene, "preview_denoising_start_sample");
+
+ /* Auto select fastest denoiser: a type of DENOISER_NONE is treated as
+  * "choose for me". OptiX is preferred when any OptiX device is available,
+  * then OpenImageDenoise when supported; otherwise denoising is turned off. */
+ if (denoising.type == DENOISER_NONE) {
+ if (!Device::available_devices(DEVICE_MASK_OPTIX).empty()) {
+ denoising.type = DENOISER_OPTIX;
+ }
+ else if (openimagedenoise_supported()) {
+ denoising.type = DENOISER_OPENIMAGEDENOISE;
+ }
+ else {
+ denoising.use = false; /* No automatic candidate: type stays DENOISER_NONE. */
+ }
+ }
+ }
+
+ return denoising;
+}
+
CCL_NAMESPACE_END
diff --git a/intern/cycles/blender/blender_sync.h b/intern/cycles/blender/blender_sync.h
index 341281b18ee..0214d9eb3b8 100644
--- a/intern/cycles/blender/blender_sync.h
+++ b/intern/cycles/blender/blender_sync.h
@@ -75,7 +75,8 @@ class BlenderSync {
void sync_view_layer(BL::SpaceView3D &b_v3d, BL::ViewLayer &b_view_layer);
vector<Pass> sync_render_passes(BL::RenderLayer &b_render_layer,
BL::ViewLayer &b_view_layer,
- bool adaptive_sampling);
+ bool adaptive_sampling,
+ const DenoiseParams &denoising);
void sync_integrator();
void sync_camera(BL::RenderSettings &b_render,
BL::Object &b_override,
@@ -94,23 +95,29 @@ class BlenderSync {
/* get parameters */
static SceneParams get_scene_params(BL::Scene &b_scene, bool background);
- static SessionParams get_session_params(BL::RenderEngine &b_engine,
- BL::Preferences &b_userpref,
- BL::Scene &b_scene,
- bool background);
+ static SessionParams get_session_params(
+ BL::RenderEngine &b_engine,
+ BL::Preferences &b_userpref,
+ BL::Scene &b_scene,
+ bool background,
+ BL::ViewLayer b_view_layer = BL::ViewLayer(PointerRNA_NULL));
static bool get_session_pause(BL::Scene &b_scene, bool background);
- static BufferParams get_buffer_params(BL::Scene &b_scene,
- BL::RenderSettings &b_render,
+ static BufferParams get_buffer_params(BL::RenderSettings &b_render,
BL::SpaceView3D &b_v3d,
BL::RegionView3D &b_rv3d,
Camera *cam,
int width,
- int height);
+ int height,
+ const bool use_denoiser);
static PassType get_pass_type(BL::RenderPass &b_pass);
static int get_denoising_pass(BL::RenderPass &b_pass);
private:
+ static DenoiseParams get_denoise_params(BL::Scene &b_scene,
+ BL::ViewLayer &b_view_layer,
+ bool background);
+
/* sync */
void sync_lights(BL::Depsgraph &b_depsgraph, bool update_all);
void sync_materials(BL::Depsgraph &b_depsgraph, bool update_all);
@@ -153,16 +160,12 @@ class BlenderSync {
/* Hair */
void sync_hair(BL::Depsgraph b_depsgraph,
BL::Object b_ob,
- Geometry *geom,
+ Hair *hair,
const vector<Shader *> &used_shaders);
- void sync_hair_motion(BL::Depsgraph b_depsgraph,
- BL::Object b_ob,
- Geometry *geom,
- int motion_step);
+ void sync_hair_motion(BL::Depsgraph b_depsgraph, BL::Object b_ob, Hair *hair, int motion_step);
void sync_hair(Hair *hair, BL::Object &b_ob, bool motion, int motion_step = 0);
void sync_particle_hair(
- Geometry *geom, BL::Mesh &b_mesh, BL::Object &b_ob, bool motion, int motion_step = 0);
- void sync_curve_settings(BL::Depsgraph &b_depsgraph);
+ Hair *hair, BL::Mesh &b_mesh, BL::Object &b_ob, bool motion, int motion_step = 0);
bool object_has_particle_hair(BL::Object b_ob);
/* Camera */
diff --git a/intern/cycles/blender/blender_viewport.cpp b/intern/cycles/blender/blender_viewport.cpp
index 93e84e28032..73ef5f94720 100644
--- a/intern/cycles/blender/blender_viewport.cpp
+++ b/intern/cycles/blender/blender_viewport.cpp
@@ -61,17 +61,6 @@ const bool BlenderViewportParameters::custom_viewport_parameters() const
return !(use_scene_world && use_scene_lights);
}
-bool BlenderViewportParameters::get_viewport_display_denoising(BL::SpaceView3D &b_v3d,
- BL::Scene &b_scene)
-{
- bool use_denoising = false;
- if (b_v3d) {
- PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
- use_denoising = get_enum(cscene, "preview_denoising") != 0;
- }
- return use_denoising;
-}
-
PassType BlenderViewportParameters::get_viewport_display_render_pass(BL::SpaceView3D &b_v3d)
{
PassType display_pass = PASS_NONE;
@@ -83,11 +72,6 @@ PassType BlenderViewportParameters::get_viewport_display_render_pass(BL::SpaceVi
return display_pass;
}
-bool update_viewport_display_denoising(BL::SpaceView3D &b_v3d, BL::Scene &b_scene)
-{
- return BlenderViewportParameters::get_viewport_display_denoising(b_v3d, b_scene);
-}
-
PassType update_viewport_display_passes(BL::SpaceView3D &b_v3d, vector<Pass> &passes)
{
if (b_v3d) {
diff --git a/intern/cycles/blender/blender_viewport.h b/intern/cycles/blender/blender_viewport.h
index 3e44e552f1d..7c6c9c4d274 100644
--- a/intern/cycles/blender/blender_viewport.h
+++ b/intern/cycles/blender/blender_viewport.h
@@ -44,15 +44,11 @@ class BlenderViewportParameters {
friend class BlenderSync;
public:
- /* Get whether to enable denoising data pass in viewport. */
- static bool get_viewport_display_denoising(BL::SpaceView3D &b_v3d, BL::Scene &b_scene);
/* Retrieve the render pass that needs to be displayed on the given `SpaceView3D`
* When the `b_v3d` parameter is not given `PASS_NONE` will be returned. */
static PassType get_viewport_display_render_pass(BL::SpaceView3D &b_v3d);
};
-bool update_viewport_display_denoising(BL::SpaceView3D &b_v3d, BL::Scene &b_scene);
-
PassType update_viewport_display_passes(BL::SpaceView3D &b_v3d, vector<Pass> &passes);
CCL_NAMESPACE_END
diff --git a/intern/cycles/blender/blender_volume.cpp b/intern/cycles/blender/blender_volume.cpp
index 4eed6be8c7c..80591e0eec8 100644
--- a/intern/cycles/blender/blender_volume.cpp
+++ b/intern/cycles/blender/blender_volume.cpp
@@ -35,8 +35,10 @@ CCL_NAMESPACE_BEGIN
class BlenderSmokeLoader : public ImageLoader {
public:
BlenderSmokeLoader(BL::Object &b_ob, AttributeStandard attribute)
- : b_domain(object_fluid_gas_domain_find(b_ob)), b_mesh(b_ob.data()), attribute(attribute)
+ : b_domain(object_fluid_gas_domain_find(b_ob)), attribute(attribute)
{
+ BL::Mesh b_mesh(b_ob.data());
+ mesh_texture_space(b_mesh, texspace_loc, texspace_size);
}
bool load_metadata(ImageMetaData &metadata) override
@@ -77,9 +79,7 @@ class BlenderSmokeLoader : public ImageLoader {
/* Create a matrix to transform from object space to mesh texture space.
* This does not work with deformations but that can probably only be done
* well with a volume grid mapping of coordinates. */
- float3 loc, size;
- mesh_texture_space(b_mesh, loc, size);
- metadata.transform_3d = transform_translate(-loc) * transform_scale(size);
+ metadata.transform_3d = transform_translate(-texspace_loc) * transform_scale(texspace_size);
metadata.use_transform_3d = true;
return true;
@@ -177,7 +177,7 @@ class BlenderSmokeLoader : public ImageLoader {
}
BL::FluidDomainSettings b_domain;
- BL::Mesh b_mesh;
+ float3 texspace_loc, texspace_size;
AttributeStandard attribute;
};
@@ -216,25 +216,16 @@ static void sync_smoke_volume(Scene *scene, BL::Object &b_ob, Mesh *mesh, float
class BlenderVolumeLoader : public VDBImageLoader {
public:
- BlenderVolumeLoader(BL::Volume b_volume, const string &grid_name)
- : VDBImageLoader(grid_name),
- b_volume(b_volume),
- b_volume_grid(PointerRNA_NULL),
- unload(false)
+ BlenderVolumeLoader(BL::BlendData &b_data, BL::Volume &b_volume, const string &grid_name)
+ : VDBImageLoader(grid_name), b_data(b_data), b_volume(b_volume), unload(false)
{
-#ifdef WITH_OPENVDB
- /* Find grid with matching name. */
- BL::Volume::grids_iterator b_grid_iter;
- for (b_volume.grids.begin(b_grid_iter); b_grid_iter != b_volume.grids.end(); ++b_grid_iter) {
- if (b_grid_iter->name() == grid_name) {
- b_volume_grid = *b_grid_iter;
- }
- }
-#endif
}
bool load_metadata(ImageMetaData &metadata) override
{
+ b_volume.grids.load(b_data.ptr.data);
+ BL::VolumeGrid b_volume_grid = find_grid();
+
if (!b_volume_grid) {
return false;
}
@@ -255,6 +246,9 @@ class BlenderVolumeLoader : public VDBImageLoader {
const size_t pixel_size,
const bool associate_alpha) override
{
+ b_volume.grids.load(b_data.ptr.data);
+ BL::VolumeGrid b_volume_grid = find_grid();
+
if (!b_volume_grid) {
return false;
}
@@ -266,19 +260,38 @@ class BlenderVolumeLoader : public VDBImageLoader {
{
/* TODO: detect multiple volume datablocks with the same filepath. */
const BlenderVolumeLoader &other_loader = (const BlenderVolumeLoader &)other;
- return b_volume == other_loader.b_volume && b_volume_grid == other_loader.b_volume_grid;
+ return b_volume == other_loader.b_volume && grid_name == other_loader.grid_name;
}
void cleanup() override
{
VDBImageLoader::cleanup();
+
+ BL::VolumeGrid b_volume_grid = find_grid();
if (b_volume_grid && unload) {
b_volume_grid.unload();
}
}
+ /* Find the grid of b_volume whose name matches grid_name. The grid pointer
+ * is not stored in the class since grids may be unloaded before we load the
+ * pixels, for example for motion blur where we move between frames.
+ *
+ * Returns the first grid with a matching name, or a null VolumeGrid when no
+ * grid matches or when built without WITH_OPENVDB. */
+ BL::VolumeGrid find_grid()
+ {
+#ifdef WITH_OPENVDB
+ BL::Volume::grids_iterator b_grid_iter;
+ for (b_volume.grids.begin(b_grid_iter); b_grid_iter != b_volume.grids.end(); ++b_grid_iter) {
+ if (b_grid_iter->name() == grid_name) {
+ return *b_grid_iter; /* First match wins; later grids are not inspected. */
+ }
+ }
+#endif
+
+ return BL::VolumeGrid(PointerRNA_NULL); /* Callers test the result for validity. */
+ }
+
+ BL::BlendData b_data;
BL::Volume b_volume;
- BL::VolumeGrid b_volume_grid;
bool unload;
};
@@ -325,7 +338,7 @@ static void sync_volume_object(BL::BlendData &b_data, BL::Object &b_ob, Scene *s
mesh->attributes.add(std) :
mesh->attributes.add(name, TypeDesc::TypeFloat, ATTR_ELEMENT_VOXEL);
- ImageLoader *loader = new BlenderVolumeLoader(b_volume, name.string());
+ ImageLoader *loader = new BlenderVolumeLoader(b_data, b_volume, name.string());
ImageParams params;
params.frame = b_volume.grids.frame();
diff --git a/intern/cycles/bvh/CMakeLists.txt b/intern/cycles/bvh/CMakeLists.txt
index fb724704a84..8b8f3ca7265 100644
--- a/intern/cycles/bvh/CMakeLists.txt
+++ b/intern/cycles/bvh/CMakeLists.txt
@@ -9,8 +9,6 @@ set(INC_SYS
set(SRC
bvh.cpp
bvh2.cpp
- bvh4.cpp
- bvh8.cpp
bvh_binning.cpp
bvh_build.cpp
bvh_embree.cpp
@@ -24,8 +22,6 @@ set(SRC
set(SRC_HEADERS
bvh.h
bvh2.h
- bvh4.h
- bvh8.h
bvh_binning.h
bvh_build.h
bvh_embree.h
diff --git a/intern/cycles/bvh/bvh.cpp b/intern/cycles/bvh/bvh.cpp
index 0313bcd68b0..e9e67fd1305 100644
--- a/intern/cycles/bvh/bvh.cpp
+++ b/intern/cycles/bvh/bvh.cpp
@@ -22,17 +22,10 @@
#include "render/object.h"
#include "bvh/bvh2.h"
-#include "bvh/bvh4.h"
-#include "bvh/bvh8.h"
#include "bvh/bvh_build.h"
+#include "bvh/bvh_embree.h"
#include "bvh/bvh_node.h"
-
-#ifdef WITH_OPTIX
-# include "bvh/bvh_optix.h"
-#endif
-#ifdef WITH_EMBREE
-# include "bvh/bvh_embree.h"
-#endif
+#include "bvh/bvh_optix.h"
#include "util/util_foreach.h"
#include "util/util_logging.h"
@@ -47,10 +40,6 @@ const char *bvh_layout_name(BVHLayout layout)
switch (layout) {
case BVH_LAYOUT_BVH2:
return "BVH2";
- case BVH_LAYOUT_BVH4:
- return "BVH4";
- case BVH_LAYOUT_BVH8:
- return "BVH8";
case BVH_LAYOUT_NONE:
return "NONE";
case BVH_LAYOUT_EMBREE:
@@ -114,10 +103,6 @@ BVH *BVH::create(const BVHParams &params,
switch (params.bvh_layout) {
case BVH_LAYOUT_BVH2:
return new BVH2(params, geometry, objects);
- case BVH_LAYOUT_BVH4:
- return new BVH4(params, geometry, objects);
- case BVH_LAYOUT_BVH8:
- return new BVH8(params, geometry, objects);
case BVH_LAYOUT_EMBREE:
#ifdef WITH_EMBREE
return new BVHEmbree(params, geometry, objects);
@@ -337,13 +322,6 @@ void BVH::pack_primitives()
void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
{
- /* The BVH's for instances are built separately, but for traversal all
- * BVH's are stored in global arrays. This function merges them into the
- * top level BVH, adjusting indexes and offsets where appropriate.
- */
- const bool use_qbvh = (params.bvh_layout == BVH_LAYOUT_BVH4);
- const bool use_obvh = (params.bvh_layout == BVH_LAYOUT_BVH8);
-
/* Adjust primitive index to point to the triangle in the global array, for
* geometry with transform applied and already in the top level BVH.
*/
@@ -506,53 +484,21 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
for (size_t i = 0, j = 0; i < bvh_nodes_size; j++) {
size_t nsize, nsize_bbox;
if (bvh_nodes[i].x & PATH_RAY_NODE_UNALIGNED) {
- if (use_obvh) {
- nsize = BVH_UNALIGNED_ONODE_SIZE;
- nsize_bbox = BVH_UNALIGNED_ONODE_SIZE - 1;
- }
- else {
- nsize = use_qbvh ? BVH_UNALIGNED_QNODE_SIZE : BVH_UNALIGNED_NODE_SIZE;
- nsize_bbox = (use_qbvh) ? BVH_UNALIGNED_QNODE_SIZE - 1 : 0;
- }
+ nsize = BVH_UNALIGNED_NODE_SIZE;
+ nsize_bbox = 0;
}
else {
- if (use_obvh) {
- nsize = BVH_ONODE_SIZE;
- nsize_bbox = BVH_ONODE_SIZE - 1;
- }
- else {
- nsize = (use_qbvh) ? BVH_QNODE_SIZE : BVH_NODE_SIZE;
- nsize_bbox = (use_qbvh) ? BVH_QNODE_SIZE - 1 : 0;
- }
+ nsize = BVH_NODE_SIZE;
+ nsize_bbox = 0;
}
memcpy(pack_nodes + pack_nodes_offset, bvh_nodes + i, nsize_bbox * sizeof(int4));
/* Modify offsets into arrays */
int4 data = bvh_nodes[i + nsize_bbox];
-
- if (use_obvh) {
- int4 data1 = bvh_nodes[i + nsize_bbox - 1];
- data.z += (data.z < 0) ? -noffset_leaf : noffset;
- data.w += (data.w < 0) ? -noffset_leaf : noffset;
- data.x += (data.x < 0) ? -noffset_leaf : noffset;
- data.y += (data.y < 0) ? -noffset_leaf : noffset;
- data1.z += (data1.z < 0) ? -noffset_leaf : noffset;
- data1.w += (data1.w < 0) ? -noffset_leaf : noffset;
- data1.x += (data1.x < 0) ? -noffset_leaf : noffset;
- data1.y += (data1.y < 0) ? -noffset_leaf : noffset;
- pack_nodes[pack_nodes_offset + nsize_bbox] = data;
- pack_nodes[pack_nodes_offset + nsize_bbox - 1] = data1;
- }
- else {
- data.z += (data.z < 0) ? -noffset_leaf : noffset;
- data.w += (data.w < 0) ? -noffset_leaf : noffset;
- if (use_qbvh) {
- data.x += (data.x < 0) ? -noffset_leaf : noffset;
- data.y += (data.y < 0) ? -noffset_leaf : noffset;
- }
- pack_nodes[pack_nodes_offset + nsize_bbox] = data;
- }
+ data.z += (data.z < 0) ? -noffset_leaf : noffset;
+ data.w += (data.w < 0) ? -noffset_leaf : noffset;
+ pack_nodes[pack_nodes_offset + nsize_bbox] = data;
/* Usually this copies nothing, but we better
* be prepared for possible node size extension.
diff --git a/intern/cycles/bvh/bvh.h b/intern/cycles/bvh/bvh.h
index bdde38640c9..6639e06b0bc 100644
--- a/intern/cycles/bvh/bvh.h
+++ b/intern/cycles/bvh/bvh.h
@@ -76,7 +76,7 @@ struct PackedBVH {
}
};
-enum BVH_TYPE { bvh2, bvh4, bvh8 };
+enum BVH_TYPE { bvh2 };
/* BVH */
diff --git a/intern/cycles/bvh/bvh4.cpp b/intern/cycles/bvh/bvh4.cpp
deleted file mode 100644
index 143c3e54f94..00000000000
--- a/intern/cycles/bvh/bvh4.cpp
+++ /dev/null
@@ -1,447 +0,0 @@
-/*
- * Adapted from code copyright 2009-2010 NVIDIA Corporation
- * Modifications Copyright 2011, Blender Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "bvh/bvh4.h"
-
-#include "render/mesh.h"
-#include "render/object.h"
-
-#include "bvh/bvh_node.h"
-#include "bvh/bvh_unaligned.h"
-
-CCL_NAMESPACE_BEGIN
-
-/* Can we avoid this somehow or make more generic?
- *
- * Perhaps we can merge nodes in actual tree and make our
- * life easier all over the place.
- */
-
-BVH4::BVH4(const BVHParams &params_,
- const vector<Geometry *> &geometry_,
- const vector<Object *> &objects_)
- : BVH(params_, geometry_, objects_)
-{
- params.bvh_layout = BVH_LAYOUT_BVH4;
-}
-
-namespace {
-
-BVHNode *bvh_node_merge_children_recursively(const BVHNode *node)
-{
- if (node->is_leaf()) {
- return new LeafNode(*reinterpret_cast<const LeafNode *>(node));
- }
- /* Collect nodes of one layer deeper, allowing us to have more children in an inner layer. */
- assert(node->num_children() <= 2);
- const BVHNode *children[4];
- const BVHNode *child0 = node->get_child(0);
- const BVHNode *child1 = node->get_child(1);
- int num_children = 0;
- if (child0->is_leaf()) {
- children[num_children++] = child0;
- }
- else {
- children[num_children++] = child0->get_child(0);
- children[num_children++] = child0->get_child(1);
- }
- if (child1->is_leaf()) {
- children[num_children++] = child1;
- }
- else {
- children[num_children++] = child1->get_child(0);
- children[num_children++] = child1->get_child(1);
- }
- /* Merge children in subtrees. */
- BVHNode *children4[4];
- for (int i = 0; i < num_children; ++i) {
- children4[i] = bvh_node_merge_children_recursively(children[i]);
- }
- /* Allocate new node. */
- BVHNode *node4 = new InnerNode(node->bounds, children4, num_children);
- /* TODO(sergey): Consider doing this from the InnerNode() constructor.
- * But in order to do this nicely need to think of how to pass all the
- * parameters there. */
- if (node->is_unaligned) {
- node4->is_unaligned = true;
- node4->aligned_space = new Transform();
- *node4->aligned_space = *node->aligned_space;
- }
- return node4;
-}
-
-} // namespace
-
-BVHNode *BVH4::widen_children_nodes(const BVHNode *root)
-{
- if (root == NULL) {
- return NULL;
- }
- if (root->is_leaf()) {
- return const_cast<BVHNode *>(root);
- }
- BVHNode *root4 = bvh_node_merge_children_recursively(root);
- /* TODO(sergey): Pack children nodes to parents which has less that 4
- * children. */
- return root4;
-}
-
-void BVH4::pack_leaf(const BVHStackEntry &e, const LeafNode *leaf)
-{
- float4 data[BVH_QNODE_LEAF_SIZE];
- memset(data, 0, sizeof(data));
- if (leaf->num_triangles() == 1 && pack.prim_index[leaf->lo] == -1) {
- /* object */
- data[0].x = __int_as_float(~(leaf->lo));
- data[0].y = __int_as_float(0);
- }
- else {
- /* triangle */
- data[0].x = __int_as_float(leaf->lo);
- data[0].y = __int_as_float(leaf->hi);
- }
- data[0].z = __uint_as_float(leaf->visibility);
- if (leaf->num_triangles() != 0) {
- data[0].w = __uint_as_float(pack.prim_type[leaf->lo]);
- }
-
- memcpy(&pack.leaf_nodes[e.idx], data, sizeof(float4) * BVH_QNODE_LEAF_SIZE);
-}
-
-void BVH4::pack_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num)
-{
- bool has_unaligned = false;
- /* Check whether we have to create unaligned node or all nodes are aligned
- * and we can cut some corner here.
- */
- if (params.use_unaligned_nodes) {
- for (int i = 0; i < num; i++) {
- if (en[i].node->is_unaligned) {
- has_unaligned = true;
- break;
- }
- }
- }
- if (has_unaligned) {
- /* There's no unaligned children, pack into AABB node. */
- pack_unaligned_inner(e, en, num);
- }
- else {
- /* Create unaligned node with orientation transform for each of the
- * children.
- */
- pack_aligned_inner(e, en, num);
- }
-}
-
-void BVH4::pack_aligned_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num)
-{
- BoundBox bounds[4];
- int child[4];
- for (int i = 0; i < num; ++i) {
- bounds[i] = en[i].node->bounds;
- child[i] = en[i].encodeIdx();
- }
- pack_aligned_node(
- e.idx, bounds, child, e.node->visibility, e.node->time_from, e.node->time_to, num);
-}
-
-void BVH4::pack_aligned_node(int idx,
- const BoundBox *bounds,
- const int *child,
- const uint visibility,
- const float time_from,
- const float time_to,
- const int num)
-{
- float4 data[BVH_QNODE_SIZE];
- memset(data, 0, sizeof(data));
-
- data[0].x = __uint_as_float(visibility & ~PATH_RAY_NODE_UNALIGNED);
- data[0].y = time_from;
- data[0].z = time_to;
-
- for (int i = 0; i < num; i++) {
- float3 bb_min = bounds[i].min;
- float3 bb_max = bounds[i].max;
-
- data[1][i] = bb_min.x;
- data[2][i] = bb_max.x;
- data[3][i] = bb_min.y;
- data[4][i] = bb_max.y;
- data[5][i] = bb_min.z;
- data[6][i] = bb_max.z;
-
- data[7][i] = __int_as_float(child[i]);
- }
-
- for (int i = num; i < 4; i++) {
- /* We store BB which would never be recorded as intersection
- * so kernel might safely assume there are always 4 child nodes.
- */
- data[1][i] = FLT_MAX;
- data[2][i] = -FLT_MAX;
-
- data[3][i] = FLT_MAX;
- data[4][i] = -FLT_MAX;
-
- data[5][i] = FLT_MAX;
- data[6][i] = -FLT_MAX;
-
- data[7][i] = __int_as_float(0);
- }
-
- memcpy(&pack.nodes[idx], data, sizeof(float4) * BVH_QNODE_SIZE);
-}
-
-void BVH4::pack_unaligned_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num)
-{
- Transform aligned_space[4];
- BoundBox bounds[4];
- int child[4];
- for (int i = 0; i < num; ++i) {
- aligned_space[i] = en[i].node->get_aligned_space();
- bounds[i] = en[i].node->bounds;
- child[i] = en[i].encodeIdx();
- }
- pack_unaligned_node(e.idx,
- aligned_space,
- bounds,
- child,
- e.node->visibility,
- e.node->time_from,
- e.node->time_to,
- num);
-}
-
-void BVH4::pack_unaligned_node(int idx,
- const Transform *aligned_space,
- const BoundBox *bounds,
- const int *child,
- const uint visibility,
- const float time_from,
- const float time_to,
- const int num)
-{
- float4 data[BVH_UNALIGNED_QNODE_SIZE];
- memset(data, 0, sizeof(data));
-
- data[0].x = __uint_as_float(visibility | PATH_RAY_NODE_UNALIGNED);
- data[0].y = time_from;
- data[0].z = time_to;
-
- for (int i = 0; i < num; i++) {
- Transform space = BVHUnaligned::compute_node_transform(bounds[i], aligned_space[i]);
-
- data[1][i] = space.x.x;
- data[2][i] = space.x.y;
- data[3][i] = space.x.z;
-
- data[4][i] = space.y.x;
- data[5][i] = space.y.y;
- data[6][i] = space.y.z;
-
- data[7][i] = space.z.x;
- data[8][i] = space.z.y;
- data[9][i] = space.z.z;
-
- data[10][i] = space.x.w;
- data[11][i] = space.y.w;
- data[12][i] = space.z.w;
-
- data[13][i] = __int_as_float(child[i]);
- }
-
- for (int i = num; i < 4; i++) {
- /* We store BB which would never be recorded as intersection
- * so kernel might safely assume there are always 4 child nodes.
- */
-
- data[1][i] = NAN;
- data[2][i] = NAN;
- data[3][i] = NAN;
-
- data[4][i] = NAN;
- data[5][i] = NAN;
- data[6][i] = NAN;
-
- data[7][i] = NAN;
- data[8][i] = NAN;
- data[9][i] = NAN;
-
- data[10][i] = NAN;
- data[11][i] = NAN;
- data[12][i] = NAN;
-
- data[13][i] = __int_as_float(0);
- }
-
- memcpy(&pack.nodes[idx], data, sizeof(float4) * BVH_UNALIGNED_QNODE_SIZE);
-}
-
-/* Quad SIMD Nodes */
-
-void BVH4::pack_nodes(const BVHNode *root)
-{
- /* Calculate size of the arrays required. */
- const size_t num_nodes = root->getSubtreeSize(BVH_STAT_NODE_COUNT);
- const size_t num_leaf_nodes = root->getSubtreeSize(BVH_STAT_LEAF_COUNT);
- assert(num_leaf_nodes <= num_nodes);
- const size_t num_inner_nodes = num_nodes - num_leaf_nodes;
- size_t node_size;
- if (params.use_unaligned_nodes) {
- const size_t num_unaligned_nodes = root->getSubtreeSize(BVH_STAT_UNALIGNED_INNER_COUNT);
- node_size = (num_unaligned_nodes * BVH_UNALIGNED_QNODE_SIZE) +
- (num_inner_nodes - num_unaligned_nodes) * BVH_QNODE_SIZE;
- }
- else {
- node_size = num_inner_nodes * BVH_QNODE_SIZE;
- }
- /* Resize arrays. */
- pack.nodes.clear();
- pack.leaf_nodes.clear();
- /* For top level BVH, first merge existing BVH's so we know the offsets. */
- if (params.top_level) {
- pack_instances(node_size, num_leaf_nodes * BVH_QNODE_LEAF_SIZE);
- }
- else {
- pack.nodes.resize(node_size);
- pack.leaf_nodes.resize(num_leaf_nodes * BVH_QNODE_LEAF_SIZE);
- }
-
- int nextNodeIdx = 0, nextLeafNodeIdx = 0;
-
- vector<BVHStackEntry> stack;
- stack.reserve(BVHParams::MAX_DEPTH * 2);
- if (root->is_leaf()) {
- stack.push_back(BVHStackEntry(root, nextLeafNodeIdx++));
- }
- else {
- stack.push_back(BVHStackEntry(root, nextNodeIdx));
- nextNodeIdx += root->has_unaligned() ? BVH_UNALIGNED_QNODE_SIZE : BVH_QNODE_SIZE;
- }
-
- while (stack.size()) {
- BVHStackEntry e = stack.back();
- stack.pop_back();
-
- if (e.node->is_leaf()) {
- /* leaf node */
- const LeafNode *leaf = reinterpret_cast<const LeafNode *>(e.node);
- pack_leaf(e, leaf);
- }
- else {
- /* Inner node. */
- /* Collect nodes. */
- const BVHNode *children[4];
- const int num_children = e.node->num_children();
- /* Push entries on the stack. */
- for (int i = 0; i < num_children; ++i) {
- int idx;
- children[i] = e.node->get_child(i);
- assert(children[i] != NULL);
- if (children[i]->is_leaf()) {
- idx = nextLeafNodeIdx++;
- }
- else {
- idx = nextNodeIdx;
- nextNodeIdx += children[i]->has_unaligned() ? BVH_UNALIGNED_QNODE_SIZE : BVH_QNODE_SIZE;
- }
- stack.push_back(BVHStackEntry(children[i], idx));
- }
- /* Set node. */
- pack_inner(e, &stack[stack.size() - num_children], num_children);
- }
- }
-
- assert(node_size == nextNodeIdx);
- /* Root index to start traversal at, to handle case of single leaf node. */
- pack.root_index = (root->is_leaf()) ? -1 : 0;
-}
-
-void BVH4::refit_nodes()
-{
- assert(!params.top_level);
-
- BoundBox bbox = BoundBox::empty;
- uint visibility = 0;
- refit_node(0, (pack.root_index == -1) ? true : false, bbox, visibility);
-}
-
-void BVH4::refit_node(int idx, bool leaf, BoundBox &bbox, uint &visibility)
-{
- if (leaf) {
- /* Refit leaf node. */
- int4 *data = &pack.leaf_nodes[idx];
- int4 c = data[0];
-
- BVH::refit_primitives(c.x, c.y, bbox, visibility);
-
- /* TODO(sergey): This is actually a copy of pack_leaf(),
- * but this chunk of code only knows actual data and has
- * no idea about BVHNode.
- *
- * Would be nice to de-duplicate code, but trying to make
- * making code more general ends up in much nastier code
- * in my opinion so far.
- *
- * Same applies to the inner nodes case below.
- */
- float4 leaf_data[BVH_QNODE_LEAF_SIZE];
- leaf_data[0].x = __int_as_float(c.x);
- leaf_data[0].y = __int_as_float(c.y);
- leaf_data[0].z = __uint_as_float(visibility);
- leaf_data[0].w = __uint_as_float(c.w);
- memcpy(&pack.leaf_nodes[idx], leaf_data, sizeof(float4) * BVH_QNODE_LEAF_SIZE);
- }
- else {
- int4 *data = &pack.nodes[idx];
- bool is_unaligned = (data[0].x & PATH_RAY_NODE_UNALIGNED) != 0;
- int4 c;
- if (is_unaligned) {
- c = data[13];
- }
- else {
- c = data[7];
- }
- /* Refit inner node, set bbox from children. */
- BoundBox child_bbox[4] = {BoundBox::empty, BoundBox::empty, BoundBox::empty, BoundBox::empty};
- uint child_visibility[4] = {0};
- int num_nodes = 0;
-
- for (int i = 0; i < 4; ++i) {
- if (c[i] != 0) {
- refit_node((c[i] < 0) ? -c[i] - 1 : c[i], (c[i] < 0), child_bbox[i], child_visibility[i]);
- ++num_nodes;
- bbox.grow(child_bbox[i]);
- visibility |= child_visibility[i];
- }
- }
-
- if (is_unaligned) {
- Transform aligned_space[4] = {
- transform_identity(), transform_identity(), transform_identity(), transform_identity()};
- pack_unaligned_node(
- idx, aligned_space, child_bbox, &c[0], visibility, 0.0f, 1.0f, num_nodes);
- }
- else {
- pack_aligned_node(idx, child_bbox, &c[0], visibility, 0.0f, 1.0f, num_nodes);
- }
- }
-}
-
-CCL_NAMESPACE_END
diff --git a/intern/cycles/bvh/bvh4.h b/intern/cycles/bvh/bvh4.h
deleted file mode 100644
index afbb9007afb..00000000000
--- a/intern/cycles/bvh/bvh4.h
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * Adapted from code copyright 2009-2010 NVIDIA Corporation
- * Modifications Copyright 2011, Blender Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __BVH4_H__
-#define __BVH4_H__
-
-#include "bvh/bvh.h"
-#include "bvh/bvh_params.h"
-
-#include "util/util_types.h"
-#include "util/util_vector.h"
-
-CCL_NAMESPACE_BEGIN
-
-class BVHNode;
-struct BVHStackEntry;
-class BVHParams;
-class BoundBox;
-class LeafNode;
-class Object;
-class Progress;
-
-#define BVH_QNODE_SIZE 8
-#define BVH_QNODE_LEAF_SIZE 1
-#define BVH_UNALIGNED_QNODE_SIZE 14
-
-/* BVH4
- *
- * Quad BVH, with each node having four children, to use with SIMD instructions.
- */
-class BVH4 : public BVH {
- protected:
- /* constructor */
- friend class BVH;
- BVH4(const BVHParams &params,
- const vector<Geometry *> &geometry,
- const vector<Object *> &objects);
-
- /* Building process. */
- virtual BVHNode *widen_children_nodes(const BVHNode *root) override;
-
- /* pack */
- void pack_nodes(const BVHNode *root) override;
-
- void pack_leaf(const BVHStackEntry &e, const LeafNode *leaf);
- void pack_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num);
-
- void pack_aligned_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num);
- void pack_aligned_node(int idx,
- const BoundBox *bounds,
- const int *child,
- const uint visibility,
- const float time_from,
- const float time_to,
- const int num);
-
- void pack_unaligned_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num);
- void pack_unaligned_node(int idx,
- const Transform *aligned_space,
- const BoundBox *bounds,
- const int *child,
- const uint visibility,
- const float time_from,
- const float time_to,
- const int num);
-
- /* refit */
- void refit_nodes() override;
- void refit_node(int idx, bool leaf, BoundBox &bbox, uint &visibility);
-};
-
-CCL_NAMESPACE_END
-
-#endif /* __BVH4_H__ */
diff --git a/intern/cycles/bvh/bvh8.cpp b/intern/cycles/bvh/bvh8.cpp
deleted file mode 100644
index b805865b2c8..00000000000
--- a/intern/cycles/bvh/bvh8.cpp
+++ /dev/null
@@ -1,541 +0,0 @@
-/*
- * Original code Copyright 2017, Intel Corporation
- * Modifications Copyright 2018, Blender Foundation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
- * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "bvh/bvh8.h"
-
-#include "render/hair.h"
-#include "render/mesh.h"
-#include "render/object.h"
-
-#include "bvh/bvh_node.h"
-#include "bvh/bvh_unaligned.h"
-
-CCL_NAMESPACE_BEGIN
-
-BVH8::BVH8(const BVHParams &params_,
- const vector<Geometry *> &geometry_,
- const vector<Object *> &objects_)
- : BVH(params_, geometry_, objects_)
-{
-}
-
-namespace {
-
-BVHNode *bvh_node_merge_children_recursively(const BVHNode *node)
-{
- if (node->is_leaf()) {
- return new LeafNode(*reinterpret_cast<const LeafNode *>(node));
- }
- /* Collect nodes of two layer deeper, allowing us to have more childrem in
- * an inner layer. */
- assert(node->num_children() <= 2);
- const BVHNode *children[8];
- const BVHNode *child0 = node->get_child(0);
- const BVHNode *child1 = node->get_child(1);
- int num_children = 0;
- if (child0->is_leaf()) {
- children[num_children++] = child0;
- }
- else {
- const BVHNode *child00 = child0->get_child(0), *child01 = child0->get_child(1);
- if (child00->is_leaf()) {
- children[num_children++] = child00;
- }
- else {
- children[num_children++] = child00->get_child(0);
- children[num_children++] = child00->get_child(1);
- }
- if (child01->is_leaf()) {
- children[num_children++] = child01;
- }
- else {
- children[num_children++] = child01->get_child(0);
- children[num_children++] = child01->get_child(1);
- }
- }
- if (child1->is_leaf()) {
- children[num_children++] = child1;
- }
- else {
- const BVHNode *child10 = child1->get_child(0), *child11 = child1->get_child(1);
- if (child10->is_leaf()) {
- children[num_children++] = child10;
- }
- else {
- children[num_children++] = child10->get_child(0);
- children[num_children++] = child10->get_child(1);
- }
- if (child11->is_leaf()) {
- children[num_children++] = child11;
- }
- else {
- children[num_children++] = child11->get_child(0);
- children[num_children++] = child11->get_child(1);
- }
- }
- /* Merge children in subtrees. */
- BVHNode *children4[8];
- for (int i = 0; i < num_children; ++i) {
- children4[i] = bvh_node_merge_children_recursively(children[i]);
- }
- /* Allocate new node. */
- BVHNode *node8 = new InnerNode(node->bounds, children4, num_children);
- /* TODO(sergey): Consider doing this from the InnerNode() constructor.
- * But in order to do this nicely need to think of how to pass all the
- * parameters there. */
- if (node->is_unaligned) {
- node8->is_unaligned = true;
- node8->aligned_space = new Transform();
- *node8->aligned_space = *node->aligned_space;
- }
- return node8;
-}
-
-} // namespace
-
-BVHNode *BVH8::widen_children_nodes(const BVHNode *root)
-{
- if (root == NULL) {
- return NULL;
- }
- if (root->is_leaf()) {
- return const_cast<BVHNode *>(root);
- }
- BVHNode *root8 = bvh_node_merge_children_recursively(root);
- /* TODO(sergey): Pack children nodes to parents which has less that 4
- * children. */
- return root8;
-}
-
-void BVH8::pack_leaf(const BVHStackEntry &e, const LeafNode *leaf)
-{
- float4 data[BVH_ONODE_LEAF_SIZE];
- memset(data, 0, sizeof(data));
- if (leaf->num_triangles() == 1 && pack.prim_index[leaf->lo] == -1) {
- /* object */
- data[0].x = __int_as_float(~(leaf->lo));
- data[0].y = __int_as_float(0);
- }
- else {
- /* triangle */
- data[0].x = __int_as_float(leaf->lo);
- data[0].y = __int_as_float(leaf->hi);
- }
- data[0].z = __uint_as_float(leaf->visibility);
- if (leaf->num_triangles() != 0) {
- data[0].w = __uint_as_float(pack.prim_type[leaf->lo]);
- }
-
- memcpy(&pack.leaf_nodes[e.idx], data, sizeof(float4) * BVH_ONODE_LEAF_SIZE);
-}
-
-void BVH8::pack_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num)
-{
- bool has_unaligned = false;
- /* Check whether we have to create unaligned node or all nodes are aligned
- * and we can cut some corner here.
- */
- if (params.use_unaligned_nodes) {
- for (int i = 0; i < num; i++) {
- if (en[i].node->is_unaligned) {
- has_unaligned = true;
- break;
- }
- }
- }
- if (has_unaligned) {
- /* There's no unaligned children, pack into AABB node. */
- pack_unaligned_inner(e, en, num);
- }
- else {
- /* Create unaligned node with orientation transform for each of the
- * children.
- */
- pack_aligned_inner(e, en, num);
- }
-}
-
-void BVH8::pack_aligned_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num)
-{
- BoundBox bounds[8];
- int child[8];
- for (int i = 0; i < num; ++i) {
- bounds[i] = en[i].node->bounds;
- child[i] = en[i].encodeIdx();
- }
- pack_aligned_node(
- e.idx, bounds, child, e.node->visibility, e.node->time_from, e.node->time_to, num);
-}
-
-void BVH8::pack_aligned_node(int idx,
- const BoundBox *bounds,
- const int *child,
- const uint visibility,
- const float time_from,
- const float time_to,
- const int num)
-{
- float8 data[8];
- memset(data, 0, sizeof(data));
-
- data[0].a = __uint_as_float(visibility & ~PATH_RAY_NODE_UNALIGNED);
- data[0].b = time_from;
- data[0].c = time_to;
-
- for (int i = 0; i < num; i++) {
- float3 bb_min = bounds[i].min;
- float3 bb_max = bounds[i].max;
-
- data[1][i] = bb_min.x;
- data[2][i] = bb_max.x;
- data[3][i] = bb_min.y;
- data[4][i] = bb_max.y;
- data[5][i] = bb_min.z;
- data[6][i] = bb_max.z;
-
- data[7][i] = __int_as_float(child[i]);
- }
-
- for (int i = num; i < 8; i++) {
- /* We store BB which would never be recorded as intersection
- * so kernel might safely assume there are always 4 child nodes.
- */
- data[1][i] = FLT_MAX;
- data[2][i] = -FLT_MAX;
-
- data[3][i] = FLT_MAX;
- data[4][i] = -FLT_MAX;
-
- data[5][i] = FLT_MAX;
- data[6][i] = -FLT_MAX;
-
- data[7][i] = __int_as_float(0);
- }
-
- memcpy(&pack.nodes[idx], data, sizeof(float4) * BVH_ONODE_SIZE);
-}
-
-void BVH8::pack_unaligned_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num)
-{
- Transform aligned_space[8];
- BoundBox bounds[8];
- int child[8];
- for (int i = 0; i < num; ++i) {
- aligned_space[i] = en[i].node->get_aligned_space();
- bounds[i] = en[i].node->bounds;
- child[i] = en[i].encodeIdx();
- }
- pack_unaligned_node(e.idx,
- aligned_space,
- bounds,
- child,
- e.node->visibility,
- e.node->time_from,
- e.node->time_to,
- num);
-}
-
-void BVH8::pack_unaligned_node(int idx,
- const Transform *aligned_space,
- const BoundBox *bounds,
- const int *child,
- const uint visibility,
- const float time_from,
- const float time_to,
- const int num)
-{
- float8 data[BVH_UNALIGNED_ONODE_SIZE];
- memset(data, 0, sizeof(data));
-
- data[0].a = __uint_as_float(visibility | PATH_RAY_NODE_UNALIGNED);
- data[0].b = time_from;
- data[0].c = time_to;
-
- for (int i = 0; i < num; i++) {
- Transform space = BVHUnaligned::compute_node_transform(bounds[i], aligned_space[i]);
-
- data[1][i] = space.x.x;
- data[2][i] = space.x.y;
- data[3][i] = space.x.z;
-
- data[4][i] = space.y.x;
- data[5][i] = space.y.y;
- data[6][i] = space.y.z;
-
- data[7][i] = space.z.x;
- data[8][i] = space.z.y;
- data[9][i] = space.z.z;
-
- data[10][i] = space.x.w;
- data[11][i] = space.y.w;
- data[12][i] = space.z.w;
-
- data[13][i] = __int_as_float(child[i]);
- }
-
- for (int i = num; i < 8; i++) {
- /* We store BB which would never be recorded as intersection
- * so kernel might safely assume there are always 4 child nodes.
- */
-
- data[1][i] = NAN;
- data[2][i] = NAN;
- data[3][i] = NAN;
-
- data[4][i] = NAN;
- data[5][i] = NAN;
- data[6][i] = NAN;
-
- data[7][i] = NAN;
- data[8][i] = NAN;
- data[9][i] = NAN;
-
- data[10][i] = NAN;
- data[11][i] = NAN;
- data[12][i] = NAN;
-
- data[13][i] = __int_as_float(0);
- }
-
- memcpy(&pack.nodes[idx], data, sizeof(float4) * BVH_UNALIGNED_ONODE_SIZE);
-}
-
-/* Quad SIMD Nodes */
-
-void BVH8::pack_nodes(const BVHNode *root)
-{
- /* Calculate size of the arrays required. */
- const size_t num_nodes = root->getSubtreeSize(BVH_STAT_NODE_COUNT);
- const size_t num_leaf_nodes = root->getSubtreeSize(BVH_STAT_LEAF_COUNT);
- assert(num_leaf_nodes <= num_nodes);
- const size_t num_inner_nodes = num_nodes - num_leaf_nodes;
- size_t node_size;
- if (params.use_unaligned_nodes) {
- const size_t num_unaligned_nodes = root->getSubtreeSize(BVH_STAT_UNALIGNED_INNER_COUNT);
- node_size = (num_unaligned_nodes * BVH_UNALIGNED_ONODE_SIZE) +
- (num_inner_nodes - num_unaligned_nodes) * BVH_ONODE_SIZE;
- }
- else {
- node_size = num_inner_nodes * BVH_ONODE_SIZE;
- }
- /* Resize arrays. */
- pack.nodes.clear();
- pack.leaf_nodes.clear();
- /* For top level BVH, first merge existing BVH's so we know the offsets. */
- if (params.top_level) {
- pack_instances(node_size, num_leaf_nodes * BVH_ONODE_LEAF_SIZE);
- }
- else {
- pack.nodes.resize(node_size);
- pack.leaf_nodes.resize(num_leaf_nodes * BVH_ONODE_LEAF_SIZE);
- }
-
- int nextNodeIdx = 0, nextLeafNodeIdx = 0;
-
- vector<BVHStackEntry> stack;
- stack.reserve(BVHParams::MAX_DEPTH * 2);
- if (root->is_leaf()) {
- stack.push_back(BVHStackEntry(root, nextLeafNodeIdx++));
- }
- else {
- stack.push_back(BVHStackEntry(root, nextNodeIdx));
- nextNodeIdx += root->has_unaligned() ? BVH_UNALIGNED_ONODE_SIZE : BVH_ONODE_SIZE;
- }
-
- while (stack.size()) {
- BVHStackEntry e = stack.back();
- stack.pop_back();
-
- if (e.node->is_leaf()) {
- /* leaf node */
- const LeafNode *leaf = reinterpret_cast<const LeafNode *>(e.node);
- pack_leaf(e, leaf);
- }
- else {
- /* Inner node. */
- /* Collect nodes. */
- const BVHNode *children[8];
- int num_children = e.node->num_children();
- /* Push entries on the stack. */
- for (int i = 0; i < num_children; ++i) {
- int idx;
- children[i] = e.node->get_child(i);
- if (children[i]->is_leaf()) {
- idx = nextLeafNodeIdx++;
- }
- else {
- idx = nextNodeIdx;
- nextNodeIdx += children[i]->has_unaligned() ? BVH_UNALIGNED_ONODE_SIZE : BVH_ONODE_SIZE;
- }
- stack.push_back(BVHStackEntry(children[i], idx));
- }
- /* Set node. */
- pack_inner(e, &stack[stack.size() - num_children], num_children);
- }
- }
-
- assert(node_size == nextNodeIdx);
- /* Root index to start traversal at, to handle case of single leaf node. */
- pack.root_index = (root->is_leaf()) ? -1 : 0;
-}
-
-void BVH8::refit_nodes()
-{
- assert(!params.top_level);
-
- BoundBox bbox = BoundBox::empty;
- uint visibility = 0;
- refit_node(0, (pack.root_index == -1) ? true : false, bbox, visibility);
-}
-
-void BVH8::refit_node(int idx, bool leaf, BoundBox &bbox, uint &visibility)
-{
- if (leaf) {
- int4 *data = &pack.leaf_nodes[idx];
- int4 c = data[0];
- /* Refit leaf node. */
- for (int prim = c.x; prim < c.y; prim++) {
- int pidx = pack.prim_index[prim];
- int tob = pack.prim_object[prim];
- Object *ob = objects[tob];
-
- if (pidx == -1) {
- /* Object instance. */
- bbox.grow(ob->bounds);
- }
- else {
- /* Primitives. */
- if (pack.prim_type[prim] & PRIMITIVE_ALL_CURVE) {
- /* Curves. */
- const Hair *hair = static_cast<const Hair *>(ob->geometry);
- int prim_offset = (params.top_level) ? hair->prim_offset : 0;
- Hair::Curve curve = hair->get_curve(pidx - prim_offset);
- int k = PRIMITIVE_UNPACK_SEGMENT(pack.prim_type[prim]);
-
- curve.bounds_grow(k, &hair->curve_keys[0], &hair->curve_radius[0], bbox);
-
- /* Motion curves. */
- if (hair->use_motion_blur) {
- Attribute *attr = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
-
- if (attr) {
- size_t hair_size = hair->curve_keys.size();
- size_t steps = hair->motion_steps - 1;
- float3 *key_steps = attr->data_float3();
-
- for (size_t i = 0; i < steps; i++) {
- curve.bounds_grow(k, key_steps + i * hair_size, &hair->curve_radius[0], bbox);
- }
- }
- }
- }
- else {
- /* Triangles. */
- const Mesh *mesh = static_cast<const Mesh *>(ob->geometry);
- int prim_offset = (params.top_level) ? mesh->prim_offset : 0;
- Mesh::Triangle triangle = mesh->get_triangle(pidx - prim_offset);
- const float3 *vpos = &mesh->verts[0];
-
- triangle.bounds_grow(vpos, bbox);
-
- /* Motion triangles. */
- if (mesh->use_motion_blur) {
- Attribute *attr = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
-
- if (attr) {
- size_t mesh_size = mesh->verts.size();
- size_t steps = mesh->motion_steps - 1;
- float3 *vert_steps = attr->data_float3();
-
- for (size_t i = 0; i < steps; i++) {
- triangle.bounds_grow(vert_steps + i * mesh_size, bbox);
- }
- }
- }
- }
- }
-
- visibility |= ob->visibility;
- }
-
- float4 leaf_data[BVH_ONODE_LEAF_SIZE];
- leaf_data[0].x = __int_as_float(c.x);
- leaf_data[0].y = __int_as_float(c.y);
- leaf_data[0].z = __uint_as_float(visibility);
- leaf_data[0].w = __uint_as_float(c.w);
- memcpy(&pack.leaf_nodes[idx], leaf_data, sizeof(float4) * BVH_ONODE_LEAF_SIZE);
- }
- else {
- float8 *data = (float8 *)&pack.nodes[idx];
- bool is_unaligned = (__float_as_uint(data[0].a) & PATH_RAY_NODE_UNALIGNED) != 0;
- /* Refit inner node, set bbox from children. */
- BoundBox child_bbox[8] = {BoundBox::empty,
- BoundBox::empty,
- BoundBox::empty,
- BoundBox::empty,
- BoundBox::empty,
- BoundBox::empty,
- BoundBox::empty,
- BoundBox::empty};
- int child[8];
- uint child_visibility[8] = {0};
- int num_nodes = 0;
-
- for (int i = 0; i < 8; ++i) {
- child[i] = __float_as_int(data[(is_unaligned) ? 13 : 7][i]);
-
- if (child[i] != 0) {
- refit_node((child[i] < 0) ? -child[i] - 1 : child[i],
- (child[i] < 0),
- child_bbox[i],
- child_visibility[i]);
- ++num_nodes;
- bbox.grow(child_bbox[i]);
- visibility |= child_visibility[i];
- }
- }
-
- if (is_unaligned) {
- Transform aligned_space[8] = {transform_identity(),
- transform_identity(),
- transform_identity(),
- transform_identity(),
- transform_identity(),
- transform_identity(),
- transform_identity(),
- transform_identity()};
- pack_unaligned_node(
- idx, aligned_space, child_bbox, child, visibility, 0.0f, 1.0f, num_nodes);
- }
- else {
- pack_aligned_node(idx, child_bbox, child, visibility, 0.0f, 1.0f, num_nodes);
- }
- }
-}
-
-CCL_NAMESPACE_END
diff --git a/intern/cycles/bvh/bvh8.h b/intern/cycles/bvh/bvh8.h
deleted file mode 100644
index d23fa528e3e..00000000000
--- a/intern/cycles/bvh/bvh8.h
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * Original code Copyright 2017, Intel Corporation
- * Modifications Copyright 2018, Blender Foundation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
- * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __BVH8_H__
-#define __BVH8_H__
-
-#include "bvh/bvh.h"
-#include "bvh/bvh_params.h"
-
-#include "util/util_types.h"
-#include "util/util_vector.h"
-
-CCL_NAMESPACE_BEGIN
-
-class BVHNode;
-struct BVHStackEntry;
-class BVHParams;
-class BoundBox;
-class LeafNode;
-class Object;
-class Progress;
-
-#define BVH_ONODE_SIZE 16
-#define BVH_ONODE_LEAF_SIZE 1
-#define BVH_UNALIGNED_ONODE_SIZE 28
-
-/* BVH8
- *
- * Octo BVH, with each node having eight children, to use with SIMD instructions.
- */
-class BVH8 : public BVH {
- protected:
- /* constructor */
- friend class BVH;
- BVH8(const BVHParams &params,
- const vector<Geometry *> &geometry,
- const vector<Object *> &objects);
-
- /* Building process. */
- virtual BVHNode *widen_children_nodes(const BVHNode *root) override;
-
- /* pack */
- void pack_nodes(const BVHNode *root) override;
-
- void pack_leaf(const BVHStackEntry &e, const LeafNode *leaf);
- void pack_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num);
-
- void pack_aligned_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num);
- void pack_aligned_node(int idx,
- const BoundBox *bounds,
- const int *child,
- const uint visibility,
- const float time_from,
- const float time_to,
- const int num);
-
- void pack_unaligned_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num);
- void pack_unaligned_node(int idx,
- const Transform *aligned_space,
- const BoundBox *bounds,
- const int *child,
- const uint visibility,
- const float time_from,
- const float time_to,
- const int num);
-
- /* refit */
- void refit_nodes() override;
- void refit_node(int idx, bool leaf, BoundBox &bbox, uint &visibility);
-};
-
-CCL_NAMESPACE_END
-
-#endif /* __BVH8_H__ */
diff --git a/intern/cycles/bvh/bvh_build.cpp b/intern/cycles/bvh/bvh_build.cpp
index 814b5ced5d2..86ab7b00815 100644
--- a/intern/cycles/bvh/bvh_build.cpp
+++ b/intern/cycles/bvh/bvh_build.cpp
@@ -39,48 +39,6 @@
CCL_NAMESPACE_BEGIN
-/* BVH Build Task */
-
-class BVHBuildTask : public Task {
- public:
- BVHBuildTask(
- BVHBuild *build, InnerNode *node, int child, const BVHObjectBinning &range, int level)
- : range_(range)
- {
- run = function_bind(&BVHBuild::thread_build_node, build, node, child, &range_, level);
- }
-
- private:
- BVHObjectBinning range_;
-};
-
-class BVHSpatialSplitBuildTask : public Task {
- public:
- BVHSpatialSplitBuildTask(BVHBuild *build,
- InnerNode *node,
- int child,
- const BVHRange &range,
- const vector<BVHReference> &references,
- int level)
- : range_(range),
- references_(references.begin() + range.start(), references.begin() + range.end())
- {
- range_.set_start(0);
- run = function_bind(&BVHBuild::thread_build_spatial_split_node,
- build,
- node,
- child,
- &range_,
- &references_,
- level,
- _1);
- }
-
- private:
- BVHRange range_;
- vector<BVHReference> references_;
-};
-
/* Constructor / Destructor */
BVHBuild::BVHBuild(const vector<Object *> &objects_,
@@ -201,6 +159,13 @@ void BVHBuild::add_reference_curves(BoundBox &root, BoundBox &center, Hair *hair
if (hair->has_motion_blur()) {
curve_attr_mP = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
}
+
+ const PrimitiveType primitive_type =
+ (curve_attr_mP != NULL) ?
+ ((hair->curve_shape == CURVE_RIBBON) ? PRIMITIVE_MOTION_CURVE_RIBBON :
+ PRIMITIVE_MOTION_CURVE_THICK) :
+ ((hair->curve_shape == CURVE_RIBBON) ? PRIMITIVE_CURVE_RIBBON : PRIMITIVE_CURVE_THICK);
+
const size_t num_curves = hair->num_curves();
for (uint j = 0; j < num_curves; j++) {
const Hair::Curve curve = hair->get_curve(j);
@@ -211,7 +176,7 @@ void BVHBuild::add_reference_curves(BoundBox &root, BoundBox &center, Hair *hair
BoundBox bounds = BoundBox::empty;
curve.bounds_grow(k, &hair->curve_keys[0], curve_radius, bounds);
if (bounds.valid()) {
- int packed_type = PRIMITIVE_PACK_SEGMENT(PRIMITIVE_CURVE, k);
+ int packed_type = PRIMITIVE_PACK_SEGMENT(primitive_type, k);
references.push_back(BVHReference(bounds, j, i, packed_type));
root.grow(bounds);
center.grow(bounds.center2());
@@ -232,7 +197,7 @@ void BVHBuild::add_reference_curves(BoundBox &root, BoundBox &center, Hair *hair
curve.bounds_grow(k, key_steps + step * num_keys, curve_radius, bounds);
}
if (bounds.valid()) {
- int packed_type = PRIMITIVE_PACK_SEGMENT(PRIMITIVE_MOTION_CURVE, k);
+ int packed_type = PRIMITIVE_PACK_SEGMENT(primitive_type, k);
references.push_back(BVHReference(bounds, j, i, packed_type));
root.grow(bounds);
center.grow(bounds.center2());
@@ -288,7 +253,7 @@ void BVHBuild::add_reference_curves(BoundBox &root, BoundBox &center, Hair *hair
bounds.grow(curr_bounds);
if (bounds.valid()) {
const float prev_time = (float)(bvh_step - 1) * num_bvh_steps_inv_1;
- int packed_type = PRIMITIVE_PACK_SEGMENT(PRIMITIVE_MOTION_CURVE, k);
+ int packed_type = PRIMITIVE_PACK_SEGMENT(primitive_type, k);
references.push_back(BVHReference(bounds, j, i, packed_type, prev_time, curr_time));
root.grow(bounds);
center.grow(bounds.center2());
@@ -423,22 +388,6 @@ BVHNode *BVHBuild::run()
}
spatial_min_overlap = root.bounds().safe_area() * params.spatial_split_alpha;
- if (params.use_spatial_split) {
- /* NOTE: The API here tries to be as much ready for multi-threaded build
- * as possible, but at the same time it tries not to introduce any
- * changes in behavior for until all refactoring needed for threading is
- * finished.
- *
- * So we currently allocate single storage for now, which is only used by
- * the only thread working on the spatial BVH build.
- */
- spatial_storage.resize(TaskScheduler::num_threads() + 1);
- size_t num_bins = max(root.size(), (int)BVHParams::NUM_SPATIAL_BINS) - 1;
- foreach (BVHSpatialStorage &storage, spatial_storage) {
- storage.right_bounds.clear();
- }
- spatial_storage[0].right_bounds.resize(num_bins);
- }
spatial_free_index = 0;
need_prim_time = params.num_motion_curve_steps > 0 || params.num_motion_triangle_steps > 0;
@@ -465,7 +414,8 @@ BVHNode *BVHBuild::run()
if (params.use_spatial_split) {
/* Perform multithreaded spatial split build. */
- rootnode = build_node(root, &references, 0, 0);
+ BVHSpatialStorage *local_storage = &spatial_storage.local();
+ rootnode = build_node(root, references, 0, local_storage);
task_pool.wait_work();
}
else {
@@ -475,6 +425,9 @@ BVHNode *BVHBuild::run()
task_pool.wait_work();
}
+ /* clean up temporary memory usage by threads */
+ spatial_storage.clear();
+
/* delete if we canceled */
if (rootnode) {
if (progress.get_cancel()) {
@@ -529,41 +482,46 @@ void BVHBuild::progress_update()
progress_start_time = time_dt();
}
-void BVHBuild::thread_build_node(InnerNode *inner, int child, BVHObjectBinning *range, int level)
+void BVHBuild::thread_build_node(InnerNode *inner,
+ int child,
+ const BVHObjectBinning &range,
+ int level)
{
if (progress.get_cancel())
return;
/* build nodes */
- BVHNode *node = build_node(*range, level);
+ BVHNode *node = build_node(range, level);
/* set child in inner node */
inner->children[child] = node;
/* update progress */
- if (range->size() < THREAD_TASK_SIZE) {
+ if (range.size() < THREAD_TASK_SIZE) {
/*rotate(node, INT_MAX, 5);*/
thread_scoped_lock lock(build_mutex);
- progress_count += range->size();
+ progress_count += range.size();
progress_update();
}
}
void BVHBuild::thread_build_spatial_split_node(InnerNode *inner,
int child,
- BVHRange *range,
- vector<BVHReference> *references,
- int level,
- int thread_id)
+ const BVHRange &range,
+ vector<BVHReference> &references,
+ int level)
{
if (progress.get_cancel()) {
return;
}
+ /* Get per-thread memory for spatial split. */
+ BVHSpatialStorage *local_storage = &spatial_storage.local();
+
/* build nodes */
- BVHNode *node = build_node(*range, references, level, thread_id);
+ BVHNode *node = build_node(range, references, level, local_storage);
/* set child in inner node */
inner->children[child] = node;
@@ -586,14 +544,22 @@ bool BVHBuild::range_within_max_leaf_size(const BVHRange &range,
for (int i = 0; i < size; i++) {
const BVHReference &ref = references[range.start() + i];
- if (ref.prim_type() & PRIMITIVE_CURVE)
- num_curves++;
- if (ref.prim_type() & PRIMITIVE_MOTION_CURVE)
- num_motion_curves++;
- else if (ref.prim_type() & PRIMITIVE_TRIANGLE)
- num_triangles++;
- else if (ref.prim_type() & PRIMITIVE_MOTION_TRIANGLE)
- num_motion_triangles++;
+ if (ref.prim_type() & PRIMITIVE_ALL_CURVE) {
+ if (ref.prim_type() & PRIMITIVE_ALL_MOTION) {
+ num_motion_curves++;
+ }
+ else {
+ num_curves++;
+ }
+ }
+ else if (ref.prim_type() & PRIMITIVE_ALL_TRIANGLE) {
+ if (ref.prim_type() & PRIMITIVE_ALL_MOTION) {
+ num_motion_triangles++;
+ }
+ else {
+ num_triangles++;
+ }
+ }
}
return (num_triangles <= params.max_triangle_leaf_size) &&
@@ -675,8 +641,8 @@ BVHNode *BVHBuild::build_node(const BVHObjectBinning &range, int level)
/* Threaded build */
inner = new InnerNode(bounds);
- task_pool.push(new BVHBuildTask(this, inner, 0, left, level + 1), true);
- task_pool.push(new BVHBuildTask(this, inner, 1, right, level + 1), true);
+ task_pool.push([=] { thread_build_node(inner, 0, left, level + 1); });
+ task_pool.push([=] { thread_build_node(inner, 1, right, level + 1); });
}
if (do_unalinged_split) {
@@ -688,9 +654,9 @@ BVHNode *BVHBuild::build_node(const BVHObjectBinning &range, int level)
/* multithreaded spatial split builder */
BVHNode *BVHBuild::build_node(const BVHRange &range,
- vector<BVHReference> *references,
+ vector<BVHReference> &references,
int level,
- int thread_id)
+ BVHSpatialStorage *storage)
{
/* Update progress.
*
@@ -707,18 +673,17 @@ BVHNode *BVHBuild::build_node(const BVHRange &range,
if (!(range.size() > 0 && params.top_level && level == 0)) {
if (params.small_enough_for_leaf(range.size(), level)) {
progress_count += range.size();
- return create_leaf_node(range, *references);
+ return create_leaf_node(range, references);
}
}
/* Perform splitting test. */
- BVHSpatialStorage *storage = &spatial_storage[thread_id];
BVHMixedSplit split(this, storage, range, references, level);
if (!(range.size() > 0 && params.top_level && level == 0)) {
if (split.no_split) {
progress_count += range.size();
- return create_leaf_node(range, *references);
+ return create_leaf_node(range, references);
}
}
float leafSAH = params.sah_primitive_cost * split.leafSAH;
@@ -731,7 +696,7 @@ BVHNode *BVHBuild::build_node(const BVHRange &range,
Transform aligned_space;
bool do_unalinged_split = false;
if (params.use_unaligned_nodes && splitSAH > params.unaligned_split_threshold * leafSAH) {
- aligned_space = unaligned_heuristic.compute_aligned_space(range, &references->at(0));
+ aligned_space = unaligned_heuristic.compute_aligned_space(range, &references.at(0));
unaligned_split = BVHMixedSplit(
this, storage, range, references, level, &unaligned_heuristic, &aligned_space);
/* unalignedLeafSAH = params.sah_primitive_cost * split.leafSAH; */
@@ -757,8 +722,7 @@ BVHNode *BVHBuild::build_node(const BVHRange &range,
BoundBox bounds;
if (do_unalinged_split) {
- bounds = unaligned_heuristic.compute_aligned_boundbox(
- range, &references->at(0), aligned_space);
+ bounds = unaligned_heuristic.compute_aligned_boundbox(range, &references.at(0), aligned_space);
}
else {
bounds = range.bounds();
@@ -770,24 +734,35 @@ BVHNode *BVHBuild::build_node(const BVHRange &range,
/* Local build. */
/* Build left node. */
- vector<BVHReference> copy(references->begin() + right.start(),
- references->begin() + right.end());
+ vector<BVHReference> right_references(references.begin() + right.start(),
+ references.begin() + right.end());
right.set_start(0);
- BVHNode *leftnode = build_node(left, references, level + 1, thread_id);
+ BVHNode *leftnode = build_node(left, references, level + 1, storage);
/* Build right node. */
- BVHNode *rightnode = build_node(right, &copy, level + 1, thread_id);
+ BVHNode *rightnode = build_node(right, right_references, level + 1, storage);
inner = new InnerNode(bounds, leftnode, rightnode);
}
else {
/* Threaded build. */
inner = new InnerNode(bounds);
- task_pool.push(new BVHSpatialSplitBuildTask(this, inner, 0, left, *references, level + 1),
- true);
- task_pool.push(new BVHSpatialSplitBuildTask(this, inner, 1, right, *references, level + 1),
- true);
+
+ vector<BVHReference> left_references(references.begin() + left.start(),
+ references.begin() + left.end());
+ vector<BVHReference> right_references(references.begin() + right.start(),
+ references.begin() + right.end());
+ right.set_start(0);
+
+ /* Create tasks for left and right nodes, using copy for most arguments and
+ * move for reference to avoid memory copies. */
+ task_pool.push([=, refs = std::move(left_references)]() mutable {
+ thread_build_spatial_split_node(inner, 0, left, refs, level + 1);
+ });
+ task_pool.push([=, refs = std::move(right_references)]() mutable {
+ thread_build_spatial_split_node(inner, 1, right, refs, level + 1);
+ });
}
if (do_unalinged_split) {
diff --git a/intern/cycles/bvh/bvh_build.h b/intern/cycles/bvh/bvh_build.h
index 3fe4c3799e2..c35af083fbd 100644
--- a/intern/cycles/bvh/bvh_build.h
+++ b/intern/cycles/bvh/bvh_build.h
@@ -74,9 +74,9 @@ class BVHBuild {
/* Building. */
BVHNode *build_node(const BVHRange &range,
- vector<BVHReference> *references,
+ vector<BVHReference> &references,
int level,
- int thread_id);
+ BVHSpatialStorage *storage);
BVHNode *build_node(const BVHObjectBinning &range, int level);
BVHNode *create_leaf_node(const BVHRange &range, const vector<BVHReference> &references);
BVHNode *create_object_leaf_nodes(const BVHReference *ref, int start, int num);
@@ -86,13 +86,12 @@ class BVHBuild {
/* Threads. */
enum { THREAD_TASK_SIZE = 4096 };
- void thread_build_node(InnerNode *node, int child, BVHObjectBinning *range, int level);
+ void thread_build_node(InnerNode *node, int child, const BVHObjectBinning &range, int level);
void thread_build_spatial_split_node(InnerNode *node,
int child,
- BVHRange *range,
- vector<BVHReference> *references,
- int level,
- int thread_id);
+ const BVHRange &range,
+ vector<BVHReference> &references,
+ int level);
thread_mutex build_mutex;
/* Progress. */
@@ -127,7 +126,7 @@ class BVHBuild {
/* Spatial splitting. */
float spatial_min_overlap;
- vector<BVHSpatialStorage> spatial_storage;
+ enumerable_thread_specific<BVHSpatialStorage> spatial_storage;
size_t spatial_free_index;
thread_spin_lock spatial_spin_lock;
diff --git a/intern/cycles/bvh/bvh_embree.cpp b/intern/cycles/bvh/bvh_embree.cpp
index 6735202835b..17e1f86a589 100644
--- a/intern/cycles/bvh/bvh_embree.cpp
+++ b/intern/cycles/bvh/bvh_embree.cpp
@@ -47,9 +47,11 @@
# include "render/hair.h"
# include "render/mesh.h"
# include "render/object.h"
+
# include "util/util_foreach.h"
# include "util/util_logging.h"
# include "util/util_progress.h"
+# include "util/util_stats.h"
CCL_NAMESPACE_BEGIN
@@ -65,30 +67,9 @@ static_assert(Object::MAX_MOTION_STEPS == Geometry::MAX_MOTION_STEPS,
* as well as filtering for volume objects happen here.
* Cycles' own BVH does that directly inside the traversal calls.
*/
-static void rtc_filter_func(const RTCFilterFunctionNArguments *args)
-{
- /* Current implementation in Cycles assumes only single-ray intersection queries. */
- assert(args->N == 1);
-
- const RTCRay *ray = (RTCRay *)args->ray;
- const RTCHit *hit = (RTCHit *)args->hit;
- CCLIntersectContext *ctx = ((IntersectContext *)args->context)->userRayExt;
- KernelGlobals *kg = ctx->kg;
-
- /* Check if there is backfacing hair to ignore. */
- if (IS_HAIR(hit->geomID) && (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) &&
- !(kernel_data.curve.curveflags & CURVE_KN_BACKFACING) &&
- !(kernel_data.curve.curveflags & CURVE_KN_RIBBONS)) {
- if (dot(make_float3(ray->dir_x, ray->dir_y, ray->dir_z),
- make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z)) > 0.0f) {
- *args->valid = 0;
- return;
- }
- }
-}
-
static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments *args)
{
+ /* Current implementation in Cycles assumes only single-ray intersection queries. */
assert(args->N == 1);
const RTCRay *ray = (RTCRay *)args->ray;
@@ -96,17 +77,6 @@ static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments *args)
CCLIntersectContext *ctx = ((IntersectContext *)args->context)->userRayExt;
KernelGlobals *kg = ctx->kg;
- /* For all ray types: Check if there is backfacing hair to ignore */
- if (IS_HAIR(hit->geomID) && (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) &&
- !(kernel_data.curve.curveflags & CURVE_KN_BACKFACING) &&
- !(kernel_data.curve.curveflags & CURVE_KN_RIBBONS)) {
- if (dot(make_float3(ray->dir_x, ray->dir_y, ray->dir_z),
- make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z)) > 0.0f) {
- *args->valid = 0;
- return;
- }
- }
-
switch (ctx->type) {
case CCLIntersectContext::RAY_SHADOW_ALL: {
/* Append the intersection to the end of the array. */
@@ -168,7 +138,7 @@ static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments *args)
}
/* Ignore curves. */
- if (hit->geomID & 1) {
+ if (IS_HAIR(hit->geomID)) {
/* This tells Embree to continue tracing. */
*args->valid = 0;
break;
@@ -249,6 +219,34 @@ static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments *args)
}
}
+static void rtc_filter_func_thick_curve(const RTCFilterFunctionNArguments *args)
+{
+ const RTCRay *ray = (RTCRay *)args->ray;
+ RTCHit *hit = (RTCHit *)args->hit;
+
+ /* Always ignore backfacing intersections. */
+ if (dot(make_float3(ray->dir_x, ray->dir_y, ray->dir_z),
+ make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z)) > 0.0f) {
+ *args->valid = 0;
+ return;
+ }
+}
+
+static void rtc_filter_occluded_func_thick_curve(const RTCFilterFunctionNArguments *args)
+{
+ const RTCRay *ray = (RTCRay *)args->ray;
+ RTCHit *hit = (RTCHit *)args->hit;
+
+ /* Always ignore backfacing intersections. */
+ if (dot(make_float3(ray->dir_x, ray->dir_y, ray->dir_z),
+ make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z)) > 0.0f) {
+ *args->valid = 0;
+ return;
+ }
+
+ rtc_filter_occluded_func(args);
+}
+
static size_t unaccounted_mem = 0;
static bool rtc_memory_monitor_func(void *userPtr, const ssize_t bytes, const bool)
@@ -326,8 +324,6 @@ BVHEmbree::BVHEmbree(const BVHParams &params_,
stats(NULL),
curve_subdivisions(params.curve_subdivisions),
build_quality(RTC_BUILD_QUALITY_REFIT),
- use_curves(params_.curve_flags & CURVE_KN_INTERPOLATE),
- use_ribbons(params.curve_flags & CURVE_KN_RIBBONS),
dynamic_scene(true)
{
_MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
@@ -653,7 +649,6 @@ void BVHEmbree::add_triangles(const Object *ob, const Mesh *mesh, int i)
}
rtcSetGeometryUserData(geom_id, (void *)prim_offset);
- rtcSetGeometryIntersectFilterFunction(geom_id, rtc_filter_func);
rtcSetGeometryOccludedFilterFunction(geom_id, rtc_filter_occluded_func);
rtcSetGeometryMask(geom_id, ob->visibility_for_tracing());
@@ -724,9 +719,7 @@ void BVHEmbree::update_curve_vertex_buffer(RTCGeometry geom_id, const Hair *hair
/* Catmull-Rom splines need extra CVs at the beginning and end of each curve. */
size_t num_keys_embree = num_keys;
- if (use_curves) {
- num_keys_embree += num_curves * 2;
- }
+ num_keys_embree += num_curves * 2;
/* Copy the CV data to Embree */
const int t_mid = (num_motion_steps - 1) / 2;
@@ -746,45 +739,22 @@ void BVHEmbree::update_curve_vertex_buffer(RTCGeometry geom_id, const Hair *hair
assert(rtc_verts);
if (rtc_verts) {
- if (use_curves) {
- const size_t num_curves = hair->num_curves();
- for (size_t j = 0; j < num_curves; ++j) {
- Hair::Curve c = hair->get_curve(j);
- int fk = c.first_key;
- int k = 1;
- for (; k < c.num_keys + 1; ++k, ++fk) {
- rtc_verts[k] = float3_to_float4(verts[fk]);
- rtc_verts[k].w = curve_radius[fk];
- }
- /* Duplicate Embree's Catmull-Rom spline CVs at the start and end of each curve. */
- rtc_verts[0] = rtc_verts[1];
- rtc_verts[k] = rtc_verts[k - 1];
- rtc_verts += c.num_keys + 2;
- }
- }
- else {
- for (size_t j = 0; j < num_keys_embree; ++j) {
- rtc_verts[j] = float3_to_float4(verts[j]);
- rtc_verts[j].w = curve_radius[j];
+ const size_t num_curves = hair->num_curves();
+ for (size_t j = 0; j < num_curves; ++j) {
+ Hair::Curve c = hair->get_curve(j);
+ int fk = c.first_key;
+ int k = 1;
+ for (; k < c.num_keys + 1; ++k, ++fk) {
+ rtc_verts[k] = float3_to_float4(verts[fk]);
+ rtc_verts[k].w = curve_radius[fk];
}
+ /* Duplicate Embree's Catmull-Rom spline CVs at the start and end of each curve. */
+ rtc_verts[0] = rtc_verts[1];
+ rtc_verts[k] = rtc_verts[k - 1];
+ rtc_verts += c.num_keys + 2;
}
}
}
-# if RTC_VERSION >= 30900
- if (!use_curves) {
- unsigned char *flags = (unsigned char *)rtcSetNewGeometryBuffer(geom_id,
- RTC_BUFFER_TYPE_FLAGS,
- 0,
- RTC_FORMAT_UCHAR,
- sizeof(unsigned char),
- num_keys_embree);
- flags[0] = RTC_CURVE_FLAG_NEIGHBOR_RIGHT;
- ::memset(flags + 1,
- RTC_CURVE_FLAG_NEIGHBOR_RIGHT | RTC_CURVE_FLAG_NEIGHBOR_RIGHT,
- num_keys_embree - 2);
- flags[num_keys_embree - 1] = RTC_CURVE_FLAG_NEIGHBOR_LEFT;
- }
-# endif
}
void BVHEmbree::add_curves(const Object *ob, const Hair *hair, int i)
@@ -800,6 +770,12 @@ void BVHEmbree::add_curves(const Object *ob, const Hair *hair, int i)
}
const size_t num_motion_steps = min(num_geometry_motion_steps, RTC_MAX_TIME_STEP_COUNT);
+ const PrimitiveType primitive_type =
+ (num_motion_steps > 1) ?
+ ((hair->curve_shape == CURVE_RIBBON) ? PRIMITIVE_MOTION_CURVE_RIBBON :
+ PRIMITIVE_MOTION_CURVE_THICK) :
+ ((hair->curve_shape == CURVE_RIBBON) ? PRIMITIVE_CURVE_RIBBON : PRIMITIVE_CURVE_THICK);
+
assert(num_geometry_motion_steps <= RTC_MAX_TIME_STEP_COUNT);
const size_t num_curves = hair->num_curves();
@@ -820,21 +796,12 @@ void BVHEmbree::add_curves(const Object *ob, const Hair *hair, int i)
size_t prim_tri_index_size = pack.prim_index.size();
pack.prim_tri_index.resize(prim_tri_index_size + num_segments);
-# if RTC_VERSION >= 30900
- enum RTCGeometryType type = (!use_curves) ?
- (use_ribbons ? RTC_GEOMETRY_TYPE_FLAT_LINEAR_CURVE :
- RTC_GEOMETRY_TYPE_ROUND_LINEAR_CURVE) :
- (use_ribbons ? RTC_GEOMETRY_TYPE_FLAT_CATMULL_ROM_CURVE :
- RTC_GEOMETRY_TYPE_ROUND_CATMULL_ROM_CURVE);
-# else
- enum RTCGeometryType type = (!use_curves) ?
- RTC_GEOMETRY_TYPE_FLAT_LINEAR_CURVE :
- (use_ribbons ? RTC_GEOMETRY_TYPE_FLAT_CATMULL_ROM_CURVE :
- RTC_GEOMETRY_TYPE_ROUND_CATMULL_ROM_CURVE);
-# endif
+ enum RTCGeometryType type = (hair->curve_shape == CURVE_RIBBON ?
+ RTC_GEOMETRY_TYPE_FLAT_CATMULL_ROM_CURVE :
+ RTC_GEOMETRY_TYPE_ROUND_CATMULL_ROM_CURVE);
RTCGeometry geom_id = rtcNewGeometry(rtc_shared_device, type);
- rtcSetGeometryTessellationRate(geom_id, curve_subdivisions);
+ rtcSetGeometryTessellationRate(geom_id, curve_subdivisions + 1);
unsigned *rtc_indices = (unsigned *)rtcSetNewGeometryBuffer(
geom_id, RTC_BUFFER_TYPE_INDEX, 0, RTC_FORMAT_UINT, sizeof(int), num_segments);
size_t rtc_index = 0;
@@ -842,14 +809,11 @@ void BVHEmbree::add_curves(const Object *ob, const Hair *hair, int i)
Hair::Curve c = hair->get_curve(j);
for (size_t k = 0; k < c.num_segments(); ++k) {
rtc_indices[rtc_index] = c.first_key + k;
- if (use_curves) {
- /* Room for extra CVs at Catmull-Rom splines. */
- rtc_indices[rtc_index] += j * 2;
- }
+ /* Room for extra CVs at Catmull-Rom splines. */
+ rtc_indices[rtc_index] += j * 2;
/* Cycles specific data. */
pack.prim_object[prim_object_size + rtc_index] = i;
- pack.prim_type[prim_type_size + rtc_index] = (PRIMITIVE_PACK_SEGMENT(
- num_motion_steps > 1 ? PRIMITIVE_MOTION_CURVE : PRIMITIVE_CURVE, k));
+ pack.prim_type[prim_type_size + rtc_index] = (PRIMITIVE_PACK_SEGMENT(primitive_type, k));
pack.prim_index[prim_index_size + rtc_index] = j;
pack.prim_tri_index[prim_tri_index_size + rtc_index] = rtc_index;
@@ -863,8 +827,13 @@ void BVHEmbree::add_curves(const Object *ob, const Hair *hair, int i)
update_curve_vertex_buffer(geom_id, hair);
rtcSetGeometryUserData(geom_id, (void *)prim_offset);
- rtcSetGeometryIntersectFilterFunction(geom_id, rtc_filter_func);
- rtcSetGeometryOccludedFilterFunction(geom_id, rtc_filter_occluded_func);
+ if (hair->curve_shape == CURVE_RIBBON) {
+ rtcSetGeometryOccludedFilterFunction(geom_id, rtc_filter_occluded_func);
+ }
+ else {
+ rtcSetGeometryIntersectFilterFunction(geom_id, rtc_filter_func_thick_curve);
+ rtcSetGeometryOccludedFilterFunction(geom_id, rtc_filter_occluded_func_thick_curve);
+ }
rtcSetGeometryMask(geom_id, ob->visibility_for_tracing());
rtcCommitGeometry(geom_id);
diff --git a/intern/cycles/bvh/bvh_embree.h b/intern/cycles/bvh/bvh_embree.h
index eb121d060b7..f60a1ca0102 100644
--- a/intern/cycles/bvh/bvh_embree.h
+++ b/intern/cycles/bvh/bvh_embree.h
@@ -81,7 +81,7 @@ class BVHEmbree : public BVH {
vector<RTCScene> delayed_delete_scenes;
int curve_subdivisions;
enum RTCBuildQuality build_quality;
- bool use_curves, use_ribbons, dynamic_scene;
+ bool dynamic_scene;
};
CCL_NAMESPACE_END
diff --git a/intern/cycles/bvh/bvh_optix.cpp b/intern/cycles/bvh/bvh_optix.cpp
index 740994b2ebc..ccb7ae08625 100644
--- a/intern/cycles/bvh/bvh_optix.cpp
+++ b/intern/cycles/bvh/bvh_optix.cpp
@@ -18,10 +18,14 @@
#ifdef WITH_OPTIX
# include "bvh/bvh_optix.h"
+
+# include "device/device.h"
+
# include "render/geometry.h"
# include "render/hair.h"
# include "render/mesh.h"
# include "render/object.h"
+
# include "util/util_foreach.h"
# include "util/util_logging.h"
# include "util/util_progress.h"
@@ -73,9 +77,12 @@ void BVHOptiX::pack_blas()
// 'pack.prim_time' is only used in geom_curve_intersect.h
// It is not needed because of OPTIX_MOTION_FLAG_[START|END]_VANISH
- uint type = PRIMITIVE_CURVE;
- if (hair->use_motion_blur && hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION))
- type = PRIMITIVE_MOTION_CURVE;
+ uint type = (hair->use_motion_blur &&
+ hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION)) ?
+ ((hair->curve_shape == CURVE_RIBBON) ? PRIMITIVE_MOTION_CURVE_RIBBON :
+ PRIMITIVE_MOTION_CURVE_THICK) :
+ ((hair->curve_shape == CURVE_RIBBON) ? PRIMITIVE_CURVE_RIBBON :
+ PRIMITIVE_CURVE_THICK);
for (size_t j = 0; j < num_curves; ++j) {
const Hair::Curve curve = hair->get_curve(j);
diff --git a/intern/cycles/bvh/bvh_params.h b/intern/cycles/bvh/bvh_params.h
index 5e2c4b63f1b..1a50742dc33 100644
--- a/intern/cycles/bvh/bvh_params.h
+++ b/intern/cycles/bvh/bvh_params.h
@@ -89,7 +89,6 @@ class BVHParams {
int bvh_type;
/* These are needed for Embree. */
- int curve_flags;
int curve_subdivisions;
/* fixed parameters */
@@ -122,7 +121,6 @@ class BVHParams {
bvh_type = 0;
- curve_flags = 0;
curve_subdivisions = 4;
}
diff --git a/intern/cycles/bvh/bvh_sort.cpp b/intern/cycles/bvh/bvh_sort.cpp
index 4498a759c08..b01785b547a 100644
--- a/intern/cycles/bvh/bvh_sort.cpp
+++ b/intern/cycles/bvh/bvh_sort.cpp
@@ -88,18 +88,6 @@ static void bvh_reference_sort_threaded(TaskPool *task_pool,
const int job_end,
const BVHReferenceCompare &compare);
-class BVHSortTask : public Task {
- public:
- BVHSortTask(TaskPool *task_pool,
- BVHReference *data,
- const int job_start,
- const int job_end,
- const BVHReferenceCompare &compare)
- {
- run = function_bind(bvh_reference_sort_threaded, task_pool, data, job_start, job_end, compare);
- }
-};
-
/* Multi-threaded reference sort. */
static void bvh_reference_sort_threaded(TaskPool *task_pool,
BVHReference *data,
@@ -158,7 +146,8 @@ static void bvh_reference_sort_threaded(TaskPool *task_pool,
have_work = false;
if (left < end) {
if (start < right) {
- task_pool->push(new BVHSortTask(task_pool, data, left, end, compare), true);
+ task_pool->push(
+ function_bind(bvh_reference_sort_threaded, task_pool, data, left, end, compare));
}
else {
start = left;
diff --git a/intern/cycles/bvh/bvh_split.cpp b/intern/cycles/bvh/bvh_split.cpp
index acdca0f13ad..4b21f852d7a 100644
--- a/intern/cycles/bvh/bvh_split.cpp
+++ b/intern/cycles/bvh/bvh_split.cpp
@@ -33,7 +33,7 @@ CCL_NAMESPACE_BEGIN
BVHObjectSplit::BVHObjectSplit(BVHBuild *builder,
BVHSpatialStorage *storage,
const BVHRange &range,
- vector<BVHReference> *references,
+ vector<BVHReference> &references,
float nodeSAH,
const BVHUnaligned *unaligned_heuristic,
const Transform *aligned_space)
@@ -43,7 +43,7 @@ BVHObjectSplit::BVHObjectSplit(BVHBuild *builder,
left_bounds(BoundBox::empty),
right_bounds(BoundBox::empty),
storage_(storage),
- references_(references),
+ references_(&references),
unaligned_heuristic_(unaligned_heuristic),
aligned_space_(aligned_space)
{
@@ -133,7 +133,7 @@ void BVHObjectSplit::split(BVHRange &left, BVHRange &right, const BVHRange &rang
BVHSpatialSplit::BVHSpatialSplit(const BVHBuild &builder,
BVHSpatialStorage *storage,
const BVHRange &range,
- vector<BVHReference> *references,
+ vector<BVHReference> &references,
float nodeSAH,
const BVHUnaligned *unaligned_heuristic,
const Transform *aligned_space)
@@ -141,7 +141,7 @@ BVHSpatialSplit::BVHSpatialSplit(const BVHBuild &builder,
dim(0),
pos(0.0f),
storage_(storage),
- references_(references),
+ references_(&references),
unaligned_heuristic_(unaligned_heuristic),
aligned_space_(aligned_space)
{
@@ -152,7 +152,7 @@ BVHSpatialSplit::BVHSpatialSplit(const BVHBuild &builder,
}
else {
range_bounds = unaligned_heuristic->compute_aligned_boundbox(
- range, &references->at(0), *aligned_space);
+ range, &references_->at(0), *aligned_space);
}
float3 origin = range_bounds.min;
diff --git a/intern/cycles/bvh/bvh_split.h b/intern/cycles/bvh/bvh_split.h
index 5f2e41cf343..28ff0e05fc3 100644
--- a/intern/cycles/bvh/bvh_split.h
+++ b/intern/cycles/bvh/bvh_split.h
@@ -44,7 +44,7 @@ class BVHObjectSplit {
BVHObjectSplit(BVHBuild *builder,
BVHSpatialStorage *storage,
const BVHRange &range,
- vector<BVHReference> *references,
+ vector<BVHReference> &references,
float nodeSAH,
const BVHUnaligned *unaligned_heuristic = NULL,
const Transform *aligned_space = NULL);
@@ -82,7 +82,7 @@ class BVHSpatialSplit {
BVHSpatialSplit(const BVHBuild &builder,
BVHSpatialStorage *storage,
const BVHRange &range,
- vector<BVHReference> *references,
+ vector<BVHReference> &references,
float nodeSAH,
const BVHUnaligned *unaligned_heuristic = NULL,
const Transform *aligned_space = NULL);
@@ -187,7 +187,7 @@ class BVHMixedSplit {
__forceinline BVHMixedSplit(BVHBuild *builder,
BVHSpatialStorage *storage,
const BVHRange &range,
- vector<BVHReference> *references,
+ vector<BVHReference> &references,
int level,
const BVHUnaligned *unaligned_heuristic = NULL,
const Transform *aligned_space = NULL)
@@ -197,7 +197,7 @@ class BVHMixedSplit {
}
else {
bounds = unaligned_heuristic->compute_aligned_boundbox(
- range, &references->at(0), *aligned_space);
+ range, &references.at(0), *aligned_space);
}
/* find split candidates. */
float area = bounds.safe_area();
@@ -220,7 +220,7 @@ class BVHMixedSplit {
/* leaf SAH is the lowest => create leaf. */
minSAH = min(min(leafSAH, object.sah), spatial.sah);
- no_split = (minSAH == leafSAH && builder->range_within_max_leaf_size(range, *references));
+ no_split = (minSAH == leafSAH && builder->range_within_max_leaf_size(range, references));
}
__forceinline void split(BVHBuild *builder,
diff --git a/intern/cycles/bvh/bvh_unaligned.cpp b/intern/cycles/bvh/bvh_unaligned.cpp
index f0995f343fe..c969b361643 100644
--- a/intern/cycles/bvh/bvh_unaligned.cpp
+++ b/intern/cycles/bvh/bvh_unaligned.cpp
@@ -68,7 +68,8 @@ bool BVHUnaligned::compute_aligned_space(const BVHReference &ref, Transform *ali
const Object *object = objects_[ref.prim_object()];
const int packed_type = ref.prim_type();
const int type = (packed_type & PRIMITIVE_ALL);
- if (type & PRIMITIVE_CURVE) {
+ /* No motion blur curves here, we can't fit them to aligned boxes well. */
+ if (type & (PRIMITIVE_CURVE_RIBBON | PRIMITIVE_CURVE_THICK)) {
const int curve_index = ref.prim_index();
const int segment = PRIMITIVE_UNPACK_SEGMENT(packed_type);
const Hair *hair = static_cast<const Hair *>(object->geometry);
@@ -93,7 +94,8 @@ BoundBox BVHUnaligned::compute_aligned_prim_boundbox(const BVHReference &prim,
const Object *object = objects_[prim.prim_object()];
const int packed_type = prim.prim_type();
const int type = (packed_type & PRIMITIVE_ALL);
- if (type & PRIMITIVE_CURVE) {
+ /* No motion blur curves here, we can't fit them to aligned boxes well. */
+ if (type & (PRIMITIVE_CURVE_RIBBON | PRIMITIVE_CURVE_THICK)) {
const int curve_index = prim.prim_index();
const int segment = PRIMITIVE_UNPACK_SEGMENT(packed_type);
const Hair *hair = static_cast<const Hair *>(object->geometry);
diff --git a/intern/cycles/device/CMakeLists.txt b/intern/cycles/device/CMakeLists.txt
index aa5b65a2b73..ca366722eb7 100644
--- a/intern/cycles/device/CMakeLists.txt
+++ b/intern/cycles/device/CMakeLists.txt
@@ -99,6 +99,18 @@ if(WITH_CYCLES_DEVICE_MULTI)
add_definitions(-DWITH_MULTI)
endif()
+if(WITH_OPENIMAGEDENOISE)
+ add_definitions(-DWITH_OPENIMAGEDENOISE)
+ add_definitions(-DOIDN_STATIC_LIB)
+ list(APPEND INC_SYS
+ ${OPENIMAGEDENOISE_INCLUDE_DIRS}
+ )
+ list(APPEND LIB
+ ${OPENIMAGEDENOISE_LIBRARIES}
+ ${TBB_LIBRARIES}
+ )
+endif()
+
include_directories(${INC})
include_directories(SYSTEM ${INC_SYS})
diff --git a/intern/cycles/device/cuda/device_cuda.h b/intern/cycles/device/cuda/device_cuda.h
index 1aa2fdd0967..e5e3e24165d 100644
--- a/intern/cycles/device/cuda/device_cuda.h
+++ b/intern/cycles/device/cuda/device_cuda.h
@@ -21,6 +21,7 @@
# include "device/device_split_kernel.h"
# include "util/util_map.h"
+# include "util/util_task.h"
# ifdef WITH_CUDA_DYNLOAD
# include "cuew.h"
@@ -96,9 +97,9 @@ class CUDADevice : public Device {
static bool have_precompiled_kernels();
- virtual bool show_samples() const;
+ virtual bool show_samples() const override;
- virtual BVHLayoutMask get_bvh_layout_mask() const;
+ virtual BVHLayoutMask get_bvh_layout_mask() const override;
void set_error(const string &error) override;
@@ -108,7 +109,7 @@ class CUDADevice : public Device {
bool support_device(const DeviceRequestedFeatures & /*requested_features*/);
- bool check_peer_access(Device *peer_device);
+ bool check_peer_access(Device *peer_device) override;
bool use_adaptive_compilation();
@@ -122,7 +123,7 @@ class CUDADevice : public Device {
const char *base = "cuda",
bool force_ptx = false);
- virtual bool load_kernels(const DeviceRequestedFeatures &requested_features);
+ virtual bool load_kernels(const DeviceRequestedFeatures &requested_features) override;
void load_functions();
@@ -140,19 +141,19 @@ class CUDADevice : public Device {
void generic_free(device_memory &mem);
- void mem_alloc(device_memory &mem);
+ void mem_alloc(device_memory &mem) override;
- void mem_copy_to(device_memory &mem);
+ void mem_copy_to(device_memory &mem) override;
- void mem_copy_from(device_memory &mem, int y, int w, int h, int elem);
+ void mem_copy_from(device_memory &mem, int y, int w, int h, int elem) override;
- void mem_zero(device_memory &mem);
+ void mem_zero(device_memory &mem) override;
- void mem_free(device_memory &mem);
+ void mem_free(device_memory &mem) override;
- device_ptr mem_alloc_sub_ptr(device_memory &mem, int offset, int /*size*/);
+ device_ptr mem_alloc_sub_ptr(device_memory &mem, int offset, int /*size*/) override;
- virtual void const_copy_to(const char *name, void *host, size_t size);
+ virtual void const_copy_to(const char *name, void *host, size_t size) override;
void global_alloc(device_memory &mem);
@@ -252,15 +253,15 @@ class CUDADevice : public Device {
int dw,
int dh,
bool transparent,
- const DeviceDrawParams &draw_params);
+ const DeviceDrawParams &draw_params) override;
- void thread_run(DeviceTask *task);
+ void thread_run(DeviceTask &task);
- virtual void task_add(DeviceTask &task);
+ virtual void task_add(DeviceTask &task) override;
- virtual void task_wait();
+ virtual void task_wait() override;
- virtual void task_cancel();
+ virtual void task_cancel() override;
};
CCL_NAMESPACE_END
diff --git a/intern/cycles/device/cuda/device_cuda_impl.cpp b/intern/cycles/device/cuda/device_cuda_impl.cpp
index 7aa63ff48c3..b9bbeb9a25b 100644
--- a/intern/cycles/device/cuda/device_cuda_impl.cpp
+++ b/intern/cycles/device/cuda/device_cuda_impl.cpp
@@ -105,7 +105,7 @@ class CUDASplitKernel : public DeviceSplitKernel {
virtual SplitKernelFunction *get_split_kernel_function(const string &kernel_name,
const DeviceRequestedFeatures &);
virtual int2 split_kernel_local_size();
- virtual int2 split_kernel_global_size(device_memory &kg, device_memory &data, DeviceTask *task);
+ virtual int2 split_kernel_global_size(device_memory &kg, device_memory &data, DeviceTask &task);
};
/* Utility to push/pop CUDA context. */
@@ -243,7 +243,7 @@ CUDADevice::CUDADevice(DeviceInfo &info, Stats &stats, Profiler &profiler, bool
CUDADevice::~CUDADevice()
{
- task_pool.stop();
+ task_pool.cancel();
delete split_kernel;
@@ -2326,11 +2326,11 @@ void CUDADevice::draw_pixels(device_memory &mem,
Device::draw_pixels(mem, y, w, h, width, height, dx, dy, dw, dh, transparent, draw_params);
}
-void CUDADevice::thread_run(DeviceTask *task)
+void CUDADevice::thread_run(DeviceTask &task)
{
CUDAContextScope scope(this);
- if (task->type == DeviceTask::RENDER) {
+ if (task.type == DeviceTask::RENDER) {
DeviceRequestedFeatures requested_features;
if (use_split_kernel()) {
if (split_kernel == NULL) {
@@ -2343,72 +2343,64 @@ void CUDADevice::thread_run(DeviceTask *task)
/* keep rendering tiles until done */
RenderTile tile;
- DenoisingTask denoising(this, *task);
+ DenoisingTask denoising(this, task);
- while (task->acquire_tile(this, tile, task->tile_types)) {
+ while (task.acquire_tile(this, tile, task.tile_types)) {
if (tile.task == RenderTile::PATH_TRACE) {
if (use_split_kernel()) {
device_only_memory<uchar> void_buffer(this, "void_buffer");
split_kernel->path_trace(task, tile, void_buffer, void_buffer);
}
else {
- render(*task, tile, work_tiles);
+ render(task, tile, work_tiles);
}
}
else if (tile.task == RenderTile::BAKE) {
- render(*task, tile, work_tiles);
+ render(task, tile, work_tiles);
}
else if (tile.task == RenderTile::DENOISE) {
tile.sample = tile.start_sample + tile.num_samples;
denoise(tile, denoising);
- task->update_progress(&tile, tile.w * tile.h);
+ task.update_progress(&tile, tile.w * tile.h);
}
- task->release_tile(tile);
+ task.release_tile(tile);
- if (task->get_cancel()) {
- if (task->need_finish_queue == false)
+ if (task.get_cancel()) {
+ if (task.need_finish_queue == false)
break;
}
}
work_tiles.free();
}
- else if (task->type == DeviceTask::SHADER) {
- shader(*task);
+ else if (task.type == DeviceTask::SHADER) {
+ shader(task);
cuda_assert(cuCtxSynchronize());
}
- else if (task->type == DeviceTask::DENOISE_BUFFER) {
+ else if (task.type == DeviceTask::DENOISE_BUFFER) {
RenderTile tile;
- tile.x = task->x;
- tile.y = task->y;
- tile.w = task->w;
- tile.h = task->h;
- tile.buffer = task->buffer;
- tile.sample = task->sample + task->num_samples;
- tile.num_samples = task->num_samples;
- tile.start_sample = task->sample;
- tile.offset = task->offset;
- tile.stride = task->stride;
- tile.buffers = task->buffers;
-
- DenoisingTask denoising(this, *task);
+ tile.x = task.x;
+ tile.y = task.y;
+ tile.w = task.w;
+ tile.h = task.h;
+ tile.buffer = task.buffer;
+ tile.sample = task.sample + task.num_samples;
+ tile.num_samples = task.num_samples;
+ tile.start_sample = task.sample;
+ tile.offset = task.offset;
+ tile.stride = task.stride;
+ tile.buffers = task.buffers;
+
+ DenoisingTask denoising(this, task);
denoise(tile, denoising);
- task->update_progress(&tile, tile.w * tile.h);
+ task.update_progress(&tile, tile.w * tile.h);
}
}
-class CUDADeviceTask : public DeviceTask {
- public:
- CUDADeviceTask(CUDADevice *device, DeviceTask &task) : DeviceTask(task)
- {
- run = function_bind(&CUDADevice::thread_run, device, this);
- }
-};
-
void CUDADevice::task_add(DeviceTask &task)
{
CUDAContextScope scope(this);
@@ -2424,7 +2416,10 @@ void CUDADevice::task_add(DeviceTask &task)
film_convert(task, task.buffer, task.rgba_byte, task.rgba_half);
}
else {
- task_pool.push(new CUDADeviceTask(this, task));
+ task_pool.push([=] {
+ DeviceTask task_copy = task;
+ thread_run(task_copy);
+ });
}
}
@@ -2652,7 +2647,7 @@ int2 CUDASplitKernel::split_kernel_local_size()
int2 CUDASplitKernel::split_kernel_global_size(device_memory &kg,
device_memory &data,
- DeviceTask * /*task*/)
+ DeviceTask & /*task*/)
{
CUDAContextScope scope(device);
size_t free;
diff --git a/intern/cycles/device/device.cpp b/intern/cycles/device/device.cpp
index 41dd7894d93..9dbb33980b4 100644
--- a/intern/cycles/device/device.cpp
+++ b/intern/cycles/device/device.cpp
@@ -77,7 +77,7 @@ std::ostream &operator<<(std::ostream &os, const DeviceRequestedFeatures &reques
/* Device */
-Device::~Device()
+Device::~Device() noexcept(false)
{
if (!background) {
if (vertex_buffer != 0) {
@@ -603,6 +603,7 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,
info.has_osl = true;
info.has_profiling = true;
info.has_peer_memory = false;
+ info.denoisers = DENOISER_ALL;
foreach (const DeviceInfo &device, subdevices) {
/* Ensure CPU device does not slow down GPU. */
@@ -647,6 +648,7 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,
info.has_osl &= device.has_osl;
info.has_profiling &= device.has_profiling;
info.has_peer_memory |= device.has_peer_memory;
+ info.denoisers &= device.denoisers;
}
return info;
@@ -667,4 +669,55 @@ void Device::free_memory()
network_devices.free_memory();
}
+/* DeviceInfo */
+
+void DeviceInfo::add_denoising_devices(DenoiserType denoiser_type)
+{
+ assert(denoising_devices.empty());
+
+ if (denoiser_type == DENOISER_OPTIX && type != DEVICE_OPTIX) {
+ vector<DeviceInfo> optix_devices = Device::available_devices(DEVICE_MASK_OPTIX);
+ if (!optix_devices.empty()) {
+ /* Convert to a special multi device with separate denoising devices. */
+ if (multi_devices.empty()) {
+ multi_devices.push_back(*this);
+ }
+
+ /* Try to use the same physical devices for denoising. */
+ for (const DeviceInfo &cuda_device : multi_devices) {
+ if (cuda_device.type == DEVICE_CUDA) {
+ for (const DeviceInfo &optix_device : optix_devices) {
+ if (cuda_device.num == optix_device.num) {
+ id += optix_device.id;
+ denoising_devices.push_back(optix_device);
+ break;
+ }
+ }
+ }
+ }
+
+ if (denoising_devices.empty()) {
+ /* Simply use the first available OptiX device. */
+ const DeviceInfo optix_device = optix_devices.front();
+ id += optix_device.id; /* Uniquely identify this special multi device. */
+ denoising_devices.push_back(optix_device);
+ }
+
+ denoisers = denoiser_type;
+ }
+ }
+ else if (denoiser_type == DENOISER_OPENIMAGEDENOISE && type != DEVICE_CPU) {
+ /* Convert to a special multi device with separate denoising devices. */
+ if (multi_devices.empty()) {
+ multi_devices.push_back(*this);
+ }
+
+ /* Add CPU denoising devices. */
+ DeviceInfo cpu_device = Device::available_devices(DEVICE_MASK_CPU).front();
+ denoising_devices.push_back(cpu_device);
+
+ denoisers = denoiser_type;
+ }
+}
+
CCL_NAMESPACE_END
diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h
index dff981080a5..a5833369a17 100644
--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h
@@ -83,6 +83,7 @@ class DeviceInfo {
bool use_split_kernel; /* Use split or mega kernel. */
bool has_profiling; /* Supports runtime collection of profiling info. */
bool has_peer_memory; /* GPU has P2P access to memory of another GPU. */
+ DenoiserTypeMask denoisers; /* Supported denoiser types. */
int cpu_threads;
vector<DeviceInfo> multi_devices;
vector<DeviceInfo> denoising_devices;
@@ -101,6 +102,7 @@ class DeviceInfo {
use_split_kernel = false;
has_profiling = false;
has_peer_memory = false;
+ denoisers = DENOISER_NONE;
}
bool operator==(const DeviceInfo &info)
@@ -110,6 +112,9 @@ class DeviceInfo {
(type == info.type && num == info.num && description == info.description));
return id == info.id;
}
+
+ /* Add additional devices needed for the specified denoiser. */
+ void add_denoising_devices(DenoiserType denoiser_type);
};
class DeviceRequestedFeatures {
@@ -132,6 +137,7 @@ class DeviceRequestedFeatures {
/* BVH/sampling kernel features. */
bool use_hair;
+ bool use_hair_thick;
bool use_object_motion;
bool use_camera_motion;
@@ -178,6 +184,7 @@ class DeviceRequestedFeatures {
max_nodes_group = 0;
nodes_features = 0;
use_hair = false;
+ use_hair_thick = false;
use_object_motion = false;
use_camera_motion = false;
use_baking = false;
@@ -200,6 +207,7 @@ class DeviceRequestedFeatures {
max_nodes_group == requested_features.max_nodes_group &&
nodes_features == requested_features.nodes_features &&
use_hair == requested_features.use_hair &&
+ use_hair_thick == requested_features.use_hair_thick &&
use_object_motion == requested_features.use_object_motion &&
use_camera_motion == requested_features.use_camera_motion &&
use_baking == requested_features.use_baking &&
@@ -319,7 +327,8 @@ class Device {
virtual void mem_free_sub_ptr(device_ptr /*ptr*/){};
public:
- virtual ~Device();
+ /* noexcept needed to silence TBB warning. */
+ virtual ~Device() noexcept(false);
/* info */
DeviceInfo info;
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index fc6febd8cee..8f68e66a1b4 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -51,10 +51,12 @@
#include "util/util_function.h"
#include "util/util_logging.h"
#include "util/util_map.h"
+#include "util/util_openimagedenoise.h"
#include "util/util_opengl.h"
#include "util/util_optimization.h"
#include "util/util_progress.h"
#include "util/util_system.h"
+#include "util/util_task.h"
#include "util/util_thread.h"
CCL_NAMESPACE_BEGIN
@@ -161,7 +163,7 @@ class CPUSplitKernel : public DeviceSplitKernel {
virtual SplitKernelFunction *get_split_kernel_function(const string &kernel_name,
const DeviceRequestedFeatures &);
virtual int2 split_kernel_local_size();
- virtual int2 split_kernel_global_size(device_memory &kg, device_memory &data, DeviceTask *task);
+ virtual int2 split_kernel_global_size(device_memory &kg, device_memory &data, DeviceTask &task);
virtual uint64_t state_buffer_size(device_memory &kg, device_memory &data, size_t num_threads);
};
@@ -176,6 +178,10 @@ class CPUDevice : public Device {
#ifdef WITH_OSL
OSLGlobals osl_globals;
#endif
+#ifdef WITH_OPENIMAGEDENOISE
+ oidn::DeviceRef oidn_device;
+ oidn::FilterRef oidn_filter;
+#endif
bool use_split_kernel;
@@ -332,7 +338,7 @@ class CPUDevice : public Device {
~CPUDevice()
{
- task_pool.stop();
+ task_pool.cancel();
texture_info.free();
}
@@ -344,17 +350,6 @@ class CPUDevice : public Device {
virtual BVHLayoutMask get_bvh_layout_mask() const
{
BVHLayoutMask bvh_layout_mask = BVH_LAYOUT_BVH2;
- if (DebugFlags().cpu.has_sse2() && system_cpu_support_sse2()) {
- bvh_layout_mask |= BVH_LAYOUT_BVH4;
- }
- /* MSVC does not support the -march=native switch and you always end up */
- /* with an sse2 kernel when you use WITH_KERNEL_NATIVE. We *cannot* feed */
- /* that kernel BVH8 even if the CPU flags would allow for it. */
-#if (defined(__x86_64__) || defined(_M_X64)) && !(defined(_MSC_VER) && defined(WITH_KERNEL_NATIVE))
- if (DebugFlags().cpu.has_avx2() && system_cpu_support_avx2()) {
- bvh_layout_mask |= BVH_LAYOUT_BVH8;
- }
-#endif
#ifdef WITH_EMBREE
bvh_layout_mask |= BVH_LAYOUT_EMBREE;
#endif /* WITH_EMBREE */
@@ -527,26 +522,18 @@ class CPUDevice : public Device {
#endif
}
- void thread_run(DeviceTask *task)
+ void thread_run(DeviceTask &task)
{
- if (task->type == DeviceTask::RENDER)
- thread_render(*task);
- else if (task->type == DeviceTask::SHADER)
- thread_shader(*task);
- else if (task->type == DeviceTask::FILM_CONVERT)
- thread_film_convert(*task);
- else if (task->type == DeviceTask::DENOISE_BUFFER)
- thread_denoise(*task);
+ if (task.type == DeviceTask::RENDER)
+ thread_render(task);
+ else if (task.type == DeviceTask::SHADER)
+ thread_shader(task);
+ else if (task.type == DeviceTask::FILM_CONVERT)
+ thread_film_convert(task);
+ else if (task.type == DeviceTask::DENOISE_BUFFER)
+ thread_denoise(task);
}
- class CPUDeviceTask : public DeviceTask {
- public:
- CPUDeviceTask(CPUDevice *device, DeviceTask &task) : DeviceTask(task)
- {
- run = function_bind(&CPUDevice::thread_run, device, this);
- }
- };
-
bool denoising_non_local_means(device_ptr image_ptr,
device_ptr guide_ptr,
device_ptr variance_ptr,
@@ -961,7 +948,71 @@ class CPUDevice : public Device {
}
}
- void denoise(DenoisingTask &denoising, RenderTile &tile)
+ void denoise_openimagedenoise(DeviceTask &task, RenderTile &rtile)
+ {
+#ifdef WITH_OPENIMAGEDENOISE
+ assert(openimagedenoise_supported());
+
+ /* Only one at a time, since OpenImageDenoise itself is multithreaded. */
+ static thread_mutex mutex;
+ thread_scoped_lock lock(mutex);
+
+ /* Create device and filter, cached for reuse. */
+ if (!oidn_device) {
+ oidn_device = oidn::newDevice();
+ oidn_device.commit();
+ }
+ if (!oidn_filter) {
+ oidn_filter = oidn_device.newFilter("RT");
+ }
+
+ /* Copy pixels from compute device to CPU (no-op for CPU device). */
+ rtile.buffers->buffer.copy_from_device();
+
+ /* Set images with appropriate stride for our interleaved pass storage. */
+ const struct {
+ const char *name;
+ int offset;
+ } passes[] = {{"color", task.pass_denoising_data + DENOISING_PASS_COLOR},
+ {"normal", task.pass_denoising_data + DENOISING_PASS_NORMAL},
+ {"albedo", task.pass_denoising_data + DENOISING_PASS_ALBEDO},
+ {"output", 0},
+ { NULL,
+ 0 }};
+
+ for (int i = 0; passes[i].name; i++) {
+ const int64_t offset = rtile.offset + rtile.x + rtile.y * rtile.stride;
+ const int64_t buffer_offset = (offset * task.pass_stride + passes[i].offset) * sizeof(float);
+ const int64_t pixel_stride = task.pass_stride * sizeof(float);
+ const int64_t row_stride = rtile.stride * pixel_stride;
+
+ oidn_filter.setImage(passes[i].name,
+ (char *)rtile.buffer + buffer_offset,
+ oidn::Format::Float3,
+ rtile.w,
+ rtile.h,
+ 0,
+ pixel_stride,
+ row_stride);
+ }
+
+ /* Execute filter. */
+ oidn_filter.set("hdr", true);
+ oidn_filter.set("srgb", false);
+ oidn_filter.commit();
+ oidn_filter.execute();
+
+ /* todo: it may be possible to avoid this copy, but we have to ensure that
+ * when other code copies data from the device it doesn't overwrite the
+ * denoiser buffers. */
+ rtile.buffers->buffer.copy_to_device();
+#else
+ (void)task;
+ (void)rtile;
+#endif
+ }
+
+ void denoise_nlm(DenoisingTask &denoising, RenderTile &tile)
{
ProfilingHelper profiling(denoising.profiler, PROFILING_DENOISING);
@@ -1019,15 +1070,14 @@ class CPUDevice : public Device {
}
}
- RenderTile tile;
- DenoisingTask denoising(this, task);
- denoising.profiler = &kg->profiler;
+ DenoisingTask *denoising = NULL;
+ RenderTile tile;
while (task.acquire_tile(this, tile, task.tile_types)) {
if (tile.task == RenderTile::PATH_TRACE) {
if (use_split_kernel) {
device_only_memory<uchar> void_buffer(this, "void_buffer");
- split_kernel->path_trace(&task, tile, kgbuffer, void_buffer);
+ split_kernel->path_trace(task, tile, kgbuffer, void_buffer);
}
else {
render(task, tile, kg);
@@ -1037,7 +1087,16 @@ class CPUDevice : public Device {
render(task, tile, kg);
}
else if (tile.task == RenderTile::DENOISE) {
- denoise(denoising, tile);
+ if (task.denoising.type == DENOISER_OPENIMAGEDENOISE) {
+ denoise_openimagedenoise(task, tile);
+ }
+ else if (task.denoising.type == DENOISER_NLM) {
+ if (denoising == NULL) {
+ denoising = new DenoisingTask(this, task);
+ denoising->profiler = &kg->profiler;
+ }
+ denoise_nlm(*denoising, tile);
+ }
task.update_progress(&tile, tile.w * tile.h);
}
@@ -1055,6 +1114,7 @@ class CPUDevice : public Device {
kg->~KernelGlobals();
kgbuffer.free();
delete split_kernel;
+ delete denoising;
}
void thread_denoise(DeviceTask &task)
@@ -1072,16 +1132,22 @@ class CPUDevice : public Device {
tile.stride = task.stride;
tile.buffers = task.buffers;
- DenoisingTask denoising(this, task);
+ if (task.denoising.type == DENOISER_OPENIMAGEDENOISE) {
+ denoise_openimagedenoise(task, tile);
+ }
+ else {
+ DenoisingTask denoising(this, task);
- ProfilingState denoising_profiler_state;
- profiler.add_state(&denoising_profiler_state);
- denoising.profiler = &denoising_profiler_state;
+ ProfilingState denoising_profiler_state;
+ profiler.add_state(&denoising_profiler_state);
+ denoising.profiler = &denoising_profiler_state;
- denoise(denoising, tile);
- task.update_progress(&tile, tile.w * tile.h);
+ denoise_nlm(denoising, tile);
+
+ profiler.remove_state(&denoising_profiler_state);
+ }
- profiler.remove_state(&denoising_profiler_state);
+ task.update_progress(&tile, tile.w * tile.h);
}
void thread_film_convert(DeviceTask &task)
@@ -1155,13 +1221,24 @@ class CPUDevice : public Device {
/* split task into smaller ones */
list<DeviceTask> tasks;
- if (task.type == DeviceTask::SHADER)
+ if (task.type == DeviceTask::DENOISE_BUFFER &&
+ task.denoising.type == DENOISER_OPENIMAGEDENOISE) {
+ /* Denoise entire buffer at once with OIDN, it has own threading. */
+ tasks.push_back(task);
+ }
+ else if (task.type == DeviceTask::SHADER) {
task.split(tasks, info.cpu_threads, 256);
- else
+ }
+ else {
task.split(tasks, info.cpu_threads);
+ }
- foreach (DeviceTask &task, tasks)
- task_pool.push(new CPUDeviceTask(this, task));
+ foreach (DeviceTask &task, tasks) {
+ task_pool.push([=] {
+ DeviceTask task_copy = task;
+ thread_run(task_copy);
+ });
+ }
}
void task_wait()
@@ -1326,7 +1403,7 @@ int2 CPUSplitKernel::split_kernel_local_size()
int2 CPUSplitKernel::split_kernel_global_size(device_memory & /*kg*/,
device_memory & /*data*/,
- DeviceTask * /*task*/)
+ DeviceTask & /*task*/)
{
return make_int2(1, 1);
}
@@ -1358,6 +1435,10 @@ void device_cpu_info(vector<DeviceInfo> &devices)
info.has_osl = true;
info.has_half_images = true;
info.has_profiling = true;
+ info.denoisers = DENOISER_NLM;
+ if (openimagedenoise_supported()) {
+ info.denoisers |= DENOISER_OPENIMAGEDENOISE;
+ }
devices.insert(devices.begin(), info);
}
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 04c04761311..d9ffcceb06e 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -130,6 +130,7 @@ void device_cuda_info(vector<DeviceInfo> &devices)
info.has_half_images = (major >= 3);
info.has_volume_decoupled = false;
info.has_adaptive_stop_per_sample = false;
+ info.denoisers = DENOISER_NLM;
/* Check if the device has P2P access to any other device in the system. */
for (int peer_num = 0; peer_num < count && !info.has_peer_memory; peer_num++) {
diff --git a/intern/cycles/device/device_denoising.cpp b/intern/cycles/device/device_denoising.cpp
index ac17c02a427..89de80a5bcd 100644
--- a/intern/cycles/device/device_denoising.cpp
+++ b/intern/cycles/device/device_denoising.cpp
@@ -56,8 +56,8 @@ DenoisingTask::DenoisingTask(Device *device, const DeviceTask &task)
tile_info->frames[i] = task.denoising_frames[i - 1];
}
- write_passes = task.denoising_write_passes;
- do_filter = task.denoising_do_filter;
+ do_prefilter = task.denoising.store_passes && task.denoising.type == DENOISER_NLM;
+ do_filter = task.denoising.use && task.denoising.type == DENOISER_NLM;
}
DenoisingTask::~DenoisingTask()
@@ -91,7 +91,7 @@ void DenoisingTask::set_render_buffer(RenderTile *rtiles)
target_buffer.stride = rtiles[9].stride;
target_buffer.ptr = rtiles[9].buffer;
- if (write_passes && rtiles[9].buffers) {
+ if (do_prefilter && rtiles[9].buffers) {
target_buffer.denoising_output_offset =
rtiles[9].buffers->params.get_denoising_prefiltered_offset();
}
@@ -111,7 +111,7 @@ void DenoisingTask::setup_denoising_buffer()
rect = rect_clip(rect,
make_int4(tile_info->x[0], tile_info->y[0], tile_info->x[3], tile_info->y[3]));
- buffer.use_intensity = write_passes || (tile_info->num_frames > 1);
+ buffer.use_intensity = do_prefilter || (tile_info->num_frames > 1);
buffer.passes = buffer.use_intensity ? 15 : 14;
buffer.width = rect.z - rect.x;
buffer.stride = align_up(buffer.width, 4);
@@ -343,7 +343,7 @@ void DenoisingTask::run_denoising(RenderTile *tile)
reconstruct();
}
- if (write_passes) {
+ if (do_prefilter) {
write_buffer();
}
diff --git a/intern/cycles/device/device_denoising.h b/intern/cycles/device/device_denoising.h
index bd1d0193dbd..4c122e981eb 100644
--- a/intern/cycles/device/device_denoising.h
+++ b/intern/cycles/device/device_denoising.h
@@ -60,7 +60,7 @@ class DenoisingTask {
int4 rect;
int4 filter_area;
- bool write_passes;
+ bool do_prefilter;
bool do_filter;
struct DeviceFunctions {
diff --git a/intern/cycles/device/device_multi.cpp b/intern/cycles/device/device_multi.cpp
index 020b9e10e60..fd14bbdccc5 100644
--- a/intern/cycles/device/device_multi.cpp
+++ b/intern/cycles/device/device_multi.cpp
@@ -396,8 +396,8 @@ class MultiDevice : public Device {
size_t existing_size = mem.device_size;
/* This is a hack to only allocate the tile buffers on denoising devices
- * Similarily the tile buffers also need to be allocated separately on all devices so any
- * overlap rendered for denoising does not interfer with each other */
+ * Similarly the tile buffers also need to be allocated separately on all devices so any
+ * overlap rendered for denoising does not interfere with each other */
if (strcmp(mem.name, "RenderBuffers") == 0) {
vector<device_ptr> device_pointers;
device_pointers.reserve(devices.size());
diff --git a/intern/cycles/device/device_network.cpp b/intern/cycles/device/device_network.cpp
index 0933d51f321..8904b517e92 100644
--- a/intern/cycles/device/device_network.cpp
+++ b/intern/cycles/device/device_network.cpp
@@ -313,6 +313,7 @@ void device_network_info(vector<DeviceInfo> &devices)
info.has_volume_decoupled = false;
info.has_adaptive_stop_per_sample = false;
info.has_osl = false;
+ info.denoisers = DENOISER_NONE;
devices.push_back(info);
}
diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp
index 8a0b128697f..39b9ef70192 100644
--- a/intern/cycles/device/device_opencl.cpp
+++ b/intern/cycles/device/device_opencl.cpp
@@ -120,6 +120,7 @@ void device_opencl_info(vector<DeviceInfo> &devices)
info.use_split_kernel = true;
info.has_volume_decoupled = false;
info.has_adaptive_stop_per_sample = false;
+ info.denoisers = DENOISER_NLM;
info.id = id;
/* Check OpenCL extensions */
diff --git a/intern/cycles/device/device_optix.cpp b/intern/cycles/device/device_optix.cpp
index fbf6a914744..ececca3df53 100644
--- a/intern/cycles/device/device_optix.cpp
+++ b/intern/cycles/device/device_optix.cpp
@@ -246,7 +246,7 @@ class OptiXDevice : public CUDADevice {
~OptiXDevice()
{
// Stop processing any more tasks
- task_pool.stop();
+ task_pool.cancel();
// Make CUDA context current
const CUDAContextScope scope(cuContext);
@@ -428,11 +428,20 @@ class OptiXDevice : public CUDADevice {
group_descs[PG_HITS].hitgroup.entryFunctionNameAH = "__anyhit__kernel_optix_shadow_all_hit";
if (requested_features.use_hair) {
- // Add curve intersection programs
group_descs[PG_HITD].hitgroup.moduleIS = optix_module;
- group_descs[PG_HITD].hitgroup.entryFunctionNameIS = "__intersection__curve";
group_descs[PG_HITS].hitgroup.moduleIS = optix_module;
- group_descs[PG_HITS].hitgroup.entryFunctionNameIS = "__intersection__curve";
+
+ // Add curve intersection programs
+ if (requested_features.use_hair_thick) {
+ // Slower programs for thick hair since that also slows down ribbons.
+ // Ideally this should not be needed.
+ group_descs[PG_HITD].hitgroup.entryFunctionNameIS = "__intersection__curve_all";
+ group_descs[PG_HITS].hitgroup.entryFunctionNameIS = "__intersection__curve_all";
+ }
+ else {
+ group_descs[PG_HITD].hitgroup.entryFunctionNameIS = "__intersection__curve_ribbon";
+ group_descs[PG_HITS].hitgroup.entryFunctionNameIS = "__intersection__curve_ribbon";
+ }
}
if (requested_features.use_subsurface || requested_features.use_shader_raytrace) {
@@ -712,7 +721,7 @@ class OptiXDevice : public CUDADevice {
const CUDAContextScope scope(cuContext);
// Choose between OptiX and NLM denoising
- if (task.denoising_use_optix) {
+ if (task.denoising.type == DENOISER_OPTIX) {
// Map neighboring tiles onto this device, indices are as following:
// Where index 4 is the center tile and index 9 is the target for the result.
// 0 1 2
@@ -1436,21 +1445,21 @@ class OptiXDevice : public CUDADevice {
KernelData *const data = (KernelData *)host;
*(OptixTraversableHandle *)&data->bvh.scene = tlas_handle;
- update_launch_params(name, offsetof(KernelParams, data), host, size);
+ update_launch_params(offsetof(KernelParams, data), host, size);
return;
}
// Update data storage pointers in launch parameters
# define KERNEL_TEX(data_type, tex_name) \
if (strcmp(name, #tex_name) == 0) { \
- update_launch_params(name, offsetof(KernelParams, tex_name), host, size); \
+ update_launch_params(offsetof(KernelParams, tex_name), host, size); \
return; \
}
# include "kernel/kernel_textures.h"
# undef KERNEL_TEX
}
- void update_launch_params(const char *name, size_t offset, void *data, size_t data_size)
+ void update_launch_params(size_t offset, void *data, size_t data_size)
{
const CUDAContextScope scope(cuContext);
@@ -1463,15 +1472,6 @@ class OptiXDevice : public CUDADevice {
void task_add(DeviceTask &task) override
{
- struct OptiXDeviceTask : public DeviceTask {
- OptiXDeviceTask(OptiXDevice *device, DeviceTask &task, int task_index) : DeviceTask(task)
- {
- // Using task index parameter instead of thread index, since number of CUDA streams may
- // differ from number of threads
- run = function_bind(&OptiXDevice::thread_run, device, *this, task_index);
- }
- };
-
// Upload texture information to device if it has changed since last launch
load_texture_info();
@@ -1483,7 +1483,10 @@ class OptiXDevice : public CUDADevice {
if (task.type == DeviceTask::DENOISE_BUFFER) {
// Execute denoising in a single thread (e.g. to avoid race conditions during creation)
- task_pool.push(new OptiXDeviceTask(this, task, 0));
+ task_pool.push([=] {
+ DeviceTask task_copy = task;
+ thread_run(task_copy, 0);
+ });
return;
}
@@ -1493,8 +1496,15 @@ class OptiXDevice : public CUDADevice {
// Queue tasks in internal task pool
int task_index = 0;
- for (DeviceTask &task : tasks)
- task_pool.push(new OptiXDeviceTask(this, task, task_index++));
+ for (DeviceTask &task : tasks) {
+ task_pool.push([=] {
+ // Using task index parameter instead of thread index, since number of CUDA streams may
+ // differ from number of threads
+ DeviceTask task_copy = task;
+ thread_run(task_copy, task_index);
+ });
+ task_index++;
+ }
}
void task_wait() override
@@ -1551,6 +1561,7 @@ void device_optix_info(const vector<DeviceInfo> &cuda_devices, vector<DeviceInfo
info.type = DEVICE_OPTIX;
info.id += "_OptiX";
+ info.denoisers |= DENOISER_OPTIX;
devices.push_back(info);
}
diff --git a/intern/cycles/device/device_split_kernel.cpp b/intern/cycles/device/device_split_kernel.cpp
index f22d8761058..4c288f60c16 100644
--- a/intern/cycles/device/device_split_kernel.cpp
+++ b/intern/cycles/device/device_split_kernel.cpp
@@ -145,7 +145,7 @@ size_t DeviceSplitKernel::max_elements_for_max_buffer_size(device_memory &kg,
return max_buffer_size / size_per_element;
}
-bool DeviceSplitKernel::path_trace(DeviceTask *task,
+bool DeviceSplitKernel::path_trace(DeviceTask &task,
RenderTile &tile,
device_memory &kgbuffer,
device_memory &kernel_data)
@@ -222,9 +222,9 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
subtile.start_sample = tile.sample;
subtile.num_samples = samples_per_second;
- if (task->adaptive_sampling.use) {
- subtile.num_samples = task->adaptive_sampling.align_dynamic_samples(subtile.start_sample,
- subtile.num_samples);
+ if (task.adaptive_sampling.use) {
+ subtile.num_samples = task.adaptive_sampling.align_dynamic_samples(subtile.start_sample,
+ subtile.num_samples);
}
/* Don't go beyond requested number of samples. */
@@ -286,7 +286,7 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(buffer_update, global_size, local_size);
- if (task->get_cancel() && cancel_time == DBL_MAX) {
+ if (task.get_cancel() && cancel_time == DBL_MAX) {
/* Wait up to twice as many seconds for current samples to finish
* to avoid artifacts in render result from ending too soon.
*/
@@ -323,7 +323,7 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
}
int filter_sample = tile.sample + subtile.num_samples - 1;
- if (task->adaptive_sampling.use && task->adaptive_sampling.need_filter(filter_sample)) {
+ if (task.adaptive_sampling.use && task.adaptive_sampling.need_filter(filter_sample)) {
size_t buffer_size[2];
buffer_size[0] = round_up(tile.w, local_size[0]);
buffer_size[1] = round_up(tile.h, local_size[1]);
@@ -352,16 +352,16 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
#undef ENQUEUE_SPLIT_KERNEL
tile.sample += subtile.num_samples;
- task->update_progress(&tile, tile.w * tile.h * subtile.num_samples);
+ task.update_progress(&tile, tile.w * tile.h * subtile.num_samples);
time_multiplier = min(time_multiplier << 1, 10);
- if (task->get_cancel()) {
+ if (task.get_cancel()) {
return true;
}
}
- if (task->adaptive_sampling.use) {
+ if (task.adaptive_sampling.use) {
/* Reset the start samples. */
RenderTile subtile = tile;
subtile.start_sample = tile.start_sample;
diff --git a/intern/cycles/device/device_split_kernel.h b/intern/cycles/device/device_split_kernel.h
index 9d6b9efdd62..07a21b10299 100644
--- a/intern/cycles/device/device_split_kernel.h
+++ b/intern/cycles/device/device_split_kernel.h
@@ -109,7 +109,7 @@ class DeviceSplitKernel {
virtual ~DeviceSplitKernel();
bool load_kernels(const DeviceRequestedFeatures &requested_features);
- bool path_trace(DeviceTask *task,
+ bool path_trace(DeviceTask &task,
RenderTile &rtile,
device_memory &kgbuffer,
device_memory &kernel_data);
@@ -137,7 +137,7 @@ class DeviceSplitKernel {
virtual int2 split_kernel_local_size() = 0;
virtual int2 split_kernel_global_size(device_memory &kg,
device_memory &data,
- DeviceTask *task) = 0;
+ DeviceTask &task) = 0;
};
CCL_NAMESPACE_END
diff --git a/intern/cycles/device/device_task.cpp b/intern/cycles/device/device_task.cpp
index 7485e1b41de..6e7c184c6c9 100644
--- a/intern/cycles/device/device_task.cpp
+++ b/intern/cycles/device/device_task.cpp
@@ -50,7 +50,7 @@ DeviceTask::DeviceTask(Type type_)
last_update_time = time_dt();
}
-int DeviceTask::get_subtask_count(int num, int max_size)
+int DeviceTask::get_subtask_count(int num, int max_size) const
{
if (max_size != 0) {
int max_size_num;
@@ -78,7 +78,7 @@ int DeviceTask::get_subtask_count(int num, int max_size)
return num;
}
-void DeviceTask::split(list<DeviceTask> &tasks, int num, int max_size)
+void DeviceTask::split(list<DeviceTask> &tasks, int num, int max_size) const
{
num = get_subtask_count(num, max_size);
diff --git a/intern/cycles/device/device_task.h b/intern/cycles/device/device_task.h
index 8c4e682adb1..600973b8100 100644
--- a/intern/cycles/device/device_task.h
+++ b/intern/cycles/device/device_task.h
@@ -21,7 +21,6 @@
#include "util/util_function.h"
#include "util/util_list.h"
-#include "util/util_task.h"
CCL_NAMESPACE_BEGIN
@@ -32,8 +31,33 @@ class RenderBuffers;
class RenderTile;
class Tile;
+enum DenoiserType { /* Denoiser identifiers; NLM, OPTIX and OPENIMAGEDENOISE are single-bit values. */
+ DENOISER_NLM = 1, /* Default DenoiseParams::type; the "native" denoiser in need_denoising_task(). */
+ DENOISER_OPTIX = 2, /* OR-ed into info.denoisers by device_optix_info(). */
+ DENOISER_OPENIMAGEDENOISE = 4,
+ DENOISER_NUM, /* No initializer: previous enumerator + 1, i.e. 5 (not itself a bit flag). */
+
+ DENOISER_NONE = 0,
+ DENOISER_ALL = ~0, /* All bits set. */
+};
+
+typedef int DenoiserTypeMask; /* NOTE(review): presumably holds OR-ed DenoiserType values - confirm at use sites. */
+
class DenoiseParams {
public:
+ /* Apply denoiser to image. */
+ bool use;
+ /* Output denoising data passes (possibly without applying the denoiser). */
+ bool store_passes;
+
+ /* Denoiser type. */
+ DenoiserType type;
+
+ /* Viewport start sample. */
+ int start_sample;
+
+ /** Native Denoiser **/
+
/* Pixel radius for neighboring pixels to take into account. */
int radius;
/* Controls neighbor pixel weighting for the denoising filter. */
@@ -47,18 +71,36 @@ class DenoiseParams {
int neighbor_frames;
/* Clamp the input to the range of +-1e8. Should be enough for any legitimate data. */
bool clamp_input;
+
+ /** Optix Denoiser **/
+
/* Passes handed over to the OptiX denoiser (default to color + albedo). */
int optix_input_passes;
DenoiseParams() /* Initialize the parameters to their defaults. */
{
+ use = false; /* Denoiser is not applied to the image by default. */
+ store_passes = false;
+
+ type = DENOISER_NLM;
+
radius = 8;
strength = 0.5f;
feature_strength = 0.5f;
relative_pca = false;
neighbor_frames = 2;
clamp_input = true;
+
optix_input_passes = 2; /* Color + albedo, per the comment on the member declaration. */
+
+ start_sample = 0;
+ }
+
+ /* Whether a denoising task has to run: when denoising is applied (`use`), or when passes are
+ * stored for the NLM (native) denoiser, which prefilters them even if it is not applied. */
+ bool need_denoising_task() const
+ {
+ return (use || (store_passes && type == DENOISER_NLM));
+ }
};
@@ -75,7 +117,7 @@ class AdaptiveSampling {
int min_samples;
};
-class DeviceTask : public Task {
+class DeviceTask {
public:
typedef enum { RENDER, FILM_CONVERT, SHADER, DENOISE_BUFFER } Type;
Type type;
@@ -98,8 +140,8 @@ class DeviceTask : public Task {
explicit DeviceTask(Type type = RENDER);
- int get_subtask_count(int num, int max_size = 0);
- void split(list<DeviceTask> &tasks, int num, int max_size = 0);
+ int get_subtask_count(int num, int max_size = 0) const;
+ void split(list<DeviceTask> &tasks, int num, int max_size = 0) const;
void update_progress(RenderTile *rtile, int pixel_samples = -1);
@@ -116,10 +158,6 @@ class DeviceTask : public Task {
bool denoising_from_render;
vector<int> denoising_frames;
- bool denoising_do_filter;
- bool denoising_use_optix;
- bool denoising_write_passes;
-
int pass_stride;
int frame_stride;
int target_pass_stride;
diff --git a/intern/cycles/device/opencl/device_opencl.h b/intern/cycles/device/opencl/device_opencl.h
index 389268e1c2a..e0140996cf0 100644
--- a/intern/cycles/device/opencl/device_opencl.h
+++ b/intern/cycles/device/opencl/device_opencl.h
@@ -23,6 +23,7 @@
# include "util/util_map.h"
# include "util/util_param.h"
# include "util/util_string.h"
+# include "util/util_task.h"
# include "clew.h"
@@ -258,6 +259,8 @@ class OpenCLDevice : public Device {
TaskPool load_required_kernel_task_pool;
/* Task pool for optional kernels (feature kernels during foreground rendering) */
TaskPool load_kernel_task_pool;
+ std::atomic<int> load_kernel_num_compiling;
+
cl_context cxContext;
cl_command_queue cqCommandQueue;
cl_platform_id cpPlatform;
@@ -455,14 +458,6 @@ class OpenCLDevice : public Device {
void denoise(RenderTile &tile, DenoisingTask &denoising);
- class OpenCLDeviceTask : public DeviceTask {
- public:
- OpenCLDeviceTask(OpenCLDevice *device, DeviceTask &task) : DeviceTask(task)
- {
- run = function_bind(&OpenCLDevice::thread_run, device, this);
- }
- };
-
int get_split_task_count(DeviceTask & /*task*/)
{
return 1;
@@ -470,7 +465,10 @@ class OpenCLDevice : public Device {
void task_add(DeviceTask &task) /* Push a job onto task_pool that runs thread_run() on a copy of `task`. */
{
- task_pool.push(new OpenCLDeviceTask(this, task));
+ task_pool.push([=] { /* [=] captures `this` and a copy of the DeviceTask that `task` refers to. */
+ DeviceTask task_copy = task; /* Closure is not mutable, so its copy is const; thread_run() takes DeviceTask&. */
+ thread_run(task_copy);
+ });
}
void task_wait()
@@ -483,7 +481,7 @@ class OpenCLDevice : public Device {
task_pool.cancel();
}
- void thread_run(DeviceTask *task);
+ void thread_run(DeviceTask &task);
virtual BVHLayoutMask get_bvh_layout_mask() const
{
diff --git a/intern/cycles/device/opencl/device_opencl_impl.cpp b/intern/cycles/device/opencl/device_opencl_impl.cpp
index beb3174b111..8c94815b193 100644
--- a/intern/cycles/device/opencl/device_opencl_impl.cpp
+++ b/intern/cycles/device/opencl/device_opencl_impl.cpp
@@ -542,7 +542,7 @@ class OpenCLSplitKernel : public DeviceSplitKernel {
virtual int2 split_kernel_global_size(device_memory &kg,
device_memory &data,
- DeviceTask * /*task*/)
+ DeviceTask & /*task*/)
{
cl_device_type type = OpenCLInfo::get_device_type(device->cdDevice);
/* Use small global size on CPU devices as it seems to be much faster. */
@@ -610,6 +610,7 @@ void OpenCLDevice::opencl_assert_err(cl_int err, const char *where)
OpenCLDevice::OpenCLDevice(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background)
: Device(info, stats, profiler, background),
+ load_kernel_num_compiling(0),
kernel_programs(this),
preview_programs(this),
memory_manager(this),
@@ -684,9 +685,9 @@ OpenCLDevice::OpenCLDevice(DeviceInfo &info, Stats &stats, Profiler &profiler, b
OpenCLDevice::~OpenCLDevice()
{
- task_pool.stop();
- load_required_kernel_task_pool.stop();
- load_kernel_task_pool.stop();
+ task_pool.cancel();
+ load_required_kernel_task_pool.cancel();
+ load_kernel_task_pool.cancel();
memory_manager.free();
@@ -798,7 +799,11 @@ bool OpenCLDevice::load_kernels(const DeviceRequestedFeatures &requested_feature
* internally within a single process. */
foreach (OpenCLProgram *program, programs) {
if (!program->load()) {
- load_kernel_task_pool.push(function_bind(&OpenCLProgram::compile, program));
+ load_kernel_num_compiling++;
+ load_kernel_task_pool.push([=] {
+ program->compile();
+ load_kernel_num_compiling--;
+ });
}
}
return true;
@@ -868,7 +873,7 @@ bool OpenCLDevice::wait_for_availability(const DeviceRequestedFeatures &requeste
* Better to check on device level than per kernel as mixing preview and
* non-preview kernels does not work due to different data types */
if (use_preview_kernels) {
- use_preview_kernels = !load_kernel_task_pool.finished();
+ use_preview_kernels = load_kernel_num_compiling.load() > 0;
}
}
return split_kernel->load_kernels(requested_features);
@@ -895,7 +900,7 @@ DeviceKernelStatus OpenCLDevice::get_active_kernel_switch_state()
return DEVICE_KERNEL_USING_FEATURE_KERNEL;
}
- bool other_kernels_finished = load_kernel_task_pool.finished();
+ bool other_kernels_finished = load_kernel_num_compiling.load() == 0;
if (use_preview_kernels) {
if (other_kernels_finished) {
return DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE;
@@ -1336,20 +1341,20 @@ void OpenCLDevice::flush_texture_buffers()
memory_manager.alloc("texture_info", texture_info);
}
-void OpenCLDevice::thread_run(DeviceTask *task)
+void OpenCLDevice::thread_run(DeviceTask &task)
{
flush_texture_buffers();
- if (task->type == DeviceTask::RENDER) {
+ if (task.type == DeviceTask::RENDER) {
RenderTile tile;
- DenoisingTask denoising(this, *task);
+ DenoisingTask denoising(this, task);
/* Allocate buffer for kernel globals */
device_only_memory<KernelGlobalsDummy> kgbuffer(this, "kernel_globals");
kgbuffer.alloc_to_device(1);
/* Keep rendering tiles until done. */
- while (task->acquire_tile(this, tile, task->tile_types)) {
+ while (task.acquire_tile(this, tile, task.tile_types)) {
if (tile.task == RenderTile::PATH_TRACE) {
assert(tile.task == RenderTile::PATH_TRACE);
scoped_timer timer(&tile.buffers->render_time);
@@ -1368,42 +1373,42 @@ void OpenCLDevice::thread_run(DeviceTask *task)
clFinish(cqCommandQueue);
}
else if (tile.task == RenderTile::BAKE) {
- bake(*task, tile);
+ bake(task, tile);
}
else if (tile.task == RenderTile::DENOISE) {
tile.sample = tile.start_sample + tile.num_samples;
denoise(tile, denoising);
- task->update_progress(&tile, tile.w * tile.h);
+ task.update_progress(&tile, tile.w * tile.h);
}
- task->release_tile(tile);
+ task.release_tile(tile);
}
kgbuffer.free();
}
- else if (task->type == DeviceTask::SHADER) {
- shader(*task);
+ else if (task.type == DeviceTask::SHADER) {
+ shader(task);
}
- else if (task->type == DeviceTask::FILM_CONVERT) {
- film_convert(*task, task->buffer, task->rgba_byte, task->rgba_half);
+ else if (task.type == DeviceTask::FILM_CONVERT) {
+ film_convert(task, task.buffer, task.rgba_byte, task.rgba_half);
}
- else if (task->type == DeviceTask::DENOISE_BUFFER) {
+ else if (task.type == DeviceTask::DENOISE_BUFFER) {
RenderTile tile;
- tile.x = task->x;
- tile.y = task->y;
- tile.w = task->w;
- tile.h = task->h;
- tile.buffer = task->buffer;
- tile.sample = task->sample + task->num_samples;
- tile.num_samples = task->num_samples;
- tile.start_sample = task->sample;
- tile.offset = task->offset;
- tile.stride = task->stride;
- tile.buffers = task->buffers;
-
- DenoisingTask denoising(this, *task);
+ tile.x = task.x;
+ tile.y = task.y;
+ tile.w = task.w;
+ tile.h = task.h;
+ tile.buffer = task.buffer;
+ tile.sample = task.sample + task.num_samples;
+ tile.num_samples = task.num_samples;
+ tile.start_sample = task.sample;
+ tile.offset = task.offset;
+ tile.stride = task.stride;
+ tile.buffers = task.buffers;
+
+ DenoisingTask denoising(this, task);
denoise(tile, denoising);
- task->update_progress(&tile, tile.w * tile.h);
+ task.update_progress(&tile, tile.w * tile.h);
}
}
@@ -1937,10 +1942,8 @@ void OpenCLDevice::bake(DeviceTask &task, RenderTile &rtile)
clFinish(cqCommandQueue);
}
-string OpenCLDevice::kernel_build_options(const string *debug_src)
+static bool kernel_build_opencl_2(cl_device_id cdDevice)
{
- string build_options = "-cl-no-signed-zeros -cl-mad-enable ";
-
/* Build with OpenCL 2.0 if available, this improves performance
* with AMD OpenCL drivers on Windows and Linux (legacy drivers).
* Note that OpenCL selects the highest 1.x version by default,
@@ -1948,10 +1951,36 @@ string OpenCLDevice::kernel_build_options(const string *debug_src)
int version_major, version_minor;
if (OpenCLInfo::get_device_version(cdDevice, &version_major, &version_minor)) {
if (version_major >= 2) {
- build_options += "-cl-std=CL2.0 ";
+ /* This appears to trigger a driver bug in Radeon RX cards with certain
+ * driver version, so don't use OpenCL 2.0 for those. */
+ string device_name = OpenCLInfo::get_readable_device_name(cdDevice);
+ if (string_startswith(device_name, "Radeon RX 4") ||
+ string_startswith(device_name, "Radeon (TM) RX 4") ||
+ string_startswith(device_name, "Radeon RX 5") ||
+ string_startswith(device_name, "Radeon (TM) RX 5")) {
+ char version[256] = "";
+ int driver_major, driver_minor;
+ clGetDeviceInfo(cdDevice, CL_DEVICE_VERSION, sizeof(version), &version, NULL);
+ if (sscanf(version, "OpenCL 2.0 AMD-APP (%d.%d)", &driver_major, &driver_minor) == 2) {
+ return !(driver_major == 3075 && driver_minor <= 12);
+ }
+ }
+
+ return true;
}
}
+ return false;
+}
+
+string OpenCLDevice::kernel_build_options(const string *debug_src)
+{
+ string build_options = "-cl-no-signed-zeros -cl-mad-enable ";
+
+ if (kernel_build_opencl_2(cdDevice)) {
+ build_options += "-cl-std=CL2.0 ";
+ }
+
if (platform_name == "NVIDIA CUDA") {
build_options +=
"-D__KERNEL_OPENCL_NVIDIA__ "
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index 2e839a616e9..7cc0d32d521 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -81,18 +81,6 @@ set(SRC_BVH_HEADERS
bvh/bvh_types.h
bvh/bvh_volume.h
bvh/bvh_volume_all.h
- bvh/qbvh_nodes.h
- bvh/qbvh_shadow_all.h
- bvh/qbvh_local.h
- bvh/qbvh_traversal.h
- bvh/qbvh_volume.h
- bvh/qbvh_volume_all.h
- bvh/obvh_nodes.h
- bvh/obvh_shadow_all.h
- bvh/obvh_local.h
- bvh/obvh_traversal.h
- bvh/obvh_volume.h
- bvh/obvh_volume_all.h
bvh/bvh_embree.h
)
@@ -113,6 +101,8 @@ set(SRC_HEADERS
kernel_id_passes.h
kernel_jitter.h
kernel_light.h
+ kernel_light_background.h
+ kernel_light_common.h
kernel_math.h
kernel_montecarlo.h
kernel_passes.h
diff --git a/intern/cycles/kernel/bvh/bvh.h b/intern/cycles/kernel/bvh/bvh.h
index 9b9df883b62..80b58f46329 100644
--- a/intern/cycles/kernel/bvh/bvh.h
+++ b/intern/cycles/kernel/bvh/bvh.h
@@ -35,14 +35,6 @@ CCL_NAMESPACE_BEGIN
#ifndef __KERNEL_OPTIX__
-/* Common QBVH functions. */
-# ifdef __QBVH__
-# include "kernel/bvh/qbvh_nodes.h"
-# ifdef __KERNEL_AVX2__
-# include "kernel/bvh/obvh_nodes.h"
-# endif
-# endif
-
/* Regular BVH traversal */
# include "kernel/bvh/bvh_nodes.h"
@@ -51,27 +43,21 @@ CCL_NAMESPACE_BEGIN
# define BVH_FUNCTION_FEATURES 0
# include "kernel/bvh/bvh_traversal.h"
-# if defined(__INSTANCING__)
-# define BVH_FUNCTION_NAME bvh_intersect_instancing
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING
-# include "kernel/bvh/bvh_traversal.h"
-# endif
-
# if defined(__HAIR__)
# define BVH_FUNCTION_NAME bvh_intersect_hair
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR
+# define BVH_FUNCTION_FEATURES BVH_HAIR
# include "kernel/bvh/bvh_traversal.h"
# endif
# if defined(__OBJECT_MOTION__)
# define BVH_FUNCTION_NAME bvh_intersect_motion
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_MOTION
+# define BVH_FUNCTION_FEATURES BVH_MOTION
# include "kernel/bvh/bvh_traversal.h"
# endif
# if defined(__HAIR__) && defined(__OBJECT_MOTION__)
# define BVH_FUNCTION_NAME bvh_intersect_hair_motion
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR | BVH_MOTION
+# define BVH_FUNCTION_FEATURES BVH_HAIR | BVH_MOTION
# include "kernel/bvh/bvh_traversal.h"
# endif
@@ -96,15 +82,9 @@ CCL_NAMESPACE_BEGIN
# define BVH_FUNCTION_FEATURES BVH_HAIR
# include "kernel/bvh/bvh_volume.h"
-# if defined(__INSTANCING__)
-# define BVH_FUNCTION_NAME bvh_intersect_volume_instancing
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR
-# include "kernel/bvh/bvh_volume.h"
-# endif
-
# if defined(__OBJECT_MOTION__)
# define BVH_FUNCTION_NAME bvh_intersect_volume_motion
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_MOTION | BVH_HAIR
+# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR
# include "kernel/bvh/bvh_volume.h"
# endif
# endif /* __VOLUME__ */
@@ -116,27 +96,21 @@ CCL_NAMESPACE_BEGIN
# define BVH_FUNCTION_FEATURES 0
# include "kernel/bvh/bvh_shadow_all.h"
-# if defined(__INSTANCING__)
-# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_instancing
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING
-# include "kernel/bvh/bvh_shadow_all.h"
-# endif
-
# if defined(__HAIR__)
# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR
+# define BVH_FUNCTION_FEATURES BVH_HAIR
# include "kernel/bvh/bvh_shadow_all.h"
# endif
# if defined(__OBJECT_MOTION__)
# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_motion
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_MOTION
+# define BVH_FUNCTION_FEATURES BVH_MOTION
# include "kernel/bvh/bvh_shadow_all.h"
# endif
# if defined(__HAIR__) && defined(__OBJECT_MOTION__)
# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair_motion
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR | BVH_MOTION
+# define BVH_FUNCTION_FEATURES BVH_HAIR | BVH_MOTION
# include "kernel/bvh/bvh_shadow_all.h"
# endif
# endif /* __SHADOW_RECORD_ALL__ */
@@ -148,15 +122,9 @@ CCL_NAMESPACE_BEGIN
# define BVH_FUNCTION_FEATURES BVH_HAIR
# include "kernel/bvh/bvh_volume_all.h"
-# if defined(__INSTANCING__)
-# define BVH_FUNCTION_NAME bvh_intersect_volume_all_instancing
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR
-# include "kernel/bvh/bvh_volume_all.h"
-# endif
-
# if defined(__OBJECT_MOTION__)
# define BVH_FUNCTION_NAME bvh_intersect_volume_all_motion
-# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_MOTION | BVH_HAIR
+# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR
# include "kernel/bvh/bvh_volume_all.h"
# endif
# endif /* __VOLUME_RECORD_ALL__ */
@@ -264,21 +232,8 @@ ccl_device_intersect bool scene_intersect(KernelGlobals *kg,
}
# endif /* __HAIR__ */
-# ifdef __KERNEL_CPU__
-# ifdef __INSTANCING__
- if (kernel_data.bvh.have_instancing) {
- return bvh_intersect_instancing(kg, ray, isect, visibility);
- }
-# endif /* __INSTANCING__ */
- return bvh_intersect(kg, ray, isect, visibility);
-# else /* __KERNEL_CPU__ */
-# ifdef __INSTANCING__
- return bvh_intersect_instancing(kg, ray, isect, visibility);
-# else
return bvh_intersect(kg, ray, isect, visibility);
-# endif /* __INSTANCING__ */
-# endif /* __KERNEL_CPU__ */
-#endif /* __KERNEL_OPTIX__ */
+#endif /* __KERNEL_OPTIX__ */
}
#ifdef __BVH_LOCAL__
@@ -476,21 +431,8 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals *kg,
}
# endif /* __HAIR__ */
-# ifdef __KERNEL_CPU__
-# ifdef __INSTANCING__
- if (kernel_data.bvh.have_instancing) {
- return bvh_intersect_shadow_all_instancing(kg, ray, isect, visibility, max_hits, num_hits);
- }
-# endif /* __INSTANCING__ */
return bvh_intersect_shadow_all(kg, ray, isect, visibility, max_hits, num_hits);
-# else
-# ifdef __INSTANCING__
- return bvh_intersect_shadow_all_instancing(kg, ray, isect, visibility, max_hits, num_hits);
-# else
- return bvh_intersect_shadow_all(kg, ray, isect, visibility, max_hits, num_hits);
-# endif /* __INSTANCING__ */
-# endif /* __KERNEL_CPU__ */
-# endif /* __KERNEL_OPTIX__ */
+# endif /* __KERNEL_OPTIX__ */
}
#endif /* __SHADOW_RECORD_ALL__ */
@@ -548,21 +490,8 @@ ccl_device_intersect bool scene_intersect_volume(KernelGlobals *kg,
}
# endif /* __OBJECT_MOTION__ */
-# ifdef __KERNEL_CPU__
-# ifdef __INSTANCING__
- if (kernel_data.bvh.have_instancing) {
- return bvh_intersect_volume_instancing(kg, ray, isect, visibility);
- }
-# endif /* __INSTANCING__ */
return bvh_intersect_volume(kg, ray, isect, visibility);
-# else /* __KERNEL_CPU__ */
-# ifdef __INSTANCING__
- return bvh_intersect_volume_instancing(kg, ray, isect, visibility);
-# else
- return bvh_intersect_volume(kg, ray, isect, visibility);
-# endif /* __INSTANCING__ */
-# endif /* __KERNEL_CPU__ */
-# endif /* __KERNEL_OPTIX__ */
+# endif /* __KERNEL_OPTIX__ */
}
#endif /* __VOLUME__ */
@@ -599,11 +528,6 @@ ccl_device_intersect uint scene_intersect_volume_all(KernelGlobals *kg,
}
# endif /* __OBJECT_MOTION__ */
-# ifdef __INSTANCING__
- if (kernel_data.bvh.have_instancing) {
- return bvh_intersect_volume_all_instancing(kg, ray, isect, max_hits, visibility);
- }
-# endif /* __INSTANCING__ */
return bvh_intersect_volume_all(kg, ray, isect, max_hits, visibility);
}
#endif /* __VOLUME_RECORD_ALL__ */
diff --git a/intern/cycles/kernel/bvh/bvh_local.h b/intern/cycles/kernel/bvh/bvh_local.h
index 7a069ef1108..4006c9c1632 100644
--- a/intern/cycles/kernel/bvh/bvh_local.h
+++ b/intern/cycles/kernel/bvh/bvh_local.h
@@ -17,13 +17,6 @@
* limitations under the License.
*/
-#ifdef __QBVH__
-# include "kernel/bvh/qbvh_local.h"
-# ifdef __KERNEL_AVX2__
-# include "kernel/bvh/obvh_local.h"
-# endif
-#endif
-
#if BVH_FEATURE(BVH_HAIR)
# define NODE_INTERSECT bvh_node_intersect
#else
@@ -88,26 +81,6 @@ ccl_device_inline
object = local_object;
}
-#if defined(__KERNEL_SSE2__)
- const shuffle_swap_t shuf_identity = shuffle_swap_identity();
- const shuffle_swap_t shuf_swap = shuffle_swap_swap();
-
- const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
- ssef Psplat[3], idirsplat[3];
-# if BVH_FEATURE(BVH_HAIR)
- ssef tnear(0.0f), tfar(isect_t);
-# endif
- shuffle_swap_t shufflexyz[3];
-
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
-
- ssef tsplat(0.0f, 0.0f, -isect_t, -isect_t);
-
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-#endif
-
/* traversal loop */
do {
do {
@@ -117,33 +90,16 @@ ccl_device_inline
float dist[2];
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
-#if !defined(__KERNEL_SSE2__)
traverse_mask = NODE_INTERSECT(kg,
P,
-# if BVH_FEATURE(BVH_HAIR)
+#if BVH_FEATURE(BVH_HAIR)
dir,
-# endif
+#endif
idir,
isect_t,
node_addr,
PATH_RAY_ALL_VISIBILITY,
dist);
-#else // __KERNEL_SSE2__
- traverse_mask = NODE_INTERSECT(kg,
- P,
- dir,
-# if BVH_FEATURE(BVH_HAIR)
- tnear,
- tfar,
-# endif
- tsplat,
- Psplat,
- idirsplat,
- shufflexyz,
- node_addr,
- PATH_RAY_ALL_VISIBILITY,
- dist);
-#endif // __KERNEL_SSE2__
node_addr = __float_as_int(cnodes.z);
node_addr_child1 = __float_as_int(cnodes.w);
@@ -247,20 +203,7 @@ ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
uint *lcg_state,
int max_hits)
{
- switch (kernel_data.bvh.bvh_layout) {
-#ifdef __KERNEL_AVX2__
- case BVH_LAYOUT_BVH8:
- return BVH_FUNCTION_FULL_NAME(OBVH)(kg, ray, local_isect, local_object, lcg_state, max_hits);
-#endif
-#ifdef __QBVH__
- case BVH_LAYOUT_BVH4:
- return BVH_FUNCTION_FULL_NAME(QBVH)(kg, ray, local_isect, local_object, lcg_state, max_hits);
-#endif
- case BVH_LAYOUT_BVH2:
- return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, local_isect, local_object, lcg_state, max_hits);
- }
- kernel_assert(!"Should not happen");
- return false;
+ return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, local_isect, local_object, lcg_state, max_hits);
}
#undef BVH_FUNCTION_NAME
diff --git a/intern/cycles/kernel/bvh/bvh_nodes.h b/intern/cycles/kernel/bvh/bvh_nodes.h
index db598d1c7fa..5367bdb633c 100644
--- a/intern/cycles/kernel/bvh/bvh_nodes.h
+++ b/intern/cycles/kernel/bvh/bvh_nodes.h
@@ -28,7 +28,6 @@ ccl_device_forceinline Transform bvh_unaligned_node_fetch_space(KernelGlobals *k
return space;
}
-#if !defined(__KERNEL_SSE2__)
ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals *kg,
const float3 P,
const float3 idir,
@@ -39,9 +38,9 @@ ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals *kg,
{
/* fetch node data */
-# ifdef __VISIBILITY_FLAG__
+#ifdef __VISIBILITY_FLAG__
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
-# endif
+#endif
float4 node0 = kernel_tex_fetch(__bvh_nodes, node_addr + 1);
float4 node1 = kernel_tex_fetch(__bvh_nodes, node_addr + 2);
float4 node2 = kernel_tex_fetch(__bvh_nodes, node_addr + 3);
@@ -68,13 +67,13 @@ ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals *kg,
dist[0] = c0min;
dist[1] = c1min;
-# ifdef __VISIBILITY_FLAG__
+#ifdef __VISIBILITY_FLAG__
/* this visibility test gives a 5% performance hit, how to solve? */
return (((c0max >= c0min) && (__float_as_uint(cnodes.x) & visibility)) ? 1 : 0) |
(((c1max >= c1min) && (__float_as_uint(cnodes.y) & visibility)) ? 2 : 0);
-# else
+#else
return ((c0max >= c0min) ? 1 : 0) | ((c1max >= c1min) ? 2 : 0);
-# endif
+#endif
}
ccl_device_forceinline bool bvh_unaligned_node_intersect_child(KernelGlobals *kg,
@@ -113,21 +112,21 @@ ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals *kg,
float dist[2])
{
int mask = 0;
-# ifdef __VISIBILITY_FLAG__
+#ifdef __VISIBILITY_FLAG__
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
-# endif
+#endif
if (bvh_unaligned_node_intersect_child(kg, P, dir, t, node_addr, 0, &dist[0])) {
-# ifdef __VISIBILITY_FLAG__
+#ifdef __VISIBILITY_FLAG__
if ((__float_as_uint(cnodes.x) & visibility))
-# endif
+#endif
{
mask |= 1;
}
}
if (bvh_unaligned_node_intersect_child(kg, P, dir, t, node_addr, 1, &dist[1])) {
-# ifdef __VISIBILITY_FLAG__
+#ifdef __VISIBILITY_FLAG__
if ((__float_as_uint(cnodes.y) & visibility))
-# endif
+#endif
{
mask |= 2;
}
@@ -152,125 +151,3 @@ ccl_device_forceinline int bvh_node_intersect(KernelGlobals *kg,
return bvh_aligned_node_intersect(kg, P, idir, t, node_addr, visibility, dist);
}
}
-
-#else /* !defined(__KERNEL_SSE2__) */
-
-int ccl_device_forceinline bvh_aligned_node_intersect(KernelGlobals *kg,
- const float3 &P,
- const float3 &dir,
- const ssef &tsplat,
- const ssef Psplat[3],
- const ssef idirsplat[3],
- const shuffle_swap_t shufflexyz[3],
- const int node_addr,
- const uint visibility,
- float dist[2])
-{
- /* Intersect two child bounding boxes, SSE3 version adapted from Embree */
- const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
-
- /* fetch node data */
- const ssef *bvh_nodes = (ssef *)kg->__bvh_nodes.data + node_addr;
-
- /* intersect ray against child nodes */
- const ssef tminmaxx = (shuffle_swap(bvh_nodes[1], shufflexyz[0]) - Psplat[0]) * idirsplat[0];
- const ssef tminmaxy = (shuffle_swap(bvh_nodes[2], shufflexyz[1]) - Psplat[1]) * idirsplat[1];
- const ssef tminmaxz = (shuffle_swap(bvh_nodes[3], shufflexyz[2]) - Psplat[2]) * idirsplat[2];
-
- /* calculate { c0min, c1min, -c0max, -c1max} */
- ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat));
- const ssef tminmax = minmax ^ pn;
- const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax);
-
- dist[0] = tminmax[0];
- dist[1] = tminmax[1];
-
- int mask = movemask(lrhit);
-
-# ifdef __VISIBILITY_FLAG__
- /* this visibility test gives a 5% performance hit, how to solve? */
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
- int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility)) ? 1 : 0) |
- (((mask & 2) && (__float_as_uint(cnodes.y) & visibility)) ? 2 : 0);
- return cmask;
-# else
- return mask & 3;
-# endif
-}
-
-ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals *kg,
- const float3 P,
- const float3 dir,
- const ssef &isect_near,
- const ssef &isect_far,
- const int node_addr,
- const uint visibility,
- float dist[2])
-{
- Transform space0 = bvh_unaligned_node_fetch_space(kg, node_addr, 0);
- Transform space1 = bvh_unaligned_node_fetch_space(kg, node_addr, 1);
-
- float3 aligned_dir0 = transform_direction(&space0, dir),
- aligned_dir1 = transform_direction(&space1, dir);
- float3 aligned_P0 = transform_point(&space0, P), aligned_P1 = transform_point(&space1, P);
- float3 nrdir0 = -bvh_inverse_direction(aligned_dir0),
- nrdir1 = -bvh_inverse_direction(aligned_dir1);
-
- ssef lower_x = ssef(aligned_P0.x * nrdir0.x, aligned_P1.x * nrdir1.x, 0.0f, 0.0f),
- lower_y = ssef(aligned_P0.y * nrdir0.y, aligned_P1.y * nrdir1.y, 0.0f, 0.0f),
- lower_z = ssef(aligned_P0.z * nrdir0.z, aligned_P1.z * nrdir1.z, 0.0f, 0.0f);
-
- ssef upper_x = lower_x - ssef(nrdir0.x, nrdir1.x, 0.0f, 0.0f),
- upper_y = lower_y - ssef(nrdir0.y, nrdir1.y, 0.0f, 0.0f),
- upper_z = lower_z - ssef(nrdir0.z, nrdir1.z, 0.0f, 0.0f);
-
- ssef tnear_x = min(lower_x, upper_x);
- ssef tnear_y = min(lower_y, upper_y);
- ssef tnear_z = min(lower_z, upper_z);
- ssef tfar_x = max(lower_x, upper_x);
- ssef tfar_y = max(lower_y, upper_y);
- ssef tfar_z = max(lower_z, upper_z);
-
- const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
- const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
- sseb vmask = tnear <= tfar;
- dist[0] = tnear.f[0];
- dist[1] = tnear.f[1];
-
- int mask = (int)movemask(vmask);
-
-# ifdef __VISIBILITY_FLAG__
- /* this visibility test gives a 5% performance hit, how to solve? */
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
- int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility)) ? 1 : 0) |
- (((mask & 2) && (__float_as_uint(cnodes.y) & visibility)) ? 2 : 0);
- return cmask;
-# else
- return mask & 3;
-# endif
-}
-
-ccl_device_forceinline int bvh_node_intersect(KernelGlobals *kg,
- const float3 &P,
- const float3 &dir,
- const ssef &isect_near,
- const ssef &isect_far,
- const ssef &tsplat,
- const ssef Psplat[3],
- const ssef idirsplat[3],
- const shuffle_swap_t shufflexyz[3],
- const int node_addr,
- const uint visibility,
- float dist[2])
-{
- float4 node = kernel_tex_fetch(__bvh_nodes, node_addr);
- if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
- return bvh_unaligned_node_intersect(
- kg, P, dir, isect_near, isect_far, node_addr, visibility, dist);
- }
- else {
- return bvh_aligned_node_intersect(
- kg, P, dir, tsplat, Psplat, idirsplat, shufflexyz, node_addr, visibility, dist);
- }
-}
-#endif /* !defined(__KERNEL_SSE2__) */
diff --git a/intern/cycles/kernel/bvh/bvh_shadow_all.h b/intern/cycles/kernel/bvh/bvh_shadow_all.h
index 268bb149970..dccd257d2de 100644
--- a/intern/cycles/kernel/bvh/bvh_shadow_all.h
+++ b/intern/cycles/kernel/bvh/bvh_shadow_all.h
@@ -17,13 +17,6 @@
* limitations under the License.
*/
-#ifdef __QBVH__
-# include "kernel/bvh/qbvh_shadow_all.h"
-# ifdef __KERNEL_AVX2__
-# include "kernel/bvh/obvh_shadow_all.h"
-# endif
-#endif
-
#if BVH_FEATURE(BVH_HAIR)
# define NODE_INTERSECT bvh_node_intersect
#else
@@ -34,7 +27,6 @@
* enabled/disabled. This way we can compile optimized versions for each case
* without new features slowing things down.
*
- * BVH_INSTANCING: object instancing
* BVH_HAIR: hair curve rendering
* BVH_MOTION: motion blur rendering
*/
@@ -76,33 +68,11 @@ ccl_device_inline
Transform ob_itfm;
#endif
-#if BVH_FEATURE(BVH_INSTANCING)
int num_hits_in_instance = 0;
-#endif
*num_hits = 0;
isect_array->t = tmax;
-#if defined(__KERNEL_SSE2__)
- const shuffle_swap_t shuf_identity = shuffle_swap_identity();
- const shuffle_swap_t shuf_swap = shuffle_swap_swap();
-
- const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
- ssef Psplat[3], idirsplat[3];
-# if BVH_FEATURE(BVH_HAIR)
- ssef tnear(0.0f), tfar(isect_t);
-# endif
- shuffle_swap_t shufflexyz[3];
-
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
-
- ssef tsplat(0.0f, 0.0f, -isect_t, -isect_t);
-
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-#endif /* __KERNEL_SSE2__ */
-
/* traversal loop */
do {
do {
@@ -112,33 +82,16 @@ ccl_device_inline
float dist[2];
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
-#if !defined(__KERNEL_SSE2__)
traverse_mask = NODE_INTERSECT(kg,
P,
-# if BVH_FEATURE(BVH_HAIR)
+#if BVH_FEATURE(BVH_HAIR)
dir,
-# endif
+#endif
idir,
isect_t,
node_addr,
visibility,
dist);
-#else // __KERNEL_SSE2__
- traverse_mask = NODE_INTERSECT(kg,
- P,
- dir,
-# if BVH_FEATURE(BVH_HAIR)
- tnear,
- tfar,
-# endif
- tsplat,
- Psplat,
- idirsplat,
- shufflexyz,
- node_addr,
- visibility,
- dist);
-#endif // __KERNEL_SSE2__
node_addr = __float_as_int(cnodes.z);
node_addr_child1 = __float_as_int(cnodes.w);
@@ -174,9 +127,7 @@ ccl_device_inline
float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
int prim_addr = __float_as_int(leaf.x);
-#if BVH_FEATURE(BVH_INSTANCING)
if (prim_addr >= 0) {
-#endif
const int prim_addr2 = __float_as_int(leaf.y);
const uint type = __float_as_int(leaf.w);
const uint p_type = type & PRIMITIVE_ALL;
@@ -207,31 +158,13 @@ ccl_device_inline
}
#endif
#if BVH_FEATURE(BVH_HAIR)
- case PRIMITIVE_CURVE:
- case PRIMITIVE_MOTION_CURVE: {
+ case PRIMITIVE_CURVE_THICK:
+ case PRIMITIVE_MOTION_CURVE_THICK:
+ case PRIMITIVE_CURVE_RIBBON:
+ case PRIMITIVE_MOTION_CURVE_RIBBON: {
const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
- if (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
- hit = cardinal_curve_intersect(kg,
- isect_array,
- P,
- dir,
- visibility,
- object,
- prim_addr,
- ray->time,
- curve_type);
- }
- else {
- hit = curve_intersect(kg,
- isect_array,
- P,
- dir,
- visibility,
- object,
- prim_addr,
- ray->time,
- curve_type);
- }
+ hit = curve_intersect(
+ kg, isect_array, P, dir, visibility, object, prim_addr, ray->time, curve_type);
break;
}
#endif
@@ -276,9 +209,7 @@ ccl_device_inline
/* move on to next entry in intersections array */
isect_array++;
(*num_hits)++;
-#if BVH_FEATURE(BVH_INSTANCING)
num_hits_in_instance++;
-#endif
isect_array->t = isect_t;
}
@@ -286,32 +217,19 @@ ccl_device_inline
prim_addr++;
}
}
-#if BVH_FEATURE(BVH_INSTANCING)
else {
/* instance push */
object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
-# if BVH_FEATURE(BVH_MOTION)
+#if BVH_FEATURE(BVH_MOTION)
isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
-# else
+#else
isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
-# endif
+#endif
num_hits_in_instance = 0;
isect_array->t = isect_t;
-# if defined(__KERNEL_SSE2__)
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
-
- tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
-# if BVH_FEATURE(BVH_HAIR)
- tfar = ssef(isect_t);
-# endif
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-# endif
-
++stack_ptr;
kernel_assert(stack_ptr < BVH_STACK_SIZE);
traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL;
@@ -319,10 +237,8 @@ ccl_device_inline
node_addr = kernel_tex_fetch(__object_node, object);
}
}
-#endif /* FEATURE(BVH_INSTANCING) */
} while (node_addr != ENTRYPOINT_SENTINEL);
-#if BVH_FEATURE(BVH_INSTANCING)
if (stack_ptr >= 0) {
kernel_assert(object != OBJECT_NONE);
@@ -330,11 +246,11 @@ ccl_device_inline
if (num_hits_in_instance) {
float t_fac;
-# if BVH_FEATURE(BVH_MOTION)
+#if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
-# else
+#else
bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
-# endif
+#endif
/* scale isect->t to adjust for instancing */
for (int i = 0; i < num_hits_in_instance; i++) {
@@ -342,33 +258,20 @@ ccl_device_inline
}
}
else {
-# if BVH_FEATURE(BVH_MOTION)
+#if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
-# else
+#else
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
-# endif
+#endif
}
isect_t = tmax;
isect_array->t = isect_t;
-# if defined(__KERNEL_SSE2__)
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
-
- tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
-# if BVH_FEATURE(BVH_HAIR)
- tfar = ssef(isect_t);
-# endif
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-# endif
-
object = OBJECT_NONE;
node_addr = traversal_stack[stack_ptr];
--stack_ptr;
}
-#endif /* FEATURE(BVH_INSTANCING) */
} while (node_addr != ENTRYPOINT_SENTINEL);
return false;
@@ -381,20 +284,7 @@ ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
const uint max_hits,
uint *num_hits)
{
- switch (kernel_data.bvh.bvh_layout) {
-#ifdef __KERNEL_AVX2__
- case BVH_LAYOUT_BVH8:
- return BVH_FUNCTION_FULL_NAME(OBVH)(kg, ray, isect_array, visibility, max_hits, num_hits);
-#endif
-#ifdef __QBVH__
- case BVH_LAYOUT_BVH4:
- return BVH_FUNCTION_FULL_NAME(QBVH)(kg, ray, isect_array, visibility, max_hits, num_hits);
-#endif
- case BVH_LAYOUT_BVH2:
- return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect_array, visibility, max_hits, num_hits);
- }
- kernel_assert(!"Should not happen");
- return false;
+ return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect_array, visibility, max_hits, num_hits);
}
#undef BVH_FUNCTION_NAME
diff --git a/intern/cycles/kernel/bvh/bvh_traversal.h b/intern/cycles/kernel/bvh/bvh_traversal.h
index 18afc6ae4eb..8b2699ab807 100644
--- a/intern/cycles/kernel/bvh/bvh_traversal.h
+++ b/intern/cycles/kernel/bvh/bvh_traversal.h
@@ -17,13 +17,6 @@
* limitations under the License.
*/
-#ifdef __QBVH__
-# include "kernel/bvh/qbvh_traversal.h"
-#endif
-#ifdef __KERNEL_AVX2__
-# include "kernel/bvh/obvh_traversal.h"
-#endif
-
#if BVH_FEATURE(BVH_HAIR)
# define NODE_INTERSECT bvh_node_intersect
#else
@@ -34,7 +27,6 @@
* enabled/disabled. This way we can compile optimized versions for each case
* without new features slowing things down.
*
- * BVH_INSTANCING: object instancing
* BVH_HAIR: hair curve rendering
* BVH_MOTION: motion blur rendering
*/
@@ -77,26 +69,6 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
BVH_DEBUG_INIT();
-#if defined(__KERNEL_SSE2__)
- const shuffle_swap_t shuf_identity = shuffle_swap_identity();
- const shuffle_swap_t shuf_swap = shuffle_swap_swap();
-
- const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
- ssef Psplat[3], idirsplat[3];
-# if BVH_FEATURE(BVH_HAIR)
- ssef tnear(0.0f), tfar(isect->t);
-# endif
- shuffle_swap_t shufflexyz[3];
-
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
-
- ssef tsplat(0.0f, 0.0f, -isect->t, -isect->t);
-
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-#endif
-
/* traversal loop */
do {
do {
@@ -106,37 +78,18 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
float dist[2];
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
-#if !defined(__KERNEL_SSE2__)
{
traverse_mask = NODE_INTERSECT(kg,
P,
-# if BVH_FEATURE(BVH_HAIR)
+#if BVH_FEATURE(BVH_HAIR)
dir,
-# endif
+#endif
idir,
isect->t,
node_addr,
visibility,
dist);
}
-#else // __KERNEL_SSE2__
- {
- traverse_mask = NODE_INTERSECT(kg,
- P,
- dir,
-# if BVH_FEATURE(BVH_HAIR)
- tnear,
- tfar,
-# endif
- tsplat,
- Psplat,
- idirsplat,
- shufflexyz,
- node_addr,
- visibility,
- dist);
- }
-#endif // __KERNEL_SSE2__
node_addr = __float_as_int(cnodes.z);
node_addr_child1 = __float_as_int(cnodes.w);
@@ -173,9 +126,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
int prim_addr = __float_as_int(leaf.x);
-#if BVH_FEATURE(BVH_INSTANCING)
if (prim_addr >= 0) {
-#endif
const int prim_addr2 = __float_as_int(leaf.y);
const uint type = __float_as_int(leaf.w);
@@ -191,17 +142,8 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
if (triangle_intersect(kg, isect, P, dir, visibility, object, prim_addr)) {
/* shadow ray early termination */
-#if defined(__KERNEL_SSE2__)
if (visibility & PATH_RAY_SHADOW_OPAQUE)
return true;
- tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
-# if BVH_FEATURE(BVH_HAIR)
- tfar = ssef(isect->t);
-# endif
-#else
- if (visibility & PATH_RAY_SHADOW_OPAQUE)
- return true;
-#endif
}
}
break;
@@ -214,51 +156,28 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
if (motion_triangle_intersect(
kg, isect, P, dir, ray->time, visibility, object, prim_addr)) {
/* shadow ray early termination */
-# if defined(__KERNEL_SSE2__)
- if (visibility & PATH_RAY_SHADOW_OPAQUE)
- return true;
- tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
-# if BVH_FEATURE(BVH_HAIR)
- tfar = ssef(isect->t);
-# endif
-# else
if (visibility & PATH_RAY_SHADOW_OPAQUE)
return true;
-# endif
}
}
break;
}
#endif /* BVH_FEATURE(BVH_MOTION) */
#if BVH_FEATURE(BVH_HAIR)
- case PRIMITIVE_CURVE:
- case PRIMITIVE_MOTION_CURVE: {
+ case PRIMITIVE_CURVE_THICK:
+ case PRIMITIVE_MOTION_CURVE_THICK:
+ case PRIMITIVE_CURVE_RIBBON:
+ case PRIMITIVE_MOTION_CURVE_RIBBON: {
for (; prim_addr < prim_addr2; prim_addr++) {
BVH_DEBUG_NEXT_INTERSECTION();
const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
kernel_assert((curve_type & PRIMITIVE_ALL) == (type & PRIMITIVE_ALL));
- bool hit;
- if (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
- hit = cardinal_curve_intersect(
- kg, isect, P, dir, visibility, object, prim_addr, ray->time, curve_type);
- }
- else {
- hit = curve_intersect(
- kg, isect, P, dir, visibility, object, prim_addr, ray->time, curve_type);
- }
+ const bool hit = curve_intersect(
+ kg, isect, P, dir, visibility, object, prim_addr, ray->time, curve_type);
if (hit) {
/* shadow ray early termination */
-# if defined(__KERNEL_SSE2__)
if (visibility & PATH_RAY_SHADOW_OPAQUE)
return true;
- tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
-# if BVH_FEATURE(BVH_HAIR)
- tfar = ssef(isect->t);
-# endif
-# else
- if (visibility & PATH_RAY_SHADOW_OPAQUE)
- return true;
-# endif
}
}
break;
@@ -266,30 +185,16 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
#endif /* BVH_FEATURE(BVH_HAIR) */
}
}
-#if BVH_FEATURE(BVH_INSTANCING)
else {
/* instance push */
object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
-# if BVH_FEATURE(BVH_MOTION)
+#if BVH_FEATURE(BVH_MOTION)
isect->t = bvh_instance_motion_push(
kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
-# else
+#else
isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
-# endif
-
-# if defined(__KERNEL_SSE2__)
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
-
- tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
-# if BVH_FEATURE(BVH_HAIR)
- tfar = ssef(isect->t);
-# endif
-
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-# endif
+#endif
++stack_ptr;
kernel_assert(stack_ptr < BVH_STACK_SIZE);
@@ -300,38 +205,22 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
BVH_DEBUG_NEXT_INSTANCE();
}
}
-#endif /* FEATURE(BVH_INSTANCING) */
} while (node_addr != ENTRYPOINT_SENTINEL);
-#if BVH_FEATURE(BVH_INSTANCING)
if (stack_ptr >= 0) {
kernel_assert(object != OBJECT_NONE);
/* instance pop */
-# if BVH_FEATURE(BVH_MOTION)
+#if BVH_FEATURE(BVH_MOTION)
isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
-# else
+#else
isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
-# endif
-
-# if defined(__KERNEL_SSE2__)
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
-
- tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
-# if BVH_FEATURE(BVH_HAIR)
- tfar = ssef(isect->t);
-# endif
-
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-# endif
+#endif
object = OBJECT_NONE;
node_addr = traversal_stack[stack_ptr];
--stack_ptr;
}
-#endif /* FEATURE(BVH_INSTANCING) */
} while (node_addr != ENTRYPOINT_SENTINEL);
return (isect->prim != PRIM_NONE);
@@ -342,20 +231,7 @@ ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
Intersection *isect,
const uint visibility)
{
- switch (kernel_data.bvh.bvh_layout) {
-#ifdef __KERNEL_AVX2__
- case BVH_LAYOUT_BVH8:
- return BVH_FUNCTION_FULL_NAME(OBVH)(kg, ray, isect, visibility);
-#endif
-#ifdef __QBVH__
- case BVH_LAYOUT_BVH4:
- return BVH_FUNCTION_FULL_NAME(QBVH)(kg, ray, isect, visibility);
-#endif /* __QBVH__ */
- case BVH_LAYOUT_BVH2:
- return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect, visibility);
- }
- kernel_assert(!"Should not happen");
- return false;
+ return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect, visibility);
}
#undef BVH_FUNCTION_NAME
diff --git a/intern/cycles/kernel/bvh/bvh_types.h b/intern/cycles/kernel/bvh/bvh_types.h
index 84dc0dbaef5..b173568266b 100644
--- a/intern/cycles/kernel/bvh/bvh_types.h
+++ b/intern/cycles/kernel/bvh/bvh_types.h
@@ -31,13 +31,10 @@ CCL_NAMESPACE_BEGIN
/* 64 object BVH + 64 mesh BVH + 64 object node splitting */
#define BVH_STACK_SIZE 192
-#define BVH_QSTACK_SIZE 384
-#define BVH_OSTACK_SIZE 768
/* BVH intersection function variations */
-#define BVH_INSTANCING 1
-#define BVH_MOTION 2
-#define BVH_HAIR 4
+#define BVH_MOTION 1
+#define BVH_HAIR 2
#define BVH_NAME_JOIN(x, y) x##_##y
#define BVH_NAME_EVAL(x, y) BVH_NAME_JOIN(x, y)
diff --git a/intern/cycles/kernel/bvh/bvh_volume.h b/intern/cycles/kernel/bvh/bvh_volume.h
index c83b0d783f4..1f2ea47269b 100644
--- a/intern/cycles/kernel/bvh/bvh_volume.h
+++ b/intern/cycles/kernel/bvh/bvh_volume.h
@@ -17,13 +17,6 @@
* limitations under the License.
*/
-#ifdef __QBVH__
-# include "kernel/bvh/qbvh_volume.h"
-# ifdef __KERNEL_AVX2__
-# include "kernel/bvh/obvh_volume.h"
-# endif
-#endif
-
#if BVH_FEATURE(BVH_HAIR)
# define NODE_INTERSECT bvh_node_intersect
#else
@@ -34,7 +27,6 @@
* various features can be enabled/disabled. This way we can compile optimized
* versions for each case without new features slowing things down.
*
- * BVH_INSTANCING: object instancing
* BVH_MOTION: motion blur rendering
*/
@@ -79,26 +71,6 @@ ccl_device_inline
isect->prim = PRIM_NONE;
isect->object = OBJECT_NONE;
-#if defined(__KERNEL_SSE2__)
- const shuffle_swap_t shuf_identity = shuffle_swap_identity();
- const shuffle_swap_t shuf_swap = shuffle_swap_swap();
-
- const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
- ssef Psplat[3], idirsplat[3];
-# if BVH_FEATURE(BVH_HAIR)
- ssef tnear(0.0f), tfar(isect->t);
-# endif
- shuffle_swap_t shufflexyz[3];
-
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
-
- ssef tsplat(0.0f, 0.0f, -isect->t, -isect->t);
-
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-#endif
-
/* traversal loop */
do {
do {
@@ -108,33 +80,16 @@ ccl_device_inline
float dist[2];
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
-#if !defined(__KERNEL_SSE2__)
traverse_mask = NODE_INTERSECT(kg,
P,
-# if BVH_FEATURE(BVH_HAIR)
+#if BVH_FEATURE(BVH_HAIR)
dir,
-# endif
+#endif
idir,
isect->t,
node_addr,
visibility,
dist);
-#else // __KERNEL_SSE2__
- traverse_mask = NODE_INTERSECT(kg,
- P,
- dir,
-# if BVH_FEATURE(BVH_HAIR)
- tnear,
- tfar,
-# endif
- tsplat,
- Psplat,
- idirsplat,
- shufflexyz,
- node_addr,
- visibility,
- dist);
-#endif // __KERNEL_SSE2__
node_addr = __float_as_int(cnodes.z);
node_addr_child1 = __float_as_int(cnodes.w);
@@ -170,9 +125,7 @@ ccl_device_inline
float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
int prim_addr = __float_as_int(leaf.x);
-#if BVH_FEATURE(BVH_INSTANCING)
if (prim_addr >= 0) {
-#endif
const int prim_addr2 = __float_as_int(leaf.y);
const uint type = __float_as_int(leaf.w);
@@ -222,31 +175,17 @@ ccl_device_inline
}
}
}
-#if BVH_FEATURE(BVH_INSTANCING)
else {
/* instance push */
object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
int object_flag = kernel_tex_fetch(__object_flag, object);
if (object_flag & SD_OBJECT_HAS_VOLUME) {
-# if BVH_FEATURE(BVH_MOTION)
+#if BVH_FEATURE(BVH_MOTION)
isect->t = bvh_instance_motion_push(
kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
-# else
+#else
isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
-# endif
-
-# if defined(__KERNEL_SSE2__)
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
-
- tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
-# if BVH_FEATURE(BVH_HAIR)
- tfar = ssef(isect->t);
-# endif
-
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-# endif
+#endif
++stack_ptr;
kernel_assert(stack_ptr < BVH_STACK_SIZE);
@@ -262,38 +201,22 @@ ccl_device_inline
}
}
}
-#endif /* FEATURE(BVH_INSTANCING) */
} while (node_addr != ENTRYPOINT_SENTINEL);
-#if BVH_FEATURE(BVH_INSTANCING)
if (stack_ptr >= 0) {
kernel_assert(object != OBJECT_NONE);
/* instance pop */
-# if BVH_FEATURE(BVH_MOTION)
+#if BVH_FEATURE(BVH_MOTION)
isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
-# else
+#else
isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
-# endif
-
-# if defined(__KERNEL_SSE2__)
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
-
- tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
-# if BVH_FEATURE(BVH_HAIR)
- tfar = ssef(isect->t);
-# endif
-
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-# endif
+#endif
object = OBJECT_NONE;
node_addr = traversal_stack[stack_ptr];
--stack_ptr;
}
-#endif /* FEATURE(BVH_MOTION) */
} while (node_addr != ENTRYPOINT_SENTINEL);
return (isect->prim != PRIM_NONE);
@@ -304,20 +227,7 @@ ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
Intersection *isect,
const uint visibility)
{
- switch (kernel_data.bvh.bvh_layout) {
-#ifdef __KERNEL_AVX2__
- case BVH_LAYOUT_BVH8:
- return BVH_FUNCTION_FULL_NAME(OBVH)(kg, ray, isect, visibility);
-#endif
-#ifdef __QBVH__
- case BVH_LAYOUT_BVH4:
- return BVH_FUNCTION_FULL_NAME(QBVH)(kg, ray, isect, visibility);
-#endif
- case BVH_LAYOUT_BVH2:
- return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect, visibility);
- }
- kernel_assert(!"Should not happen");
- return false;
+ return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect, visibility);
}
#undef BVH_FUNCTION_NAME
diff --git a/intern/cycles/kernel/bvh/bvh_volume_all.h b/intern/cycles/kernel/bvh/bvh_volume_all.h
index ae8c4d12e8a..a8664cc4331 100644
--- a/intern/cycles/kernel/bvh/bvh_volume_all.h
+++ b/intern/cycles/kernel/bvh/bvh_volume_all.h
@@ -17,13 +17,6 @@
* limitations under the License.
*/
-#ifdef __QBVH__
-# include "kernel/bvh/qbvh_volume_all.h"
-# ifdef __KERNEL_AVX2__
-# include "kernel/bvh/obvh_volume_all.h"
-# endif
-#endif
-
#if BVH_FEATURE(BVH_HAIR)
# define NODE_INTERSECT bvh_node_intersect
#else
@@ -34,7 +27,6 @@
* various features can be enabled/disabled. This way we can compile optimized
* versions for each case without new features slowing things down.
*
- * BVH_INSTANCING: object instancing
* BVH_MOTION: motion blur rendering
*/
@@ -76,33 +68,11 @@ ccl_device_inline
Transform ob_itfm;
#endif
-#if BVH_FEATURE(BVH_INSTANCING)
int num_hits_in_instance = 0;
-#endif
uint num_hits = 0;
isect_array->t = tmax;
-#if defined(__KERNEL_SSE2__)
- const shuffle_swap_t shuf_identity = shuffle_swap_identity();
- const shuffle_swap_t shuf_swap = shuffle_swap_swap();
-
- const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
- ssef Psplat[3], idirsplat[3];
-# if BVH_FEATURE(BVH_HAIR)
- ssef tnear(0.0f), tfar(isect_t);
-# endif
- shuffle_swap_t shufflexyz[3];
-
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
-
- ssef tsplat(0.0f, 0.0f, -isect_t, -isect_t);
-
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-#endif /* __KERNEL_SSE2__ */
-
/* traversal loop */
do {
do {
@@ -112,33 +82,16 @@ ccl_device_inline
float dist[2];
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
-#if !defined(__KERNEL_SSE2__)
traverse_mask = NODE_INTERSECT(kg,
P,
-# if BVH_FEATURE(BVH_HAIR)
+#if BVH_FEATURE(BVH_HAIR)
dir,
-# endif
+#endif
idir,
isect_t,
node_addr,
visibility,
dist);
-#else // __KERNEL_SSE2__
- traverse_mask = NODE_INTERSECT(kg,
- P,
- dir,
-# if BVH_FEATURE(BVH_HAIR)
- tnear,
- tfar,
-# endif
- tsplat,
- Psplat,
- idirsplat,
- shufflexyz,
- node_addr,
- visibility,
- dist);
-#endif // __KERNEL_SSE2__
node_addr = __float_as_int(cnodes.z);
node_addr_child1 = __float_as_int(cnodes.w);
@@ -174,9 +127,7 @@ ccl_device_inline
float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
int prim_addr = __float_as_int(leaf.x);
-#if BVH_FEATURE(BVH_INSTANCING)
if (prim_addr >= 0) {
-#endif
const int prim_addr2 = __float_as_int(leaf.y);
const uint type = __float_as_int(leaf.w);
bool hit;
@@ -204,25 +155,21 @@ ccl_device_inline
/* Move on to next entry in intersections array. */
isect_array++;
num_hits++;
-#if BVH_FEATURE(BVH_INSTANCING)
num_hits_in_instance++;
-#endif
isect_array->t = isect_t;
if (num_hits == max_hits) {
-#if BVH_FEATURE(BVH_INSTANCING)
if (object != OBJECT_NONE) {
-# if BVH_FEATURE(BVH_MOTION)
+#if BVH_FEATURE(BVH_MOTION)
float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
-# else
+#else
Transform itfm = object_fetch_transform(
kg, object, OBJECT_INVERSE_TRANSFORM);
float t_fac = 1.0f / len(transform_direction(&itfm, dir));
-# endif
+#endif
for (int i = 0; i < num_hits_in_instance; i++) {
(isect_array - i - 1)->t *= t_fac;
}
}
-#endif /* BVH_FEATURE(BVH_INSTANCING) */
return num_hits;
}
}
@@ -248,25 +195,21 @@ ccl_device_inline
/* Move on to next entry in intersections array. */
isect_array++;
num_hits++;
-# if BVH_FEATURE(BVH_INSTANCING)
num_hits_in_instance++;
-# endif
isect_array->t = isect_t;
if (num_hits == max_hits) {
-# if BVH_FEATURE(BVH_INSTANCING)
if (object != OBJECT_NONE) {
-# if BVH_FEATURE(BVH_MOTION)
+# if BVH_FEATURE(BVH_MOTION)
float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
-# else
+# else
Transform itfm = object_fetch_transform(
kg, object, OBJECT_INVERSE_TRANSFORM);
float t_fac = 1.0f / len(transform_direction(&itfm, dir));
-# endif
+# endif
for (int i = 0; i < num_hits_in_instance; i++) {
(isect_array - i - 1)->t *= t_fac;
}
}
-# endif /* BVH_FEATURE(BVH_INSTANCING) */
return num_hits;
}
}
@@ -279,35 +222,21 @@ ccl_device_inline
}
}
}
-#if BVH_FEATURE(BVH_INSTANCING)
else {
/* instance push */
object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
int object_flag = kernel_tex_fetch(__object_flag, object);
if (object_flag & SD_OBJECT_HAS_VOLUME) {
-# if BVH_FEATURE(BVH_MOTION)
+#if BVH_FEATURE(BVH_MOTION)
isect_t = bvh_instance_motion_push(
kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
-# else
+#else
isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
-# endif
+#endif
num_hits_in_instance = 0;
isect_array->t = isect_t;
-# if defined(__KERNEL_SSE2__)
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
-
- tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
-# if BVH_FEATURE(BVH_HAIR)
- tfar = ssef(isect_t);
-# endif
-
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-# endif
-
++stack_ptr;
kernel_assert(stack_ptr < BVH_STACK_SIZE);
traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL;
@@ -322,55 +251,39 @@ ccl_device_inline
}
}
}
-#endif /* FEATURE(BVH_INSTANCING) */
} while (node_addr != ENTRYPOINT_SENTINEL);
-#if BVH_FEATURE(BVH_INSTANCING)
if (stack_ptr >= 0) {
kernel_assert(object != OBJECT_NONE);
/* Instance pop. */
if (num_hits_in_instance) {
float t_fac;
-# if BVH_FEATURE(BVH_MOTION)
+#if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
-# else
+#else
bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
-# endif
+#endif
/* Scale isect->t to adjust for instancing. */
for (int i = 0; i < num_hits_in_instance; i++) {
(isect_array - i - 1)->t *= t_fac;
}
}
else {
-# if BVH_FEATURE(BVH_MOTION)
+#if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
-# else
+#else
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
-# endif
+#endif
}
isect_t = tmax;
isect_array->t = isect_t;
-# if defined(__KERNEL_SSE2__)
- Psplat[0] = ssef(P.x);
- Psplat[1] = ssef(P.y);
- Psplat[2] = ssef(P.z);
-
- tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
-# if BVH_FEATURE(BVH_HAIR)
- tfar = ssef(isect_t);
-# endif
-
- gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-# endif
-
object = OBJECT_NONE;
node_addr = traversal_stack[stack_ptr];
--stack_ptr;
}
-#endif /* FEATURE(BVH_INSTANCING) */
} while (node_addr != ENTRYPOINT_SENTINEL);
return num_hits;
@@ -382,20 +295,7 @@ ccl_device_inline uint BVH_FUNCTION_NAME(KernelGlobals *kg,
const uint max_hits,
const uint visibility)
{
- switch (kernel_data.bvh.bvh_layout) {
-#ifdef __KERNEL_AVX2__
- case BVH_LAYOUT_BVH8:
- return BVH_FUNCTION_FULL_NAME(OBVH)(kg, ray, isect_array, max_hits, visibility);
-#endif
-#ifdef __QBVH__
- case BVH_LAYOUT_BVH4:
- return BVH_FUNCTION_FULL_NAME(QBVH)(kg, ray, isect_array, max_hits, visibility);
-#endif
- case BVH_LAYOUT_BVH2:
- return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect_array, max_hits, visibility);
- }
- kernel_assert(!"Should not happen");
- return 0;
+ return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect_array, max_hits, visibility);
}
#undef BVH_FUNCTION_NAME
diff --git a/intern/cycles/kernel/bvh/obvh_local.h b/intern/cycles/kernel/bvh/obvh_local.h
deleted file mode 100644
index e6bb548bc5b..00000000000
--- a/intern/cycles/kernel/bvh/obvh_local.h
+++ /dev/null
@@ -1,398 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* This is a template BVH traversal function for subsurface scattering, where
- * various features can be enabled/disabled. This way we can compile optimized
- * versions for each case without new features slowing things down.
- *
- * BVH_MOTION: motion blur rendering
- */
-
-#if BVH_FEATURE(BVH_HAIR)
-# define NODE_INTERSECT obvh_node_intersect
-#else
-# define NODE_INTERSECT obvh_aligned_node_intersect
-#endif
-
-ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
- const Ray *ray,
- LocalIntersection *local_isect,
- int local_object,
- uint *lcg_state,
- int max_hits)
-{
- /* Traversal stack in CUDA thread-local memory. */
- OBVHStackItem traversal_stack[BVH_OSTACK_SIZE];
- traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
-
- /* Traversal variables in registers. */
- int stack_ptr = 0;
- int node_addr = kernel_tex_fetch(__object_node, local_object);
-
- /* Ray parameters in registers. */
- float3 P = ray->P;
- float3 dir = bvh_clamp_direction(ray->D);
- float3 idir = bvh_inverse_direction(dir);
- int object = OBJECT_NONE;
- float isect_t = ray->t;
-
- if (local_isect != NULL) {
- local_isect->num_hits = 0;
- }
- kernel_assert((local_isect == NULL) == (max_hits == 0));
-
- const int object_flag = kernel_tex_fetch(__object_flag, local_object);
- if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
-#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
- isect_t = bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
-#else
- isect_t = bvh_instance_push(kg, local_object, ray, &P, &dir, &idir, isect_t);
-#endif
- object = local_object;
- }
-
- avxf tnear(0.0f), tfar(isect_t);
-#if BVH_FEATURE(BVH_HAIR)
- avx3f dir4(avxf(dir.x), avxf(dir.y), avxf(dir.z));
-#endif
- avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z));
-
-#ifdef __KERNEL_AVX2__
- float3 P_idir = P * idir;
- avx3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- avx3f org4(avxf(P.x), avxf(P.y), avxf(P.z));
-#endif
-
- /* Offsets to select the side that becomes the lower or upper bound. */
- int near_x, near_y, near_z;
- int far_x, far_y, far_z;
- obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
-
- /* Traversal loop. */
- do {
- do {
- /* Traverse internal nodes. */
- while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
- avxf dist;
- int child_mask = NODE_INTERSECT(kg,
- tnear,
- tfar,
-#ifdef __KERNEL_AVX2__
- P_idir4,
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4,
-#endif
-#if BVH_FEATURE(BVH_HAIR)
- dir4,
-#endif
- idir4,
- near_x,
- near_y,
- near_z,
- far_x,
- far_y,
- far_z,
- node_addr,
- &dist);
-
- if (child_mask != 0) {
- float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
- avxf cnodes;
-#if BVH_FEATURE(BVH_HAIR)
- if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
- cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 26);
- }
- else
-#endif
- {
- cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 14);
- }
-
- /* One child is hit, continue with that child. */
- int r = __bscf(child_mask);
- if (child_mask == 0) {
- node_addr = __float_as_int(cnodes[r]);
- continue;
- }
-
- /* Two children are hit, push far child, and continue with
- * closer child.
- */
- int c0 = __float_as_int(cnodes[r]);
- float d0 = ((float *)&dist)[r];
- r = __bscf(child_mask);
- int c1 = __float_as_int(cnodes[r]);
- float d1 = ((float *)&dist)[r];
- if (child_mask == 0) {
- if (d1 < d0) {
- node_addr = c1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
- continue;
- }
- else {
- node_addr = c0;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- continue;
- }
- }
-
- /* Here starts the slow path for 3 or 4 hit children. We push
- * all nodes onto the stack to sort them there.
- */
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
-
- /* Three children are hit, push all onto stack and sort 3
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c2 = __float_as_int(cnodes[r]);
- float d2 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- /* Four children are hit, push all onto stack and sort 4
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c3 = __float_as_int(cnodes[r]);
- float d3 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c3;
- traversal_stack[stack_ptr].dist = d3;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c3;
- traversal_stack[stack_ptr].dist = d3;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
-
- /* Five children are hit, push all onto stack and sort 5
- * stack items, continue with closest child
- */
- r = __bscf(child_mask);
- int c4 = __float_as_int(cnodes[r]);
- float d4 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c4;
- traversal_stack[stack_ptr].dist = d4;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
- /* Six children are hit, push all onto stack and sort 6
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c5 = __float_as_int(cnodes[r]);
- float d5 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c5;
- traversal_stack[stack_ptr].dist = d5;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c4;
- traversal_stack[stack_ptr].dist = d4;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4],
- &traversal_stack[stack_ptr - 5]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c5;
- traversal_stack[stack_ptr].dist = d5;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c4;
- traversal_stack[stack_ptr].dist = d4;
-
- /* Seven children are hit, push all onto stack and sort 7
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c6 = __float_as_int(cnodes[r]);
- float d6 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c6;
- traversal_stack[stack_ptr].dist = d6;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4],
- &traversal_stack[stack_ptr - 5],
- &traversal_stack[stack_ptr - 6]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
- /* Eight children are hit, push all onto stack and sort 8
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c7 = __float_as_int(cnodes[r]);
- float d7 = ((float *)&dist)[r];
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c7;
- traversal_stack[stack_ptr].dist = d7;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c6;
- traversal_stack[stack_ptr].dist = d6;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4],
- &traversal_stack[stack_ptr - 5],
- &traversal_stack[stack_ptr - 6],
- &traversal_stack[stack_ptr - 7]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
-
- /* If node is leaf, fetch triangle list. */
- if (node_addr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
- int prim_addr = __float_as_int(leaf.x);
-
- int prim_addr2 = __float_as_int(leaf.y);
- const uint type = __float_as_int(leaf.w);
-
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
-
- /* Primitive intersection. */
- switch (type & PRIMITIVE_ALL) {
- case PRIMITIVE_TRIANGLE: {
- /* Intersect ray against primitive, */
- for (; prim_addr < prim_addr2; prim_addr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- if (triangle_intersect_local(kg,
- local_isect,
- P,
- dir,
- object,
- local_object,
- prim_addr,
- isect_t,
- lcg_state,
- max_hits)) {
- return true;
- }
- }
- break;
- }
-#if BVH_FEATURE(BVH_MOTION)
- case PRIMITIVE_MOTION_TRIANGLE: {
- /* Intersect ray against primitive. */
- for (; prim_addr < prim_addr2; prim_addr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- if (motion_triangle_intersect_local(kg,
- local_isect,
- P,
- dir,
- ray->time,
- object,
- local_object,
- prim_addr,
- isect_t,
- lcg_state,
- max_hits)) {
- return true;
- }
- }
- break;
- }
-#endif
- default:
- break;
- }
- }
- } while (node_addr != ENTRYPOINT_SENTINEL);
- } while (node_addr != ENTRYPOINT_SENTINEL);
- return false;
-}
-
-#undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/bvh/obvh_nodes.h b/intern/cycles/kernel/bvh/obvh_nodes.h
deleted file mode 100644
index e5c935b75ed..00000000000
--- a/intern/cycles/kernel/bvh/obvh_nodes.h
+++ /dev/null
@@ -1,410 +0,0 @@
-/*
- * Copyright 2011-2014, Blender Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- * Aligned nodes intersection AVX code is adopted from Embree,
- */
-
-struct OBVHStackItem {
- int addr;
- float dist;
-};
-
-ccl_device_inline void obvh_near_far_idx_calc(const float3 &idir,
- int *ccl_restrict near_x,
- int *ccl_restrict near_y,
- int *ccl_restrict near_z,
- int *ccl_restrict far_x,
- int *ccl_restrict far_y,
- int *ccl_restrict far_z)
-
-{
-#ifdef __KERNEL_SSE__
- *near_x = 0;
- *far_x = 1;
- *near_y = 2;
- *far_y = 3;
- *near_z = 4;
- *far_z = 5;
-
- const size_t mask = movemask(ssef(idir.m128));
-
- const int mask_x = mask & 1;
- const int mask_y = (mask & 2) >> 1;
- const int mask_z = (mask & 4) >> 2;
-
- *near_x += mask_x;
- *far_x -= mask_x;
- *near_y += mask_y;
- *far_y -= mask_y;
- *near_z += mask_z;
- *far_z -= mask_z;
-#else
- if (idir.x >= 0.0f) {
- *near_x = 0;
- *far_x = 1;
- }
- else {
- *near_x = 1;
- *far_x = 0;
- }
- if (idir.y >= 0.0f) {
- *near_y = 2;
- *far_y = 3;
- }
- else {
- *near_y = 3;
- *far_y = 2;
- }
- if (idir.z >= 0.0f) {
- *near_z = 4;
- *far_z = 5;
- }
- else {
- *near_z = 5;
- *far_z = 4;
- }
-#endif
-}
-
-ccl_device_inline void obvh_item_swap(OBVHStackItem *ccl_restrict a, OBVHStackItem *ccl_restrict b)
-{
- OBVHStackItem tmp = *a;
- *a = *b;
- *b = tmp;
-}
-
-ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
- OBVHStackItem *ccl_restrict s2,
- OBVHStackItem *ccl_restrict s3)
-{
- if (s2->dist < s1->dist) {
- obvh_item_swap(s2, s1);
- }
- if (s3->dist < s2->dist) {
- obvh_item_swap(s3, s2);
- }
- if (s2->dist < s1->dist) {
- obvh_item_swap(s2, s1);
- }
-}
-
-ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
- OBVHStackItem *ccl_restrict s2,
- OBVHStackItem *ccl_restrict s3,
- OBVHStackItem *ccl_restrict s4)
-{
- if (s2->dist < s1->dist) {
- obvh_item_swap(s2, s1);
- }
- if (s4->dist < s3->dist) {
- obvh_item_swap(s4, s3);
- }
- if (s3->dist < s1->dist) {
- obvh_item_swap(s3, s1);
- }
- if (s4->dist < s2->dist) {
- obvh_item_swap(s4, s2);
- }
- if (s3->dist < s2->dist) {
- obvh_item_swap(s3, s2);
- }
-}
-
-ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
- OBVHStackItem *ccl_restrict s2,
- OBVHStackItem *ccl_restrict s3,
- OBVHStackItem *ccl_restrict s4,
- OBVHStackItem *ccl_restrict s5)
-{
- obvh_stack_sort(s1, s2, s3, s4);
- if (s5->dist < s4->dist) {
- obvh_item_swap(s4, s5);
- if (s4->dist < s3->dist) {
- obvh_item_swap(s3, s4);
- if (s3->dist < s2->dist) {
- obvh_item_swap(s2, s3);
- if (s2->dist < s1->dist) {
- obvh_item_swap(s1, s2);
- }
- }
- }
- }
-}
-
-ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
- OBVHStackItem *ccl_restrict s2,
- OBVHStackItem *ccl_restrict s3,
- OBVHStackItem *ccl_restrict s4,
- OBVHStackItem *ccl_restrict s5,
- OBVHStackItem *ccl_restrict s6)
-{
- obvh_stack_sort(s1, s2, s3, s4, s5);
- if (s6->dist < s5->dist) {
- obvh_item_swap(s5, s6);
- if (s5->dist < s4->dist) {
- obvh_item_swap(s4, s5);
- if (s4->dist < s3->dist) {
- obvh_item_swap(s3, s4);
- if (s3->dist < s2->dist) {
- obvh_item_swap(s2, s3);
- if (s2->dist < s1->dist) {
- obvh_item_swap(s1, s2);
- }
- }
- }
- }
- }
-}
-
-ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
- OBVHStackItem *ccl_restrict s2,
- OBVHStackItem *ccl_restrict s3,
- OBVHStackItem *ccl_restrict s4,
- OBVHStackItem *ccl_restrict s5,
- OBVHStackItem *ccl_restrict s6,
- OBVHStackItem *ccl_restrict s7)
-{
- obvh_stack_sort(s1, s2, s3, s4, s5, s6);
- if (s7->dist < s6->dist) {
- obvh_item_swap(s6, s7);
- if (s6->dist < s5->dist) {
- obvh_item_swap(s5, s6);
- if (s5->dist < s4->dist) {
- obvh_item_swap(s4, s5);
- if (s4->dist < s3->dist) {
- obvh_item_swap(s3, s4);
- if (s3->dist < s2->dist) {
- obvh_item_swap(s2, s3);
- if (s2->dist < s1->dist) {
- obvh_item_swap(s1, s2);
- }
- }
- }
- }
- }
- }
-}
-
-ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
- OBVHStackItem *ccl_restrict s2,
- OBVHStackItem *ccl_restrict s3,
- OBVHStackItem *ccl_restrict s4,
- OBVHStackItem *ccl_restrict s5,
- OBVHStackItem *ccl_restrict s6,
- OBVHStackItem *ccl_restrict s7,
- OBVHStackItem *ccl_restrict s8)
-{
- obvh_stack_sort(s1, s2, s3, s4, s5, s6, s7);
- if (s8->dist < s7->dist) {
- obvh_item_swap(s7, s8);
- if (s7->dist < s6->dist) {
- obvh_item_swap(s6, s7);
- if (s6->dist < s5->dist) {
- obvh_item_swap(s5, s6);
- if (s5->dist < s4->dist) {
- obvh_item_swap(s4, s5);
- if (s4->dist < s3->dist) {
- obvh_item_swap(s3, s4);
- if (s3->dist < s2->dist) {
- obvh_item_swap(s2, s3);
- if (s2->dist < s1->dist) {
- obvh_item_swap(s1, s2);
- }
- }
- }
- }
- }
- }
- }
-}
-
-/* Axis-aligned nodes intersection */
-
-ccl_device_inline int obvh_aligned_node_intersect(KernelGlobals *ccl_restrict kg,
- const avxf &isect_near,
- const avxf &isect_far,
-#ifdef __KERNEL_AVX2__
- const avx3f &org_idir,
-#else
- const avx3f &org,
-#endif
- const avx3f &idir,
- const int near_x,
- const int near_y,
- const int near_z,
- const int far_x,
- const int far_y,
- const int far_z,
- const int node_addr,
- avxf *ccl_restrict dist)
-{
- const int offset = node_addr + 2;
-#ifdef __KERNEL_AVX2__
- const avxf tnear_x = msub(
- kernel_tex_fetch_avxf(__bvh_nodes, offset + near_x * 2), idir.x, org_idir.x);
- const avxf tnear_y = msub(
- kernel_tex_fetch_avxf(__bvh_nodes, offset + near_y * 2), idir.y, org_idir.y);
- const avxf tnear_z = msub(
- kernel_tex_fetch_avxf(__bvh_nodes, offset + near_z * 2), idir.z, org_idir.z);
- const avxf tfar_x = msub(
- kernel_tex_fetch_avxf(__bvh_nodes, offset + far_x * 2), idir.x, org_idir.x);
- const avxf tfar_y = msub(
- kernel_tex_fetch_avxf(__bvh_nodes, offset + far_y * 2), idir.y, org_idir.y);
- const avxf tfar_z = msub(
- kernel_tex_fetch_avxf(__bvh_nodes, offset + far_z * 2), idir.z, org_idir.z);
-
- const avxf tnear = max4(tnear_x, tnear_y, tnear_z, isect_near);
- const avxf tfar = min4(tfar_x, tfar_y, tfar_z, isect_far);
- const avxb vmask = tnear <= tfar;
- int mask = (int)movemask(vmask);
- *dist = tnear;
- return mask;
-#else
- return 0;
-#endif
-}
-
-/* Unaligned nodes intersection */
-
-ccl_device_inline int obvh_unaligned_node_intersect(KernelGlobals *ccl_restrict kg,
- const avxf &isect_near,
- const avxf &isect_far,
-#ifdef __KERNEL_AVX2__
- const avx3f &org_idir,
-#endif
- const avx3f &org,
- const avx3f &dir,
- const avx3f &idir,
- const int near_x,
- const int near_y,
- const int near_z,
- const int far_x,
- const int far_y,
- const int far_z,
- const int node_addr,
- avxf *ccl_restrict dist)
-{
- const int offset = node_addr;
- const avxf tfm_x_x = kernel_tex_fetch_avxf(__bvh_nodes, offset + 2);
- const avxf tfm_x_y = kernel_tex_fetch_avxf(__bvh_nodes, offset + 4);
- const avxf tfm_x_z = kernel_tex_fetch_avxf(__bvh_nodes, offset + 6);
-
- const avxf tfm_y_x = kernel_tex_fetch_avxf(__bvh_nodes, offset + 8);
- const avxf tfm_y_y = kernel_tex_fetch_avxf(__bvh_nodes, offset + 10);
- const avxf tfm_y_z = kernel_tex_fetch_avxf(__bvh_nodes, offset + 12);
-
- const avxf tfm_z_x = kernel_tex_fetch_avxf(__bvh_nodes, offset + 14);
- const avxf tfm_z_y = kernel_tex_fetch_avxf(__bvh_nodes, offset + 16);
- const avxf tfm_z_z = kernel_tex_fetch_avxf(__bvh_nodes, offset + 18);
-
- const avxf tfm_t_x = kernel_tex_fetch_avxf(__bvh_nodes, offset + 20);
- const avxf tfm_t_y = kernel_tex_fetch_avxf(__bvh_nodes, offset + 22);
- const avxf tfm_t_z = kernel_tex_fetch_avxf(__bvh_nodes, offset + 24);
-
- const avxf aligned_dir_x = dir.x * tfm_x_x + dir.y * tfm_x_y + dir.z * tfm_x_z,
- aligned_dir_y = dir.x * tfm_y_x + dir.y * tfm_y_y + dir.z * tfm_y_z,
- aligned_dir_z = dir.x * tfm_z_x + dir.y * tfm_z_y + dir.z * tfm_z_z;
-
- const avxf aligned_P_x = org.x * tfm_x_x + org.y * tfm_x_y + org.z * tfm_x_z + tfm_t_x,
- aligned_P_y = org.x * tfm_y_x + org.y * tfm_y_y + org.z * tfm_y_z + tfm_t_y,
- aligned_P_z = org.x * tfm_z_x + org.y * tfm_z_y + org.z * tfm_z_z + tfm_t_z;
-
- const avxf neg_one(-1.0f);
- const avxf nrdir_x = neg_one / aligned_dir_x, nrdir_y = neg_one / aligned_dir_y,
- nrdir_z = neg_one / aligned_dir_z;
-
- const avxf tlower_x = aligned_P_x * nrdir_x, tlower_y = aligned_P_y * nrdir_y,
- tlower_z = aligned_P_z * nrdir_z;
-
- const avxf tupper_x = tlower_x - nrdir_x, tupper_y = tlower_y - nrdir_y,
- tupper_z = tlower_z - nrdir_z;
-
- const avxf tnear_x = min(tlower_x, tupper_x);
- const avxf tnear_y = min(tlower_y, tupper_y);
- const avxf tnear_z = min(tlower_z, tupper_z);
- const avxf tfar_x = max(tlower_x, tupper_x);
- const avxf tfar_y = max(tlower_y, tupper_y);
- const avxf tfar_z = max(tlower_z, tupper_z);
- const avxf tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
- const avxf tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
- const avxb vmask = tnear <= tfar;
- *dist = tnear;
- return movemask(vmask);
-}
-
-/* Intersectors wrappers.
- *
- * They'll check node type and call appropriate intersection code.
- */
-
-ccl_device_inline int obvh_node_intersect(KernelGlobals *ccl_restrict kg,
- const avxf &isect_near,
- const avxf &isect_far,
-#ifdef __KERNEL_AVX2__
- const avx3f &org_idir,
-#endif
- const avx3f &org,
- const avx3f &dir,
- const avx3f &idir,
- const int near_x,
- const int near_y,
- const int near_z,
- const int far_x,
- const int far_y,
- const int far_z,
- const int node_addr,
- avxf *ccl_restrict dist)
-{
- const int offset = node_addr;
- const float4 node = kernel_tex_fetch(__bvh_nodes, offset);
- if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
- return obvh_unaligned_node_intersect(kg,
- isect_near,
- isect_far,
-#ifdef __KERNEL_AVX2__
- org_idir,
-#endif
- org,
- dir,
- idir,
- near_x,
- near_y,
- near_z,
- far_x,
- far_y,
- far_z,
- node_addr,
- dist);
- }
- else {
- return obvh_aligned_node_intersect(kg,
- isect_near,
- isect_far,
-#ifdef __KERNEL_AVX2__
- org_idir,
-#else
- org,
-#endif
- idir,
- near_x,
- near_y,
- near_z,
- far_x,
- far_y,
- far_z,
- node_addr,
- dist);
- }
-}
diff --git a/intern/cycles/kernel/bvh/obvh_shadow_all.h b/intern/cycles/kernel/bvh/obvh_shadow_all.h
deleted file mode 100644
index b7ab75b723c..00000000000
--- a/intern/cycles/kernel/bvh/obvh_shadow_all.h
+++ /dev/null
@@ -1,664 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* This is a template BVH traversal function, where various features can be
- * enabled/disabled. This way we can compile optimized versions for each case
- * without new features slowing things down.
- *
- * BVH_INSTANCING: object instancing
- * BVH_HAIR: hair curve rendering
- * BVH_MOTION: motion blur rendering
- */
-
-#if BVH_FEATURE(BVH_HAIR)
-# define NODE_INTERSECT obvh_node_intersect
-#else
-# define NODE_INTERSECT obvh_aligned_node_intersect
-#endif
-
-ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
- const Ray *ray,
- Intersection *isect_array,
- const int skip_object,
- const uint max_hits,
- uint *num_hits)
-{
- /* TODO(sergey):
- * - Test if pushing distance on the stack helps.
- * - Likely and unlikely for if() statements.
- * - Test restrict attribute for pointers.
- */
-
- /* Traversal stack in CUDA thread-local memory. */
- OBVHStackItem traversal_stack[BVH_OSTACK_SIZE];
- traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
-
- /* Traversal variables in registers. */
- int stack_ptr = 0;
- int node_addr = kernel_data.bvh.root;
-
- /* Ray parameters in registers. */
- const float tmax = ray->t;
- float3 P = ray->P;
- float3 dir = bvh_clamp_direction(ray->D);
- float3 idir = bvh_inverse_direction(dir);
- int object = OBJECT_NONE;
- float isect_t = tmax;
-
-#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
-#endif
-
- *num_hits = 0;
- isect_array->t = tmax;
-
-#if BVH_FEATURE(BVH_INSTANCING)
- int num_hits_in_instance = 0;
-#endif
-
- avxf tnear(0.0f), tfar(isect_t);
-#if BVH_FEATURE(BVH_HAIR)
- avx3f dir4(avxf(dir.x), avxf(dir.y), avxf(dir.z));
-#endif
- avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z));
-
-#ifdef __KERNEL_AVX2__
- float3 P_idir = P * idir;
- avx3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- avx3f org4(avxf(P.x), avxf(P.y), avxf(P.z));
-#endif
-
- /* Offsets to select the side that becomes the lower or upper bound. */
- int near_x, near_y, near_z;
- int far_x, far_y, far_z;
- obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
-
- /* Traversal loop. */
- do {
- do {
- /* Traverse internal nodes. */
- while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
- float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
- (void)inodes;
-
- if (false
-#ifdef __VISIBILITY_FLAG__
- || ((__float_as_uint(inodes.x) & PATH_RAY_SHADOW) == 0)
-#endif
-#if BVH_FEATURE(BVH_MOTION)
- || UNLIKELY(ray->time < inodes.y) || UNLIKELY(ray->time > inodes.z)
-#endif
- ) {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- avxf dist;
- int child_mask = NODE_INTERSECT(kg,
- tnear,
- tfar,
-#ifdef __KERNEL_AVX2__
- P_idir4,
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- //#if !defined(__KERNEL_AVX2__)
- org4,
-#endif
-#if BVH_FEATURE(BVH_HAIR)
- dir4,
-#endif
- idir4,
- near_x,
- near_y,
- near_z,
- far_x,
- far_y,
- far_z,
- node_addr,
- &dist);
-
- if (child_mask != 0) {
- avxf cnodes;
-#if BVH_FEATURE(BVH_HAIR)
- if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
- cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 26);
- }
- else
-#endif
- {
- cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 14);
- }
-
- /* One child is hit, continue with that child. */
- int r = __bscf(child_mask);
- if (child_mask == 0) {
- node_addr = __float_as_int(cnodes[r]);
- continue;
- }
-
- /* Two children are hit, push far child, and continue with
- * closer child.
- */
- int c0 = __float_as_int(cnodes[r]);
- float d0 = ((float *)&dist)[r];
- r = __bscf(child_mask);
- int c1 = __float_as_int(cnodes[r]);
- float d1 = ((float *)&dist)[r];
- if (child_mask == 0) {
- if (d1 < d0) {
- node_addr = c1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
- continue;
- }
- else {
- node_addr = c0;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- continue;
- }
- }
-
- /* Here starts the slow path for 3 or 4 hit children. We push
- * all nodes onto the stack to sort them there.
- */
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
-
- /* Three children are hit, push all onto stack and sort 3
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c2 = __float_as_int(cnodes[r]);
- float d2 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- /* Four children are hit, push all onto stack and sort 4
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c3 = __float_as_int(cnodes[r]);
- float d3 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c3;
- traversal_stack[stack_ptr].dist = d3;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c3;
- traversal_stack[stack_ptr].dist = d3;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
-
- /* Five children are hit, push all onto stack and sort 5
- * stack items, continue with closest child
- */
- r = __bscf(child_mask);
- int c4 = __float_as_int(cnodes[r]);
- float d4 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c4;
- traversal_stack[stack_ptr].dist = d4;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- /* Six children are hit, push all onto stack and sort 6
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c5 = __float_as_int(cnodes[r]);
- float d5 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c5;
- traversal_stack[stack_ptr].dist = d5;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c4;
- traversal_stack[stack_ptr].dist = d4;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4],
- &traversal_stack[stack_ptr - 5]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c5;
- traversal_stack[stack_ptr].dist = d5;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c4;
- traversal_stack[stack_ptr].dist = d4;
-
- /* Seven children are hit, push all onto stack and sort 7
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c6 = __float_as_int(cnodes[r]);
- float d6 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c6;
- traversal_stack[stack_ptr].dist = d6;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4],
- &traversal_stack[stack_ptr - 5],
- &traversal_stack[stack_ptr - 6]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- /* Eight children are hit, push all onto stack and sort 8
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c7 = __float_as_int(cnodes[r]);
- float d7 = ((float *)&dist)[r];
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c7;
- traversal_stack[stack_ptr].dist = d7;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c6;
- traversal_stack[stack_ptr].dist = d6;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4],
- &traversal_stack[stack_ptr - 5],
- &traversal_stack[stack_ptr - 6],
- &traversal_stack[stack_ptr - 7]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
-
- /* If node is leaf, fetch triangle list. */
- if (node_addr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
-#ifdef __VISIBILITY_FLAG__
- if ((__float_as_uint(leaf.z) & PATH_RAY_SHADOW) == 0) {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-#endif
-
- int prim_addr = __float_as_int(leaf.x);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if (prim_addr >= 0) {
-#endif
- int prim_addr2 = __float_as_int(leaf.y);
- const uint type = __float_as_int(leaf.w);
- const uint p_type = type & PRIMITIVE_ALL;
-
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
-
- /* Primitive intersection. */
- if (p_type == PRIMITIVE_TRIANGLE) {
- int prim_count = prim_addr2 - prim_addr;
- if (prim_count < 3) {
- while (prim_addr < prim_addr2) {
- kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) ==
- p_type);
- int hit = triangle_intersect(
- kg, isect_array, P, dir, PATH_RAY_SHADOW, object, prim_addr);
- /* Shadow ray early termination. */
- if (hit) {
- /* detect if this surface has a shader with transparent shadows */
-
- /* todo: optimize so primitive visibility flag indicates if
- * the primitive has a transparent shadow shader? */
- int prim = kernel_tex_fetch(__prim_index, isect_array->prim);
- int shader = 0;
-
-#ifdef __HAIR__
- if (kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE)
-#endif
- {
- shader = kernel_tex_fetch(__tri_shader, prim);
- }
-#ifdef __HAIR__
- else {
- float4 str = kernel_tex_fetch(__curves, prim);
- shader = __float_as_int(str.z);
- }
-#endif
- int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
-
- /* if no transparent shadows, all light is blocked */
- if (!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
- return true;
- }
- /* if maximum number of hits reached, block all light */
- else if (*num_hits == max_hits) {
- return true;
- }
-
- /* move on to next entry in intersections array */
- isect_array++;
- (*num_hits)++;
-#if BVH_FEATURE(BVH_INSTANCING)
- num_hits_in_instance++;
-#endif
-
- isect_array->t = isect_t;
- }
-
- prim_addr++;
- } // while
- }
- else {
- kernel_assert((kernel_tex_fetch(__prim_type, (prim_addr)) & PRIMITIVE_ALL) ==
- p_type);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- int *nhiptr = &num_hits_in_instance;
-#else
- int nhi = 0;
- int *nhiptr = &nhi;
-#endif
-
- int result = triangle_intersect8(kg,
- &isect_array,
- P,
- dir,
- PATH_RAY_SHADOW,
- object,
- prim_addr,
- prim_count,
- num_hits,
- max_hits,
- nhiptr,
- isect_t);
- if (result == 2) {
- return true;
- }
- } // prim_count
- } // PRIMITIVE_TRIANGLE
- else {
- while (prim_addr < prim_addr2) {
- kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) == p_type);
-
-#ifdef __SHADOW_TRICKS__
- uint tri_object = (object == OBJECT_NONE) ?
- kernel_tex_fetch(__prim_object, prim_addr) :
- object;
- if (tri_object == skip_object) {
- ++prim_addr;
- continue;
- }
-#endif
-
- bool hit;
-
- /* todo: specialized intersect functions which don't fill in
- * isect unless needed and check SD_HAS_TRANSPARENT_SHADOW?
- * might give a few % performance improvement */
-
- switch (p_type) {
-
-#if BVH_FEATURE(BVH_MOTION)
- case PRIMITIVE_MOTION_TRIANGLE: {
- hit = motion_triangle_intersect(
- kg, isect_array, P, dir, ray->time, PATH_RAY_SHADOW, object, prim_addr);
- break;
- }
-#endif
-#if BVH_FEATURE(BVH_HAIR)
- case PRIMITIVE_CURVE:
- case PRIMITIVE_MOTION_CURVE: {
- const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
- if (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
- hit = cardinal_curve_intersect(kg,
- isect_array,
- P,
- dir,
- PATH_RAY_SHADOW,
- object,
- prim_addr,
- ray->time,
- curve_type);
- }
- else {
- hit = curve_intersect(kg,
- isect_array,
- P,
- dir,
- PATH_RAY_SHADOW,
- object,
- prim_addr,
- ray->time,
- curve_type);
- }
- break;
- }
-#endif
- default: {
- hit = false;
- break;
- }
- }
-
- /* Shadow ray early termination. */
- if (hit) {
- /* detect if this surface has a shader with transparent shadows */
-
- /* todo: optimize so primitive visibility flag indicates if
- * the primitive has a transparent shadow shader? */
- int prim = kernel_tex_fetch(__prim_index, isect_array->prim);
- int shader = 0;
-
-#ifdef __HAIR__
- if (kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE)
-#endif
- {
- shader = kernel_tex_fetch(__tri_shader, prim);
- }
-#ifdef __HAIR__
- else {
- float4 str = kernel_tex_fetch(__curves, prim);
- shader = __float_as_int(str.z);
- }
-#endif
- int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
-
- /* if no transparent shadows, all light is blocked */
- if (!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
- return true;
- }
- /* if maximum number of hits reached, block all light */
- else if (*num_hits == max_hits) {
- return true;
- }
-
- /* move on to next entry in intersections array */
- isect_array++;
- (*num_hits)++;
-#if BVH_FEATURE(BVH_INSTANCING)
- num_hits_in_instance++;
-#endif
-
- isect_array->t = isect_t;
- }
-
- prim_addr++;
- } // while prim
- }
- }
-#if BVH_FEATURE(BVH_INSTANCING)
- else {
- /* Instance push. */
- object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
-
-# if BVH_FEATURE(BVH_MOTION)
- isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
-# else
- isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
-# endif
-
- num_hits_in_instance = 0;
- isect_array->t = isect_t;
-
- obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
- tfar = avxf(isect_t);
-# if BVH_FEATURE(BVH_HAIR)
- dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
-# endif
- idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
-# ifdef __KERNEL_AVX2__
- P_idir = P * idir;
- P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
-# endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
-# endif
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
-
- node_addr = kernel_tex_fetch(__object_node, object);
- }
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while (node_addr != ENTRYPOINT_SENTINEL);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if (stack_ptr >= 0) {
- kernel_assert(object != OBJECT_NONE);
-
- /* Instance pop. */
- if (num_hits_in_instance) {
- float t_fac;
-# if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
-# else
- bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
-# endif
- /* Scale isect->t to adjust for instancing. */
- for (int i = 0; i < num_hits_in_instance; i++) {
- (isect_array - i - 1)->t *= t_fac;
- }
- }
- else {
-# if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
-# else
- bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
-# endif
- }
-
- isect_t = tmax;
- isect_array->t = isect_t;
-
- obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
- tfar = avxf(isect_t);
-# if BVH_FEATURE(BVH_HAIR)
- dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
-# endif
- idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
-# ifdef __KERNEL_AVX2__
- P_idir = P * idir;
- P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
-# endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
-# endif
-
- object = OBJECT_NONE;
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while (node_addr != ENTRYPOINT_SENTINEL);
-
- return false;
-}
-
-#undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/bvh/obvh_traversal.h b/intern/cycles/kernel/bvh/obvh_traversal.h
deleted file mode 100644
index 9095233f8b6..00000000000
--- a/intern/cycles/kernel/bvh/obvh_traversal.h
+++ /dev/null
@@ -1,557 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* This is a template BVH traversal function, where various features can be
- * enabled/disabled. This way we can compile optimized versions for each case
- * without new features slowing things down.
- *
- * BVH_INSTANCING: object instancing
- * BVH_HAIR: hair curve rendering
- * BVH_MOTION: motion blur rendering
- */
-
-#if BVH_FEATURE(BVH_HAIR)
-# define NODE_INTERSECT obvh_node_intersect
-#else
-# define NODE_INTERSECT obvh_aligned_node_intersect
-#endif
-
-ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
- const Ray *ray,
- Intersection *isect,
- const uint visibility)
-{
- /* Traversal stack in CUDA thread-local memory. */
- OBVHStackItem traversal_stack[BVH_OSTACK_SIZE];
- traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
- traversal_stack[0].dist = -FLT_MAX;
-
- /* Traversal variables in registers. */
- int stack_ptr = 0;
- int node_addr = kernel_data.bvh.root;
- float node_dist = -FLT_MAX;
-
- /* Ray parameters in registers. */
- float3 P = ray->P;
- float3 dir = bvh_clamp_direction(ray->D);
- float3 idir = bvh_inverse_direction(dir);
- int object = OBJECT_NONE;
-
-#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
-#endif
-
- isect->t = ray->t;
- isect->u = 0.0f;
- isect->v = 0.0f;
- isect->prim = PRIM_NONE;
- isect->object = OBJECT_NONE;
-
- BVH_DEBUG_INIT();
- avxf tnear(0.0f), tfar(ray->t);
-#if BVH_FEATURE(BVH_HAIR)
- avx3f dir4(avxf(dir.x), avxf(dir.y), avxf(dir.z));
-#endif
- avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z));
-
-#ifdef __KERNEL_AVX2__
- float3 P_idir = P * idir;
- avx3f P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- avx3f org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
-#endif
-
- /* Offsets to select the side that becomes the lower or upper bound. */
- int near_x, near_y, near_z;
- int far_x, far_y, far_z;
- obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
- /* Traversal loop. */
- do {
- do {
- /* Traverse internal nodes. */
- while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
- float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
- (void)inodes;
-
- if (UNLIKELY(node_dist > isect->t)
-#if BVH_FEATURE(BVH_MOTION)
- || UNLIKELY(ray->time < inodes.y) || UNLIKELY(ray->time > inodes.z)
-#endif
-#ifdef __VISIBILITY_FLAG__
- || (__float_as_uint(inodes.x) & visibility) == 0
-#endif
- ) {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
- continue;
- }
-
- int child_mask;
- avxf dist;
-
- BVH_DEBUG_NEXT_NODE();
-
- {
- child_mask = NODE_INTERSECT(kg,
- tnear,
- tfar,
-#ifdef __KERNEL_AVX2__
- P_idir4,
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4,
-#endif
-#if BVH_FEATURE(BVH_HAIR)
- dir4,
-#endif
- idir4,
- near_x,
- near_y,
- near_z,
- far_x,
- far_y,
- far_z,
- node_addr,
- &dist);
- }
-
- if (child_mask != 0) {
- avxf cnodes;
- /* TODO(sergey): Investigate whether moving cnodes upwards
- * gives a speedup (will be different cache pattern but will
- * avoid extra check here).
- */
-#if BVH_FEATURE(BVH_HAIR)
- if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
- cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 26);
- }
- else
-#endif
- {
- cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 14);
- }
-
- /* One child is hit, continue with that child. */
- int r = __bscf(child_mask);
- float d0 = ((float *)&dist)[r];
- if (child_mask == 0) {
- node_addr = __float_as_int(cnodes[r]);
- node_dist = d0;
- continue;
- }
-
- /* Two children are hit, push far child, and continue with
- * closer child.
- */
- int c0 = __float_as_int(cnodes[r]);
- r = __bscf(child_mask);
- int c1 = __float_as_int(cnodes[r]);
- float d1 = ((float *)&dist)[r];
- if (child_mask == 0) {
- if (d1 < d0) {
- node_addr = c1;
- node_dist = d1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
- continue;
- }
- else {
- node_addr = c0;
- node_dist = d0;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- continue;
- }
- }
-
- /* Here starts the slow path for 3 or 4 hit children. We push
- * all nodes onto the stack to sort them there.
- */
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
-
- /* Three children are hit, push all onto stack and sort 3
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c2 = __float_as_int(cnodes[r]);
- float d2 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2]);
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
- continue;
- }
-
- /* Four children are hit, push all onto stack and sort 4
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c3 = __float_as_int(cnodes[r]);
- float d3 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c3;
- traversal_stack[stack_ptr].dist = d3;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3]);
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
- continue;
- }
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c3;
- traversal_stack[stack_ptr].dist = d3;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
-
- /* Five children are hit, push all onto stack and sort 5
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c4 = __float_as_int(cnodes[r]);
- float d4 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c4;
- traversal_stack[stack_ptr].dist = d4;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4]);
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
- continue;
- }
-
- /* Six children are hit, push all onto stack and sort 6
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c5 = __float_as_int(cnodes[r]);
- float d5 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c5;
- traversal_stack[stack_ptr].dist = d5;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c4;
- traversal_stack[stack_ptr].dist = d4;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4],
- &traversal_stack[stack_ptr - 5]);
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
- continue;
- }
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c5;
- traversal_stack[stack_ptr].dist = d5;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c4;
- traversal_stack[stack_ptr].dist = d4;
-
- /* Seven children are hit, push all onto stack and sort 7
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c6 = __float_as_int(cnodes[r]);
- float d6 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c6;
- traversal_stack[stack_ptr].dist = d6;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4],
- &traversal_stack[stack_ptr - 5],
- &traversal_stack[stack_ptr - 6]);
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
- continue;
- }
-
- /* Eight children are hit, push all onto stack and sort 8
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c7 = __float_as_int(cnodes[r]);
- float d7 = ((float *)&dist)[r];
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c7;
- traversal_stack[stack_ptr].dist = d7;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c6;
- traversal_stack[stack_ptr].dist = d6;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4],
- &traversal_stack[stack_ptr - 5],
- &traversal_stack[stack_ptr - 6],
- &traversal_stack[stack_ptr - 7]);
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
- continue;
- }
-
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
- }
-
- /* If node is leaf, fetch triangle list. */
- if (node_addr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
-
-#ifdef __VISIBILITY_FLAG__
- if (UNLIKELY((node_dist > isect->t) || ((__float_as_uint(leaf.z) & visibility) == 0)))
-#else
- if (UNLIKELY((node_dist > isect->t)))
-#endif
- {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
- continue;
- }
- int prim_addr = __float_as_int(leaf.x);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if (prim_addr >= 0) {
-#endif
- int prim_addr2 = __float_as_int(leaf.y);
- const uint type = __float_as_int(leaf.w);
-
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
-
- /* Primitive intersection. */
- switch (type & PRIMITIVE_ALL) {
- case PRIMITIVE_TRIANGLE: {
- int prim_count = prim_addr2 - prim_addr;
- if (prim_count < 3) {
- for (; prim_addr < prim_addr2; prim_addr++) {
- BVH_DEBUG_NEXT_INTERSECTION();
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- if (triangle_intersect(kg, isect, P, dir, visibility, object, prim_addr)) {
- tfar = avxf(isect->t);
- /* Shadow ray early termination. */
- if (visibility == PATH_RAY_SHADOW_OPAQUE) {
- return true;
- }
- }
- } // for
- }
- else {
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- if (triangle_intersect8(kg,
- &isect,
- P,
- dir,
- visibility,
- object,
- prim_addr,
- prim_count,
- 0,
- 0,
- NULL,
- 0.0f)) {
- tfar = avxf(isect->t);
- if (visibility == PATH_RAY_SHADOW_OPAQUE) {
- return true;
- }
- }
- } // prim count
- break;
- }
-#if BVH_FEATURE(BVH_MOTION)
- case PRIMITIVE_MOTION_TRIANGLE: {
- for (; prim_addr < prim_addr2; prim_addr++) {
- BVH_DEBUG_NEXT_INTERSECTION();
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- if (motion_triangle_intersect(
- kg, isect, P, dir, ray->time, visibility, object, prim_addr)) {
- tfar = avxf(isect->t);
- /* Shadow ray early termination. */
- if (visibility == PATH_RAY_SHADOW_OPAQUE) {
- return true;
- }
- }
- }
- break;
- }
-#endif /* BVH_FEATURE(BVH_MOTION) */
-#if BVH_FEATURE(BVH_HAIR)
- case PRIMITIVE_CURVE:
- case PRIMITIVE_MOTION_CURVE: {
- for (; prim_addr < prim_addr2; prim_addr++) {
- BVH_DEBUG_NEXT_INTERSECTION();
- const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
- kernel_assert((curve_type & PRIMITIVE_ALL) == (type & PRIMITIVE_ALL));
- bool hit;
- if (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
- hit = cardinal_curve_intersect(
- kg, isect, P, dir, visibility, object, prim_addr, ray->time, curve_type);
- }
- else {
- hit = curve_intersect(
- kg, isect, P, dir, visibility, object, prim_addr, ray->time, curve_type);
- }
- if (hit) {
- tfar = avxf(isect->t);
- /* Shadow ray early termination. */
- if (visibility == PATH_RAY_SHADOW_OPAQUE) {
- return true;
- }
- }
- }
- break;
- }
-#endif /* BVH_FEATURE(BVH_HAIR) */
- }
- }
-#if BVH_FEATURE(BVH_INSTANCING)
- else {
- /* Instance push. */
- object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
-
-# if BVH_FEATURE(BVH_MOTION)
- qbvh_instance_motion_push(
- kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist, &ob_itfm);
-# else
- qbvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist);
-# endif
-
- obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
- tfar = avxf(isect->t);
-# if BVH_FEATURE(BVH_HAIR)
- dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
-# endif
- idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
-# ifdef __KERNEL_AVX2__
- P_idir = P * idir;
- P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
-# endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
-# endif
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
- traversal_stack[stack_ptr].dist = -FLT_MAX;
-
- node_addr = kernel_tex_fetch(__object_node, object);
-
- BVH_DEBUG_NEXT_INSTANCE();
- }
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while (node_addr != ENTRYPOINT_SENTINEL);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if (stack_ptr >= 0) {
- kernel_assert(object != OBJECT_NONE);
-
- /* Instance pop. */
-# if BVH_FEATURE(BVH_MOTION)
- isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
-# else
- isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
-# endif
-
- obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
- tfar = avxf(isect->t);
-# if BVH_FEATURE(BVH_HAIR)
- dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
-# endif
- idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
-# ifdef __KERNEL_AVX2__
- P_idir = P * idir;
- P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
-# endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
-# endif
-
- object = OBJECT_NONE;
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while (node_addr != ENTRYPOINT_SENTINEL);
-
- return (isect->prim != PRIM_NONE);
-}
-
-#undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/bvh/obvh_volume.h b/intern/cycles/kernel/bvh/obvh_volume.h
deleted file mode 100644
index fb41ae783ab..00000000000
--- a/intern/cycles/kernel/bvh/obvh_volume.h
+++ /dev/null
@@ -1,480 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* This is a template BVH traversal function for volumes, where
- * various features can be enabled/disabled. This way we can compile optimized
- * versions for each case without new features slowing things down.
- *
- * BVH_INSTANCING: object instancing
- * BVH_MOTION: motion blur rendering
- */
-
-#if BVH_FEATURE(BVH_HAIR)
-# define NODE_INTERSECT obvh_node_intersect
-#else
-# define NODE_INTERSECT obvh_aligned_node_intersect
-#endif
-
-ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
- const Ray *ray,
- Intersection *isect,
- const uint visibility)
-{
- /* Traversal stack in CUDA thread-local memory. */
- OBVHStackItem traversal_stack[BVH_OSTACK_SIZE];
- traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
-
- /* Traversal variables in registers. */
- int stack_ptr = 0;
- int node_addr = kernel_data.bvh.root;
-
- /* Ray parameters in registers. */
- float3 P = ray->P;
- float3 dir = bvh_clamp_direction(ray->D);
- float3 idir = bvh_inverse_direction(dir);
- int object = OBJECT_NONE;
-
-#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
-#endif
-
- isect->t = ray->t;
- isect->u = 0.0f;
- isect->v = 0.0f;
- isect->prim = PRIM_NONE;
- isect->object = OBJECT_NONE;
-
- avxf tnear(0.0f), tfar(ray->t);
-#if BVH_FEATURE(BVH_HAIR)
- avx3f dir4(avxf(dir.x), avxf(dir.y), avxf(dir.z));
-#endif
- avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z));
-
-#ifdef __KERNEL_AVX2__
- float3 P_idir = P * idir;
- avx3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- avx3f org4(avxf(P.x), avxf(P.y), avxf(P.z));
-#endif
-
- /* Offsets to select the side that becomes the lower or upper bound. */
- int near_x, near_y, near_z;
- int far_x, far_y, far_z;
- obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
-
- /* Traversal loop. */
- do {
- do {
- /* Traverse internal nodes. */
- while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
- float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
-
-#ifdef __VISIBILITY_FLAG__
- if ((__float_as_uint(inodes.x) & visibility) == 0) {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-#endif
-
- avxf dist;
- int child_mask = NODE_INTERSECT(kg,
- tnear,
- tfar,
-#ifdef __KERNEL_AVX2__
- P_idir4,
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4,
-#endif
-#if BVH_FEATURE(BVH_HAIR)
- dir4,
-#endif
- idir4,
- near_x,
- near_y,
- near_z,
- far_x,
- far_y,
- far_z,
- node_addr,
- &dist);
-
- if (child_mask != 0) {
- avxf cnodes;
-#if BVH_FEATURE(BVH_HAIR)
- if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
- cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 26);
- }
- else
-#endif
- {
- cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 14);
- }
-
- /* One child is hit, continue with that child. */
- int r = __bscf(child_mask);
- if (child_mask == 0) {
- node_addr = __float_as_int(cnodes[r]);
- continue;
- }
-
- /* Two children are hit, push far child, and continue with
- * closer child.
- */
- int c0 = __float_as_int(cnodes[r]);
- float d0 = ((float *)&dist)[r];
- r = __bscf(child_mask);
- int c1 = __float_as_int(cnodes[r]);
- float d1 = ((float *)&dist)[r];
- if (child_mask == 0) {
- if (d1 < d0) {
- node_addr = c1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
- continue;
- }
- else {
- node_addr = c0;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- continue;
- }
- }
-
- /* Here starts the slow path for 3 or 4 hit children. We push
- * all nodes onto the stack to sort them there.
- */
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
-
- /* Three children are hit, push all onto stack and sort 3
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c2 = __float_as_int(cnodes[r]);
- float d2 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- /* Four children are hit, push all onto stack and sort 4
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c3 = __float_as_int(cnodes[r]);
- float d3 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c3;
- traversal_stack[stack_ptr].dist = d3;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c3;
- traversal_stack[stack_ptr].dist = d3;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
-
- /* Five children are hit, push all onto stack and sort 5
- * stack items, continue with closest child
- */
- r = __bscf(child_mask);
- int c4 = __float_as_int(cnodes[r]);
- float d4 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c4;
- traversal_stack[stack_ptr].dist = d4;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- /* Six children are hit, push all onto stack and sort 6
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c5 = __float_as_int(cnodes[r]);
- float d5 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c5;
- traversal_stack[stack_ptr].dist = d5;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c4;
- traversal_stack[stack_ptr].dist = d4;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4],
- &traversal_stack[stack_ptr - 5]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c5;
- traversal_stack[stack_ptr].dist = d5;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c4;
- traversal_stack[stack_ptr].dist = d4;
-
- /* Seven children are hit, push all onto stack and sort 7
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c6 = __float_as_int(cnodes[r]);
- float d6 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c6;
- traversal_stack[stack_ptr].dist = d6;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4],
- &traversal_stack[stack_ptr - 5],
- &traversal_stack[stack_ptr - 6]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- /* Eight children are hit, push all onto stack and sort 8
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c7 = __float_as_int(cnodes[r]);
- float d7 = ((float *)&dist)[r];
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c7;
- traversal_stack[stack_ptr].dist = d7;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c6;
- traversal_stack[stack_ptr].dist = d6;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4],
- &traversal_stack[stack_ptr - 5],
- &traversal_stack[stack_ptr - 6],
- &traversal_stack[stack_ptr - 7]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
-
- /* If node is leaf, fetch triangle list. */
- if (node_addr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
-
- if ((__float_as_uint(leaf.z) & visibility) == 0) {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- int prim_addr = __float_as_int(leaf.x);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if (prim_addr >= 0) {
-#endif
- int prim_addr2 = __float_as_int(leaf.y);
- const uint type = __float_as_int(leaf.w);
- const uint p_type = type & PRIMITIVE_ALL;
-
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
-
- /* Primitive intersection. */
- switch (p_type) {
- case PRIMITIVE_TRIANGLE: {
- for (; prim_addr < prim_addr2; prim_addr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- /* Only primitives from volume object. */
- uint tri_object = (object == OBJECT_NONE) ?
- kernel_tex_fetch(__prim_object, prim_addr) :
- object;
- int object_flag = kernel_tex_fetch(__object_flag, tri_object);
- if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
- continue;
- }
- /* Intersect ray against primitive. */
- triangle_intersect(kg, isect, P, dir, visibility, object, prim_addr);
- }
- break;
- }
-#if BVH_FEATURE(BVH_MOTION)
- case PRIMITIVE_MOTION_TRIANGLE: {
- for (; prim_addr < prim_addr2; prim_addr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- /* Only primitives from volume object. */
- uint tri_object = (object == OBJECT_NONE) ?
- kernel_tex_fetch(__prim_object, prim_addr) :
- object;
- int object_flag = kernel_tex_fetch(__object_flag, tri_object);
- if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
- continue;
- }
- /* Intersect ray against primitive. */
- motion_triangle_intersect(
- kg, isect, P, dir, ray->time, visibility, object, prim_addr);
- }
- break;
- }
-#endif
- }
- }
-#if BVH_FEATURE(BVH_INSTANCING)
- else {
- /* Instance push. */
- object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
- int object_flag = kernel_tex_fetch(__object_flag, object);
- if (object_flag & SD_OBJECT_HAS_VOLUME) {
-# if BVH_FEATURE(BVH_MOTION)
- isect->t = bvh_instance_motion_push(
- kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
-# else
- isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
-# endif
-
- obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
- tfar = avxf(isect->t);
-# if BVH_FEATURE(BVH_HAIR)
- dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
-# endif
- idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
-# ifdef __KERNEL_AVX2__
- P_idir = P * idir;
- P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
-# endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
-# endif
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
-
- node_addr = kernel_tex_fetch(__object_node, object);
- }
- else {
- /* Pop. */
- object = OBJECT_NONE;
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
- }
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while (node_addr != ENTRYPOINT_SENTINEL);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if (stack_ptr >= 0) {
- kernel_assert(object != OBJECT_NONE);
-
- /* Instance pop. */
-# if BVH_FEATURE(BVH_MOTION)
- isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
-# else
- isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
-# endif
-
- obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
- tfar = avxf(isect->t);
-# if BVH_FEATURE(BVH_HAIR)
- dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
-# endif
- idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
-# ifdef __KERNEL_AVX2__
- P_idir = P * idir;
- P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
-# endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
-# endif
-
- object = OBJECT_NONE;
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while (node_addr != ENTRYPOINT_SENTINEL);
-
- return (isect->prim != PRIM_NONE);
-}
-
-#undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/bvh/obvh_volume_all.h b/intern/cycles/kernel/bvh/obvh_volume_all.h
deleted file mode 100644
index 56e2afd4a11..00000000000
--- a/intern/cycles/kernel/bvh/obvh_volume_all.h
+++ /dev/null
@@ -1,551 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* This is a template BVH traversal function for volumes, where
- * various features can be enabled/disabled. This way we can compile optimized
- * versions for each case without new features slowing things down.
- *
- * BVH_INSTANCING: object instancing
- * BVH_MOTION: motion blur rendering
- */
-
-#if BVH_FEATURE(BVH_HAIR)
-# define NODE_INTERSECT obvh_node_intersect
-#else
-# define NODE_INTERSECT obvh_aligned_node_intersect
-#endif
-
-ccl_device uint BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
- const Ray *ray,
- Intersection *isect_array,
- const uint max_hits,
- const uint visibility)
-{
- /* Traversal stack in CUDA thread-local memory. */
- OBVHStackItem traversal_stack[BVH_OSTACK_SIZE];
- traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
-
- /* Traversal variables in registers. */
- int stack_ptr = 0;
- int node_addr = kernel_data.bvh.root;
-
- /* Ray parameters in registers. */
- const float tmax = ray->t;
- float3 P = ray->P;
- float3 dir = bvh_clamp_direction(ray->D);
- float3 idir = bvh_inverse_direction(dir);
- int object = OBJECT_NONE;
- float isect_t = tmax;
-
-#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
-#endif
-
- uint num_hits = 0;
- isect_array->t = tmax;
-
-#if BVH_FEATURE(BVH_INSTANCING)
- int num_hits_in_instance = 0;
-#endif
-
- avxf tnear(0.0f), tfar(isect_t);
-#if BVH_FEATURE(BVH_HAIR)
- avx3f dir4(avxf(dir.x), avxf(dir.y), avxf(dir.z));
-#endif
- avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z));
-
-#ifdef __KERNEL_AVX2__
- float3 P_idir = P * idir;
- avx3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- avx3f org4(avxf(P.x), avxf(P.y), avxf(P.z));
-#endif
-
- /* Offsets to select the side that becomes the lower or upper bound. */
- int near_x, near_y, near_z;
- int far_x, far_y, far_z;
- obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
-
- /* Traversal loop. */
- do {
- do {
- /* Traverse internal nodes. */
- while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
- float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
-
-#ifdef __VISIBILITY_FLAG__
- if ((__float_as_uint(inodes.x) & visibility) == 0) {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-#endif
-
- avxf dist;
- int child_mask = NODE_INTERSECT(kg,
- tnear,
- tfar,
-#ifdef __KERNEL_AVX2__
- P_idir4,
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4,
-#endif
-#if BVH_FEATURE(BVH_HAIR)
- dir4,
-#endif
- idir4,
- near_x,
- near_y,
- near_z,
- far_x,
- far_y,
- far_z,
- node_addr,
- &dist);
-
- if (child_mask != 0) {
- avxf cnodes;
-#if BVH_FEATURE(BVH_HAIR)
- if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
- cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 26);
- }
- else
-#endif
- {
- cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 14);
- }
-
- /* One child is hit, continue with that child. */
- int r = __bscf(child_mask);
- if (child_mask == 0) {
- node_addr = __float_as_int(cnodes[r]);
- continue;
- }
-
- /* Two children are hit, push far child, and continue with
- * closer child.
- */
- int c0 = __float_as_int(cnodes[r]);
- float d0 = ((float *)&dist)[r];
- r = __bscf(child_mask);
- int c1 = __float_as_int(cnodes[r]);
- float d1 = ((float *)&dist)[r];
- if (child_mask == 0) {
- if (d1 < d0) {
- node_addr = c1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
- continue;
- }
- else {
- node_addr = c0;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- continue;
- }
- }
-
- /* Here starts the slow path for 3 or 4 hit children. We push
- * all nodes onto the stack to sort them there.
- */
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
-
- /* Three children are hit, push all onto stack and sort 3
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c2 = __float_as_int(cnodes[r]);
- float d2 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- /* Four children are hit, push all onto stack and sort 4
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c3 = __float_as_int(cnodes[r]);
- float d3 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c3;
- traversal_stack[stack_ptr].dist = d3;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c3;
- traversal_stack[stack_ptr].dist = d3;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
-
- /* Five children are hit, push all onto stack and sort 5
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c4 = __float_as_int(cnodes[r]);
- float d4 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c4;
- traversal_stack[stack_ptr].dist = d4;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- /* Six children are hit, push all onto stack and sort 6
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c5 = __float_as_int(cnodes[r]);
- float d5 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c5;
- traversal_stack[stack_ptr].dist = d5;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c4;
- traversal_stack[stack_ptr].dist = d4;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4],
- &traversal_stack[stack_ptr - 5]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c5;
- traversal_stack[stack_ptr].dist = d5;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c4;
- traversal_stack[stack_ptr].dist = d4;
-
- /* Seven children are hit, push all onto stack and sort 7
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c6 = __float_as_int(cnodes[r]);
- float d6 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c6;
- traversal_stack[stack_ptr].dist = d6;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4],
- &traversal_stack[stack_ptr - 5],
- &traversal_stack[stack_ptr - 6]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- /* Eight children are hit, push all onto stack and sort 8
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c7 = __float_as_int(cnodes[r]);
- float d7 = ((float *)&dist)[r];
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c7;
- traversal_stack[stack_ptr].dist = d7;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c6;
- traversal_stack[stack_ptr].dist = d6;
- obvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3],
- &traversal_stack[stack_ptr - 4],
- &traversal_stack[stack_ptr - 5],
- &traversal_stack[stack_ptr - 6],
- &traversal_stack[stack_ptr - 7]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
-
- /* If node is leaf, fetch triangle list. */
- if (node_addr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
-
- if ((__float_as_uint(leaf.z) & visibility) == 0) {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- int prim_addr = __float_as_int(leaf.x);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if (prim_addr >= 0) {
-#endif
- int prim_addr2 = __float_as_int(leaf.y);
- const uint type = __float_as_int(leaf.w);
- const uint p_type = type & PRIMITIVE_ALL;
- bool hit;
-
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
-
- /* Primitive intersection. */
- switch (p_type) {
- case PRIMITIVE_TRIANGLE: {
- for (; prim_addr < prim_addr2; prim_addr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- /* Only primitives from volume object. */
- uint tri_object = (object == OBJECT_NONE) ?
- kernel_tex_fetch(__prim_object, prim_addr) :
- object;
- int object_flag = kernel_tex_fetch(__object_flag, tri_object);
- if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
- continue;
- }
- /* Intersect ray against primitive. */
- hit = triangle_intersect(kg, isect_array, P, dir, visibility, object, prim_addr);
- if (hit) {
- /* Move on to next entry in intersections array. */
- isect_array++;
- num_hits++;
-#if BVH_FEATURE(BVH_INSTANCING)
- num_hits_in_instance++;
-#endif
- isect_array->t = isect_t;
- if (num_hits == max_hits) {
-#if BVH_FEATURE(BVH_INSTANCING)
-# if BVH_FEATURE(BVH_MOTION)
- float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
-# else
- Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
- float t_fac = 1.0f / len(transform_direction(&itfm, dir));
-# endif
- for (int i = 0; i < num_hits_in_instance; i++) {
- (isect_array - i - 1)->t *= t_fac;
- }
-#endif /* BVH_FEATURE(BVH_INSTANCING) */
- return num_hits;
- }
- }
- }
- break;
- }
-#if BVH_FEATURE(BVH_MOTION)
- case PRIMITIVE_MOTION_TRIANGLE: {
- for (; prim_addr < prim_addr2; prim_addr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- /* Only primitives from volume object. */
- uint tri_object = (object == OBJECT_NONE) ?
- kernel_tex_fetch(__prim_object, prim_addr) :
- object;
- int object_flag = kernel_tex_fetch(__object_flag, tri_object);
- if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
- continue;
- }
- /* Intersect ray against primitive. */
- hit = motion_triangle_intersect(
- kg, isect_array, P, dir, ray->time, visibility, object, prim_addr);
- if (hit) {
- /* Move on to next entry in intersections array. */
- isect_array++;
- num_hits++;
-# if BVH_FEATURE(BVH_INSTANCING)
- num_hits_in_instance++;
-# endif
- isect_array->t = isect_t;
- if (num_hits == max_hits) {
-# if BVH_FEATURE(BVH_INSTANCING)
-# if BVH_FEATURE(BVH_MOTION)
- float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
-# else
- Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
- float t_fac = 1.0f / len(transform_direction(&itfm, dir));
-# endif
- for (int i = 0; i < num_hits_in_instance; i++) {
- (isect_array - i - 1)->t *= t_fac;
- }
-# endif /* BVH_FEATURE(BVH_INSTANCING) */
- return num_hits;
- }
- }
- }
- break;
- }
-#endif
- }
- }
-#if BVH_FEATURE(BVH_INSTANCING)
- else {
- /* Instance push. */
- object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
- int object_flag = kernel_tex_fetch(__object_flag, object);
- if (object_flag & SD_OBJECT_HAS_VOLUME) {
-# if BVH_FEATURE(BVH_MOTION)
- isect_t = bvh_instance_motion_push(
- kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
-# else
- isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
-# endif
-
- obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
- tfar = avxf(isect_t);
- idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
-# if BVH_FEATURE(BVH_HAIR)
- dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
-# endif
-# ifdef __KERNEL_AVX2__
- P_idir = P * idir;
- P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
-# endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
-# endif
-
- num_hits_in_instance = 0;
- isect_array->t = isect_t;
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
- traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
-
- node_addr = kernel_tex_fetch(__object_node, object);
- }
- else {
- /* Pop. */
- object = OBJECT_NONE;
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
- }
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while (node_addr != ENTRYPOINT_SENTINEL);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if (stack_ptr >= 0) {
- kernel_assert(object != OBJECT_NONE);
-
- /* Instance pop. */
- if (num_hits_in_instance) {
- float t_fac;
-# if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
-# else
- bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
-# endif
- /* Scale isect->t to adjust for instancing. */
- for (int i = 0; i < num_hits_in_instance; i++) {
- (isect_array - i - 1)->t *= t_fac;
- }
- }
- else {
-# if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
-# else
- bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
-# endif
- }
-
- isect_t = tmax;
- isect_array->t = isect_t;
-
- obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
- tfar = avxf(isect_t);
-# if BVH_FEATURE(BVH_HAIR)
- dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
-# endif
- idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
-# ifdef __KERNEL_AVX2__
- P_idir = P * idir;
- P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
-# endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
-# endif
-
- object = OBJECT_NONE;
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while (node_addr != ENTRYPOINT_SENTINEL);
-
- return num_hits;
-}
-
-#undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/bvh/qbvh_local.h b/intern/cycles/kernel/bvh/qbvh_local.h
deleted file mode 100644
index b21f79bd3a0..00000000000
--- a/intern/cycles/kernel/bvh/qbvh_local.h
+++ /dev/null
@@ -1,291 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* This is a template BVH traversal function for finding local intersections
- * around the shading point, for subsurface scattering and bevel. We disable
- * various features for performance, and for instanced objects avoid traversing
- * other parts of the scene.
- *
- * BVH_MOTION: motion blur rendering
- */
-
-#if BVH_FEATURE(BVH_HAIR)
-# define NODE_INTERSECT qbvh_node_intersect
-#else
-# define NODE_INTERSECT qbvh_aligned_node_intersect
-#endif
-
-ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
- const Ray *ray,
- LocalIntersection *local_isect,
- int local_object,
- uint *lcg_state,
- int max_hits)
-{
- /* TODO(sergey):
- * - Test if pushing distance on the stack helps (for non shadow rays).
- * - Separate version for shadow rays.
- * - Likely and unlikely for if() statements.
- * - SSE for hair.
- * - Test restrict attribute for pointers.
- */
-
- /* Traversal stack in CUDA thread-local memory. */
- QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
- traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
-
- /* Traversal variables in registers. */
- int stack_ptr = 0;
- int node_addr = kernel_tex_fetch(__object_node, local_object);
-
- /* Ray parameters in registers. */
- float3 P = ray->P;
- float3 dir = bvh_clamp_direction(ray->D);
- float3 idir = bvh_inverse_direction(dir);
- int object = OBJECT_NONE;
- float isect_t = ray->t;
-
- if (local_isect != NULL) {
- local_isect->num_hits = 0;
- }
- kernel_assert((local_isect == NULL) == (max_hits == 0));
-
- const int object_flag = kernel_tex_fetch(__object_flag, local_object);
- if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
-#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
- isect_t = bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
-#else
- isect_t = bvh_instance_push(kg, local_object, ray, &P, &dir, &idir, isect_t);
-#endif
- object = local_object;
- }
-
- ssef tnear(0.0f), tfar(isect_t);
-#if BVH_FEATURE(BVH_HAIR)
- sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-#endif
- sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-
-#ifdef __KERNEL_AVX2__
- float3 P_idir = P * idir;
- sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z));
-#endif
-
- /* Offsets to select the side that becomes the lower or upper bound. */
- int near_x, near_y, near_z;
- int far_x, far_y, far_z;
- qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
-
- /* Traversal loop. */
- do {
- do {
- /* Traverse internal nodes. */
- while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
- ssef dist;
- int child_mask = NODE_INTERSECT(kg,
- tnear,
- tfar,
-#ifdef __KERNEL_AVX2__
- P_idir4,
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4,
-#endif
-#if BVH_FEATURE(BVH_HAIR)
- dir4,
-#endif
- idir4,
- near_x,
- near_y,
- near_z,
- far_x,
- far_y,
- far_z,
- node_addr,
- &dist);
-
- if (child_mask != 0) {
- float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
- float4 cnodes;
-#if BVH_FEATURE(BVH_HAIR)
- if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
- cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 13);
- }
- else
-#endif
- {
- cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 7);
- }
-
- /* One child is hit, continue with that child. */
- int r = __bscf(child_mask);
- if (child_mask == 0) {
- node_addr = __float_as_int(cnodes[r]);
- continue;
- }
-
- /* Two children are hit, push far child, and continue with
- * closer child.
- */
- int c0 = __float_as_int(cnodes[r]);
- float d0 = ((float *)&dist)[r];
- r = __bscf(child_mask);
- int c1 = __float_as_int(cnodes[r]);
- float d1 = ((float *)&dist)[r];
- if (child_mask == 0) {
- if (d1 < d0) {
- node_addr = c1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
- continue;
- }
- else {
- node_addr = c0;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- continue;
- }
- }
-
- /* Here starts the slow path for 3 or 4 hit children. We push
- * all nodes onto the stack to sort them there.
- */
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
-
- /* Three children are hit, push all onto stack and sort 3
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c2 = __float_as_int(cnodes[r]);
- float d2 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- qbvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- /* Four children are hit, push all onto stack and sort 4
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c3 = __float_as_int(cnodes[r]);
- float d3 = ((float *)&dist)[r];
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c3;
- traversal_stack[stack_ptr].dist = d3;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- qbvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3]);
- }
-
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
-
- /* If node is leaf, fetch triangle list. */
- if (node_addr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
- int prim_addr = __float_as_int(leaf.x);
-
- int prim_addr2 = __float_as_int(leaf.y);
- const uint type = __float_as_int(leaf.w);
-
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
-
- /* Primitive intersection. */
- switch (type & PRIMITIVE_ALL) {
- case PRIMITIVE_TRIANGLE: {
- /* Intersect ray against primitive, */
- for (; prim_addr < prim_addr2; prim_addr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- if (triangle_intersect_local(kg,
- local_isect,
- P,
- dir,
- object,
- local_object,
- prim_addr,
- isect_t,
- lcg_state,
- max_hits)) {
- return true;
- }
- }
- break;
- }
-#if BVH_FEATURE(BVH_MOTION)
- case PRIMITIVE_MOTION_TRIANGLE: {
- /* Intersect ray against primitive. */
- for (; prim_addr < prim_addr2; prim_addr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- if (motion_triangle_intersect_local(kg,
- local_isect,
- P,
- dir,
- ray->time,
- object,
- local_object,
- prim_addr,
- isect_t,
- lcg_state,
- max_hits)) {
- return true;
- }
- }
- break;
- }
-#endif
- default:
- break;
- }
- }
- } while (node_addr != ENTRYPOINT_SENTINEL);
- } while (node_addr != ENTRYPOINT_SENTINEL);
-
- return false;
-}
-
-#undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/bvh/qbvh_nodes.h b/intern/cycles/kernel/bvh/qbvh_nodes.h
deleted file mode 100644
index 070406fb18a..00000000000
--- a/intern/cycles/kernel/bvh/qbvh_nodes.h
+++ /dev/null
@@ -1,329 +0,0 @@
-/*
- * Copyright 2011-2014, Blender Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- * Aligned nodes intersection SSE code is adopted from Embree,
- */
-
-struct QBVHStackItem {
- int addr;
- float dist;
-};
-
-ccl_device_inline void qbvh_near_far_idx_calc(const float3 &idir,
- int *ccl_restrict near_x,
- int *ccl_restrict near_y,
- int *ccl_restrict near_z,
- int *ccl_restrict far_x,
- int *ccl_restrict far_y,
- int *ccl_restrict far_z)
-
-{
-#ifdef __KERNEL_SSE__
- *near_x = 0;
- *far_x = 1;
- *near_y = 2;
- *far_y = 3;
- *near_z = 4;
- *far_z = 5;
-
- const size_t mask = movemask(ssef(idir.m128));
-
- const int mask_x = mask & 1;
- const int mask_y = (mask & 2) >> 1;
- const int mask_z = (mask & 4) >> 2;
-
- *near_x += mask_x;
- *far_x -= mask_x;
- *near_y += mask_y;
- *far_y -= mask_y;
- *near_z += mask_z;
- *far_z -= mask_z;
-#else
- if (idir.x >= 0.0f) {
- *near_x = 0;
- *far_x = 1;
- }
- else {
- *near_x = 1;
- *far_x = 0;
- }
- if (idir.y >= 0.0f) {
- *near_y = 2;
- *far_y = 3;
- }
- else {
- *near_y = 3;
- *far_y = 2;
- }
- if (idir.z >= 0.0f) {
- *near_z = 4;
- *far_z = 5;
- }
- else {
- *near_z = 5;
- *far_z = 4;
- }
-#endif
-}
-
-/* TOOD(sergey): Investigate if using intrinsics helps for both
- * stack item swap and float comparison.
- */
-ccl_device_inline void qbvh_item_swap(QBVHStackItem *ccl_restrict a, QBVHStackItem *ccl_restrict b)
-{
- QBVHStackItem tmp = *a;
- *a = *b;
- *b = tmp;
-}
-
-ccl_device_inline void qbvh_stack_sort(QBVHStackItem *ccl_restrict s1,
- QBVHStackItem *ccl_restrict s2,
- QBVHStackItem *ccl_restrict s3)
-{
- if (s2->dist < s1->dist) {
- qbvh_item_swap(s2, s1);
- }
- if (s3->dist < s2->dist) {
- qbvh_item_swap(s3, s2);
- }
- if (s2->dist < s1->dist) {
- qbvh_item_swap(s2, s1);
- }
-}
-
-ccl_device_inline void qbvh_stack_sort(QBVHStackItem *ccl_restrict s1,
- QBVHStackItem *ccl_restrict s2,
- QBVHStackItem *ccl_restrict s3,
- QBVHStackItem *ccl_restrict s4)
-{
- if (s2->dist < s1->dist) {
- qbvh_item_swap(s2, s1);
- }
- if (s4->dist < s3->dist) {
- qbvh_item_swap(s4, s3);
- }
- if (s3->dist < s1->dist) {
- qbvh_item_swap(s3, s1);
- }
- if (s4->dist < s2->dist) {
- qbvh_item_swap(s4, s2);
- }
- if (s3->dist < s2->dist) {
- qbvh_item_swap(s3, s2);
- }
-}
-
-/* Axis-aligned nodes intersection */
-
-// ccl_device_inline int qbvh_aligned_node_intersect(KernelGlobals *ccl_restrict kg,
-static int qbvh_aligned_node_intersect(KernelGlobals *ccl_restrict kg,
- const ssef &isect_near,
- const ssef &isect_far,
-#ifdef __KERNEL_AVX2__
- const sse3f &org_idir,
-#else
- const sse3f &org,
-#endif
- const sse3f &idir,
- const int near_x,
- const int near_y,
- const int near_z,
- const int far_x,
- const int far_y,
- const int far_z,
- const int node_addr,
- ssef *ccl_restrict dist)
-{
- const int offset = node_addr + 1;
-#ifdef __KERNEL_AVX2__
- const ssef tnear_x = msub(
- kernel_tex_fetch_ssef(__bvh_nodes, offset + near_x), idir.x, org_idir.x);
- const ssef tnear_y = msub(
- kernel_tex_fetch_ssef(__bvh_nodes, offset + near_y), idir.y, org_idir.y);
- const ssef tnear_z = msub(
- kernel_tex_fetch_ssef(__bvh_nodes, offset + near_z), idir.z, org_idir.z);
- const ssef tfar_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + far_x), idir.x, org_idir.x);
- const ssef tfar_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + far_y), idir.y, org_idir.y);
- const ssef tfar_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + far_z), idir.z, org_idir.z);
-#else
- const ssef tnear_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset + near_x) - org.x) * idir.x;
- const ssef tnear_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset + near_y) - org.y) * idir.y;
- const ssef tnear_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset + near_z) - org.z) * idir.z;
- const ssef tfar_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset + far_x) - org.x) * idir.x;
- const ssef tfar_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset + far_y) - org.y) * idir.y;
- const ssef tfar_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset + far_z) - org.z) * idir.z;
-#endif
-
-#ifdef __KERNEL_SSE41__
- const ssef tnear = maxi(maxi(tnear_x, tnear_y), maxi(tnear_z, isect_near));
- const ssef tfar = mini(mini(tfar_x, tfar_y), mini(tfar_z, isect_far));
- const sseb vmask = cast(tnear) > cast(tfar);
- int mask = (int)movemask(vmask) ^ 0xf;
-#else
- const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
- const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
- const sseb vmask = tnear <= tfar;
- int mask = (int)movemask(vmask);
-#endif
- *dist = tnear;
- return mask;
-}
-
-/* Unaligned nodes intersection */
-
-ccl_device_inline int qbvh_unaligned_node_intersect(KernelGlobals *ccl_restrict kg,
- const ssef &isect_near,
- const ssef &isect_far,
-#ifdef __KERNEL_AVX2__
- const sse3f &org_idir,
-#endif
- const sse3f &org,
- const sse3f &dir,
- const sse3f &idir,
- const int near_x,
- const int near_y,
- const int near_z,
- const int far_x,
- const int far_y,
- const int far_z,
- const int node_addr,
- ssef *ccl_restrict dist)
-{
- const int offset = node_addr;
- const ssef tfm_x_x = kernel_tex_fetch_ssef(__bvh_nodes, offset + 1);
- const ssef tfm_x_y = kernel_tex_fetch_ssef(__bvh_nodes, offset + 2);
- const ssef tfm_x_z = kernel_tex_fetch_ssef(__bvh_nodes, offset + 3);
-
- const ssef tfm_y_x = kernel_tex_fetch_ssef(__bvh_nodes, offset + 4);
- const ssef tfm_y_y = kernel_tex_fetch_ssef(__bvh_nodes, offset + 5);
- const ssef tfm_y_z = kernel_tex_fetch_ssef(__bvh_nodes, offset + 6);
-
- const ssef tfm_z_x = kernel_tex_fetch_ssef(__bvh_nodes, offset + 7);
- const ssef tfm_z_y = kernel_tex_fetch_ssef(__bvh_nodes, offset + 8);
- const ssef tfm_z_z = kernel_tex_fetch_ssef(__bvh_nodes, offset + 9);
-
- const ssef tfm_t_x = kernel_tex_fetch_ssef(__bvh_nodes, offset + 10);
- const ssef tfm_t_y = kernel_tex_fetch_ssef(__bvh_nodes, offset + 11);
- const ssef tfm_t_z = kernel_tex_fetch_ssef(__bvh_nodes, offset + 12);
-
- const ssef aligned_dir_x = dir.x * tfm_x_x + dir.y * tfm_x_y + dir.z * tfm_x_z,
- aligned_dir_y = dir.x * tfm_y_x + dir.y * tfm_y_y + dir.z * tfm_y_z,
- aligned_dir_z = dir.x * tfm_z_x + dir.y * tfm_z_y + dir.z * tfm_z_z;
-
- const ssef aligned_P_x = org.x * tfm_x_x + org.y * tfm_x_y + org.z * tfm_x_z + tfm_t_x,
- aligned_P_y = org.x * tfm_y_x + org.y * tfm_y_y + org.z * tfm_y_z + tfm_t_y,
- aligned_P_z = org.x * tfm_z_x + org.y * tfm_z_y + org.z * tfm_z_z + tfm_t_z;
-
- const ssef neg_one(-1.0f, -1.0f, -1.0f, -1.0f);
- const ssef nrdir_x = neg_one / aligned_dir_x, nrdir_y = neg_one / aligned_dir_y,
- nrdir_z = neg_one / aligned_dir_z;
-
- const ssef tlower_x = aligned_P_x * nrdir_x, tlower_y = aligned_P_y * nrdir_y,
- tlower_z = aligned_P_z * nrdir_z;
-
- const ssef tupper_x = tlower_x - nrdir_x, tupper_y = tlower_y - nrdir_y,
- tupper_z = tlower_z - nrdir_z;
-
-#ifdef __KERNEL_SSE41__
- const ssef tnear_x = mini(tlower_x, tupper_x);
- const ssef tnear_y = mini(tlower_y, tupper_y);
- const ssef tnear_z = mini(tlower_z, tupper_z);
- const ssef tfar_x = maxi(tlower_x, tupper_x);
- const ssef tfar_y = maxi(tlower_y, tupper_y);
- const ssef tfar_z = maxi(tlower_z, tupper_z);
- const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
- const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
- const sseb vmask = tnear <= tfar;
- *dist = tnear;
- return movemask(vmask);
-#else
- const ssef tnear_x = min(tlower_x, tupper_x);
- const ssef tnear_y = min(tlower_y, tupper_y);
- const ssef tnear_z = min(tlower_z, tupper_z);
- const ssef tfar_x = max(tlower_x, tupper_x);
- const ssef tfar_y = max(tlower_y, tupper_y);
- const ssef tfar_z = max(tlower_z, tupper_z);
- const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
- const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
- const sseb vmask = tnear <= tfar;
- *dist = tnear;
- return movemask(vmask);
-#endif
-}
-
-/* Intersectors wrappers.
- *
- * They'll check node type and call appropriate intersection code.
- */
-
-ccl_device_inline int qbvh_node_intersect(KernelGlobals *ccl_restrict kg,
- const ssef &isect_near,
- const ssef &isect_far,
-#ifdef __KERNEL_AVX2__
- const sse3f &org_idir,
-#endif
- const sse3f &org,
- const sse3f &dir,
- const sse3f &idir,
- const int near_x,
- const int near_y,
- const int near_z,
- const int far_x,
- const int far_y,
- const int far_z,
- const int node_addr,
- ssef *ccl_restrict dist)
-{
- const int offset = node_addr;
- const float4 node = kernel_tex_fetch(__bvh_nodes, offset);
- if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
- return qbvh_unaligned_node_intersect(kg,
- isect_near,
- isect_far,
-#ifdef __KERNEL_AVX2__
- org_idir,
-#endif
- org,
- dir,
- idir,
- near_x,
- near_y,
- near_z,
- far_x,
- far_y,
- far_z,
- node_addr,
- dist);
- }
- else {
- return qbvh_aligned_node_intersect(kg,
- isect_near,
- isect_far,
-#ifdef __KERNEL_AVX2__
- org_idir,
-#else
- org,
-#endif
- idir,
- near_x,
- near_y,
- near_z,
- far_x,
- far_y,
- far_z,
- node_addr,
- dist);
- }
-}
diff --git a/intern/cycles/kernel/bvh/qbvh_shadow_all.h b/intern/cycles/kernel/bvh/qbvh_shadow_all.h
deleted file mode 100644
index 682251bf25b..00000000000
--- a/intern/cycles/kernel/bvh/qbvh_shadow_all.h
+++ /dev/null
@@ -1,453 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* This is a template BVH traversal function, where various features can be
- * enabled/disabled. This way we can compile optimized versions for each case
- * without new features slowing things down.
- *
- * BVH_INSTANCING: object instancing
- * BVH_HAIR: hair curve rendering
- * BVH_MOTION: motion blur rendering
- */
-
-#if BVH_FEATURE(BVH_HAIR)
-# define NODE_INTERSECT qbvh_node_intersect
-#else
-# define NODE_INTERSECT qbvh_aligned_node_intersect
-#endif
-
-ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
- const Ray *ray,
- Intersection *isect_array,
- const uint visibility,
- const uint max_hits,
- uint *num_hits)
-{
- /* TODO(sergey):
- * - Test if pushing distance on the stack helps.
- * - Likely and unlikely for if() statements.
- * - Test restrict attribute for pointers.
- */
-
- /* Traversal stack in CUDA thread-local memory. */
- QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
- traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
-
- /* Traversal variables in registers. */
- int stack_ptr = 0;
- int node_addr = kernel_data.bvh.root;
-
- /* Ray parameters in registers. */
- const float tmax = ray->t;
- float3 P = ray->P;
- float3 dir = bvh_clamp_direction(ray->D);
- float3 idir = bvh_inverse_direction(dir);
- int object = OBJECT_NONE;
- float isect_t = tmax;
-
-#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
-#endif
-
- *num_hits = 0;
- isect_array->t = tmax;
-
-#if BVH_FEATURE(BVH_INSTANCING)
- int num_hits_in_instance = 0;
-#endif
-
- ssef tnear(0.0f), tfar(isect_t);
-#if BVH_FEATURE(BVH_HAIR)
- sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-#endif
- sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-
-#ifdef __KERNEL_AVX2__
- float3 P_idir = P * idir;
- sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z));
-#endif
-
- /* Offsets to select the side that becomes the lower or upper bound. */
- int near_x, near_y, near_z;
- int far_x, far_y, far_z;
- qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
-
- /* Traversal loop. */
- do {
- do {
- /* Traverse internal nodes. */
- while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
- float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
- (void)inodes;
-
- if (false
-#ifdef __VISIBILITY_FLAG__
- || ((__float_as_uint(inodes.x) & visibility) == 0)
-#endif
-#if BVH_FEATURE(BVH_MOTION)
- || UNLIKELY(ray->time < inodes.y) || UNLIKELY(ray->time > inodes.z)
-#endif
- ) {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- ssef dist;
- int child_mask = NODE_INTERSECT(kg,
- tnear,
- tfar,
-#ifdef __KERNEL_AVX2__
- P_idir4,
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4,
-#endif
-#if BVH_FEATURE(BVH_HAIR)
- dir4,
-#endif
- idir4,
- near_x,
- near_y,
- near_z,
- far_x,
- far_y,
- far_z,
- node_addr,
- &dist);
-
- if (child_mask != 0) {
- float4 cnodes;
-#if BVH_FEATURE(BVH_HAIR)
- if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
- cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 13);
- }
- else
-#endif
- {
- cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 7);
- }
-
- /* One child is hit, continue with that child. */
- int r = __bscf(child_mask);
- if (child_mask == 0) {
- node_addr = __float_as_int(cnodes[r]);
- continue;
- }
-
- /* Two children are hit, push far child, and continue with
- * closer child.
- */
- int c0 = __float_as_int(cnodes[r]);
- float d0 = ((float *)&dist)[r];
- r = __bscf(child_mask);
- int c1 = __float_as_int(cnodes[r]);
- float d1 = ((float *)&dist)[r];
- if (child_mask == 0) {
- if (d1 < d0) {
- node_addr = c1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
- continue;
- }
- else {
- node_addr = c0;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- continue;
- }
- }
-
- /* Here starts the slow path for 3 or 4 hit children. We push
- * all nodes onto the stack to sort them there.
- */
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
-
- /* Three children are hit, push all onto stack and sort 3
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c2 = __float_as_int(cnodes[r]);
- float d2 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- qbvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- /* Four children are hit, push all onto stack and sort 4
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c3 = __float_as_int(cnodes[r]);
- float d3 = ((float *)&dist)[r];
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c3;
- traversal_stack[stack_ptr].dist = d3;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- qbvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3]);
- }
-
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
-
- /* If node is leaf, fetch triangle list. */
- if (node_addr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
-#ifdef __VISIBILITY_FLAG__
- if ((__float_as_uint(leaf.z) & visibility) == 0) {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-#endif
-
- int prim_addr = __float_as_int(leaf.x);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if (prim_addr >= 0) {
-#endif
- int prim_addr2 = __float_as_int(leaf.y);
- const uint type = __float_as_int(leaf.w);
- const uint p_type = type & PRIMITIVE_ALL;
-
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
-
- /* Primitive intersection. */
- while (prim_addr < prim_addr2) {
- kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) == p_type);
- bool hit;
-
- /* todo: specialized intersect functions which don't fill in
- * isect unless needed and check SD_HAS_TRANSPARENT_SHADOW?
- * might give a few % performance improvement */
-
- switch (p_type) {
- case PRIMITIVE_TRIANGLE: {
- hit = triangle_intersect(kg, isect_array, P, dir, visibility, object, prim_addr);
- break;
- }
-#if BVH_FEATURE(BVH_MOTION)
- case PRIMITIVE_MOTION_TRIANGLE: {
- hit = motion_triangle_intersect(
- kg, isect_array, P, dir, ray->time, visibility, object, prim_addr);
- break;
- }
-#endif
-#if BVH_FEATURE(BVH_HAIR)
- case PRIMITIVE_CURVE:
- case PRIMITIVE_MOTION_CURVE: {
- const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
- if (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
- hit = cardinal_curve_intersect(kg,
- isect_array,
- P,
- dir,
- visibility,
- object,
- prim_addr,
- ray->time,
- curve_type);
- }
- else {
- hit = curve_intersect(kg,
- isect_array,
- P,
- dir,
- visibility,
- object,
- prim_addr,
- ray->time,
- curve_type);
- }
- break;
- }
-#endif
- default: {
- hit = false;
- break;
- }
- }
-
- /* Shadow ray early termination. */
- if (hit) {
- /* detect if this surface has a shader with transparent shadows */
-
- /* todo: optimize so primitive visibility flag indicates if
- * the primitive has a transparent shadow shader? */
- int prim = kernel_tex_fetch(__prim_index, isect_array->prim);
- int shader = 0;
-
-#ifdef __HAIR__
- if (kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE)
-#endif
- {
- shader = kernel_tex_fetch(__tri_shader, prim);
- }
-#ifdef __HAIR__
- else {
- float4 str = kernel_tex_fetch(__curves, prim);
- shader = __float_as_int(str.z);
- }
-#endif
- int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
-
- /* if no transparent shadows, all light is blocked */
- if (!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
- return true;
- }
- /* if maximum number of hits reached, block all light */
- else if (*num_hits == max_hits) {
- return true;
- }
-
- /* move on to next entry in intersections array */
- isect_array++;
- (*num_hits)++;
-#if BVH_FEATURE(BVH_INSTANCING)
- num_hits_in_instance++;
-#endif
-
- isect_array->t = isect_t;
- }
-
- prim_addr++;
- }
- }
-#if BVH_FEATURE(BVH_INSTANCING)
- else {
- /* Instance push. */
- object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
-
-# if BVH_FEATURE(BVH_MOTION)
- isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
-# else
- isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
-# endif
-
- num_hits_in_instance = 0;
- isect_array->t = isect_t;
-
- qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
- tfar = ssef(isect_t);
-# if BVH_FEATURE(BVH_HAIR)
- dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-# endif
- idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-# ifdef __KERNEL_AVX2__
- P_idir = P * idir;
- P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-# endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
-# endif
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
-
- node_addr = kernel_tex_fetch(__object_node, object);
- }
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while (node_addr != ENTRYPOINT_SENTINEL);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if (stack_ptr >= 0) {
- kernel_assert(object != OBJECT_NONE);
-
- /* Instance pop. */
- if (num_hits_in_instance) {
- float t_fac;
-# if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
-# else
- bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
-# endif
- /* Scale isect->t to adjust for instancing. */
- for (int i = 0; i < num_hits_in_instance; i++) {
- (isect_array - i - 1)->t *= t_fac;
- }
- }
- else {
-# if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
-# else
- bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
-# endif
- }
-
- isect_t = tmax;
- isect_array->t = isect_t;
-
- qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
- tfar = ssef(isect_t);
-# if BVH_FEATURE(BVH_HAIR)
- dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-# endif
- idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-# ifdef __KERNEL_AVX2__
- P_idir = P * idir;
- P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-# endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
-# endif
-
- object = OBJECT_NONE;
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while (node_addr != ENTRYPOINT_SENTINEL);
-
- return false;
-}
-
-#undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/bvh/qbvh_traversal.h b/intern/cycles/kernel/bvh/qbvh_traversal.h
deleted file mode 100644
index f43e84bf368..00000000000
--- a/intern/cycles/kernel/bvh/qbvh_traversal.h
+++ /dev/null
@@ -1,420 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* This is a template BVH traversal function, where various features can be
- * enabled/disabled. This way we can compile optimized versions for each case
- * without new features slowing things down.
- *
- * BVH_INSTANCING: object instancing
- * BVH_HAIR: hair curve rendering
- * BVH_MOTION: motion blur rendering
- */
-
-#if BVH_FEATURE(BVH_HAIR)
-# define NODE_INTERSECT qbvh_node_intersect
-#else
-# define NODE_INTERSECT qbvh_aligned_node_intersect
-#endif
-
-ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
- const Ray *ray,
- Intersection *isect,
- const uint visibility)
-{
- /* TODO(sergey):
- * - Test if pushing distance on the stack helps (for non shadow rays).
- * - Separate version for shadow rays.
- * - Likely and unlikely for if() statements.
- * - Test restrict attribute for pointers.
- */
-
- /* Traversal stack in CUDA thread-local memory. */
- QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
- traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
- traversal_stack[0].dist = -FLT_MAX;
-
- /* Traversal variables in registers. */
- int stack_ptr = 0;
- int node_addr = kernel_data.bvh.root;
- float node_dist = -FLT_MAX;
-
- /* Ray parameters in registers. */
- float3 P = ray->P;
- float3 dir = bvh_clamp_direction(ray->D);
- float3 idir = bvh_inverse_direction(dir);
- int object = OBJECT_NONE;
-
-#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
-#endif
-
- isect->t = ray->t;
- isect->u = 0.0f;
- isect->v = 0.0f;
- isect->prim = PRIM_NONE;
- isect->object = OBJECT_NONE;
-
- BVH_DEBUG_INIT();
-
- ssef tnear(0.0f), tfar(ray->t);
-#if BVH_FEATURE(BVH_HAIR)
- sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-#endif
- sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-
-#ifdef __KERNEL_AVX2__
- float3 P_idir = P * idir;
- sse3f P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- sse3f org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
-#endif
-
- /* Offsets to select the side that becomes the lower or upper bound. */
- int near_x, near_y, near_z;
- int far_x, far_y, far_z;
- qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
-
- /* Traversal loop. */
- do {
- do {
- /* Traverse internal nodes. */
- while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
- float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
- (void)inodes;
-
- if (UNLIKELY(node_dist > isect->t)
-#if BVH_FEATURE(BVH_MOTION)
- || UNLIKELY(ray->time < inodes.y) || UNLIKELY(ray->time > inodes.z)
-#endif
-#ifdef __VISIBILITY_FLAG__
- || (__float_as_uint(inodes.x) & visibility) == 0
-#endif
- ) {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
- continue;
- }
-
- int child_mask;
- ssef dist;
-
- BVH_DEBUG_NEXT_NODE();
-
- {
- child_mask = NODE_INTERSECT(kg,
- tnear,
- tfar,
-#ifdef __KERNEL_AVX2__
- P_idir4,
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4,
-#endif
-#if BVH_FEATURE(BVH_HAIR)
- dir4,
-#endif
- idir4,
- near_x,
- near_y,
- near_z,
- far_x,
- far_y,
- far_z,
- node_addr,
- &dist);
- }
-
- if (child_mask != 0) {
- float4 cnodes;
- /* TODO(sergey): Investigate whether moving cnodes upwards
- * gives a speedup (will be different cache pattern but will
- * avoid extra check here).
- */
-#if BVH_FEATURE(BVH_HAIR)
- if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
- cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 13);
- }
- else
-#endif
- {
- cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 7);
- }
-
- /* One child is hit, continue with that child. */
- int r = __bscf(child_mask);
- float d0 = ((float *)&dist)[r];
- if (child_mask == 0) {
- node_addr = __float_as_int(cnodes[r]);
- node_dist = d0;
- continue;
- }
-
- /* Two children are hit, push far child, and continue with
- * closer child.
- */
- int c0 = __float_as_int(cnodes[r]);
- r = __bscf(child_mask);
- int c1 = __float_as_int(cnodes[r]);
- float d1 = ((float *)&dist)[r];
- if (child_mask == 0) {
- if (d1 < d0) {
- node_addr = c1;
- node_dist = d1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
- continue;
- }
- else {
- node_addr = c0;
- node_dist = d0;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- continue;
- }
- }
-
- /* Here starts the slow path for 3 or 4 hit children. We push
- * all nodes onto the stack to sort them there.
- */
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
-
- /* Three children are hit, push all onto stack and sort 3
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c2 = __float_as_int(cnodes[r]);
- float d2 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- qbvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2]);
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
- continue;
- }
-
- /* Four children are hit, push all onto stack and sort 4
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c3 = __float_as_int(cnodes[r]);
- float d3 = ((float *)&dist)[r];
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c3;
- traversal_stack[stack_ptr].dist = d3;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- qbvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3]);
- }
-
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
- }
-
- /* If node is leaf, fetch triangle list. */
- if (node_addr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
-
-#ifdef __VISIBILITY_FLAG__
- if (UNLIKELY((node_dist > isect->t) || ((__float_as_uint(leaf.z) & visibility) == 0)))
-#else
- if (UNLIKELY((node_dist > isect->t)))
-#endif
- {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
- continue;
- }
-
- int prim_addr = __float_as_int(leaf.x);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if (prim_addr >= 0) {
-#endif
- int prim_addr2 = __float_as_int(leaf.y);
- const uint type = __float_as_int(leaf.w);
-
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
-
- /* Primitive intersection. */
- switch (type & PRIMITIVE_ALL) {
- case PRIMITIVE_TRIANGLE: {
- for (; prim_addr < prim_addr2; prim_addr++) {
- BVH_DEBUG_NEXT_INTERSECTION();
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- if (triangle_intersect(kg, isect, P, dir, visibility, object, prim_addr)) {
- tfar = ssef(isect->t);
- /* Shadow ray early termination. */
- if (visibility & PATH_RAY_SHADOW_OPAQUE) {
- return true;
- }
- }
- }
- break;
- }
-#if BVH_FEATURE(BVH_MOTION)
- case PRIMITIVE_MOTION_TRIANGLE: {
- for (; prim_addr < prim_addr2; prim_addr++) {
- BVH_DEBUG_NEXT_INTERSECTION();
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- if (motion_triangle_intersect(
- kg, isect, P, dir, ray->time, visibility, object, prim_addr)) {
- tfar = ssef(isect->t);
- /* Shadow ray early termination. */
- if (visibility & PATH_RAY_SHADOW_OPAQUE) {
- return true;
- }
- }
- }
- break;
- }
-#endif /* BVH_FEATURE(BVH_MOTION) */
-#if BVH_FEATURE(BVH_HAIR)
- case PRIMITIVE_CURVE:
- case PRIMITIVE_MOTION_CURVE: {
- for (; prim_addr < prim_addr2; prim_addr++) {
- BVH_DEBUG_NEXT_INTERSECTION();
- const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
- kernel_assert((curve_type & PRIMITIVE_ALL) == (type & PRIMITIVE_ALL));
- bool hit;
- if (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
- hit = cardinal_curve_intersect(
- kg, isect, P, dir, visibility, object, prim_addr, ray->time, curve_type);
- }
- else {
- hit = curve_intersect(
- kg, isect, P, dir, visibility, object, prim_addr, ray->time, curve_type);
- }
- if (hit) {
- tfar = ssef(isect->t);
- /* Shadow ray early termination. */
- if (visibility & PATH_RAY_SHADOW_OPAQUE) {
- return true;
- }
- }
- }
- break;
- }
-#endif /* BVH_FEATURE(BVH_HAIR) */
- }
- }
-#if BVH_FEATURE(BVH_INSTANCING)
- else {
- /* Instance push. */
- object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
-
-# if BVH_FEATURE(BVH_MOTION)
- qbvh_instance_motion_push(
- kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist, &ob_itfm);
-# else
- qbvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist);
-# endif
-
- qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
- tfar = ssef(isect->t);
-# if BVH_FEATURE(BVH_HAIR)
- dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-# endif
- idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-# ifdef __KERNEL_AVX2__
- P_idir = P * idir;
- P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-# endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
-# endif
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
- traversal_stack[stack_ptr].dist = -FLT_MAX;
-
- node_addr = kernel_tex_fetch(__object_node, object);
-
- BVH_DEBUG_NEXT_INSTANCE();
- }
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while (node_addr != ENTRYPOINT_SENTINEL);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if (stack_ptr >= 0) {
- kernel_assert(object != OBJECT_NONE);
-
- /* Instance pop. */
-# if BVH_FEATURE(BVH_MOTION)
- isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
-# else
- isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
-# endif
-
- qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
- tfar = ssef(isect->t);
-# if BVH_FEATURE(BVH_HAIR)
- dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-# endif
- idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-# ifdef __KERNEL_AVX2__
- P_idir = P * idir;
- P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-# endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
-# endif
-
- object = OBJECT_NONE;
- node_addr = traversal_stack[stack_ptr].addr;
- node_dist = traversal_stack[stack_ptr].dist;
- --stack_ptr;
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while (node_addr != ENTRYPOINT_SENTINEL);
-
- return (isect->prim != PRIM_NONE);
-}
-
-#undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/bvh/qbvh_volume.h b/intern/cycles/kernel/bvh/qbvh_volume.h
deleted file mode 100644
index e4eaed04467..00000000000
--- a/intern/cycles/kernel/bvh/qbvh_volume.h
+++ /dev/null
@@ -1,367 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* This is a template BVH traversal function for volumes, where
- * various features can be enabled/disabled. This way we can compile optimized
- * versions for each case without new features slowing things down.
- *
- * BVH_INSTANCING: object instancing
- * BVH_MOTION: motion blur rendering
- */
-
-#if BVH_FEATURE(BVH_HAIR)
-# define NODE_INTERSECT qbvh_node_intersect
-#else
-# define NODE_INTERSECT qbvh_aligned_node_intersect
-#endif
-
-ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
- const Ray *ray,
- Intersection *isect,
- const uint visibility)
-{
- /* TODO(sergey):
- * - Test if pushing distance on the stack helps.
- * - Likely and unlikely for if() statements.
- * - Test restrict attribute for pointers.
- */
-
- /* Traversal stack in CUDA thread-local memory. */
- QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
- traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
-
- /* Traversal variables in registers. */
- int stack_ptr = 0;
- int node_addr = kernel_data.bvh.root;
-
- /* Ray parameters in registers. */
- float3 P = ray->P;
- float3 dir = bvh_clamp_direction(ray->D);
- float3 idir = bvh_inverse_direction(dir);
- int object = OBJECT_NONE;
-
-#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
-#endif
-
- isect->t = ray->t;
- isect->u = 0.0f;
- isect->v = 0.0f;
- isect->prim = PRIM_NONE;
- isect->object = OBJECT_NONE;
-
- ssef tnear(0.0f), tfar(ray->t);
-#if BVH_FEATURE(BVH_HAIR)
- sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-#endif
- sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-
-#ifdef __KERNEL_AVX2__
- float3 P_idir = P * idir;
- sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z));
-#endif
-
- /* Offsets to select the side that becomes the lower or upper bound. */
- int near_x, near_y, near_z;
- int far_x, far_y, far_z;
- qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
-
- /* Traversal loop. */
- do {
- do {
- /* Traverse internal nodes. */
- while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
- float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
-
-#ifdef __VISIBILITY_FLAG__
- if ((__float_as_uint(inodes.x) & visibility) == 0) {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-#endif
-
- ssef dist;
- int child_mask = NODE_INTERSECT(kg,
- tnear,
- tfar,
-#ifdef __KERNEL_AVX2__
- P_idir4,
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4,
-#endif
-#if BVH_FEATURE(BVH_HAIR)
- dir4,
-#endif
- idir4,
- near_x,
- near_y,
- near_z,
- far_x,
- far_y,
- far_z,
- node_addr,
- &dist);
-
- if (child_mask != 0) {
- float4 cnodes;
-#if BVH_FEATURE(BVH_HAIR)
- if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
- cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 13);
- }
- else
-#endif
- {
- cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 7);
- }
-
- /* One child is hit, continue with that child. */
- int r = __bscf(child_mask);
- if (child_mask == 0) {
- node_addr = __float_as_int(cnodes[r]);
- continue;
- }
-
- /* Two children are hit, push far child, and continue with
- * closer child.
- */
- int c0 = __float_as_int(cnodes[r]);
- float d0 = ((float *)&dist)[r];
- r = __bscf(child_mask);
- int c1 = __float_as_int(cnodes[r]);
- float d1 = ((float *)&dist)[r];
- if (child_mask == 0) {
- if (d1 < d0) {
- node_addr = c1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
- continue;
- }
- else {
- node_addr = c0;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- continue;
- }
- }
-
- /* Here starts the slow path for 3 or 4 hit children. We push
- * all nodes onto the stack to sort them there.
- */
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
-
- /* Three children are hit, push all onto stack and sort 3
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c2 = __float_as_int(cnodes[r]);
- float d2 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- qbvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- /* Four children are hit, push all onto stack and sort 4
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c3 = __float_as_int(cnodes[r]);
- float d3 = ((float *)&dist)[r];
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c3;
- traversal_stack[stack_ptr].dist = d3;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- qbvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3]);
- }
-
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
-
- /* If node is leaf, fetch triangle list. */
- if (node_addr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
-
- if ((__float_as_uint(leaf.z) & visibility) == 0) {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- int prim_addr = __float_as_int(leaf.x);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if (prim_addr >= 0) {
-#endif
- int prim_addr2 = __float_as_int(leaf.y);
- const uint type = __float_as_int(leaf.w);
- const uint p_type = type & PRIMITIVE_ALL;
-
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
-
- /* Primitive intersection. */
- switch (p_type) {
- case PRIMITIVE_TRIANGLE: {
- for (; prim_addr < prim_addr2; prim_addr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- /* Only primitives from volume object. */
- uint tri_object = (object == OBJECT_NONE) ?
- kernel_tex_fetch(__prim_object, prim_addr) :
- object;
- int object_flag = kernel_tex_fetch(__object_flag, tri_object);
- if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
- continue;
- }
- /* Intersect ray against primitive. */
- triangle_intersect(kg, isect, P, dir, visibility, object, prim_addr);
- }
- break;
- }
-#if BVH_FEATURE(BVH_MOTION)
- case PRIMITIVE_MOTION_TRIANGLE: {
- for (; prim_addr < prim_addr2; prim_addr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- /* Only primitives from volume object. */
- uint tri_object = (object == OBJECT_NONE) ?
- kernel_tex_fetch(__prim_object, prim_addr) :
- object;
- int object_flag = kernel_tex_fetch(__object_flag, tri_object);
- if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
- continue;
- }
- /* Intersect ray against primitive. */
- motion_triangle_intersect(
- kg, isect, P, dir, ray->time, visibility, object, prim_addr);
- }
- break;
- }
-#endif
- }
- }
-#if BVH_FEATURE(BVH_INSTANCING)
- else {
- /* Instance push. */
- object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
- int object_flag = kernel_tex_fetch(__object_flag, object);
- if (object_flag & SD_OBJECT_HAS_VOLUME) {
-# if BVH_FEATURE(BVH_MOTION)
- isect->t = bvh_instance_motion_push(
- kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
-# else
- isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
-# endif
-
- qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
- tfar = ssef(isect->t);
-# if BVH_FEATURE(BVH_HAIR)
- dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-# endif
- idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-# ifdef __KERNEL_AVX2__
- P_idir = P * idir;
- P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-# endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
-# endif
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
-
- node_addr = kernel_tex_fetch(__object_node, object);
- }
- else {
- /* Pop. */
- object = OBJECT_NONE;
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
- }
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while (node_addr != ENTRYPOINT_SENTINEL);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if (stack_ptr >= 0) {
- kernel_assert(object != OBJECT_NONE);
-
- /* Instance pop. */
-# if BVH_FEATURE(BVH_MOTION)
- isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
-# else
- isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
-# endif
-
- qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
- tfar = ssef(isect->t);
-# if BVH_FEATURE(BVH_HAIR)
- dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-# endif
- idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-# ifdef __KERNEL_AVX2__
- P_idir = P * idir;
- P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-# endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
-# endif
-
- object = OBJECT_NONE;
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while (node_addr != ENTRYPOINT_SENTINEL);
-
- return (isect->prim != PRIM_NONE);
-}
-
-#undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/bvh/qbvh_volume_all.h b/intern/cycles/kernel/bvh/qbvh_volume_all.h
deleted file mode 100644
index eddc48c487e..00000000000
--- a/intern/cycles/kernel/bvh/qbvh_volume_all.h
+++ /dev/null
@@ -1,444 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* This is a template BVH traversal function for volumes, where
- * various features can be enabled/disabled. This way we can compile optimized
- * versions for each case without new features slowing things down.
- *
- * BVH_INSTANCING: object instancing
- * BVH_MOTION: motion blur rendering
- */
-
-#if BVH_FEATURE(BVH_HAIR)
-# define NODE_INTERSECT qbvh_node_intersect
-#else
-# define NODE_INTERSECT qbvh_aligned_node_intersect
-#endif
-
-ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
- const Ray *ray,
- Intersection *isect_array,
- const uint max_hits,
- const uint visibility)
-{
- /* TODO(sergey):
- * - Test if pushing distance on the stack helps.
- * - Likely and unlikely for if() statements.
- * - Test restrict attribute for pointers.
- */
-
- /* Traversal stack in CUDA thread-local memory. */
- QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
- traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
-
- /* Traversal variables in registers. */
- int stack_ptr = 0;
- int node_addr = kernel_data.bvh.root;
-
- /* Ray parameters in registers. */
- const float tmax = ray->t;
- float3 P = ray->P;
- float3 dir = bvh_clamp_direction(ray->D);
- float3 idir = bvh_inverse_direction(dir);
- int object = OBJECT_NONE;
- float isect_t = tmax;
-
-#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
-#endif
-
- uint num_hits = 0;
- isect_array->t = tmax;
-
-#if BVH_FEATURE(BVH_INSTANCING)
- int num_hits_in_instance = 0;
-#endif
-
- ssef tnear(0.0f), tfar(isect_t);
-#if BVH_FEATURE(BVH_HAIR)
- sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-#endif
- sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-
-#ifdef __KERNEL_AVX2__
- float3 P_idir = P * idir;
- sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z));
-#endif
-
- /* Offsets to select the side that becomes the lower or upper bound. */
- int near_x, near_y, near_z;
- int far_x, far_y, far_z;
- qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
-
- /* Traversal loop. */
- do {
- do {
- /* Traverse internal nodes. */
- while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
- float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
-
-#ifdef __VISIBILITY_FLAG__
- if ((__float_as_uint(inodes.x) & visibility) == 0) {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-#endif
-
- ssef dist;
- int child_mask = NODE_INTERSECT(kg,
- tnear,
- tfar,
-#ifdef __KERNEL_AVX2__
- P_idir4,
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4,
-#endif
-#if BVH_FEATURE(BVH_HAIR)
- dir4,
-#endif
- idir4,
- near_x,
- near_y,
- near_z,
- far_x,
- far_y,
- far_z,
- node_addr,
- &dist);
-
- if (child_mask != 0) {
- float4 cnodes;
-#if BVH_FEATURE(BVH_HAIR)
- if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
- cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 13);
- }
- else
-#endif
- {
- cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 7);
- }
-
- /* One child is hit, continue with that child. */
- int r = __bscf(child_mask);
- if (child_mask == 0) {
- node_addr = __float_as_int(cnodes[r]);
- continue;
- }
-
- /* Two children are hit, push far child, and continue with
- * closer child.
- */
- int c0 = __float_as_int(cnodes[r]);
- float d0 = ((float *)&dist)[r];
- r = __bscf(child_mask);
- int c1 = __float_as_int(cnodes[r]);
- float d1 = ((float *)&dist)[r];
- if (child_mask == 0) {
- if (d1 < d0) {
- node_addr = c1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
- continue;
- }
- else {
- node_addr = c0;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- continue;
- }
- }
-
- /* Here starts the slow path for 3 or 4 hit children. We push
- * all nodes onto the stack to sort them there.
- */
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c1;
- traversal_stack[stack_ptr].dist = d1;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c0;
- traversal_stack[stack_ptr].dist = d0;
-
- /* Three children are hit, push all onto stack and sort 3
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c2 = __float_as_int(cnodes[r]);
- float d2 = ((float *)&dist)[r];
- if (child_mask == 0) {
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- qbvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2]);
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- /* Four children are hit, push all onto stack and sort 4
- * stack items, continue with closest child.
- */
- r = __bscf(child_mask);
- int c3 = __float_as_int(cnodes[r]);
- float d3 = ((float *)&dist)[r];
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c3;
- traversal_stack[stack_ptr].dist = d3;
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = c2;
- traversal_stack[stack_ptr].dist = d2;
- qbvh_stack_sort(&traversal_stack[stack_ptr],
- &traversal_stack[stack_ptr - 1],
- &traversal_stack[stack_ptr - 2],
- &traversal_stack[stack_ptr - 3]);
- }
-
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
-
- /* If node is leaf, fetch triangle list. */
- if (node_addr < 0) {
- float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
-
- if ((__float_as_uint(leaf.z) & visibility) == 0) {
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- continue;
- }
-
- int prim_addr = __float_as_int(leaf.x);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if (prim_addr >= 0) {
-#endif
- int prim_addr2 = __float_as_int(leaf.y);
- const uint type = __float_as_int(leaf.w);
- const uint p_type = type & PRIMITIVE_ALL;
- bool hit;
-
- /* Pop. */
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
-
- /* Primitive intersection. */
- switch (p_type) {
- case PRIMITIVE_TRIANGLE: {
- for (; prim_addr < prim_addr2; prim_addr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- /* Only primitives from volume object. */
- uint tri_object = (object == OBJECT_NONE) ?
- kernel_tex_fetch(__prim_object, prim_addr) :
- object;
- int object_flag = kernel_tex_fetch(__object_flag, tri_object);
- if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
- continue;
- }
- /* Intersect ray against primitive. */
- hit = triangle_intersect(kg, isect_array, P, dir, visibility, object, prim_addr);
- if (hit) {
- /* Move on to next entry in intersections array. */
- isect_array++;
- num_hits++;
-#if BVH_FEATURE(BVH_INSTANCING)
- num_hits_in_instance++;
-#endif
- isect_array->t = isect_t;
- if (num_hits == max_hits) {
-#if BVH_FEATURE(BVH_INSTANCING)
- if (object != OBJECT_NONE) {
-# if BVH_FEATURE(BVH_MOTION)
- float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
-# else
- Transform itfm = object_fetch_transform(
- kg, object, OBJECT_INVERSE_TRANSFORM);
- float t_fac = 1.0f / len(transform_direction(&itfm, dir));
-# endif
- for (int i = 0; i < num_hits_in_instance; i++) {
- (isect_array - i - 1)->t *= t_fac;
- }
- }
-#endif /* BVH_FEATURE(BVH_INSTANCING) */
- return num_hits;
- }
- }
- }
- break;
- }
-#if BVH_FEATURE(BVH_MOTION)
- case PRIMITIVE_MOTION_TRIANGLE: {
- for (; prim_addr < prim_addr2; prim_addr++) {
- kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
- /* Only primitives from volume object. */
- uint tri_object = (object == OBJECT_NONE) ?
- kernel_tex_fetch(__prim_object, prim_addr) :
- object;
- int object_flag = kernel_tex_fetch(__object_flag, tri_object);
- if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
- continue;
- }
- /* Intersect ray against primitive. */
- hit = motion_triangle_intersect(
- kg, isect_array, P, dir, ray->time, visibility, object, prim_addr);
- if (hit) {
- /* Move on to next entry in intersections array. */
- isect_array++;
- num_hits++;
-# if BVH_FEATURE(BVH_INSTANCING)
- num_hits_in_instance++;
-# endif
- isect_array->t = isect_t;
- if (num_hits == max_hits) {
-# if BVH_FEATURE(BVH_INSTANCING)
- if (object != OBJECT_NONE) {
-# if BVH_FEATURE(BVH_MOTION)
- float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
-# else
- Transform itfm = object_fetch_transform(
- kg, object, OBJECT_INVERSE_TRANSFORM);
- float t_fac = 1.0f / len(transform_direction(&itfm, dir));
-# endif
- for (int i = 0; i < num_hits_in_instance; i++) {
- (isect_array - i - 1)->t *= t_fac;
- }
- }
-# endif /* BVH_FEATURE(BVH_INSTANCING) */
- return num_hits;
- }
- }
- }
- break;
- }
-#endif
- }
- }
-#if BVH_FEATURE(BVH_INSTANCING)
- else {
- /* Instance push. */
- object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
- int object_flag = kernel_tex_fetch(__object_flag, object);
- if (object_flag & SD_OBJECT_HAS_VOLUME) {
-# if BVH_FEATURE(BVH_MOTION)
- isect_t = bvh_instance_motion_push(
- kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
-# else
- isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
-# endif
-
- qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
- tfar = ssef(isect_t);
- idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-# if BVH_FEATURE(BVH_HAIR)
- dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-# endif
-# ifdef __KERNEL_AVX2__
- P_idir = P * idir;
- P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-# endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
-# endif
-
- num_hits_in_instance = 0;
- isect_array->t = isect_t;
-
- ++stack_ptr;
- kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
- traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
-
- node_addr = kernel_tex_fetch(__object_node, object);
- }
- else {
- /* Pop. */
- object = OBJECT_NONE;
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
- }
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while (node_addr != ENTRYPOINT_SENTINEL);
-
-#if BVH_FEATURE(BVH_INSTANCING)
- if (stack_ptr >= 0) {
- kernel_assert(object != OBJECT_NONE);
-
- /* Instance pop. */
- if (num_hits_in_instance) {
- float t_fac;
-# if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
-# else
- bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
-# endif
- /* Scale isect->t to adjust for instancing. */
- for (int i = 0; i < num_hits_in_instance; i++) {
- (isect_array - i - 1)->t *= t_fac;
- }
- }
- else {
-# if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
-# else
- bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
-# endif
- }
-
- isect_t = tmax;
- isect_array->t = isect_t;
-
- qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
- tfar = ssef(isect_t);
-# if BVH_FEATURE(BVH_HAIR)
- dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-# endif
- idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-# ifdef __KERNEL_AVX2__
- P_idir = P * idir;
- P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-# endif
-# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
- org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
-# endif
-
- object = OBJECT_NONE;
- node_addr = traversal_stack[stack_ptr].addr;
- --stack_ptr;
- }
-#endif /* FEATURE(BVH_INSTANCING) */
- } while (node_addr != ENTRYPOINT_SENTINEL);
-
- return num_hits;
-}
-
-#undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/closure/bsdf.h b/intern/cycles/kernel/closure/bsdf.h
index 4cc61e8ee71..6070fd983f5 100644
--- a/intern/cycles/kernel/closure/bsdf.h
+++ b/intern/cycles/kernel/closure/bsdf.h
@@ -119,13 +119,16 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
differential3 *domega_in,
float *pdf)
{
+ /* For curves use the smooth normal, particularly for ribbons the geometric
+ * normal gives too much darkening otherwise. */
int label;
+ const float3 Ng = (sd->type & PRIMITIVE_ALL_CURVE) ? sc->N : sd->Ng;
switch (sc->type) {
case CLOSURE_BSDF_DIFFUSE_ID:
case CLOSURE_BSDF_BSSRDF_ID:
label = bsdf_diffuse_sample(sc,
- sd->Ng,
+ Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
@@ -140,7 +143,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
#ifdef __SVM__
case CLOSURE_BSDF_OREN_NAYAR_ID:
label = bsdf_oren_nayar_sample(sc,
- sd->Ng,
+ Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
@@ -155,7 +158,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
# ifdef __OSL__
case CLOSURE_BSDF_PHONG_RAMP_ID:
label = bsdf_phong_ramp_sample(sc,
- sd->Ng,
+ Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
@@ -169,7 +172,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
break;
case CLOSURE_BSDF_DIFFUSE_RAMP_ID:
label = bsdf_diffuse_ramp_sample(sc,
- sd->Ng,
+ Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
@@ -184,7 +187,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
# endif
case CLOSURE_BSDF_TRANSLUCENT_ID:
label = bsdf_translucent_sample(sc,
- sd->Ng,
+ Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
@@ -198,7 +201,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
break;
case CLOSURE_BSDF_REFLECTION_ID:
label = bsdf_reflection_sample(sc,
- sd->Ng,
+ Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
@@ -212,7 +215,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
break;
case CLOSURE_BSDF_REFRACTION_ID:
label = bsdf_refraction_sample(sc,
- sd->Ng,
+ Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
@@ -226,7 +229,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
break;
case CLOSURE_BSDF_TRANSPARENT_ID:
label = bsdf_transparent_sample(sc,
- sd->Ng,
+ Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
@@ -244,7 +247,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
label = bsdf_microfacet_ggx_sample(kg,
sc,
- sd->Ng,
+ Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
@@ -260,7 +263,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
case CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID:
label = bsdf_microfacet_multi_ggx_sample(kg,
sc,
- sd->Ng,
+ Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
@@ -277,7 +280,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID:
label = bsdf_microfacet_multi_ggx_glass_sample(kg,
sc,
- sd->Ng,
+ Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
@@ -294,7 +297,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
label = bsdf_microfacet_beckmann_sample(kg,
sc,
- sd->Ng,
+ Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
@@ -308,7 +311,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
break;
case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
label = bsdf_ashikhmin_shirley_sample(sc,
- sd->Ng,
+ Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
@@ -322,7 +325,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
break;
case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID:
label = bsdf_ashikhmin_velvet_sample(sc,
- sd->Ng,
+ Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
@@ -336,7 +339,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
break;
case CLOSURE_BSDF_DIFFUSE_TOON_ID:
label = bsdf_diffuse_toon_sample(sc,
- sd->Ng,
+ Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
@@ -350,7 +353,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
break;
case CLOSURE_BSDF_GLOSSY_TOON_ID:
label = bsdf_glossy_toon_sample(sc,
- sd->Ng,
+ Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
@@ -364,7 +367,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
break;
case CLOSURE_BSDF_HAIR_REFLECTION_ID:
label = bsdf_hair_reflection_sample(sc,
- sd->Ng,
+ Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
@@ -378,7 +381,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
break;
case CLOSURE_BSDF_HAIR_TRANSMISSION_ID:
label = bsdf_hair_transmission_sample(sc,
- sd->Ng,
+ Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
@@ -398,7 +401,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
case CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID:
case CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID:
label = bsdf_principled_diffuse_sample(sc,
- sd->Ng,
+ Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
@@ -412,7 +415,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
break;
case CLOSURE_BSDF_PRINCIPLED_SHEEN_ID:
label = bsdf_principled_sheen_sample(sc,
- sd->Ng,
+ Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
@@ -485,9 +488,12 @@ ccl_device_inline
const float3 omega_in,
float *pdf)
{
+ /* For curves use the smooth normal, particularly for ribbons the geometric
+ * normal gives too much darkening otherwise. */
+ const float3 Ng = (sd->type & PRIMITIVE_ALL_CURVE) ? sd->N : sd->Ng;
float3 eval;
- if (dot(sd->Ng, omega_in) >= 0.0f) {
+ if (dot(Ng, omega_in) >= 0.0f) {
switch (sc->type) {
case CLOSURE_BSDF_DIFFUSE_ID:
case CLOSURE_BSDF_BSSRDF_ID:
diff --git a/intern/cycles/kernel/closure/bsdf_hair_principled.h b/intern/cycles/kernel/closure/bsdf_hair_principled.h
index f78bbeb5d9d..389bd62ba68 100644
--- a/intern/cycles/kernel/closure/bsdf_hair_principled.h
+++ b/intern/cycles/kernel/closure/bsdf_hair_principled.h
@@ -206,9 +206,6 @@ ccl_device int bsdf_principled_hair_setup(ShaderData *sd, PrincipledHairBSDF *bs
float3 X = safe_normalize(sd->dPdu);
float3 Y = safe_normalize(cross(X, sd->I));
float3 Z = safe_normalize(cross(X, Y));
- /* TODO: the solution below works where sd->Ng is the normal
- * pointing from the center of the curve to the shading point.
- * It doesn't work for triangles, see https://developer.blender.org/T43625 */
/* h -1..0..1 means the rays goes from grazing the hair, to hitting it at
* the center, to grazing the other edge. This is the sine of the angle
@@ -216,7 +213,9 @@ ccl_device int bsdf_principled_hair_setup(ShaderData *sd, PrincipledHairBSDF *bs
/* TODO: we convert this value to a cosine later and discard the sign, so
* we could probably save some operations. */
- float h = dot(cross(sd->Ng, X), Z);
+ float h = (sd->type & (PRIMITIVE_CURVE_RIBBON | PRIMITIVE_MOTION_CURVE_RIBBON)) ?
+ -sd->v :
+ dot(cross(sd->Ng, X), Z);
kernel_assert(fabsf(h) < 1.0f + 1e-4f);
kernel_assert(isfinite3_safe(Y));
diff --git a/intern/cycles/kernel/geom/geom_curve.h b/intern/cycles/kernel/geom/geom_curve.h
index 928cad58452..6ff0c7f2044 100644
--- a/intern/cycles/kernel/geom/geom_curve.h
+++ b/intern/cycles/kernel/geom/geom_curve.h
@@ -23,33 +23,6 @@ CCL_NAMESPACE_BEGIN
#ifdef __HAIR__
-/* Interpolation of curve geometry */
-
-ccl_device_inline float3 curvetangent(float t, float3 p0, float3 p1, float3 p2, float3 p3)
-{
- float fc = 0.71f;
- float data[4];
- float t2 = t * t;
- data[0] = -3.0f * fc * t2 + 4.0f * fc * t - fc;
- data[1] = 3.0f * (2.0f - fc) * t2 + 2.0f * (fc - 3.0f) * t;
- data[2] = 3.0f * (fc - 2.0f) * t2 + 2.0f * (3.0f - 2.0f * fc) * t + fc;
- data[3] = 3.0f * fc * t2 - 2.0f * fc * t;
- return data[0] * p0 + data[1] * p1 + data[2] * p2 + data[3] * p3;
-}
-
-ccl_device_inline float3 curvepoint(float t, float3 p0, float3 p1, float3 p2, float3 p3)
-{
- float data[4];
- float fc = 0.71f;
- float t2 = t * t;
- float t3 = t2 * t;
- data[0] = -fc * t3 + 2.0f * fc * t2 - fc * t;
- data[1] = (2.0f - fc) * t3 + (fc - 3.0f) * t2 + 1.0f;
- data[2] = (fc - 2.0f) * t3 + (3.0f - 2.0f * fc) * t2 + fc * t;
- data[3] = fc * t3 - fc * t2;
- return data[0] * p0 + data[1] * p1 + data[2] * p2 + data[3] * p3;
-}
-
/* Reading attributes on various curve elements */
ccl_device float curve_attribute_float(
@@ -225,6 +198,66 @@ ccl_device float3 curve_attribute_float3(KernelGlobals *kg,
}
}
+ccl_device float4 curve_attribute_float4(KernelGlobals *kg,
+ const ShaderData *sd,
+ const AttributeDescriptor desc,
+ float4 *dx,
+ float4 *dy)
+{
+ if (desc.element == ATTR_ELEMENT_CURVE) {
+ /* idea: we can't derive any useful differentials here, but for tiled
+ * mipmap image caching it would be useful to avoid reading the highest
+ * detail level always. maybe a derivative based on the hair density
+ * could be computed somehow? */
+# ifdef __RAY_DIFFERENTIALS__
+ if (dx)
+ *dx = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ if (dy)
+ *dy = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+# endif
+
+ return kernel_tex_fetch(__attributes_float3, desc.offset + sd->prim);
+ }
+ else if (desc.element == ATTR_ELEMENT_CURVE_KEY ||
+ desc.element == ATTR_ELEMENT_CURVE_KEY_MOTION) {
+ float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
+ int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type);
+ int k1 = k0 + 1;
+
+ float4 f0 = kernel_tex_fetch(__attributes_float3, desc.offset + k0);
+ float4 f1 = kernel_tex_fetch(__attributes_float3, desc.offset + k1);
+
+# ifdef __RAY_DIFFERENTIALS__
+ if (dx)
+ *dx = sd->du.dx * (f1 - f0);
+ if (dy)
+ *dy = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+# endif
+
+ return (1.0f - sd->u) * f0 + sd->u * f1;
+ }
+ else if (desc.element == ATTR_ELEMENT_OBJECT || desc.element == ATTR_ELEMENT_MESH) {
+# ifdef __RAY_DIFFERENTIALS__
+ if (dx)
+ *dx = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ if (dy)
+ *dy = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+# endif
+
+ return kernel_tex_fetch(__attributes_float3, desc.offset);
+ }
+ else {
+# ifdef __RAY_DIFFERENTIALS__
+ if (dx)
+ *dx = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ if (dy)
+ *dy = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+# endif
+
+ return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ }
+}
+
/* Curve thickness */
ccl_device float curve_thickness(KernelGlobals *kg, ShaderData *sd)
@@ -238,12 +271,12 @@ ccl_device float curve_thickness(KernelGlobals *kg, ShaderData *sd)
float4 P_curve[2];
- if (sd->type & PRIMITIVE_CURVE) {
+ if (!(sd->type & PRIMITIVE_ALL_MOTION)) {
P_curve[0] = kernel_tex_fetch(__curve_keys, k0);
P_curve[1] = kernel_tex_fetch(__curve_keys, k1);
}
else {
- motion_curve_keys(kg, sd->object, sd->prim, sd->time, k0, k1, P_curve);
+ motion_curve_keys_linear(kg, sd->object, sd->prim, sd->time, k0, k1, P_curve);
}
r = (P_curve[1].w - P_curve[0].w) * sd->u + P_curve[0].w;
diff --git a/intern/cycles/kernel/geom/geom_curve_intersect.h b/intern/cycles/kernel/geom/geom_curve_intersect.h
index 7a770470150..c04dbee52cc 100644
--- a/intern/cycles/kernel/geom/geom_curve_intersect.h
+++ b/intern/cycles/kernel/geom/geom_curve_intersect.h
@@ -1,4 +1,7 @@
/*
+ * Copyright 2009-2020 Intel Corporation. Adapted from Embree with
+ * modifications.
+ *
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
@@ -14,501 +17,620 @@
CCL_NAMESPACE_BEGIN
-/* Curve primitive intersection functions. */
+/* Curve primitive intersection functions.
+ *
+ * The code here was adapted from curve_intersector_sweep.h in Embree, to get
+ * an exact match between Embree CPU ray-tracing and our GPU ray-tracing. */
+
+#define CURVE_NUM_BEZIER_SUBDIVISIONS 3
+#define CURVE_NUM_BEZIER_SUBDIVISIONS_UNSTABLE (CURVE_NUM_BEZIER_SUBDIVISIONS + 1)
+#define CURVE_NUM_BEZIER_STEPS 2
+#define CURVE_NUM_JACOBIAN_ITERATIONS 5
#ifdef __HAIR__
-# ifdef __KERNEL_SSE2__
-ccl_device_inline ssef transform_point_T3(const ssef t[3], const ssef &a)
+/* Catmull-rom curve evaluation. */
+
+ccl_device_inline float4 catmull_rom_basis_eval(const float4 curve[4], float u)
{
- return madd(shuffle<0>(a), t[0], madd(shuffle<1>(a), t[1], shuffle<2>(a) * t[2]));
+ const float t = u;
+ const float s = 1.0f - u;
+ const float n0 = -t * s * s;
+ const float n1 = 2.0f + t * t * (3.0f * t - 5.0f);
+ const float n2 = 2.0f + s * s * (3.0f * s - 5.0f);
+ const float n3 = -s * t * t;
+ return 0.5f * (curve[0] * n0 + curve[1] * n1 + curve[2] * n2 + curve[3] * n3);
}
-# endif
-/* On CPU pass P and dir by reference to aligned vector. */
-ccl_device_forceinline bool cardinal_curve_intersect(KernelGlobals *kg,
- Intersection *isect,
- const float3 ccl_ref P,
- const float3 ccl_ref dir,
- uint visibility,
- int object,
- int curveAddr,
- float time,
- int type)
+ccl_device_inline float4 catmull_rom_basis_derivative(const float4 curve[4], float u)
{
- const bool is_curve_primitive = (type & PRIMITIVE_CURVE);
+ const float t = u;
+ const float s = 1.0f - u;
+ const float n0 = -s * s + 2.0f * s * t;
+ const float n1 = 2.0f * t * (3.0f * t - 5.0f) + 3.0f * t * t;
+ const float n2 = 2.0f * s * (3.0f * t + 2.0f) - 3.0f * s * s;
+ const float n3 = -2.0f * s * t + t * t;
+ return 0.5f * (curve[0] * n0 + curve[1] * n1 + curve[2] * n2 + curve[3] * n3);
+}
-# ifndef __KERNEL_OPTIX__ /* see OptiX motion flag OPTIX_MOTION_FLAG_[START|END]_VANISH */
- if (!is_curve_primitive && kernel_data.bvh.use_bvh_steps) {
- const float2 prim_time = kernel_tex_fetch(__prim_time, curveAddr);
- if (time < prim_time.x || time > prim_time.y) {
- return false;
- }
- }
-# endif
+ccl_device_inline float4 catmull_rom_basis_derivative2(const float4 curve[4], float u)
+{
- int segment = PRIMITIVE_UNPACK_SEGMENT(type);
- float epsilon = 0.0f;
- float r_st, r_en;
+ const float t = u;
+ const float n0 = -3.0f * t + 2.0f;
+ const float n1 = 9.0f * t - 5.0f;
+ const float n2 = -9.0f * t + 4.0f;
+ const float n3 = 3.0f * t - 1.0f;
+ return (curve[0] * n0 + curve[1] * n1 + curve[2] * n2 + curve[3] * n3);
+}
- int depth = kernel_data.curve.subdivisions;
- int flags = kernel_data.curve.curveflags;
- int prim = kernel_tex_fetch(__prim_index, curveAddr);
+/* Thick Curve */
-# ifdef __KERNEL_SSE2__
- ssef vdir = load4f(dir);
- ssef vcurve_coef[4];
- const float3 *curve_coef = (float3 *)vcurve_coef;
+ccl_device_inline float3 dnormalize(const float3 p, const float3 dp)
+{
+ const float pp = dot(p, p);
+ const float pdp = dot(p, dp);
+ return (pp * dp - pdp * p) / (pp * sqrtf(pp));
+}
- {
- ssef dtmp = vdir * vdir;
- ssef d_ss = mm_sqrt(dtmp + shuffle<2>(dtmp));
- ssef rd_ss = load1f_first(1.0f) / d_ss;
-
- ssei v00vec = load4i((ssei *)&kg->__curves.data[prim]);
- int2 &v00 = (int2 &)v00vec;
-
- int k0 = v00.x + segment;
- int k1 = k0 + 1;
- int ka = max(k0 - 1, v00.x);
- int kb = min(k1 + 1, v00.x + v00.y - 1);
-
-# if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__) && \
- (!defined(_MSC_VER) || _MSC_VER > 1800)
- avxf P_curve_0_1, P_curve_2_3;
- if (is_curve_primitive) {
- P_curve_0_1 = _mm256_loadu2_m128(&kg->__curve_keys.data[k0].x, &kg->__curve_keys.data[ka].x);
- P_curve_2_3 = _mm256_loadu2_m128(&kg->__curve_keys.data[kb].x, &kg->__curve_keys.data[k1].x);
- }
- else {
- int fobject = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, curveAddr) : object;
- motion_cardinal_curve_keys_avx(
- kg, fobject, prim, time, ka, k0, k1, kb, &P_curve_0_1, &P_curve_2_3);
- }
-# else /* __KERNEL_AVX2__ */
- ssef P_curve[4];
-
- if (is_curve_primitive) {
- P_curve[0] = load4f(&kg->__curve_keys.data[ka].x);
- P_curve[1] = load4f(&kg->__curve_keys.data[k0].x);
- P_curve[2] = load4f(&kg->__curve_keys.data[k1].x);
- P_curve[3] = load4f(&kg->__curve_keys.data[kb].x);
+ccl_device_inline float sqr_point_to_line_distance(const float3 PmQ0, const float3 Q1mQ0)
+{
+ const float3 N = cross(PmQ0, Q1mQ0);
+ const float3 D = Q1mQ0;
+ return dot(N, N) / dot(D, D);
+}
+
+ccl_device_inline bool cylinder_intersect(const float3 cylinder_start,
+ const float3 cylinder_end,
+ const float cylinder_radius,
+ const float3 ray_dir,
+ float2 *t_o,
+ float *u0_o,
+ float3 *Ng0_o,
+ float *u1_o,
+ float3 *Ng1_o)
+{
+ /* Calculate quadratic equation to solve. */
+ const float rl = 1.0f / len(cylinder_end - cylinder_start);
+ const float3 P0 = cylinder_start, dP = (cylinder_end - cylinder_start) * rl;
+ const float3 O = -P0, dO = ray_dir;
+
+ const float dOdO = dot(dO, dO);
+ const float OdO = dot(dO, O);
+ const float OO = dot(O, O);
+ const float dOz = dot(dP, dO);
+ const float Oz = dot(dP, O);
+
+ const float A = dOdO - sqr(dOz);
+ const float B = 2.0f * (OdO - dOz * Oz);
+ const float C = OO - sqr(Oz) - sqr(cylinder_radius);
+
+ /* We miss the cylinder if the discriminant is smaller than zero. */
+ const float D = B * B - 4.0f * A * C;
+ if (!(D >= 0.0f)) {
+ *t_o = make_float2(FLT_MAX, -FLT_MAX);
+ return false;
+ }
+
+ /* Special case for rays that are parallel to the cylinder. */
+ const float eps = 16.0f * FLT_EPSILON * max(fabsf(dOdO), fabsf(sqr(dOz)));
+ if (fabsf(A) < eps) {
+ if (C <= 0.0f) {
+ *t_o = make_float2(-FLT_MAX, FLT_MAX);
+ return true;
}
else {
- int fobject = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, curveAddr) : object;
- motion_cardinal_curve_keys(kg, fobject, prim, time, ka, k0, k1, kb, (float4 *)&P_curve);
+ *t_o = make_float2(-FLT_MAX, FLT_MAX);
+ return false;
}
-# endif /* __KERNEL_AVX2__ */
-
- ssef rd_sgn = set_sign_bit<0, 1, 1, 1>(shuffle<0>(rd_ss));
- ssef mul_zxxy = shuffle<2, 0, 0, 1>(vdir) * rd_sgn;
- ssef mul_yz = shuffle<1, 2, 1, 2>(vdir) * mul_zxxy;
- ssef mul_shuf = shuffle<0, 1, 2, 3>(mul_zxxy, mul_yz);
- ssef vdir0 = vdir & cast(ssei(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0));
-
- ssef htfm0 = shuffle<0, 2, 0, 3>(mul_shuf, vdir0);
- ssef htfm1 = shuffle<1, 0, 1, 3>(load1f_first(extract<0>(d_ss)), vdir0);
- ssef htfm2 = shuffle<1, 3, 2, 3>(mul_shuf, vdir0);
-
-# if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__) && \
- (!defined(_MSC_VER) || _MSC_VER > 1800)
- const avxf vPP = _mm256_broadcast_ps(&P.m128);
- const avxf htfm00 = avxf(htfm0.m128, htfm0.m128);
- const avxf htfm11 = avxf(htfm1.m128, htfm1.m128);
- const avxf htfm22 = avxf(htfm2.m128, htfm2.m128);
-
- const avxf p01 = madd(
- shuffle<0>(P_curve_0_1 - vPP),
- htfm00,
- madd(shuffle<1>(P_curve_0_1 - vPP), htfm11, shuffle<2>(P_curve_0_1 - vPP) * htfm22));
- const avxf p23 = madd(
- shuffle<0>(P_curve_2_3 - vPP),
- htfm00,
- madd(shuffle<1>(P_curve_2_3 - vPP), htfm11, shuffle<2>(P_curve_2_3 - vPP) * htfm22));
-
- const ssef p0 = _mm256_castps256_ps128(p01);
- const ssef p1 = _mm256_extractf128_ps(p01, 1);
- const ssef p2 = _mm256_castps256_ps128(p23);
- const ssef p3 = _mm256_extractf128_ps(p23, 1);
-
- const ssef P_curve_1 = _mm256_extractf128_ps(P_curve_0_1, 1);
- r_st = ((float4 &)P_curve_1).w;
- const ssef P_curve_2 = _mm256_castps256_ps128(P_curve_2_3);
- r_en = ((float4 &)P_curve_2).w;
-# else /* __KERNEL_AVX2__ */
- ssef htfm[] = {htfm0, htfm1, htfm2};
- ssef vP = load4f(P);
- ssef p0 = transform_point_T3(htfm, P_curve[0] - vP);
- ssef p1 = transform_point_T3(htfm, P_curve[1] - vP);
- ssef p2 = transform_point_T3(htfm, P_curve[2] - vP);
- ssef p3 = transform_point_T3(htfm, P_curve[3] - vP);
-
- r_st = ((float4 &)P_curve[1]).w;
- r_en = ((float4 &)P_curve[2]).w;
-# endif /* __KERNEL_AVX2__ */
-
- float fc = 0.71f;
- ssef vfc = ssef(fc);
- ssef vfcxp3 = vfc * p3;
-
- vcurve_coef[0] = p1;
- vcurve_coef[1] = vfc * (p2 - p0);
- vcurve_coef[2] = madd(
- ssef(fc * 2.0f), p0, madd(ssef(fc - 3.0f), p1, msub(ssef(3.0f - 2.0f * fc), p2, vfcxp3)));
- vcurve_coef[3] = msub(ssef(fc - 2.0f), p2 - p1, msub(vfc, p0, vfcxp3));
}
-# else
- float3 curve_coef[4];
- /* curve Intersection check */
- /* obtain curve parameters */
+ /* Standard case for rays that are not parallel to the cylinder. */
+ const float Q = sqrtf(D);
+ const float rcp_2A = 1.0f / (2.0f * A);
+ const float t0 = (-B - Q) * rcp_2A;
+ const float t1 = (-B + Q) * rcp_2A;
+
+ /* Calculates u and Ng for near hit. */
{
- /* ray transform created - this should be created at beginning of intersection loop */
- Transform htfm;
- float d = sqrtf(dir.x * dir.x + dir.z * dir.z);
- htfm = make_transform(dir.z / d,
- 0,
- -dir.x / d,
- 0,
- -dir.x * dir.y / d,
- d,
- -dir.y * dir.z / d,
- 0,
- dir.x,
- dir.y,
- dir.z,
- 0);
-
- float4 v00 = kernel_tex_fetch(__curves, prim);
-
- int k0 = __float_as_int(v00.x) + segment;
- int k1 = k0 + 1;
-
- int ka = max(k0 - 1, __float_as_int(v00.x));
- int kb = min(k1 + 1, __float_as_int(v00.x) + __float_as_int(v00.y) - 1);
-
- float4 P_curve[4];
-
- if (is_curve_primitive) {
- P_curve[0] = kernel_tex_fetch(__curve_keys, ka);
- P_curve[1] = kernel_tex_fetch(__curve_keys, k0);
- P_curve[2] = kernel_tex_fetch(__curve_keys, k1);
- P_curve[3] = kernel_tex_fetch(__curve_keys, kb);
- }
- else {
- int fobject = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, curveAddr) : object;
- motion_cardinal_curve_keys(kg, fobject, prim, time, ka, k0, k1, kb, P_curve);
- }
+ *u0_o = (t0 * dOz + Oz) * rl;
+ const float3 Pr = t0 * ray_dir;
+ const float3 Pl = (*u0_o) * (cylinder_end - cylinder_start) + cylinder_start;
+ *Ng0_o = Pr - Pl;
+ }
- float3 p0 = transform_point(&htfm, float4_to_float3(P_curve[0]) - P);
- float3 p1 = transform_point(&htfm, float4_to_float3(P_curve[1]) - P);
- float3 p2 = transform_point(&htfm, float4_to_float3(P_curve[2]) - P);
- float3 p3 = transform_point(&htfm, float4_to_float3(P_curve[3]) - P);
-
- float fc = 0.71f;
- curve_coef[0] = p1;
- curve_coef[1] = -fc * p0 + fc * p2;
- curve_coef[2] = 2.0f * fc * p0 + (fc - 3.0f) * p1 + (3.0f - 2.0f * fc) * p2 - fc * p3;
- curve_coef[3] = -fc * p0 + (2.0f - fc) * p1 + (fc - 2.0f) * p2 + fc * p3;
- r_st = P_curve[1].w;
- r_en = P_curve[2].w;
+ /* Calculates u and Ng for far hit. */
+ {
+ *u1_o = (t1 * dOz + Oz) * rl;
+ const float3 Pr = t1 * ray_dir;
+ const float3 Pl = (*u1_o) * (cylinder_end - cylinder_start) + cylinder_start;
+ *Ng1_o = Pr - Pl;
}
-# endif
- float r_curr = max(r_st, r_en);
-
- if ((flags & CURVE_KN_RIBBONS) || !(flags & CURVE_KN_BACKFACING))
- epsilon = 2 * r_curr;
-
- /* find bounds - this is slow for cubic curves */
- float upper, lower;
-
- float zextrem[4];
- curvebounds(&lower,
- &upper,
- &zextrem[0],
- &zextrem[1],
- &zextrem[2],
- &zextrem[3],
- curve_coef[0].z,
- curve_coef[1].z,
- curve_coef[2].z,
- curve_coef[3].z);
- if (lower - r_curr > isect->t || upper + r_curr < epsilon)
- return false;
+ *t_o = make_float2(t0, t1);
- /* minimum width extension */
- float xextrem[4];
- curvebounds(&lower,
- &upper,
- &xextrem[0],
- &xextrem[1],
- &xextrem[2],
- &xextrem[3],
- curve_coef[0].x,
- curve_coef[1].x,
- curve_coef[2].x,
- curve_coef[3].x);
- if (lower > r_curr || upper < -r_curr)
- return false;
+ return true;
+}
- float yextrem[4];
- curvebounds(&lower,
- &upper,
- &yextrem[0],
- &yextrem[1],
- &yextrem[2],
- &yextrem[3],
- curve_coef[0].y,
- curve_coef[1].y,
- curve_coef[2].y,
- curve_coef[3].y);
- if (lower > r_curr || upper < -r_curr)
- return false;
+/* Intersect a ray starting at the origin with the half space through point P with normal N.
+ *
+ * Returns the (lower, upper) bounds of the ray parameter interval selected by the sign of
+ * dot(ray_dir, N). When the ray is (nearly) parallel to the plane the interval is unbounded. */
+ccl_device_inline float2 half_plane_intersect(const float3 P, const float3 N, const float3 ray_dir)
+{
+  /* Ray origin relative to the plane point, projected on the normal. */
+  const float origin_dot_n = dot(-P, N);
+  const float dir_dot_n = dot(ray_dir, N);
+
+  /* Below this magnitude the reciprocal is not trusted and no bound is applied. */
+  const float min_rcp_input = 1e-18f;
+  if (fabsf(dir_dot_n) < min_rcp_input) {
+    return make_float2(-FLT_MAX, FLT_MAX);
+  }
+
+  /* Ray parameter at which the plane is crossed. */
+  const float t_plane = -origin_dot_n / dir_dot_n;
+
+  /* The crossing bounds the interval from one side only, depending on the ray direction. */
+  const float t_lower = (dir_dot_n < 0.0f) ? -FLT_MAX : t_plane;
+  const float t_upper = (dir_dot_n > 0.0f) ? FLT_MAX : t_plane;
+  return make_float2(t_lower, t_upper);
+}
- /* setup recurrent loop */
- int level = 1 << depth;
- int tree = 0;
- float resol = 1.0f / (float)level;
- bool hit = false;
-
- /* begin loop */
- while (!(tree >> (depth))) {
- const float i_st = tree * resol;
- const float i_en = i_st + (level * resol);
-
-# ifdef __KERNEL_SSE2__
- ssef vi_st = ssef(i_st), vi_en = ssef(i_en);
- ssef vp_st = madd(madd(madd(vcurve_coef[3], vi_st, vcurve_coef[2]), vi_st, vcurve_coef[1]),
- vi_st,
- vcurve_coef[0]);
- ssef vp_en = madd(madd(madd(vcurve_coef[3], vi_en, vcurve_coef[2]), vi_en, vcurve_coef[1]),
- vi_en,
- vcurve_coef[0]);
-
- ssef vbmin = min(vp_st, vp_en);
- ssef vbmax = max(vp_st, vp_en);
-
- float3 &bmin = (float3 &)vbmin, &bmax = (float3 &)vbmax;
- float &bminx = bmin.x, &bminy = bmin.y, &bminz = bmin.z;
- float &bmaxx = bmax.x, &bmaxy = bmax.y, &bmaxz = bmax.z;
- float3 &p_st = (float3 &)vp_st, &p_en = (float3 &)vp_en;
-# else
- float3 p_st = ((curve_coef[3] * i_st + curve_coef[2]) * i_st + curve_coef[1]) * i_st +
- curve_coef[0];
- float3 p_en = ((curve_coef[3] * i_en + curve_coef[2]) * i_en + curve_coef[1]) * i_en +
- curve_coef[0];
-
- float bminx = min(p_st.x, p_en.x);
- float bmaxx = max(p_st.x, p_en.x);
- float bminy = min(p_st.y, p_en.y);
- float bmaxy = max(p_st.y, p_en.y);
- float bminz = min(p_st.z, p_en.z);
- float bmaxz = max(p_st.z, p_en.z);
-# endif
+/* Refine a candidate ray/curve hit with Newton iterations on the pair (u, t).
+ *
+ * The ray is `ray_dir * t`, i.e. it starts at the origin of the space `curve` is given in, and
+ * `u`, `t` are the initial guesses for the curve and ray parameters. Each iteration takes one
+ * Newton step on the system f = 0, g = 0, where f is the offset of the ray point from the curve
+ * point measured along the curve tangent and g is the distance of the ray point from the
+ * tangent line minus the curve radius.
+ *
+ * Once |f| and |g| drop below their error estimates, `dt` is added to `t` and the hit is
+ * rejected if t is outside [0, isect->t], if u is outside [0, 1], or, unless `use_backfacing`
+ * is set, if dot(ray_dir, Ng) > 0. An accepted hit is written to isect->t, isect->u and
+ * isect->v (always 0) and true is returned. Returns false if no iteration converges within
+ * CURVE_NUM_JACOBIAN_ITERATIONS. */
+ccl_device bool curve_intersect_iterative(const float3 ray_dir,
+                                          const float dt,
+                                          const float4 curve[4],
+                                          float u,
+                                          float t,
+                                          const bool use_backfacing,
+                                          Intersection *isect)
+{
+  const float length_ray_dir = len(ray_dir);
+
+  /* Error of curve evaluations is proportional to largest coordinate.
+   * The upper bound must be the component-wise maximum over all four control points, so that
+   * neither the error estimate nor the maximum radius (w) is underestimated. */
+  const float4 box_min = min(min(curve[0], curve[1]), min(curve[2], curve[3]));
+  const float4 box_max = max(max(curve[0], curve[1]), max(curve[2], curve[3]));
+  const float4 box_abs = max(fabs(box_min), fabs(box_max));
+  const float P_err = 16.0f * FLT_EPSILON *
+                      max(box_abs.x, max(box_abs.y, max(box_abs.z, box_abs.w)));
+  const float radius_max = box_max.w;
+
+  for (int i = 0; i < CURVE_NUM_JACOBIAN_ITERATIONS; i++) {
+    /* Point on the ray and its derivative, with an error estimate. */
+    const float3 Q = ray_dir * t;
+    const float3 dQdt = ray_dir;
+    const float Q_err = 16.0f * FLT_EPSILON * length_ray_dir * t;
+
+    /* Point, radius (w component) and derivatives on the curve. */
+    const float4 P4 = catmull_rom_basis_eval(curve, u);
+    const float4 dPdu4 = catmull_rom_basis_derivative(curve, u);
+
+    const float3 P = float4_to_float3(P4);
+    const float3 dPdu = float4_to_float3(dPdu4);
+    const float radius = P4.w;
+    const float dradiusdu = dPdu4.w;
+
+    const float3 ddPdu = float4_to_float3(catmull_rom_basis_derivative2(curve, u));
+
+    /* Vector from the curve point to the ray point. */
+    const float3 R = Q - P;
+    const float len_R = len(R);
+    const float R_err = max(Q_err, P_err);
+    const float3 dRdu = -dPdu;
+    const float3 dRdt = dQdt;
+
+    /* Unit tangent and its derivative. */
+    const float3 T = normalize(dPdu);
+    const float3 dTdu = dnormalize(dPdu, ddPdu);
+    const float cos_err = P_err / len(dPdu);
+
+    /* f: component of R along the tangent. */
+    const float f = dot(R, T);
+    const float f_err = len_R * P_err + R_err + cos_err * (1.0f + len_R);
+    const float dfdu = dot(dRdu, T) + dot(R, dTdu);
+    const float dfdt = dot(dRdt, T);
+
+    /* K: squared distance of the ray point from the tangent line. */
+    const float K = dot(R, R) - sqr(f);
+    const float dKdu = (dot(R, dRdu) - f * dfdu);
+    const float dKdt = (dot(R, dRdt) - f * dfdt);
+    const float rsqrt_K = inversesqrtf(K);
+
+    /* g: distance from the tangent line minus the radius. */
+    const float g = sqrtf(K) - radius;
+    const float g_err = R_err + f_err + 16.0f * FLT_EPSILON * radius_max;
+    const float dgdu = dKdu * rsqrt_K - dradiusdu;
+    const float dgdt = dKdt * rsqrt_K;
+
+    /* Newton step: invert the 2x2 Jacobian of (f, g) with respect to (u, t). */
+    const float invdet = 1.0f / (dfdu * dgdt - dgdu * dfdt);
+    u -= (dgdt * f - dfdt * g) * invdet;
+    t -= (-dgdu * f + dfdu * g) * invdet;
+
+    /* Converged when both residuals are within their error estimates. */
+    if (fabsf(f) < f_err && fabsf(g) < g_err) {
+      t += dt;
+      if (!(0.0f <= t && t <= isect->t)) {
+        return false; /* Rejects NaNs */
+      }
+      if (!(u >= 0.0f && u <= 1.0f)) {
+        return false; /* Rejects NaNs */
+      }
- if (xextrem[0] >= i_st && xextrem[0] <= i_en) {
- bminx = min(bminx, xextrem[1]);
- bmaxx = max(bmaxx, xextrem[1]);
- }
- if (xextrem[2] >= i_st && xextrem[2] <= i_en) {
- bminx = min(bminx, xextrem[3]);
- bmaxx = max(bmaxx, xextrem[3]);
- }
- if (yextrem[0] >= i_st && yextrem[0] <= i_en) {
- bminy = min(bminy, yextrem[1]);
- bmaxy = max(bmaxy, yextrem[1]);
- }
- if (yextrem[2] >= i_st && yextrem[2] <= i_en) {
- bminy = min(bminy, yextrem[3]);
- bmaxy = max(bmaxy, yextrem[3]);
- }
- if (zextrem[0] >= i_st && zextrem[0] <= i_en) {
- bminz = min(bminz, zextrem[1]);
- bmaxz = max(bmaxz, zextrem[1]);
- }
- if (zextrem[2] >= i_st && zextrem[2] <= i_en) {
- bminz = min(bminz, zextrem[3]);
- bmaxz = max(bmaxz, zextrem[3]);
- }
+      /* Backface culling. */
+      const float3 R = normalize(Q - P);
+      const float3 U = dradiusdu * R + dPdu;
+      const float3 V = cross(dPdu, R);
+      const float3 Ng = cross(V, U);
+      if (!use_backfacing && dot(ray_dir, Ng) > 0.0f) {
+        return false;
+      }
- float r1 = r_st + (r_en - r_st) * i_st;
- float r2 = r_st + (r_en - r_st) * i_en;
- r_curr = max(r1, r2);
+      /* Record intersection. */
+      isect->t = t;
+      isect->u = u;
+      isect->v = 0.0f;
- if (bminz - r_curr > isect->t || bmaxz + r_curr < epsilon || bminx > r_curr ||
- bmaxx < -r_curr || bminy > r_curr || bmaxy < -r_curr) {
- /* the bounding box does not overlap the square centered at O */
- tree += level;
- level = tree & -tree;
+      return true;
}
- else if (level == 1) {
-
- /* the maximum recursion depth is reached.
- * check if dP0.(Q-P0)>=0 and dPn.(Pn-Q)>=0.
- * dP* is reversed if necessary.*/
- float t = isect->t;
- float u = 0.0f;
- float gd = 0.0f;
-
- if (flags & CURVE_KN_RIBBONS) {
- float3 tg = (p_en - p_st);
-# ifdef __KERNEL_SSE__
- const float3 tg_sq = tg * tg;
- float w = tg_sq.x + tg_sq.y;
-# else
- float w = tg.x * tg.x + tg.y * tg.y;
-# endif
- if (w == 0) {
- tree++;
- level = tree & -tree;
- continue;
- }
-# ifdef __KERNEL_SSE__
- const float3 p_sttg = p_st * tg;
- w = -(p_sttg.x + p_sttg.y) / w;
+  }
+  return false;
+}
+
+ccl_device bool curve_intersect_recursive(const float3 ray_orig,
+ const float3 ray_dir,
+ float4 curve[4],
+ Intersection *isect)
+{
+ /* Move ray closer to make intersection stable.
+ *
+ * This function searches the curve segment given by the four control points in `curve`
+ * for a hit along the ray by repeatedly subdividing its parameter range, and returns
+ * true when curve_intersect_iterative() accepted at least one candidate (which then
+ * wrote the hit to `isect`).
+ *
+ * The control points are rewritten in place relative to `ref`, the point on the ray at
+ * parameter `dt`, obtained by projecting the average control point onto the ray. The w
+ * components (used as radii below) are untouched because ref4.w is zero. Ray parameters
+ * used below are therefore offset by `dt` from the caller's. */
+ const float3 center = float4_to_float3(0.25f * (curve[0] + curve[1] + curve[2] + curve[3]));
+ const float dt = dot(center - ray_orig, ray_dir) / dot(ray_dir, ray_dir);
+ const float3 ref = ray_orig + ray_dir * dt;
+ const float4 ref4 = make_float4(ref.x, ref.y, ref.z, 0.0f);
+ curve[0] -= ref4;
+ curve[1] -= ref4;
+ curve[2] -= ref4;
+ curve[3] -= ref4;
+
+ const bool use_backfacing = false;
+ const float step_size = 1.0f / (float)(CURVE_NUM_BEZIER_STEPS);
+
+ int depth = 0;
+
+ /* todo: optimize stack for GPU somehow? Possibly some bitflags are enough, and
+ * u0/u1 can be derived from the depth.
+ *
+ * Explicit stack used instead of recursion: each entry stores the parameter range of
+ * the parent level and the step index at which to resume it. */
+ struct {
+ float u0, u1;
+ int i;
+ } stack[CURVE_NUM_BEZIER_SUBDIVISIONS_UNSTABLE];
+
+ bool found = false;
+
+ float u0 = 0.0f;
+ float u1 = 1.0f;
+ int i = 0;
+
+ while (1) {
+ for (; i < CURVE_NUM_BEZIER_STEPS; i++) {
+ const float step = i * step_size;
+
+ /* Subdivide curve.
+ * Step i covers the parameter range [vu0, vu1] inside [u0, u1]. P0 and P3 are the
+ * curve points at its ends, P1 and P2 are offset from them by the end derivatives
+ * scaled to one third of the sub-range length. */
+ const float dscale = (u1 - u0) * (1.0f / 3.0f) * step_size;
+ const float vu0 = mix(u0, u1, step);
+ const float vu1 = mix(u0, u1, step + step_size);
+
+ const float4 P0 = catmull_rom_basis_eval(curve, vu0);
+ const float4 dP0du = dscale * catmull_rom_basis_derivative(curve, vu0);
+ const float4 P3 = catmull_rom_basis_eval(curve, vu1);
+ const float4 dP3du = dscale * catmull_rom_basis_derivative(curve, vu1);
+
+ const float4 P1 = P0 + dP0du;
+ const float4 P2 = P3 - dP3du;
+
+ /* Calculate bounding cylinders.
+ * Both share the axis P0-P3. The outer radius is the largest control radius plus
+ * maxr12, the inner radius is the smallest control radius minus maxr12 (clamped to
+ * zero); both are padded by 2 * FLT_EPSILON relative.
+ * NOTE(review): sqr_point_to_line_distance() is presumably the squared distance of the
+ * scaled derivative from the line along P3 - P0 - confirm against its definition. */
+ const float rr1 = sqr_point_to_line_distance(float4_to_float3(dP0du),
+ float4_to_float3(P3 - P0));
+ const float rr2 = sqr_point_to_line_distance(float4_to_float3(dP3du),
+ float4_to_float3(P3 - P0));
+ const float maxr12 = sqrtf(max(rr1, rr2));
+ const float one_plus_ulp = 1.0f + 2.0f * FLT_EPSILON;
+ const float one_minus_ulp = 1.0f - 2.0f * FLT_EPSILON;
+ float r_outer = max(max(P0.w, P1.w), max(P2.w, P3.w)) + maxr12;
+ float r_inner = min(min(P0.w, P1.w), min(P2.w, P3.w)) - maxr12;
+ r_outer = one_plus_ulp * r_outer;
+ r_inner = max(0.0f, one_minus_ulp * r_inner);
+ bool valid = true;
+
+ /* Intersect with outer cylinder. A miss means this step cannot contain a hit. */
+ float2 tc_outer;
+ float u_outer0, u_outer1;
+ float3 Ng_outer0, Ng_outer1;
+ valid = cylinder_intersect(float4_to_float3(P0),
+ float4_to_float3(P3),
+ r_outer,
+ ray_dir,
+ &tc_outer,
+ &u_outer0,
+ &Ng_outer0,
+ &u_outer1,
+ &Ng_outer1);
+ if (!valid) {
+ continue;
+ }
+
+ /* Intersect with cap-planes.
+ * Start from the ray interval [-dt, isect->t - dt] (shifted by dt, see above), clip
+ * it to the outer cylinder interval and to the two half spaces through P0 and P3
+ * whose normals are the end derivatives. An empty interval rejects this step. */
+ float2 tp = make_float2(-dt, isect->t - dt);
+ tp = make_float2(max(tp.x, tc_outer.x), min(tp.y, tc_outer.y));
+ const float2 h0 = half_plane_intersect(
+ float4_to_float3(P0), float4_to_float3(dP0du), ray_dir);
+ tp = make_float2(max(tp.x, h0.x), min(tp.y, h0.y));
+ const float2 h1 = half_plane_intersect(
+ float4_to_float3(P3), -float4_to_float3(dP3du), ray_dir);
+ tp = make_float2(max(tp.x, h1.x), min(tp.y, h1.y));
+ valid = tp.x <= tp.y;
+ if (!valid) {
+ continue;
+ }
+
+ /* Clamp and correct u parameter.
+ * The results are only used as initial guesses for curve_intersect_iterative().
+ * NOTE(review): `step` is already scaled by step_size here and the divisor is
+ * CURVE_NUM_BEZIER_STEPS + 1 - confirm this mapping into [u0, u1] is intended. */
+ u_outer0 = clamp(u_outer0, 0.0f, 1.0f);
+ u_outer1 = clamp(u_outer1, 0.0f, 1.0f);
+ u_outer0 = mix(u0, u1, (step + u_outer0) * (1.0f / (float)(CURVE_NUM_BEZIER_STEPS + 1)));
+ u_outer1 = mix(u0, u1, (step + u_outer1) * (1.0f / (float)(CURVE_NUM_BEZIER_STEPS + 1)));
+
+ /* Intersect with inner cylinder. */
+ float2 tc_inner;
+ float u_inner0, u_inner1;
+ float3 Ng_inner0, Ng_inner1;
+ const bool valid_inner = cylinder_intersect(float4_to_float3(P0),
+ float4_to_float3(P3),
+ r_inner,
+ ray_dir,
+ &tc_inner,
+ &u_inner0,
+ &Ng_inner0,
+ &u_inner1,
+ &Ng_inner1);
+
+ /* At the unstable area we subdivide deeper. */
+# if 0
+ const bool unstable0 = (!valid_inner) |
+ (fabsf(dot(normalize(ray_dir), normalize(Ng_inner0))) < 0.3f);
+ const bool unstable1 = (!valid_inner) |
+ (fabsf(dot(normalize(ray_dir), normalize(Ng_inner1))) < 0.3f);
# else
- w = -(p_st.x * tg.x + p_st.y * tg.y) / w;
-# endif
- w = saturate(w);
-
- /* compute u on the curve segment */
- u = i_st * (1 - w) + i_en * w;
- r_curr = r_st + (r_en - r_st) * u;
- /* compare x-y distances */
- float3 p_curr = ((curve_coef[3] * u + curve_coef[2]) * u + curve_coef[1]) * u +
- curve_coef[0];
-
- float3 dp_st = (3 * curve_coef[3] * i_st + 2 * curve_coef[2]) * i_st + curve_coef[1];
- if (dot(tg, dp_st) < 0)
- dp_st *= -1;
- if (dot(dp_st, -p_st) + p_curr.z * dp_st.z < 0) {
- tree++;
- level = tree & -tree;
- continue;
- }
- float3 dp_en = (3 * curve_coef[3] * i_en + 2 * curve_coef[2]) * i_en + curve_coef[1];
- if (dot(tg, dp_en) < 0)
- dp_en *= -1;
- if (dot(dp_en, p_en) - p_curr.z * dp_en.z < 0) {
- tree++;
- level = tree & -tree;
- continue;
- }
+ /* On the GPU appears to be a little faster if always enabled. */
+ (void)valid_inner;
- if (p_curr.x * p_curr.x + p_curr.y * p_curr.y >= r_curr * r_curr || p_curr.z <= epsilon ||
- isect->t < p_curr.z) {
- tree++;
- level = tree & -tree;
- continue;
- }
+ const bool unstable0 = true;
+ const bool unstable1 = true;
+# endif
- t = p_curr.z;
+ /* Subtract the inner interval from the current hit interval.
+ * This leaves a near candidate interval tp0 (before the inner cylinder) and a far
+ * candidate interval tp1 (after it); either may be empty. */
+ float2 tp0 = make_float2(tp.x, min(tp.y, tc_inner.x));
+ float2 tp1 = make_float2(max(tp.x, tc_inner.y), tp.y);
+ bool valid0 = valid && (tp0.x <= tp0.y);
+ bool valid1 = valid && (tp1.x <= tp1.y);
+ if (!(valid0 || valid1)) {
+ continue;
}
- else {
- float l = len(p_en - p_st);
- float invl = 1.0f / l;
- float3 tg = (p_en - p_st) * invl;
- gd = (r2 - r1) * invl;
- float difz = -dot(p_st, tg);
- float cyla = 1.0f - (tg.z * tg.z * (1 + gd * gd));
- float invcyla = 1.0f / cyla;
- float halfb = (-p_st.z - tg.z * (difz + gd * (difz * gd + r1)));
- float tcentre = -halfb * invcyla;
- float zcentre = difz + (tg.z * tcentre);
- float3 tdif = -p_st;
- tdif.z += tcentre;
- float tdifz = dot(tdif, tg);
- float tb = 2 * (tdif.z - tg.z * (tdifz + gd * (tdifz * gd + r1)));
- float tc = dot(tdif, tdif) - tdifz * tdifz * (1 + gd * gd) - r1 * r1 - 2 * r1 * tdifz * gd;
- float td = tb * tb - 4 * cyla * tc;
- if (td < 0.0f) {
- tree++;
- level = tree & -tree;
- continue;
- }
- float rootd = sqrtf(td);
- float correction = (-tb - rootd) * 0.5f * invcyla;
- t = tcentre + correction;
-
- float3 dp_st = (3 * curve_coef[3] * i_st + 2 * curve_coef[2]) * i_st + curve_coef[1];
- if (dot(tg, dp_st) < 0)
- dp_st *= -1;
- float3 dp_en = (3 * curve_coef[3] * i_en + 2 * curve_coef[2]) * i_en + curve_coef[1];
- if (dot(tg, dp_en) < 0)
- dp_en *= -1;
-
- if (flags & CURVE_KN_BACKFACING &&
- (dot(dp_st, -p_st) + t * dp_st.z < 0 || dot(dp_en, p_en) - t * dp_en.z < 0 ||
- isect->t < t || t <= 0.0f)) {
- correction = (-tb + rootd) * 0.5f * invcyla;
- t = tcentre + correction;
+ /* Process one or two hits.
+ * At the terminal depth a candidate is handed to the iterative solver, starting from
+ * the near end of tp0 or the far end of tp1; otherwise this step is subdivided
+ * further. */
+ bool recurse = false;
+ if (valid0) {
+ const int termDepth = unstable0 ? CURVE_NUM_BEZIER_SUBDIVISIONS_UNSTABLE :
+ CURVE_NUM_BEZIER_SUBDIVISIONS;
+ if (depth >= termDepth) {
+ found |= curve_intersect_iterative(
+ ray_dir, dt, curve, u_outer0, tp0.x, use_backfacing, isect);
}
-
- if (dot(dp_st, -p_st) + t * dp_st.z < 0 || dot(dp_en, p_en) - t * dp_en.z < 0 ||
- isect->t < t || t <= 0.0f) {
- tree++;
- level = tree & -tree;
- continue;
+ else {
+ recurse = true;
}
+ }
- float w = (zcentre + (tg.z * correction)) * invl;
- w = saturate(w);
- /* compute u on the curve segment */
- u = i_st * (1 - w) + i_en * w;
+ if (valid1 && (tp1.x + dt <= isect->t)) {
+ const int termDepth = unstable1 ? CURVE_NUM_BEZIER_SUBDIVISIONS_UNSTABLE :
+ CURVE_NUM_BEZIER_SUBDIVISIONS;
+ if (depth >= termDepth) {
+ found |= curve_intersect_iterative(
+ ray_dir, dt, curve, u_outer1, tp1.y, use_backfacing, isect);
+ }
+ else {
+ recurse = true;
+ }
}
- /* we found a new intersection */
-# ifdef __VISIBILITY_FLAG__
- /* visibility flag test. we do it here under the assumption
- * that most triangles are culled by node flags */
- if (kernel_tex_fetch(__prim_visibility, curveAddr) & visibility)
-# endif
- {
- /* record intersection */
- isect->t = t;
- isect->u = u;
- isect->v = gd;
- isect->prim = curveAddr;
- isect->object = object;
- isect->type = type;
- hit = true;
+ if (recurse) { /* Descend: save where to resume, then restart at step 0 of [vu0, vu1]. */
+ stack[depth].u0 = u0;
+ stack[depth].u1 = u1;
+ stack[depth].i = i + 1;
+ depth++;
+
+ u0 = vu0;
+ u1 = vu1;
+ i = -1;
}
+ }
- tree++;
- level = tree & -tree;
+ if (depth > 0) { /* All steps of this level done: resume the parent level. */
+ depth--;
+ u0 = stack[depth].u0;
+ u1 = stack[depth].u1;
+ i = stack[depth].i;
}
else {
- /* split the curve into two curves and process */
- level = level >> 1;
+ break;
}
}
- return hit;
+ return found;
+}
+
+/* Ribbons */
+
+/* 2D culling test for a ribbon segment from p1 to p2 with radius r.
+ *
+ * Compares the squared numerator of the distance between the origin and the line through
+ * p1 and p2 against r squared times the squared segment length, which avoids a division
+ * and a square root. Returns true when the segment can not be culled. */
+ccl_device_inline bool cylinder_culling_test(const float2 p1, const float2 p2, const float r)
+{
+  const float2 edge = p2 - p1;
+  /* 2D cross product of the edge with p1. */
+  const float cross_edge_p1 = edge.x * p1.y - edge.y * p1.x;
+  const float edge_len_sq = dot(edge, edge);
+  return cross_edge_p1 * cross_edge_p1 <= r * r * edge_len_sq;
+}
+
+/* Intersects a ray with a quad, with backface culling enabled.
+ *
+ * The ray starts at the origin and points along +Z. The quad v0,v1,v2,v3 is split into
+ * the two triangles v0,v1,v3 and v2,v3,v1; an edge test against the diagonal v1,v3
+ * decides which of the two triangles gets intersected.
+ *
+ * On a hit with 0 <= t <= ray_tfar the distance is written to t_o and the quad
+ * coordinates to u_o and v_o, and true is returned. The outputs are left untouched when
+ * false is returned. */
+ccl_device_inline bool ribbon_intersect_quad(const float ray_tfar,
+                                             const float3 quad_v0,
+                                             const float3 quad_v1,
+                                             const float3 quad_v2,
+                                             const float3 quad_v3,
+                                             float *u_o,
+                                             float *v_o,
+                                             float *t_o)
+{
+  /* Vertices relative to the ray origin. */
+  const float3 O = make_float3(0.0f, 0.0f, 0.0f);
+  const float3 D = make_float3(0.0f, 0.0f, 1.0f);
+  const float3 va = quad_v0 - O;
+  const float3 vb = quad_v1 - O;
+  const float3 vc = quad_v2 - O;
+  const float3 vd = quad_v3 - O;
+
+  /* Pick the triangle on the side of the v1,v3 diagonal that the ray passes. */
+  const float3 edb = vb - vd;
+  const float WW = dot(cross(vd, edb), D);
+  const bool first_triangle = (WW <= 0.0f);
+  const float3 v0 = first_triangle ? va : vc;
+  const float3 v1 = first_triangle ? vb : vd;
+  const float3 v2 = first_triangle ? vd : vb;
+
+  /* Triangle edges. */
+  const float3 e0 = v2 - v0;
+  const float3 e1 = v0 - v1;
+
+  /* Perform edge tests. Written so that NaNs are rejected too. */
+  const float U = dot(cross(v0, e0), D);
+  const float V = dot(cross(v1, e1), D);
+  if (!(max(U, V) <= 0.0f)) {
+    return false;
+  }
+
+  /* Geometry normal and denominator. */
+  const float3 Ng = cross(e1, e0);
+  const float den = dot(Ng, D);
+
+  /* Avoid division by 0: reject degenerate or edge-on triangles before dividing. */
+  if (!(den != 0.0f)) {
+    return false;
+  }
+  const float rcpDen = 1.0f / den;
+
+  /* Perform depth test. Written so that NaNs are rejected too. */
+  const float t = rcpDen * dot(v0, Ng);
+  if (!(0.0f <= t && t <= ray_tfar)) {
+    return false;
+  }
+
+  /* Update hit information, mirroring the coordinates for the second triangle. */
+  const float u = U * rcpDen;
+  const float v = V * rcpDen;
+  *t_o = t;
+  *u_o = first_triangle ? u : 1.0f - u;
+  *v_o = first_triangle ? v : 1.0f - v;
+  return true;
+}
+
+/* Build a ray space for ribbon intersection.
+ *
+ * ray_space[0] and ray_space[1] are normalized and perpendicular to the ray direction,
+ * ray_space[2] is the ray direction itself, not normalized. */
+ccl_device_inline void ribbon_ray_space(const float3 ray_dir, float3 ray_space[3])
+{
+  /* Two candidates perpendicular to the ray; the longer one is used. */
+  const float3 perp_a = make_float3(0, ray_dir.z, -ray_dir.y);
+  const float3 perp_b = make_float3(-ray_dir.z, 0, ray_dir.x);
+
+  if (dot(perp_a, perp_a) > dot(perp_b, perp_b)) {
+    ray_space[0] = normalize(perp_a);
+  }
+  else {
+    ray_space[0] = normalize(perp_b);
+  }
+
+  ray_space[1] = normalize(cross(ray_dir, ray_space[0]));
+  ray_space[2] = ray_dir;
+}
+
+/* Transform a curve key into ray space.
+ *
+ * The xyz position is made relative to ray_org and projected on the three ray space
+ * axes, the w component is passed through unchanged. */
+ccl_device_inline float4 ribbon_to_ray_space(const float3 ray_space[3],
+                                             const float3 ray_org,
+                                             const float4 P4)
+{
+  const float3 rel = float4_to_float3(P4) - ray_org;
+  const float x = dot(ray_space[0], rel);
+  const float y = dot(ray_space[1], rel);
+  const float z = dot(ray_space[2], rel);
+  return make_float4(x, y, z, P4.w);
+}
+
+/* Intersect a ray with a curve segment rendered as a flat ribbon facing the ray.
+ *
+ * The four control points in `curve` are transformed to ray space in place. The segment
+ * is then split in N steps, and for each step a quad spanning the curve width (w is the
+ * radius) is intersected with the ray. The first accepted hit is written to isect->t,
+ * isect->u and isect->v (v in [-1, 1] across the ribbon) and true is returned.
+ *
+ * Hits are tested against isect->t; `ray_tfar` itself is not read here. */
+ccl_device_inline bool ribbon_intersect(const float3 ray_org,
+                                        const float3 ray_dir,
+                                        const float ray_tfar,
+                                        const int N,
+                                        float4 curve[4],
+                                        Intersection *isect)
+{
+  /* Transform control points into ray space. */
+  float3 ray_space[3];
+  ribbon_ray_space(ray_dir, ray_space);
+
+  curve[0] = ribbon_to_ray_space(ray_space, ray_org, curve[0]);
+  curve[1] = ribbon_to_ray_space(ray_space, ray_org, curve[1]);
+  curve[2] = ribbon_to_ray_space(ray_space, ray_org, curve[2]);
+  curve[3] = ribbon_to_ray_space(ray_space, ray_org, curve[3]);
+
+  /* Threshold below which a derivative is treated as degenerate. */
+  const float4 mx = max(max(fabs(curve[0]), fabs(curve[1])), max(fabs(curve[2]), fabs(curve[3])));
+  const float eps = 4.0f * FLT_EPSILON * max(max(mx.x, mx.y), max(mx.z, mx.w));
+  const float step_size = 1.0f / (float)N;
+
+  /* Evaluate first point and radius scaled normal direction. */
+  float4 p0 = catmull_rom_basis_eval(curve, 0.0f);
+  float3 dp0dt = float4_to_float3(catmull_rom_basis_derivative(curve, 0.0f));
+  if (max3(fabs(dp0dt)) < eps) {
+    /* Degenerate derivative, fall back to a finite difference. */
+    const float4 p1 = catmull_rom_basis_eval(curve, step_size);
+    dp0dt = float4_to_float3(p1 - p0);
+  }
+  float3 wn0 = normalize(make_float3(dp0dt.y, -dp0dt.x, 0.0f)) * p0.w;
+
+  /* Evaluate the bezier curve. */
+  for (int i = 0; i < N; i++) {
+    const float u = i * step_size;
+    const float4 p1 = catmull_rom_basis_eval(curve, u + step_size);
+    const bool valid = cylinder_culling_test(
+        make_float2(p0.x, p0.y), make_float2(p1.x, p1.y), max(p0.w, p1.w));
+
+    /* Evaluate next point. This is done even for culled steps, since the end of this
+     * step is the start of the next one. */
+    float3 dp1dt = float4_to_float3(catmull_rom_basis_derivative(curve, u + step_size));
+    dp1dt = (max3(fabs(dp1dt)) < eps) ? float4_to_float3(p1 - p0) : dp1dt;
+    const float3 wn1 = normalize(make_float3(dp1dt.y, -dp1dt.x, 0.0f)) * p1.w;
+
+    if (valid) {
+      /* Construct quad coordinates. */
+      const float3 lp0 = float4_to_float3(p0) + wn0;
+      const float3 lp1 = float4_to_float3(p1) + wn1;
+      const float3 up0 = float4_to_float3(p0) - wn0;
+      const float3 up1 = float4_to_float3(p1) - wn1;
+
+      /* Intersect quad. */
+      float vu, vv, vt;
+      bool valid0 = ribbon_intersect_quad(isect->t, lp0, lp1, up1, up0, &vu, &vv, &vt);
+
+      if (valid0) {
+        /* Ignore self intersections: hits closer than a multiple of the radius. */
+        const float avoidance_factor = 2.0f;
+        if (avoidance_factor != 0.0f) {
+          float r = mix(p0.w, p1.w, vu);
+          valid0 = vt > avoidance_factor * r;
+        }
+
+        if (valid0) {
+          /* Remap from [0, 1] to [-1, 1] across the ribbon. */
+          vv = 2.0f * vv - 1.0f;
+
+          /* Record intersection. */
+          isect->t = vt;
+          isect->u = u + vu * step_size;
+          isect->v = vv;
+          return true;
+        }
+      }
+    }
+
+    /* Advance to the next step, also when this one was culled or missed. */
+    p0 = p1;
+    wn0 = wn1;
+  }
+  return false;
}
ccl_device_forceinline bool curve_intersect(KernelGlobals *kg,
Intersection *isect,
- float3 P,
- float3 direction,
+ const float3 P,
+ const float3 dir,
uint visibility,
int object,
int curveAddr,
float time,
int type)
{
- /* define few macros to minimize code duplication for SSE */
-# ifndef __KERNEL_SSE2__
-# define len3_squared(x) len_squared(x)
-# define len3(x) len(x)
-# define dot3(x, y) dot(x, y)
-# endif
-
- const bool is_curve_primitive = (type & PRIMITIVE_CURVE);
+ const bool is_motion = (type & PRIMITIVE_ALL_MOTION);
-# ifndef __KERNEL_OPTIX__ /* see OptiX motion flag OPTIX_MOTION_FLAG_[START|END]_VANISH */
- if (!is_curve_primitive && kernel_data.bvh.use_bvh_steps) {
+# ifndef __KERNEL_OPTIX__ /* See OptiX motion flag OPTIX_MOTION_FLAG_[START|END]_VANISH */
+ if (is_motion && kernel_data.bvh.use_bvh_steps) {
const float2 prim_time = kernel_tex_fetch(__prim_time, curveAddr);
if (time < prim_time.x || time > prim_time.y) {
return false;
@@ -517,210 +639,63 @@ ccl_device_forceinline bool curve_intersect(KernelGlobals *kg,
# endif
int segment = PRIMITIVE_UNPACK_SEGMENT(type);
- /* curve Intersection check */
- int flags = kernel_data.curve.curveflags;
-
int prim = kernel_tex_fetch(__prim_index, curveAddr);
+
float4 v00 = kernel_tex_fetch(__curves, prim);
- int cnum = __float_as_int(v00.x);
- int k0 = cnum + segment;
+ int k0 = __float_as_int(v00.x) + segment;
int k1 = k0 + 1;
-# ifndef __KERNEL_SSE2__
- float4 P_curve[2];
+ int ka = max(k0 - 1, __float_as_int(v00.x));
+ int kb = min(k1 + 1, __float_as_int(v00.x) + __float_as_int(v00.y) - 1);
- if (is_curve_primitive) {
- P_curve[0] = kernel_tex_fetch(__curve_keys, k0);
- P_curve[1] = kernel_tex_fetch(__curve_keys, k1);
+ float4 curve[4];
+ if (!is_motion) {
+ curve[0] = kernel_tex_fetch(__curve_keys, ka);
+ curve[1] = kernel_tex_fetch(__curve_keys, k0);
+ curve[2] = kernel_tex_fetch(__curve_keys, k1);
+ curve[3] = kernel_tex_fetch(__curve_keys, kb);
}
else {
int fobject = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, curveAddr) : object;
- motion_curve_keys(kg, fobject, prim, time, k0, k1, P_curve);
- }
-
- float r1 = P_curve[0].w;
- float r2 = P_curve[1].w;
- float3 p1 = float4_to_float3(P_curve[0]);
- float3 p2 = float4_to_float3(P_curve[1]);
-
- /* minimum width extension */
- float3 dif = P - p1;
- float3 dif_second = P - p2;
-
- float3 p21_diff = p2 - p1;
- float3 sphere_dif1 = (dif + dif_second) * 0.5f;
- float3 dir = direction;
- float sphere_b_tmp = dot3(dir, sphere_dif1);
- float3 sphere_dif2 = sphere_dif1 - sphere_b_tmp * dir;
-# else
- ssef P_curve[2];
-
- if (is_curve_primitive) {
- P_curve[0] = load4f(&kg->__curve_keys.data[k0].x);
- P_curve[1] = load4f(&kg->__curve_keys.data[k1].x);
+ motion_curve_keys(kg, fobject, prim, time, ka, k0, k1, kb, curve);
}
- else {
- int fobject = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, curveAddr) : object;
- motion_curve_keys(kg, fobject, prim, time, k0, k1, (float4 *)&P_curve);
- }
-
- ssef r12 = shuffle<3, 3, 3, 3>(P_curve[0], P_curve[1]);
- const ssef vP = load4f(P);
- const ssef dif = vP - P_curve[0];
- const ssef dif_second = vP - P_curve[1];
- float r1 = extract<0>(r12), r2 = extract<0>(shuffle<2>(r12));
-
- const ssef p21_diff = P_curve[1] - P_curve[0];
- const ssef sphere_dif1 = (dif + dif_second) * 0.5f;
- const ssef dir = load4f(direction);
- const ssef sphere_b_tmp = dot3_splat(dir, sphere_dif1);
- const ssef sphere_dif2 = nmadd(sphere_b_tmp, dir, sphere_dif1);
-# endif
-
- float mr = max(r1, r2);
- float l = len3(p21_diff);
- float invl = 1.0f / l;
- float sp_r = mr + 0.5f * l;
- float sphere_b = dot3(dir, sphere_dif2);
- float sdisc = sphere_b * sphere_b - len3_squared(sphere_dif2) + sp_r * sp_r;
-
- if (sdisc < 0.0f)
- return false;
-
- /* obtain parameters and test midpoint distance for suitable modes */
-# ifndef __KERNEL_SSE2__
- float3 tg = p21_diff * invl;
-# else
- const ssef tg = p21_diff * invl;
-# endif
- float gd = (r2 - r1) * invl;
-
- float dirz = dot3(dir, tg);
- float difz = dot3(dif, tg);
-
- float a = 1.0f - (dirz * dirz * (1 + gd * gd));
-
- float halfb = dot3(dir, dif) - dirz * (difz + gd * (difz * gd + r1));
-
- float tcentre = -halfb / a;
- float zcentre = difz + (dirz * tcentre);
-
- if ((tcentre > isect->t) && !(flags & CURVE_KN_ACCURATE))
- return false;
- if ((zcentre < 0 || zcentre > l) && !(flags & CURVE_KN_ACCURATE) &&
- !(flags & CURVE_KN_INTERSECTCORRECTION))
+# ifdef __VISIBILITY_FLAG__
+ if (!(kernel_tex_fetch(__prim_visibility, curveAddr) & visibility)) {
return false;
-
- /* test minimum separation */
-# ifndef __KERNEL_SSE2__
- float3 cprod = cross(tg, dir);
- float cprod2sq = len3_squared(cross(tg, dif));
-# else
- const ssef cprod = cross(tg, dir);
- float cprod2sq = len3_squared(cross_zxy(tg, dif));
+ }
# endif
- float cprodsq = len3_squared(cprod);
- float distscaled = dot3(cprod, dif);
-
- if (cprodsq == 0)
- distscaled = cprod2sq;
- else
- distscaled = (distscaled * distscaled) / cprodsq;
-
- if (distscaled > mr * mr)
- return false;
- /* calculate true intersection */
-# ifndef __KERNEL_SSE2__
- float3 tdif = dif + tcentre * dir;
-# else
- const ssef tdif = madd(ssef(tcentre), dir, dif);
-# endif
- float tdifz = dot3(tdif, tg);
- float tdifma = tdifz * gd + r1;
- float tb = 2 * (dot3(dir, tdif) - dirz * (tdifz + gd * tdifma));
- float tc = dot3(tdif, tdif) - tdifz * tdifz - tdifma * tdifma;
- float td = tb * tb - 4 * a * tc;
+ if (type & (PRIMITIVE_CURVE_RIBBON | PRIMITIVE_MOTION_CURVE_RIBBON)) {
+ /* todo: adaptive number of subdivisions could help performance here. */
+ const int subdivisions = kernel_data.bvh.curve_subdivisions;
+ if (ribbon_intersect(P, dir, isect->t, subdivisions, curve, isect)) {
+ isect->prim = curveAddr;
+ isect->object = object;
+ isect->type = type;
+ return true;
+ }
- if (td < 0.0f)
return false;
-
- float rootd = 0.0f;
- float correction = 0.0f;
- if (flags & CURVE_KN_ACCURATE) {
- rootd = sqrtf(td);
- correction = ((-tb - rootd) / (2 * a));
}
-
- float t = tcentre + correction;
-
- if (t < isect->t) {
-
- if (flags & CURVE_KN_INTERSECTCORRECTION) {
- rootd = sqrtf(td);
- correction = ((-tb - rootd) / (2 * a));
- t = tcentre + correction;
- }
-
- float z = zcentre + (dirz * correction);
- // bool backface = false;
-
- if (flags & CURVE_KN_BACKFACING && (t < 0.0f || z < 0 || z > l)) {
- // backface = true;
- correction = ((-tb + rootd) / (2 * a));
- t = tcentre + correction;
- z = zcentre + (dirz * correction);
+ else {
+ if (curve_intersect_recursive(P, dir, curve, isect)) {
+ isect->prim = curveAddr;
+ isect->object = object;
+ isect->type = type;
+ return true;
}
- if (t > 0.0f && t < isect->t && z >= 0 && z <= l) {
-
- if (flags & CURVE_KN_ENCLOSEFILTER) {
- float enc_ratio = 1.01f;
- if ((difz > -r1 * enc_ratio) && (dot3(dif_second, tg) < r2 * enc_ratio)) {
- float a2 = 1.0f - (dirz * dirz * (1 + gd * gd * enc_ratio * enc_ratio));
- float c2 = dot3(dif, dif) - difz * difz * (1 + gd * gd * enc_ratio * enc_ratio) -
- r1 * r1 * enc_ratio * enc_ratio - 2 * r1 * difz * gd * enc_ratio;
- if (a2 * c2 < 0.0f)
- return false;
- }
- }
-
-# ifdef __VISIBILITY_FLAG__
- /* visibility flag test. we do it here under the assumption
- * that most triangles are culled by node flags */
- if (kernel_tex_fetch(__prim_visibility, curveAddr) & visibility)
-# endif
- {
- /* record intersection */
- isect->t = t;
- isect->u = z * invl;
- isect->v = gd;
- isect->prim = curveAddr;
- isect->object = object;
- isect->type = type;
-
- return true;
- }
- }
+ return false;
}
-
- return false;
-
-# ifndef __KERNEL_SSE2__
-# undef len3_squared
-# undef len3
-# undef dot3
-# endif
}
-ccl_device_inline float3 curve_refine(KernelGlobals *kg,
- ShaderData *sd,
- const Intersection *isect,
- const Ray *ray)
+ccl_device_inline void curve_shader_setup(KernelGlobals *kg,
+ ShaderData *sd,
+ const Intersection *isect,
+ const Ray *ray)
{
- int flag = kernel_data.curve.curveflags;
float t = isect->t;
float3 P = ray->P;
float3 D = ray->D;
@@ -743,118 +718,60 @@ ccl_device_inline float3 curve_refine(KernelGlobals *kg,
int k0 = __float_as_int(v00.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type);
int k1 = k0 + 1;
- float3 tg;
+ int ka = max(k0 - 1, __float_as_int(v00.x));
+ int kb = min(k1 + 1, __float_as_int(v00.x) + __float_as_int(v00.y) - 1);
- if (flag & CURVE_KN_INTERPOLATE) {
- int ka = max(k0 - 1, __float_as_int(v00.x));
- int kb = min(k1 + 1, __float_as_int(v00.x) + __float_as_int(v00.y) - 1);
+ float4 P_curve[4];
- float4 P_curve[4];
+ if (!(sd->type & PRIMITIVE_ALL_MOTION)) {
+ P_curve[0] = kernel_tex_fetch(__curve_keys, ka);
+ P_curve[1] = kernel_tex_fetch(__curve_keys, k0);
+ P_curve[2] = kernel_tex_fetch(__curve_keys, k1);
+ P_curve[3] = kernel_tex_fetch(__curve_keys, kb);
+ }
+ else {
+ motion_curve_keys(kg, sd->object, sd->prim, sd->time, ka, k0, k1, kb, P_curve);
+ }
- if (sd->type & PRIMITIVE_CURVE) {
- P_curve[0] = kernel_tex_fetch(__curve_keys, ka);
- P_curve[1] = kernel_tex_fetch(__curve_keys, k0);
- P_curve[2] = kernel_tex_fetch(__curve_keys, k1);
- P_curve[3] = kernel_tex_fetch(__curve_keys, kb);
- }
- else {
- motion_cardinal_curve_keys(kg, sd->object, sd->prim, sd->time, ka, k0, k1, kb, P_curve);
- }
+ sd->u = isect->u;
+ sd->v = isect->v;
- float3 p[4];
- p[0] = float4_to_float3(P_curve[0]);
- p[1] = float4_to_float3(P_curve[1]);
- p[2] = float4_to_float3(P_curve[2]);
- p[3] = float4_to_float3(P_curve[3]);
+ P = P + D * t;
- P = P + D * t;
+ const float4 dPdu4 = catmull_rom_basis_derivative(P_curve, isect->u);
+ const float3 dPdu = float4_to_float3(dPdu4);
-# ifdef __UV__
- sd->u = isect->u;
- sd->v = 0.0f;
-# endif
+ if (sd->type & (PRIMITIVE_CURVE_RIBBON | PRIMITIVE_MOTION_CURVE_RIBBON)) {
+ /* Rounded smooth normals for ribbons, to approximate thick curve shape. */
+ const float3 tangent = normalize(dPdu);
+ const float3 bitangent = normalize(cross(tangent, -D));
+ const float sine = isect->v;
+ const float cosine = safe_sqrtf(1.0f - sine * sine);
- tg = normalize(curvetangent(isect->u, p[0], p[1], p[2], p[3]));
+ sd->N = normalize(sine * bitangent - cosine * normalize(cross(tangent, bitangent)));
+ sd->Ng = -D;
- if (kernel_data.curve.curveflags & CURVE_KN_RIBBONS) {
- sd->Ng = normalize(-(D - tg * (dot(tg, D))));
- }
- else {
-# ifdef __EMBREE__
- if (kernel_data.bvh.scene) {
- sd->Ng = normalize(isect->Ng);
- }
- else
+# if 0
+ /* This approximates the position and geometric normal of a thick curve too,
+ * but gives too many issues with wrong self intersections. */
+ const float dPdu_radius = dPdu4.w;
+ sd->Ng = sd->N;
+ P += sd->N * dPdu_radius;
# endif
- {
- /* direction from inside to surface of curve */
- float3 p_curr = curvepoint(isect->u, p[0], p[1], p[2], p[3]);
- sd->Ng = normalize(P - p_curr);
-
- /* adjustment for changing radius */
- float gd = isect->v;
-
- if (gd != 0.0f) {
- sd->Ng = sd->Ng - gd * tg;
- sd->Ng = normalize(sd->Ng);
- }
- }
- }
-
- /* todo: sometimes the normal is still so that this is detected as
- * backfacing even if cull backfaces is enabled */
-
- sd->N = sd->Ng;
}
else {
- float4 P_curve[2];
-
- if (sd->type & PRIMITIVE_CURVE) {
- P_curve[0] = kernel_tex_fetch(__curve_keys, k0);
- P_curve[1] = kernel_tex_fetch(__curve_keys, k1);
- }
- else {
- motion_curve_keys(kg, sd->object, sd->prim, sd->time, k0, k1, P_curve);
- }
-
- float l = 1.0f;
- tg = normalize_len(float4_to_float3(P_curve[1] - P_curve[0]), &l);
-
- P = P + D * t;
-
- float3 dif = P - float4_to_float3(P_curve[0]);
-
-# ifdef __UV__
- sd->u = dot(dif, tg) / l;
- sd->v = 0.0f;
-# endif
-
- if (flag & CURVE_KN_TRUETANGENTGNORMAL) {
- sd->Ng = -(D - tg * dot(tg, D));
- sd->Ng = normalize(sd->Ng);
- }
- else {
- float gd = isect->v;
-
- /* direction from inside to surface of curve */
- float denom = fmaxf(P_curve[0].w + sd->u * l * gd, 1e-8f);
- sd->Ng = (dif - tg * sd->u * l) / denom;
-
- /* adjustment for changing radius */
- if (gd != 0.0f) {
- sd->Ng = sd->Ng - gd * tg;
- }
-
- sd->Ng = normalize(sd->Ng);
- }
-
+ /* Thick curves, compute normal using direction from inside the curve.
+ * This could be optimized by recording the normal in the intersection,
+ * however for Optix this would go beyond the size of the payload. */
+ const float3 P_inside = float4_to_float3(catmull_rom_basis_eval(P_curve, isect->u));
+ sd->Ng = normalize(P - P_inside);
sd->N = sd->Ng;
}
# ifdef __DPDU__
/* dPdu/dPdv */
- sd->dPdu = tg;
- sd->dPdv = cross(tg, sd->Ng);
+ sd->dPdu = dPdu;
+ sd->dPdv = cross(dPdu, sd->Ng);
# endif
if (isect->object != OBJECT_NONE) {
@@ -867,7 +784,10 @@ ccl_device_inline float3 curve_refine(KernelGlobals *kg,
P = transform_point(&tfm, P);
}
- return P;
+ sd->P = P;
+
+ float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
+ sd->shader = __float_as_int(curvedata.z);
}
#endif
diff --git a/intern/cycles/kernel/geom/geom_motion_curve.h b/intern/cycles/kernel/geom/geom_motion_curve.h
index 0e2a00e9d2e..0f66f4af755 100644
--- a/intern/cycles/kernel/geom/geom_motion_curve.h
+++ b/intern/cycles/kernel/geom/geom_motion_curve.h
@@ -50,14 +50,14 @@ ccl_device_inline int find_attribute_curve_motion(KernelGlobals *kg,
return (attr_map.y == ATTR_ELEMENT_NONE) ? (int)ATTR_STD_NOT_FOUND : (int)attr_map.z;
}
-ccl_device_inline void motion_curve_keys_for_step(KernelGlobals *kg,
- int offset,
- int numkeys,
- int numsteps,
- int step,
- int k0,
- int k1,
- float4 keys[2])
+ccl_device_inline void motion_curve_keys_for_step_linear(KernelGlobals *kg,
+ int offset,
+ int numkeys,
+ int numsteps,
+ int step,
+ int k0,
+ int k1,
+ float4 keys[2])
{
if (step == numsteps) {
/* center step: regular key location */
@@ -77,7 +77,7 @@ ccl_device_inline void motion_curve_keys_for_step(KernelGlobals *kg,
}
/* return 2 curve key locations */
-ccl_device_inline void motion_curve_keys(
+ccl_device_inline void motion_curve_keys_linear(
KernelGlobals *kg, int object, int prim, float time, int k0, int k1, float4 keys[2])
{
/* get motion info */
@@ -97,24 +97,24 @@ ccl_device_inline void motion_curve_keys(
/* fetch key coordinates */
float4 next_keys[2];
- motion_curve_keys_for_step(kg, offset, numkeys, numsteps, step, k0, k1, keys);
- motion_curve_keys_for_step(kg, offset, numkeys, numsteps, step + 1, k0, k1, next_keys);
+ motion_curve_keys_for_step_linear(kg, offset, numkeys, numsteps, step, k0, k1, keys);
+ motion_curve_keys_for_step_linear(kg, offset, numkeys, numsteps, step + 1, k0, k1, next_keys);
/* interpolate between steps */
keys[0] = (1.0f - t) * keys[0] + t * next_keys[0];
keys[1] = (1.0f - t) * keys[1] + t * next_keys[1];
}
-ccl_device_inline void motion_cardinal_curve_keys_for_step(KernelGlobals *kg,
- int offset,
- int numkeys,
- int numsteps,
- int step,
- int k0,
- int k1,
- int k2,
- int k3,
- float4 keys[4])
+ccl_device_inline void motion_curve_keys_for_step(KernelGlobals *kg,
+ int offset,
+ int numkeys,
+ int numsteps,
+ int step,
+ int k0,
+ int k1,
+ int k2,
+ int k3,
+ float4 keys[4])
{
if (step == numsteps) {
/* center step: regular key location */
@@ -138,15 +138,15 @@ ccl_device_inline void motion_cardinal_curve_keys_for_step(KernelGlobals *kg,
}
/* return 2 curve key locations */
-ccl_device_inline void motion_cardinal_curve_keys(KernelGlobals *kg,
- int object,
- int prim,
- float time,
- int k0,
- int k1,
- int k2,
- int k3,
- float4 keys[4])
+ccl_device_inline void motion_curve_keys(KernelGlobals *kg,
+ int object,
+ int prim,
+ float time,
+ int k0,
+ int k1,
+ int k2,
+ int k3,
+ float4 keys[4])
{
/* get motion info */
int numsteps, numkeys;
@@ -165,9 +165,8 @@ ccl_device_inline void motion_cardinal_curve_keys(KernelGlobals *kg,
/* fetch key coordinates */
float4 next_keys[4];
- motion_cardinal_curve_keys_for_step(kg, offset, numkeys, numsteps, step, k0, k1, k2, k3, keys);
- motion_cardinal_curve_keys_for_step(
- kg, offset, numkeys, numsteps, step + 1, k0, k1, k2, k3, next_keys);
+ motion_curve_keys_for_step(kg, offset, numkeys, numsteps, step, k0, k1, k2, k3, keys);
+ motion_curve_keys_for_step(kg, offset, numkeys, numsteps, step + 1, k0, k1, k2, k3, next_keys);
/* interpolate between steps */
keys[0] = (1.0f - t) * keys[0] + t * next_keys[0];
@@ -176,53 +175,6 @@ ccl_device_inline void motion_cardinal_curve_keys(KernelGlobals *kg,
keys[3] = (1.0f - t) * keys[3] + t * next_keys[3];
}
-# if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__)
-/* Similar to above, but returns keys as pair of two AVX registers with each
- * holding two float4.
- */
-ccl_device_inline void motion_cardinal_curve_keys_avx(KernelGlobals *kg,
- int object,
- int prim,
- float time,
- int k0,
- int k1,
- int k2,
- int k3,
- avxf *out_keys_0_1,
- avxf *out_keys_2_3)
-{
- /* Get motion info. */
- int numsteps, numkeys;
- object_motion_info(kg, object, &numsteps, NULL, &numkeys);
-
- /* Figure out which steps we need to fetch and their interpolation factor. */
- int maxstep = numsteps * 2;
- int step = min((int)(time * maxstep), maxstep - 1);
- float t = time * maxstep - step;
-
- /* Find attribute. */
- AttributeElement elem;
- int offset = find_attribute_curve_motion(kg, object, ATTR_STD_MOTION_VERTEX_POSITION, &elem);
- kernel_assert(offset != ATTR_STD_NOT_FOUND);
-
- /* Fetch key coordinates. */
- float4 next_keys[4];
- float4 keys[4];
- motion_cardinal_curve_keys_for_step(kg, offset, numkeys, numsteps, step, k0, k1, k2, k3, keys);
- motion_cardinal_curve_keys_for_step(
- kg, offset, numkeys, numsteps, step + 1, k0, k1, k2, k3, next_keys);
-
- const avxf keys_0_1 = avxf(keys[0].m128, keys[1].m128);
- const avxf keys_2_3 = avxf(keys[2].m128, keys[3].m128);
- const avxf next_keys_0_1 = avxf(next_keys[0].m128, next_keys[1].m128);
- const avxf next_keys_2_3 = avxf(next_keys[2].m128, next_keys[3].m128);
-
- /* Interpolate between steps. */
- *out_keys_0_1 = (1.0f - t) * keys_0_1 + t * next_keys_0_1;
- *out_keys_2_3 = (1.0f - t) * keys_2_3 + t * next_keys_2_3;
-}
-# endif
-
#endif
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/geom/geom_object.h b/intern/cycles/kernel/geom/geom_object.h
index 3aa68e1f84e..614e2e3b92b 100644
--- a/intern/cycles/kernel/geom/geom_object.h
+++ b/intern/cycles/kernel/geom/geom_object.h
@@ -411,25 +411,10 @@ ccl_device float3 particle_angular_velocity(KernelGlobals *kg, int particle)
ccl_device_inline float3 bvh_clamp_direction(float3 dir)
{
- /* clamp absolute values by exp2f(-80.0f) to avoid division by zero when calculating inverse
- * direction */
-#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__)
- const ssef oopes(8.271806E-25f, 8.271806E-25f, 8.271806E-25f, 0.0f);
- const ssef mask = _mm_cmpgt_ps(fabs(dir), oopes);
- const ssef signdir = signmsk(dir.m128) | oopes;
-# ifndef __KERNEL_AVX__
- ssef res = mask & ssef(dir);
- res = _mm_or_ps(res, _mm_andnot_ps(mask, signdir));
-# else
- ssef res = _mm_blendv_ps(signdir, dir, mask);
-# endif
- return float3(res);
-#else /* __KERNEL_SSE__ && __KERNEL_SSE2__ */
const float ooeps = 8.271806E-25f;
return make_float3((fabsf(dir.x) > ooeps) ? dir.x : copysignf(ooeps, dir.x),
(fabsf(dir.y) > ooeps) ? dir.y : copysignf(ooeps, dir.y),
(fabsf(dir.z) > ooeps) ? dir.z : copysignf(ooeps, dir.z));
-#endif /* __KERNEL_SSE__ && __KERNEL_SSE2__ */
}
ccl_device_inline float3 bvh_inverse_direction(float3 dir)
@@ -457,38 +442,6 @@ ccl_device_inline float bvh_instance_push(
return t;
}
-#ifdef __QBVH__
-/* Same as above, but optimized for QBVH scene intersection,
- * which needs to modify two max distances.
- *
- * TODO(sergey): Investigate if passing NULL instead of t1 gets optimized
- * so we can avoid having this duplication.
- */
-ccl_device_inline void qbvh_instance_push(KernelGlobals *kg,
- int object,
- const Ray *ray,
- float3 *P,
- float3 *dir,
- float3 *idir,
- float *t,
- float *t1)
-{
- Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
-
- *P = transform_point(&tfm, ray->P);
-
- float len;
- *dir = bvh_clamp_direction(normalize_len(transform_direction(&tfm, ray->D), &len));
- *idir = bvh_inverse_direction(*dir);
-
- if (*t != FLT_MAX)
- *t *= len;
-
- if (*t1 != -FLT_MAX)
- *t1 *= len;
-}
-#endif
-
/* Transorm ray to exit static object in BVH */
ccl_device_inline float bvh_instance_pop(
@@ -551,39 +504,6 @@ ccl_device_inline float bvh_instance_motion_push(KernelGlobals *kg,
return t;
}
-# ifdef __QBVH__
-/* Same as above, but optimized for QBVH scene intersection,
- * which needs to modify two max distances.
- *
- * TODO(sergey): Investigate if passing NULL instead of t1 gets optimized
- * so we can avoid having this duplication.
- */
-ccl_device_inline void qbvh_instance_motion_push(KernelGlobals *kg,
- int object,
- const Ray *ray,
- float3 *P,
- float3 *dir,
- float3 *idir,
- float *t,
- float *t1,
- Transform *itfm)
-{
- object_fetch_transform_motion_test(kg, object, ray->time, itfm);
-
- *P = transform_point(itfm, ray->P);
-
- float len;
- *dir = bvh_clamp_direction(normalize_len(transform_direction(itfm, ray->D), &len));
- *idir = bvh_inverse_direction(*dir);
-
- if (*t != FLT_MAX)
- *t *= len;
-
- if (*t1 != -FLT_MAX)
- *t1 *= len;
-}
-# endif
-
/* Transorm ray to exit motion blurred object in BVH */
ccl_device_inline float bvh_instance_motion_pop(KernelGlobals *kg,
diff --git a/intern/cycles/kernel/geom/geom_primitive.h b/intern/cycles/kernel/geom/geom_primitive.h
index 9a91da79f58..997abf438d0 100644
--- a/intern/cycles/kernel/geom/geom_primitive.h
+++ b/intern/cycles/kernel/geom/geom_primitive.h
@@ -174,6 +174,11 @@ ccl_device_inline float4 primitive_attribute_float4(KernelGlobals *kg,
else
return subd_triangle_attribute_float4(kg, sd, desc, dx, dy);
}
+#ifdef __HAIR__
+ else if (sd->type & PRIMITIVE_ALL_CURVE) {
+ return curve_attribute_float4(kg, sd, desc, dx, dy);
+ }
+#endif
else {
if (dx)
*dx = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
diff --git a/intern/cycles/kernel/geom/geom_triangle.h b/intern/cycles/kernel/geom/geom_triangle.h
index a2731bf2bd0..0278f3ade8e 100644
--- a/intern/cycles/kernel/geom/geom_triangle.h
+++ b/intern/cycles/kernel/geom/geom_triangle.h
@@ -312,12 +312,21 @@ ccl_device float4 triangle_attribute_float4(KernelGlobals *kg,
float4 *dx,
float4 *dy)
{
- if (desc.element == ATTR_ELEMENT_CORNER_BYTE) {
- int tri = desc.offset + sd->prim * 3;
-
- float4 f0 = color_uchar4_to_float4(kernel_tex_fetch(__attributes_uchar4, tri + 0));
- float4 f1 = color_uchar4_to_float4(kernel_tex_fetch(__attributes_uchar4, tri + 1));
- float4 f2 = color_uchar4_to_float4(kernel_tex_fetch(__attributes_uchar4, tri + 2));
+ if (desc.element == ATTR_ELEMENT_CORNER_BYTE || desc.element == ATTR_ELEMENT_VERTEX) {
+ float4 f0, f1, f2;
+
+ if (desc.element == ATTR_ELEMENT_CORNER_BYTE) {
+ int tri = desc.offset + sd->prim * 3;
+ f0 = color_uchar4_to_float4(kernel_tex_fetch(__attributes_uchar4, tri + 0));
+ f1 = color_uchar4_to_float4(kernel_tex_fetch(__attributes_uchar4, tri + 1));
+ f2 = color_uchar4_to_float4(kernel_tex_fetch(__attributes_uchar4, tri + 2));
+ }
+ else {
+ uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim);
+ f0 = kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.x);
+ f1 = kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.y);
+ f2 = kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.z);
+ }
#ifdef __RAY_DIFFERENTIALS__
if (dx)
diff --git a/intern/cycles/kernel/geom/geom_triangle_intersect.h b/intern/cycles/kernel/geom/geom_triangle_intersect.h
index 6604806f73b..b0cce274b94 100644
--- a/intern/cycles/kernel/geom/geom_triangle_intersect.h
+++ b/intern/cycles/kernel/geom/geom_triangle_intersect.h
@@ -71,433 +71,6 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
return false;
}
-#ifdef __KERNEL_AVX2__
-# define cross256(A, B, C, D) _mm256_fmsub_ps(A, B, _mm256_mul_ps(C, D))
-ccl_device_inline int ray_triangle_intersect8(KernelGlobals *kg,
- float3 ray_P,
- float3 ray_dir,
- Intersection **isect,
- uint visibility,
- int object,
- __m256 *triA,
- __m256 *triB,
- __m256 *triC,
- int prim_addr,
- int prim_num,
- uint *num_hits,
- uint max_hits,
- int *num_hits_in_instance,
- float isect_t)
-{
-
- const unsigned char prim_num_mask = (1 << prim_num) - 1;
-
- const __m256i zero256 = _mm256_setzero_si256();
-
- const __m256 Px256 = _mm256_set1_ps(ray_P.x);
- const __m256 Py256 = _mm256_set1_ps(ray_P.y);
- const __m256 Pz256 = _mm256_set1_ps(ray_P.z);
-
- const __m256 dirx256 = _mm256_set1_ps(ray_dir.x);
- const __m256 diry256 = _mm256_set1_ps(ray_dir.y);
- const __m256 dirz256 = _mm256_set1_ps(ray_dir.z);
-
- /* Calculate vertices relative to ray origin. */
- __m256 v0_x_256 = _mm256_sub_ps(triC[0], Px256);
- __m256 v0_y_256 = _mm256_sub_ps(triC[1], Py256);
- __m256 v0_z_256 = _mm256_sub_ps(triC[2], Pz256);
-
- __m256 v1_x_256 = _mm256_sub_ps(triA[0], Px256);
- __m256 v1_y_256 = _mm256_sub_ps(triA[1], Py256);
- __m256 v1_z_256 = _mm256_sub_ps(triA[2], Pz256);
-
- __m256 v2_x_256 = _mm256_sub_ps(triB[0], Px256);
- __m256 v2_y_256 = _mm256_sub_ps(triB[1], Py256);
- __m256 v2_z_256 = _mm256_sub_ps(triB[2], Pz256);
-
- __m256 v0_v1_x_256 = _mm256_add_ps(v0_x_256, v1_x_256);
- __m256 v0_v1_y_256 = _mm256_add_ps(v0_y_256, v1_y_256);
- __m256 v0_v1_z_256 = _mm256_add_ps(v0_z_256, v1_z_256);
-
- __m256 v0_v2_x_256 = _mm256_add_ps(v0_x_256, v2_x_256);
- __m256 v0_v2_y_256 = _mm256_add_ps(v0_y_256, v2_y_256);
- __m256 v0_v2_z_256 = _mm256_add_ps(v0_z_256, v2_z_256);
-
- __m256 v1_v2_x_256 = _mm256_add_ps(v1_x_256, v2_x_256);
- __m256 v1_v2_y_256 = _mm256_add_ps(v1_y_256, v2_y_256);
- __m256 v1_v2_z_256 = _mm256_add_ps(v1_z_256, v2_z_256);
-
- /* Calculate triangle edges. */
- __m256 e0_x_256 = _mm256_sub_ps(v2_x_256, v0_x_256);
- __m256 e0_y_256 = _mm256_sub_ps(v2_y_256, v0_y_256);
- __m256 e0_z_256 = _mm256_sub_ps(v2_z_256, v0_z_256);
-
- __m256 e1_x_256 = _mm256_sub_ps(v0_x_256, v1_x_256);
- __m256 e1_y_256 = _mm256_sub_ps(v0_y_256, v1_y_256);
- __m256 e1_z_256 = _mm256_sub_ps(v0_z_256, v1_z_256);
-
- __m256 e2_x_256 = _mm256_sub_ps(v1_x_256, v2_x_256);
- __m256 e2_y_256 = _mm256_sub_ps(v1_y_256, v2_y_256);
- __m256 e2_z_256 = _mm256_sub_ps(v1_z_256, v2_z_256);
-
- /* Perform edge tests. */
- /* cross (AyBz - AzBy, AzBx -AxBz, AxBy - AyBx) */
- __m256 U_x_256 = cross256(v0_v2_y_256, e0_z_256, v0_v2_z_256, e0_y_256);
- __m256 U_y_256 = cross256(v0_v2_z_256, e0_x_256, v0_v2_x_256, e0_z_256);
- __m256 U_z_256 = cross256(v0_v2_x_256, e0_y_256, v0_v2_y_256, e0_x_256);
- /* vertical dot */
- __m256 U_256 = _mm256_mul_ps(U_x_256, dirx256);
- U_256 = _mm256_fmadd_ps(U_y_256, diry256, U_256);
- U_256 = _mm256_fmadd_ps(U_z_256, dirz256, U_256);
-
- __m256 V_x_256 = cross256(v0_v1_y_256, e1_z_256, v0_v1_z_256, e1_y_256);
- __m256 V_y_256 = cross256(v0_v1_z_256, e1_x_256, v0_v1_x_256, e1_z_256);
- __m256 V_z_256 = cross256(v0_v1_x_256, e1_y_256, v0_v1_y_256, e1_x_256);
- /* vertical dot */
- __m256 V_256 = _mm256_mul_ps(V_x_256, dirx256);
- V_256 = _mm256_fmadd_ps(V_y_256, diry256, V_256);
- V_256 = _mm256_fmadd_ps(V_z_256, dirz256, V_256);
-
- __m256 W_x_256 = cross256(v1_v2_y_256, e2_z_256, v1_v2_z_256, e2_y_256);
- __m256 W_y_256 = cross256(v1_v2_z_256, e2_x_256, v1_v2_x_256, e2_z_256);
- __m256 W_z_256 = cross256(v1_v2_x_256, e2_y_256, v1_v2_y_256, e2_x_256);
- /* vertical dot */
- __m256 W_256 = _mm256_mul_ps(W_x_256, dirx256);
- W_256 = _mm256_fmadd_ps(W_y_256, diry256, W_256);
- W_256 = _mm256_fmadd_ps(W_z_256, dirz256, W_256);
-
- __m256i U_256_1 = _mm256_srli_epi32(_mm256_castps_si256(U_256), 31);
- __m256i V_256_1 = _mm256_srli_epi32(_mm256_castps_si256(V_256), 31);
- __m256i W_256_1 = _mm256_srli_epi32(_mm256_castps_si256(W_256), 31);
- __m256i UVW_256_1 = _mm256_add_epi32(_mm256_add_epi32(U_256_1, V_256_1), W_256_1);
-
- const __m256i one256 = _mm256_set1_epi32(1);
- const __m256i two256 = _mm256_set1_epi32(2);
-
- __m256i mask_minmaxUVW_256 = _mm256_or_si256(_mm256_cmpeq_epi32(one256, UVW_256_1),
- _mm256_cmpeq_epi32(two256, UVW_256_1));
-
- unsigned char mask_minmaxUVW_pos = _mm256_movemask_ps(_mm256_castsi256_ps(mask_minmaxUVW_256));
- if ((mask_minmaxUVW_pos & prim_num_mask) == prim_num_mask) { // all bits set
- return false;
- }
-
- /* Calculate geometry normal and denominator. */
- __m256 Ng1_x_256 = cross256(e1_y_256, e0_z_256, e1_z_256, e0_y_256);
- __m256 Ng1_y_256 = cross256(e1_z_256, e0_x_256, e1_x_256, e0_z_256);
- __m256 Ng1_z_256 = cross256(e1_x_256, e0_y_256, e1_y_256, e0_x_256);
-
- Ng1_x_256 = _mm256_add_ps(Ng1_x_256, Ng1_x_256);
- Ng1_y_256 = _mm256_add_ps(Ng1_y_256, Ng1_y_256);
- Ng1_z_256 = _mm256_add_ps(Ng1_z_256, Ng1_z_256);
-
- /* vertical dot */
- __m256 den_256 = _mm256_mul_ps(Ng1_x_256, dirx256);
- den_256 = _mm256_fmadd_ps(Ng1_y_256, diry256, den_256);
- den_256 = _mm256_fmadd_ps(Ng1_z_256, dirz256, den_256);
-
- /* Perform depth test. */
- __m256 T_256 = _mm256_mul_ps(Ng1_x_256, v0_x_256);
- T_256 = _mm256_fmadd_ps(Ng1_y_256, v0_y_256, T_256);
- T_256 = _mm256_fmadd_ps(Ng1_z_256, v0_z_256, T_256);
-
- const __m256i c0x80000000 = _mm256_set1_epi32(0x80000000);
- __m256i sign_den_256 = _mm256_and_si256(_mm256_castps_si256(den_256), c0x80000000);
-
- __m256 sign_T_256 = _mm256_castsi256_ps(
- _mm256_xor_si256(_mm256_castps_si256(T_256), sign_den_256));
-
- unsigned char mask_sign_T = _mm256_movemask_ps(sign_T_256);
- if (((mask_minmaxUVW_pos | mask_sign_T) & prim_num_mask) == prim_num_mask) {
- return false;
- }
-
- __m256 xor_signmask_256 = _mm256_castsi256_ps(
- _mm256_xor_si256(_mm256_castps_si256(den_256), sign_den_256));
-
- ccl_align(32) float den8[8], U8[8], V8[8], T8[8], sign_T8[8], xor_signmask8[8];
- ccl_align(32) unsigned int mask_minmaxUVW8[8];
-
- if (visibility == PATH_RAY_SHADOW_OPAQUE) {
- __m256i mask_final_256 = _mm256_cmpeq_epi32(mask_minmaxUVW_256, zero256);
- __m256i maskden256 = _mm256_cmpeq_epi32(_mm256_castps_si256(den_256), zero256);
- __m256i mask0 = _mm256_cmpgt_epi32(zero256, _mm256_castps_si256(sign_T_256));
- __m256 rayt_256 = _mm256_set1_ps((*isect)->t);
- __m256i mask1 = _mm256_cmpgt_epi32(
- _mm256_castps_si256(sign_T_256),
- _mm256_castps_si256(_mm256_mul_ps(
- _mm256_castsi256_ps(_mm256_xor_si256(_mm256_castps_si256(den_256), sign_den_256)),
- rayt_256)));
- mask0 = _mm256_or_si256(mask1, mask0);
- mask_final_256 = _mm256_andnot_si256(mask0, mask_final_256); //(~mask_minmaxUVW_pos) &(~mask)
- mask_final_256 = _mm256_andnot_si256(
- maskden256, mask_final_256); //(~mask_minmaxUVW_pos) &(~mask) & (~maskden)
- int mask_final = _mm256_movemask_ps(_mm256_castsi256_ps(mask_final_256));
- if ((mask_final & prim_num_mask) == 0) {
- return false;
- }
- while (mask_final != 0) {
- const int i = __bscf(mask_final);
- if (i >= prim_num) {
- return false;
- }
-# ifdef __VISIBILITY_FLAG__
- if ((kernel_tex_fetch(__prim_visibility, (prim_addr + i)) & visibility) == 0) {
- continue;
- }
-# endif
- __m256 inv_den_256 = _mm256_rcp_ps(den_256);
- U_256 = _mm256_mul_ps(U_256, inv_den_256);
- V_256 = _mm256_mul_ps(V_256, inv_den_256);
- T_256 = _mm256_mul_ps(T_256, inv_den_256);
- _mm256_store_ps(U8, U_256);
- _mm256_store_ps(V8, V_256);
- _mm256_store_ps(T8, T_256);
- (*isect)->u = U8[i];
- (*isect)->v = V8[i];
- (*isect)->t = T8[i];
- (*isect)->prim = (prim_addr + i);
- (*isect)->object = object;
- (*isect)->type = PRIMITIVE_TRIANGLE;
- return true;
- }
- return false;
- }
- else {
- _mm256_store_ps(den8, den_256);
- _mm256_store_ps(U8, U_256);
- _mm256_store_ps(V8, V_256);
- _mm256_store_ps(T8, T_256);
-
- _mm256_store_ps(sign_T8, sign_T_256);
- _mm256_store_ps(xor_signmask8, xor_signmask_256);
- _mm256_store_si256((__m256i *)mask_minmaxUVW8, mask_minmaxUVW_256);
-
- int ret = false;
-
- if (visibility == PATH_RAY_SHADOW) {
- for (int i = 0; i < prim_num; i++) {
- if (mask_minmaxUVW8[i]) {
- continue;
- }
-# ifdef __VISIBILITY_FLAG__
- if ((kernel_tex_fetch(__prim_visibility, (prim_addr + i)) & visibility) == 0) {
- continue;
- }
-# endif
- if ((sign_T8[i] < 0.0f) || (sign_T8[i] > (*isect)->t * xor_signmask8[i])) {
- continue;
- }
- if (!den8[i]) {
- continue;
- }
- const float inv_den = 1.0f / den8[i];
- (*isect)->u = U8[i] * inv_den;
- (*isect)->v = V8[i] * inv_den;
- (*isect)->t = T8[i] * inv_den;
- (*isect)->prim = (prim_addr + i);
- (*isect)->object = object;
- (*isect)->type = PRIMITIVE_TRIANGLE;
- const int prim = kernel_tex_fetch(__prim_index, (*isect)->prim);
- int shader = 0;
-# ifdef __HAIR__
- if (kernel_tex_fetch(__prim_type, (*isect)->prim) & PRIMITIVE_ALL_TRIANGLE)
-# endif
- {
- shader = kernel_tex_fetch(__tri_shader, prim);
- }
-# ifdef __HAIR__
- else {
- float4 str = kernel_tex_fetch(__curves, prim);
- shader = __float_as_int(str.z);
- }
-# endif
- const int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
- /* If no transparent shadows, all light is blocked. */
- if (!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
- return 2;
- }
- /* If maximum number of hits reached, block all light. */
- else if (num_hits == NULL || *num_hits == max_hits) {
- return 2;
- }
- /* Move on to next entry in intersections array. */
- ret = true;
- (*isect)++;
- (*num_hits)++;
- (*num_hits_in_instance)++;
- (*isect)->t = isect_t;
- }
- }
- else {
- for (int i = 0; i < prim_num; i++) {
- if (mask_minmaxUVW8[i]) {
- continue;
- }
-# ifdef __VISIBILITY_FLAG__
- if ((kernel_tex_fetch(__prim_visibility, (prim_addr + i)) & visibility) == 0) {
- continue;
- }
-# endif
- if ((sign_T8[i] < 0.0f) || (sign_T8[i] > (*isect)->t * xor_signmask8[i])) {
- continue;
- }
- if (!den8[i]) {
- continue;
- }
- const float inv_den = 1.0f / den8[i];
- (*isect)->u = U8[i] * inv_den;
- (*isect)->v = V8[i] * inv_den;
- (*isect)->t = T8[i] * inv_den;
- (*isect)->prim = (prim_addr + i);
- (*isect)->object = object;
- (*isect)->type = PRIMITIVE_TRIANGLE;
- ret = true;
- }
- }
- return ret;
- }
-}
-
-ccl_device_inline int triangle_intersect8(KernelGlobals *kg,
- Intersection **isect,
- float3 P,
- float3 dir,
- uint visibility,
- int object,
- int prim_addr,
- int prim_num,
- uint *num_hits,
- uint max_hits,
- int *num_hits_in_instance,
- float isect_t)
-{
- __m128 tri_a[8], tri_b[8], tri_c[8];
- __m256 tritmp[12], tri[12];
- __m256 triA[3], triB[3], triC[3];
-
- int i, r;
-
- uint tri_vindex = kernel_tex_fetch(__prim_tri_index, prim_addr);
- for (i = 0; i < prim_num; i++) {
- tri_a[i] = *(__m128 *)&kg->__prim_tri_verts.data[tri_vindex++];
- tri_b[i] = *(__m128 *)&kg->__prim_tri_verts.data[tri_vindex++];
- tri_c[i] = *(__m128 *)&kg->__prim_tri_verts.data[tri_vindex++];
- }
- // create 9 or 12 placeholders
- tri[0] = _mm256_castps128_ps256(tri_a[0]); //_mm256_zextps128_ps256
- tri[1] = _mm256_castps128_ps256(tri_b[0]); //_mm256_zextps128_ps256
- tri[2] = _mm256_castps128_ps256(tri_c[0]); //_mm256_zextps128_ps256
-
- tri[3] = _mm256_castps128_ps256(tri_a[1]); //_mm256_zextps128_ps256
- tri[4] = _mm256_castps128_ps256(tri_b[1]); //_mm256_zextps128_ps256
- tri[5] = _mm256_castps128_ps256(tri_c[1]); //_mm256_zextps128_ps256
-
- tri[6] = _mm256_castps128_ps256(tri_a[2]); //_mm256_zextps128_ps256
- tri[7] = _mm256_castps128_ps256(tri_b[2]); //_mm256_zextps128_ps256
- tri[8] = _mm256_castps128_ps256(tri_c[2]); //_mm256_zextps128_ps256
-
- if (prim_num > 3) {
- tri[9] = _mm256_castps128_ps256(tri_a[3]); //_mm256_zextps128_ps256
- tri[10] = _mm256_castps128_ps256(tri_b[3]); //_mm256_zextps128_ps256
- tri[11] = _mm256_castps128_ps256(tri_c[3]); //_mm256_zextps128_ps256
- }
-
- for (i = 4, r = 0; i < prim_num; i++, r += 3) {
- tri[r] = _mm256_insertf128_ps(tri[r], tri_a[i], 1);
- tri[r + 1] = _mm256_insertf128_ps(tri[r + 1], tri_b[i], 1);
- tri[r + 2] = _mm256_insertf128_ps(tri[r + 2], tri_c[i], 1);
- }
-
- //------------------------------------------------
- // 0! Xa0 Ya0 Za0 1 Xa4 Ya4 Za4 1
- // 1! Xb0 Yb0 Zb0 1 Xb4 Yb4 Zb4 1
- // 2! Xc0 Yc0 Zc0 1 Xc4 Yc4 Zc4 1
-
- // 3! Xa1 Ya1 Za1 1 Xa5 Ya5 Za5 1
- // 4! Xb1 Yb1 Zb1 1 Xb5 Yb5 Zb5 1
- // 5! Xc1 Yc1 Zc1 1 Xc5 Yc5 Zc5 1
-
- // 6! Xa2 Ya2 Za2 1 Xa6 Ya6 Za6 1
- // 7! Xb2 Yb2 Zb2 1 Xb6 Yb6 Zb6 1
- // 8! Xc2 Yc2 Zc2 1 Xc6 Yc6 Zc6 1
-
- // 9! Xa3 Ya3 Za3 1 Xa7 Ya7 Za7 1
- // 10! Xb3 Yb3 Zb3 1 Xb7 Yb7 Zb7 1
- // 11! Xc3 Yc3 Zc3 1 Xc7 Yc7 Zc7 1
-
- //"transpose"
- tritmp[0] = _mm256_unpacklo_ps(tri[0], tri[3]); // 0! Xa0 Xa1 Ya0 Ya1 Xa4 Xa5 Ya4 Ya5
- tritmp[1] = _mm256_unpackhi_ps(tri[0], tri[3]); // 1! Za0 Za1 1 1 Za4 Za5 1 1
-
- tritmp[2] = _mm256_unpacklo_ps(tri[6], tri[9]); // 2! Xa2 Xa3 Ya2 Ya3 Xa6 Xa7 Ya6 Ya7
- tritmp[3] = _mm256_unpackhi_ps(tri[6], tri[9]); // 3! Za2 Za3 1 1 Za6 Za7 1 1
-
- tritmp[4] = _mm256_unpacklo_ps(tri[1], tri[4]); // 4! Xb0 Xb1 Yb0 Yb1 Xb4 Xb5 Yb4 Yb5
- tritmp[5] = _mm256_unpackhi_ps(tri[1], tri[4]); // 5! Zb0 Zb1 1 1 Zb4 Zb5 1 1
-
- tritmp[6] = _mm256_unpacklo_ps(tri[7], tri[10]); // 6! Xb2 Xb3 Yb2 Yb3 Xb6 Xb7 Yb6 Yb7
- tritmp[7] = _mm256_unpackhi_ps(tri[7], tri[10]); // 7! Zb2 Zb3 1 1 Zb6 Zb7 1 1
-
- tritmp[8] = _mm256_unpacklo_ps(tri[2], tri[5]); // 8! Xc0 Xc1 Yc0 Yc1 Xc4 Xc5 Yc4 Yc5
- tritmp[9] = _mm256_unpackhi_ps(tri[2], tri[5]); // 9! Zc0 Zc1 1 1 Zc4 Zc5 1 1
-
- tritmp[10] = _mm256_unpacklo_ps(tri[8], tri[11]); // 10! Xc2 Xc3 Yc2 Yc3 Xc6 Xc7 Yc6 Yc7
- tritmp[11] = _mm256_unpackhi_ps(tri[8], tri[11]); // 11! Zc2 Zc3 1 1 Zc6 Zc7 1 1
-
- /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
- triA[0] = _mm256_castpd_ps(
- _mm256_unpacklo_pd(_mm256_castps_pd(tritmp[0]),
- _mm256_castps_pd(tritmp[2]))); // Xa0 Xa1 Xa2 Xa3 Xa4 Xa5 Xa6 Xa7
- triA[1] = _mm256_castpd_ps(
- _mm256_unpackhi_pd(_mm256_castps_pd(tritmp[0]),
- _mm256_castps_pd(tritmp[2]))); // Ya0 Ya1 Ya2 Ya3 Ya4 Ya5 Ya6 Ya7
- triA[2] = _mm256_castpd_ps(
- _mm256_unpacklo_pd(_mm256_castps_pd(tritmp[1]),
- _mm256_castps_pd(tritmp[3]))); // Za0 Za1 Za2 Za3 Za4 Za5 Za6 Za7
-
- triB[0] = _mm256_castpd_ps(
- _mm256_unpacklo_pd(_mm256_castps_pd(tritmp[4]),
- _mm256_castps_pd(tritmp[6]))); // Xb0 Xb1 Xb2 Xb3 Xb4 Xb5 Xb5 Xb7
- triB[1] = _mm256_castpd_ps(
- _mm256_unpackhi_pd(_mm256_castps_pd(tritmp[4]),
- _mm256_castps_pd(tritmp[6]))); // Yb0 Yb1 Yb2 Yb3 Yb4 Yb5 Yb5 Yb7
- triB[2] = _mm256_castpd_ps(
- _mm256_unpacklo_pd(_mm256_castps_pd(tritmp[5]),
- _mm256_castps_pd(tritmp[7]))); // Zb0 Zb1 Zb2 Zb3 Zb4 Zb5 Zb5 Zb7
-
- triC[0] = _mm256_castpd_ps(
- _mm256_unpacklo_pd(_mm256_castps_pd(tritmp[8]),
- _mm256_castps_pd(tritmp[10]))); // Xc0 Xc1 Xc2 Xc3 Xc4 Xc5 Xc6 Xc7
- triC[1] = _mm256_castpd_ps(
- _mm256_unpackhi_pd(_mm256_castps_pd(tritmp[8]),
- _mm256_castps_pd(tritmp[10]))); // Yc0 Yc1 Yc2 Yc3 Yc4 Yc5 Yc6 Yc7
- triC[2] = _mm256_castpd_ps(
- _mm256_unpacklo_pd(_mm256_castps_pd(tritmp[9]),
- _mm256_castps_pd(tritmp[11]))); // Zc0 Zc1 Zc2 Zc3 Zc4 Zc5 Zc6 Zc7
-
- /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
-
- int result = ray_triangle_intersect8(kg,
- P,
- dir,
- isect,
- visibility,
- object,
- triA,
- triB,
- triC,
- prim_addr,
- prim_num,
- num_hits,
- max_hits,
- num_hits_in_instance,
- isect_t);
- return result;
-}
-
-#endif /* __KERNEL_AVX2__ */
-
/* Special ray intersection routines for subsurface scattering. In that case we
* only want to intersect with primitives in the same object, and if case of
* multiple hits we pick a single random primitive as the intersection point.
diff --git a/intern/cycles/kernel/kernel_emission.h b/intern/cycles/kernel/kernel_emission.h
index 71b176a0a8f..4ac07d86dda 100644
--- a/intern/cycles/kernel/kernel_emission.h
+++ b/intern/cycles/kernel/kernel_emission.h
@@ -326,9 +326,7 @@ ccl_device_noinline_cpu float3 indirect_background(KernelGlobals *kg,
/* Background MIS weights. */
# ifdef __BACKGROUND_MIS__
/* Check if background light exists or if we should skip pdf. */
- int res_x = kernel_data.integrator.pdf_background_res_x;
-
- if (!(state->flag & PATH_RAY_MIS_SKIP) && res_x) {
+ if (!(state->flag & PATH_RAY_MIS_SKIP) && kernel_data.background.use_mis) {
/* multiple importance sampling, get background light pdf for ray
* direction, and compute weight with respect to BSDF pdf */
float pdf = background_light_pdf(kg, ray->P, ray->D);
diff --git a/intern/cycles/kernel/kernel_light.h b/intern/cycles/kernel/kernel_light.h
index 04472212d0c..138b90373a6 100644
--- a/intern/cycles/kernel/kernel_light.h
+++ b/intern/cycles/kernel/kernel_light.h
@@ -14,6 +14,8 @@
* limitations under the License.
*/
+#include "kernel_light_background.h"
+
CCL_NAMESPACE_BEGIN
/* Light Sample result */
@@ -33,500 +35,6 @@ typedef struct LightSample {
LightType type; /* type of light */
} LightSample;
-/* Area light sampling */
-
-/* Uses the following paper:
- *
- * Carlos Urena et al.
- * An Area-Preserving Parametrization for Spherical Rectangles.
- *
- * https://www.solidangle.com/research/egsr2013_spherical_rectangle.pdf
- *
- * Note: light_p is modified when sample_coord is true.
- */
-ccl_device_inline float rect_light_sample(float3 P,
- float3 *light_p,
- float3 axisu,
- float3 axisv,
- float randu,
- float randv,
- bool sample_coord)
-{
- /* In our name system we're using P for the center,
- * which is o in the paper.
- */
-
- float3 corner = *light_p - axisu * 0.5f - axisv * 0.5f;
- float axisu_len, axisv_len;
- /* Compute local reference system R. */
- float3 x = normalize_len(axisu, &axisu_len);
- float3 y = normalize_len(axisv, &axisv_len);
- float3 z = cross(x, y);
- /* Compute rectangle coords in local reference system. */
- float3 dir = corner - P;
- float z0 = dot(dir, z);
- /* Flip 'z' to make it point against Q. */
- if (z0 > 0.0f) {
- z *= -1.0f;
- z0 *= -1.0f;
- }
- float x0 = dot(dir, x);
- float y0 = dot(dir, y);
- float x1 = x0 + axisu_len;
- float y1 = y0 + axisv_len;
- /* Compute internal angles (gamma_i). */
- float4 diff = make_float4(x0, y1, x1, y0) - make_float4(x1, y0, x0, y1);
- float4 nz = make_float4(y0, x1, y1, x0) * diff;
- nz = nz / sqrt(z0 * z0 * diff * diff + nz * nz);
- float g0 = safe_acosf(-nz.x * nz.y);
- float g1 = safe_acosf(-nz.y * nz.z);
- float g2 = safe_acosf(-nz.z * nz.w);
- float g3 = safe_acosf(-nz.w * nz.x);
- /* Compute predefined constants. */
- float b0 = nz.x;
- float b1 = nz.z;
- float b0sq = b0 * b0;
- float k = M_2PI_F - g2 - g3;
- /* Compute solid angle from internal angles. */
- float S = g0 + g1 - k;
-
- if (sample_coord) {
- /* Compute cu. */
- float au = randu * S + k;
- float fu = (cosf(au) * b0 - b1) / sinf(au);
- float cu = 1.0f / sqrtf(fu * fu + b0sq) * (fu > 0.0f ? 1.0f : -1.0f);
- cu = clamp(cu, -1.0f, 1.0f);
- /* Compute xu. */
- float xu = -(cu * z0) / max(sqrtf(1.0f - cu * cu), 1e-7f);
- xu = clamp(xu, x0, x1);
- /* Compute yv. */
- float z0sq = z0 * z0;
- float y0sq = y0 * y0;
- float y1sq = y1 * y1;
- float d = sqrtf(xu * xu + z0sq);
- float h0 = y0 / sqrtf(d * d + y0sq);
- float h1 = y1 / sqrtf(d * d + y1sq);
- float hv = h0 + randv * (h1 - h0), hv2 = hv * hv;
- float yv = (hv2 < 1.0f - 1e-6f) ? (hv * d) / sqrtf(1.0f - hv2) : y1;
-
- /* Transform (xu, yv, z0) to world coords. */
- *light_p = P + xu * x + yv * y + z0 * z;
- }
-
- /* return pdf */
- if (S != 0.0f)
- return 1.0f / S;
- else
- return 0.0f;
-}
-
-ccl_device_inline float3 ellipse_sample(float3 ru, float3 rv, float randu, float randv)
-{
- to_unit_disk(&randu, &randv);
- return ru * randu + rv * randv;
-}
-
-ccl_device float3 disk_light_sample(float3 v, float randu, float randv)
-{
- float3 ru, rv;
-
- make_orthonormals(v, &ru, &rv);
-
- return ellipse_sample(ru, rv, randu, randv);
-}
-
-ccl_device float3 distant_light_sample(float3 D, float radius, float randu, float randv)
-{
- return normalize(D + disk_light_sample(D, randu, randv) * radius);
-}
-
-ccl_device float3
-sphere_light_sample(float3 P, float3 center, float radius, float randu, float randv)
-{
- return disk_light_sample(normalize(P - center), randu, randv) * radius;
-}
-
-ccl_device float spot_light_attenuation(float3 dir,
- float spot_angle,
- float spot_smooth,
- LightSample *ls)
-{
- float3 I = ls->Ng;
-
- float attenuation = dot(dir, I);
-
- if (attenuation <= spot_angle) {
- attenuation = 0.0f;
- }
- else {
- float t = attenuation - spot_angle;
-
- if (t < spot_smooth && spot_smooth != 0.0f)
- attenuation *= smoothstepf(t / spot_smooth);
- }
-
- return attenuation;
-}
-
-ccl_device float lamp_light_pdf(KernelGlobals *kg, const float3 Ng, const float3 I, float t)
-{
- float cos_pi = dot(Ng, I);
-
- if (cos_pi <= 0.0f)
- return 0.0f;
-
- return t * t / cos_pi;
-}
-
-/* Background Light */
-
-#ifdef __BACKGROUND_MIS__
-
-ccl_device float3 background_map_sample(KernelGlobals *kg, float randu, float randv, float *pdf)
-{
- /* for the following, the CDF values are actually a pair of floats, with the
- * function value as X and the actual CDF as Y. The last entry's function
- * value is the CDF total. */
- int res_x = kernel_data.integrator.pdf_background_res_x;
- int res_y = kernel_data.integrator.pdf_background_res_y;
- int cdf_width = res_x + 1;
-
- /* this is basically std::lower_bound as used by pbrt */
- int first = 0;
- int count = res_y;
-
- while (count > 0) {
- int step = count >> 1;
- int middle = first + step;
-
- if (kernel_tex_fetch(__light_background_marginal_cdf, middle).y < randv) {
- first = middle + 1;
- count -= step + 1;
- }
- else
- count = step;
- }
-
- int index_v = max(0, first - 1);
- kernel_assert(index_v >= 0 && index_v < res_y);
-
- float2 cdf_v = kernel_tex_fetch(__light_background_marginal_cdf, index_v);
- float2 cdf_next_v = kernel_tex_fetch(__light_background_marginal_cdf, index_v + 1);
- float2 cdf_last_v = kernel_tex_fetch(__light_background_marginal_cdf, res_y);
-
- /* importance-sampled V direction */
- float dv = inverse_lerp(cdf_v.y, cdf_next_v.y, randv);
- float v = (index_v + dv) / res_y;
-
- /* this is basically std::lower_bound as used by pbrt */
- first = 0;
- count = res_x;
- while (count > 0) {
- int step = count >> 1;
- int middle = first + step;
-
- if (kernel_tex_fetch(__light_background_conditional_cdf, index_v * cdf_width + middle).y <
- randu) {
- first = middle + 1;
- count -= step + 1;
- }
- else
- count = step;
- }
-
- int index_u = max(0, first - 1);
- kernel_assert(index_u >= 0 && index_u < res_x);
-
- float2 cdf_u = kernel_tex_fetch(__light_background_conditional_cdf,
- index_v * cdf_width + index_u);
- float2 cdf_next_u = kernel_tex_fetch(__light_background_conditional_cdf,
- index_v * cdf_width + index_u + 1);
- float2 cdf_last_u = kernel_tex_fetch(__light_background_conditional_cdf,
- index_v * cdf_width + res_x);
-
- /* importance-sampled U direction */
- float du = inverse_lerp(cdf_u.y, cdf_next_u.y, randu);
- float u = (index_u + du) / res_x;
-
- /* compute pdf */
- float sin_theta = sinf(M_PI_F * v);
- float denom = (M_2PI_F * M_PI_F * sin_theta) * cdf_last_u.x * cdf_last_v.x;
-
- if (sin_theta == 0.0f || denom == 0.0f)
- *pdf = 0.0f;
- else
- *pdf = (cdf_u.x * cdf_v.x) / denom;
-
- /* compute direction */
- return equirectangular_to_direction(u, v);
-}
-
-/* TODO(sergey): Same as above, after the release we should consider using
- * 'noinline' for all devices.
- */
-ccl_device float background_map_pdf(KernelGlobals *kg, float3 direction)
-{
- float2 uv = direction_to_equirectangular(direction);
- int res_x = kernel_data.integrator.pdf_background_res_x;
- int res_y = kernel_data.integrator.pdf_background_res_y;
- int cdf_width = res_x + 1;
-
- float sin_theta = sinf(uv.y * M_PI_F);
-
- if (sin_theta == 0.0f)
- return 0.0f;
-
- int index_u = clamp(float_to_int(uv.x * res_x), 0, res_x - 1);
- int index_v = clamp(float_to_int(uv.y * res_y), 0, res_y - 1);
-
- /* pdfs in V direction */
- float2 cdf_last_u = kernel_tex_fetch(__light_background_conditional_cdf,
- index_v * cdf_width + res_x);
- float2 cdf_last_v = kernel_tex_fetch(__light_background_marginal_cdf, res_y);
-
- float denom = (M_2PI_F * M_PI_F * sin_theta) * cdf_last_u.x * cdf_last_v.x;
-
- if (denom == 0.0f)
- return 0.0f;
-
- /* pdfs in U direction */
- float2 cdf_u = kernel_tex_fetch(__light_background_conditional_cdf,
- index_v * cdf_width + index_u);
- float2 cdf_v = kernel_tex_fetch(__light_background_marginal_cdf, index_v);
-
- return (cdf_u.x * cdf_v.x) / denom;
-}
-
-ccl_device_inline bool background_portal_data_fetch_and_check_side(
- KernelGlobals *kg, float3 P, int index, float3 *lightpos, float3 *dir)
-{
- int portal = kernel_data.integrator.portal_offset + index;
- const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, portal);
-
- *lightpos = make_float3(klight->co[0], klight->co[1], klight->co[2]);
- *dir = make_float3(klight->area.dir[0], klight->area.dir[1], klight->area.dir[2]);
-
- /* Check whether portal is on the right side. */
- if (dot(*dir, P - *lightpos) > 1e-4f)
- return true;
-
- return false;
-}
-
-ccl_device_inline float background_portal_pdf(
- KernelGlobals *kg, float3 P, float3 direction, int ignore_portal, bool *is_possible)
-{
- float portal_pdf = 0.0f;
-
- int num_possible = 0;
- for (int p = 0; p < kernel_data.integrator.num_portals; p++) {
- if (p == ignore_portal)
- continue;
-
- float3 lightpos, dir;
- if (!background_portal_data_fetch_and_check_side(kg, P, p, &lightpos, &dir))
- continue;
-
- /* There's a portal that could be sampled from this position. */
- if (is_possible) {
- *is_possible = true;
- }
- num_possible++;
-
- int portal = kernel_data.integrator.portal_offset + p;
- const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, portal);
- float3 axisu = make_float3(
- klight->area.axisu[0], klight->area.axisu[1], klight->area.axisu[2]);
- float3 axisv = make_float3(
- klight->area.axisv[0], klight->area.axisv[1], klight->area.axisv[2]);
- bool is_round = (klight->area.invarea < 0.0f);
-
- if (!ray_quad_intersect(P,
- direction,
- 1e-4f,
- FLT_MAX,
- lightpos,
- axisu,
- axisv,
- dir,
- NULL,
- NULL,
- NULL,
- NULL,
- is_round))
- continue;
-
- if (is_round) {
- float t;
- float3 D = normalize_len(lightpos - P, &t);
- portal_pdf += fabsf(klight->area.invarea) * lamp_light_pdf(kg, dir, -D, t);
- }
- else {
- portal_pdf += rect_light_sample(P, &lightpos, axisu, axisv, 0.0f, 0.0f, false);
- }
- }
-
- if (ignore_portal >= 0) {
- /* We have skipped a portal that could be sampled as well. */
- num_possible++;
- }
-
- return (num_possible > 0) ? portal_pdf / num_possible : 0.0f;
-}
-
-ccl_device int background_num_possible_portals(KernelGlobals *kg, float3 P)
-{
- int num_possible_portals = 0;
- for (int p = 0; p < kernel_data.integrator.num_portals; p++) {
- float3 lightpos, dir;
- if (background_portal_data_fetch_and_check_side(kg, P, p, &lightpos, &dir))
- num_possible_portals++;
- }
- return num_possible_portals;
-}
-
-ccl_device float3 background_portal_sample(KernelGlobals *kg,
- float3 P,
- float randu,
- float randv,
- int num_possible,
- int *sampled_portal,
- float *pdf)
-{
- /* Pick a portal, then re-normalize randv. */
- randv *= num_possible;
- int portal = (int)randv;
- randv -= portal;
-
- /* TODO(sergey): Some smarter way of finding portal to sample
- * is welcome.
- */
- for (int p = 0; p < kernel_data.integrator.num_portals; p++) {
- /* Search for the sampled portal. */
- float3 lightpos, dir;
- if (!background_portal_data_fetch_and_check_side(kg, P, p, &lightpos, &dir))
- continue;
-
- if (portal == 0) {
- /* p is the portal to be sampled. */
- int portal = kernel_data.integrator.portal_offset + p;
- const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, portal);
- float3 axisu = make_float3(
- klight->area.axisu[0], klight->area.axisu[1], klight->area.axisu[2]);
- float3 axisv = make_float3(
- klight->area.axisv[0], klight->area.axisv[1], klight->area.axisv[2]);
- bool is_round = (klight->area.invarea < 0.0f);
-
- float3 D;
- if (is_round) {
- lightpos += ellipse_sample(axisu * 0.5f, axisv * 0.5f, randu, randv);
- float t;
- D = normalize_len(lightpos - P, &t);
- *pdf = fabsf(klight->area.invarea) * lamp_light_pdf(kg, dir, -D, t);
- }
- else {
- *pdf = rect_light_sample(P, &lightpos, axisu, axisv, randu, randv, true);
- D = normalize(lightpos - P);
- }
-
- *pdf /= num_possible;
- *sampled_portal = p;
- return D;
- }
-
- portal--;
- }
-
- return make_float3(0.0f, 0.0f, 0.0f);
-}
-
-ccl_device_inline float3
-background_light_sample(KernelGlobals *kg, float3 P, float randu, float randv, float *pdf)
-{
- /* Probability of sampling portals instead of the map. */
- float portal_sampling_pdf = kernel_data.integrator.portal_pdf;
-
- /* Check if there are portals in the scene which we can sample. */
- if (portal_sampling_pdf > 0.0f) {
- int num_portals = background_num_possible_portals(kg, P);
- if (num_portals > 0) {
- if (portal_sampling_pdf == 1.0f || randu < portal_sampling_pdf) {
- if (portal_sampling_pdf < 1.0f) {
- randu /= portal_sampling_pdf;
- }
- int portal;
- float3 D = background_portal_sample(kg, P, randu, randv, num_portals, &portal, pdf);
- if (num_portals > 1) {
- /* Ignore the chosen portal, its pdf is already included. */
- *pdf += background_portal_pdf(kg, P, D, portal, NULL);
- }
- /* We could also have sampled the map, so combine with MIS. */
- if (portal_sampling_pdf < 1.0f) {
- float cdf_pdf = background_map_pdf(kg, D);
- *pdf = (portal_sampling_pdf * (*pdf) + (1.0f - portal_sampling_pdf) * cdf_pdf);
- }
- return D;
- }
- else {
- /* Sample map, but with nonzero portal_sampling_pdf for MIS. */
- randu = (randu - portal_sampling_pdf) / (1.0f - portal_sampling_pdf);
- }
- }
- else {
- /* We can't sample a portal.
- * Check if we can sample the map instead.
- */
- if (portal_sampling_pdf == 1.0f) {
- /* Use uniform as a fallback if we can't sample the map. */
- *pdf = 1.0f / M_4PI_F;
- return sample_uniform_sphere(randu, randv);
- }
- else {
- portal_sampling_pdf = 0.0f;
- }
- }
- }
-
- float3 D = background_map_sample(kg, randu, randv, pdf);
- /* Use MIS if portals could be sampled as well. */
- if (portal_sampling_pdf > 0.0f) {
- float portal_pdf = background_portal_pdf(kg, P, D, -1, NULL);
- *pdf = (portal_sampling_pdf * portal_pdf + (1.0f - portal_sampling_pdf) * (*pdf));
- }
- return D;
-}
-
-ccl_device float background_light_pdf(KernelGlobals *kg, float3 P, float3 direction)
-{
- /* Probability of sampling portals instead of the map. */
- float portal_sampling_pdf = kernel_data.integrator.portal_pdf;
-
- float portal_pdf = 0.0f, map_pdf = 0.0f;
- if (portal_sampling_pdf > 0.0f) {
- /* Evaluate PDF of sampling this direction by portal sampling. */
- bool is_possible = false;
- portal_pdf = background_portal_pdf(kg, P, direction, -1, &is_possible) * portal_sampling_pdf;
- if (!is_possible) {
- /* Portal sampling is not possible here because all portals point to the wrong side.
- * If map sampling is possible, it would be used instead,
- * otherwise fallback sampling is used. */
- if (portal_sampling_pdf == 1.0f) {
- return kernel_data.integrator.pdf_lights / M_4PI_F;
- }
- else {
- /* Force map sampling. */
- portal_sampling_pdf = 0.0f;
- }
- }
- }
- if (portal_sampling_pdf < 1.0f) {
- /* Evaluate PDF of sampling this direction by map sampling. */
- map_pdf = background_map_pdf(kg, direction) * (1.0f - portal_sampling_pdf);
- }
- return (portal_pdf + map_pdf) * kernel_data.integrator.pdf_lights;
-}
-#endif
-
/* Regular Light */
ccl_device_inline bool lamp_light_sample(
@@ -594,7 +102,7 @@ ccl_device_inline bool lamp_light_sample(
/* spot light attenuation */
float3 dir = make_float3(klight->spot.dir[0], klight->spot.dir[1], klight->spot.dir[2]);
ls->eval_fac *= spot_light_attenuation(
- dir, klight->spot.spot_angle, klight->spot.spot_smooth, ls);
+ dir, klight->spot.spot_angle, klight->spot.spot_smooth, ls->Ng);
if (ls->eval_fac == 0.0f) {
return false;
}
@@ -732,7 +240,7 @@ ccl_device bool lamp_light_eval(
/* spot light attenuation */
float3 dir = make_float3(klight->spot.dir[0], klight->spot.dir[1], klight->spot.dir[2]);
ls->eval_fac *= spot_light_attenuation(
- dir, klight->spot.spot_angle, klight->spot.spot_smooth, ls);
+ dir, klight->spot.spot_angle, klight->spot.spot_smooth, ls->Ng);
if (ls->eval_fac == 0.0f)
return false;
@@ -805,20 +313,18 @@ ccl_device_inline bool triangle_world_space_vertices(
triangle_vertices(kg, prim, V);
}
-#ifdef __INSTANCING__
if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
-# ifdef __OBJECT_MOTION__
+#ifdef __OBJECT_MOTION__
float object_time = (time >= 0.0f) ? time : 0.5f;
Transform tfm = object_fetch_transform_motion_test(kg, object, object_time, NULL);
-# else
+#else
Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
-# endif
+#endif
V[0] = transform_point(&tfm, V[0]);
V[1] = transform_point(&tfm, V[1]);
V[2] = transform_point(&tfm, V[2]);
has_motion = true;
}
-#endif
return has_motion;
}
diff --git a/intern/cycles/kernel/kernel_light_background.h b/intern/cycles/kernel/kernel_light_background.h
new file mode 100644
index 00000000000..30e336f0f80
--- /dev/null
+++ b/intern/cycles/kernel/kernel_light_background.h
@@ -0,0 +1,448 @@
+/*
+ * Copyright 2011-2020 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel_light_common.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Background Light */
+
+#ifdef __BACKGROUND_MIS__
+
+ccl_device float3 background_map_sample(KernelGlobals *kg, float randu, float randv, float *pdf)
+{
+ /* for the following, the CDF values are actually a pair of floats, with the
+ * function value as X and the actual CDF as Y. The last entry's function
+ * value is the CDF total. */
+ int res_x = kernel_data.background.map_res_x;
+ int res_y = kernel_data.background.map_res_y;
+ int cdf_width = res_x + 1;
+
+ /* this is basically std::lower_bound as used by pbrt */
+ int first = 0;
+ int count = res_y;
+
+ while (count > 0) {
+ int step = count >> 1;
+ int middle = first + step;
+
+ if (kernel_tex_fetch(__light_background_marginal_cdf, middle).y < randv) {
+ first = middle + 1;
+ count -= step + 1;
+ }
+ else
+ count = step;
+ }
+
+ int index_v = max(0, first - 1);
+ kernel_assert(index_v >= 0 && index_v < res_y);
+
+ float2 cdf_v = kernel_tex_fetch(__light_background_marginal_cdf, index_v);
+ float2 cdf_next_v = kernel_tex_fetch(__light_background_marginal_cdf, index_v + 1);
+ float2 cdf_last_v = kernel_tex_fetch(__light_background_marginal_cdf, res_y);
+
+ /* importance-sampled V direction */
+ float dv = inverse_lerp(cdf_v.y, cdf_next_v.y, randv);
+ float v = (index_v + dv) / res_y;
+
+ /* this is basically std::lower_bound as used by pbrt */
+ first = 0;
+ count = res_x;
+ while (count > 0) {
+ int step = count >> 1;
+ int middle = first + step;
+
+ if (kernel_tex_fetch(__light_background_conditional_cdf, index_v * cdf_width + middle).y <
+ randu) {
+ first = middle + 1;
+ count -= step + 1;
+ }
+ else
+ count = step;
+ }
+
+ int index_u = max(0, first - 1);
+ kernel_assert(index_u >= 0 && index_u < res_x);
+
+ float2 cdf_u = kernel_tex_fetch(__light_background_conditional_cdf,
+ index_v * cdf_width + index_u);
+ float2 cdf_next_u = kernel_tex_fetch(__light_background_conditional_cdf,
+ index_v * cdf_width + index_u + 1);
+ float2 cdf_last_u = kernel_tex_fetch(__light_background_conditional_cdf,
+ index_v * cdf_width + res_x);
+
+ /* importance-sampled U direction */
+ float du = inverse_lerp(cdf_u.y, cdf_next_u.y, randu);
+ float u = (index_u + du) / res_x;
+
+ /* compute pdf */
+ float sin_theta = sinf(M_PI_F * v);
+ float denom = (M_2PI_F * M_PI_F * sin_theta) * cdf_last_u.x * cdf_last_v.x;
+
+ if (sin_theta == 0.0f || denom == 0.0f)
+ *pdf = 0.0f;
+ else
+ *pdf = (cdf_u.x * cdf_v.x) / denom;
+
+ /* compute direction */
+ return equirectangular_to_direction(u, v);
+}
+
+/* TODO(sergey): Same as above, after the release we should consider using
+ * 'noinline' for all devices.
+ */
+ccl_device float background_map_pdf(KernelGlobals *kg, float3 direction)
+{
+ float2 uv = direction_to_equirectangular(direction);
+ int res_x = kernel_data.background.map_res_x;
+ int res_y = kernel_data.background.map_res_y;
+ int cdf_width = res_x + 1;
+
+ float sin_theta = sinf(uv.y * M_PI_F);
+
+ if (sin_theta == 0.0f)
+ return 0.0f;
+
+ int index_u = clamp(float_to_int(uv.x * res_x), 0, res_x - 1);
+ int index_v = clamp(float_to_int(uv.y * res_y), 0, res_y - 1);
+
+ /* Normalization totals: the last entry of each CDF row holds the CDF total in X. */
+ float2 cdf_last_u = kernel_tex_fetch(__light_background_conditional_cdf,
+ index_v * cdf_width + res_x);
+ float2 cdf_last_v = kernel_tex_fetch(__light_background_marginal_cdf, res_y);
+
+ float denom = (M_2PI_F * M_PI_F * sin_theta) * cdf_last_u.x * cdf_last_v.x;
+
+ if (denom == 0.0f)
+ return 0.0f;
+
+ /* Function values (X component) at the cell containing the direction. */
+ float2 cdf_u = kernel_tex_fetch(__light_background_conditional_cdf,
+ index_v * cdf_width + index_u);
+ float2 cdf_v = kernel_tex_fetch(__light_background_marginal_cdf, index_v);
+
+ return (cdf_u.x * cdf_v.x) / denom;
+}
+
+ccl_device_inline bool background_portal_data_fetch_and_check_side(
+ KernelGlobals *kg, float3 P, int index, float3 *lightpos, float3 *dir)
+{
+ int portal = kernel_data.background.portal_offset + index;
+ const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, portal);
+
+ *lightpos = make_float3(klight->co[0], klight->co[1], klight->co[2]);
+ *dir = make_float3(klight->area.dir[0], klight->area.dir[1], klight->area.dir[2]);
+
+ /* Check whether P lies on the side of the portal that its direction vector points toward. */
+ if (dot(*dir, P - *lightpos) > 1e-4f)
+ return true;
+
+ return false;
+}
+
+ccl_device_inline float background_portal_pdf(
+ KernelGlobals *kg, float3 P, float3 direction, int ignore_portal, bool *is_possible)
+{
+ float portal_pdf = 0.0f;
+
+ int num_possible = 0;
+ for (int p = 0; p < kernel_data.background.num_portals; p++) {
+ if (p == ignore_portal)
+ continue;
+
+ float3 lightpos, dir;
+ if (!background_portal_data_fetch_and_check_side(kg, P, p, &lightpos, &dir))
+ continue;
+
+ /* There's a portal that could be sampled from this position. */
+ if (is_possible) {
+ *is_possible = true;
+ }
+ num_possible++;
+
+ int portal = kernel_data.background.portal_offset + p;
+ const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, portal);
+ float3 axisu = make_float3(
+ klight->area.axisu[0], klight->area.axisu[1], klight->area.axisu[2]);
+ float3 axisv = make_float3(
+ klight->area.axisv[0], klight->area.axisv[1], klight->area.axisv[2]);
+ bool is_round = (klight->area.invarea < 0.0f);
+
+ if (!ray_quad_intersect(P,
+ direction,
+ 1e-4f,
+ FLT_MAX,
+ lightpos,
+ axisu,
+ axisv,
+ dir,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ is_round))
+ continue;
+
+ if (is_round) {
+ float t;
+ float3 D = normalize_len(lightpos - P, &t);
+ portal_pdf += fabsf(klight->area.invarea) * lamp_light_pdf(kg, dir, -D, t);
+ }
+ else {
+ portal_pdf += rect_light_sample(P, &lightpos, axisu, axisv, 0.0f, 0.0f, false);
+ }
+ }
+
+ if (ignore_portal >= 0) {
+ /* We have skipped a portal that could be sampled as well. */
+ num_possible++;
+ }
+
+ return (num_possible > 0) ? portal_pdf / num_possible : 0.0f;
+}
+
+ccl_device int background_num_possible_portals(KernelGlobals *kg, float3 P)
+{
+ int num_possible_portals = 0;
+ for (int p = 0; p < kernel_data.background.num_portals; p++) {
+ float3 lightpos, dir;
+ if (background_portal_data_fetch_and_check_side(kg, P, p, &lightpos, &dir))
+ num_possible_portals++;
+ }
+ return num_possible_portals;
+}
+
+ccl_device float3 background_portal_sample(KernelGlobals *kg,
+ float3 P,
+ float randu,
+ float randv,
+ int num_possible,
+ int *sampled_portal,
+ float *pdf)
+{
+ /* Pick a portal, then re-normalize randv. */
+ randv *= num_possible;
+ int portal = (int)randv;
+ randv -= portal;
+
+ /* TODO(sergey): A smarter way of finding the portal to sample
+ * would be welcome.
+ */
+ for (int p = 0; p < kernel_data.background.num_portals; p++) {
+ /* Search for the sampled portal. */
+ float3 lightpos, dir;
+ if (!background_portal_data_fetch_and_check_side(kg, P, p, &lightpos, &dir))
+ continue;
+
+ if (portal == 0) {
+ /* p is the portal to be sampled. */
+ int portal = kernel_data.background.portal_offset + p;
+ const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, portal);
+ float3 axisu = make_float3(
+ klight->area.axisu[0], klight->area.axisu[1], klight->area.axisu[2]);
+ float3 axisv = make_float3(
+ klight->area.axisv[0], klight->area.axisv[1], klight->area.axisv[2]);
+ bool is_round = (klight->area.invarea < 0.0f);
+
+ float3 D;
+ if (is_round) {
+ lightpos += ellipse_sample(axisu * 0.5f, axisv * 0.5f, randu, randv);
+ float t;
+ D = normalize_len(lightpos - P, &t);
+ *pdf = fabsf(klight->area.invarea) * lamp_light_pdf(kg, dir, -D, t);
+ }
+ else {
+ *pdf = rect_light_sample(P, &lightpos, axisu, axisv, randu, randv, true);
+ D = normalize(lightpos - P);
+ }
+
+ *pdf /= num_possible;
+ *sampled_portal = p;
+ return D;
+ }
+
+ portal--;
+ }
+
+ return make_float3(0.0f, 0.0f, 0.0f);
+}
+
+ccl_device_inline float3 background_sun_sample(KernelGlobals *kg,
+ float randu,
+ float randv,
+ float *pdf)
+{
+ float3 D;
+ const float3 N = float4_to_float3(kernel_data.background.sun);
+ const float angle = kernel_data.background.sun.w;
+ sample_uniform_cone(N, angle, randu, randv, &D, pdf);
+ return D;
+}
+
+ccl_device_inline float background_sun_pdf(KernelGlobals *kg, float3 D)
+{
+ const float3 N = float4_to_float3(kernel_data.background.sun);
+ const float angle = kernel_data.background.sun.w;
+ return pdf_uniform_cone(N, D, angle);
+}
+
+ccl_device_inline float3
+background_light_sample(KernelGlobals *kg, float3 P, float randu, float randv, float *pdf)
+{
+ float portal_method_pdf = kernel_data.background.portal_weight;
+ float sun_method_pdf = kernel_data.background.sun_weight;
+ float map_method_pdf = kernel_data.background.map_weight;
+
+ int num_portals = 0;
+ if (portal_method_pdf > 0.0f) {
+ /* Check if there are portals in the scene which we can sample. */
+ num_portals = background_num_possible_portals(kg, P);
+ if (num_portals == 0) {
+ portal_method_pdf = 0.0f;
+ }
+ }
+
+ float pdf_fac = (portal_method_pdf + sun_method_pdf + map_method_pdf);
+ if (pdf_fac == 0.0f) {
+ /* Use uniform as a fallback if we can't use any strategy. */
+ *pdf = 1.0f / M_4PI_F;
+ return sample_uniform_sphere(randu, randv);
+ }
+
+ pdf_fac = 1.0f / pdf_fac;
+ portal_method_pdf *= pdf_fac;
+ sun_method_pdf *= pdf_fac;
+ map_method_pdf *= pdf_fac;
+
+ /* We have 100% in total and split it between the three categories.
+ * Therefore, we pick portals if randu is between 0 and portal_method_pdf,
+ * sun if randu is between portal_method_pdf and (portal_method_pdf + sun_method_pdf)
+ * and map if randu is between (portal_method_pdf + sun_method_pdf) and 1. */
+ float sun_method_cdf = portal_method_pdf + sun_method_pdf;
+
+ int method = 0;
+ float3 D;
+ if (randu < portal_method_pdf) {
+ method = 0;
+ /* Rescale randu. */
+ if (portal_method_pdf != 1.0f) {
+ randu /= portal_method_pdf;
+ }
+
+ /* Sample a portal. */
+ int portal;
+ D = background_portal_sample(kg, P, randu, randv, num_portals, &portal, pdf);
+ if (num_portals > 1) {
+ /* Ignore the chosen portal, its pdf is already included. */
+ *pdf += background_portal_pdf(kg, P, D, portal, NULL);
+ }
+
+ /* Skip MIS if this is the only method. */
+ if (portal_method_pdf == 1.0f) {
+ return D;
+ }
+ *pdf *= portal_method_pdf;
+ }
+ else if (randu < sun_method_cdf) {
+ method = 1;
+ /* Rescale randu. */
+ if (sun_method_pdf != 1.0f) {
+ randu = (randu - portal_method_pdf) / sun_method_pdf;
+ }
+
+ D = background_sun_sample(kg, randu, randv, pdf);
+
+ /* Skip MIS if this is the only method. */
+ if (sun_method_pdf == 1.0f) {
+ return D;
+ }
+ *pdf *= sun_method_pdf;
+ }
+ else {
+ method = 2;
+ /* Rescale randu. */
+ if (map_method_pdf != 1.0f) {
+ randu = (randu - sun_method_cdf) / map_method_pdf;
+ }
+
+ D = background_map_sample(kg, randu, randv, pdf);
+
+ /* Skip MIS if this is the only method. */
+ if (map_method_pdf == 1.0f) {
+ return D;
+ }
+ *pdf *= map_method_pdf;
+ }
+
+ /* MIS weighting. */
+ if (method != 0 && portal_method_pdf != 0.0f) {
+ *pdf += portal_method_pdf * background_portal_pdf(kg, P, D, -1, NULL);
+ }
+ if (method != 1 && sun_method_pdf != 0.0f) {
+ *pdf += sun_method_pdf * background_sun_pdf(kg, D);
+ }
+ if (method != 2 && map_method_pdf != 0.0f) {
+ *pdf += map_method_pdf * background_map_pdf(kg, D);
+ }
+ return D;
+}
+
+ccl_device float background_light_pdf(KernelGlobals *kg, float3 P, float3 direction)
+{
+ float portal_method_pdf = kernel_data.background.portal_weight;
+ float sun_method_pdf = kernel_data.background.sun_weight;
+ float map_method_pdf = kernel_data.background.map_weight;
+
+ float portal_pdf = 0.0f;
+ /* Portals are a special case here since we need to compute their pdf in order
+ * to find out if we can sample them. */
+ if (portal_method_pdf > 0.0f) {
+ /* Evaluate PDF of sampling this direction by portal sampling. */
+ bool is_possible = false;
+ portal_pdf = background_portal_pdf(kg, P, direction, -1, &is_possible);
+ if (!is_possible) {
+ /* Portal sampling is not possible here because all portals point to the wrong side.
+ * If other methods can be used instead, do so, otherwise uniform sampling is used as a
+ * fallback. */
+ portal_method_pdf = 0.0f;
+ }
+ }
+
+ float pdf_fac = (portal_method_pdf + sun_method_pdf + map_method_pdf);
+ if (pdf_fac == 0.0f) {
+ /* Use uniform as a fallback if we can't use any strategy. */
+ return kernel_data.integrator.pdf_lights / M_4PI_F;
+ }
+
+ pdf_fac = 1.0f / pdf_fac;
+ portal_method_pdf *= pdf_fac;
+ sun_method_pdf *= pdf_fac;
+ map_method_pdf *= pdf_fac;
+
+ float pdf = portal_pdf * portal_method_pdf;
+ if (sun_method_pdf != 0.0f) {
+ pdf += background_sun_pdf(kg, direction) * sun_method_pdf;
+ }
+ if (map_method_pdf != 0.0f) {
+ pdf += background_map_pdf(kg, direction) * map_method_pdf;
+ }
+
+ return pdf * kernel_data.integrator.pdf_lights;
+}
+
+#endif
+
+CCL_NAMESPACE_END
\ No newline at end of file
diff --git a/intern/cycles/kernel/kernel_light_common.h b/intern/cycles/kernel/kernel_light_common.h
new file mode 100644
index 00000000000..39503a4b479
--- /dev/null
+++ b/intern/cycles/kernel/kernel_light_common.h
@@ -0,0 +1,159 @@
+/*
+ * Copyright 2011-2020 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+/* Area light sampling */
+
+/* Sample a rectangular light by solid angle, using the following paper:
+ *
+ * Carlos Urena et al.
+ * An Area-Preserving Parametrization for Spherical Rectangles.
+ *
+ * https://www.solidangle.com/research/egsr2013_spherical_rectangle.pdf
+ *
+ * Returns 1/S (S = solid angle), or 0 if S is 0. light_p is modified when sample_coord is true.
+ */
+ccl_device_inline float rect_light_sample(float3 P,
+ float3 *light_p,
+ float3 axisu,
+ float3 axisv,
+ float randu,
+ float randv,
+ bool sample_coord)
+{
+ /* In our name system P is the origin of the local reference system,
+ * which is o in the paper.
+ */
+ /* On entry *light_p is the rectangle center; axisu and axisv are its full edge vectors. */
+ float3 corner = *light_p - axisu * 0.5f - axisv * 0.5f;
+ float axisu_len, axisv_len;
+ /* Compute local reference system R. */
+ float3 x = normalize_len(axisu, &axisu_len);
+ float3 y = normalize_len(axisv, &axisv_len);
+ float3 z = cross(x, y);
+ /* Compute rectangle coords in local reference system. */
+ float3 dir = corner - P;
+ float z0 = dot(dir, z);
+ /* Flip 'z' to make it point against Q (z0 is never positive afterwards). */
+ if (z0 > 0.0f) {
+ z *= -1.0f;
+ z0 *= -1.0f;
+ }
+ float x0 = dot(dir, x);
+ float y0 = dot(dir, y);
+ float x1 = x0 + axisu_len;
+ float y1 = y0 + axisv_len;
+ /* Compute internal angles (gamma_i). */
+ float4 diff = make_float4(x0, y1, x1, y0) - make_float4(x1, y0, x0, y1);
+ float4 nz = make_float4(y0, x1, y1, x0) * diff;
+ nz = nz / sqrt(z0 * z0 * diff * diff + nz * nz);
+ float g0 = safe_acosf(-nz.x * nz.y);
+ float g1 = safe_acosf(-nz.y * nz.z);
+ float g2 = safe_acosf(-nz.z * nz.w);
+ float g3 = safe_acosf(-nz.w * nz.x);
+ /* Compute predefined constants. */
+ float b0 = nz.x;
+ float b1 = nz.z;
+ float b0sq = b0 * b0;
+ float k = M_2PI_F - g2 - g3;
+ /* Compute solid angle from internal angles. */
+ float S = g0 + g1 - k;
+ /* The sample position is only computed on request; the returned pdf needs S alone. */
+ if (sample_coord) {
+ /* Compute cu. */
+ float au = randu * S + k;
+ float fu = (cosf(au) * b0 - b1) / sinf(au);
+ float cu = 1.0f / sqrtf(fu * fu + b0sq) * (fu > 0.0f ? 1.0f : -1.0f);
+ cu = clamp(cu, -1.0f, 1.0f);
+ /* Compute xu; the max() keeps the divisor away from zero when cu is +/-1. */
+ float xu = -(cu * z0) / max(sqrtf(1.0f - cu * cu), 1e-7f);
+ xu = clamp(xu, x0, x1);
+ /* Compute yv; falls back to y1 when hv2 gets too close to 1. */
+ float z0sq = z0 * z0;
+ float y0sq = y0 * y0;
+ float y1sq = y1 * y1;
+ float d = sqrtf(xu * xu + z0sq);
+ float h0 = y0 / sqrtf(d * d + y0sq);
+ float h1 = y1 / sqrtf(d * d + y1sq);
+ float hv = h0 + randv * (h1 - h0), hv2 = hv * hv;
+ float yv = (hv2 < 1.0f - 1e-6f) ? (hv * d) / sqrtf(1.0f - hv2) : y1;
+
+ /* Transform (xu, yv, z0) to world coords. */
+ *light_p = P + xu * x + yv * y + z0 * z;
+ }
+
+ /* Return the pdf 1/S, or zero when the solid angle S is zero. */
+ if (S != 0.0f)
+ return 1.0f / S;
+ else
+ return 0.0f;
+}
+
+ccl_device_inline float3 ellipse_sample(float3 ru, float3 rv, float randu, float randv)
+{
+ to_unit_disk(&randu, &randv); /* Takes both by pointer; presumably remaps them onto a disk. */
+ return ru * randu + rv * randv; /* Linear combination of the two axes ru and rv. */
+}
+
+ccl_device float3 disk_light_sample(float3 v, float randu, float randv)
+{
+  /* Let make_orthonormals() fill in two axes for v, then sample the ellipse
+   * spanned by those axes. */
+  float3 axis_u, axis_v;
+  make_orthonormals(v, &axis_u, &axis_v);
+  return ellipse_sample(axis_u, axis_v, randu, randv);
+}
+
+ccl_device float3 distant_light_sample(float3 D, float radius, float randu, float randv)
+{
+ return normalize(D + disk_light_sample(D, randu, randv) * radius); /* D offset, renormalized. */
+}
+
+ccl_device float3
+sphere_light_sample(float3 P, float3 center, float radius, float randu, float randv)
+{
+ return disk_light_sample(normalize(P - center), randu, randv) * radius; /* Offset; no center. */
+}
+
+ccl_device float spot_light_attenuation(float3 dir, float spot_angle, float spot_smooth, float3 N)
+{
+  const float cos_angle = dot(dir, N);
+
+  /* At or below the spot_angle threshold there is no light at all. */
+  if (cos_angle <= spot_angle) {
+    return 0.0f;
+  }
+
+  /* Within spot_smooth of the threshold the cosine is scaled by a smoothstep. */
+  const float margin = cos_angle - spot_angle;
+  if (spot_smooth != 0.0f && margin < spot_smooth) {
+    return cos_angle * smoothstepf(margin / spot_smooth);
+  }
+  return cos_angle;
+}
+
+ccl_device float lamp_light_pdf(KernelGlobals *kg, const float3 Ng, const float3 I, float t)
+{
+  /* Cosine between the normal Ng and the direction I. */
+  const float cos_theta = dot(Ng, I);
+
+  /* A non-positive cosine yields a zero pdf; otherwise the squared distance
+   * t * t is divided by the cosine. */
+  return (cos_theta <= 0.0f) ? 0.0f : t * t / cos_theta;
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_montecarlo.h b/intern/cycles/kernel/kernel_montecarlo.h
index 5c776e06547..0edcc1a5a14 100644
--- a/intern/cycles/kernel/kernel_montecarlo.h
+++ b/intern/cycles/kernel/kernel_montecarlo.h
@@ -98,6 +98,16 @@ ccl_device_inline void sample_uniform_cone(
*pdf = M_1_2PI_F / (1.0f - zMin);
}
+ccl_device_inline float pdf_uniform_cone(const float3 N, float3 D, float angle)
+{
+  /* Constant density for D inside the cone around N, zero everywhere else. */
+  const float cos_cone = cosf(angle);
+  if (!(dot(N, D) > cos_cone)) {
+    return 0.0f;
+  }
+  return M_1_2PI_F / (1.0f - cos_cone);
+}
+
/* sample uniform point on the surface of a sphere */
ccl_device float3 sample_uniform_sphere(float u1, float u2)
{
diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h
index 9700aaba80f..3d9f787f267 100644
--- a/intern/cycles/kernel/kernel_shader.h
+++ b/intern/cycles/kernel/kernel_shader.h
@@ -63,10 +63,8 @@ ccl_device_noinline
{
PROFILING_INIT(kg, PROFILING_SHADER_SETUP);
-#ifdef __INSTANCING__
sd->object = (isect->object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, isect->prim) :
isect->object;
-#endif
sd->lamp = LAMP_NONE;
sd->type = isect->type;
@@ -82,18 +80,13 @@ ccl_device_noinline
sd->prim = kernel_tex_fetch(__prim_index, isect->prim);
sd->ray_length = isect->t;
-#ifdef __UV__
sd->u = isect->u;
sd->v = isect->v;
-#endif
#ifdef __HAIR__
if (sd->type & PRIMITIVE_ALL_CURVE) {
/* curve */
- float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
-
- sd->shader = __float_as_int(curvedata.z);
- sd->P = curve_refine(kg, sd, isect, ray);
+ curve_shader_setup(kg, sd, isect, ray);
}
else
#endif
@@ -125,17 +118,15 @@ ccl_device_noinline
sd->flag |= kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags;
-#ifdef __INSTANCING__
if (isect->object != OBJECT_NONE) {
/* instance transform */
object_normal_transform_auto(kg, sd, &sd->N);
object_normal_transform_auto(kg, sd, &sd->Ng);
-# ifdef __DPDU__
+#ifdef __DPDU__
object_dir_transform_auto(kg, sd, &sd->dPdu);
object_dir_transform_auto(kg, sd, &sd->dPdv);
-# endif
- }
#endif
+ }
/* backfacing test */
bool backfacing = (dot(sd->Ng, sd->I) < 0.0f);
@@ -185,10 +176,8 @@ ccl_device_inline
sd->prim = kernel_tex_fetch(__prim_index, isect->prim);
sd->type = isect->type;
-# ifdef __UV__
sd->u = isect->u;
sd->v = isect->v;
-# endif
/* fetch triangle data */
if (sd->type == PRIMITIVE_TRIANGLE) {
@@ -215,17 +204,15 @@ ccl_device_inline
sd->flag |= kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags;
-# ifdef __INSTANCING__
if (isect->object != OBJECT_NONE) {
/* instance transform */
object_normal_transform_auto(kg, sd, &sd->N);
object_normal_transform_auto(kg, sd, &sd->Ng);
-# ifdef __DPDU__
+# ifdef __DPDU__
object_dir_transform_auto(kg, sd, &sd->dPdu);
object_dir_transform_auto(kg, sd, &sd->dPdv);
-# endif
- }
# endif
+ }
/* backfacing test */
if (backfacing) {
@@ -284,17 +271,13 @@ ccl_device_inline void shader_setup_from_sample(KernelGlobals *kg,
else
sd->type = PRIMITIVE_NONE;
- /* primitive */
-#ifdef __INSTANCING__
+ /* primitive */
sd->object = object;
-#endif
sd->lamp = LAMP_NONE;
/* currently no access to bvh prim index for strand sd->prim*/
sd->prim = prim;
-#ifdef __UV__
sd->u = u;
sd->v = v;
-#endif
sd->time = time;
sd->ray_length = t;
@@ -330,23 +313,19 @@ ccl_device_inline void shader_setup_from_sample(KernelGlobals *kg,
if (sd->shader & SHADER_SMOOTH_NORMAL) {
sd->N = triangle_smooth_normal(kg, Ng, sd->prim, sd->u, sd->v);
-#ifdef __INSTANCING__
if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
object_normal_transform_auto(kg, sd, &sd->N);
}
-#endif
}
/* dPdu/dPdv */
#ifdef __DPDU__
triangle_dPdudv(kg, sd->prim, &sd->dPdu, &sd->dPdv);
-# ifdef __INSTANCING__
if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
object_dir_transform_auto(kg, sd, &sd->dPdu);
object_dir_transform_auto(kg, sd, &sd->dPdv);
}
-# endif
#endif
}
else {
@@ -432,15 +411,11 @@ ccl_device_inline void shader_setup_from_background(KernelGlobals *kg,
sd->time = ray->time;
sd->ray_length = 0.0f;
-#ifdef __INSTANCING__
sd->object = OBJECT_NONE;
-#endif
sd->lamp = LAMP_NONE;
sd->prim = PRIM_NONE;
-#ifdef __UV__
sd->u = 0.0f;
sd->v = 0.0f;
-#endif
#ifdef __DPDU__
/* dPdu/dPdv */
@@ -481,17 +456,13 @@ ccl_device_inline void shader_setup_from_volume(KernelGlobals *kg, ShaderData *s
sd->time = ray->time;
sd->ray_length = 0.0f; /* todo: can we set this to some useful value? */
-# ifdef __INSTANCING__
sd->object = OBJECT_NONE; /* todo: fill this for texture coordinates */
-# endif
sd->lamp = LAMP_NONE;
sd->prim = PRIM_NONE;
sd->type = PRIMITIVE_NONE;
-# ifdef __UV__
sd->u = 0.0f;
sd->v = 0.0f;
-# endif
# ifdef __DPDU__
/* dPdu/dPdv */
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index 0a0cf1bd6c0..fc9cc73a704 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -84,9 +84,7 @@ CCL_NAMESPACE_BEGIN
/* Kernel features */
#define __SOBOL__
-#define __INSTANCING__
#define __DPDU__
-#define __UV__
#define __BACKGROUND__
#define __CAUSTICS_TRICKS__
#define __VISIBILITY_FLAG__
@@ -125,9 +123,6 @@ CCL_NAMESPACE_BEGIN
/* Device specific features */
#ifdef __KERNEL_CPU__
-# ifdef __KERNEL_SSE2__
-# define __QBVH__
-# endif
# ifdef WITH_OSL
# define __OSL__
# endif
@@ -696,27 +691,38 @@ typedef enum PrimitiveType {
PRIMITIVE_NONE = 0,
PRIMITIVE_TRIANGLE = (1 << 0),
PRIMITIVE_MOTION_TRIANGLE = (1 << 1),
- PRIMITIVE_CURVE = (1 << 2),
- PRIMITIVE_MOTION_CURVE = (1 << 3),
+ PRIMITIVE_CURVE_THICK = (1 << 2),
+ PRIMITIVE_MOTION_CURVE_THICK = (1 << 3),
+ PRIMITIVE_CURVE_RIBBON = (1 << 4),
+ PRIMITIVE_MOTION_CURVE_RIBBON = (1 << 5),
/* Lamp primitive is not included below on purpose,
* since it is no real traceable primitive.
*/
- PRIMITIVE_LAMP = (1 << 4),
+ PRIMITIVE_LAMP = (1 << 6),
PRIMITIVE_ALL_TRIANGLE = (PRIMITIVE_TRIANGLE | PRIMITIVE_MOTION_TRIANGLE),
- PRIMITIVE_ALL_CURVE = (PRIMITIVE_CURVE | PRIMITIVE_MOTION_CURVE),
- PRIMITIVE_ALL_MOTION = (PRIMITIVE_MOTION_TRIANGLE | PRIMITIVE_MOTION_CURVE),
+ PRIMITIVE_ALL_CURVE = (PRIMITIVE_CURVE_THICK | PRIMITIVE_MOTION_CURVE_THICK |
+ PRIMITIVE_CURVE_RIBBON | PRIMITIVE_MOTION_CURVE_RIBBON),
+ PRIMITIVE_ALL_MOTION = (PRIMITIVE_MOTION_TRIANGLE | PRIMITIVE_MOTION_CURVE_THICK |
+ PRIMITIVE_MOTION_CURVE_RIBBON),
PRIMITIVE_ALL = (PRIMITIVE_ALL_TRIANGLE | PRIMITIVE_ALL_CURVE),
/* Total number of different traceable primitives.
* NOTE: This is an actual value, not a bitflag.
*/
- PRIMITIVE_NUM_TOTAL = 4,
+ PRIMITIVE_NUM_TOTAL = 6,
} PrimitiveType;
#define PRIMITIVE_PACK_SEGMENT(type, segment) ((segment << PRIMITIVE_NUM_TOTAL) | (type))
#define PRIMITIVE_UNPACK_SEGMENT(type) (type >> PRIMITIVE_NUM_TOTAL)
+typedef enum CurveShapeType {
+ CURVE_RIBBON = 0,
+ CURVE_THICK = 1,
+ /* Listed last, so its value equals the number of shape types above. */
+ CURVE_NUM_SHAPE_TYPES,
+} CurveShapeType;
+
/* Attributes */
typedef enum AttributePrimitive {
@@ -1291,6 +1297,24 @@ typedef struct KernelBackground {
float ao_factor;
float ao_distance;
float ao_bounces_factor;
+
+ /* portal sampling */
+ float portal_weight;
+ int num_portals;
+ int portal_offset;
+
+ /* sun sampling */
+ float sun_weight;
+ /* xyz store direction, w the angle. float4 instead of float3 is used
+ * to ensure consistent padding/alignment across devices. */
+ float4 sun;
+
+ /* map sampling */
+ float map_weight;
+ int map_res_x;
+ int map_res_y;
+
+ int use_mis;
} KernelBackground;
static_assert_align(KernelBackground, 16);
@@ -1302,15 +1326,8 @@ typedef struct KernelIntegrator {
int num_all_lights;
float pdf_triangles;
float pdf_lights;
- int pdf_background_res_x;
- int pdf_background_res_y;
float light_inv_rr_threshold;
- /* light portals */
- float portal_pdf;
- int num_portals;
- int portal_offset;
-
/* bounces */
int min_bounce;
int max_bounce;
@@ -1372,7 +1389,7 @@ typedef struct KernelIntegrator {
int max_closures;
- int pad1;
+ int pad1, pad2;
} KernelIntegrator;
static_assert_align(KernelIntegrator, 16);
@@ -1380,13 +1397,11 @@ typedef enum KernelBVHLayout {
BVH_LAYOUT_NONE = 0,
BVH_LAYOUT_BVH2 = (1 << 0),
- BVH_LAYOUT_BVH4 = (1 << 1),
- BVH_LAYOUT_BVH8 = (1 << 2),
+ BVH_LAYOUT_EMBREE = (1 << 1),
+ BVH_LAYOUT_OPTIX = (1 << 2),
- BVH_LAYOUT_EMBREE = (1 << 3),
- BVH_LAYOUT_OPTIX = (1 << 4),
-
- BVH_LAYOUT_DEFAULT = BVH_LAYOUT_BVH8,
+ /* Default BVH layout to use for CPU. */
+ BVH_LAYOUT_AUTO = BVH_LAYOUT_EMBREE,
BVH_LAYOUT_ALL = (unsigned int)(~0u),
} KernelBVHLayout;
@@ -1395,9 +1410,9 @@ typedef struct KernelBVH {
int root;
int have_motion;
int have_curves;
- int have_instancing;
int bvh_layout;
int use_bvh_steps;
+ int curve_subdivisions;
/* Custom BVH */
#ifdef __KERNEL_OPTIX__
@@ -1415,25 +1430,6 @@ typedef struct KernelBVH {
} KernelBVH;
static_assert_align(KernelBVH, 16);
-typedef enum CurveFlag {
- /* runtime flags */
- CURVE_KN_BACKFACING = 1, /* backside of cylinder? */
- CURVE_KN_ENCLOSEFILTER = 2, /* don't consider strands surrounding start point? */
- CURVE_KN_INTERPOLATE = 4, /* render as a curve? */
- CURVE_KN_ACCURATE = 8, /* use accurate intersections test? */
- CURVE_KN_INTERSECTCORRECTION = 16, /* correct for width after determing closest midpoint? */
- CURVE_KN_TRUETANGENTGNORMAL = 32, /* use tangent normal for geometry? */
- CURVE_KN_RIBBONS = 64, /* use flat curve ribbons */
-} CurveFlag;
-
-typedef struct KernelCurves {
- int curveflags;
- int subdivisions;
-
- int pad1, pad2;
-} KernelCurves;
-static_assert_align(KernelCurves, 16);
-
typedef struct KernelTables {
int beckmann_offset;
int pad1, pad2, pad3;
@@ -1454,7 +1450,6 @@ typedef struct KernelData {
KernelBackground background;
KernelIntegrator integrator;
KernelBVH bvh;
- KernelCurves curve;
KernelTables tables;
KernelBake bake;
} KernelData;
diff --git a/intern/cycles/kernel/kernels/cpu/kernel.cpp b/intern/cycles/kernel/kernels/cpu/kernel.cpp
index 8829a14ead5..8040bfb7b33 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel.cpp
@@ -64,12 +64,14 @@ CCL_NAMESPACE_BEGIN
/* Memory Copy */
-void kernel_const_copy(KernelGlobals *kg, const char *name, void *host, size_t size)
+void kernel_const_copy(KernelGlobals *kg, const char *name, void *host, size_t)
{
- if (strcmp(name, "__data") == 0)
- memcpy(&kg->__data, host, size);
- else
+ if (strcmp(name, "__data") == 0) {
+ kg->__data = *(KernelData *)host;
+ }
+ else {
assert(0);
+ }
}
void kernel_global_memory_copy(KernelGlobals *kg, const char *name, void *mem, size_t size)
diff --git a/intern/cycles/kernel/kernels/optix/kernel_optix.cu b/intern/cycles/kernel/kernels/optix/kernel_optix.cu
index e03504316ad..c730d952ed4 100644
--- a/intern/cycles/kernel/kernels/optix/kernel_optix.cu
+++ b/intern/cycles/kernel/kernels/optix/kernel_optix.cu
@@ -256,11 +256,9 @@ extern "C" __global__ void __closesthit__kernel_optix_hit()
}
#ifdef __HAIR__
-extern "C" __global__ void __intersection__curve()
+ccl_device_inline void optix_intersection_curve(const uint prim, const uint type)
{
- const uint prim = optixGetPrimitiveIndex();
const uint object = get_object_id<true>();
- const uint type = kernel_tex_fetch(__prim_type, prim);
const uint visibility = optixGetPayload_4();
float3 P = optixGetObjectRayOrigin();
@@ -282,14 +280,30 @@ extern "C" __global__ void __intersection__curve()
if (isect.t != FLT_MAX)
isect.t *= len;
- if (!(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) ?
- curve_intersect(NULL, &isect, P, dir, visibility, object, prim, time, type) :
- cardinal_curve_intersect(NULL, &isect, P, dir, visibility, object, prim, time, type)) {
+ if (curve_intersect(NULL, &isect, P, dir, visibility, object, prim, time, type)) {
optixReportIntersection(isect.t / len,
type & PRIMITIVE_ALL,
__float_as_int(isect.u), // Attribute_0
__float_as_int(isect.v)); // Attribute_1
}
+
+}
+
+extern "C" __global__ void __intersection__curve_ribbon()
+{
+  /* Only ribbon curves (static or motion) are forwarded to the shared test. */
+  const uint prim_id = optixGetPrimitiveIndex();
+  const uint prim_type = kernel_tex_fetch(__prim_type, prim_id);
+  if ((prim_type & (PRIMITIVE_CURVE_RIBBON | PRIMITIVE_MOTION_CURVE_RIBBON)) != 0) {
+    optix_intersection_curve(prim_id, prim_type);
+  }
+}
+
+extern "C" __global__ void __intersection__curve_all()
+{
+  /* No filtering by curve shape: every primitive goes to the shared test. */
+  const uint prim_id = optixGetPrimitiveIndex();
+  optix_intersection_curve(prim_id, kernel_tex_fetch(__prim_type, prim_id));
}
#endif
diff --git a/intern/cycles/kernel/osl/CMakeLists.txt b/intern/cycles/kernel/osl/CMakeLists.txt
index fc0c845fd4f..d7ab778181e 100644
--- a/intern/cycles/kernel/osl/CMakeLists.txt
+++ b/intern/cycles/kernel/osl/CMakeLists.txt
@@ -36,6 +36,15 @@ set(LIB
# OSL and LLVM are built without RTTI
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${RTTI_DISABLE_FLAGS}")
+if(APPLE)
+ # Disable allocation warning on macOS prior to 10.14: the OSLRenderServices
+ # contains a member which is 64-byte aligned (cache inside of OIIO's
+ # unordered_map_concurrent). This is not something the SDK supports, but
+ # since we take care of allocations ourselves it is OK to ignore the
+ # diagnostic message.
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -faligned-allocation")
+endif()
+
include_directories(${INC})
include_directories(SYSTEM ${INC_SYS})
diff --git a/intern/cycles/kernel/osl/osl_closures.cpp b/intern/cycles/kernel/osl/osl_closures.cpp
index 872a55143cc..7ee467a46dd 100644
--- a/intern/cycles/kernel/osl/osl_closures.cpp
+++ b/intern/cycles/kernel/osl/osl_closures.cpp
@@ -362,6 +362,9 @@ void OSLShader::register_closures(OSLShadingSystem *ss_)
id++,
closure_bsdf_transparent_params(),
closure_bsdf_transparent_prepare);
+
+ register_closure(
+ ss, "microfacet", id++, closure_bsdf_microfacet_params(), closure_bsdf_microfacet_prepare);
register_closure(ss,
"microfacet_ggx",
id++,
@@ -508,6 +511,82 @@ bool CBSDFClosure::skip(const ShaderData *sd, int path_flag, int scattering)
return false;
}
+/* Standard Microfacet Closure */
+
+class MicrofacetClosure : public CBSDFClosure {
+ public:
+ MicrofacetBsdf params;
+ ustring distribution;
+ int refract; /* Non-zero selects the refraction setup functions and LABEL_TRANSMIT. */
+ /* Allocates a MicrofacetBsdf and runs the setup matching `distribution` and `refract`. */
+ void setup(ShaderData *sd, int path_flag, float3 weight)
+ {
+ static ustring u_ggx("ggx");
+ static ustring u_default("default");
+ /* Nothing is allocated when skip() rejects this glossy reflect/transmit label. */
+ const int label = (refract) ? LABEL_TRANSMIT : LABEL_REFLECT;
+ if (skip(sd, path_flag, LABEL_GLOSSY | label)) {
+ return;
+ }
+ /* `params` is handed to bsdf_alloc_osl(); presumably copied into the new closure - confirm. */
+ MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc_osl(
+ sd, sizeof(MicrofacetBsdf), weight, &params);
+ /* No closure was allocated, so there is nothing to set up. */
+ if (!bsdf) {
+ return;
+ }
+
+ /* GGX (also chosen for the "default" distribution name) */
+ if (distribution == u_ggx || distribution == u_default) {
+ if (!refract) {
+ if (params.alpha_x == params.alpha_y) {
+ /* Isotropic */
+ sd->flag |= bsdf_microfacet_ggx_isotropic_setup(bsdf);
+ }
+ else {
+ /* Anisotropic */
+ sd->flag |= bsdf_microfacet_ggx_setup(bsdf);
+ }
+ }
+ else {
+ sd->flag |= bsdf_microfacet_ggx_refraction_setup(bsdf);
+ }
+ }
+ /* Beckmann (every other distribution name ends up here) */
+ else {
+ if (!refract) {
+ if (params.alpha_x == params.alpha_y) {
+ /* Isotropic */
+ sd->flag |= bsdf_microfacet_beckmann_isotropic_setup(bsdf);
+ }
+ else {
+ /* Anisotropic */
+ sd->flag |= bsdf_microfacet_beckmann_setup(bsdf);
+ }
+ }
+ else {
+ sd->flag |= bsdf_microfacet_beckmann_refraction_setup(bsdf);
+ }
+ }
+ }
+};
+
+ClosureParam *closure_bsdf_microfacet_params()
+{
+ static ClosureParam params[] = {CLOSURE_STRING_PARAM(MicrofacetClosure, distribution),
+ CLOSURE_FLOAT3_PARAM(MicrofacetClosure, params.N),
+ CLOSURE_FLOAT3_PARAM(MicrofacetClosure, params.T),
+ CLOSURE_FLOAT_PARAM(MicrofacetClosure, params.alpha_x),
+ CLOSURE_FLOAT_PARAM(MicrofacetClosure, params.alpha_y),
+ CLOSURE_FLOAT_PARAM(MicrofacetClosure, params.ior),
+ CLOSURE_INT_PARAM(MicrofacetClosure, refract),
+ CLOSURE_STRING_KEYPARAM(MicrofacetClosure, label, "label"),
+ CLOSURE_FINISH_PARAM(MicrofacetClosure)};
+ /* NOTE(review): `label` is not declared in MicrofacetClosure; presumably inherited - confirm. */
+ return params; /* Function-local static array, so every call returns the same table. */
+}
+CCLOSURE_PREPARE(closure_bsdf_microfacet_prepare, MicrofacetClosure) /* Presumably defines it. */
+
/* GGX closures with Fresnel */
class MicrofacetFresnelClosure : public CBSDFClosure {
diff --git a/intern/cycles/kernel/osl/osl_closures.h b/intern/cycles/kernel/osl/osl_closures.h
index d12afdb80dd..e4058e3a746 100644
--- a/intern/cycles/kernel/osl/osl_closures.h
+++ b/intern/cycles/kernel/osl/osl_closures.h
@@ -51,6 +51,7 @@ OSL::ClosureParam *closure_bsdf_transparent_params();
OSL::ClosureParam *closure_bssrdf_params();
OSL::ClosureParam *closure_absorption_params();
OSL::ClosureParam *closure_henyey_greenstein_params();
+OSL::ClosureParam *closure_bsdf_microfacet_params();
OSL::ClosureParam *closure_bsdf_microfacet_multi_ggx_params();
OSL::ClosureParam *closure_bsdf_microfacet_multi_ggx_glass_params();
OSL::ClosureParam *closure_bsdf_microfacet_multi_ggx_aniso_params();
@@ -70,6 +71,7 @@ void closure_bsdf_transparent_prepare(OSL::RendererServices *, int id, void *dat
void closure_bssrdf_prepare(OSL::RendererServices *, int id, void *data);
void closure_absorption_prepare(OSL::RendererServices *, int id, void *data);
void closure_henyey_greenstein_prepare(OSL::RendererServices *, int id, void *data);
+void closure_bsdf_microfacet_prepare(OSL::RendererServices *, int id, void *data);
void closure_bsdf_microfacet_multi_ggx_prepare(OSL::RendererServices *, int id, void *data);
void closure_bsdf_microfacet_multi_ggx_glass_prepare(OSL::RendererServices *, int id, void *data);
void closure_bsdf_microfacet_multi_ggx_aniso_prepare(OSL::RendererServices *, int id, void *data);
diff --git a/intern/cycles/kernel/shaders/node_sky_texture.osl b/intern/cycles/kernel/shaders/node_sky_texture.osl
index 4def237a2e0..08bc8f85120 100644
--- a/intern/cycles/kernel/shaders/node_sky_texture.osl
+++ b/intern/cycles/kernel/shaders/node_sky_texture.osl
@@ -44,13 +44,13 @@ float sky_perez_function(float lam[9], float theta, float gamma)
(1.0 + lam[2] * exp(lam[3] * gamma) + lam[4] * cgamma * cgamma);
}
-color sky_radiance_old(normal dir,
- float sunphi,
- float suntheta,
- color radiance,
- float config_x[9],
- float config_y[9],
- float config_z[9])
+color sky_radiance_preetham(normal dir,
+ float sunphi,
+ float suntheta,
+ color radiance,
+ float config_x[9],
+ float config_y[9],
+ float config_z[9])
{
/* convert vector to spherical coordinates */
vector spherical = sky_spherical_coordinates(dir);
@@ -88,13 +88,13 @@ float sky_radiance_internal(float config[9], float theta, float gamma)
(config[2] + config[3] * expM + config[5] * rayM + config[6] * mieM + config[7] * zenith);
}
-color sky_radiance_new(normal dir,
- float sunphi,
- float suntheta,
- color radiance,
- float config_x[9],
- float config_y[9],
- float config_z[9])
+color sky_radiance_hosek(normal dir,
+ float sunphi,
+ float suntheta,
+ color radiance,
+ float config_x[9],
+ float config_y[9],
+ float config_z[9])
{
/* convert vector to spherical coordinates */
vector spherical = sky_spherical_coordinates(dir);
@@ -116,16 +116,103 @@ color sky_radiance_new(normal dir,
return xyz_to_rgb(x, y, z) * (M_2PI / 683);
}
+/* Nishita improved */
+vector geographical_to_direction(float lat, float lon)
+{
+ return vector(cos(lat) * cos(lon), cos(lat) * sin(lon), sin(lat)); /* unit length, z = sin(lat) */
+}
+
+color sky_radiance_nishita(vector dir, float nishita_data[9], string filename)
+{
+  /* definitions: nishita_data[0..5] are two sun colors, [6..8] the sun parameters */
+  float sun_elevation = nishita_data[6];
+  float sun_rotation = nishita_data[7];
+  float angular_diameter = nishita_data[8];
+  int sun_disc = angular_diameter > 0;
+  float alpha = 1.0;
+  color xyz = color(0, 0, 0); /* stays black if no branch below assigns it */
+  /* convert dir to spherical coordinates */
+  vector direction = sky_spherical_coordinates(dir);
+
+  /* render above the horizon */
+  if (dir[2] >= 0.0) {
+    /* definitions */
+    vector sun_dir = geographical_to_direction(sun_elevation, sun_rotation + M_PI_2);
+    float sun_dir_angle = acos(dot(dir, sun_dir));
+    float half_angular = angular_diameter / 2.0;
+    float dir_elevation = M_PI_2 - direction[0];
+
+    /* if ray inside sun disc render it, otherwise render sky */
+    if (sun_dir_angle < half_angular && sun_disc == 1) {
+      /* get the 2 sun pixels (bottom and top of the disc) */
+      color pixel_bottom = color(nishita_data[0], nishita_data[1], nishita_data[2]);
+      color pixel_top = color(nishita_data[3], nishita_data[4], nishita_data[5]);
+      float y;
+
+      /* sun interpolation: the first branch is taken when the whole disc is above the horizon */
+      if (sun_elevation - half_angular > 0.0) {
+        if ((sun_elevation + half_angular) > 0.0) {
+          y = ((dir_elevation - sun_elevation) / angular_diameter) + 0.5;
+          xyz = mix(pixel_bottom, pixel_top, y);
+        }
+      }
+      else {
+        if (sun_elevation + half_angular > 0.0) {
+          y = dir_elevation / (sun_elevation + half_angular);
+          xyz = mix(pixel_bottom, pixel_top, y);
+        }
+      }
+      /* limb darkening, coefficient is 0.6f */
+      float angle_fraction = sun_dir_angle / half_angular;
+      float limb_darkening = (1.0 - 0.6 * (1.0 - sqrt(1.0 - angle_fraction * angle_fraction)));
+      xyz *= limb_darkening;
+    }
+    /* sky */
+    else {
+      /* sky interpolation */
+      float x = (direction[1] + M_PI + sun_rotation) / M_2PI;
+      float y = 1.0 - (dir_elevation / M_PI_2);
+      if (x > 1.0) {
+        x = x - 1.0;
+      }
+      xyz = (color)texture(filename, x, y, "wrap", "clamp", "interp", "linear", "alpha", alpha);
+    }
+  }
+  /* ground */
+  else {
+    if (dir[2] < -0.4) {
+      xyz = color(0, 0, 0);
+    }
+    else {
+      /* black ground fade */
+      float mul = pow(1.0 + dir[2] * 2.5, 3.0);
+      /* interpolation */
+      float x = (direction[1] + M_PI + sun_rotation) / M_2PI;
+      float y = 1.5;
+      if (x > 1.0) {
+        x = x - 1.0;
+      }
+      xyz = (color)texture(
+                filename, x, y, "wrap", "periodic", "interp", "linear", "alpha", alpha) *
+            mul;
+    }
+  }
+  /* convert to RGB and adjust strength */
+  return xyz_to_rgb(xyz[0], xyz[1], xyz[2]) * 120000.0;
+}
+
shader node_sky_texture(int use_mapping = 0,
matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
vector Vector = P,
string type = "hosek_wilkie",
float theta = 0.0,
float phi = 0.0,
+ string filename = "",
color radiance = color(0.0, 0.0, 0.0),
float config_x[9] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0},
float config_y[9] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0},
float config_z[9] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0},
+ float nishita_data[9] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0},
output color Color = color(0.0, 0.0, 0.0))
{
vector p = Vector;
@@ -133,8 +220,10 @@ shader node_sky_texture(int use_mapping = 0,
if (use_mapping)
p = transform(mapping, p);
+ if (type == "nishita_improved")
+ Color = sky_radiance_nishita(p, nishita_data, filename);
if (type == "hosek_wilkie")
- Color = sky_radiance_new(p, phi, theta, radiance, config_x, config_y, config_z);
- else
- Color = sky_radiance_old(p, phi, theta, radiance, config_x, config_y, config_z);
+ Color = sky_radiance_hosek(p, phi, theta, radiance, config_x, config_y, config_z);
+ if (type == "preetham")
+ Color = sky_radiance_preetham(p, phi, theta, radiance, config_x, config_y, config_z);
}
diff --git a/intern/cycles/kernel/svm/svm_closure.h b/intern/cycles/kernel/svm/svm_closure.h
index 2c57a142692..1ae94f1d766 100644
--- a/intern/cycles/kernel/svm/svm_closure.h
+++ b/intern/cycles/kernel/svm/svm_closure.h
@@ -847,39 +847,29 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg,
case CLOSURE_BSDF_HAIR_TRANSMISSION_ID: {
float3 weight = sd->svm_closure_weight * mix_weight;
- if (sd->flag & SD_BACKFACING && sd->type & PRIMITIVE_ALL_CURVE) {
- /* todo: giving a fixed weight here will cause issues when
- * mixing multiple BSDFS. energy will not be conserved and
- * the throughput can blow up after multiple bounces. we
- * better figure out a way to skip backfaces from rays
- * spawned by transmission from the front */
- bsdf_transparent_setup(sd, make_float3(1.0f, 1.0f, 1.0f), path_flag);
- }
- else {
- HairBsdf *bsdf = (HairBsdf *)bsdf_alloc(sd, sizeof(HairBsdf), weight);
+ HairBsdf *bsdf = (HairBsdf *)bsdf_alloc(sd, sizeof(HairBsdf), weight);
- if (bsdf) {
- bsdf->N = N;
- bsdf->roughness1 = param1;
- bsdf->roughness2 = param2;
- bsdf->offset = -stack_load_float(stack, data_node.z);
+ if (bsdf) {
+ bsdf->N = N;
+ bsdf->roughness1 = param1;
+ bsdf->roughness2 = param2;
+ bsdf->offset = -stack_load_float(stack, data_node.z);
- if (stack_valid(data_node.y)) {
- bsdf->T = normalize(stack_load_float3(stack, data_node.y));
- }
- else if (!(sd->type & PRIMITIVE_ALL_CURVE)) {
- bsdf->T = normalize(sd->dPdv);
- bsdf->offset = 0.0f;
- }
- else
- bsdf->T = normalize(sd->dPdu);
+ if (stack_valid(data_node.y)) {
+ bsdf->T = normalize(stack_load_float3(stack, data_node.y));
+ }
+ else if (!(sd->type & PRIMITIVE_ALL_CURVE)) {
+ bsdf->T = normalize(sd->dPdv);
+ bsdf->offset = 0.0f;
+ }
+ else
+ bsdf->T = normalize(sd->dPdu);
- if (type == CLOSURE_BSDF_HAIR_REFLECTION_ID) {
- sd->flag |= bsdf_hair_reflection_setup(bsdf);
- }
- else {
- sd->flag |= bsdf_hair_transmission_setup(bsdf);
- }
+ if (type == CLOSURE_BSDF_HAIR_REFLECTION_ID) {
+ sd->flag |= bsdf_hair_reflection_setup(bsdf);
+ }
+ else {
+ sd->flag |= bsdf_hair_transmission_setup(bsdf);
}
}
diff --git a/intern/cycles/kernel/svm/svm_geometry.h b/intern/cycles/kernel/svm/svm_geometry.h
index 019c6294082..77df19b2298 100644
--- a/intern/cycles/kernel/svm/svm_geometry.h
+++ b/intern/cycles/kernel/svm/svm_geometry.h
@@ -41,11 +41,9 @@ ccl_device_inline void svm_node_geometry(
case NODE_GEOM_Ng:
data = sd->Ng;
break;
-#ifdef __UV__
case NODE_GEOM_uv:
data = make_float3(sd->u, sd->v, 0.0f);
break;
-#endif
default:
data = make_float3(0.0f, 0.0f, 0.0f);
}
diff --git a/intern/cycles/kernel/svm/svm_noise.h b/intern/cycles/kernel/svm/svm_noise.h
index 914ef2089a9..7db8ffcc6e1 100644
--- a/intern/cycles/kernel/svm/svm_noise.h
+++ b/intern/cycles/kernel/svm/svm_noise.h
@@ -573,8 +573,8 @@ ccl_device_inline ssef quad_mix(avxf p, avxf q, ssef f)
*
* Point Offset from v0
* v0 (0, 0, 0)
- * v1 (0, 0, 1) The full avx type is computed by inserting the following
- * v2 (0, 1, 0) sse types into both the low and high parts of the avx.
+ * v1 (0, 0, 1) The full AVX type is computed by inserting the following
+ * v2 (0, 1, 0) SSE types into both the low and high parts of the AVX.
* v3 (0, 1, 1)
* v4 (1, 0, 0)
* v5 (1, 0, 1) (0, 1, 0, 1) = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(V, V + 1))
diff --git a/intern/cycles/kernel/svm/svm_sky.h b/intern/cycles/kernel/svm/svm_sky.h
index 50fe0c8232f..e877bd9a5c8 100644
--- a/intern/cycles/kernel/svm/svm_sky.h
+++ b/intern/cycles/kernel/svm/svm_sky.h
@@ -37,16 +37,16 @@ ccl_device float sky_perez_function(float *lam, float theta, float gamma)
(1.0f + lam[2] * expf(lam[3] * gamma) + lam[4] * cgamma * cgamma);
}
-ccl_device float3 sky_radiance_old(KernelGlobals *kg,
- float3 dir,
- float sunphi,
- float suntheta,
- float radiance_x,
- float radiance_y,
- float radiance_z,
- float *config_x,
- float *config_y,
- float *config_z)
+ccl_device float3 sky_radiance_preetham(KernelGlobals *kg,
+ float3 dir,
+ float sunphi,
+ float suntheta,
+ float radiance_x,
+ float radiance_y,
+ float radiance_z,
+ float *config_x,
+ float *config_y,
+ float *config_z)
{
/* convert vector to spherical coordinates */
float2 spherical = direction_to_spherical(dir);
@@ -90,16 +90,16 @@ ccl_device float sky_radiance_internal(float *configuration, float theta, float
configuration[6] * mieM + configuration[7] * zenith);
}
-ccl_device float3 sky_radiance_new(KernelGlobals *kg,
- float3 dir,
- float sunphi,
- float suntheta,
- float radiance_x,
- float radiance_y,
- float radiance_z,
- float *config_x,
- float *config_y,
- float *config_z)
+ccl_device float3 sky_radiance_hosek(KernelGlobals *kg,
+ float3 dir,
+ float sunphi,
+ float suntheta,
+ float radiance_x,
+ float radiance_y,
+ float radiance_z,
+ float *config_x,
+ float *config_y,
+ float *config_z)
{
/* convert vector to spherical coordinates */
float2 spherical = direction_to_spherical(dir);
@@ -121,93 +121,206 @@ ccl_device float3 sky_radiance_new(KernelGlobals *kg,
return xyz_to_rgb(kg, make_float3(x, y, z)) * (M_2PI_F / 683);
}
+/* Nishita improved sky model */
+/* Convert a latitude/longitude pair to a unit direction vector.
+ * Both angles are fed straight into the trigonometric functions; the latitude
+ * ends up in the Z component, the longitude rotates in the XY plane. */
+ccl_device float3 geographical_to_direction(float lat, float lon)
+{
+  /* Single precision variants: the arguments are floats and the host side computes the
+   * same direction with cosf/sinf. */
+  return make_float3(cosf(lat) * cosf(lon), cosf(lat) * sinf(lon), sinf(lat));
+}
+
+/* Evaluate the Nishita sky for direction `dir` and return it as RGB.
+ *
+ * nishita_data layout, as read below:
+ *   [0..2] XYZ of the bottom sun pixel
+ *   [3..5] XYZ of the top sun pixel
+ *   [6]    sun elevation
+ *   [7]    sun rotation
+ *   [8]    sun angular diameter, a value <= 0 disables the sun disc
+ *
+ * texture_id is the image sampled for everything that is not the sun disc. */
+ccl_device float3 sky_radiance_nishita(KernelGlobals *kg,
+                                       float3 dir,
+                                       float *nishita_data,
+                                       uint texture_id)
+{
+  /* definitions */
+  float sun_elevation = nishita_data[6];
+  float sun_rotation = nishita_data[7];
+  float angular_diameter = nishita_data[8];
+  bool sun_disc = (angular_diameter > 0.0f);
+  /* Start from black: the sun disc branch does not assign xyz on every path, yet it is
+   * always scaled by the limb darkening afterwards. */
+  float3 xyz = make_float3(0.0f, 0.0f, 0.0f);
+  /* convert dir to spherical coordinates */
+  float2 direction = direction_to_spherical(dir);
+
+  /* render above the horizon */
+  if (dir.z >= 0.0f) {
+    /* definitions */
+    float3 sun_dir = geographical_to_direction(sun_elevation, sun_rotation + M_PI_2_F);
+    float sun_dir_angle = acosf(dot(dir, sun_dir));
+    float half_angular = angular_diameter / 2.0f;
+    float dir_elevation = M_PI_2_F - direction.x;
+
+    /* if ray inside sun disc render it, otherwise render sky */
+    if (sun_disc && sun_dir_angle < half_angular) {
+      /* get the 2 precomputed sun pixels */
+      float3 pixel_bottom = make_float3(nishita_data[0], nishita_data[1], nishita_data[2]);
+      float3 pixel_top = make_float3(nishita_data[3], nishita_data[4], nishita_data[5]);
+      float y;
+
+      /* sun interpolation */
+      if (sun_elevation - half_angular > 0.0f) {
+        /* whole disc above the horizon: interpolate across the full diameter */
+        if (sun_elevation + half_angular > 0.0f) {
+          y = ((dir_elevation - sun_elevation) / angular_diameter) + 0.5f;
+          xyz = interp(pixel_bottom, pixel_top, y);
+        }
+      }
+      else {
+        /* disc cut by the horizon: interpolate across the visible part only */
+        if (sun_elevation + half_angular > 0.0f) {
+          y = dir_elevation / (sun_elevation + half_angular);
+          xyz = interp(pixel_bottom, pixel_top, y);
+        }
+      }
+      /* limb darkening, coefficient is 0.6f */
+      float limb_darkening = (1.0f -
+                              0.6f * (1.0f - sqrtf(1.0f - sqr(sun_dir_angle / half_angular))));
+      xyz *= limb_darkening;
+    }
+    /* sky */
+    else {
+      /* sky interpolation */
+      float x = (direction.y + M_PI_F + sun_rotation) / M_2PI_F;
+      float y = dir_elevation / M_PI_2_F;
+      /* wrap the horizontal coordinate back into the texture */
+      if (x > 1.0f) {
+        x -= 1.0f;
+      }
+      xyz = float4_to_float3(kernel_tex_image_interp(kg, texture_id, x, y));
+    }
+  }
+  /* ground */
+  else {
+    if (dir.z < -0.4f) {
+      xyz = make_float3(0.0f, 0.0f, 0.0f);
+    }
+    else {
+      /* black ground fade: cubic falloff that reaches zero at dir.z == -0.4 */
+      float fade = 1.0f + dir.z * 2.5f;
+      fade = sqr(fade) * fade;
+      /* interpolation */
+      float x = (direction.y + M_PI_F + sun_rotation) / M_2PI_F;
+      if (x > 1.0f) {
+        x -= 1.0f;
+      }
+      xyz = float4_to_float3(kernel_tex_image_interp(kg, texture_id, x, -0.5f)) * fade;
+    }
+  }
+
+  /* convert to rgb and adjust strength */
+  return xyz_to_rgb(kg, xyz) * 120000.0f;
+}
+
ccl_device void svm_node_tex_sky(
KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
{
- /* Define variables */
- float sunphi, suntheta, radiance_x, radiance_y, radiance_z;
- float config_x[9], config_y[9], config_z[9];
-
/* Load data */
uint dir_offset = node.y;
uint out_offset = node.z;
int sky_model = node.w;
- float4 data = read_node_float(kg, offset);
- sunphi = data.x;
- suntheta = data.y;
- radiance_x = data.z;
- radiance_y = data.w;
-
- data = read_node_float(kg, offset);
- radiance_z = data.x;
- config_x[0] = data.y;
- config_x[1] = data.z;
- config_x[2] = data.w;
-
- data = read_node_float(kg, offset);
- config_x[3] = data.x;
- config_x[4] = data.y;
- config_x[5] = data.z;
- config_x[6] = data.w;
-
- data = read_node_float(kg, offset);
- config_x[7] = data.x;
- config_x[8] = data.y;
- config_y[0] = data.z;
- config_y[1] = data.w;
-
- data = read_node_float(kg, offset);
- config_y[2] = data.x;
- config_y[3] = data.y;
- config_y[4] = data.z;
- config_y[5] = data.w;
-
- data = read_node_float(kg, offset);
- config_y[6] = data.x;
- config_y[7] = data.y;
- config_y[8] = data.z;
- config_z[0] = data.w;
-
- data = read_node_float(kg, offset);
- config_z[1] = data.x;
- config_z[2] = data.y;
- config_z[3] = data.z;
- config_z[4] = data.w;
-
- data = read_node_float(kg, offset);
- config_z[5] = data.x;
- config_z[6] = data.y;
- config_z[7] = data.z;
- config_z[8] = data.w;
-
float3 dir = stack_load_float3(stack, dir_offset);
float3 f;
- /* Compute Sky */
- if (sky_model == 0) {
- f = sky_radiance_old(kg,
- dir,
- sunphi,
- suntheta,
- radiance_x,
- radiance_y,
- radiance_z,
- config_x,
- config_y,
- config_z);
+ /* Preetham and Hosek share the same data */
+ if (sky_model == 0 || sky_model == 1) {
+ /* Define variables */
+ float sunphi, suntheta, radiance_x, radiance_y, radiance_z;
+ float config_x[9], config_y[9], config_z[9];
+
+ float4 data = read_node_float(kg, offset);
+ sunphi = data.x;
+ suntheta = data.y;
+ radiance_x = data.z;
+ radiance_y = data.w;
+
+ data = read_node_float(kg, offset);
+ radiance_z = data.x;
+ config_x[0] = data.y;
+ config_x[1] = data.z;
+ config_x[2] = data.w;
+
+ data = read_node_float(kg, offset);
+ config_x[3] = data.x;
+ config_x[4] = data.y;
+ config_x[5] = data.z;
+ config_x[6] = data.w;
+
+ data = read_node_float(kg, offset);
+ config_x[7] = data.x;
+ config_x[8] = data.y;
+ config_y[0] = data.z;
+ config_y[1] = data.w;
+
+ data = read_node_float(kg, offset);
+ config_y[2] = data.x;
+ config_y[3] = data.y;
+ config_y[4] = data.z;
+ config_y[5] = data.w;
+
+ data = read_node_float(kg, offset);
+ config_y[6] = data.x;
+ config_y[7] = data.y;
+ config_y[8] = data.z;
+ config_z[0] = data.w;
+
+ data = read_node_float(kg, offset);
+ config_z[1] = data.x;
+ config_z[2] = data.y;
+ config_z[3] = data.z;
+ config_z[4] = data.w;
+
+ data = read_node_float(kg, offset);
+ config_z[5] = data.x;
+ config_z[6] = data.y;
+ config_z[7] = data.z;
+ config_z[8] = data.w;
+
+ /* Compute Sky */
+ if (sky_model == 0) {
+ f = sky_radiance_preetham(kg,
+ dir,
+ sunphi,
+ suntheta,
+ radiance_x,
+ radiance_y,
+ radiance_z,
+ config_x,
+ config_y,
+ config_z);
+ }
+ else {
+ f = sky_radiance_hosek(kg,
+ dir,
+ sunphi,
+ suntheta,
+ radiance_x,
+ radiance_y,
+ radiance_z,
+ config_x,
+ config_y,
+ config_z);
+ }
}
+ /* Nishita */
else {
- f = sky_radiance_new(kg,
- dir,
- sunphi,
- suntheta,
- radiance_x,
- radiance_y,
- radiance_z,
- config_x,
- config_y,
- config_z);
+ /* Define variables */
+ float nishita_data[9];
+
+ float4 data = read_node_float(kg, offset);
+ nishita_data[0] = data.x;
+ nishita_data[1] = data.y;
+ nishita_data[2] = data.z;
+ nishita_data[3] = data.w;
+
+ data = read_node_float(kg, offset);
+ nishita_data[4] = data.x;
+ nishita_data[5] = data.y;
+ nishita_data[6] = data.z;
+ nishita_data[7] = data.w;
+
+ data = read_node_float(kg, offset);
+ nishita_data[8] = data.x;
+ uint texture_id = __float_as_uint(data.y);
+
+ /* Compute Sky */
+ f = sky_radiance_nishita(kg, dir, nishita_data, texture_id);
}
stack_store_float3(stack, out_offset, f);
diff --git a/intern/cycles/kernel/svm/svm_types.h b/intern/cycles/kernel/svm/svm_types.h
index e913d9e0489..f1ebb37e23e 100644
--- a/intern/cycles/kernel/svm/svm_types.h
+++ b/intern/cycles/kernel/svm/svm_types.h
@@ -414,7 +414,7 @@ typedef enum NodeWaveProfile {
NODE_WAVE_PROFILE_TRI,
} NodeWaveProfile;
-typedef enum NodeSkyType { NODE_SKY_OLD, NODE_SKY_NEW } NodeSkyType;
+typedef enum NodeSkyType { NODE_SKY_PREETHAM, NODE_SKY_HOSEK, NODE_SKY_NISHITA } NodeSkyType;
typedef enum NodeGradientType {
NODE_BLEND_LINEAR,
diff --git a/intern/cycles/render/CMakeLists.txt b/intern/cycles/render/CMakeLists.txt
index 472b5a0c101..e37a0407976 100644
--- a/intern/cycles/render/CMakeLists.txt
+++ b/intern/cycles/render/CMakeLists.txt
@@ -24,6 +24,7 @@ set(SRC
hair.cpp
image.cpp
image_oiio.cpp
+ image_sky.cpp
image_vdb.cpp
integrator.cpp
jitter.cpp
@@ -64,6 +65,7 @@ set(SRC_HEADERS
hair.h
image.h
image_oiio.h
+ image_sky.h
image_vdb.h
integrator.h
light.h
diff --git a/intern/cycles/render/curves.cpp b/intern/cycles/render/curves.cpp
index 1907bb33d06..db48d8b6430 100644
--- a/intern/cycles/render/curves.cpp
+++ b/intern/cycles/render/curves.cpp
@@ -36,13 +36,12 @@ void curvebounds(float *lower, float *upper, float3 *p, int dim)
float *p2 = &p[2].x;
float *p3 = &p[3].x;
- float fc = 0.71f;
+ /* Catmull-Rom weights. */
float curve_coef[4];
curve_coef[0] = p1[dim];
- curve_coef[1] = -fc * p0[dim] + fc * p2[dim];
- curve_coef[2] = 2.0f * fc * p0[dim] + (fc - 3.0f) * p1[dim] + (3.0f - 2.0f * fc) * p2[dim] -
- fc * p3[dim];
- curve_coef[3] = -fc * p0[dim] + (2.0f - fc) * p1[dim] + (fc - 2.0f) * p2[dim] + fc * p3[dim];
+ curve_coef[1] = 0.5f * (-p0[dim] + p2[dim]);
+ curve_coef[2] = 0.5f * (2 * p0[dim] - 5 * p1[dim] + 4 * p2[dim] - p3[dim]);
+ curve_coef[3] = 0.5f * (-p0[dim] + 3 * p1[dim] - 3 * p2[dim] + p3[dim]);
float discroot = curve_coef[2] * curve_coef[2] - 3 * curve_coef[3] * curve_coef[1];
float ta = -1.0f;
@@ -77,105 +76,4 @@ void curvebounds(float *lower, float *upper, float3 *p, int dim)
*lower = min(*lower, min(exa, exb));
}
-/* Hair System Manager */
-
-CurveSystemManager::CurveSystemManager()
-{
- primitive = CURVE_LINE_SEGMENTS;
- curve_shape = CURVE_THICK;
- line_method = CURVE_CORRECTED;
- triangle_method = CURVE_CAMERA_TRIANGLES;
- resolution = 3;
- subdivisions = 3;
-
- use_curves = true;
- use_encasing = true;
- use_backfacing = false;
- use_tangent_normal_geometry = false;
-
- need_update = true;
- need_mesh_update = false;
-}
-
-CurveSystemManager::~CurveSystemManager()
-{
-}
-
-void CurveSystemManager::device_update(Device *device,
- DeviceScene *dscene,
- Scene * /*scene*/,
- Progress &progress)
-{
- if (!need_update)
- return;
-
- device_free(device, dscene);
-
- progress.set_status("Updating Hair settings", "Copying Hair settings to device");
-
- KernelCurves *kcurve = &dscene->data.curve;
-
- kcurve->curveflags = 0;
-
- if (use_curves) {
- if (primitive == CURVE_SEGMENTS || primitive == CURVE_RIBBONS)
- kcurve->curveflags |= CURVE_KN_INTERPOLATE;
- if (primitive == CURVE_RIBBONS)
- kcurve->curveflags |= CURVE_KN_RIBBONS;
-
- if (line_method == CURVE_ACCURATE)
- kcurve->curveflags |= CURVE_KN_ACCURATE;
- else if (line_method == CURVE_CORRECTED)
- kcurve->curveflags |= CURVE_KN_INTERSECTCORRECTION;
-
- if (use_tangent_normal_geometry)
- kcurve->curveflags |= CURVE_KN_TRUETANGENTGNORMAL;
- if (use_backfacing)
- kcurve->curveflags |= CURVE_KN_BACKFACING;
- if (use_encasing)
- kcurve->curveflags |= CURVE_KN_ENCLOSEFILTER;
-
- kcurve->subdivisions = subdivisions;
- }
-
- if (progress.get_cancel())
- return;
-
- need_update = false;
-}
-
-void CurveSystemManager::device_free(Device * /*device*/, DeviceScene * /*dscene*/)
-{
-}
-
-bool CurveSystemManager::modified(const CurveSystemManager &CurveSystemManager)
-{
- return !(
- curve_shape == CurveSystemManager.curve_shape &&
- line_method == CurveSystemManager.line_method && primitive == CurveSystemManager.primitive &&
- use_encasing == CurveSystemManager.use_encasing &&
- use_tangent_normal_geometry == CurveSystemManager.use_tangent_normal_geometry &&
- use_backfacing == CurveSystemManager.use_backfacing &&
- triangle_method == CurveSystemManager.triangle_method &&
- resolution == CurveSystemManager.resolution && use_curves == CurveSystemManager.use_curves &&
- subdivisions == CurveSystemManager.subdivisions);
-}
-
-bool CurveSystemManager::modified_mesh(const CurveSystemManager &CurveSystemManager)
-{
- return !(
- primitive == CurveSystemManager.primitive && curve_shape == CurveSystemManager.curve_shape &&
- triangle_method == CurveSystemManager.triangle_method &&
- resolution == CurveSystemManager.resolution && use_curves == CurveSystemManager.use_curves);
-}
-
-void CurveSystemManager::tag_update(Scene * /*scene*/)
-{
- need_update = true;
-}
-
-void CurveSystemManager::tag_update_mesh()
-{
- need_mesh_update = true;
-}
CCL_NAMESPACE_END
diff --git a/intern/cycles/render/curves.h b/intern/cycles/render/curves.h
index ade289a402e..c52fcb9c882 100644
--- a/intern/cycles/render/curves.h
+++ b/intern/cycles/render/curves.h
@@ -20,6 +20,8 @@
#include "util/util_array.h"
#include "util/util_types.h"
+#include "render/hair.h"
+
CCL_NAMESPACE_BEGIN
class Device;
@@ -29,33 +31,6 @@ class Scene;
void curvebounds(float *lower, float *upper, float3 *p, int dim);
-typedef enum CurvePrimitiveType {
- CURVE_TRIANGLES = 0,
- CURVE_LINE_SEGMENTS = 1,
- CURVE_SEGMENTS = 2,
- CURVE_RIBBONS = 3,
-
- CURVE_NUM_PRIMITIVE_TYPES,
-} CurvePrimitiveType;
-
-typedef enum CurveShapeType {
- CURVE_RIBBON = 0,
- CURVE_THICK = 1,
-
- CURVE_NUM_SHAPE_TYPES,
-} CurveShapeType;
-
-typedef enum CurveTriangleMethod {
- CURVE_CAMERA_TRIANGLES,
- CURVE_TESSELATED_TRIANGLES
-} CurveTriangleMethod;
-
-typedef enum CurveLineMethod {
- CURVE_ACCURATE,
- CURVE_CORRECTED,
- CURVE_UNCORRECTED
-} CurveLineMethod;
-
class ParticleCurveData {
public:
@@ -75,43 +50,12 @@ class ParticleCurveData {
array<int> curve_keynum;
array<float> curve_length;
array<float2> curve_uv;
- array<float3> curve_vcol;
+ array<float4> curve_vcol;
array<float3> curvekey_co;
array<float> curvekey_time;
};
-/* HairSystem Manager */
-
-class CurveSystemManager {
- public:
- CurvePrimitiveType primitive;
- CurveShapeType curve_shape;
- CurveLineMethod line_method;
- CurveTriangleMethod triangle_method;
- int resolution;
- int subdivisions;
-
- bool use_curves;
- bool use_encasing;
- bool use_backfacing;
- bool use_tangent_normal_geometry;
-
- bool need_update;
- bool need_mesh_update;
-
- CurveSystemManager();
- ~CurveSystemManager();
-
- void device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress &progress);
- void device_free(Device *device, DeviceScene *dscene);
- bool modified(const CurveSystemManager &CurveSystemManager);
- bool modified_mesh(const CurveSystemManager &CurveSystemManager);
-
- void tag_update(Scene *scene);
- void tag_update_mesh();
-};
-
CCL_NAMESPACE_END
#endif /* __CURVES_H__ */
diff --git a/intern/cycles/render/denoising.cpp b/intern/cycles/render/denoising.cpp
index 4d819d1119e..4055bc4773b 100644
--- a/intern/cycles/render/denoising.cpp
+++ b/intern/cycles/render/denoising.cpp
@@ -21,6 +21,7 @@
#include "util/util_foreach.h"
#include "util/util_map.h"
#include "util/util_system.h"
+#include "util/util_task.h"
#include "util/util_time.h"
#include <OpenImageIO/filesystem.h>
@@ -377,8 +378,9 @@ void DenoiseTask::create_task(DeviceTask &task)
/* Denoising parameters. */
task.denoising = denoiser->params;
- task.denoising_do_filter = true;
- task.denoising_write_passes = false;
+ task.denoising.type = DENOISER_NLM;
+ task.denoising.use = true;
+ task.denoising.store_passes = false;
task.denoising_from_render = false;
task.denoising_frames.resize(neighbor_frames.size());
diff --git a/intern/cycles/render/geometry.cpp b/intern/cycles/render/geometry.cpp
index d46ed430c4f..3d1b6e1d865 100644
--- a/intern/cycles/render/geometry.cpp
+++ b/intern/cycles/render/geometry.cpp
@@ -16,10 +16,9 @@
#include "bvh/bvh.h"
#include "bvh/bvh_build.h"
+#include "bvh/bvh_embree.h"
-#ifdef WITH_EMBREE
-# include "bvh/bvh_embree.h"
-#endif
+#include "device/device.h"
#include "render/attribute.h"
#include "render/camera.h"
@@ -212,8 +211,7 @@ void Geometry::compute_bvh(
bparams.num_motion_triangle_steps = params->num_bvh_time_steps;
bparams.num_motion_curve_steps = params->num_bvh_time_steps;
bparams.bvh_type = params->bvh_type;
- bparams.curve_flags = dscene->data.curve.curveflags;
- bparams.curve_subdivisions = dscene->data.curve.subdivisions;
+ bparams.curve_subdivisions = params->curve_subdivisions();
delete bvh;
bvh = BVH::create(bparams, geometry, objects);
@@ -1027,28 +1025,18 @@ void GeometryManager::device_update_bvh(Device *device,
bparams.num_motion_triangle_steps = scene->params.num_bvh_time_steps;
bparams.num_motion_curve_steps = scene->params.num_bvh_time_steps;
bparams.bvh_type = scene->params.bvh_type;
- bparams.curve_flags = dscene->data.curve.curveflags;
- bparams.curve_subdivisions = dscene->data.curve.subdivisions;
+ bparams.curve_subdivisions = scene->params.curve_subdivisions();
VLOG(1) << "Using " << bvh_layout_name(bparams.bvh_layout) << " layout.";
-#ifdef WITH_EMBREE
- if (bparams.bvh_layout == BVH_LAYOUT_EMBREE) {
- if (dscene->data.bvh.scene) {
- BVHEmbree::destroy(dscene->data.bvh.scene);
- }
- }
-#endif
-
BVH *bvh = BVH::create(bparams, scene->geometry, scene->objects);
bvh->build(progress, &device->stats);
if (progress.get_cancel()) {
#ifdef WITH_EMBREE
- if (bparams.bvh_layout == BVH_LAYOUT_EMBREE) {
- if (dscene->data.bvh.scene) {
- BVHEmbree::destroy(dscene->data.bvh.scene);
- }
+ if (dscene->data.bvh.scene) {
+ BVHEmbree::destroy(dscene->data.bvh.scene);
+ dscene->data.bvh.scene = NULL;
}
#endif
delete bvh;
@@ -1104,6 +1092,7 @@ void GeometryManager::device_update_bvh(Device *device,
dscene->data.bvh.root = pack.root_index;
dscene->data.bvh.bvh_layout = bparams.bvh_layout;
dscene->data.bvh.use_bvh_steps = (scene->params.num_bvh_time_steps != 0);
+ dscene->data.bvh.curve_subdivisions = scene->params.curve_subdivisions();
bvh->copy_to_device(progress, dscene);
@@ -1146,6 +1135,12 @@ void GeometryManager::device_update_preprocess(Device *device, Scene *scene, Pro
create_volume_mesh(mesh, progress);
}
}
+
+ if (geom->type == Geometry::HAIR) {
+ /* Set curve shape, still a global scene setting for now. */
+ Hair *hair = static_cast<Hair *>(geom);
+ hair->curve_shape = scene->params.hair_shape;
+ }
}
need_flags_update = false;
@@ -1413,6 +1408,14 @@ void GeometryManager::device_update(Device *device,
void GeometryManager::device_free(Device *device, DeviceScene *dscene)
{
+#ifdef WITH_EMBREE
+ if (dscene->data.bvh.scene) {
+ if (dscene->data.bvh.bvh_layout == BVH_LAYOUT_EMBREE)
+ BVHEmbree::destroy(dscene->data.bvh.scene);
+ dscene->data.bvh.scene = NULL;
+ }
+#endif
+
dscene->bvh_nodes.free();
dscene->bvh_leaf_nodes.free();
dscene->object_node.free();
diff --git a/intern/cycles/render/hair.cpp b/intern/cycles/render/hair.cpp
index 3daa4cc1e35..816c15cf4ef 100644
--- a/intern/cycles/render/hair.cpp
+++ b/intern/cycles/render/hair.cpp
@@ -294,6 +294,7 @@ NODE_DEFINE(Hair)
Hair::Hair() : Geometry(node_type, Geometry::HAIR)
{
curvekey_offset = 0;
+ curve_shape = CURVE_RIBBON;
}
Hair::~Hair()
diff --git a/intern/cycles/render/hair.h b/intern/cycles/render/hair.h
index 79f77a78753..39d6a34d799 100644
--- a/intern/cycles/render/hair.h
+++ b/intern/cycles/render/hair.h
@@ -96,6 +96,7 @@ class Hair : public Geometry {
/* BVH */
size_t curvekey_offset;
+ CurveShapeType curve_shape;
/* Constructor/Destructor */
Hair();
diff --git a/intern/cycles/render/image.cpp b/intern/cycles/render/image.cpp
index 75050b66bf2..8d187814d64 100644
--- a/intern/cycles/render/image.cpp
+++ b/intern/cycles/render/image.cpp
@@ -27,6 +27,7 @@
#include "util/util_logging.h"
#include "util/util_path.h"
#include "util/util_progress.h"
+#include "util/util_task.h"
#include "util/util_texture.h"
#include "util/util_unique_ptr.h"
diff --git a/intern/cycles/render/image.h b/intern/cycles/render/image.h
index 2000582ce70..fffe7c5152a 100644
--- a/intern/cycles/render/image.h
+++ b/intern/cycles/render/image.h
@@ -17,7 +17,6 @@
#ifndef __IMAGE_H__
#define __IMAGE_H__
-#include "device/device.h"
#include "device/device_memory.h"
#include "render/colorspace.h"
@@ -31,6 +30,7 @@
CCL_NAMESPACE_BEGIN
class Device;
+class DeviceInfo;
class ImageHandle;
class ImageKey;
class ImageMetaData;
diff --git a/intern/cycles/render/image_sky.cpp b/intern/cycles/render/image_sky.cpp
new file mode 100644
index 00000000000..442e1d7941f
--- /dev/null
+++ b/intern/cycles/render/image_sky.cpp
@@ -0,0 +1,91 @@
+/*
+ * Copyright 2011-2020 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "render/image_sky.h"
+
+#include "util/util_image.h"
+#include "util/util_logging.h"
+#include "util/util_path.h"
+#include "util/util_sky_model.h"
+#include "util/util_task.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Store the sky parameters; the texture itself is only generated in load_pixels(). */
+SkyLoader::SkyLoader(float sun_elevation,
+                     int altitude,
+                     float air_density,
+                     float dust_density,
+                     float ozone_density)
+    : sun_elevation(sun_elevation),
+      altitude(altitude),
+      air_density(air_density),
+      dust_density(dust_density),
+      ozone_density(ozone_density)
+{
+  /* Nothing else to set up. */
+}
+
+/* Nothing to release, the loader only holds plain parameters. */
+SkyLoader::~SkyLoader()
+{
+}
+
+/* Describe the generated sky texture: a fixed 512x128 float image with three channels.
+ * Always succeeds. */
+bool SkyLoader::load_metadata(ImageMetaData &metadata)
+{
+  /* Pixel format. */
+  metadata.type = IMAGE_DATA_TYPE_FLOAT4;
+  metadata.channels = 3;
+  metadata.compress_as_srgb = false;
+
+  /* Dimensions. */
+  metadata.width = 512;
+  metadata.height = 128;
+  metadata.depth = 1;
+
+  return true;
+}
+
+/* Generate the sky texture into `pixels`, using the size and channel count from `metadata`.
+ * The rows are split into ranges that are filled in parallel. Always returns true. */
+bool SkyLoader::load_pixels(const ImageMetaData &metadata,
+                            void *pixels,
+                            const size_t /*pixels_size*/,
+                            const bool /*associate_alpha*/)
+{
+  float *texels = (float *)pixels;
+  const int tex_width = metadata.width;
+  const int tex_height = metadata.height;
+  const float altitude_as_float = (float)altitude;
+
+  /* Fills the rows [begin, end) of the texture. */
+  auto fill_rows = [&](const blocked_range<size_t> &rows) {
+    nishita_skymodel_precompute_texture(texels,
+                                        metadata.channels,
+                                        rows.begin(),
+                                        rows.end(),
+                                        tex_width,
+                                        tex_height,
+                                        sun_elevation,
+                                        altitude_as_float,
+                                        air_density,
+                                        dust_density,
+                                        ozone_density);
+  };
+
+  /* Grain size in rows, derived from a budget of 1024 texels and the texture width. */
+  const int rows_per_task = divide_up(1024, tex_width);
+  parallel_for(blocked_range<size_t>(0, tex_height, rows_per_task), fill_rows);
+
+  return true;
+}
+
+/* Fixed name of this loader, independent of the sky parameters. */
+string SkyLoader::name() const
+{
+  string loader_name("sky_nishita");
+  return loader_name;
+}
+
+/* Never reports two sky loaders as equal, the other loader is not inspected at all.
+ * NOTE(review): presumably this keeps sky textures from being shared between users of the
+ * same parameters; confirm this is intended rather than a placeholder. */
+bool SkyLoader::equals(const ImageLoader &other) const
+{
+  (void)other;
+  return false;
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/render/image_sky.h b/intern/cycles/render/image_sky.h
new file mode 100644
index 00000000000..cf4a3e8942c
--- /dev/null
+++ b/intern/cycles/render/image_sky.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright 2011-2020 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __IMAGE_SKY_H__
+#define __IMAGE_SKY_H__
+
+#include "render/image.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Image loader that generates its pixels from sky model parameters instead of reading
+ * them from a file. */
+class SkyLoader : public ImageLoader {
+ private:
+  /* Parameters captured at construction and handed to the sky precomputation. */
+  float sun_elevation;
+  int altitude;
+  float air_density;
+  float dust_density;
+  float ozone_density;
+
+ public:
+  SkyLoader(float sun_elevation,
+            int altitude,
+            float air_density,
+            float dust_density,
+            float ozone_density);
+  ~SkyLoader();
+
+  /* Fill in size and pixel format of the generated texture. */
+  bool load_metadata(ImageMetaData &metadata) override;
+
+  /* Generate the texture into `pixels`. */
+  bool load_pixels(const ImageMetaData &metadata,
+                   void *pixels,
+                   const size_t /*pixels_size*/,
+                   const bool /*associate_alpha*/) override;
+
+  string name() const override;
+
+  bool equals(const ImageLoader & /*other*/) const override;
+};
+
+CCL_NAMESPACE_END
+
+#endif /* __IMAGE_SKY_H__ */
diff --git a/intern/cycles/render/integrator.cpp b/intern/cycles/render/integrator.cpp
index d4beb06e57b..eff416efa2b 100644
--- a/intern/cycles/render/integrator.cpp
+++ b/intern/cycles/render/integrator.cpp
@@ -29,6 +29,7 @@
#include "util/util_foreach.h"
#include "util/util_hash.h"
#include "util/util_logging.h"
+#include "util/util_task.h"
CCL_NAMESPACE_BEGIN
diff --git a/intern/cycles/render/light.cpp b/intern/cycles/render/light.cpp
index cb7474017fa..c0615c6217b 100644
--- a/intern/cycles/render/light.cpp
+++ b/intern/cycles/render/light.cpp
@@ -31,6 +31,7 @@
#include "util/util_logging.h"
#include "util/util_path.h"
#include "util/util_progress.h"
+#include "util/util_task.h"
CCL_NAMESPACE_BEGIN
@@ -450,6 +451,7 @@ void LightManager::device_update_distribution(Device *,
/* update device */
KernelIntegrator *kintegrator = &dscene->data.integrator;
+ KernelBackground *kbackground = &dscene->data.background;
KernelFilm *kfilm = &dscene->data.film;
kintegrator->use_direct_light = (totarea > 0.0f);
@@ -493,15 +495,18 @@ void LightManager::device_update_distribution(Device *,
/* Portals */
if (num_portals > 0) {
- kintegrator->portal_offset = light_index;
- kintegrator->num_portals = num_portals;
- kintegrator->portal_pdf = background_mis ? 0.5f : 1.0f;
+ kbackground->portal_offset = light_index;
+ kbackground->num_portals = num_portals;
+ kbackground->portal_weight = 1.0f;
}
else {
- kintegrator->num_portals = 0;
- kintegrator->portal_offset = 0;
- kintegrator->portal_pdf = 0.0f;
+ kbackground->num_portals = 0;
+ kbackground->portal_offset = 0;
+ kbackground->portal_weight = 0.0f;
}
+
+ /* Map */
+ kbackground->map_weight = background_mis ? 1.0f : 0.0f;
}
else {
dscene->light_distribution.free();
@@ -511,9 +516,12 @@ void LightManager::device_update_distribution(Device *,
kintegrator->pdf_triangles = 0.0f;
kintegrator->pdf_lights = 0.0f;
kintegrator->use_lamp_mis = false;
- kintegrator->num_portals = 0;
- kintegrator->portal_offset = 0;
- kintegrator->portal_pdf = 0.0f;
+
+ kbackground->num_portals = 0;
+ kbackground->portal_offset = 0;
+ kbackground->portal_weight = 0.0f;
+ kbackground->sun_weight = 0.0f;
+ kbackground->map_weight = 0.0f;
kfilm->pass_shadow_scale = 1.0f;
}
@@ -562,7 +570,7 @@ void LightManager::device_update_background(Device *device,
Scene *scene,
Progress &progress)
{
- KernelIntegrator *kintegrator = &dscene->data.integrator;
+ KernelBackground *kbackground = &dscene->data.background;
Light *background_light = NULL;
/* find background light */
@@ -575,31 +583,79 @@ void LightManager::device_update_background(Device *device,
/* no background light found, signal renderer to skip sampling */
if (!background_light || !background_light->is_enabled) {
- kintegrator->pdf_background_res_x = 0;
- kintegrator->pdf_background_res_y = 0;
+ kbackground->map_res_x = 0;
+ kbackground->map_res_y = 0;
+ kbackground->map_weight = 0.0f;
+ kbackground->sun_weight = 0.0f;
+ kbackground->use_mis = (kbackground->portal_weight > 0.0f);
return;
}
progress.set_status("Updating Lights", "Importance map");
- assert(kintegrator->use_direct_light);
+ assert(dscene->data.integrator.use_direct_light);
+
+ int2 environment_res = make_int2(0, 0);
+ Shader *shader = scene->background->get_shader(scene);
+ int num_suns = 0;
+ foreach (ShaderNode *node, shader->graph->nodes) {
+ if (node->type == EnvironmentTextureNode::node_type) {
+ EnvironmentTextureNode *env = (EnvironmentTextureNode *)node;
+ ImageMetaData metadata;
+ if (!env->handle.empty()) {
+ ImageMetaData metadata = env->handle.metadata();
+ environment_res.x = max(environment_res.x, metadata.width);
+ environment_res.y = max(environment_res.y, metadata.height);
+ }
+ }
+ if (node->type == SkyTextureNode::node_type) {
+ SkyTextureNode *sky = (SkyTextureNode *)node;
+ if (sky->type == NODE_SKY_NISHITA && sky->sun_disc) {
+ /* Ensure that the input coordinates aren't transformed before they reach the node.
+ * If that is the case, the logic used for sampling the sun's location does not work
+ * and we have to fall back to map-based sampling. */
+ const ShaderInput *vec_in = sky->input("Vector");
+ if (vec_in && vec_in->link && vec_in->link->parent) {
+ ShaderNode *vec_src = vec_in->link->parent;
+ if ((vec_src->type != TextureCoordinateNode::node_type) ||
+ (vec_in->link != vec_src->output("Generated"))) {
+ environment_res.x = max(environment_res.x, 4096);
+ environment_res.y = max(environment_res.y, 2048);
+ continue;
+ }
+ }
+
+ float latitude = sky->sun_elevation;
+ float longitude = M_2PI_F - sky->sun_rotation + M_PI_2_F;
+ float half_angle = sky->sun_size * 0.5f;
+ kbackground->sun = make_float4(cosf(latitude) * cosf(longitude),
+ cosf(latitude) * sinf(longitude),
+ sinf(latitude),
+ half_angle);
+ kbackground->sun_weight = 4.0f;
+ environment_res.x = max(environment_res.x, 512);
+ environment_res.y = max(environment_res.y, 256);
+ num_suns++;
+ }
+ }
+ }
+
+ /* If there's more than one sun, fall back to map sampling instead. */
+ if (num_suns != 1) {
+ kbackground->sun_weight = 0.0f;
+ environment_res.x = max(environment_res.x, 4096);
+ environment_res.y = max(environment_res.y, 2048);
+ }
+
+ /* Enable MIS for background sampling if any strategy is active. */
+ kbackground->use_mis = (kbackground->portal_weight + kbackground->map_weight +
+ kbackground->sun_weight) > 0.0f;
/* get the resolution from the light's size (we stuff it in there) */
int2 res = make_int2(background_light->map_resolution, background_light->map_resolution / 2);
/* If the resolution isn't set manually, try to find an environment texture. */
if (res.x == 0) {
- Shader *shader = scene->background->get_shader(scene);
- foreach (ShaderNode *node, shader->graph->nodes) {
- if (node->type == EnvironmentTextureNode::node_type) {
- EnvironmentTextureNode *env = (EnvironmentTextureNode *)node;
- ImageMetaData metadata;
- if (!env->handle.empty()) {
- ImageMetaData metadata = env->handle.metadata();
- res.x = max(res.x, metadata.width);
- res.y = max(res.y, metadata.height);
- }
- }
- }
+ res = environment_res;
if (res.x > 0 && res.y > 0) {
VLOG(2) << "Automatically set World MIS resolution to " << res.x << " by " << res.y << "\n";
}
@@ -609,8 +665,8 @@ void LightManager::device_update_background(Device *device,
res = make_int2(1024, 512);
VLOG(2) << "Setting World MIS resolution to default\n";
}
- kintegrator->pdf_background_res_x = res.x;
- kintegrator->pdf_background_res_y = res.y;
+ kbackground->map_res_x = res.x;
+ kbackground->map_res_y = res.y;
vector<float3> pixels;
shade_background_pixels(device, dscene, res.x, res.y, pixels, progress);
@@ -624,29 +680,13 @@ void LightManager::device_update_background(Device *device,
float2 *cond_cdf = dscene->light_background_conditional_cdf.alloc(cdf_width * res.y);
double time_start = time_dt();
- if (max(res.x, res.y) < 512) {
- /* Small enough resolution, faster to do single-threaded. */
- background_cdf(0, res.y, res.x, res.y, &pixels, cond_cdf);
- }
- else {
- /* Threaded evaluation for large resolution. */
- const int num_blocks = TaskScheduler::num_threads();
- const int chunk_size = res.y / num_blocks;
- int start_row = 0;
- TaskPool pool;
- for (int i = 0; i < num_blocks; ++i) {
- const int current_chunk_size = (i != num_blocks - 1) ? chunk_size : (res.y - i * chunk_size);
- pool.push(function_bind(&background_cdf,
- start_row,
- start_row + current_chunk_size,
- res.x,
- res.y,
- &pixels,
- cond_cdf));
- start_row += current_chunk_size;
- }
- pool.wait_work();
- }
+
+ /* Create CDF in parallel. */
+ const int rows_per_task = divide_up(10240, res.x);
+ parallel_for(blocked_range<size_t>(0, res.y, rows_per_task),
+ [&](const blocked_range<size_t> &r) {
+ background_cdf(r.begin(), r.end(), res.x, res.y, &pixels, cond_cdf);
+ });
/* marginal CDFs (column, V direction, sum of rows) */
marg_cdf[0].x = cond_cdf[res.x].x;
diff --git a/intern/cycles/render/nodes.cpp b/intern/cycles/render/nodes.cpp
index cdcaeb246dd..ab392839e52 100644
--- a/intern/cycles/render/nodes.cpp
+++ b/intern/cycles/render/nodes.cpp
@@ -19,6 +19,7 @@
#include "render/constant_fold.h"
#include "render/film.h"
#include "render/image.h"
+#include "render/image_sky.h"
#include "render/integrator.h"
#include "render/light.h"
#include "render/mesh.h"
@@ -630,7 +631,7 @@ typedef struct SunSky {
/* Parameter */
float radiance_x, radiance_y, radiance_z;
- float config_x[9], config_y[9], config_z[9];
+ float config_x[9], config_y[9], config_z[9], nishita_data[9];
} SunSky;
/* Preetham model */
@@ -640,7 +641,7 @@ static float sky_perez_function(float lam[6], float theta, float gamma)
(1.0f + lam[2] * expf(lam[3] * gamma) + lam[4] * cosf(gamma) * cosf(gamma));
}
-static void sky_texture_precompute_old(SunSky *sunsky, float3 dir, float turbidity)
+static void sky_texture_precompute_preetham(SunSky *sunsky, float3 dir, float turbidity)
{
/*
* We re-use the SunSky struct of the new model, to avoid extra variables
@@ -703,10 +704,10 @@ static void sky_texture_precompute_old(SunSky *sunsky, float3 dir, float turbidi
}
/* Hosek / Wilkie */
-static void sky_texture_precompute_new(SunSky *sunsky,
- float3 dir,
- float turbidity,
- float ground_albedo)
+static void sky_texture_precompute_hosek(SunSky *sunsky,
+ float3 dir,
+ float turbidity,
+ float ground_albedo)
{
/* Calculate Sun Direction and save coordinates */
float2 spherical = sky_spherical_coordinates(dir);
@@ -743,6 +744,34 @@ static void sky_texture_precompute_new(SunSky *sunsky,
arhosekskymodelstate_free(sky_state);
}
+/* Nishita improved */
+static void sky_texture_precompute_nishita(SunSky *sunsky,
+ bool sun_disc,
+ float sun_size,
+ float sun_elevation,
+ float sun_rotation,
+ int altitude,
+ float air_density,
+ float dust_density)
+{
+ /* sample 2 sun pixels */
+ float pixel_bottom[3];
+ float pixel_top[3];
+ float altitude_f = (float)altitude;
+ nishita_skymodel_precompute_sun(
+ sun_elevation, sun_size, altitude_f, air_density, dust_density, pixel_bottom, pixel_top);
+ /* send data to svm_sky */
+ sunsky->nishita_data[0] = pixel_bottom[0];
+ sunsky->nishita_data[1] = pixel_bottom[1];
+ sunsky->nishita_data[2] = pixel_bottom[2];
+ sunsky->nishita_data[3] = pixel_top[0];
+ sunsky->nishita_data[4] = pixel_top[1];
+ sunsky->nishita_data[5] = pixel_top[2];
+ sunsky->nishita_data[6] = sun_elevation;
+ sunsky->nishita_data[7] = M_2PI_F - sun_rotation;
+ sunsky->nishita_data[8] = sun_disc ? sun_size : 0.0f;
+}
+
NODE_DEFINE(SkyTextureNode)
{
NodeType *type = NodeType::add("sky_texture", create, NodeType::SHADER);
@@ -750,13 +779,22 @@ NODE_DEFINE(SkyTextureNode)
TEXTURE_MAPPING_DEFINE(SkyTextureNode);
static NodeEnum type_enum;
- type_enum.insert("preetham", NODE_SKY_OLD);
- type_enum.insert("hosek_wilkie", NODE_SKY_NEW);
- SOCKET_ENUM(type, "Type", type_enum, NODE_SKY_NEW);
+ type_enum.insert("preetham", NODE_SKY_PREETHAM);
+ type_enum.insert("hosek_wilkie", NODE_SKY_HOSEK);
+ type_enum.insert("nishita_improved", NODE_SKY_NISHITA);
+ SOCKET_ENUM(type, "Type", type_enum, NODE_SKY_NISHITA);
SOCKET_VECTOR(sun_direction, "Sun Direction", make_float3(0.0f, 0.0f, 1.0f));
SOCKET_FLOAT(turbidity, "Turbidity", 2.2f);
SOCKET_FLOAT(ground_albedo, "Ground Albedo", 0.3f);
+ SOCKET_BOOLEAN(sun_disc, "Sun Disc", true);
+ SOCKET_FLOAT(sun_size, "Sun Size", 0.009512f);
+ SOCKET_FLOAT(sun_elevation, "Sun Elevation", M_PI_2_F);
+ SOCKET_FLOAT(sun_rotation, "Sun Rotation", 0.0f);
+ SOCKET_INT(altitude, "Altitude", 0);
+ SOCKET_FLOAT(air_density, "Air", 1.0f);
+ SOCKET_FLOAT(dust_density, "Dust", 1.0f);
+ SOCKET_FLOAT(ozone_density, "Ozone", 1.0f);
SOCKET_IN_POINT(
vector, "Vector", make_float3(0.0f, 0.0f, 0.0f), SocketType::LINK_TEXTURE_GENERATED);
@@ -776,10 +814,32 @@ void SkyTextureNode::compile(SVMCompiler &compiler)
ShaderOutput *color_out = output("Color");
SunSky sunsky;
- if (type == NODE_SKY_OLD)
- sky_texture_precompute_old(&sunsky, sun_direction, turbidity);
- else if (type == NODE_SKY_NEW)
- sky_texture_precompute_new(&sunsky, sun_direction, turbidity, ground_albedo);
+ if (type == NODE_SKY_PREETHAM)
+ sky_texture_precompute_preetham(&sunsky, sun_direction, turbidity);
+ else if (type == NODE_SKY_HOSEK)
+ sky_texture_precompute_hosek(&sunsky, sun_direction, turbidity, ground_albedo);
+ else if (type == NODE_SKY_NISHITA) {
+ sky_texture_precompute_nishita(&sunsky,
+ sun_disc,
+ sun_size,
+ sun_elevation,
+ sun_rotation,
+ altitude,
+ air_density,
+ dust_density);
+ /* precomputed texture image parameters */
+ ImageManager *image_manager = compiler.scene->image_manager;
+ ImageParams impar;
+ impar.interpolation = INTERPOLATION_LINEAR;
+ impar.extension = EXTENSION_EXTEND;
+
+ /* precompute sky texture */
+ if (handle.empty()) {
+ SkyLoader *loader = new SkyLoader(
+ sun_elevation, altitude, air_density, dust_density, ozone_density);
+ handle = image_manager->add_image(loader, impar);
+ }
+ }
else
assert(false);
@@ -787,38 +847,52 @@ void SkyTextureNode::compile(SVMCompiler &compiler)
compiler.stack_assign(color_out);
compiler.add_node(NODE_TEX_SKY, vector_offset, compiler.stack_assign(color_out), type);
- compiler.add_node(__float_as_uint(sunsky.phi),
- __float_as_uint(sunsky.theta),
- __float_as_uint(sunsky.radiance_x),
- __float_as_uint(sunsky.radiance_y));
- compiler.add_node(__float_as_uint(sunsky.radiance_z),
- __float_as_uint(sunsky.config_x[0]),
- __float_as_uint(sunsky.config_x[1]),
- __float_as_uint(sunsky.config_x[2]));
- compiler.add_node(__float_as_uint(sunsky.config_x[3]),
- __float_as_uint(sunsky.config_x[4]),
- __float_as_uint(sunsky.config_x[5]),
- __float_as_uint(sunsky.config_x[6]));
- compiler.add_node(__float_as_uint(sunsky.config_x[7]),
- __float_as_uint(sunsky.config_x[8]),
- __float_as_uint(sunsky.config_y[0]),
- __float_as_uint(sunsky.config_y[1]));
- compiler.add_node(__float_as_uint(sunsky.config_y[2]),
- __float_as_uint(sunsky.config_y[3]),
- __float_as_uint(sunsky.config_y[4]),
- __float_as_uint(sunsky.config_y[5]));
- compiler.add_node(__float_as_uint(sunsky.config_y[6]),
- __float_as_uint(sunsky.config_y[7]),
- __float_as_uint(sunsky.config_y[8]),
- __float_as_uint(sunsky.config_z[0]));
- compiler.add_node(__float_as_uint(sunsky.config_z[1]),
- __float_as_uint(sunsky.config_z[2]),
- __float_as_uint(sunsky.config_z[3]),
- __float_as_uint(sunsky.config_z[4]));
- compiler.add_node(__float_as_uint(sunsky.config_z[5]),
- __float_as_uint(sunsky.config_z[6]),
- __float_as_uint(sunsky.config_z[7]),
- __float_as_uint(sunsky.config_z[8]));
+ /* nishita doesn't need this data */
+ if (type != NODE_SKY_NISHITA) {
+ compiler.add_node(__float_as_uint(sunsky.phi),
+ __float_as_uint(sunsky.theta),
+ __float_as_uint(sunsky.radiance_x),
+ __float_as_uint(sunsky.radiance_y));
+ compiler.add_node(__float_as_uint(sunsky.radiance_z),
+ __float_as_uint(sunsky.config_x[0]),
+ __float_as_uint(sunsky.config_x[1]),
+ __float_as_uint(sunsky.config_x[2]));
+ compiler.add_node(__float_as_uint(sunsky.config_x[3]),
+ __float_as_uint(sunsky.config_x[4]),
+ __float_as_uint(sunsky.config_x[5]),
+ __float_as_uint(sunsky.config_x[6]));
+ compiler.add_node(__float_as_uint(sunsky.config_x[7]),
+ __float_as_uint(sunsky.config_x[8]),
+ __float_as_uint(sunsky.config_y[0]),
+ __float_as_uint(sunsky.config_y[1]));
+ compiler.add_node(__float_as_uint(sunsky.config_y[2]),
+ __float_as_uint(sunsky.config_y[3]),
+ __float_as_uint(sunsky.config_y[4]),
+ __float_as_uint(sunsky.config_y[5]));
+ compiler.add_node(__float_as_uint(sunsky.config_y[6]),
+ __float_as_uint(sunsky.config_y[7]),
+ __float_as_uint(sunsky.config_y[8]),
+ __float_as_uint(sunsky.config_z[0]));
+ compiler.add_node(__float_as_uint(sunsky.config_z[1]),
+ __float_as_uint(sunsky.config_z[2]),
+ __float_as_uint(sunsky.config_z[3]),
+ __float_as_uint(sunsky.config_z[4]));
+ compiler.add_node(__float_as_uint(sunsky.config_z[5]),
+ __float_as_uint(sunsky.config_z[6]),
+ __float_as_uint(sunsky.config_z[7]),
+ __float_as_uint(sunsky.config_z[8]));
+ }
+ else {
+ compiler.add_node(__float_as_uint(sunsky.nishita_data[0]),
+ __float_as_uint(sunsky.nishita_data[1]),
+ __float_as_uint(sunsky.nishita_data[2]),
+ __float_as_uint(sunsky.nishita_data[3]));
+ compiler.add_node(__float_as_uint(sunsky.nishita_data[4]),
+ __float_as_uint(sunsky.nishita_data[5]),
+ __float_as_uint(sunsky.nishita_data[6]),
+ __float_as_uint(sunsky.nishita_data[7]));
+ compiler.add_node(__float_as_uint(sunsky.nishita_data[8]), handle.svm_slot(), 0, 0);
+ }
tex_mapping.compile_end(compiler, vector_in, vector_offset);
}
@@ -828,10 +902,32 @@ void SkyTextureNode::compile(OSLCompiler &compiler)
tex_mapping.compile(compiler);
SunSky sunsky;
- if (type == NODE_SKY_OLD)
- sky_texture_precompute_old(&sunsky, sun_direction, turbidity);
- else if (type == NODE_SKY_NEW)
- sky_texture_precompute_new(&sunsky, sun_direction, turbidity, ground_albedo);
+ if (type == NODE_SKY_PREETHAM)
+ sky_texture_precompute_preetham(&sunsky, sun_direction, turbidity);
+ else if (type == NODE_SKY_HOSEK)
+ sky_texture_precompute_hosek(&sunsky, sun_direction, turbidity, ground_albedo);
+ else if (type == NODE_SKY_NISHITA) {
+ sky_texture_precompute_nishita(&sunsky,
+ sun_disc,
+ sun_size,
+ sun_elevation,
+ sun_rotation,
+ altitude,
+ air_density,
+ dust_density);
+ /* precomputed texture image parameters */
+ ImageManager *image_manager = compiler.scene->image_manager;
+ ImageParams impar;
+ impar.interpolation = INTERPOLATION_LINEAR;
+ impar.extension = EXTENSION_EXTEND;
+
+ /* precompute sky texture */
+ if (handle.empty()) {
+ SkyLoader *loader = new SkyLoader(
+ sun_elevation, altitude, air_density, dust_density, ozone_density);
+ handle = image_manager->add_image(loader, impar);
+ }
+ }
else
assert(false);
@@ -843,6 +939,11 @@ void SkyTextureNode::compile(OSLCompiler &compiler)
compiler.parameter_array("config_x", sunsky.config_x, 9);
compiler.parameter_array("config_y", sunsky.config_y, 9);
compiler.parameter_array("config_z", sunsky.config_z, 9);
+ compiler.parameter_array("nishita_data", sunsky.nishita_data, 9);
+ /* nishita texture */
+ if (type == NODE_SKY_NISHITA) {
+ compiler.parameter_texture("filename", handle.svm_slot());
+ }
compiler.add(this, "node_sky_texture");
}
diff --git a/intern/cycles/render/nodes.h b/intern/cycles/render/nodes.h
index 83c3ad071ae..846ba7423e5 100644
--- a/intern/cycles/render/nodes.h
+++ b/intern/cycles/render/nodes.h
@@ -168,7 +168,16 @@ class SkyTextureNode : public TextureNode {
float3 sun_direction;
float turbidity;
float ground_albedo;
+ bool sun_disc;
+ float sun_size;
+ float sun_elevation;
+ float sun_rotation;
+ int altitude;
+ float air_density;
+ float dust_density;
+ float ozone_density;
float3 vector;
+ ImageHandle handle;
};
class OutputNode : public ShaderNode {
diff --git a/intern/cycles/render/object.cpp b/intern/cycles/render/object.cpp
index 61deef4cd76..c45ae5553a8 100644
--- a/intern/cycles/render/object.cpp
+++ b/intern/cycles/render/object.cpp
@@ -31,6 +31,7 @@
#include "util/util_murmurhash.h"
#include "util/util_progress.h"
#include "util/util_set.h"
+#include "util/util_task.h"
#include "util/util_vector.h"
#include "subd/subd_patch_table.h"
@@ -77,7 +78,6 @@ struct UpdateObjectTransformState {
Scene *scene;
/* Some locks to keep everything thread-safe. */
- thread_spin_lock queue_lock;
thread_spin_lock surface_area_lock;
/* First unused object index in the queue. */
@@ -219,7 +219,6 @@ void Object::tag_update(Scene *scene)
}
scene->camera->need_flags_update = true;
- scene->curve_system_manager->need_update = true;
scene->geometry_manager->need_update = true;
scene->object_manager->need_update = true;
}
@@ -550,41 +549,6 @@ void ObjectManager::device_update_object_transform(UpdateObjectTransformState *s
}
}
-bool ObjectManager::device_update_object_transform_pop_work(UpdateObjectTransformState *state,
- int *start_index,
- int *num_objects)
-{
- /* Tweakable parameter, number of objects per chunk.
- * Too small value will cause some extra overhead due to spin lock,
- * too big value might not use all threads nicely.
- */
- static const int OBJECTS_PER_TASK = 32;
- bool have_work = false;
- state->queue_lock.lock();
- int num_scene_objects = state->scene->objects.size();
- if (state->queue_start_object < num_scene_objects) {
- int count = min(OBJECTS_PER_TASK, num_scene_objects - state->queue_start_object);
- *start_index = state->queue_start_object;
- *num_objects = count;
- state->queue_start_object += count;
- have_work = true;
- }
- state->queue_lock.unlock();
- return have_work;
-}
-
-void ObjectManager::device_update_object_transform_task(UpdateObjectTransformState *state)
-{
- int start_index, num_objects;
- while (device_update_object_transform_pop_work(state, &start_index, &num_objects)) {
- for (int i = 0; i < num_objects; ++i) {
- const int object_index = start_index + i;
- Object *ob = state->scene->objects[object_index];
- device_update_object_transform(state, ob);
- }
- }
-}
-
void ObjectManager::device_update_transforms(DeviceScene *dscene, Scene *scene, Progress &progress)
{
UpdateObjectTransformState state;
@@ -630,28 +594,19 @@ void ObjectManager::device_update_transforms(DeviceScene *dscene, Scene *scene,
numparticles += psys->particles.size();
}
- /* NOTE: If it's just a handful of objects we deal with them in a single
- * thread to avoid threading overhead. However, this threshold is might
- * need some tweaks to make mid-complex scenes optimal.
- */
- if (scene->objects.size() < 64) {
- foreach (Object *ob, scene->objects) {
- device_update_object_transform(&state, ob);
- if (progress.get_cancel()) {
- return;
- }
- }
- }
- else {
- const int num_threads = TaskScheduler::num_threads();
- TaskPool pool;
- for (int i = 0; i < num_threads; ++i) {
- pool.push(function_bind(&ObjectManager::device_update_object_transform_task, this, &state));
- }
- pool.wait_work();
- if (progress.get_cancel()) {
- return;
- }
+ /* Parallel object update, with grain size to avoid too much threading overhead
+ * for individual objects. */
+ static const int OBJECTS_PER_TASK = 32;
+ parallel_for(blocked_range<size_t>(0, scene->objects.size(), OBJECTS_PER_TASK),
+ [&](const blocked_range<size_t> &r) {
+ for (size_t i = r.begin(); i != r.end(); i++) {
+ Object *ob = state.scene->objects[i];
+ device_update_object_transform(&state, ob);
+ }
+ });
+
+ if (progress.get_cancel()) {
+ return;
}
dscene->objects.copy_to_device();
@@ -664,7 +619,6 @@ void ObjectManager::device_update_transforms(DeviceScene *dscene, Scene *scene,
dscene->data.bvh.have_motion = state.have_motion;
dscene->data.bvh.have_curves = state.have_curves;
- dscene->data.bvh.have_instancing = true;
}
void ObjectManager::device_update(Device *device,
@@ -839,7 +793,6 @@ void ObjectManager::apply_static_transforms(DeviceScene *dscene, Scene *scene, P
bool motion_blur = need_motion == Scene::MOTION_BLUR;
bool apply_to_motion = need_motion != Scene::MOTION_PASS;
int i = 0;
- bool have_instancing = false;
foreach (Object *object, scene->objects) {
map<Geometry *, int>::iterator it = geometry_users.find(object->geometry);
@@ -885,22 +838,15 @@ void ObjectManager::apply_static_transforms(DeviceScene *dscene, Scene *scene, P
if (geom->transform_negative_scaled)
object_flag[i] |= SD_OBJECT_NEGATIVE_SCALE_APPLIED;
}
- else
- have_instancing = true;
}
- else
- have_instancing = true;
i++;
}
-
- dscene->data.bvh.have_instancing = have_instancing;
}
void ObjectManager::tag_update(Scene *scene)
{
need_update = true;
- scene->curve_system_manager->need_update = true;
scene->geometry_manager->need_update = true;
scene->light_manager->need_update = true;
}
diff --git a/intern/cycles/render/scene.cpp b/intern/cycles/render/scene.cpp
index f5b68d5a4fe..9016a8d325f 100644
--- a/intern/cycles/render/scene.cpp
+++ b/intern/cycles/render/scene.cpp
@@ -108,7 +108,6 @@ Scene::Scene(const SceneParams &params_, Device *device)
integrator = new Integrator();
image_manager = new ImageManager(device->info);
particle_system_manager = new ParticleSystemManager();
- curve_system_manager = new CurveSystemManager();
bake_manager = new BakeManager();
/* OSL only works on the CPU */
@@ -156,7 +155,6 @@ void Scene::free_memory(bool final)
light_manager->device_free(device, &dscene);
particle_system_manager->device_free(device, &dscene);
- curve_system_manager->device_free(device, &dscene);
bake_manager->device_free(device, &dscene);
@@ -180,7 +178,6 @@ void Scene::free_memory(bool final)
delete shader_manager;
delete light_manager;
delete particle_system_manager;
- delete curve_system_manager;
delete image_manager;
delete bake_manager;
}
@@ -233,12 +230,6 @@ void Scene::device_update(Device *device_, Progress &progress)
if (progress.get_cancel() || device->have_error())
return;
- progress.set_status("Updating Hair Systems");
- curve_system_manager->device_update(device, &dscene, this, progress);
-
- if (progress.get_cancel() || device->have_error())
- return;
-
progress.set_status("Updating Particle Systems");
particle_system_manager->device_update(device, &dscene, this, progress);
@@ -369,8 +360,7 @@ bool Scene::need_data_update()
return (background->need_update || image_manager->need_update || object_manager->need_update ||
geometry_manager->need_update || light_manager->need_update ||
lookup_tables->need_update || integrator->need_update || shader_manager->need_update ||
- particle_system_manager->need_update || curve_system_manager->need_update ||
- bake_manager->need_update || film->need_update);
+ particle_system_manager->need_update || bake_manager->need_update || film->need_update);
}
bool Scene::need_reset()
@@ -393,7 +383,6 @@ void Scene::reset()
geometry_manager->tag_update(this);
light_manager->tag_update(this);
particle_system_manager->tag_update(this);
- curve_system_manager->tag_update(this);
}
void Scene::device_free()
diff --git a/intern/cycles/render/scene.h b/intern/cycles/render/scene.h
index 6b10a901d7b..67616262c03 100644
--- a/intern/cycles/render/scene.h
+++ b/intern/cycles/render/scene.h
@@ -168,6 +168,8 @@ class SceneParams {
bool use_bvh_spatial_split;
bool use_bvh_unaligned_nodes;
int num_bvh_time_steps;
+ int hair_subdivisions;
+ CurveShapeType hair_shape;
bool persistent_data;
int texture_limit;
@@ -181,6 +183,8 @@ class SceneParams {
use_bvh_spatial_split = false;
use_bvh_unaligned_nodes = true;
num_bvh_time_steps = 0;
+ hair_subdivisions = 3;
+ hair_shape = CURVE_RIBBON;
persistent_data = false;
texture_limit = 0;
background = true;
@@ -193,8 +197,15 @@ class SceneParams {
use_bvh_spatial_split == params.use_bvh_spatial_split &&
use_bvh_unaligned_nodes == params.use_bvh_unaligned_nodes &&
num_bvh_time_steps == params.num_bvh_time_steps &&
+ hair_subdivisions == params.hair_subdivisions && hair_shape == params.hair_shape &&
persistent_data == params.persistent_data && texture_limit == params.texture_limit);
}
+
+ int curve_subdivisions()
+ {
+ /* Matching the tesselation rate limit in Embree. */
+ return clamp(1 << hair_subdivisions, 1, 16);
+ }
};
/* Scene */
@@ -226,7 +237,6 @@ class Scene {
GeometryManager *geometry_manager;
ObjectManager *object_manager;
ParticleSystemManager *particle_system_manager;
- CurveSystemManager *curve_system_manager;
BakeManager *bake_manager;
/* default shaders */
diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp
index f5bfebbaf78..1a94d3e9db7 100644
--- a/intern/cycles/render/session.cpp
+++ b/intern/cycles/render/session.cpp
@@ -61,8 +61,10 @@ Session::Session(const SessionParams &params_)
TaskScheduler::init(params.threads);
+ /* Create CPU/GPU devices. */
device = Device::create(params.device, stats, profiler, params.background);
+ /* Create buffers for interactive rendering. */
if (params.background && !params.write_render_cb) {
buffers = NULL;
display = NULL;
@@ -72,6 +74,9 @@ Session::Session(const SessionParams &params_)
display = new DisplayBuffer(device, params.display_buffer_linear);
}
+ /* Validate denoising parameters. */
+ set_denoising(params.denoising);
+
session_thread = NULL;
scene = NULL;
@@ -773,6 +778,7 @@ DeviceRequestedFeatures Session::get_requested_device_features()
*/
bool use_motion = scene->need_motion() == Scene::MotionType::MOTION_BLUR;
requested_features.use_hair = false;
+ requested_features.use_hair_thick = (scene->params.hair_shape == CURVE_THICK);
requested_features.use_object_motion = false;
requested_features.use_camera_motion = use_motion && scene->camera->use_motion();
foreach (Object *object, scene->objects) {
@@ -804,7 +810,7 @@ DeviceRequestedFeatures Session::get_requested_device_features()
requested_features.use_baking = bake_manager->get_baking();
requested_features.use_integrator_branched = (scene->integrator->method ==
Integrator::BRANCHED_PATH);
- if (params.run_denoising) {
+ if (params.denoising.use || params.denoising.store_passes) {
requested_features.use_denoising = true;
requested_features.use_shadow_tricks = true;
}
@@ -941,24 +947,35 @@ void Session::set_pause(bool pause_)
pause_cond.notify_all();
}
-void Session::set_denoising(bool denoising, bool optix_denoising)
+void Session::set_denoising(const DenoiseParams &denoising)
{
+ bool need_denoise = denoising.need_denoising_task();
+
/* Lock buffers so no denoising operation is triggered while the settings are changed here. */
thread_scoped_lock buffers_lock(buffers_mutex);
+ params.denoising = denoising;
+
+ if (!(params.device.denoisers & denoising.type)) {
+ if (need_denoise) {
+ progress.set_error("Denoiser type not supported by compute device");
+ }
- params.run_denoising = denoising;
- params.full_denoising = !optix_denoising;
- params.optix_denoising = optix_denoising;
+ params.denoising.use = false;
+ need_denoise = false;
+ }
// TODO(pmours): Query the required overlap value for denoising from the device?
- tile_manager.slice_overlap = denoising && !params.background ? 64 : 0;
- tile_manager.schedule_denoising = denoising && !buffers;
+ tile_manager.slice_overlap = need_denoise && !params.background ? 64 : 0;
+
+ /* Schedule per tile denoising for final renders if we are either denoising or
+ * need prefiltered passes for the native denoiser. */
+ tile_manager.schedule_denoising = need_denoise && !buffers;
}
void Session::set_denoising_start_sample(int sample)
{
- if (sample != params.denoising_start_sample) {
- params.denoising_start_sample = sample;
+ if (sample != params.denoising.start_sample) {
+ params.denoising.start_sample = sample;
pause_cond.notify_all();
}
@@ -1078,10 +1095,10 @@ void Session::update_status_time(bool show_pause, bool show_done)
*/
substatus += string_printf(", Sample %d/%d", progress.get_current_sample(), num_samples);
}
- if (params.full_denoising || params.optix_denoising) {
+ if (params.denoising.use && params.denoising.type != DENOISER_OPENIMAGEDENOISE) {
substatus += string_printf(", Denoised %d tiles", progress.get_denoised_tiles());
}
- else if (params.run_denoising) {
+ else if (params.denoising.store_passes && params.denoising.type == DENOISER_NLM) {
substatus += string_printf(", Prefiltered %d tiles", progress.get_denoised_tiles());
}
}
@@ -1110,7 +1127,7 @@ bool Session::render_need_denoise(bool &delayed)
delayed = false;
/* Denoising enabled? */
- if (!params.run_denoising) {
+ if (!params.denoising.need_denoising_task()) {
return false;
}
@@ -1127,7 +1144,7 @@ bool Session::render_need_denoise(bool &delayed)
}
/* Do not denoise until the sample at which denoising should start is reached. */
- if (tile_manager.state.sample < params.denoising_start_sample) {
+ if (tile_manager.state.sample < min(params.denoising.start_sample, params.samples - 1)) {
return false;
}
@@ -1178,9 +1195,6 @@ void Session::render(bool need_denoise)
task.pass_denoising_clean = scene->film->denoising_clean_offset;
task.denoising_from_render = true;
- task.denoising_do_filter = params.full_denoising;
- task.denoising_use_optix = params.optix_denoising;
- task.denoising_write_passes = params.write_denoising_passes;
if (tile_manager.schedule_denoising) {
/* Acquire denoising tiles during rendering. */
diff --git a/intern/cycles/render/session.h b/intern/cycles/render/session.h
index 2707eed5531..0141629762c 100644
--- a/intern/cycles/render/session.h
+++ b/intern/cycles/render/session.h
@@ -62,10 +62,6 @@ class SessionParams {
bool display_buffer_linear;
- bool run_denoising;
- bool write_denoising_passes;
- bool full_denoising;
- bool optix_denoising;
DenoiseParams denoising;
double cancel_timeout;
@@ -94,11 +90,6 @@ class SessionParams {
use_profiling = false;
- run_denoising = false;
- write_denoising_passes = false;
- full_denoising = false;
- optix_denoising = false;
-
display_buffer_linear = false;
cancel_timeout = 0.1;
@@ -125,7 +116,8 @@ class SessionParams {
cancel_timeout == params.cancel_timeout && reset_timeout == params.reset_timeout &&
text_timeout == params.text_timeout &&
progressive_update_timeout == params.progressive_update_timeout &&
- tile_order == params.tile_order && shadingsystem == params.shadingsystem);
+ tile_order == params.tile_order && shadingsystem == params.shadingsystem &&
+ denoising.type == params.denoising.type);
}
};
@@ -161,7 +153,7 @@ class Session {
void reset(BufferParams &params, int samples);
void set_pause(bool pause);
void set_samples(int samples);
- void set_denoising(bool denoising, bool optix_denoising);
+ void set_denoising(const DenoiseParams &denoising);
void set_denoising_start_sample(int sample);
bool update_scene();
diff --git a/intern/cycles/render/shader.cpp b/intern/cycles/render/shader.cpp
index 39ba45a751a..1120d909e98 100644
--- a/intern/cycles/render/shader.cpp
+++ b/intern/cycles/render/shader.cpp
@@ -33,6 +33,7 @@
#include "util/util_foreach.h"
#include "util/util_murmurhash.h"
+#include "util/util_task.h"
#ifdef WITH_OCIO
# include <OpenColorIO/OpenColorIO.h>
diff --git a/intern/cycles/render/svm.cpp b/intern/cycles/render/svm.cpp
index ea3dbaf8e03..88714e20a90 100644
--- a/intern/cycles/render/svm.cpp
+++ b/intern/cycles/render/svm.cpp
@@ -94,8 +94,7 @@ void SVMShaderManager::device_update(Device *device,
scene,
scene->shaders[i],
&progress,
- &shader_svm_nodes[i]),
- false);
+ &shader_svm_nodes[i]));
}
task_pool.wait_work();
diff --git a/intern/cycles/test/render_graph_finalize_test.cpp b/intern/cycles/test/render_graph_finalize_test.cpp
index 87389ebfb16..4ea3470cda8 100644
--- a/intern/cycles/test/render_graph_finalize_test.cpp
+++ b/intern/cycles/test/render_graph_finalize_test.cpp
@@ -17,11 +17,15 @@
#include "testing/mock_log.h"
#include "testing/testing.h"
+#include "device/device.h"
+
#include "render/graph.h"
#include "render/nodes.h"
#include "render/scene.h"
+
#include "util/util_array.h"
#include "util/util_logging.h"
+#include "util/util_stats.h"
#include "util/util_string.h"
#include "util/util_vector.h"
diff --git a/intern/cycles/util/CMakeLists.txt b/intern/cycles/util/CMakeLists.txt
index c1f71461dfd..ad4ea9c86e0 100644
--- a/intern/cycles/util/CMakeLists.txt
+++ b/intern/cycles/util/CMakeLists.txt
@@ -29,7 +29,7 @@ set(SRC
)
set(LIB
-
+ ${TBB_LIBRARIES}
)
if(WITH_CYCLES_STANDALONE)
@@ -86,6 +86,7 @@ set(SRC_HEADERS
util_math_matrix.h
util_md5.h
util_murmurhash.h
+ util_openimagedenoise.h
util_opengl.h
util_optimization.h
util_param.h
@@ -100,6 +101,7 @@ set(SRC_HEADERS
util_sky_model.cpp
util_sky_model.h
util_sky_model_data.h
+ util_sky_nishita.cpp
util_avxf.h
util_avxb.h
util_semaphore.h
@@ -112,6 +114,7 @@ set(SRC_HEADERS
util_string.h
util_system.h
util_task.h
+ util_tbb.h
util_texture.h
util_thread.h
util_time.h
diff --git a/intern/cycles/util/util_debug.cpp b/intern/cycles/util/util_debug.cpp
index 3ce65802cff..6ad4f709ab5 100644
--- a/intern/cycles/util/util_debug.cpp
+++ b/intern/cycles/util/util_debug.cpp
@@ -31,7 +31,7 @@ DebugFlags::CPU::CPU()
sse41(true),
sse3(true),
sse2(true),
- bvh_layout(BVH_LAYOUT_DEFAULT),
+ bvh_layout(BVH_LAYOUT_AUTO),
split_kernel(false)
{
reset();
@@ -57,18 +57,7 @@ void DebugFlags::CPU::reset()
#undef STRINGIFY
#undef CHECK_CPU_FLAGS
- if (getenv("CYCLES_BVH2") != NULL) {
- bvh_layout = BVH_LAYOUT_BVH2;
- }
- else if (getenv("CYCLES_BVH4") != NULL) {
- bvh_layout = BVH_LAYOUT_BVH4;
- }
- else if (getenv("CYCLES_BVH8") != NULL) {
- bvh_layout = BVH_LAYOUT_BVH8;
- }
- else {
- bvh_layout = BVH_LAYOUT_DEFAULT;
- }
+ bvh_layout = BVH_LAYOUT_AUTO;
split_kernel = false;
}
diff --git a/intern/cycles/util/util_debug.h b/intern/cycles/util/util_debug.h
index cf6b442b878..da9f5408b59 100644
--- a/intern/cycles/util/util_debug.h
+++ b/intern/cycles/util/util_debug.h
@@ -73,10 +73,10 @@ class DebugFlags {
return sse2;
}
- /* Requested BVH size.
+ /* Requested BVH layout.
*
- * Rendering will use widest possible BVH which is below or equal
- * this one.
+ * By default the fastest will be used. For debugging the BVH used by other
+ * CPUs and GPUs can be selected here instead.
*/
BVHLayout bvh_layout;
diff --git a/intern/cycles/util/util_math_fast.h b/intern/cycles/util/util_math_fast.h
index dbed83ab84d..e979bd9e0c0 100644
--- a/intern/cycles/util/util_math_fast.h
+++ b/intern/cycles/util/util_math_fast.h
@@ -446,6 +446,11 @@ ccl_device_inline float fast_expf(float x)
}
#ifndef __KERNEL_GPU__
+/* MSVC seems to have a code-gen bug here in at least SSE41/AVX
+ * see T78047 for details. */
+# ifdef _MSC_VER
+# pragma optimize("", off)
+# endif
ccl_device float4 fast_exp2f4(float4 x)
{
const float4 one = make_float4(1.0f);
@@ -461,6 +466,9 @@ ccl_device float4 fast_exp2f4(float4 x)
r = madd4(x, r, make_float4(1.0f));
return __int4_as_float4(__float4_as_int4(r) + (m << 23));
}
+# ifdef _MSC_VER
+# pragma optimize("", on)
+# endif
ccl_device_inline float4 fast_expf4(float4 x)
{
diff --git a/intern/cycles/util/util_openimagedenoise.h b/intern/cycles/util/util_openimagedenoise.h
new file mode 100644
index 00000000000..aafa69cb530
--- /dev/null
+++ b/intern/cycles/util/util_openimagedenoise.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_OPENIMAGEDENOISE_H__
+#define __UTIL_OPENIMAGEDENOISE_H__
+
+#ifdef WITH_OPENIMAGEDENOISE
+# include <OpenImageDenoise/oidn.hpp>
+#endif
+
+#include "util_system.h"
+
+CCL_NAMESPACE_BEGIN
+
+static inline bool openimagedenoise_supported()
+{
+#ifdef WITH_OPENIMAGEDENOISE
+ return system_cpu_support_sse41();
+#else
+ return false;
+#endif
+}
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_OPENIMAGEDENOISE_H__ */
diff --git a/intern/cycles/util/util_sky_model.h b/intern/cycles/util/util_sky_model.h
index 84340614b2c..36f1079a16d 100644
--- a/intern/cycles/util/util_sky_model.h
+++ b/intern/cycles/util/util_sky_model.h
@@ -298,6 +298,8 @@ HINT #1: if you want to model the sky of an earth-like planet that orbits
previous paragraph.
*/
+#include "util/util_types.h"
+
CCL_NAMESPACE_BEGIN
#ifndef _SKY_MODEL_H_
@@ -426,4 +428,26 @@ double arhosekskymodel_solar_radiance(ArHosekSkyModelState *state,
#endif // _SKY_MODEL_H_
+/* Nishita improved sky model */
+
+void nishita_skymodel_precompute_texture(float *pixels,
+ int stride,
+ int start_y,
+ int end_y,
+ int width,
+ int height,
+ float sun_elevation,
+ float altitude,
+ float air_density,
+ float dust_density,
+ float ozone_density);
+
+void nishita_skymodel_precompute_sun(float sun_elevation,
+ float angular_diameter,
+ float altitude,
+ float air_density,
+ float dust_density,
+ float *pixel_bottom,
+ float *pixel_top);
+
CCL_NAMESPACE_END
diff --git a/intern/cycles/util/util_sky_nishita.cpp b/intern/cycles/util/util_sky_nishita.cpp
new file mode 100644
index 00000000000..92397804d43
--- /dev/null
+++ b/intern/cycles/util/util_sky_nishita.cpp
@@ -0,0 +1,371 @@
+/*
+ * Copyright 2011-2020 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "util/util_math.h"
+#include "util/util_sky_model.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Constants
+ *
+ * Lengths are given in meters. The spectral tables below (irradiance,
+ * rayleigh_coeff, ozone_coeff and cmf_xyz) hold num_wavelengths entries each and
+ * are indexed by the same wavelength index. */
+static const float rayleigh_scale = 8000.0f; // Rayleigh scale height (m)
+static const float mie_scale = 1200.0f; // Mie scale height (m)
+static const float mie_coeff = 2e-5f; // Mie scattering coefficient (extinction uses 1.11 times this value)
+static const float mie_G = 0.76f; // aerosols anisotropy (g parameter of the Mie phase function)
+static const float earth_radius = 6360000.0f; // radius of Earth (m)
+static const float atmosphere_radius = 6420000.0f; // radius of atmosphere (m)
+static const int steps = 32; // segments per primary ray (single_scattering)
+static const int steps_light = 16; // segments per sun connection ray (ray_optical_depth)
+static const int num_wavelengths = 21; // number of wavelengths, i.e. length of each spectral table
+/* irradiance at top of atmosphere, one value per wavelength
+ * NOTE(review): units and the sampled wavelengths are not stated here;
+ * spec_to_xyz() scales by 20, which suggests a 20 nm spacing - confirm. */
+static const float irradiance[] = {
+ 1.45756829855592995315f, 1.56596305559738380175f, 1.65148449067670455293f,
+ 1.71496242737209314555f, 1.75797983805020541226f, 1.78256407885924539336f,
+ 1.79095108475838560302f, 1.78541550133410664714f, 1.76815554864306845317f,
+ 1.74122069647250410362f, 1.70647127164943679389f, 1.66556087452739887134f,
+ 1.61993437242451854274f, 1.57083597368892080581f, 1.51932335059305478886f,
+ 1.46628494965214395407f, 1.41245852740172450623f, 1.35844961970384092709f,
+ 1.30474913844739281998f, 1.25174963272610817455f, 1.19975998755420620867f};
+/* Rayleigh scattering coefficient, one value per wavelength */
+static const float rayleigh_coeff[] = {
+ 0.00005424820087636473f, 0.00004418549866505454f, 0.00003635151910165377f,
+ 0.00003017929012024763f, 0.00002526320226989157f, 0.00002130859310621843f,
+ 0.00001809838025320633f, 0.00001547057129129042f, 0.00001330284977336850f,
+ 0.00001150184784075764f, 0.00000999557429990163f, 0.00000872799973630707f,
+ 0.00000765513700977967f, 0.00000674217203751443f, 0.00000596134125832052f,
+ 0.00000529034598065810f, 0.00000471115687557433f, 0.00000420910481110487f,
+ 0.00000377218381260133f, 0.00000339051255477280f, 0.00000305591531679811f};
+/* Ozone absorption coefficient, one value per wavelength */
+static const float ozone_coeff[] = {
+ 0.00000000325126849861f, 0.00000000585395365047f, 0.00000001977191155085f,
+ 0.00000007309568762914f, 0.00000020084561514287f, 0.00000040383958096161f,
+ 0.00000063551335912363f, 0.00000096707041180970f, 0.00000154797400424410f,
+ 0.00000209038647223331f, 0.00000246128056164565f, 0.00000273551299461512f,
+ 0.00000215125863128643f, 0.00000159051840791988f, 0.00000112356197979857f,
+ 0.00000073527551487574f, 0.00000046450130357806f, 0.00000033096079921048f,
+ 0.00000022512612292678f, 0.00000014879129266490f, 0.00000016828623364192f};
+/* CIE XYZ color matching functions, one {x, y, z} triple per wavelength */
+static const float cmf_xyz[][3] = {{0.00136800000f, 0.00003900000f, 0.00645000100f},
+ {0.01431000000f, 0.00039600000f, 0.06785001000f},
+ {0.13438000000f, 0.00400000000f, 0.64560000000f},
+ {0.34828000000f, 0.02300000000f, 1.74706000000f},
+ {0.29080000000f, 0.06000000000f, 1.66920000000f},
+ {0.09564000000f, 0.13902000000f, 0.81295010000f},
+ {0.00490000000f, 0.32300000000f, 0.27200000000f},
+ {0.06327000000f, 0.71000000000f, 0.07824999000f},
+ {0.29040000000f, 0.95400000000f, 0.02030000000f},
+ {0.59450000000f, 0.99500000000f, 0.00390000000f},
+ {0.91630000000f, 0.87000000000f, 0.00165000100f},
+ {1.06220000000f, 0.63100000000f, 0.00080000000f},
+ {0.85444990000f, 0.38100000000f, 0.00019000000f},
+ {0.44790000000f, 0.17500000000f, 0.00002000000f},
+ {0.16490000000f, 0.06100000000f, 0.00000000000f},
+ {0.04677000000f, 0.01700000000f, 0.00000000000f},
+ {0.01135916000f, 0.00410200000f, 0.00000000000f},
+ {0.00289932700f, 0.00104700000f, 0.00000000000f},
+ {0.00069007860f, 0.00024920000f, 0.00000000000f},
+ {0.00016615050f, 0.00006000000f, 0.00000000000f},
+ {0.00004150994f, 0.00001499000f, 0.00000000000f}};
+
+/* Turn a latitude/longitude pair given in radians into a direction vector;
+ * a latitude of pi/2 maps to +Z. */
+static float3 geographical_to_direction(float lat, float lon)
+{
+  const float x = cosf(lat) * cosf(lon);
+  const float y = cosf(lat) * sinf(lon);
+  const float z = sinf(lat);
+  return make_float3(x, y, z);
+}
+
+/* Weight a spectrum of num_wavelengths samples with the CIE XYZ color matching
+ * functions, sum it up per channel and apply the constant scale factor. */
+static float3 spec_to_xyz(float *spectrum)
+{
+  float sum_x = 0.0f;
+  float sum_y = 0.0f;
+  float sum_z = 0.0f;
+  for (int wl = 0; wl < num_wavelengths; wl++) {
+    const float *cmf = cmf_xyz[wl];
+    sum_x += cmf[0] * spectrum[wl];
+    sum_y += cmf[1] * spectrum[wl];
+    sum_z += cmf[2] * spectrum[wl];
+  }
+  return make_float3(sum_x, sum_y, sum_z) * (20 * 683 * 1e-9f);
+}
+
+/* Atmosphere volume models */
+
+/* Relative Rayleigh density at the given height (m): exponential falloff
+ * governed by the Rayleigh scale height. */
+static float density_rayleigh(float height)
+{
+  const float exponent = -height / rayleigh_scale;
+  return expf(exponent);
+}
+
+/* Relative Mie density at the given height (m): exponential falloff governed
+ * by the Mie scale height. */
+static float density_mie(float height)
+{
+  const float exponent = -height / mie_scale;
+  return expf(exponent);
+}
+
+/* Relative ozone density at the given height (m): a piecewise linear profile
+ * that rises between 10 km and 25 km, falls between 25 km and 40 km and is
+ * zero everywhere else. */
+static float density_ozone(float height)
+{
+  /* Outside of the ozone layer. */
+  if (!(height >= 10000.0f && height < 40000.0f)) {
+    return 0.0f;
+  }
+  /* Rising lower half of the layer. */
+  if (height < 25000.0f) {
+    return 1.0f / 15000.0f * height - 2.0f / 3.0f;
+  }
+  /* Falling upper half of the layer. */
+  return -(1.0f / 15000.0f * height - 8.0f / 3.0f);
+}
+
+/* Rayleigh phase function; callers pass mu as the dot product of the view
+ * direction and the sun direction. */
+static float phase_rayleigh(float mu)
+{
+  const float scale = 3.0f / (16.0f * M_PI_F);
+  return scale * (1.0f + sqr(mu));
+}
+
+/* Mie phase function with anisotropy mie_G; callers pass mu as the dot product
+ * of the view direction and the sun direction. */
+static float phase_mie(float mu)
+{
+  static const float g_squared = mie_G * mie_G;
+
+  const float numerator = 3.0f * (1.0f - g_squared) * (1.0f + sqr(mu));
+  const float lobe = powf((1.0f + g_squared - 2.0f * mie_G * mu), 1.5f);
+  const float denominator = 8.0f * M_PI_F * (2.0f + g_squared) * lobe;
+  return numerator / denominator;
+}
+
+/* Intersection helpers
+ *
+ * surface_intersection() tests whether the line through pos along dir comes
+ * within earth_radius of the origin, which is the center of the earth in the
+ * coordinates used by the callers. Directions with dir.z >= 0 are always
+ * reported as not intersecting.
+ * NOTE(review): the sign of t is not checked, so this is a test against the
+ * whole line rather than the forward ray - confirm that is intended. */
+static bool surface_intersection(float3 pos, float3 dir)
+{
+ if (dir.z >= 0)
+ return false;
+ float t = dot(dir, -pos) / len_squared(dir); // parameter of the point on the line closest to the origin
+ float D = pos.x * pos.x - 2.0f * (-pos.x) * dir.x * t + dir.x * t * dir.x * t + pos.y * pos.y -
+ 2.0f * (-pos.y) * dir.y * t + (dir.y * t) * (dir.y * t) + pos.z * pos.z -
+ 2.0f * (-pos.z) * dir.z * t + dir.z * t * dir.z * t; // D = |pos + t * dir|^2, expanded per component
+ return (D <= sqr(earth_radius));
+}
+
+/* Point at which the ray pos + t * dir crosses the sphere of radius
+ * atmosphere_radius around the origin, using the larger root of the quadratic.
+ * The quadratic is solved with a leading coefficient of 1, i.e. dir is treated
+ * as having unit length. */
+static float3 atmosphere_intersection(float3 pos, float3 dir)
+{
+  const float b = -2.0f * dot(dir, -pos);
+  const float c = len_squared(pos) - sqr(atmosphere_radius);
+  const float discriminant = b * b - 4.0f * c;
+  const float t = (-b + sqrtf(discriminant)) / 2.0f;
+
+  const float3 exit_point = make_float3(
+      pos.x + dir.x * t, pos.y + dir.y * t, pos.z + dir.z * t);
+  return exit_point;
+}
+
+/* Optical depth along a ray from ray_origin to the edge of the atmosphere.
+ *
+ * Only the density factors are integrated: the result holds the Rayleigh, Mie
+ * and ozone depth in x, y and z. The per-wavelength coefficients are applied by
+ * the callers. */
+static float3 ray_optical_depth(float3 ray_origin, float3 ray_dir)
+{
+  /* Length of the path until the ray leaves the atmosphere. */
+  const float3 exit_point = atmosphere_intersection(ray_origin, ray_dir);
+  const float path_length = distance(ray_origin, exit_point);
+
+  /* The path is cut into steps_light segments of equal length. */
+  const float step_length = path_length / steps_light;
+  const float3 step = step_length * ray_dir;
+
+  float3 accumulated = make_float3(0.0f, 0.0f, 0.0f);
+
+  /* Densities are sampled in the middle of each segment and treated as
+   * constant along it. */
+  float3 sample_pos = ray_origin + 0.5f * step;
+  int remaining = steps_light;
+  while (remaining-- > 0) {
+    /* Height above sea level of the sample. */
+    const float sample_height = len(sample_pos) - earth_radius;
+
+    const float3 sample_density = make_float3(density_rayleigh(sample_height),
+                                              density_mie(sample_height),
+                                              density_ozone(sample_height));
+    accumulated += step_length * sample_density;
+
+    /* Move on to the middle of the next segment. */
+    sample_pos += step;
+  }
+
+  return accumulated;
+}
+
+/* Single Scattering implementation
+ *
+ * Accumulates into r_spectrum (num_wavelengths entries, zeroed first) the light
+ * scattered once from sun_dir towards the viewer along the ray that starts at
+ * ray_origin and runs along ray_dir until it leaves the atmosphere.
+ * air_density, dust_density and ozone_density scale the Rayleigh, Mie and ozone
+ * density profiles respectively. */
+static void single_scattering(float3 ray_dir,
+ float3 sun_dir,
+ float3 ray_origin,
+ float air_density,
+ float dust_density,
+ float ozone_density,
+ float *r_spectrum)
+{
+ /* This code computes single-inscattering along a ray through the atmosphere. */
+ float3 ray_end = atmosphere_intersection(ray_origin, ray_dir);
+ float ray_length = distance(ray_origin, ray_end);
+
+ /* To compute the inscattering, we step along the ray in segments and accumulate
+ * the inscattering as well as the optical depth along each segment. */
+ float segment_length = ray_length / steps;
+ float3 segment = segment_length * ray_dir;
+
+ /* Instead of tracking the transmission spectrum across all wavelengths directly,
+ * we use the fact that the density always has the same spectrum for each type of
+ * scattering, so we split the density into a constant spectrum and a factor and
+ * only track the factors. */
+ float3 optical_depth = make_float3(0.0f, 0.0f, 0.0f);
+
+ /* Zero out light accumulation. */
+ for (int wl = 0; wl < num_wavelengths; wl++) {
+ r_spectrum[wl] = 0.0f;
+ }
+
+ /* Compute phase function for scattering and the density scale factor.
+ * Ozone gets a phase of zero: it only absorbs and never scatters here. */
+ float mu = dot(ray_dir, sun_dir);
+ float3 phase_function = make_float3(phase_rayleigh(mu), phase_mie(mu), 0.0f);
+ float3 density_scale = make_float3(air_density, dust_density, ozone_density);
+
+ /* The density and in-scattering of each segment is evaluated at its middle. */
+ float3 P = ray_origin + 0.5f * segment;
+ for (int i = 0; i < steps; i++) {
+ /* Compute height above sea level. */
+ float height = len(P) - earth_radius;
+
+ /* Evaluate and accumulate optical depth along the ray. */
+ float3 density = density_scale * make_float3(density_rayleigh(height),
+ density_mie(height),
+ density_ozone(height));
+ optical_depth += segment_length * density;
+
+ /* If the earth isn't in the way, evaluate inscattering from the sun. */
+ if (!surface_intersection(P, sun_dir)) {
+ float3 light_optical_depth = density_scale * ray_optical_depth(P, sun_dir);
+ float3 total_optical_depth = optical_depth + light_optical_depth;
+
+ /* attenuation of light */
+ for (int wl = 0; wl < num_wavelengths; wl++) {
+ float3 extinction_density = total_optical_depth * make_float3(rayleigh_coeff[wl],
+ 1.11f * mie_coeff,
+ ozone_coeff[wl]);
+ float attenuation = expf(-reduce_add(extinction_density));
+
+ float3 scattering_density = density * make_float3(rayleigh_coeff[wl], mie_coeff, 0.0f);
+
+ /* The total inscattered radiance from one segment is:
+ * Tr(A<->B) * Tr(B<->C) * sigma_s * phase * L * segment_length
+ *
+ * These terms are:
+ * Tr(A<->B): Transmission from start to scattering position (tracked in optical_depth)
+ * Tr(B<->C): Transmission from scattering position to light (computed in
+ *            ray_optical_depth)
+ * sigma_s: Scattering density
+ * phase: Phase function of the scattering type (Rayleigh or Mie)
+ * L: Radiance coming from the light source
+ * segment_length: The length of the segment
+ *
+ * The code here is just that, with a bit of additional optimization to not store full
+ * spectra for the optical depth.
+ */
+ r_spectrum[wl] += attenuation * reduce_add(phase_function * scattering_density) *
+ irradiance[wl] * segment_length;
+ }
+ }
+
+ /* Advance along ray. */
+ P += segment;
+ }
+}
+
+/* calculate texture array
+ *
+ * Fills the rows start_y (inclusive) to end_y (exclusive) of a width x height
+ * sky texture with CIE XYZ values. Row y is evaluated at latitude
+ * y * (pi/2 / height), column x at longitude x * (2*pi / width) - pi. `pixels`
+ * holds `stride` floats per pixel, of which the first three receive X, Y and Z.
+ * sun_elevation is in radians, altitude is added to the earth radius (meters),
+ * and the three density arguments scale the Rayleigh, Mie and ozone densities. */
+void nishita_skymodel_precompute_texture(float *pixels,
+                                         int stride,
+                                         int start_y,
+                                         int end_y,
+                                         int width,
+                                         int height,
+                                         float sun_elevation,
+                                         float altitude,
+                                         float air_density,
+                                         float dust_density,
+                                         float ozone_density)
+{
+  float spectrum[num_wavelengths];
+  const float3 cam_pos = make_float3(0, 0, earth_radius + altitude);
+  /* The sun is always placed at longitude 0. */
+  const float3 sun_dir = geographical_to_direction(sun_elevation, 0.0f);
+
+  const float latitude_step = M_PI_2_F / height;
+  const float longitude_step = M_2PI_F / width;
+
+  /* Only the first half of each row is computed; every result is also stored in
+   * the mirrored column. Round up so that the middle column of an odd width is
+   * written as well (it mirrors onto itself) instead of being left untouched.
+   * Even widths behave exactly as before. */
+  const int half_width = (width + 1) / 2;
+
+  for (int y = start_y; y < end_y; y++) {
+    const float latitude = latitude_step * y;
+    float *pixel_row = pixels + (y * width) * stride;
+
+    for (int x = 0; x < half_width; x++) {
+      const float longitude = longitude_step * x - M_PI_F;
+
+      const float3 dir = geographical_to_direction(latitude, longitude);
+      single_scattering(dir, sun_dir, cam_pos, air_density, dust_density, ozone_density, spectrum);
+      const float3 xyz = spec_to_xyz(spectrum);
+
+      float *pixel = pixel_row + x * stride;
+      pixel[0] = xyz.x;
+      pixel[1] = xyz.y;
+      pixel[2] = xyz.z;
+
+      const int mirror_x = width - x - 1;
+      float *mirror_pixel = pixel_row + mirror_x * stride;
+      mirror_pixel[0] = xyz.x;
+      mirror_pixel[1] = xyz.y;
+      mirror_pixel[2] = xyz.z;
+    }
+  }
+}
+
+/* Sun disc
+ *
+ * Spectrum of the sun seen along cam_dir from the given altitude: the
+ * irradiance spread over solid_angle, attenuated by the Rayleigh and Mie
+ * optical depth of the path to the edge of the atmosphere. The ozone component
+ * of the optical depth is not used here. */
+static void sun_radiation(float3 cam_dir,
+                          float altitude,
+                          float air_density,
+                          float dust_density,
+                          float solid_angle,
+                          float *r_spectrum)
+{
+  const float3 observer = make_float3(0, 0, earth_radius + altitude);
+  const float3 depth = ray_optical_depth(observer, cam_dir);
+
+  for (int wl = 0; wl < num_wavelengths; wl++) {
+    /* Extinction exponent for this wavelength. */
+    const float rayleigh_term = rayleigh_coeff[wl] * depth.x * air_density;
+    const float mie_term = 1.11f * mie_coeff * depth.y * dust_density;
+    const float extinction = rayleigh_term + mie_term;
+
+    r_spectrum[wl] = (irradiance[wl] / solid_angle) * expf(-extinction);
+  }
+}
+
+/* Compute the XYZ color of the sun disc at its lower and upper edge.
+ *
+ * The edges lie half an angular diameter below and above sun_elevation and are
+ * clamped so that they never go below an elevation of zero. pixel_bottom and
+ * pixel_top each receive three floats (X, Y, Z). */
+void nishita_skymodel_precompute_sun(float sun_elevation,
+                                     float angular_diameter,
+                                     float altitude,
+                                     float air_density,
+                                     float dust_density,
+                                     float *pixel_bottom,
+                                     float *pixel_top)
+{
+  const float half_angular = angular_diameter / 2.0f;
+  const float solid_angle = M_2PI_F * (1.0f - cosf(half_angular));
+
+  /* Index 0 is the lower edge of the disc, index 1 the upper edge. */
+  const float edge_elevation[2] = {sun_elevation - half_angular, sun_elevation + half_angular};
+  float *const edge_output[2] = {pixel_bottom, pixel_top};
+  float3 edge_xyz[2];
+  float spectrum[num_wavelengths];
+
+  /* Evaluate both edges first. */
+  for (int edge = 0; edge < 2; edge++) {
+    float elevation = 0.0f;
+    if (edge_elevation[edge] > 0.0f) {
+      elevation = edge_elevation[edge];
+    }
+    const float3 sun_dir = geographical_to_direction(elevation, 0.0f);
+    sun_radiation(sun_dir, altitude, air_density, dust_density, solid_angle, spectrum);
+    edge_xyz[edge] = spec_to_xyz(spectrum);
+  }
+
+  /* Then store them, lower edge before upper edge. */
+  for (int edge = 0; edge < 2; edge++) {
+    float *out = edge_output[edge];
+    out[0] = edge_xyz[edge].x;
+    out[1] = edge_xyz[edge].y;
+    out[2] = edge_xyz[edge].z;
+  }
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/util/util_task.cpp b/intern/cycles/util/util_task.cpp
index 61aa28c6815..4fb61392e92 100644
--- a/intern/cycles/util/util_task.cpp
+++ b/intern/cycles/util/util_task.cpp
@@ -20,100 +20,28 @@
#include "util/util_system.h"
#include "util/util_time.h"
-//#define THREADING_DEBUG_ENABLED
-
-#ifdef THREADING_DEBUG_ENABLED
-# include <stdio.h>
-# define THREADING_DEBUG(...) \
- do { \
- printf(__VA_ARGS__); \
- fflush(stdout); \
- } while (0)
-#else
-# define THREADING_DEBUG(...)
-#endif
-
CCL_NAMESPACE_BEGIN
/* Task Pool */
-TaskPool::TaskPool()
+TaskPool::TaskPool() : start_time(time_dt()), num_tasks_handled(0)
{
- num_tasks_handled = 0;
- num = 0;
- do_cancel = false;
}
TaskPool::~TaskPool()
{
- stop();
+ cancel();
}
-void TaskPool::push(Task *task, bool front)
+void TaskPool::push(TaskRunFunction &&task)
{
- TaskScheduler::Entry entry;
-
- entry.task = task;
- entry.pool = this;
-
- TaskScheduler::push(entry, front);
-}
-
-void TaskPool::push(const TaskRunFunction &run, bool front)
-{
- push(new Task(run), front);
+ tbb_group.run(std::move(task));
+ num_tasks_handled++;
}
void TaskPool::wait_work(Summary *stats)
{
- thread_scoped_lock num_lock(num_mutex);
-
- while (num != 0) {
- num_lock.unlock();
-
- thread_scoped_lock queue_lock(TaskScheduler::queue_mutex);
-
- /* find task from this pool. if we get a task from another pool,
- * we can get into deadlock */
- TaskScheduler::Entry work_entry;
- bool found_entry = false;
- list<TaskScheduler::Entry>::iterator it;
-
- for (it = TaskScheduler::queue.begin(); it != TaskScheduler::queue.end(); it++) {
- TaskScheduler::Entry &entry = *it;
-
- if (entry.pool == this) {
- work_entry = entry;
- found_entry = true;
- TaskScheduler::queue.erase(it);
- break;
- }
- }
-
- queue_lock.unlock();
-
- /* if found task, do it, otherwise wait until other tasks are done */
- if (found_entry) {
- /* run task */
- work_entry.task->run(0);
-
- /* delete task */
- delete work_entry.task;
-
- /* notify pool task was done */
- num_decrease(1);
- }
-
- num_lock.lock();
- if (num == 0)
- break;
-
- if (!found_entry) {
- THREADING_DEBUG("num==%d, Waiting for condition in TaskPool::wait_work !found_entry\n", num);
- num_cond.wait(num_lock);
- THREADING_DEBUG("num==%d, condition wait done in TaskPool::wait_work !found_entry\n", num);
- }
- }
+ tbb_group.wait();
if (stats != NULL) {
stats->time_total = time_dt() - start_time;
@@ -123,193 +51,21 @@ void TaskPool::wait_work(Summary *stats)
void TaskPool::cancel()
{
- do_cancel = true;
-
- TaskScheduler::clear(this);
-
- {
- thread_scoped_lock num_lock(num_mutex);
-
- while (num) {
- THREADING_DEBUG("num==%d, Waiting for condition in TaskPool::cancel\n", num);
- num_cond.wait(num_lock);
- THREADING_DEBUG("num==%d condition wait done in TaskPool::cancel\n", num);
- }
- }
-
- do_cancel = false;
-}
-
-void TaskPool::stop()
-{
- TaskScheduler::clear(this);
-
- assert(num == 0);
+ tbb_group.cancel();
+ tbb_group.wait();
}
bool TaskPool::canceled()
{
- return do_cancel;
-}
-
-bool TaskPool::finished()
-{
- thread_scoped_lock num_lock(num_mutex);
- return num == 0;
-}
-
-void TaskPool::num_decrease(int done)
-{
- num_mutex.lock();
- num -= done;
-
- assert(num >= 0);
- if (num == 0) {
- THREADING_DEBUG("num==%d, notifying all in TaskPool::num_decrease\n", num);
- num_cond.notify_all();
- }
-
- num_mutex.unlock();
-}
-
-void TaskPool::num_increase()
-{
- thread_scoped_lock num_lock(num_mutex);
- if (num_tasks_handled == 0) {
- start_time = time_dt();
- }
- num++;
- num_tasks_handled++;
- THREADING_DEBUG("num==%d, notifying all in TaskPool::num_increase\n", num);
- num_cond.notify_all();
+ return tbb_group.is_canceling();
}
/* Task Scheduler */
thread_mutex TaskScheduler::mutex;
int TaskScheduler::users = 0;
-vector<thread *> TaskScheduler::threads;
-bool TaskScheduler::do_exit = false;
-
-list<TaskScheduler::Entry> TaskScheduler::queue;
-thread_mutex TaskScheduler::queue_mutex;
-thread_condition_variable TaskScheduler::queue_cond;
-
-namespace {
-
-/* Get number of processors on each of the available nodes. The result is sized
- * by the highest node index, and element corresponds to number of processors on
- * that node.
- * If node is not available, then the corresponding number of processors is
- * zero. */
-void get_per_node_num_processors(vector<int> *num_per_node_processors)
-{
- const int num_nodes = system_cpu_num_numa_nodes();
- if (num_nodes == 0) {
- LOG(ERROR) << "Zero available NUMA nodes, is not supposed to happen.";
- return;
- }
- num_per_node_processors->resize(num_nodes);
- for (int node = 0; node < num_nodes; ++node) {
- if (!system_cpu_is_numa_node_available(node)) {
- (*num_per_node_processors)[node] = 0;
- continue;
- }
- (*num_per_node_processors)[node] = system_cpu_num_numa_node_processors(node);
- }
-}
-
-/* Calculate total number of processors on all available nodes.
- * This is similar to system_cpu_thread_count(), but uses pre-calculated number
- * of processors on each of the node, avoiding extra system calls and checks for
- * the node availability. */
-int get_num_total_processors(const vector<int> &num_per_node_processors)
-{
- int num_total_processors = 0;
- foreach (int num_node_processors, num_per_node_processors) {
- num_total_processors += num_node_processors;
- }
- return num_total_processors;
-}
-
-/* Compute NUMA node for every thread to run on, for the best performance. */
-vector<int> distribute_threads_on_nodes(const int num_threads)
-{
- /* Start with all threads unassigned to any specific NUMA node. */
- vector<int> thread_nodes(num_threads, -1);
- const int num_active_group_processors = system_cpu_num_active_group_processors();
- VLOG(1) << "Detected " << num_active_group_processors << " processors "
- << "in active group.";
- if (num_active_group_processors >= num_threads) {
- /* If the current thread is set up in a way that its affinity allows to
- * use at least requested number of threads we do not explicitly set
- * affinity to the worker threads.
- * This way we allow users to manually edit affinity of the parent
- * thread, and here we follow that affinity. This way it's possible to
- * have two Cycles/Blender instances running manually set to a different
- * dies on a CPU. */
- VLOG(1) << "Not setting thread group affinity.";
- return thread_nodes;
- }
- vector<int> num_per_node_processors;
- get_per_node_num_processors(&num_per_node_processors);
- if (num_per_node_processors.size() == 0) {
- /* Error was already reported, here we can't do anything, so we simply
- * leave default affinity to all the worker threads. */
- return thread_nodes;
- }
- const int num_nodes = num_per_node_processors.size();
- int thread_index = 0;
- /* First pass: fill in all the nodes to their maximum.
- *
- * If there is less threads than the overall nodes capacity, some of the
- * nodes or parts of them will idle.
- *
- * TODO(sergey): Consider picking up fastest nodes if number of threads
- * fits on them. For example, on Threadripper2 we might consider using nodes
- * 0 and 2 if user requested 32 render threads. */
- const int num_total_node_processors = get_num_total_processors(num_per_node_processors);
- int current_node_index = 0;
- while (thread_index < num_total_node_processors && thread_index < num_threads) {
- const int num_node_processors = num_per_node_processors[current_node_index];
- for (int processor_index = 0; processor_index < num_node_processors; ++processor_index) {
- VLOG(1) << "Scheduling thread " << thread_index << " to node " << current_node_index << ".";
- thread_nodes[thread_index] = current_node_index;
- ++thread_index;
- if (thread_index == num_threads) {
- /* All threads are scheduled on their nodes. */
- return thread_nodes;
- }
- }
- ++current_node_index;
- }
- /* Second pass: keep scheduling threads to each node one by one,
- * uniformly filling them in.
- * This is where things becomes tricky to predict for the maximum
- * performance: on the one hand this avoids too much threading overhead on
- * few nodes, but for the final performance having all the overhead on one
- * node might be better idea (since other nodes will have better chance of
- * rendering faster).
- * But more tricky is that nodes might have difference capacity, so we might
- * want to do some weighted scheduling. For example, if node 0 has 16
- * processors and node 1 has 32 processors, we'd better schedule 1 extra
- * thread on node 0 and 2 extra threads on node 1. */
- current_node_index = 0;
- while (thread_index < num_threads) {
- /* Skip unavailable nodes. */
- /* TODO(sergey): Add sanity check against deadlock. */
- while (num_per_node_processors[current_node_index] == 0) {
- current_node_index = (current_node_index + 1) % num_nodes;
- }
- VLOG(1) << "Scheduling thread " << thread_index << " to node " << current_node_index << ".";
- ++thread_index;
- current_node_index = (current_node_index + 1) % num_nodes;
- }
-
- return thread_nodes;
-}
-
-} // namespace
+int TaskScheduler::active_num_threads = 0;
+tbb::global_control *TaskScheduler::global_control = nullptr;
void TaskScheduler::init(int num_threads)
{
@@ -320,22 +76,15 @@ void TaskScheduler::init(int num_threads)
if (users != 1) {
return;
}
- do_exit = false;
- const bool use_auto_threads = (num_threads == 0);
- if (use_auto_threads) {
+ if (num_threads > 0) {
/* Automatic number of threads. */
- num_threads = system_cpu_thread_count();
+ VLOG(1) << "Overriding number of TBB threads to " << num_threads << ".";
+ global_control = new tbb::global_control(tbb::global_control::max_allowed_parallelism,
+ num_threads);
+ active_num_threads = num_threads;
}
- VLOG(1) << "Creating pool of " << num_threads << " threads.";
-
- /* Compute distribution on NUMA nodes. */
- vector<int> thread_nodes = distribute_threads_on_nodes(num_threads);
-
- /* Launch threads that will be waiting for work. */
- threads.resize(num_threads);
- for (int thread_index = 0; thread_index < num_threads; ++thread_index) {
- threads[thread_index] = new thread(function_bind(&TaskScheduler::thread_run, thread_index + 1),
- thread_nodes[thread_index]);
+ else {
+ active_num_threads = system_cpu_thread_count();
}
}
@@ -344,105 +93,20 @@ void TaskScheduler::exit()
thread_scoped_lock lock(mutex);
users--;
if (users == 0) {
- VLOG(1) << "De-initializing thread pool of task scheduler.";
- /* stop all waiting threads */
- TaskScheduler::queue_mutex.lock();
- do_exit = true;
- TaskScheduler::queue_cond.notify_all();
- TaskScheduler::queue_mutex.unlock();
-
- /* delete threads */
- foreach (thread *t, threads) {
- t->join();
- delete t;
- }
- threads.clear();
+ delete global_control;
+ global_control = nullptr;
+ active_num_threads = 0;
}
}
void TaskScheduler::free_memory()
{
assert(users == 0);
- threads.free_memory();
-}
-
-bool TaskScheduler::thread_wait_pop(Entry &entry)
-{
- thread_scoped_lock queue_lock(queue_mutex);
-
- while (queue.empty() && !do_exit)
- queue_cond.wait(queue_lock);
-
- if (queue.empty()) {
- assert(do_exit);
- return false;
- }
-
- entry = queue.front();
- queue.pop_front();
-
- return true;
}
-void TaskScheduler::thread_run(int thread_id)
+int TaskScheduler::num_threads()
{
- Entry entry;
-
- /* todo: test affinity/denormal mask */
-
- /* keep popping off tasks */
- while (thread_wait_pop(entry)) {
- /* run task */
- entry.task->run(thread_id);
-
- /* delete task */
- delete entry.task;
-
- /* notify pool task was done */
- entry.pool->num_decrease(1);
- }
-}
-
-void TaskScheduler::push(Entry &entry, bool front)
-{
- entry.pool->num_increase();
-
- /* add entry to queue */
- TaskScheduler::queue_mutex.lock();
- if (front)
- TaskScheduler::queue.push_front(entry);
- else
- TaskScheduler::queue.push_back(entry);
-
- TaskScheduler::queue_cond.notify_one();
- TaskScheduler::queue_mutex.unlock();
-}
-
-void TaskScheduler::clear(TaskPool *pool)
-{
- thread_scoped_lock queue_lock(TaskScheduler::queue_mutex);
-
- /* erase all tasks from this pool from the queue */
- list<Entry>::iterator it = queue.begin();
- int done = 0;
-
- while (it != queue.end()) {
- Entry &entry = *it;
-
- if (entry.pool == pool) {
- done++;
- delete entry.task;
-
- it = queue.erase(it);
- }
- else
- it++;
- }
-
- queue_lock.unlock();
-
- /* notify done */
- pool->num_decrease(done);
+ return active_num_threads;
}
/* Dedicated Task Pool */
@@ -458,31 +122,30 @@ DedicatedTaskPool::DedicatedTaskPool()
DedicatedTaskPool::~DedicatedTaskPool()
{
- stop();
+ wait();
+
+ do_exit = true;
+ queue_cond.notify_all();
+
worker_thread->join();
delete worker_thread;
}
-void DedicatedTaskPool::push(Task *task, bool front)
+void DedicatedTaskPool::push(TaskRunFunction &&task, bool front)
{
num_increase();
/* add task to queue */
queue_mutex.lock();
if (front)
- queue.push_front(task);
+ queue.emplace_front(std::move(task));
else
- queue.push_back(task);
+ queue.emplace_back(std::move(task));
queue_cond.notify_one();
queue_mutex.unlock();
}
-void DedicatedTaskPool::push(const TaskRunFunction &run, bool front)
-{
- push(new Task(run), front);
-}
-
void DedicatedTaskPool::wait()
{
thread_scoped_lock num_lock(num_mutex);
@@ -501,18 +164,6 @@ void DedicatedTaskPool::cancel()
do_cancel = false;
}
-void DedicatedTaskPool::stop()
-{
- clear();
-
- do_exit = true;
- queue_cond.notify_all();
-
- wait();
-
- assert(num == 0);
-}
-
bool DedicatedTaskPool::canceled()
{
return do_cancel;
@@ -535,7 +186,7 @@ void DedicatedTaskPool::num_increase()
num_cond.notify_all();
}
-bool DedicatedTaskPool::thread_wait_pop(Task *&task)
+bool DedicatedTaskPool::thread_wait_pop(TaskRunFunction &task)
{
thread_scoped_lock queue_lock(queue_mutex);
@@ -555,15 +206,15 @@ bool DedicatedTaskPool::thread_wait_pop(Task *&task)
void DedicatedTaskPool::thread_run()
{
- Task *task;
+ TaskRunFunction task;
/* keep popping off tasks */
while (thread_wait_pop(task)) {
/* run task */
- task->run(0);
+ task();
/* delete task */
- delete task;
+ task = nullptr;
/* notify task was done */
num_decrease(1);
@@ -575,15 +226,8 @@ void DedicatedTaskPool::clear()
thread_scoped_lock queue_lock(queue_mutex);
/* erase all tasks from the queue */
- list<Task *>::iterator it = queue.begin();
- int done = 0;
-
- while (it != queue.end()) {
- done++;
- delete *it;
-
- it = queue.erase(it);
- }
+ int done = queue.size();
+ queue.clear();
queue_lock.unlock();
diff --git a/intern/cycles/util/util_task.h b/intern/cycles/util/util_task.h
index fd30a33d8ef..a56ca62f62c 100644
--- a/intern/cycles/util/util_task.h
+++ b/intern/cycles/util/util_task.h
@@ -19,48 +19,16 @@
#include "util/util_list.h"
#include "util/util_string.h"
+#include "util/util_tbb.h"
#include "util/util_thread.h"
#include "util/util_vector.h"
CCL_NAMESPACE_BEGIN
-class Task;
class TaskPool;
class TaskScheduler;
-/* Notes on Thread ID
- *
- * Thread ID argument reports the 0-based ID of a working thread from which
- * the run() callback is being invoked. Thread ID of 0 denotes the thread from
- * which wait_work() was called.
- *
- * DO NOT use this ID to control execution flaw, use it only for things like
- * emulating TLS which does not affect on scheduling. Don't use this ID to make
- * any decisions.
- *
- * It is to be noted here that dedicated task pool will always report thread ID
- * of 0.
- */
-
-typedef function<void(int thread_id)> TaskRunFunction;
-
-/* Task
- *
- * Base class for tasks to be executed in threads. */
-
-class Task {
- public:
- Task(){};
- explicit Task(const TaskRunFunction &run_) : run(run_)
- {
- }
-
- virtual ~Task()
- {
- }
-
- TaskRunFunction run;
-};
+typedef function<void(void)> TaskRunFunction;
/* Task Pool
*
@@ -68,8 +36,7 @@ class Task {
* pool, we can wait for all tasks to be done, or cancel them before they are
* done.
*
- * The run callback that actually executes the task may be created like this:
- * function_bind(&MyClass::task_execute, this, _1, _2) */
+ * TaskRunFunction may be created with std::bind or lambda expressions. */
class TaskPool {
public:
@@ -89,27 +56,15 @@ class TaskPool {
TaskPool();
~TaskPool();
- void push(Task *task, bool front = false);
- void push(const TaskRunFunction &run, bool front = false);
+ void push(TaskRunFunction &&task);
void wait_work(Summary *stats = NULL); /* work and wait until all tasks are done */
- void cancel(); /* cancel all tasks, keep worker threads running */
- void stop(); /* stop all worker threads */
- bool finished(); /* check if all work has been completed */
+ void cancel(); /* cancel all tasks and wait until they are no longer executing */
bool canceled(); /* for worker threads, test if canceled */
protected:
- friend class TaskScheduler;
-
- void num_decrease(int done);
- void num_increase();
-
- thread_mutex num_mutex;
- thread_condition_variable num_cond;
-
- int num;
- bool do_cancel;
+ tbb::task_group tbb_group;
/* ** Statistics ** */
@@ -131,40 +86,19 @@ class TaskScheduler {
static void exit();
static void free_memory();
- /* number of threads that can work on task */
- static int num_threads()
- {
- return threads.size();
- }
-
- /* test if any session is using the scheduler */
- static bool active()
- {
- return users != 0;
- }
+ /* Approximate number of threads that will work on task, which may be lower
+ * or higher than the actual number of threads. Use as little as possible and
+ * leave splitting up tasks to the scheduler. */
+ static int num_threads();
protected:
- friend class TaskPool;
-
- struct Entry {
- Task *task;
- TaskPool *pool;
- };
-
static thread_mutex mutex;
static int users;
- static vector<thread *> threads;
- static bool do_exit;
+ static int active_num_threads;
- static list<Entry> queue;
- static thread_mutex queue_mutex;
- static thread_condition_variable queue_cond;
-
- static void thread_run(int thread_id);
- static bool thread_wait_pop(Entry &entry);
-
- static void push(Entry &entry, bool front);
- static void clear(TaskPool *pool);
+#ifdef WITH_TBB_GLOBAL_CONTROL
+ static tbb::global_control *global_control;
+#endif
};
/* Dedicated Task Pool
@@ -179,12 +113,10 @@ class DedicatedTaskPool {
DedicatedTaskPool();
~DedicatedTaskPool();
- void push(Task *task, bool front = false);
- void push(const TaskRunFunction &run, bool front = false);
+ void push(TaskRunFunction &&run, bool front = false);
void wait(); /* wait until all tasks are done */
void cancel(); /* cancel all tasks, keep worker thread running */
- void stop(); /* stop worker thread */
bool canceled(); /* for worker thread, test if canceled */
@@ -193,14 +125,14 @@ class DedicatedTaskPool {
void num_increase();
void thread_run();
- bool thread_wait_pop(Task *&entry);
+ bool thread_wait_pop(TaskRunFunction &task);
void clear();
thread_mutex num_mutex;
thread_condition_variable num_cond;
- list<Task *> queue;
+ list<TaskRunFunction> queue;
thread_mutex queue_mutex;
thread_condition_variable queue_cond;
diff --git a/intern/cycles/util/util_tbb.h b/intern/cycles/util/util_tbb.h
new file mode 100644
index 00000000000..301cb80c5b0
--- /dev/null
+++ b/intern/cycles/util/util_tbb.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright 2011-2020 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_TBB_H__
+#define __UTIL_TBB_H__
+
+/* TBB includes <windows.h>, do it ourselves first so we are sure
+ * WIN32_LEAN_AND_MEAN and similar are defined beforehand. */
+#include "util_windows.h"
+
+#define TBB_SUPPRESS_DEPRECATED_MESSAGES 1
+#include <tbb/tbb.h>
+
+#if TBB_INTERFACE_VERSION_MAJOR >= 10
+# define WITH_TBB_GLOBAL_CONTROL
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+using tbb::blocked_range;
+using tbb::enumerable_thread_specific;
+using tbb::parallel_for;
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_TBB_H__ */
diff --git a/intern/cycles/util/util_version.h b/intern/cycles/util/util_version.h
index bb2c99cc6d7..8bce5ff85aa 100644
--- a/intern/cycles/util/util_version.h
+++ b/intern/cycles/util/util_version.h
@@ -22,7 +22,7 @@
CCL_NAMESPACE_BEGIN
#define CYCLES_VERSION_MAJOR 1
-#define CYCLES_VERSION_MINOR 12
+#define CYCLES_VERSION_MINOR 13
#define CYCLES_VERSION_PATCH 0
#define CYCLES_MAKE_VERSION_STRING2(a, b, c) #a "." #b "." #c
diff --git a/intern/ghost/intern/GHOST_IXrGraphicsBinding.h b/intern/ghost/intern/GHOST_IXrGraphicsBinding.h
index 5794a682023..b199c5f9b28 100644
--- a/intern/ghost/intern/GHOST_IXrGraphicsBinding.h
+++ b/intern/ghost/intern/GHOST_IXrGraphicsBinding.h
@@ -56,7 +56,8 @@ class GHOST_IXrGraphicsBinding {
std::string *r_requirement_info) const = 0;
virtual void initFromGhostContext(class GHOST_Context *ghost_ctx) = 0;
virtual bool chooseSwapchainFormat(const std::vector<int64_t> &runtime_formats,
- int64_t *r_result) const = 0;
+ int64_t &r_result,
+ bool &r_is_rgb_format) const = 0;
virtual std::vector<XrSwapchainImageBaseHeader *> createSwapchainImages(
uint32_t image_count) = 0;
virtual void submitToSwapchainImage(XrSwapchainImageBaseHeader *swapchain_image,
diff --git a/intern/ghost/intern/GHOST_XrGraphicsBinding.cpp b/intern/ghost/intern/GHOST_XrGraphicsBinding.cpp
index 71e6af3fa4f..7d7405a974d 100644
--- a/intern/ghost/intern/GHOST_XrGraphicsBinding.cpp
+++ b/intern/ghost/intern/GHOST_XrGraphicsBinding.cpp
@@ -36,7 +36,7 @@
static bool choose_swapchain_format_from_candidates(std::vector<int64_t> gpu_binding_formats,
std::vector<int64_t> runtime_formats,
- int64_t *r_result)
+ int64_t &r_result)
{
if (gpu_binding_formats.empty()) {
return false;
@@ -50,7 +50,7 @@ static bool choose_swapchain_format_from_candidates(std::vector<int64_t> gpu_bin
return false;
}
- *r_result = *res;
+ r_result = *res;
return true;
}
@@ -132,10 +132,20 @@ class GHOST_XrGraphicsBindingOpenGL : public GHOST_IXrGraphicsBinding {
}
bool chooseSwapchainFormat(const std::vector<int64_t> &runtime_formats,
- int64_t *r_result) const override
+ int64_t &r_result,
+ bool &r_is_srgb_format) const override
{
- std::vector<int64_t> gpu_binding_formats = {GL_RGBA8};
- return choose_swapchain_format_from_candidates(gpu_binding_formats, runtime_formats, r_result);
+ std::vector<int64_t> gpu_binding_formats = {
+ GL_RGBA8,
+ GL_SRGB8_ALPHA8,
+ };
+
+ if (choose_swapchain_format_from_candidates(gpu_binding_formats, runtime_formats, r_result)) {
+ r_is_srgb_format = (r_result == GL_SRGB8_ALPHA8);
+ return true;
+ }
+
+ return false;
}
std::vector<XrSwapchainImageBaseHeader *> createSwapchainImages(uint32_t image_count) override
@@ -248,10 +258,19 @@ class GHOST_XrGraphicsBindingD3D : public GHOST_IXrGraphicsBinding {
}
bool chooseSwapchainFormat(const std::vector<int64_t> &runtime_formats,
- int64_t *r_result) const override
+ int64_t &r_result,
+ bool &r_is_srgb_format) const override
{
- std::vector<int64_t> gpu_binding_formats = {DXGI_FORMAT_R8G8B8A8_UNORM};
- return choose_swapchain_format_from_candidates(gpu_binding_formats, runtime_formats, r_result);
+ std::vector<int64_t> gpu_binding_formats = {
+ DXGI_FORMAT_R8G8B8A8_UNORM,
+ DXGI_FORMAT_R8G8B8A8_UNORM_SRGB,
+ };
+
+ if (choose_swapchain_format_from_candidates(gpu_binding_formats, runtime_formats, r_result)) {
+ r_is_srgb_format = (r_result == DXGI_FORMAT_R8G8B8A8_UNORM_SRGB);
+ return true;
+ }
+ return false;
}
std::vector<XrSwapchainImageBaseHeader *> createSwapchainImages(uint32_t image_count) override
diff --git a/intern/ghost/intern/GHOST_XrSession.cpp b/intern/ghost/intern/GHOST_XrSession.cpp
index 7ae819dbfb2..5a747b1e787 100644
--- a/intern/ghost/intern/GHOST_XrSession.cpp
+++ b/intern/ghost/intern/GHOST_XrSession.cpp
@@ -375,15 +375,6 @@ static void ghost_xr_draw_view_info_from_view(const XrView &view, GHOST_XrDrawVi
r_info.fov.angle_down = view.fov.angleDown;
}
-static bool ghost_xr_draw_view_expects_srgb_buffer(const GHOST_XrContext *context)
-{
- /* Monado seems to be faulty and doesn't do OETF transform correctly. So expect a SRGB buffer to
- * compensate. You get way too dark rendering without this, it's pretty obvious (even in the
- * default startup scene). */
- GHOST_TXrOpenXRRuntimeID runtime_id = context->getOpenXRRuntimeID();
- return (runtime_id == OPENXR_RUNTIME_MONADO) || (runtime_id == OPENXR_RUNTIME_STEAMVR);
-}
-
void GHOST_XrSession::drawView(GHOST_XrSwapchain &swapchain,
XrCompositionLayerProjectionView &r_proj_layer_view,
XrSpaceLocation &view_location,
@@ -398,7 +389,7 @@ void GHOST_XrSession::drawView(GHOST_XrSwapchain &swapchain,
r_proj_layer_view.fov = view.fov;
swapchain.updateCompositionLayerProjectViewSubImage(r_proj_layer_view.subImage);
- draw_view_info.expects_srgb_buffer = ghost_xr_draw_view_expects_srgb_buffer(m_context);
+ draw_view_info.expects_srgb_buffer = swapchain.isBufferSRGB();
draw_view_info.ofsx = r_proj_layer_view.subImage.imageRect.offset.x;
draw_view_info.ofsy = r_proj_layer_view.subImage.imageRect.offset.y;
draw_view_info.width = r_proj_layer_view.subImage.imageRect.extent.width;
diff --git a/intern/ghost/intern/GHOST_XrSwapchain.cpp b/intern/ghost/intern/GHOST_XrSwapchain.cpp
index f50cfde0687..f7808c20112 100644
--- a/intern/ghost/intern/GHOST_XrSwapchain.cpp
+++ b/intern/ghost/intern/GHOST_XrSwapchain.cpp
@@ -68,7 +68,7 @@ GHOST_XrSwapchain::GHOST_XrSwapchain(GHOST_IXrGraphicsBinding &gpu_binding,
"Failed to get swapchain image formats.");
assert(swapchain_formats.size() == format_count);
- if (!gpu_binding.chooseSwapchainFormat(swapchain_formats, &chosen_format)) {
+ if (!gpu_binding.chooseSwapchainFormat(swapchain_formats, chosen_format, m_is_srgb_buffer)) {
throw GHOST_XrException(
"Error: No format matching OpenXR runtime supported swapchain formats found.");
}
@@ -132,6 +132,11 @@ void GHOST_XrSwapchain::updateCompositionLayerProjectViewSubImage(XrSwapchainSub
r_sub_image.imageRect.extent = {m_image_width, m_image_height};
}
+bool GHOST_XrSwapchain::isBufferSRGB()
+{
+ return m_is_srgb_buffer;
+}
+
void GHOST_XrSwapchain::releaseImage()
{
XrSwapchainImageReleaseInfo release_info = {XR_TYPE_SWAPCHAIN_IMAGE_RELEASE_INFO};
diff --git a/intern/ghost/intern/GHOST_XrSwapchain.h b/intern/ghost/intern/GHOST_XrSwapchain.h
index ab0a6736c9c..7a3e7fcea68 100644
--- a/intern/ghost/intern/GHOST_XrSwapchain.h
+++ b/intern/ghost/intern/GHOST_XrSwapchain.h
@@ -38,9 +38,12 @@ class GHOST_XrSwapchain {
void updateCompositionLayerProjectViewSubImage(XrSwapchainSubImage &r_sub_image);
+ bool isBufferSRGB();
+
private:
std::unique_ptr<OpenXRSwapchainData> m_oxr; /* Could use stack, but PImpl is preferable. */
int32_t m_image_width, m_image_height;
+ bool m_is_srgb_buffer = false;
};
#endif // GHOST_XRSWAPCHAIN_H
diff --git a/intern/guardedalloc/MEM_guardedalloc.h b/intern/guardedalloc/MEM_guardedalloc.h
index 602297576c8..bbba69edf1d 100644
--- a/intern/guardedalloc/MEM_guardedalloc.h
+++ b/intern/guardedalloc/MEM_guardedalloc.h
@@ -187,7 +187,8 @@ extern size_t (*MEM_get_peak_memory)(void) ATTR_WARN_UNUSED_RESULT;
do { \
typeof(&(v)) _v = &(v); \
if (*_v) { \
- MEM_freeN(*_v); \
+ /* Cast so we can free constant arrays. */ \
+ MEM_freeN((void *)*_v); \
*_v = NULL; \
} \
} while (0)
diff --git a/intern/libmv/libmv/multiview/projection_test.cc b/intern/libmv/libmv/multiview/projection_test.cc
index 460a186e7c4..40e766bfae7 100644
--- a/intern/libmv/libmv/multiview/projection_test.cc
+++ b/intern/libmv/libmv/multiview/projection_test.cc
@@ -68,10 +68,10 @@ TEST(Projection, isInFrontOfCamera) {
Vec4 X_front = GetRandomPoint();
Vec4 X_back = GetRandomPoint();
- X_front(2) = 10; // Any point in the positive Z direction
- // where Z > 1 is infront of the camera.
- X_back(2) = -10; // Any point int he negative Z dirstaion
- // is behind the camera.
+ X_front(2) = 10; /* Any point in the positive Z direction
+ * where Z > 1 is in front of the camera. */
+ X_back(2) = -10; /* Any point in the negative Z direction
+ * is behind the camera. */
bool res_front = isInFrontOfCamera(P, X_front);
bool res_back = isInFrontOfCamera(P, X_back);
diff --git a/intern/libmv/libmv/simple_pipeline/bundle.cc b/intern/libmv/libmv/simple_pipeline/bundle.cc
index 2ecc0505e1f..22ab0cdf864 100644
--- a/intern/libmv/libmv/simple_pipeline/bundle.cc
+++ b/intern/libmv/libmv/simple_pipeline/bundle.cc
@@ -21,6 +21,7 @@
#include "libmv/simple_pipeline/bundle.h"
#include <map>
+#include <thread>
#include "ceres/ceres.h"
#include "ceres/rotation.h"
@@ -35,10 +36,6 @@
#include "libmv/simple_pipeline/tracks.h"
#include "libmv/simple_pipeline/distortion_models.h"
-#ifdef _OPENMP
-# include <omp.h>
-#endif
-
namespace libmv {
// The intrinsics need to get combined into a single parameter block; use these
@@ -642,11 +639,7 @@ void EuclideanBundlePointsOnly(const CameraIntrinsics *invariant_intrinsics,
options.use_explicit_schur_complement = true;
options.use_inner_iterations = true;
options.max_num_iterations = 100;
-
-#ifdef _OPENMP
- options.num_threads = omp_get_max_threads();
- options.num_linear_solver_threads = omp_get_max_threads();
-#endif
+ options.num_threads = std::thread::hardware_concurrency();
// Solve!
ceres::Solver::Summary summary;
@@ -807,11 +800,7 @@ void EuclideanBundleCommonIntrinsics(
options.use_explicit_schur_complement = true;
options.use_inner_iterations = true;
options.max_num_iterations = 100;
-
-#ifdef _OPENMP
- options.num_threads = omp_get_max_threads();
- options.num_linear_solver_threads = omp_get_max_threads();
-#endif
+ options.num_threads = std::thread::hardware_concurrency();
// Solve!
ceres::Solver::Summary summary;
diff --git a/intern/mantaflow/extern/manta_fluid_API.h b/intern/mantaflow/extern/manta_fluid_API.h
index 7825ad14d7d..d78aa6732b1 100644
--- a/intern/mantaflow/extern/manta_fluid_API.h
+++ b/intern/mantaflow/extern/manta_fluid_API.h
@@ -41,23 +41,23 @@ int manta_write_config(struct MANTA *fluid, struct FluidModifierData *mmd, int f
int manta_write_data(struct MANTA *fluid, struct FluidModifierData *mmd, int framenr);
int manta_write_noise(struct MANTA *fluid, struct FluidModifierData *mmd, int framenr);
int manta_read_config(struct MANTA *fluid, struct FluidModifierData *mmd, int framenr);
-int manta_read_data(struct MANTA *fluid, struct FluidModifierData *mmd, int framenr);
-int manta_read_noise(struct MANTA *fluid, struct FluidModifierData *mmd, int framenr);
+int manta_read_data(struct MANTA *fluid,
+ struct FluidModifierData *mmd,
+ int framenr,
+ bool resumable);
+int manta_read_noise(struct MANTA *fluid,
+ struct FluidModifierData *mmd,
+ int framenr,
+ bool resumable);
int manta_read_mesh(struct MANTA *fluid, struct FluidModifierData *mmd, int framenr);
-int manta_read_particles(struct MANTA *fluid, struct FluidModifierData *mmd, int framenr);
+int manta_read_particles(struct MANTA *fluid,
+ struct FluidModifierData *mmd,
+ int framenr,
+ bool resumable);
int manta_read_guiding(struct MANTA *fluid,
struct FluidModifierData *mmd,
int framenr,
bool sourceDomain);
-int manta_update_liquid_structures(struct MANTA *fluid,
- struct FluidModifierData *mmd,
- int framenr);
-int manta_update_mesh_structures(struct MANTA *fluid, struct FluidModifierData *mmd, int framenr);
-int manta_update_particle_structures(struct MANTA *fluid,
- struct FluidModifierData *mmd,
- int framenr);
-int manta_update_smoke_structures(struct MANTA *fluid, struct FluidModifierData *mmd, int framenr);
-int manta_update_noise_structures(struct MANTA *fluid, struct FluidModifierData *mmd, int framenr);
int manta_bake_data(struct MANTA *fluid, struct FluidModifierData *mmd, int framenr);
int manta_bake_noise(struct MANTA *fluid, struct FluidModifierData *mmd, int framenr);
int manta_bake_mesh(struct MANTA *fluid, struct FluidModifierData *mmd, int framenr);
diff --git a/intern/mantaflow/intern/MANTA_main.cpp b/intern/mantaflow/intern/MANTA_main.cpp
index e760cef8641..a008100a3e2 100644
--- a/intern/mantaflow/intern/MANTA_main.cpp
+++ b/intern/mantaflow/intern/MANTA_main.cpp
@@ -301,9 +301,8 @@ void MANTA::initDomain(FluidModifierData *mmd)
string tmpString = fluid_variables + fluid_solver + fluid_alloc + fluid_cache_helper +
fluid_bake_multiprocessing + fluid_bake_data + fluid_bake_noise +
fluid_bake_mesh + fluid_bake_particles + fluid_bake_guiding +
- fluid_file_import + fluid_file_export + fluid_save_data + fluid_load_data +
- fluid_pre_step + fluid_post_step + fluid_adapt_time_step +
- fluid_time_stepping;
+ fluid_file_import + fluid_file_export + fluid_pre_step + fluid_post_step +
+ fluid_adapt_time_step + fluid_time_stepping;
string finalString = parseScript(tmpString, mmd);
pythonCommands.push_back(finalString);
runPythonString(pythonCommands);
@@ -674,7 +673,7 @@ void MANTA::initializeRNAMap(FluidModifierData *mmd)
if (!mmd) {
if (with_debug)
- cout << "No modifier data given in RNA map setup - returning early" << endl;
+ cout << "Fluid: No modifier data given in RNA map setup - returning early" << endl;
return;
}
@@ -730,6 +729,20 @@ void MANTA::initializeRNAMap(FluidModifierData *mmd)
float viscosity = mds->viscosity_base * pow(10.0f, -mds->viscosity_exponent);
float domainSize = MAX3(mds->global_size[0], mds->global_size[1], mds->global_size[2]);
+ string vdbCompressionMethod = "Compression_None";
+ if (mds->openvdb_compression == VDB_COMPRESSION_NONE)
+ vdbCompressionMethod = "Compression_None";
+ else if (mds->openvdb_compression == VDB_COMPRESSION_ZIP)
+ vdbCompressionMethod = "Compression_Zip";
+ else if (mds->openvdb_compression == VDB_COMPRESSION_BLOSC)
+ vdbCompressionMethod = "Compression_Blosc";
+
+ string vdbPrecisionHalf = "True";
+ if (mds->openvdb_data_depth == VDB_PRECISION_HALF_FLOAT)
+ vdbPrecisionHalf = "True";
+ else if (mds->openvdb_data_depth == VDB_PRECISION_FULL_FLOAT)
+ vdbPrecisionHalf = "False";
+
mRNAMap["USING_SMOKE"] = getBooleanString(mds->type == FLUID_DOMAIN_TYPE_GAS);
mRNAMap["USING_LIQUID"] = getBooleanString(mds->type == FLUID_DOMAIN_TYPE_LIQUID);
mRNAMap["USING_COLORS"] = getBooleanString(mds->active_fields & FLUID_DOMAIN_ACTIVE_COLORS);
@@ -742,8 +755,8 @@ void MANTA::initializeRNAMap(FluidModifierData *mmd)
mRNAMap["USING_OUTFLOW"] = getBooleanString(mds->active_fields & FLUID_DOMAIN_ACTIVE_OUTFLOW);
mRNAMap["USING_LOG_DISSOLVE"] = getBooleanString(mds->flags & FLUID_DOMAIN_USE_DISSOLVE_LOG);
mRNAMap["USING_DISSOLVE"] = getBooleanString(mds->flags & FLUID_DOMAIN_USE_DISSOLVE);
- mRNAMap["DO_OPEN"] = getBooleanString(mds->border_collisions == 0);
- mRNAMap["CACHE_RESUMABLE"] = getBooleanString(mds->cache_type != FLUID_DOMAIN_CACHE_FINAL);
+ mRNAMap["DOMAIN_CLOSED"] = getBooleanString(borderCollisions.compare("") == 0);
+ mRNAMap["CACHE_RESUMABLE"] = getBooleanString(mds->flags & FLUID_DOMAIN_USE_RESUMABLE_CACHE);
mRNAMap["USING_ADAPTIVETIME"] = getBooleanString(mds->flags & FLUID_DOMAIN_USE_ADAPTIVE_TIME);
mRNAMap["USING_SPEEDVECTORS"] = getBooleanString(mds->flags & FLUID_DOMAIN_USE_SPEED_VECTORS);
mRNAMap["USING_FRACTIONS"] = getBooleanString(mds->flags & FLUID_DOMAIN_USE_FRACTIONS);
@@ -850,6 +863,9 @@ void MANTA::initializeRNAMap(FluidModifierData *mmd)
mRNAMap["LIQUID_SURFACE_TENSION"] = to_string(mds->surface_tension);
mRNAMap["FLUID_VISCOSITY"] = to_string(viscosity);
mRNAMap["FLUID_DOMAIN_SIZE"] = to_string(domainSize);
+ mRNAMap["FLUID_DOMAIN_SIZE_X"] = to_string(mds->global_size[0]);
+ mRNAMap["FLUID_DOMAIN_SIZE_Y"] = to_string(mds->global_size[1]);
+ mRNAMap["FLUID_DOMAIN_SIZE_Z"] = to_string(mds->global_size[2]);
mRNAMap["SNDPARTICLE_TYPES"] = particleTypesStr;
mRNAMap["GUIDING_ALPHA"] = to_string(mds->guide_alpha);
mRNAMap["GUIDING_BETA"] = to_string(mds->guide_beta);
@@ -858,6 +874,8 @@ void MANTA::initializeRNAMap(FluidModifierData *mmd)
mRNAMap["GRAVITY_Y"] = to_string(mds->gravity[1]);
mRNAMap["GRAVITY_Z"] = to_string(mds->gravity[2]);
mRNAMap["CACHE_DIR"] = cacheDirectory;
+ mRNAMap["COMPRESSION_OPENVDB"] = vdbCompressionMethod;
+ mRNAMap["PRECISION_OPENVDB"] = vdbPrecisionHalf;
/* Fluid object names. */
mRNAMap["NAME_FLAGS"] = FLUID_NAME_FLAGS;
@@ -900,6 +918,8 @@ void MANTA::initializeRNAMap(FluidModifierData *mmd)
mRNAMap["NAME_DENSITYIN"] = FLUID_NAME_DENSITYIN;
mRNAMap["NAME_HEAT"] = FLUID_NAME_HEAT;
mRNAMap["NAME_HEATIN"] = FLUID_NAME_HEATIN;
+ mRNAMap["NAME_TEMPERATURE"] = FLUID_NAME_TEMPERATURE;
+ mRNAMap["NAME_TEMPERATUREIN"] = FLUID_NAME_TEMPERATUREIN;
mRNAMap["NAME_COLORR"] = FLUID_NAME_COLORR;
mRNAMap["NAME_COLORG"] = FLUID_NAME_COLORG;
mRNAMap["NAME_COLORB"] = FLUID_NAME_COLORB;
@@ -921,6 +941,8 @@ void MANTA::initializeRNAMap(FluidModifierData *mmd)
mRNAMap["NAME_MAPWEIGHTS"] = FLUID_NAME_MAPWEIGHTS;
mRNAMap["NAME_PP"] = FLUID_NAME_PP;
mRNAMap["NAME_PVEL"] = FLUID_NAME_PVEL;
+ mRNAMap["NAME_PARTS"] = FLUID_NAME_PARTS;
+ mRNAMap["NAME_PARTSVELOCITY"] = FLUID_NAME_PARTSVELOCITY;
mRNAMap["NAME_PINDEX"] = FLUID_NAME_PINDEX;
mRNAMap["NAME_GPI"] = FLUID_NAME_GPI;
mRNAMap["NAME_CURVATURE"] = FLUID_NAME_CURVATURE;
@@ -967,6 +989,10 @@ void MANTA::initializeRNAMap(FluidModifierData *mmd)
mRNAMap["NAME_PVEL_PARTICLES"] = FLUID_NAME_PVEL_PARTICLES;
mRNAMap["NAME_PFORCE_PARTICLES"] = FLUID_NAME_PFORCE_PARTICLES;
mRNAMap["NAME_PLIFE_PARTICLES"] = FLUID_NAME_PLIFE_PARTICLES;
+ mRNAMap["NAME_PARTS_PARTICLES"] = FLUID_NAME_PARTS_PARTICLES;
+ mRNAMap["NAME_PARTSVEL_PARTICLES"] = FLUID_NAME_PARTSVEL_PARTICLES;
+ mRNAMap["NAME_PARTSFORCE_PARTICLES"] = FLUID_NAME_PARTSFORCE_PARTICLES;
+ mRNAMap["NAME_PARTSLIFE_PARTICLES"] = FLUID_NAME_PARTSLIFE_PARTICLES;
mRNAMap["NAME_VELOCITY_PARTICLES"] = FLUID_NAME_VELOCITY_PARTICLES;
mRNAMap["NAME_FLAGS_PARTICLES"] = FLUID_NAME_FLAGS_PARTICLES;
mRNAMap["NAME_PHI_PARTICLES"] = FLUID_NAME_PHI_PARTICLES;
@@ -1000,9 +1026,6 @@ void MANTA::initializeRNAMap(FluidModifierData *mmd)
string MANTA::getRealValue(const string &varName)
{
- if (with_debug)
- cout << "MANTA::getRealValue()" << endl;
-
unordered_map<string, string>::iterator it;
it = mRNAMap.find(varName);
@@ -1011,9 +1034,6 @@ string MANTA::getRealValue(const string &varName)
<< endl;
return "";
}
- if (with_debug) {
- cout << "Found variable " << varName << " with value " << it->second << endl;
- }
return it->second;
}
@@ -1062,409 +1082,6 @@ string MANTA::parseScript(const string &setup_string, FluidModifierData *mmd)
return res.str();
}
-bool MANTA::updateFlipStructures(FluidModifierData *mmd, int framenr)
-{
- if (MANTA::with_debug)
- cout << "MANTA::updateFlipStructures()" << endl;
-
- FluidDomainSettings *mds = mmd->domain;
- mFlipFromFile = false;
-
- if (!mUsingLiquid)
- return false;
- if (BLI_path_is_rel(mds->cache_directory))
- return false;
-
- int result = 0;
- int expected = 0; /* Expected number of read successes for this frame. */
-
- /* Ensure empty data structures at start. */
- if (!mFlipParticleData || !mFlipParticleVelocity)
- return false;
-
- mFlipParticleData->clear();
- mFlipParticleVelocity->clear();
-
- string pformat = getCacheFileEnding(mds->cache_particle_format);
- string file = getFile(mmd, FLUID_DOMAIN_DIR_DATA, FLUID_NAME_PP, pformat, framenr);
-
- expected += 1;
- if (BLI_exists(file.c_str())) {
- result += updateParticlesFromFile(file, false, false);
- assert(result == expected);
- }
-
- file = getFile(mmd, FLUID_DOMAIN_DIR_DATA, FLUID_NAME_PVEL, pformat, framenr);
- expected += 1;
- if (BLI_exists(file.c_str())) {
- result += updateParticlesFromFile(file, false, true);
- assert(result == expected);
- }
-
- return mFlipFromFile = (result == expected);
-}
-
-bool MANTA::updateMeshStructures(FluidModifierData *mmd, int framenr)
-{
- if (MANTA::with_debug)
- cout << "MANTA::updateMeshStructures()" << endl;
-
- FluidDomainSettings *mds = mmd->domain;
- mMeshFromFile = false;
-
- if (!mUsingMesh)
- return false;
- if (BLI_path_is_rel(mds->cache_directory))
- return false;
-
- int result = 0;
- int expected = 0; /* Expected number of read successes for this frame. */
-
- /* Ensure empty data structures at start. */
- if (!mMeshNodes || !mMeshTriangles)
- return false;
-
- mMeshNodes->clear();
- mMeshTriangles->clear();
-
- if (mMeshVelocities)
- mMeshVelocities->clear();
-
- string mformat = getCacheFileEnding(mds->cache_mesh_format);
- string dformat = getCacheFileEnding(mds->cache_data_format);
- string file = getFile(mmd, FLUID_DOMAIN_DIR_MESH, FLUID_NAME_LMESH, mformat, framenr);
-
- expected += 1;
- if (BLI_exists(file.c_str())) {
- result += updateMeshFromFile(file);
- assert(result == expected);
- }
-
- if (mUsingMVel) {
- file = getFile(mmd, FLUID_DOMAIN_DIR_MESH, FLUID_NAME_VELOCITYVEC_MESH, dformat, framenr);
- expected += 1;
- if (BLI_exists(file.c_str())) {
- result += updateMeshFromFile(file);
- assert(result == expected);
- }
- }
-
- return mMeshFromFile = (result == expected);
-}
-
-bool MANTA::updateParticleStructures(FluidModifierData *mmd, int framenr)
-{
- if (MANTA::with_debug)
- cout << "MANTA::updateParticleStructures()" << endl;
-
- FluidDomainSettings *mds = mmd->domain;
- mParticlesFromFile = false;
-
- if (!mUsingDrops && !mUsingBubbles && !mUsingFloats && !mUsingTracers)
- return false;
- if (BLI_path_is_rel(mds->cache_directory))
- return false;
-
- int result = 0;
- int expected = 0; /* Expected number of read successes for this frame. */
-
- /* Ensure empty data structures at start. */
- if (!mSndParticleData || !mSndParticleVelocity || !mSndParticleLife)
- return false;
-
- mSndParticleData->clear();
- mSndParticleVelocity->clear();
- mSndParticleLife->clear();
-
- string pformat = getCacheFileEnding(mds->cache_particle_format);
- string file = getFile(
- mmd, FLUID_DOMAIN_DIR_PARTICLES, FLUID_NAME_PP_PARTICLES, pformat, framenr);
-
- expected += 1;
- if (BLI_exists(file.c_str())) {
- result += updateParticlesFromFile(file, true, false);
- assert(result == expected);
- }
-
- file = getFile(mmd, FLUID_DOMAIN_DIR_PARTICLES, FLUID_NAME_PVEL_PARTICLES, pformat, framenr);
- expected += 1;
- if (BLI_exists(file.c_str())) {
- result += updateParticlesFromFile(file, true, true);
- assert(result == expected);
- }
-
- file = getFile(mmd, FLUID_DOMAIN_DIR_PARTICLES, FLUID_NAME_PLIFE_PARTICLES, pformat, framenr);
- expected += 1;
- if (BLI_exists(file.c_str())) {
- result += updateParticlesFromFile(file, true, false);
- assert(result == expected);
- }
-
- return mParticlesFromFile = (result == expected);
-}
-
-static void assertGridItems(vector<MANTA::GridItem> gList)
-{
- vector<MANTA::GridItem>::iterator gIter = gList.begin();
- int *resPrev = (*gIter).res;
-
- for (vector<MANTA::GridItem>::iterator it = gList.begin(); it != gList.end(); ++it) {
- MANTA::GridItem item = *it;
- assert(
- ELEM(item.type, FLUID_DOMAIN_GRID_FLOAT, FLUID_DOMAIN_GRID_INT, FLUID_DOMAIN_GRID_VEC3F));
- assert(item.pointer[0]);
- if (item.type == FLUID_DOMAIN_GRID_VEC3F) {
- assert(item.pointer[1] && item.pointer[2]);
- }
- assert(item.res[0] == resPrev[0] && item.res[1] == resPrev[1] && item.res[2] == resPrev[2]);
- assert((item.name).compare("") != 0);
- }
-
- UNUSED_VARS(resPrev);
-}
-
-bool MANTA::updateSmokeStructures(FluidModifierData *mmd, int framenr)
-{
- if (MANTA::with_debug)
- cout << "MANTA::updateGridStructures()" << endl;
-
- FluidDomainSettings *mds = mmd->domain;
- mSmokeFromFile = false;
-
- if (!mUsingSmoke)
- return false;
- if (BLI_path_is_rel(mds->cache_directory))
- return false;
-
- int result = 0;
- string dformat = getCacheFileEnding(mds->cache_data_format);
-
- vector<FileItem> filesData;
- vector<GridItem> gridsData;
-
- int res[] = {mResX, mResY, mResZ};
-
- /* Put grid pointers into pointer lists, some grids have more than 1 pointer. */
- void *aDensity[] = {mDensity};
- void *aShadow[] = {mShadow};
- void *aVelocities[] = {mVelocityX, mVelocityY, mVelocityZ};
- void *aHeat[] = {mHeat};
- void *aColorR[] = {mColorR};
- void *aColorG[] = {mColorG};
- void *aColorB[] = {mColorB};
- void *aFlame[] = {mFlame};
- void *aFuel[] = {mFuel};
- void *aReact[] = {mReact};
-
- /* File names for grids. */
- string fDensity = getFile(mmd, FLUID_DOMAIN_DIR_DATA, FLUID_NAME_DENSITY, dformat, framenr);
- string fShadow = getFile(mmd, FLUID_DOMAIN_DIR_DATA, FLUID_NAME_SHADOW, dformat, framenr);
- string fVel = getFile(mmd, FLUID_DOMAIN_DIR_DATA, FLUID_NAME_VELOCITY, dformat, framenr);
- string fHeat = getFile(mmd, FLUID_DOMAIN_DIR_DATA, FLUID_NAME_HEAT, dformat, framenr);
- string fColorR = getFile(mmd, FLUID_DOMAIN_DIR_DATA, FLUID_NAME_COLORR, dformat, framenr);
- string fColorG = getFile(mmd, FLUID_DOMAIN_DIR_DATA, FLUID_NAME_COLORG, dformat, framenr);
- string fColorB = getFile(mmd, FLUID_DOMAIN_DIR_DATA, FLUID_NAME_COLORB, dformat, framenr);
- string fFlame = getFile(mmd, FLUID_DOMAIN_DIR_DATA, FLUID_NAME_FLAME, dformat, framenr);
- string fFuel = getFile(mmd, FLUID_DOMAIN_DIR_DATA, FLUID_NAME_FUEL, dformat, framenr);
- string fReact = getFile(mmd, FLUID_DOMAIN_DIR_DATA, FLUID_NAME_REACT, dformat, framenr);
- string fFluid = getFile(mmd, FLUID_DOMAIN_DIR_DATA, FLUID_NAME_DATA, dformat, framenr);
-
- /* Prepare grid info containers. */
- GridItem gDensity = {aDensity, FLUID_DOMAIN_GRID_FLOAT, res, FLUID_NAME_DENSITY};
- GridItem gShadow = {aShadow, FLUID_DOMAIN_GRID_FLOAT, res, FLUID_NAME_SHADOW};
- GridItem gVel = {aVelocities, FLUID_DOMAIN_GRID_VEC3F, res, FLUID_NAME_VELOCITY};
- GridItem gHeat = {aHeat, FLUID_DOMAIN_GRID_FLOAT, res, FLUID_NAME_HEAT};
- GridItem gColorR = {aColorR, FLUID_DOMAIN_GRID_FLOAT, res, FLUID_NAME_COLORR};
- GridItem gColorG = {aColorG, FLUID_DOMAIN_GRID_FLOAT, res, FLUID_NAME_COLORG};
- GridItem gColorB = {aColorB, FLUID_DOMAIN_GRID_FLOAT, res, FLUID_NAME_COLORB};
- GridItem gFlame = {aFlame, FLUID_DOMAIN_GRID_FLOAT, res, FLUID_NAME_FLAME};
- GridItem gFuel = {aFuel, FLUID_DOMAIN_GRID_FLOAT, res, FLUID_NAME_FUEL};
- GridItem gReact = {aReact, FLUID_DOMAIN_GRID_FLOAT, res, FLUID_NAME_REACT};
-
- /* TODO (sebbas): For now, only allow single file mode. Combined grid file export is todo. */
- const int fileMode = FLUID_DOMAIN_CACHE_FILES_SINGLE;
- if (fileMode == FLUID_DOMAIN_CACHE_FILES_SINGLE) {
-
- filesData.push_back({fDensity, {gDensity}});
- filesData.push_back({fShadow, {gShadow}});
- filesData.push_back({fVel, {gVel}});
- if (mUsingHeat) {
- filesData.push_back({fHeat, {gHeat}});
- }
- if (mUsingColors) {
- filesData.push_back({fColorR, {gColorR}});
- filesData.push_back({fColorG, {gColorG}});
- filesData.push_back({fColorB, {gColorB}});
- }
- if (mUsingFire) {
- filesData.push_back({fFlame, {gFlame}});
- filesData.push_back({fFuel, {gFuel}});
- filesData.push_back({fReact, {gReact}});
- }
- }
- else if (fileMode == FLUID_DOMAIN_CACHE_FILES_COMBINED) {
-
- gridsData.push_back(gDensity);
- gridsData.push_back(gShadow);
- gridsData.push_back(gVel);
- if (mUsingHeat) {
- gridsData.push_back(gHeat);
- }
- if (mUsingColors) {
- gridsData.push_back(gColorR);
- gridsData.push_back(gColorG);
- gridsData.push_back(gColorB);
- }
- if (mUsingFire) {
- gridsData.push_back(gFlame);
- gridsData.push_back(gFuel);
- gridsData.push_back(gReact);
- }
-
- if (with_debug) {
- assertGridItems(gridsData);
- }
- filesData.push_back({fFluid, gridsData});
- }
-
- /* Update files from data directory. */
- for (vector<FileItem>::iterator it = filesData.begin(); it != filesData.end(); ++it) {
- FileItem item = *it;
- if (BLI_exists(item.filename.c_str())) {
- result += updateGridsFromFile(item.filename, item.grids);
- assert(result);
- }
- }
-
- return mSmokeFromFile = result;
-}
-
-bool MANTA::updateNoiseStructures(FluidModifierData *mmd, int framenr)
-{
- if (MANTA::with_debug)
- cout << "MANTA::updateNoiseStructures()" << endl;
-
- FluidDomainSettings *mds = mmd->domain;
- mNoiseFromFile = false;
-
- if (!mUsingSmoke || !mUsingNoise)
- return false;
- if (BLI_path_is_rel(mds->cache_directory))
- return false;
-
- int result = 0;
- string dformat = getCacheFileEnding(mds->cache_data_format);
- string nformat = getCacheFileEnding(mds->cache_noise_format);
-
- vector<FileItem> filesData, filesNoise;
- vector<GridItem> gridsData, gridsNoise;
-
- int resData[] = {mResX, mResY, mResZ};
- int resNoise[] = {mResXNoise, mResYNoise, mResZNoise};
-
- /* Put grid pointers into pointer lists, some grids have more than 1 pointer. */
- void *aShadow[] = {mShadow};
- void *aVelocities[] = {mVelocityX, mVelocityY, mVelocityZ};
- void *aDensity[] = {mDensityHigh};
- void *aColorR[] = {mColorRHigh};
- void *aColorG[] = {mColorGHigh};
- void *aColorB[] = {mColorBHigh};
- void *aFlame[] = {mFlameHigh};
- void *aFuel[] = {mFuelHigh};
- void *aReact[] = {mReactHigh};
-
- /* File names for grids. */
- string fShadow = getFile(mmd, FLUID_DOMAIN_DIR_DATA, FLUID_NAME_SHADOW, dformat, framenr);
- string fVel = getFile(mmd, FLUID_DOMAIN_DIR_DATA, FLUID_NAME_VELOCITY, dformat, framenr);
- string fFluid = getFile(mmd, FLUID_DOMAIN_DIR_NOISE, FLUID_NAME_DATA, dformat, framenr);
-
- string fDensity = getFile(
- mmd, FLUID_DOMAIN_DIR_NOISE, FLUID_NAME_DENSITY_NOISE, nformat, framenr);
- string fColorR = getFile(mmd, FLUID_DOMAIN_DIR_NOISE, FLUID_NAME_COLORR_NOISE, nformat, framenr);
- string fColorG = getFile(mmd, FLUID_DOMAIN_DIR_NOISE, FLUID_NAME_COLORG_NOISE, nformat, framenr);
- string fColorB = getFile(mmd, FLUID_DOMAIN_DIR_NOISE, FLUID_NAME_COLORB_NOISE, nformat, framenr);
- string fFlame = getFile(mmd, FLUID_DOMAIN_DIR_NOISE, FLUID_NAME_FLAME_NOISE, nformat, framenr);
- string fFuel = getFile(mmd, FLUID_DOMAIN_DIR_NOISE, FLUID_NAME_FUEL_NOISE, nformat, framenr);
- string fReact = getFile(mmd, FLUID_DOMAIN_DIR_NOISE, FLUID_NAME_REACT_NOISE, nformat, framenr);
- string fNoise = getFile(mmd, FLUID_DOMAIN_DIR_NOISE, FLUID_NAME_NOISE, nformat, framenr);
-
- /* Prepare grid info containers. */
- GridItem gShadow = {aShadow, FLUID_DOMAIN_GRID_FLOAT, resData, FLUID_NAME_SHADOW};
- GridItem gVel = {aVelocities, FLUID_DOMAIN_GRID_VEC3F, resData, FLUID_NAME_VELOCITY};
-
- GridItem gDensity = {aDensity, FLUID_DOMAIN_GRID_FLOAT, resNoise, FLUID_NAME_DENSITY_NOISE};
- GridItem gColorR = {aColorR, FLUID_DOMAIN_GRID_FLOAT, resNoise, FLUID_NAME_COLORR_NOISE};
- GridItem gColorG = {aColorG, FLUID_DOMAIN_GRID_FLOAT, resNoise, FLUID_NAME_COLORG_NOISE};
- GridItem gColorB = {aColorB, FLUID_DOMAIN_GRID_FLOAT, resNoise, FLUID_NAME_COLORB_NOISE};
- GridItem gFlame = {aFlame, FLUID_DOMAIN_GRID_FLOAT, resNoise, FLUID_NAME_FLAME_NOISE};
- GridItem gFuel = {aFuel, FLUID_DOMAIN_GRID_FLOAT, resNoise, FLUID_NAME_FUEL_NOISE};
- GridItem gReact = {aReact, FLUID_DOMAIN_GRID_FLOAT, resNoise, FLUID_NAME_REACT_NOISE};
-
- /* TODO (sebbas): For now, only allow single file mode. Combined grid file export is todo. */
- const int fileMode = FLUID_DOMAIN_CACHE_FILES_SINGLE;
- if (fileMode == FLUID_DOMAIN_CACHE_FILES_SINGLE) {
-
- filesData.push_back({fShadow, {gShadow}});
- filesData.push_back({fVel, {gVel}});
-
- filesNoise.push_back({fDensity, {gDensity}});
- if (mUsingColors) {
- filesNoise.push_back({fColorR, {gColorR}});
- filesNoise.push_back({fColorG, {gColorG}});
- filesNoise.push_back({fColorB, {gColorB}});
- }
- if (mUsingFire) {
- filesNoise.push_back({fFlame, {gFlame}});
- filesNoise.push_back({fFuel, {gFuel}});
- filesNoise.push_back({fReact, {gReact}});
- }
- }
- else if (fileMode == FLUID_DOMAIN_CACHE_FILES_COMBINED) {
-
- gridsData.push_back(gShadow);
- gridsData.push_back(gVel);
-
- gridsNoise.push_back(gDensity);
- if (mUsingColors) {
- gridsNoise.push_back(gColorR);
- gridsNoise.push_back(gColorG);
- gridsNoise.push_back(gColorB);
- }
- if (mUsingFire) {
- gridsNoise.push_back(gFlame);
- gridsNoise.push_back(gFuel);
- gridsNoise.push_back(gReact);
- }
-
- if (with_debug) {
- assertGridItems(gridsData);
- assertGridItems(gridsNoise);
- }
- filesData.push_back({fFluid, gridsData});
- filesNoise.push_back({fNoise, gridsNoise});
- }
-
- /* Update files from data directory. */
- for (vector<FileItem>::iterator it = filesData.begin(); it != filesData.end(); ++it) {
- FileItem item = *it;
- if (BLI_exists(item.filename.c_str())) {
- result += updateGridsFromFile(item.filename, item.grids);
- assert(result);
- }
- }
-
- /* Update files from noise directory. */
- for (vector<FileItem>::iterator it = filesNoise.begin(); it != filesNoise.end(); ++it) {
- FileItem item = *it;
- if (BLI_exists(item.filename.c_str())) {
- result += updateGridsFromFile(item.filename, item.grids);
- assert(result);
- }
- }
-
- return mNoiseFromFile = result;
-}
-
/* Dirty hack: Needed to format paths from python code that is run via PyRun_SimpleString */
static string escapeSlashes(string const &s)
{
@@ -1514,6 +1131,7 @@ bool MANTA::writeConfiguration(FluidModifierData *mmd, int framenr)
gzwrite(gzf, &mds->res_max, 3 * sizeof(int));
gzwrite(gzf, &mds->active_color, 3 * sizeof(float));
gzwrite(gzf, &mds->time_total, sizeof(int));
+ gzwrite(gzf, &FLUID_CACHE_VERSION, 4 * sizeof(char));
return (gzclose(gzf) == Z_OK);
}
@@ -1528,27 +1146,19 @@ bool MANTA::writeData(FluidModifierData *mmd, int framenr)
FluidDomainSettings *mds = mmd->domain;
string directory = getDirectory(mmd, FLUID_DOMAIN_DIR_DATA);
- string dformat = getCacheFileEnding(mds->cache_data_format);
- string pformat = getCacheFileEnding(mds->cache_particle_format);
-
- bool final_cache = (mds->cache_type == FLUID_DOMAIN_CACHE_FINAL);
- string resumable_cache = (final_cache) ? "False" : "True";
-
- ss.str("");
- ss << "fluid_save_data_" << mCurrentID << "('" << escapeSlashes(directory) << "', " << framenr
- << ", '" << dformat << "', " << resumable_cache << ")";
- pythonCommands.push_back(ss.str());
+ string volume_format = getCacheFileEnding(mds->cache_data_format);
+ string resumable_cache = !(mds->flags & FLUID_DOMAIN_USE_RESUMABLE_CACHE) ? "False" : "True";
if (mUsingSmoke) {
ss.str("");
ss << "smoke_save_data_" << mCurrentID << "('" << escapeSlashes(directory) << "', " << framenr
- << ", '" << dformat << "', " << resumable_cache << ")";
+ << ", '" << volume_format << "', " << resumable_cache << ")";
pythonCommands.push_back(ss.str());
}
if (mUsingLiquid) {
ss.str("");
ss << "liquid_save_data_" << mCurrentID << "('" << escapeSlashes(directory) << "', " << framenr
- << ", '" << dformat << "', " << resumable_cache << ")";
+ << ", '" << volume_format << "', " << resumable_cache << ")";
pythonCommands.push_back(ss.str());
}
return runPythonString(pythonCommands);
@@ -1564,15 +1174,13 @@ bool MANTA::writeNoise(FluidModifierData *mmd, int framenr)
FluidDomainSettings *mds = mmd->domain;
string directory = getDirectory(mmd, FLUID_DOMAIN_DIR_NOISE);
- string nformat = getCacheFileEnding(mds->cache_noise_format);
-
- bool final_cache = (mds->cache_type == FLUID_DOMAIN_CACHE_FINAL);
- string resumable_cache = (final_cache) ? "False" : "True";
+ string volume_format = getCacheFileEnding(mds->cache_data_format);
+ string resumable_cache = !(mds->flags & FLUID_DOMAIN_USE_RESUMABLE_CACHE) ? "False" : "True";
if (mUsingSmoke && mUsingNoise) {
ss.str("");
ss << "smoke_save_noise_" << mCurrentID << "('" << escapeSlashes(directory) << "', " << framenr
- << ", '" << nformat << "', " << resumable_cache << ")";
+ << ", '" << volume_format << "', " << resumable_cache << ")";
pythonCommands.push_back(ss.str());
}
return runPythonString(pythonCommands);
@@ -1593,7 +1201,7 @@ bool MANTA::readConfiguration(FluidModifierData *mmd, int framenr)
if (!hasConfig(mmd, framenr))
return false;
- gzFile gzf = (gzFile)BLI_gzopen(file.c_str(), "rb"); // do some compression
+ gzFile gzf = (gzFile)BLI_gzopen(file.c_str(), "rb"); /* Do some compression. */
if (!gzf) {
cerr << "Fluid Error -- Cannot open file " << file << endl;
return false;
@@ -1602,7 +1210,7 @@ bool MANTA::readConfiguration(FluidModifierData *mmd, int framenr)
gzread(gzf, &mds->active_fields, sizeof(int));
gzread(gzf, &mds->res, 3 * sizeof(int));
gzread(gzf, &mds->dx, sizeof(float));
- gzread(gzf, &dummy, sizeof(float)); // dt not needed right now
+ gzread(gzf, &dummy, sizeof(float)); /* dt not needed right now. */
gzread(gzf, &mds->p0, 3 * sizeof(float));
gzread(gzf, &mds->p1, 3 * sizeof(float));
gzread(gzf, &mds->dp0, 3 * sizeof(float));
@@ -1614,13 +1222,14 @@ bool MANTA::readConfiguration(FluidModifierData *mmd, int framenr)
gzread(gzf, &mds->res_max, 3 * sizeof(int));
gzread(gzf, &mds->active_color, 3 * sizeof(float));
gzread(gzf, &mds->time_total, sizeof(int));
+ gzread(gzf, &mds->cache_id, 4 * sizeof(char)); /* Older caches might have no id. */
mds->total_cells = mds->res[0] * mds->res[1] * mds->res[2];
return (gzclose(gzf) == Z_OK);
}
-bool MANTA::readData(FluidModifierData *mmd, int framenr)
+bool MANTA::readData(FluidModifierData *mmd, int framenr, bool resumable)
{
if (with_debug)
cout << "MANTA::readData()" << endl;
@@ -1634,39 +1243,31 @@ bool MANTA::readData(FluidModifierData *mmd, int framenr)
bool result = true;
string directory = getDirectory(mmd, FLUID_DOMAIN_DIR_DATA);
- string dformat = getCacheFileEnding(mds->cache_data_format);
- string pformat = getCacheFileEnding(mds->cache_particle_format);
-
- bool final_cache = (mds->cache_type == FLUID_DOMAIN_CACHE_FINAL);
- string resumable_cache = (final_cache) ? "False" : "True";
+ string volume_format = getCacheFileEnding(mds->cache_data_format);
+ string resumable_cache = (!resumable) ? "False" : "True";
/* Sanity check: Are cache files present? */
if (!hasData(mmd, framenr))
return false;
- ss.str("");
- ss << "fluid_load_data_" << mCurrentID << "('" << escapeSlashes(directory) << "', " << framenr
- << ", '" << dformat << "', " << resumable_cache << ")";
- pythonCommands.push_back(ss.str());
-
if (mUsingSmoke) {
ss.str("");
ss << "smoke_load_data_" << mCurrentID << "('" << escapeSlashes(directory) << "', " << framenr
- << ", '" << dformat << "', " << resumable_cache << ")";
+ << ", '" << volume_format << "', " << resumable_cache << ")";
pythonCommands.push_back(ss.str());
result &= runPythonString(pythonCommands);
}
if (mUsingLiquid) {
ss.str("");
ss << "liquid_load_data_" << mCurrentID << "('" << escapeSlashes(directory) << "', " << framenr
- << ", '" << dformat << "', " << resumable_cache << ")";
+ << ", '" << volume_format << "', " << resumable_cache << ")";
pythonCommands.push_back(ss.str());
result &= runPythonString(pythonCommands);
}
return result;
}
-bool MANTA::readNoise(FluidModifierData *mmd, int framenr)
+bool MANTA::readNoise(FluidModifierData *mmd, int framenr, bool resumable)
{
if (with_debug)
cout << "MANTA::readNoise()" << endl;
@@ -1679,10 +1280,12 @@ bool MANTA::readNoise(FluidModifierData *mmd, int framenr)
FluidDomainSettings *mds = mmd->domain;
string directory = getDirectory(mmd, FLUID_DOMAIN_DIR_NOISE);
- string nformat = getCacheFileEnding(mds->cache_noise_format);
+ string resumable_cache = (!resumable) ? "False" : "True";
- bool final_cache = (mds->cache_type == FLUID_DOMAIN_CACHE_FINAL);
- string resumable_cache = (final_cache) ? "False" : "True";
+ /* Support older caches which had more granular file format control. */
+ char format = (!strcmp(mds->cache_id, FLUID_CACHE_VERSION)) ? mds->cache_data_format :
+ mds->cache_noise_format;
+ string volume_format = getCacheFileEnding(format);
/* Sanity check: Are cache files present? */
if (!hasNoise(mmd, framenr))
@@ -1690,15 +1293,12 @@ bool MANTA::readNoise(FluidModifierData *mmd, int framenr)
ss.str("");
ss << "smoke_load_noise_" << mCurrentID << "('" << escapeSlashes(directory) << "', " << framenr
- << ", '" << nformat << "', " << resumable_cache << ")";
+ << ", '" << volume_format << "', " << resumable_cache << ")";
pythonCommands.push_back(ss.str());
return runPythonString(pythonCommands);
}
-/* Deprecated! This function reads mesh data via the Manta Python API.
- * MANTA:updateMeshStructures() reads cache files directly from disk
- * and is preferred due to its better performance. */
bool MANTA::readMesh(FluidModifierData *mmd, int framenr)
{
if (with_debug)
@@ -1712,8 +1312,8 @@ bool MANTA::readMesh(FluidModifierData *mmd, int framenr)
FluidDomainSettings *mds = mmd->domain;
string directory = getDirectory(mmd, FLUID_DOMAIN_DIR_MESH);
- string mformat = getCacheFileEnding(mds->cache_mesh_format);
- string dformat = getCacheFileEnding(mds->cache_data_format);
+ string mesh_format = getCacheFileEnding(mds->cache_mesh_format);
+ string volume_format = getCacheFileEnding(mds->cache_data_format);
/* Sanity check: Are cache files present? */
if (!hasMesh(mmd, framenr))
@@ -1721,23 +1321,20 @@ bool MANTA::readMesh(FluidModifierData *mmd, int framenr)
ss.str("");
ss << "liquid_load_mesh_" << mCurrentID << "('" << escapeSlashes(directory) << "', " << framenr
- << ", '" << mformat << "')";
+ << ", '" << mesh_format << "')";
pythonCommands.push_back(ss.str());
if (mUsingMVel) {
ss.str("");
ss << "liquid_load_meshvel_" << mCurrentID << "('" << escapeSlashes(directory) << "', "
- << framenr << ", '" << dformat << "')";
+ << framenr << ", '" << volume_format << "')";
pythonCommands.push_back(ss.str());
}
return runPythonString(pythonCommands);
}
-/* Deprecated! This function reads particle data via the Manta Python API.
- * MANTA:updateParticleStructures() reads cache files directly from disk
- * and is preferred due to its better performance. */
-bool MANTA::readParticles(FluidModifierData *mmd, int framenr)
+bool MANTA::readParticles(FluidModifierData *mmd, int framenr, bool resumable)
{
if (with_debug)
cout << "MANTA::readParticles()" << endl;
@@ -1752,10 +1349,12 @@ bool MANTA::readParticles(FluidModifierData *mmd, int framenr)
FluidDomainSettings *mds = mmd->domain;
string directory = getDirectory(mmd, FLUID_DOMAIN_DIR_PARTICLES);
- string pformat = getCacheFileEnding(mds->cache_particle_format);
+ string resumable_cache = (!resumable) ? "False" : "True";
- bool final_cache = (mds->cache_type == FLUID_DOMAIN_CACHE_FINAL);
- string resumable_cache = (final_cache) ? "False" : "True";
+ /* Support older caches which had more granular file format control. */
+ char format = (!strcmp(mds->cache_id, FLUID_CACHE_VERSION)) ? mds->cache_data_format :
+ mds->cache_particle_format;
+ string volume_format = getCacheFileEnding(format);
/* Sanity check: Are cache files present? */
if (!hasParticles(mmd, framenr))
@@ -1763,7 +1362,7 @@ bool MANTA::readParticles(FluidModifierData *mmd, int framenr)
ss.str("");
ss << "liquid_load_particles_" << mCurrentID << "('" << escapeSlashes(directory) << "', "
- << framenr << ", '" << pformat << "', " << resumable_cache << ")";
+ << framenr << ", '" << volume_format << "', " << resumable_cache << ")";
pythonCommands.push_back(ss.str());
return runPythonString(pythonCommands);
@@ -1786,7 +1385,7 @@ bool MANTA::readGuiding(FluidModifierData *mmd, int framenr, bool sourceDomain)
string directory = (sourceDomain) ? getDirectory(mmd, FLUID_DOMAIN_DIR_DATA) :
getDirectory(mmd, FLUID_DOMAIN_DIR_GUIDE);
- string gformat = getCacheFileEnding(mds->cache_data_format);
+ string volume_format = getCacheFileEnding(mds->cache_data_format);
/* Sanity check: Are cache files present? */
if (!hasGuiding(mmd, framenr, sourceDomain))
@@ -1795,12 +1394,12 @@ bool MANTA::readGuiding(FluidModifierData *mmd, int framenr, bool sourceDomain)
if (sourceDomain) {
ss.str("");
ss << "fluid_load_vel_" << mCurrentID << "('" << escapeSlashes(directory) << "', " << framenr
- << ", '" << gformat << "')";
+ << ", '" << volume_format << "')";
}
else {
ss.str("");
ss << "fluid_load_guiding_" << mCurrentID << "('" << escapeSlashes(directory) << "', "
- << framenr << ", '" << gformat << "')";
+ << framenr << ", '" << volume_format << "')";
}
pythonCommands.push_back(ss.str());
@@ -1821,9 +1420,7 @@ bool MANTA::bakeData(FluidModifierData *mmd, int framenr)
cacheDirData[0] = '\0';
cacheDirGuiding[0] = '\0';
- string dformat = getCacheFileEnding(mds->cache_data_format);
- string pformat = getCacheFileEnding(mds->cache_particle_format);
- string gformat = dformat; // Use same data format for guiding format
+ string volume_format = getCacheFileEnding(mds->cache_data_format);
BLI_path_join(
cacheDirData, sizeof(cacheDirData), mds->cache_directory, FLUID_DOMAIN_DIR_DATA, nullptr);
@@ -1836,9 +1433,8 @@ bool MANTA::bakeData(FluidModifierData *mmd, int framenr)
BLI_path_make_safe(cacheDirGuiding);
ss.str("");
- ss << "bake_fluid_data_" << mCurrentID << "('" << escapeSlashes(cacheDirData) << "', '"
- << escapeSlashes(cacheDirGuiding) << "', " << framenr << ", '" << dformat << "', '" << pformat
- << "', '" << gformat << "')";
+ ss << "bake_fluid_data_" << mCurrentID << "('" << escapeSlashes(cacheDirData) << "', " << framenr
+ << ", '" << volume_format << "')";
pythonCommands.push_back(ss.str());
return runPythonString(pythonCommands);
@@ -1853,27 +1449,18 @@ bool MANTA::bakeNoise(FluidModifierData *mmd, int framenr)
vector<string> pythonCommands;
FluidDomainSettings *mds = mmd->domain;
- char cacheDirData[FILE_MAX], cacheDirNoise[FILE_MAX];
- cacheDirData[0] = '\0';
+ char cacheDirNoise[FILE_MAX];
cacheDirNoise[0] = '\0';
- string dformat = getCacheFileEnding(mds->cache_data_format);
- string nformat = getCacheFileEnding(mds->cache_noise_format);
+ string volume_format = getCacheFileEnding(mds->cache_data_format);
- bool final_cache = (mds->cache_type == FLUID_DOMAIN_CACHE_FINAL);
- string resumable_cache = (final_cache) ? "False" : "True";
-
- BLI_path_join(
- cacheDirData, sizeof(cacheDirData), mds->cache_directory, FLUID_DOMAIN_DIR_DATA, nullptr);
BLI_path_join(
cacheDirNoise, sizeof(cacheDirNoise), mds->cache_directory, FLUID_DOMAIN_DIR_NOISE, nullptr);
- BLI_path_make_safe(cacheDirData);
BLI_path_make_safe(cacheDirNoise);
ss.str("");
- ss << "bake_noise_" << mCurrentID << "('" << escapeSlashes(cacheDirData) << "', '"
- << escapeSlashes(cacheDirNoise) << "', " << framenr << ", '" << dformat << "', '" << nformat
- << "', " << resumable_cache << ")";
+ ss << "bake_noise_" << mCurrentID << "('" << escapeSlashes(cacheDirNoise) << "', " << framenr
+ << ", '" << volume_format << "')";
pythonCommands.push_back(ss.str());
return runPythonString(pythonCommands);
@@ -1888,25 +1475,19 @@ bool MANTA::bakeMesh(FluidModifierData *mmd, int framenr)
vector<string> pythonCommands;
FluidDomainSettings *mds = mmd->domain;
- char cacheDirData[FILE_MAX], cacheDirMesh[FILE_MAX];
- cacheDirData[0] = '\0';
+ char cacheDirMesh[FILE_MAX];
cacheDirMesh[0] = '\0';
- string dformat = getCacheFileEnding(mds->cache_data_format);
- string mformat = getCacheFileEnding(mds->cache_mesh_format);
- string pformat = getCacheFileEnding(mds->cache_particle_format);
+ string volume_format = getCacheFileEnding(mds->cache_data_format);
+ string mesh_format = getCacheFileEnding(mds->cache_mesh_format);
BLI_path_join(
- cacheDirData, sizeof(cacheDirData), mds->cache_directory, FLUID_DOMAIN_DIR_DATA, nullptr);
- BLI_path_join(
cacheDirMesh, sizeof(cacheDirMesh), mds->cache_directory, FLUID_DOMAIN_DIR_MESH, nullptr);
- BLI_path_make_safe(cacheDirData);
BLI_path_make_safe(cacheDirMesh);
ss.str("");
- ss << "bake_mesh_" << mCurrentID << "('" << escapeSlashes(cacheDirData) << "', '"
- << escapeSlashes(cacheDirMesh) << "', " << framenr << ", '" << dformat << "', '" << mformat
- << "', '" << pformat << "')";
+ ss << "bake_mesh_" << mCurrentID << "('" << escapeSlashes(cacheDirMesh) << "', " << framenr
+ << ", '" << volume_format << "', '" << mesh_format << "')";
pythonCommands.push_back(ss.str());
return runPythonString(pythonCommands);
@@ -1921,30 +1502,22 @@ bool MANTA::bakeParticles(FluidModifierData *mmd, int framenr)
vector<string> pythonCommands;
FluidDomainSettings *mds = mmd->domain;
- char cacheDirData[FILE_MAX], cacheDirParticles[FILE_MAX];
- cacheDirData[0] = '\0';
+ char cacheDirParticles[FILE_MAX];
cacheDirParticles[0] = '\0';
- string dformat = getCacheFileEnding(mds->cache_data_format);
- string pformat = getCacheFileEnding(mds->cache_particle_format);
-
- bool final_cache = (mds->cache_type == FLUID_DOMAIN_CACHE_FINAL);
- string resumable_cache = (final_cache) ? "False" : "True";
+ string volume_format = getCacheFileEnding(mds->cache_data_format);
+ string resumable_cache = !(mds->flags & FLUID_DOMAIN_USE_RESUMABLE_CACHE) ? "False" : "True";
- BLI_path_join(
- cacheDirData, sizeof(cacheDirData), mds->cache_directory, FLUID_DOMAIN_DIR_DATA, nullptr);
BLI_path_join(cacheDirParticles,
sizeof(cacheDirParticles),
mds->cache_directory,
FLUID_DOMAIN_DIR_PARTICLES,
nullptr);
- BLI_path_make_safe(cacheDirData);
BLI_path_make_safe(cacheDirParticles);
ss.str("");
- ss << "bake_particles_" << mCurrentID << "('" << escapeSlashes(cacheDirData) << "', '"
- << escapeSlashes(cacheDirParticles) << "', " << framenr << ", '" << dformat << "', '"
- << pformat << "', " << resumable_cache << ")";
+ ss << "bake_particles_" << mCurrentID << "('" << escapeSlashes(cacheDirParticles) << "', "
+ << framenr << ", '" << volume_format << "', " << resumable_cache << ")";
pythonCommands.push_back(ss.str());
return runPythonString(pythonCommands);
@@ -1962,10 +1535,7 @@ bool MANTA::bakeGuiding(FluidModifierData *mmd, int framenr)
char cacheDirGuiding[FILE_MAX];
cacheDirGuiding[0] = '\0';
- string gformat = getCacheFileEnding(mds->cache_data_format);
-
- bool final_cache = (mds->cache_type == FLUID_DOMAIN_CACHE_FINAL);
- string resumable_cache = (final_cache) ? "False" : "True";
+ string volume_format = getCacheFileEnding(mds->cache_data_format);
BLI_path_join(cacheDirGuiding,
sizeof(cacheDirGuiding),
@@ -1976,7 +1546,7 @@ bool MANTA::bakeGuiding(FluidModifierData *mmd, int framenr)
ss.str("");
ss << "bake_guiding_" << mCurrentID << "('" << escapeSlashes(cacheDirGuiding) << "', " << framenr
- << ", '" << gformat << "', " << resumable_cache << ")";
+ << ", '" << volume_format << "')";
pythonCommands.push_back(ss.str());
return runPythonString(pythonCommands);
@@ -2092,8 +1662,7 @@ void MANTA::exportSmokeScript(FluidModifierData *mmd)
manta_script += header_time + fluid_time_stepping + fluid_adapt_time_step;
// Import
- manta_script += header_import + fluid_file_import + fluid_cache_helper + fluid_load_data +
- smoke_load_data;
+ manta_script += header_import + fluid_file_import + fluid_cache_helper + smoke_load_data;
if (noise)
manta_script += smoke_load_noise;
if (guiding)
@@ -2198,8 +1767,7 @@ void MANTA::exportLiquidScript(FluidModifierData *mmd)
manta_script += header_time + fluid_time_stepping + fluid_adapt_time_step;
// Import
- manta_script += header_import + fluid_file_import + fluid_cache_helper + fluid_load_data +
- liquid_load_data;
+ manta_script += header_import + fluid_file_import + fluid_cache_helper + liquid_load_data;
if (mesh)
manta_script += liquid_load_mesh;
if (drops || bubble || floater || tracer)
@@ -2241,7 +1809,7 @@ static PyObject *callPythonFunction(string varName, string functionName, bool is
{
if ((varName == "") || (functionName == "")) {
if (MANTA::with_debug)
- cout << "Missing Python variable name and/or function name -- name is: " << varName
+ cout << "Fluid: Missing Python variable name and/or function name -- name is: " << varName
<< ", function name is: " << functionName << endl;
return nullptr;
}
@@ -2384,952 +1952,6 @@ void MANTA::adaptTimestep()
runPythonString(pythonCommands);
}
-bool MANTA::updateMeshFromFile(string filename)
-{
- string fname(filename);
- string::size_type idx;
-
- idx = fname.rfind('.');
- if (idx != string::npos) {
- string extension = fname.substr(idx + 1);
-
- if (extension.compare("gz") == 0)
- return updateMeshFromBobj(filename);
- else if (extension.compare("obj") == 0)
- return updateMeshFromObj(filename);
- else if (extension.compare("uni") == 0)
- return updateMeshFromUni(filename);
- else
- cerr << "Fluid Error -- updateMeshFromFile(): Invalid file extension in file: " << filename
- << endl;
- }
- else {
- cerr << "Fluid Error -- updateMeshFromFile(): Unable to open file: " << filename << endl;
- }
- return false;
-}
-
-bool MANTA::updateMeshFromBobj(string filename)
-{
- if (with_debug)
- cout << "MANTA::updateMeshFromBobj()" << endl;
-
- gzFile gzf;
-
- gzf = (gzFile)BLI_gzopen(filename.c_str(), "rb1"); // do some compression
- if (!gzf) {
- cerr << "Fluid Error -- updateMeshFromBobj(): Unable to open file: " << filename << endl;
- return false;
- }
-
- int numBuffer = 0, readBytes = 0;
-
- // Num vertices
- readBytes = gzread(gzf, &numBuffer, sizeof(int));
- if (!readBytes) {
- cerr << "Fluid Error -- updateMeshFromBobj(): Unable to read number of mesh vertices from "
- << filename << endl;
- gzclose(gzf);
- return false;
- }
-
- if (with_debug)
- cout << "read mesh , num verts: " << numBuffer << " , in file: " << filename << endl;
-
- int numChunks = (int)(ceil((float)numBuffer / NODE_CHUNK));
- int readLen, readStart, readEnd, k;
-
- if (numBuffer) {
- // Vertices
- int todoVertices = numBuffer;
- float *bufferVerts = (float *)MEM_malloc_arrayN(
- NODE_CHUNK, sizeof(float) * 3, "fluid_mesh_vertices");
-
- mMeshNodes->resize(numBuffer);
-
- for (int i = 0; i < numChunks && todoVertices > 0; ++i) {
- readLen = NODE_CHUNK;
- if (todoVertices < NODE_CHUNK) {
- readLen = todoVertices;
- }
-
- readBytes = gzread(gzf, bufferVerts, readLen * sizeof(float) * 3);
- if (!readBytes) {
- cerr << "Fluid Error -- updateMeshFromBobj(): Unable to read mesh vertices from "
- << filename << endl;
- MEM_freeN(bufferVerts);
- gzclose(gzf);
- return false;
- }
-
- readStart = (numBuffer - todoVertices);
- CLAMP(readStart, 0, numBuffer);
- readEnd = readStart + readLen;
- CLAMP(readEnd, 0, numBuffer);
-
- k = 0;
- for (vector<MANTA::Node>::size_type j = readStart; j < readEnd; j++, k += 3) {
- mMeshNodes->at(j).pos[0] = bufferVerts[k];
- mMeshNodes->at(j).pos[1] = bufferVerts[k + 1];
- mMeshNodes->at(j).pos[2] = bufferVerts[k + 2];
- }
- todoVertices -= readLen;
- }
- MEM_freeN(bufferVerts);
- }
-
- // Num normals
- readBytes = gzread(gzf, &numBuffer, sizeof(int));
- if (!readBytes) {
- cerr << "Fluid Error -- updateMeshFromBobj(): Unable to read number of mesh normals from "
- << filename << endl;
- gzclose(gzf);
- return false;
- }
-
- if (with_debug)
- cout << "read mesh , num normals : " << numBuffer << " , in file: " << filename << endl;
-
- if (numBuffer) {
- // Normals
- int todoNormals = numBuffer;
- float *bufferNormals = (float *)MEM_malloc_arrayN(
- NODE_CHUNK, sizeof(float) * 3, "fluid_mesh_normals");
-
- if (!getNumVertices())
- mMeshNodes->resize(numBuffer);
-
- for (int i = 0; i < numChunks && todoNormals > 0; ++i) {
- readLen = NODE_CHUNK;
- if (todoNormals < NODE_CHUNK) {
- readLen = todoNormals;
- }
-
- readBytes = gzread(gzf, bufferNormals, readLen * sizeof(float) * 3);
- if (!readBytes) {
- cerr << "Fluid Error -- updateMeshFromBobj(): Unable to read mesh normals from "
- << filename << endl;
- MEM_freeN(bufferNormals);
- gzclose(gzf);
- return false;
- }
-
- readStart = (numBuffer - todoNormals);
- CLAMP(readStart, 0, numBuffer);
- readEnd = readStart + readLen;
- CLAMP(readEnd, 0, numBuffer);
-
- k = 0;
- for (vector<MANTA::Node>::size_type j = readStart; j < readEnd; j++, k += 3) {
- mMeshNodes->at(j).normal[0] = bufferNormals[k];
- mMeshNodes->at(j).normal[1] = bufferNormals[k + 1];
- mMeshNodes->at(j).normal[2] = bufferNormals[k + 2];
- }
- todoNormals -= readLen;
- }
- MEM_freeN(bufferNormals);
- }
-
- // Num triangles
- readBytes = gzread(gzf, &numBuffer, sizeof(int));
- if (!readBytes) {
- cerr << "Fluid Error -- updateMeshFromBobj(): Unable to read number of mesh triangles from "
- << filename << endl;
- gzclose(gzf);
- return false;
- }
-
- if (with_debug)
- cout << "Fluid: Read mesh , num triangles : " << numBuffer << " , in file: " << filename
- << endl;
-
- numChunks = (int)(ceil((float)numBuffer / TRIANGLE_CHUNK));
-
- if (numBuffer) {
- // Triangles
- int todoTriangles = numBuffer;
- int *bufferTriangles = (int *)MEM_malloc_arrayN(
- TRIANGLE_CHUNK, sizeof(int) * 3, "fluid_mesh_triangles");
-
- mMeshTriangles->resize(numBuffer);
-
- for (int i = 0; i < numChunks && todoTriangles > 0; ++i) {
- readLen = TRIANGLE_CHUNK;
- if (todoTriangles < TRIANGLE_CHUNK) {
- readLen = todoTriangles;
- }
-
- readBytes = gzread(gzf, bufferTriangles, readLen * sizeof(int) * 3);
- if (!readBytes) {
- cerr << "Fluid Error -- updateMeshFromBobj(): Unable to read mesh triangles from "
- << filename << endl;
- MEM_freeN(bufferTriangles);
- gzclose(gzf);
- return false;
- }
-
- readStart = (numBuffer - todoTriangles);
- CLAMP(readStart, 0, numBuffer);
- readEnd = readStart + readLen;
- CLAMP(readEnd, 0, numBuffer);
-
- k = 0;
- for (vector<MANTA::Triangle>::size_type j = readStart; j < readEnd; j++, k += 3) {
- mMeshTriangles->at(j).c[0] = bufferTriangles[k];
- mMeshTriangles->at(j).c[1] = bufferTriangles[k + 1];
- mMeshTriangles->at(j).c[2] = bufferTriangles[k + 2];
- }
- todoTriangles -= readLen;
- }
- MEM_freeN(bufferTriangles);
- }
- return (gzclose(gzf) == Z_OK);
-}
-
-bool MANTA::updateMeshFromObj(string filename)
-{
- if (with_debug)
- cout << "MANTA::updateMeshFromObj()" << endl;
-
- ifstream ifs(filename);
- float fbuffer[3];
- int ibuffer[3];
- int cntVerts = 0, cntNormals = 0, cntTris = 0;
-
- if (!ifs.good()) {
- cerr << "Fluid Error -- updateMeshFromObj(): Unable to open file: " << filename << endl;
- return false;
- }
-
- while (ifs.good() && !ifs.eof()) {
- string id;
- ifs >> id;
-
- if (id[0] == '#') {
- // comment
- getline(ifs, id);
- continue;
- }
- if (id == "vt") {
- // tex coord, ignore
- }
- else if (id == "vn") {
- // normals
- if (getNumVertices() != cntVerts) {
- cerr << "Fluid Error -- updateMeshFromObj(): Invalid number of mesh nodes in file: "
- << filename << endl;
- return false;
- }
-
- ifs >> fbuffer[0] >> fbuffer[1] >> fbuffer[2];
- MANTA::Node *node = &mMeshNodes->at(cntNormals);
- (*node).normal[0] = fbuffer[0];
- (*node).normal[1] = fbuffer[1];
- (*node).normal[2] = fbuffer[2];
- cntNormals++;
- }
- else if (id == "v") {
- // vertex
- ifs >> fbuffer[0] >> fbuffer[1] >> fbuffer[2];
- MANTA::Node node;
- node.pos[0] = fbuffer[0];
- node.pos[1] = fbuffer[1];
- node.pos[2] = fbuffer[2];
- mMeshNodes->push_back(node);
- cntVerts++;
- }
- else if (id == "g") {
- // group
- string group;
- ifs >> group;
- }
- else if (id == "f") {
- // face
- string face;
- for (int i = 0; i < 3; i++) {
- ifs >> face;
- if (face.find('/') != string::npos)
- face = face.substr(0, face.find('/')); // ignore other indices
- int idx = atoi(face.c_str()) - 1;
- if (idx < 0) {
- cerr << "Fluid Error -- updateMeshFromObj(): Invalid face encountered in file: "
- << filename << endl;
- return false;
- }
- ibuffer[i] = idx;
- }
- MANTA::Triangle triangle;
- triangle.c[0] = ibuffer[0];
- triangle.c[1] = ibuffer[1];
- triangle.c[2] = ibuffer[2];
- mMeshTriangles->push_back(triangle);
- cntTris++;
- }
- else {
- // whatever, ignore
- }
- // kill rest of line
- getline(ifs, id);
- }
- ifs.close();
- return true;
-}
-
-bool MANTA::updateMeshFromUni(string filename)
-{
- if (with_debug)
- cout << "MANTA::updateMeshFromUni()" << endl;
-
- gzFile gzf;
- float fbuffer[4];
- int ibuffer[4];
-
- gzf = (gzFile)BLI_gzopen(filename.c_str(), "rb1"); // do some compression
- if (!gzf) {
- cerr << "Fluid Error -- updateMeshFromUni(): Unable to open file: " << filename << endl;
- return false;
- }
-
- int readBytes = 0;
- char file_magic[5] = {0, 0, 0, 0, 0};
- readBytes = gzread(gzf, file_magic, 4);
- if (!readBytes) {
- cerr << "Fluid Error -- updateMeshFromUni(): Unable to read header in file: " << filename
- << endl;
- gzclose(gzf);
- return false;
- }
-
- vector<pVel> *velocityPointer = mMeshVelocities;
-
- // mdata uni header
- const int STR_LEN_PDATA = 256;
- int elementType, bytesPerElement, numParticles;
- char info[STR_LEN_PDATA]; // mantaflow build information
- unsigned long long timestamp; // creation time
-
- // read mesh header
- gzread(gzf, &ibuffer, sizeof(int) * 4); // num particles, dimX, dimY, dimZ
- gzread(gzf, &elementType, sizeof(int));
- gzread(gzf, &bytesPerElement, sizeof(int));
- gzread(gzf, &info, sizeof(info));
- gzread(gzf, &timestamp, sizeof(unsigned long long));
-
- if (with_debug)
- cout << "Fluid: Read " << ibuffer[0] << " vertices in file: " << filename << endl;
-
- // Sanity checks
- const int meshSize = sizeof(float) * 3 + sizeof(int);
- if (!(bytesPerElement == meshSize) && (elementType == 0)) {
- cerr << "Fluid Error -- updateMeshFromUni(): Invalid header in file: " << filename << endl;
- gzclose(gzf);
- return false;
- }
- if (!ibuffer[0]) { // Any vertices present?
- cerr << "Fluid Error -- updateMeshFromUni(): No vertices present in file: " << filename
- << endl;
- gzclose(gzf);
- return false;
- }
-
- // Reading mesh
- if (!strcmp(file_magic, "MB01")) {
- // TODO (sebbas): Future update could add uni mesh support
- }
- // Reading mesh data file v1 with vec3
- else if (!strcmp(file_magic, "MD01")) {
- numParticles = ibuffer[0];
-
- velocityPointer->resize(numParticles);
- MANTA::pVel *bufferPVel;
- for (vector<pVel>::iterator it = velocityPointer->begin(); it != velocityPointer->end();
- ++it) {
- gzread(gzf, fbuffer, sizeof(float) * 3);
- bufferPVel = (MANTA::pVel *)fbuffer;
- it->pos[0] = bufferPVel->pos[0];
- it->pos[1] = bufferPVel->pos[1];
- it->pos[2] = bufferPVel->pos[2];
- }
- }
- return (gzclose(gzf) == Z_OK);
-}
-
-bool MANTA::updateParticlesFromFile(string filename, bool isSecondarySys, bool isVelData)
-{
- if (with_debug)
- cout << "MANTA::updateParticlesFromFile()" << endl;
-
- string fname(filename);
- string::size_type idx;
-
- idx = fname.rfind('.');
- if (idx != string::npos) {
- string extension = fname.substr(idx + 1);
-
- if (extension.compare("uni") == 0)
- return updateParticlesFromUni(filename, isSecondarySys, isVelData);
- else
- cerr << "Fluid Error -- updateParticlesFromFile(): Invalid file extension in file: "
- << filename << endl;
- return false;
- }
- else {
- cerr << "Fluid Error -- updateParticlesFromFile(): Unable to open file: " << filename << endl;
- return false;
- }
-}
-
-bool MANTA::updateParticlesFromUni(string filename, bool isSecondarySys, bool isVelData)
-{
- if (with_debug)
- cout << "MANTA::updateParticlesFromUni()" << endl;
-
- gzFile gzf;
- int ibuffer[4];
-
- gzf = (gzFile)BLI_gzopen(filename.c_str(), "rb1"); // do some compression
- if (!gzf) {
- cerr << "Fluid Error -- updateParticlesFromUni(): Unable to open file: " << filename << endl;
- return false;
- }
-
- int readBytes = 0;
- char file_magic[5] = {0, 0, 0, 0, 0};
- readBytes = gzread(gzf, file_magic, 4);
- if (!readBytes) {
- cerr << "Fluid Error -- updateParticlesFromUni(): Unable to read header in file: " << filename
- << endl;
- gzclose(gzf);
- return false;
- }
-
- if (!strcmp(file_magic, "PB01")) {
- cerr << "Fluid Error -- updateParticlesFromUni(): Particle uni file format v01 not "
- "supported anymore."
- << endl;
- gzclose(gzf);
- return false;
- }
-
- // Pointer to FLIP system or to secondary particle system
- vector<pData> *dataPointer = nullptr;
- vector<pVel> *velocityPointer = nullptr;
- vector<float> *lifePointer = nullptr;
-
- if (isSecondarySys) {
- dataPointer = mSndParticleData;
- velocityPointer = mSndParticleVelocity;
- lifePointer = mSndParticleLife;
- }
- else {
- dataPointer = mFlipParticleData;
- velocityPointer = mFlipParticleVelocity;
- }
-
- // pdata uni header
- const int STR_LEN_PDATA = 256;
- int elementType, bytesPerElement, numParticles;
- char info[STR_LEN_PDATA]; // mantaflow build information
- unsigned long long timestamp; // creation time
-
- // read particle header
- gzread(gzf, &ibuffer, sizeof(int) * 4); // num particles, dimX, dimY, dimZ
- gzread(gzf, &elementType, sizeof(int));
- gzread(gzf, &bytesPerElement, sizeof(int));
- gzread(gzf, &info, sizeof(info));
- gzread(gzf, &timestamp, sizeof(unsigned long long));
-
- if (with_debug)
- cout << "Fluid: Read " << ibuffer[0] << " particles in file: " << filename << endl;
-
- // Sanity checks
- const int partSysSize = sizeof(float) * 3 + sizeof(int);
- if (!(bytesPerElement == partSysSize) && (elementType == 0)) {
- cerr << "Fluid Error -- updateParticlesFromUni(): Invalid header in file: " << filename
- << endl;
- gzclose(gzf);
- return false;
- }
- if (!ibuffer[0]) { // Any particles present?
- if (with_debug)
- cout << "Fluid: No particles present in file: " << filename << endl;
- gzclose(gzf);
- return true; // return true since having no particles in a cache file is valid
- }
-
- numParticles = ibuffer[0];
-
- const int numChunks = (int)(ceil((float)numParticles / PARTICLE_CHUNK));
- int todoParticles, readLen;
- int readStart, readEnd;
-
- // Reading base particle system file v2
- if (!strcmp(file_magic, "PB02")) {
- MANTA::pData *bufferPData;
- todoParticles = numParticles;
- bufferPData = (MANTA::pData *)MEM_malloc_arrayN(
- PARTICLE_CHUNK, sizeof(MANTA::pData), "fluid_particle_data");
-
- dataPointer->resize(numParticles);
-
- for (int i = 0; i < numChunks && todoParticles > 0; ++i) {
- readLen = PARTICLE_CHUNK;
- if (todoParticles < PARTICLE_CHUNK) {
- readLen = todoParticles;
- }
-
- readBytes = gzread(gzf, bufferPData, readLen * sizeof(pData));
- if (!readBytes) {
- cerr << "Fluid Error -- updateParticlesFromUni(): Unable to read particle data in file: "
- << filename << endl;
- MEM_freeN(bufferPData);
- gzclose(gzf);
- return false;
- }
-
- readStart = (numParticles - todoParticles);
- CLAMP(readStart, 0, numParticles);
- readEnd = readStart + readLen;
- CLAMP(readEnd, 0, numParticles);
-
- int k = 0;
- for (vector<MANTA::pData>::size_type j = readStart; j < readEnd; j++, k++) {
- dataPointer->at(j).pos[0] = bufferPData[k].pos[0];
- dataPointer->at(j).pos[1] = bufferPData[k].pos[1];
- dataPointer->at(j).pos[2] = bufferPData[k].pos[2];
- dataPointer->at(j).flag = bufferPData[k].flag;
- }
- todoParticles -= readLen;
- }
- MEM_freeN(bufferPData);
- }
- // Reading particle data file v1 with velocities
- else if (!strcmp(file_magic, "PD01") && isVelData) {
- MANTA::pVel *bufferPVel;
- todoParticles = numParticles;
- bufferPVel = (MANTA::pVel *)MEM_malloc_arrayN(
- PARTICLE_CHUNK, sizeof(MANTA::pVel), "fluid_particle_velocity");
-
- velocityPointer->resize(numParticles);
-
- for (int i = 0; i < numChunks && todoParticles > 0; ++i) {
- readLen = PARTICLE_CHUNK;
- if (todoParticles < PARTICLE_CHUNK) {
- readLen = todoParticles;
- }
-
- readBytes = gzread(gzf, bufferPVel, readLen * sizeof(pVel));
- if (!readBytes) {
- cerr << "Fluid Error -- updateParticlesFromUni(): Unable to read particle velocities "
- "in file: "
- << filename << endl;
- MEM_freeN(bufferPVel);
- gzclose(gzf);
- return false;
- }
-
- readStart = (numParticles - todoParticles);
- CLAMP(readStart, 0, numParticles);
- readEnd = readStart + readLen;
- CLAMP(readEnd, 0, numParticles);
-
- int k = 0;
- for (vector<MANTA::pVel>::size_type j = readStart; j < readEnd; j++, k++) {
- velocityPointer->at(j).pos[0] = bufferPVel[k].pos[0];
- velocityPointer->at(j).pos[1] = bufferPVel[k].pos[1];
- velocityPointer->at(j).pos[2] = bufferPVel[k].pos[2];
- }
- todoParticles -= readLen;
- }
- MEM_freeN(bufferPVel);
- }
- // Reading particle data file v1 with lifetime
- else if (!strcmp(file_magic, "PD01")) {
- float *bufferPLife;
- todoParticles = numParticles;
- bufferPLife = (float *)MEM_malloc_arrayN(PARTICLE_CHUNK, sizeof(float), "fluid_particle_life");
-
- lifePointer->resize(numParticles);
-
- for (int i = 0; i < numChunks && todoParticles > 0; ++i) {
- readLen = PARTICLE_CHUNK;
- if (todoParticles < PARTICLE_CHUNK) {
- readLen = todoParticles;
- }
-
- readBytes = gzread(gzf, bufferPLife, readLen * sizeof(float));
- if (!readBytes) {
- cerr << "Fluid Error -- updateParticlesFromUni(): Unable to read particle life in file: "
- << filename << endl;
- MEM_freeN(bufferPLife);
- gzclose(gzf);
- return false;
- }
-
- readStart = (numParticles - todoParticles);
- CLAMP(readStart, 0, numParticles);
- readEnd = readStart + readLen;
- CLAMP(readEnd, 0, numParticles);
-
- int k = 0;
- for (vector<float>::size_type j = readStart; j < readEnd; j++, k++) {
- lifePointer->at(j) = bufferPLife[k];
- }
- todoParticles -= readLen;
- }
- MEM_freeN(bufferPLife);
- }
- return (gzclose(gzf) == Z_OK);
-}
-
-bool MANTA::updateGridsFromFile(string filename, vector<GridItem> grids)
-{
- if (with_debug)
- cout << "MANTA::updateGridsFromFile()" << endl;
-
- if (grids.empty()) {
- cerr << "Fluid Error -- updateGridsFromFile(): Cannot read into uninitialized grid vector."
- << endl;
- return false;
- }
-
- string fname(filename);
- string::size_type idx;
-
- idx = fname.rfind('.');
- if (idx != string::npos) {
- string extension = fname.substr(idx);
-
- if (extension.compare(FLUID_DOMAIN_EXTENSION_UNI) == 0) {
- return updateGridsFromUni(filename, grids);
- }
-#if OPENVDB == 1
- else if (extension.compare(FLUID_DOMAIN_EXTENSION_OPENVDB) == 0) {
- return updateGridsFromVDB(filename, grids);
- }
-#endif
- else if (extension.compare(FLUID_DOMAIN_EXTENSION_RAW) == 0) {
- return updateGridsFromRaw(filename, grids);
- }
- else {
- cerr << "Fluid Error -- updateGridsFromFile(): Invalid file extension in file: " << filename
- << endl;
- }
- return false;
- }
- else {
- cerr << "Fluid Error -- updateGridsFromFile(): Unable to open file: " << filename << endl;
- return false;
- }
-}
-
-bool MANTA::updateGridsFromUni(string filename, vector<GridItem> grids)
-{
- if (with_debug)
- cout << "MANTA::updateGridsFromUni()" << endl;
-
- gzFile gzf;
- int expectedBytes = 0, readBytes = 0;
- int ibuffer[4];
-
- gzf = (gzFile)BLI_gzopen(filename.c_str(), "rb1");
- if (!gzf) {
- cerr << "Fluid Error -- updateGridsFromUni(): Unable to open file: " << filename << endl;
- return false;
- }
-
- char file_magic[5] = {0, 0, 0, 0, 0};
- readBytes = gzread(gzf, file_magic, 4);
- if (!readBytes) {
- cerr << "Fluid Error -- updateGridsFromUni(): Invalid header in file: " << filename << endl;
- gzclose(gzf);
- return false;
- }
- if (!strcmp(file_magic, "DDF2") || !strcmp(file_magic, "MNT1") || !strcmp(file_magic, "MNT2")) {
- cerr << "Fluid Error -- updateGridsFromUni(): Unsupported header in file: " << filename
- << endl;
- gzclose(gzf);
- return false;
- }
-
- if (!strcmp(file_magic, "MNT3")) {
-
- // grid uni header
- const int STR_LEN_GRID = 252;
- int elementType, bytesPerElement; // data type info
- char info[STR_LEN_GRID]; // mantaflow build information
- int dimT; // optionally store forth dimension for 4d grids
- unsigned long long timestamp; // creation time
-
- // read grid header
- gzread(gzf, &ibuffer, sizeof(int) * 4); // dimX, dimY, dimZ, gridType
- gzread(gzf, &elementType, sizeof(int));
- gzread(gzf, &bytesPerElement, sizeof(int));
- gzread(gzf, &info, sizeof(info));
- gzread(gzf, &dimT, sizeof(int));
- gzread(gzf, &timestamp, sizeof(unsigned long long));
-
- if (with_debug)
- cout << "Fluid: Read " << ibuffer[3] << " grid type in file: " << filename << endl;
-
- for (vector<GridItem>::iterator gIter = grids.begin(); gIter != grids.end(); ++gIter) {
- GridItem gridItem = *gIter;
- void **pointerList = gridItem.pointer;
- int type = gridItem.type;
- int *res = gridItem.res;
- assert(pointerList[0]);
- assert(res[0] == res[0] && res[1] == res[1] && res[2] == res[2]);
- UNUSED_VARS(res);
-
- switch (type) {
- case FLUID_DOMAIN_GRID_VEC3F: {
- assert(pointerList[1] && pointerList[2]);
- float **fpointers = (float **)pointerList;
- expectedBytes = sizeof(float) * 3 * ibuffer[0] * ibuffer[1] * ibuffer[2];
- readBytes = 0;
- for (int i = 0; i < ibuffer[0] * ibuffer[1] * ibuffer[2]; ++i) {
- for (int j = 0; j < 3; ++j) {
- readBytes += gzread(gzf, fpointers[j], sizeof(float));
- ++fpointers[j];
- }
- }
- break;
- }
- case FLUID_DOMAIN_GRID_FLOAT: {
- float **fpointers = (float **)pointerList;
- expectedBytes = sizeof(float) * ibuffer[0] * ibuffer[1] * ibuffer[2];
- readBytes = gzread(
- gzf, fpointers[0], sizeof(float) * ibuffer[0] * ibuffer[1] * ibuffer[2]);
- break;
- }
- default: {
- cerr << "Fluid Error -- Unknown grid type" << endl;
- }
- }
-
- if (!readBytes) {
- cerr << "Fluid Error -- updateGridFromRaw(): Unable to read raw file: " << filename
- << endl;
- gzclose(gzf);
- return false;
- }
- assert(expectedBytes == readBytes);
- UNUSED_VARS(expectedBytes);
-
- if (with_debug)
- cout << "Fluid: Read successfully: " << filename << endl;
- }
- }
- else {
- cerr << "Fluid Error -- updateGridsFromUni(): Unknown header in file: " << filename << endl;
- gzclose(gzf);
- return false;
- }
-
- return (gzclose(gzf) == Z_OK);
-}
-
-#if OPENVDB == 1
-bool MANTA::updateGridsFromVDB(string filename, vector<GridItem> grids)
-{
- if (with_debug)
- cout << "MANTA::updateGridsFromVDB()" << endl;
-
- openvdb::initialize();
- openvdb::io::File file(filename);
- try {
- file.open();
- }
- catch (const openvdb::IoError &) {
- cerr << "Fluid Error -- updateGridsFromVDB(): IOError, invalid OpenVDB file: " << filename
- << endl;
- return false;
- }
- if (grids.empty()) {
- cerr << "Fluid Error -- updateGridsFromVDB(): No grids found in grid vector" << endl;
- return false;
- }
-
- unordered_map<string, openvdb::FloatGrid::Accessor> floatAccessors;
- unordered_map<string, openvdb::Vec3SGrid::Accessor> vec3fAccessors;
- openvdb::GridBase::Ptr baseGrid;
-
- /* Get accessors to all grids in this OpenVDB file.*/
- for (vector<GridItem>::iterator gIter = grids.begin(); gIter != grids.end(); ++gIter) {
- GridItem gridItem = *gIter;
- string itemName = gridItem.name;
- int itemType = gridItem.type;
-
- for (openvdb::io::File::NameIterator nameIter = file.beginName(); nameIter != file.endName();
- ++nameIter) {
- string vdbName = nameIter.gridName();
- bool nameMatch = !itemName.compare(vdbName);
-
- /* Support for <= 2.83: If file has only one grid in it, use that grid. */
- openvdb::io::File::NameIterator peekNext = nameIter;
- bool onlyGrid = (++peekNext == file.endName());
- if (onlyGrid) {
- vdbName = itemName;
- }
-
- if (nameMatch || onlyGrid) {
- baseGrid = file.readGrid(nameIter.gridName());
-
- switch (itemType) {
- case FLUID_DOMAIN_GRID_VEC3F: {
- openvdb::Vec3SGrid::Ptr gridVDB = openvdb::gridPtrCast<openvdb::Vec3SGrid>(baseGrid);
- openvdb::Vec3SGrid::Accessor vdbAccessor = gridVDB->getAccessor();
- vec3fAccessors.emplace(vdbName, vdbAccessor);
- break;
- }
- case FLUID_DOMAIN_GRID_FLOAT: {
- openvdb::FloatGrid::Ptr gridVDB = openvdb::gridPtrCast<openvdb::FloatGrid>(baseGrid);
- openvdb::FloatGrid::Accessor vdbAccessor = gridVDB->getAccessor();
- floatAccessors.emplace(vdbName, vdbAccessor);
- break;
- }
- default: {
- cerr << "Fluid Error -- Unknown grid type" << endl;
- }
- }
- }
- else {
- cerr << "Fluid Error -- Could not read grid from file" << endl;
- return false;
- }
- }
- }
- file.close();
-
- size_t index = 0;
-
- /* Use res of first grid for grid loop. All grids must be same size anyways. */
- vector<GridItem>::iterator gIter = grids.begin();
- int *res = (*gIter).res;
-
- for (int z = 0; z < res[2]; ++z) {
- for (int y = 0; y < res[1]; ++y) {
- for (int x = 0; x < res[0]; ++x, ++index) {
- openvdb::Coord xyz(x, y, z);
-
- for (vector<GridItem>::iterator gIter = grids.begin(); gIter != grids.end(); ++gIter) {
- GridItem gridItem = *gIter;
- void **pointerList = gridItem.pointer;
- int type = gridItem.type;
- int *res = gridItem.res;
- assert(pointerList[0]);
- assert(res[0] == res[0] && res[1] == res[1] && res[2] == res[2]);
- UNUSED_VARS(res);
-
- switch (type) {
- case FLUID_DOMAIN_GRID_VEC3F: {
- unordered_map<string, openvdb::Vec3SGrid::Accessor>::iterator it;
- it = vec3fAccessors.find(gridItem.name);
- if (it == vec3fAccessors.end()) {
- cerr << "Fluid Error -- '" << gridItem.name << "' not in vdb grid map" << endl;
- return false;
- }
- openvdb::Vec3f v = it->second.getValue(xyz);
-
- assert(pointerList[1] && pointerList[2]);
- float **fpointers = (float **)pointerList;
- for (int j = 0; j < 3; ++j) {
- (fpointers[j])[index] = (float)v[j];
- }
- break;
- }
- case FLUID_DOMAIN_GRID_FLOAT: {
- unordered_map<string, openvdb::FloatGrid::Accessor>::iterator it;
- it = floatAccessors.find(gridItem.name);
- if (it == floatAccessors.end()) {
- cerr << "Fluid Error -- '" << gridItem.name << "' not in vdb grid map" << endl;
- return false;
- }
- float v = it->second.getValue(xyz);
- float **fpointers = (float **)pointerList;
- (fpointers[0])[index] = v;
- break;
- }
- default: {
- cerr << "Fluid Error -- Unknown grid type" << endl;
- }
- }
- }
- }
- }
- }
- if (with_debug)
- cout << "Fluid: Read successfully: " << filename << endl;
-
- return true;
-}
-#endif
-
-bool MANTA::updateGridsFromRaw(string filename, vector<GridItem> grids)
-{
- if (with_debug)
- cout << "MANTA::updateGridsFromRaw()" << endl;
-
- gzFile gzf;
- int expectedBytes, readBytes;
-
- gzf = (gzFile)BLI_gzopen(filename.c_str(), "rb");
- if (!gzf) {
- cout << "MANTA::updateGridsFromRaw(): unable to open file" << endl;
- return false;
- }
-
- for (vector<GridItem>::iterator gIter = grids.begin(); gIter != grids.end(); ++gIter) {
- GridItem gridItem = *gIter;
- void **pointerList = gridItem.pointer;
- int type = gridItem.type;
- int *res = gridItem.res;
- assert(pointerList[0]);
- assert(res[0] == res[0] && res[1] == res[1] && res[2] == res[2]);
- UNUSED_VARS(res);
-
- switch (type) {
- case FLUID_DOMAIN_GRID_VEC3F: {
- assert(pointerList[1] && pointerList[2]);
- float **fpointers = (float **)pointerList;
- expectedBytes = sizeof(float) * 3 * res[0] * res[1] * res[2];
- readBytes = 0;
- for (int i = 0; i < res[0] * res[1] * res[2]; ++i) {
- for (int j = 0; j < 3; ++j) {
- readBytes += gzread(gzf, fpointers[j], sizeof(float));
- ++fpointers[j];
- }
- }
- break;
- }
- case FLUID_DOMAIN_GRID_FLOAT: {
- float **fpointers = (float **)pointerList;
- expectedBytes = sizeof(float) * res[0] * res[1] * res[2];
- readBytes = gzread(gzf, fpointers[0], expectedBytes);
- break;
- }
- default: {
- cerr << "Fluid Error -- Unknown grid type" << endl;
- }
- }
-
- if (!readBytes) {
- cerr << "Fluid Error -- updateGridsFromRaw(): Unable to read raw file: " << filename << endl;
- gzclose(gzf);
- return false;
- }
- assert(expectedBytes == readBytes);
-
- if (with_debug)
- cout << "Fluid: Read successfully: " << filename << endl;
- }
-
- if (with_debug)
- cout << "Fluid: Read successfully: " << filename << endl;
-
- return (gzclose(gzf) == Z_OK);
-}
-
void MANTA::updatePointers()
{
if (with_debug)
@@ -3487,43 +2109,78 @@ bool MANTA::hasData(FluidModifierData *mmd, int framenr)
string filename = (mUsingSmoke) ? FLUID_NAME_DENSITY : FLUID_NAME_PP;
exists = BLI_exists(getFile(mmd, FLUID_DOMAIN_DIR_DATA, filename, extension, framenr).c_str());
}
+ if (with_debug)
+ cout << "Fluid: Has Data: " << exists << endl;
+
return exists;
}
bool MANTA::hasNoise(FluidModifierData *mmd, int framenr)
{
- string extension = getCacheFileEnding(mmd->domain->cache_noise_format);
+ string extension = getCacheFileEnding(mmd->domain->cache_data_format);
bool exists = BLI_exists(
getFile(mmd, FLUID_DOMAIN_DIR_NOISE, FLUID_NAME_NOISE, extension, framenr).c_str());
/* Check single file naming. */
if (!exists) {
+ extension = getCacheFileEnding(mmd->domain->cache_data_format);
+ exists = BLI_exists(
+ getFile(mmd, FLUID_DOMAIN_DIR_NOISE, FLUID_NAME_DENSITY_NOISE, extension, framenr)
+ .c_str());
+ }
+ /* Check single file naming with deprecated extension. */
+ if (!exists) {
+ extension = getCacheFileEnding(mmd->domain->cache_noise_format);
exists = BLI_exists(
getFile(mmd, FLUID_DOMAIN_DIR_NOISE, FLUID_NAME_DENSITY_NOISE, extension, framenr)
.c_str());
}
+ if (with_debug)
+ cout << "Fluid: Has Noise: " << exists << endl;
+
return exists;
}
bool MANTA::hasMesh(FluidModifierData *mmd, int framenr)
{
string extension = getCacheFileEnding(mmd->domain->cache_mesh_format);
- return BLI_exists(
- getFile(mmd, FLUID_DOMAIN_DIR_MESH, FLUID_NAME_LMESH, extension, framenr).c_str());
+ bool exists = BLI_exists(
+ getFile(mmd, FLUID_DOMAIN_DIR_MESH, FLUID_NAME_MESH, extension, framenr).c_str());
+
+ /* Check old file naming. */
+ if (!exists) {
+ exists = BLI_exists(
+ getFile(mmd, FLUID_DOMAIN_DIR_MESH, FLUID_NAME_LMESH, extension, framenr).c_str());
+ }
+ if (with_debug)
+ cout << "Fluid: Has Mesh: " << exists << endl;
+
+ return exists;
}
bool MANTA::hasParticles(FluidModifierData *mmd, int framenr)
{
- string extension = getCacheFileEnding(mmd->domain->cache_particle_format);
+ string extension = getCacheFileEnding(mmd->domain->cache_data_format);
bool exists = BLI_exists(
getFile(mmd, FLUID_DOMAIN_DIR_PARTICLES, FLUID_NAME_PARTICLES, extension, framenr).c_str());
/* Check single file naming. */
if (!exists) {
+ extension = getCacheFileEnding(mmd->domain->cache_data_format);
exists = BLI_exists(
getFile(mmd, FLUID_DOMAIN_DIR_PARTICLES, FLUID_NAME_PP_PARTICLES, extension, framenr)
.c_str());
}
+ /* Check single file naming with deprecated extension. */
+ if (!exists) {
+ extension = getCacheFileEnding(mmd->domain->cache_particle_format);
+ exists = BLI_exists(
+ getFile(mmd, FLUID_DOMAIN_DIR_PARTICLES, FLUID_NAME_PP_PARTICLES, extension, framenr)
+ .c_str());
+ }
+ if (with_debug)
+ cout << "Fluid: Has Particles: " << exists << endl;
+
return exists;
}
@@ -3532,7 +2189,11 @@ bool MANTA::hasGuiding(FluidModifierData *mmd, int framenr, bool sourceDomain)
string subdirectory = (sourceDomain) ? FLUID_DOMAIN_DIR_DATA : FLUID_DOMAIN_DIR_GUIDE;
string filename = (sourceDomain) ? FLUID_NAME_VELOCITY : FLUID_NAME_GUIDEVEL;
string extension = getCacheFileEnding(mmd->domain->cache_data_format);
- return BLI_exists(getFile(mmd, subdirectory, filename, extension, framenr).c_str());
+ bool exists = BLI_exists(getFile(mmd, subdirectory, filename, extension, framenr).c_str());
+ if (with_debug)
+ cout << "Fluid: Has Guiding: " << exists << endl;
+
+ return exists;
}
string MANTA::getDirectory(FluidModifierData *mmd, string subdirectory)
diff --git a/intern/mantaflow/intern/MANTA_main.h b/intern/mantaflow/intern/MANTA_main.h
index 6a8484c75d9..38cbd33ea0a 100644
--- a/intern/mantaflow/intern/MANTA_main.h
+++ b/intern/mantaflow/intern/MANTA_main.h
@@ -60,19 +60,6 @@ struct MANTA {
int flags;
} Triangle;
- // Cache helper typedefs
- typedef struct GridItem {
- void **pointer; /* Array of pointers for this grid.*/
- int type;
- int *res;
- string name;
- } GridItem;
-
- typedef struct FileItem {
- string filename;
- vector<GridItem> grids;
- } FileItem;
-
// Manta step, handling everything
void step(struct FluidModifierData *mmd, int startFrame);
@@ -104,10 +91,10 @@ struct MANTA {
// Read cache (via Manta save/load)
bool readConfiguration(FluidModifierData *mmd, int framenr);
- bool readData(FluidModifierData *mmd, int framenr);
- bool readNoise(FluidModifierData *mmd, int framenr);
+ bool readData(FluidModifierData *mmd, int framenr, bool resumable);
+ bool readNoise(FluidModifierData *mmd, int framenr, bool resumable);
bool readMesh(FluidModifierData *mmd, int framenr);
- bool readParticles(FluidModifierData *mmd, int framenr);
+ bool readParticles(FluidModifierData *mmd, int framenr, bool resumable);
bool readGuiding(FluidModifierData *mmd, int framenr, bool sourceDomain);
// Read cache (via file read functions in MANTA - e.g. read .bobj.gz meshes, .uni particles)
@@ -899,16 +886,6 @@ struct MANTA {
string getRealValue(const string &varName);
string parseLine(const string &line);
string parseScript(const string &setup_string, FluidModifierData *mmd = NULL);
- bool updateMeshFromBobj(string filename);
- bool updateMeshFromObj(string filename);
- bool updateMeshFromUni(string filename);
- bool updateParticlesFromUni(string filename, bool isSecondarySys, bool isVelData);
- bool updateGridsFromUni(string filename, vector<GridItem> grids);
- bool updateGridsFromVDB(string filename, vector<GridItem> grids);
- bool updateGridsFromRaw(string filename, vector<GridItem> grids);
- bool updateMeshFromFile(string filename);
- bool updateParticlesFromFile(string filename, bool isSecondarySys, bool isVelData);
- bool updateGridsFromFile(string filename, vector<GridItem> grids);
string getDirectory(struct FluidModifierData *mmd, string subdirectory);
string getFile(struct FluidModifierData *mmd,
string subdirectory,
diff --git a/intern/mantaflow/intern/manta_fluid_API.cpp b/intern/mantaflow/intern/manta_fluid_API.cpp
index 49bc224b3fa..f1607f1bd99 100644
--- a/intern/mantaflow/intern/manta_fluid_API.cpp
+++ b/intern/mantaflow/intern/manta_fluid_API.cpp
@@ -94,18 +94,18 @@ int manta_read_config(MANTA *fluid, FluidModifierData *mmd, int framenr)
return fluid->readConfiguration(mmd, framenr);
}
-int manta_read_data(MANTA *fluid, FluidModifierData *mmd, int framenr)
+int manta_read_data(MANTA *fluid, FluidModifierData *mmd, int framenr, bool resumable)
{
if (!fluid || !mmd)
return 0;
- return fluid->readData(mmd, framenr);
+ return fluid->readData(mmd, framenr, resumable);
}
-int manta_read_noise(MANTA *fluid, FluidModifierData *mmd, int framenr)
+int manta_read_noise(MANTA *fluid, FluidModifierData *mmd, int framenr, bool resumable)
{
if (!fluid || !mmd)
return 0;
- return fluid->readNoise(mmd, framenr);
+ return fluid->readNoise(mmd, framenr, resumable);
}
int manta_read_mesh(MANTA *fluid, FluidModifierData *mmd, int framenr)
@@ -115,11 +115,11 @@ int manta_read_mesh(MANTA *fluid, FluidModifierData *mmd, int framenr)
return fluid->readMesh(mmd, framenr);
}
-int manta_read_particles(MANTA *fluid, FluidModifierData *mmd, int framenr)
+int manta_read_particles(MANTA *fluid, FluidModifierData *mmd, int framenr, bool resumable)
{
if (!fluid || !mmd)
return 0;
- return fluid->readParticles(mmd, framenr);
+ return fluid->readParticles(mmd, framenr, resumable);
}
int manta_read_guiding(MANTA *fluid, FluidModifierData *mmd, int framenr, bool sourceDomain)
@@ -129,41 +129,6 @@ int manta_read_guiding(MANTA *fluid, FluidModifierData *mmd, int framenr, bool s
return fluid->readGuiding(mmd, framenr, sourceDomain);
}
-int manta_update_liquid_structures(MANTA *fluid, FluidModifierData *mmd, int framenr)
-{
- if (!fluid || !mmd)
- return 0;
- return fluid->updateFlipStructures(mmd, framenr);
-}
-
-int manta_update_mesh_structures(MANTA *fluid, FluidModifierData *mmd, int framenr)
-{
- if (!fluid || !mmd)
- return 0;
- return fluid->updateMeshStructures(mmd, framenr);
-}
-
-int manta_update_particle_structures(MANTA *fluid, FluidModifierData *mmd, int framenr)
-{
- if (!fluid || !mmd)
- return 0;
- return fluid->updateParticleStructures(mmd, framenr);
-}
-
-int manta_update_smoke_structures(MANTA *fluid, FluidModifierData *mmd, int framenr)
-{
- if (!fluid || !mmd)
- return 0;
- return fluid->updateSmokeStructures(mmd, framenr);
-}
-
-int manta_update_noise_structures(MANTA *fluid, FluidModifierData *mmd, int framenr)
-{
- if (!fluid || !mmd)
- return 0;
- return fluid->updateNoiseStructures(mmd, framenr);
-}
-
int manta_bake_data(MANTA *fluid, FluidModifierData *mmd, int framenr)
{
if (!fluid || !mmd)
diff --git a/intern/mantaflow/intern/strings/fluid_script.h b/intern/mantaflow/intern/strings/fluid_script.h
index 637dd22f128..62274101859 100644
--- a/intern/mantaflow/intern/strings/fluid_script.h
+++ b/intern/mantaflow/intern/strings/fluid_script.h
@@ -33,7 +33,7 @@ from manta import *\n\
import os.path, shutil, math, sys, gc, multiprocessing, platform, time\n\
\n\
withMPBake = False # Bake files asynchronously\n\
-withMPSave = True # Save files asynchronously\n\
+withMPSave = False # Save files asynchronously\n\
isWindows = platform.system() != 'Darwin' and platform.system() != 'Linux'\n\
# TODO (sebbas): Use this to simulate Windows multiprocessing (has default mode spawn)\n\
#try:\n\
@@ -96,7 +96,7 @@ gravity_s$ID$ = vec3($GRAVITY_X$, $GRAVITY_Y$, $GRAVITY_Z$) # in SI unit (e.g. m
gs_s$ID$ = vec3($RESX$, $RESY$, $RESZ$)\n\
maxVel_s$ID$ = 0\n\
\n\
-doOpen_s$ID$ = $DO_OPEN$\n\
+domainClosed_s$ID$ = $DOMAIN_CLOSED$\n\
boundConditions_s$ID$ = '$BOUND_CONDITIONS$'\n\
boundaryWidth_s$ID$ = $BOUNDARY_WIDTH$\n\
deleteInObstacle_s$ID$ = $DELETE_IN_OBSTACLE$\n\
@@ -161,7 +161,19 @@ mantaMsg('scaleSpeed is ' + str(scaleSpeedFrames_s$ID$))\n\
scaleSpeedTime_s$ID$ = ratioResToBLength_s$ID$ * ratioBTimeToTimstep_s$ID$ # [blength/btime] to [cells/frameLength]\n\
mantaMsg('scaleSpeedTime is ' + str(scaleSpeedTime_s$ID$))\n\
\n\
-gravity_s$ID$ *= scaleAcceleration_s$ID$ # scale from world acceleration to cell based acceleration\n";
+gravity_s$ID$ *= scaleAcceleration_s$ID$ # scale from world acceleration to cell based acceleration\n\
+\n\
+# OpenVDB options\n\
+vdbCompression_s$ID$ = $COMPRESSION_OPENVDB$\n\
+vdbPrecisionHalf_s$ID$ = $PRECISION_OPENVDB$\n\
+\n\
+# Cache file names\n\
+file_data_s$ID$ = '$NAME_DATA$'\n\
+file_noise_s$ID$ = '$NAME_NOISE$'\n\
+file_mesh_s$ID$ = '$NAME_MESH$'\n\
+file_meshvel_s$ID$ = '$NAME_MESH$'\n\
+file_particles_s$ID$ = '$NAME_PARTICLES$'\n\
+file_guiding_s$ID$ = '$NAME_GUIDING$'";
const std::string fluid_variables_noise =
"\n\
@@ -282,8 +294,8 @@ phiIn_s$ID$.setConst(9999)\n\
phiOut_s$ID$.setConst(9999)\n\
\n\
# Keep track of important objects in dict to load them later on\n\
-fluid_data_dict_final_s$ID$ = dict(vel=vel_s$ID$, velTmp=velTmp_s$ID$)\n\
-fluid_data_dict_resume_s$ID$ = dict(phiObs=phiObs_s$ID$, phiIn=phiIn_s$ID$, phiOut=phiOut_s$ID$, flags=flags_s$ID$)\n";
+fluid_data_dict_final_s$ID$ = { 'vel' : vel_s$ID$ }\n\
+fluid_data_dict_resume_s$ID$ = { 'phiObs' : phiObs_s$ID$, 'phiIn' : phiIn_s$ID$, 'phiOut' : phiOut_s$ID$, 'flags' : flags_s$ID$, 'velTmp' : velTmp_s$ID$ }\n";
const std::string fluid_alloc_obstacle =
"\n\
@@ -497,10 +509,12 @@ def fluid_cache_get_framenr_formatted_$ID$(framenr):\n\
const std::string fluid_bake_multiprocessing =
"\n\
-def fluid_cache_multiprocessing_start_$ID$(function, framenr, format_data=None, format_noise=None, format_mesh=None, format_particles=None, format_guiding=None, path_data=None, path_noise=None, path_mesh=None, path_particles=None, path_guiding=None, dict=None, do_join=True, resumable=False):\n\
+def fluid_cache_multiprocessing_start_$ID$(function, framenr, file_name=None, format_data=None, format_noise=None, format_mesh=None, format_particles=None, format_guiding=None, path_data=None, path_noise=None, path_mesh=None, path_particles=None, path_guiding=None, dict=None, do_join=True, resumable=False):\n\
mantaMsg('Multiprocessing cache')\n\
if __name__ == '__main__':\n\
args = (framenr,)\n\
+ if file_name:\n\
+ args += (file_name,)\n\
if format_data:\n\
args += (format_data,)\n\
if format_noise:\n\
@@ -531,7 +545,7 @@ def fluid_cache_multiprocessing_start_$ID$(function, framenr, format_data=None,
const std::string fluid_bake_data =
"\n\
-def bake_fluid_process_data_$ID$(framenr, format_data, format_particles, format_guiding, path_data, path_guiding):\n\
+def bake_fluid_process_data_$ID$(framenr, format_data, path_data):\n\
mantaMsg('Bake fluid data')\n\
\n\
s$ID$.frame = framenr\n\
@@ -545,15 +559,15 @@ def bake_fluid_process_data_$ID$(framenr, format_data, format_particles, format_
liquid_adaptive_step_$ID$(framenr)\n\
mantaMsg('--- Step: %s seconds ---' % (time.time() - start_time))\n\
\n\
-def bake_fluid_data_$ID$(path_data, path_guiding, framenr, format_data, format_particles, format_guiding):\n\
+def bake_fluid_data_$ID$(path_data, framenr, format_data):\n\
if not withMPBake or isWindows:\n\
- bake_fluid_process_data_$ID$(framenr, format_data, format_particles, format_guiding, path_data, path_guiding)\n\
+ bake_fluid_process_data_$ID$(framenr, format_data, path_data)\n\
else:\n\
- fluid_cache_multiprocessing_start_$ID$(function=bake_fluid_process_data_$ID$, framenr=framenr, format_data=format_data, format_particles=format_particles, format_guiding=format_guiding, path_data=path_data, path_guiding=path_guiding, do_join=False)\n";
+ fluid_cache_multiprocessing_start_$ID$(function=bake_fluid_process_data_$ID$, framenr=framenr, format_data=format_data, path_data=path_data, do_join=False)\n";
const std::string fluid_bake_noise =
"\n\
-def bake_noise_process_$ID$(framenr, format_data, format_noise, path_data, path_noise, resumable):\n\
+def bake_noise_process_$ID$(framenr, format_noise, path_noise):\n\
mantaMsg('Bake fluid noise')\n\
\n\
sn$ID$.frame = framenr\n\
@@ -563,15 +577,15 @@ def bake_noise_process_$ID$(framenr, format_data, format_noise, path_data, path_
\n\
smoke_step_noise_$ID$(framenr)\n\
\n\
-def bake_noise_$ID$(path_data, path_noise, framenr, format_data, format_noise, resumable):\n\
+def bake_noise_$ID$(path_noise, framenr, format_noise):\n\
if not withMPBake or isWindows:\n\
- bake_noise_process_$ID$(framenr, format_data, format_noise, path_data, path_noise, resumable)\n\
+ bake_noise_process_$ID$(framenr, format_noise, path_noise)\n\
else:\n\
- fluid_cache_multiprocessing_start_$ID$(function=bake_noise_process_$ID$, framenr=framenr, format_data=format_data, format_noise=format_noise, path_data=path_data, path_noise=path_noise, resumable=resumable)\n";
+ fluid_cache_multiprocessing_start_$ID$(function=bake_noise_process_$ID$, framenr=framenr, format_noise=format_noise, path_noise=path_noise)\n";
const std::string fluid_bake_mesh =
"\n\
-def bake_mesh_process_$ID$(framenr, format_data, format_mesh, format_particles, path_data, path_mesh):\n\
+def bake_mesh_process_$ID$(framenr, format_data, format_mesh, path_mesh):\n\
mantaMsg('Bake fluid mesh')\n\
\n\
sm$ID$.frame = framenr\n\
@@ -587,15 +601,15 @@ def bake_mesh_process_$ID$(framenr, format_data, format_mesh, format_particles,
if using_speedvectors_s$ID$:\n\
liquid_save_meshvel_$ID$(path_mesh, framenr, format_data)\n\
\n\
-def bake_mesh_$ID$(path_data, path_mesh, framenr, format_data, format_mesh, format_particles):\n\
+def bake_mesh_$ID$(path_mesh, framenr, format_data, format_mesh):\n\
if not withMPBake or isWindows:\n\
- bake_mesh_process_$ID$(framenr, format_data, format_mesh, format_particles, path_data, path_mesh)\n\
+ bake_mesh_process_$ID$(framenr, format_data, format_mesh, path_mesh)\n\
else:\n\
- fluid_cache_multiprocessing_start_$ID$(function=bake_mesh_process_$ID$, framenr=framenr, format_data=format_data, format_mesh=format_mesh, format_particles=format_particles, path_data=path_data, path_mesh=path_mesh)\n";
+ fluid_cache_multiprocessing_start_$ID$(function=bake_mesh_process_$ID$, framenr=framenr, format_data=format_data, format_mesh=format_mesh, path_mesh=path_mesh)\n";
const std::string fluid_bake_particles =
"\n\
-def bake_particles_process_$ID$(framenr, format_data, format_particles, path_data, path_particles, resumable):\n\
+def bake_particles_process_$ID$(framenr, format_particles, path_particles, resumable):\n\
mantaMsg('Bake secondary particles')\n\
\n\
sp$ID$.frame = framenr\n\
@@ -609,11 +623,11 @@ def bake_particles_process_$ID$(framenr, format_data, format_particles, path_dat
liquid_step_particles_$ID$()\n\
liquid_save_particles_$ID$(path_particles, framenr, format_particles, resumable)\n\
\n\
-def bake_particles_$ID$(path_data, path_particles, framenr, format_data, format_particles, resumable):\n\
+def bake_particles_$ID$(path_particles, framenr, format_particles, resumable):\n\
if not withMPBake or isWindows:\n\
- bake_particles_process_$ID$(framenr, format_data, format_particles, path_data, path_particles, resumable)\n\
+ bake_particles_process_$ID$(framenr, format_particles, path_particles, resumable)\n\
else:\n\
- fluid_cache_multiprocessing_start_$ID$(function=bake_particles_process_$ID$, framenr=framenr, format_data=format_data, format_particles=format_particles, path_data=path_data, path_particles=path_particles, resumable=resumable)\n";
+ fluid_cache_multiprocessing_start_$ID$(function=bake_particles_process_$ID$, framenr=framenr, format_particles=format_particles, path_particles=path_particles, resumable=resumable)\n";
const std::string fluid_bake_guiding =
"\n\
@@ -650,43 +664,47 @@ def bake_guiding_$ID$(path_guiding, framenr, format_guiding, resumable):\n\
const std::string fluid_file_import =
"\n\
-def fluid_file_import_s$ID$(dict, path, framenr, file_format):\n\
+def fluid_file_import_s$ID$(dict, path, framenr, file_format, file_name=None):\n\
+ mantaMsg('Fluid file import, frame: ' + str(framenr))\n\
try:\n\
framenr = fluid_cache_get_framenr_formatted_$ID$(framenr)\n\
- for name, object in dict.items():\n\
- file = os.path.join(path, name + '_' + framenr + file_format)\n\
+ # New cache: Try to load the data from a single file\n\
+ loadCombined = 0\n\
+ if file_name is not None:\n\
+ file = os.path.join(path, file_name + '_' + framenr + file_format)\n\
if os.path.isfile(file):\n\
- object.load(file)\n\
- else:\n\
- mantaMsg('Could not load file ' + str(file))\n\
- except:\n\
- mantaMsg('exception found')\n\
- #mantaMsg(str(e))\n\
- pass # Just skip file load errors for now\n";
-
-const std::string fluid_load_data =
- "\n\
-def fluid_load_data_$ID$(path, framenr, file_format, resumable):\n\
- mantaMsg('Fluid load data, frame ' + str(framenr))\n\
- fluid_file_import_s$ID$(dict=fluid_data_dict_final_s$ID$, path=path, framenr=framenr, file_format=file_format)\n\
- \n\
- if resumable:\n\
- fluid_file_import_s$ID$(dict=fluid_data_dict_resume_s$ID$, path=path, framenr=framenr, file_format=file_format)\n\
+ if file_format == '.vdb':\n\
+ loadCombined = load(name=file, objects=list(dict.values()), worldSize=domainSize_s$ID$)\n\
+ elif file_format == '.bobj.gz' or file_format == '.obj':\n\
+ for name, object in dict.items():\n\
+ if os.path.isfile(file):\n\
+ loadCombined = object.load(file)\n\
+ \n\
+ # Old cache: Try to load the data from separate files, i.e. per object with the object based load() function\n\
+ if not loadCombined:\n\
+ for name, object in dict.items():\n\
+ file = os.path.join(path, name + '_' + framenr + file_format)\n\
+ if os.path.isfile(file):\n\
+ loadCombined = object.load(file)\n\
\n\
- # When adaptive domain bake is resumed we need correct values in xyz vel grids\n\
- copyVec3ToReal(source=vel_s$ID$, targetX=x_vel_s$ID$, targetY=y_vel_s$ID$, targetZ=z_vel_s$ID$)\n";
+ if not loadCombined:\n\
+ mantaMsg('Could not load file ' + str(file))\n\
+ \n\
+ except Exception as e:\n\
+ mantaMsg('Exception in Python fluid file import: ' + str(e))\n\
+ pass # Just skip file load errors for now\n";
const std::string fluid_load_guiding =
"\n\
def fluid_load_guiding_$ID$(path, framenr, file_format):\n\
mantaMsg('Fluid load guiding, frame ' + str(framenr))\n\
- fluid_file_import_s$ID$(dict=fluid_guiding_dict_s$ID$, path=path, framenr=framenr, file_format=file_format)\n";
+ fluid_file_import_s$ID$(dict=fluid_guiding_dict_s$ID$, path=path, framenr=framenr, file_format=file_format, file_name=file_guiding_s$ID$)\n";
const std::string fluid_load_vel =
"\n\
def fluid_load_vel_$ID$(path, framenr, file_format):\n\
mantaMsg('Fluid load vel, frame ' + str(framenr))\n\
- fluid_vel_dict_s$ID$ = dict(vel=guidevel_sg$ID$)\n\
+ fluid_vel_dict_s$ID$ = { 'vel' : guidevel_sg$ID$ }\n\
fluid_file_import_s$ID$(dict=fluid_vel_dict_s$ID$, path=path, framenr=framenr, file_format=file_format)\n";
//////////////////////////////////////////////////////////////////////
@@ -695,7 +713,7 @@ def fluid_load_vel_$ID$(path, framenr, file_format):\n\
const std::string fluid_file_export =
"\n\
-def fluid_file_export_s$ID$(framenr, file_format, path, dict, mode_override=True, skip_subframes=True):\n\
+def fluid_file_export_s$ID$(framenr, file_format, path, dict, file_name=None, mode_override=True, skip_subframes=True):\n\
if skip_subframes and ((timePerFrame_s$ID$ + dt0_s$ID$) < frameLength_s$ID$):\n\
return\n\
mantaMsg('Fluid file export, frame: ' + str(framenr))\n\
@@ -703,36 +721,37 @@ def fluid_file_export_s$ID$(framenr, file_format, path, dict, mode_override=True
framenr = fluid_cache_get_framenr_formatted_$ID$(framenr)\n\
if not os.path.exists(path):\n\
os.makedirs(path)\n\
- for name, object in dict.items():\n\
- file = os.path.join(path, name + '_' + framenr + file_format)\n\
- if not os.path.isfile(file) or mode_override: object.save(file)\n\
+ \n\
+ # New cache: Try to save the data to a single file\n\
+ saveCombined = 0\n\
+ if file_name is not None:\n\
+ file = os.path.join(path, file_name + '_' + framenr + file_format)\n\
+ if not os.path.isfile(file) or mode_override:\n\
+ if file_format == '.vdb':\n\
+ saveCombined = save(name=file, objects=list(dict.values()), worldSize=domainSize_s$ID$, skipDeletedParts=True, compression=vdbCompression_s$ID$, precisionHalf=vdbPrecisionHalf_s$ID$)\n\
+ elif file_format == '.bobj.gz' or file_format == '.obj':\n\
+ for name, object in dict.items():\n\
+ if not os.path.isfile(file) or mode_override:\n\
+ saveCombined = object.save(file)\n\
+ \n\
+ # Old cache: Try to save the data to separate files, i.e. per object with the object based save() function\n\
+ if not saveCombined:\n\
+ for name, object in dict.items():\n\
+ file = os.path.join(path, name + '_' + framenr + file_format)\n\
+ if not os.path.isfile(file) or mode_override: object.save(file)\n\
+ \n\
except Exception as e:\n\
- mantaMsg(str(e))\n\
+ mantaMsg('Exception in Python fluid file export: ' + str(e))\n\
pass # Just skip file save errors for now\n";
-const std::string fluid_save_data =
- "\n\
-def fluid_save_data_$ID$(path, framenr, file_format, resumable):\n\
- mantaMsg('Fluid save data, frame ' + str(framenr))\n\
- start_time = time.time()\n\
- if not withMPSave or isWindows:\n\
- fluid_file_export_s$ID$(framenr=framenr, file_format=file_format, path=path, dict=fluid_data_dict_final_s$ID$)\n\
- if resumable:\n\
- fluid_file_export_s$ID$(framenr=framenr, file_format=file_format, path=path, dict=fluid_data_dict_resume_s$ID$)\n\
- else:\n\
- fluid_cache_multiprocessing_start_$ID$(function=fluid_file_export_s$ID$, framenr=framenr, format_data=file_format, path_data=path, dict=fluid_data_dict_final_s$ID$, do_join=False)\n\
- if resumable:\n\
- fluid_cache_multiprocessing_start_$ID$(function=fluid_file_export_s$ID$, framenr=framenr, format_data=file_format, path_data=path, dict=fluid_data_dict_resume_s$ID$, do_join=False)\n\
- mantaMsg('--- Save: %s seconds ---' % (time.time() - start_time))\n";
-
const std::string fluid_save_guiding =
"\n\
-def fluid_save_guiding_$ID$(path, framenr, file_format, resumable):\n\
+def fluid_save_guiding_$ID$(path, framenr, file_format):\n\
mantaMsg('Fluid save guiding, frame ' + str(framenr))\n\
if not withMPSave or isWindows:\n\
- fluid_file_export_s$ID$(dict=fluid_guiding_dict_s$ID$, framenr=framenr, file_format=file_format, path=path)\n\
+ fluid_file_export_s$ID$(dict=fluid_guiding_dict_s$ID$, framenr=framenr, file_format=file_format, path=path, file_name=file_guiding_s$ID$)\n\
else:\n\
- fluid_cache_multiprocessing_start_$ID$(function=fluid_file_export_s$ID$, framenr=framenr, format_data=file_format, path_data=path, dict=fluid_guiding_dict_s$ID$, do_join=False)\n";
+ fluid_cache_multiprocessing_start_$ID$(function=fluid_file_export_s$ID$, file_name=file_guiding_s$ID$, framenr=framenr, format_data=file_format, path_data=path, dict=fluid_guiding_dict_s$ID$, do_join=False)\n";
//////////////////////////////////////////////////////////////////////
// STANDALONE MODE
diff --git a/intern/mantaflow/intern/strings/liquid_script.h b/intern/mantaflow/intern/strings/liquid_script.h
index 26b6644f231..04505206601 100644
--- a/intern/mantaflow/intern/strings/liquid_script.h
+++ b/intern/mantaflow/intern/strings/liquid_script.h
@@ -86,16 +86,16 @@ mapWeights_s$ID$ = s$ID$.create(MACGrid, name='$NAME_MAPWEIGHTS$')\n\
fractions_s$ID$ = None # allocated dynamically\n\
curvature_s$ID$ = None\n\
\n\
-pp_s$ID$ = s$ID$.create(BasicParticleSystem, name='$NAME_PP$')\n\
-pVel_pp$ID$ = pp_s$ID$.create(PdataVec3, name='$NAME_PVEL$')\n\
+pp_s$ID$ = s$ID$.create(BasicParticleSystem, name='$NAME_PARTS$')\n\
+pVel_pp$ID$ = pp_s$ID$.create(PdataVec3, name='$NAME_PARTSVELOCITY$')\n\
\n\
# Acceleration data for particle nbs\n\
pindex_s$ID$ = s$ID$.create(ParticleIndexSystem, name='$NAME_PINDEX$')\n\
gpi_s$ID$ = s$ID$.create(IntGrid, name='$NAME_GPI$')\n\
\n\
# Keep track of important objects in dict to load them later on\n\
-liquid_data_dict_final_s$ID$ = dict(pp=pp_s$ID$, pVel=pVel_pp$ID$)\n\
-liquid_data_dict_resume_s$ID$ = dict(phiParts=phiParts_s$ID$, phi=phi_s$ID$, phiTmp=phiTmp_s$ID$)\n";
+liquid_data_dict_final_s$ID$ = { 'pVel' : pVel_pp$ID$, 'pp' : pp_s$ID$ }\n\
+liquid_data_dict_resume_s$ID$ = { 'phiParts' : phiParts_s$ID$, 'phi' : phi_s$ID$, 'phiTmp' : phiTmp_s$ID$ }\n";
const std::string liquid_alloc_mesh =
"\n\
@@ -104,7 +104,7 @@ phiParts_sm$ID$ = sm$ID$.create(LevelsetGrid, name='$NAME_PHIPARTS_MESH$')\n\
phi_sm$ID$ = sm$ID$.create(LevelsetGrid, name='$NAME_PHI_MESH$')\n\
pp_sm$ID$ = sm$ID$.create(BasicParticleSystem, name='$NAME_PP_MESH$')\n\
flags_sm$ID$ = sm$ID$.create(FlagGrid, name='$NAME_FLAGS_MESH$')\n\
-mesh_sm$ID$ = sm$ID$.create(Mesh, name='$NAME_LMESH$')\n\
+mesh_sm$ID$ = sm$ID$.create(Mesh, name='$NAME_MESH$')\n\
\n\
if using_speedvectors_s$ID$:\n\
mVel_mesh$ID$ = mesh_sm$ID$.create(MdataVec3, name='$NAME_VELOCITYVEC_MESH$')\n\
@@ -119,10 +119,10 @@ phiParts_sm$ID$.setConst(9999)\n\
phi_sm$ID$.setConst(9999)\n\
\n\
# Keep track of important objects in dict to load them later on\n\
-liquid_mesh_dict_s$ID$ = dict(lMesh=mesh_sm$ID$)\n\
+liquid_mesh_dict_s$ID$ = { 'lMesh' : mesh_sm$ID$ }\n\
\n\
if using_speedvectors_s$ID$:\n\
- liquid_meshvel_dict_s$ID$ = dict(lVelMesh=mVel_mesh$ID$)\n";
+ liquid_meshvel_dict_s$ID$ = { 'lVelMesh' : mVel_mesh$ID$ }\n";
const std::string liquid_alloc_curvature =
"\n\
@@ -131,20 +131,20 @@ curvature_s$ID$ = s$ID$.create(RealGrid, name='$NAME_CURVATURE$')\n";
const std::string liquid_alloc_particles =
"\n\
-ppSnd_sp$ID$ = sp$ID$.create(BasicParticleSystem, name='$FLUID_NAME_PP_PARTICLES$')\n\
-pVelSnd_pp$ID$ = ppSnd_sp$ID$.create(PdataVec3, name='$FLUID_NAME_PVEL_PARTICLES$')\n\
-pForceSnd_pp$ID$ = ppSnd_sp$ID$.create(PdataVec3, name='$FLUID_NAME_PFORCE_PARTICLES$')\n\
-pLifeSnd_pp$ID$ = ppSnd_sp$ID$.create(PdataReal, name='$FLUID_NAME_PLIFE_PARTICLES$')\n\
-vel_sp$ID$ = sp$ID$.create(MACGrid, name='$FLUID_NAME_VELOCITY_PARTICLES$')\n\
-flags_sp$ID$ = sp$ID$.create(FlagGrid, name='$FLUID_NAME_FLAGS_PARTICLES$')\n\
-phi_sp$ID$ = sp$ID$.create(LevelsetGrid, name='$FLUID_NAME_PHI_PARTICLES$')\n\
-phiObs_sp$ID$ = sp$ID$.create(LevelsetGrid, name='$FLUID_NAME_PHIOBS_PARTICLES$')\n\
-phiOut_sp$ID$ = sp$ID$.create(LevelsetGrid, name='$FLUID_NAME_PHIOUT_PARTICLES$')\n\
-normal_sp$ID$ = sp$ID$.create(VecGrid, name='$FLUID_NAME_NORMAL_PARTICLES$')\n\
-neighborRatio_sp$ID$ = sp$ID$.create(RealGrid, name='$FLUID_NAME_NEIGHBORRATIO_PARTICLES$')\n\
-trappedAir_sp$ID$ = sp$ID$.create(RealGrid, name='$FLUID_NAME_TRAPPEDAIR_PARTICLES$')\n\
-waveCrest_sp$ID$ = sp$ID$.create(RealGrid, name='$FLUID_NAME_WAVECREST_PARTICLES$')\n\
-kineticEnergy_sp$ID$ = sp$ID$.create(RealGrid, name='$FLUID_NAME_KINETICENERGY_PARTICLES$')\n\
+ppSnd_sp$ID$ = sp$ID$.create(BasicParticleSystem, name='$NAME_PARTS_PARTICLES$')\n\
+pVelSnd_pp$ID$ = ppSnd_sp$ID$.create(PdataVec3, name='$NAME_PARTSVEL_PARTICLES$')\n\
+pForceSnd_pp$ID$ = ppSnd_sp$ID$.create(PdataVec3, name='$NAME_PARTSFORCE_PARTICLES$')\n\
+pLifeSnd_pp$ID$ = ppSnd_sp$ID$.create(PdataReal, name='$NAME_PARTSLIFE_PARTICLES$')\n\
+vel_sp$ID$ = sp$ID$.create(MACGrid, name='$NAME_VELOCITY_PARTICLES$')\n\
+flags_sp$ID$ = sp$ID$.create(FlagGrid, name='$NAME_FLAGS_PARTICLES$')\n\
+phi_sp$ID$ = sp$ID$.create(LevelsetGrid, name='$NAME_PHI_PARTICLES$')\n\
+phiObs_sp$ID$ = sp$ID$.create(LevelsetGrid, name='$NAME_PHIOBS_PARTICLES$')\n\
+phiOut_sp$ID$ = sp$ID$.create(LevelsetGrid, name='$NAME_PHIOUT_PARTICLES$')\n\
+normal_sp$ID$ = sp$ID$.create(VecGrid, name='$NAME_NORMAL_PARTICLES$')\n\
+neighborRatio_sp$ID$ = sp$ID$.create(RealGrid, name='$NAME_NEIGHBORRATIO_PARTICLES$')\n\
+trappedAir_sp$ID$ = sp$ID$.create(RealGrid, name='$NAME_TRAPPEDAIR_PARTICLES$')\n\
+waveCrest_sp$ID$ = sp$ID$.create(RealGrid, name='$NAME_WAVECREST_PARTICLES$')\n\
+kineticEnergy_sp$ID$ = sp$ID$.create(RealGrid, name='$NAME_KINETICENERGY_PARTICLES$')\n\
\n\
# Set some initial values\n\
phi_sp$ID$.setConst(9999)\n\
@@ -152,8 +152,8 @@ phiObs_sp$ID$.setConst(9999)\n\
phiOut_sp$ID$.setConst(9999)\n\
\n\
# Keep track of important objects in dict to load them later on\n\
-liquid_particles_dict_final_s$ID$ = dict(ppSnd=ppSnd_sp$ID$, pVelSnd=pVelSnd_pp$ID$, pLifeSnd=pLifeSnd_pp$ID$)\n\
-liquid_particles_dict_resume_s$ID$ = dict(trappedAir=trappedAir_sp$ID$, waveCrest=waveCrest_sp$ID$, kineticEnergy=kineticEnergy_sp$ID$)\n";
+liquid_particles_dict_final_s$ID$ = { 'pVelSnd' : pVelSnd_pp$ID$, 'pLifeSnd' : pLifeSnd_pp$ID$, 'ppSnd' : ppSnd_sp$ID$ }\n\
+liquid_particles_dict_resume_s$ID$ = { 'trappedAir' : trappedAir_sp$ID$, 'waveCrest' : waveCrest_sp$ID$, 'kineticEnergy' : kineticEnergy_sp$ID$ }\n";
const std::string liquid_init_phi =
"\n\
@@ -211,10 +211,10 @@ def liquid_adaptive_step_$ID$(framenr):\n\
if using_invel_s$ID$:\n\
extrapolateVec3Simple(vel=invelC_s$ID$, phi=phiIn_s$ID$, distance=6, inside=True)\n\
resampleVec3ToMac(source=invelC_s$ID$, target=invel_s$ID$)\n\
- pVel_pp$ID$.setSource(invel_s$ID$, isMAC=True)\n\
- # ensure that pvel has vel as source (important when resuming bake jobs)\n\
+ pVel_pp$ID$.setSource(grid=invel_s$ID$, isMAC=True)\n\
+ # reset pvel grid source before sampling new particles - ensures that new particles are initialized with 0 velocity\n\
else:\n\
- pVel_pp$ID$.setSource(vel_s$ID$, isMAC=True)\n\
+ pVel_pp$ID$.setSource(grid=None, isMAC=False)\n\
\n\
sampleLevelsetWithParticles(phi=phiIn_s$ID$, flags=flags_s$ID$, parts=pp_s$ID$, discretization=particleNumber_s$ID$, randomness=randomness_s$ID$)\n\
flags_s$ID$.updateFromLevelset(phi_s$ID$)\n\
@@ -257,7 +257,7 @@ def liquid_step_$ID$():\n\
extrapolateLsSimple(phi=phi_s$ID$, distance=3)\n\
phi_s$ID$.setBoundNeumann(0) # make sure no particles are placed at outer boundary\n\
\n\
- if doOpen_s$ID$ or using_outflow_s$ID$:\n\
+ if not domainClosed_s$ID$ or using_outflow_s$ID$:\n\
resetOutflow(flags=flags_s$ID$, phi=phi_s$ID$, parts=pp_s$ID$, index=gpi_s$ID$, indexSys=pindex_s$ID$)\n\
flags_s$ID$.updateFromLevelset(phi_s$ID$)\n\
\n\
@@ -298,10 +298,10 @@ def liquid_step_$ID$():\n\
\n\
if using_guiding_s$ID$:\n\
mantaMsg('Guiding and pressure')\n\
- PD_fluid_guiding(vel=vel_s$ID$, velT=velT_s$ID$, flags=flags_s$ID$, phi=phi_s$ID$, curv=curvature_s$ID$, surfTens=surfaceTension_s$ID$, fractions=fractions_s$ID$, weight=weightGuide_s$ID$, blurRadius=beta_sg$ID$, pressure=pressure_s$ID$, tau=tau_sg$ID$, sigma=sigma_sg$ID$, theta=theta_sg$ID$, zeroPressureFixing=not doOpen_s$ID$)\n\
+ PD_fluid_guiding(vel=vel_s$ID$, velT=velT_s$ID$, flags=flags_s$ID$, phi=phi_s$ID$, curv=curvature_s$ID$, surfTens=surfaceTension_s$ID$, fractions=fractions_s$ID$, weight=weightGuide_s$ID$, blurRadius=beta_sg$ID$, pressure=pressure_s$ID$, tau=tau_sg$ID$, sigma=sigma_sg$ID$, theta=theta_sg$ID$, zeroPressureFixing=domainClosed_s$ID$)\n\
else:\n\
mantaMsg('Pressure')\n\
- solvePressure(flags=flags_s$ID$, vel=vel_s$ID$, pressure=pressure_s$ID$, phi=phi_s$ID$, curv=curvature_s$ID$, surfTens=surfaceTension_s$ID$, fractions=fractions_s$ID$, obvel=obvel_s$ID$ if using_fractions_s$ID$ else None)\n\
+ solvePressure(flags=flags_s$ID$, vel=vel_s$ID$, pressure=pressure_s$ID$, phi=phi_s$ID$, curv=curvature_s$ID$, surfTens=surfaceTension_s$ID$, fractions=fractions_s$ID$, obvel=obvel_s$ID$ if using_fractions_s$ID$ else None, zeroPressureFixing=domainClosed_s$ID$)\n\
\n\
extrapolateMACSimple(flags=flags_s$ID$, vel=vel_s$ID$, distance=4, intoObs=True if using_fractions_s$ID$ else False)\n\
setWallBcs(flags=flags_s$ID$, vel=vel_s$ID$, obvel=None if using_fractions_s$ID$ else obvel_s$ID$, phiObs=phiObs_s$ID$, fractions=fractions_s$ID$)\n\
@@ -310,7 +310,7 @@ def liquid_step_$ID$():\n\
extrapolateMACSimple(flags=flags_s$ID$, vel=vel_s$ID$)\n\
\n\
# set source grids for resampling, used in adjustNumber!\n\
- pVel_pp$ID$.setSource(vel_s$ID$, isMAC=True)\n\
+ pVel_pp$ID$.setSource(grid=vel_s$ID$, isMAC=True)\n\
adjustNumber(parts=pp_s$ID$, vel=vel_s$ID$, flags=flags_s$ID$, minParticles=minParticles_s$ID$, maxParticles=maxParticles_s$ID$, phi=phi_s$ID$, exclude=phiObs_s$ID$, radiusFactor=radiusFactor_s$ID$, narrowBand=adjustedNarrowBandWidth_s$ID$)\n\
flipVelocityUpdate(vel=vel_s$ID$, velOld=velOld_s$ID$, flags=flags_s$ID$, parts=pp_s$ID$, partVel=pVel_pp$ID$, flipRatio=flipRatio_s$ID$)\n";
@@ -347,7 +347,7 @@ def liquid_step_mesh_$ID$():\n\
# Vert vel vector needs to pull data from vel grid with correct dim\n\
if using_speedvectors_s$ID$:\n\
interpolateMACGrid(target=vel_sm$ID$, source=vel_s$ID$)\n\
- mVel_mesh$ID$.setSource(vel_sm$ID$, isMAC=True)\n\
+ mVel_mesh$ID$.setSource(grid=vel_sm$ID$, isMAC=True)\n\
\n\
# Set 0.5 boundary at walls + account for extra wall thickness in fractions mode + account for grid scaling:\n\
# E.g. at upres=1 we expect 1 cell border (or 2 with fractions), at upres=2 we expect 2 cell border (or 4 with fractions), etc.\n\
@@ -401,27 +401,29 @@ const std::string liquid_load_data =
"\n\
def liquid_load_data_$ID$(path, framenr, file_format, resumable):\n\
mantaMsg('Liquid load data')\n\
- fluid_file_import_s$ID$(dict=liquid_data_dict_final_s$ID$, path=path, framenr=framenr, file_format=file_format)\n\
- if resumable:\n\
- fluid_file_import_s$ID$(dict=liquid_data_dict_resume_s$ID$, path=path, framenr=framenr, file_format=file_format)\n";
+ dict = { **fluid_data_dict_final_s$ID$, **fluid_data_dict_resume_s$ID$, **liquid_data_dict_final_s$ID$, **liquid_data_dict_resume_s$ID$ } if resumable else { **fluid_data_dict_final_s$ID$, **liquid_data_dict_final_s$ID$ }\n\
+ fluid_file_import_s$ID$(dict=dict, path=path, framenr=framenr, file_format=file_format, file_name=file_data_s$ID$)\n\
+ \n\
+ copyVec3ToReal(source=vel_s$ID$, targetX=x_vel_s$ID$, targetY=y_vel_s$ID$, targetZ=z_vel_s$ID$)\n";
const std::string liquid_load_mesh =
"\n\
def liquid_load_mesh_$ID$(path, framenr, file_format):\n\
mantaMsg('Liquid load mesh')\n\
- fluid_file_import_s$ID$(dict=liquid_mesh_dict_s$ID$, path=path, framenr=framenr, file_format=file_format)\n\
+ dict = liquid_mesh_dict_s$ID$\n\
+ fluid_file_import_s$ID$(dict=dict, path=path, framenr=framenr, file_format=file_format, file_name=file_mesh_s$ID$)\n\
\n\
def liquid_load_meshvel_$ID$(path, framenr, file_format):\n\
mantaMsg('Liquid load meshvel')\n\
- fluid_file_import_s$ID$(dict=liquid_meshvel_dict_s$ID$, path=path, framenr=framenr, file_format=file_format)\n";
+ dict = liquid_meshvel_dict_s$ID$\n\
+ fluid_file_import_s$ID$(dict=dict, path=path, framenr=framenr, file_format=file_format, file_name=file_meshvel_s$ID$)\n";
const std::string liquid_load_particles =
"\n\
def liquid_load_particles_$ID$(path, framenr, file_format, resumable):\n\
mantaMsg('Liquid load particles')\n\
- fluid_file_import_s$ID$(dict=liquid_particles_dict_final_s$ID$, path=path, framenr=framenr, file_format=file_format)\n\
- if resumable:\n\
- fluid_file_import_s$ID$(dict=liquid_particles_dict_resume_s$ID$, path=path, framenr=framenr, file_format=file_format)\n";
+ dict = { **liquid_particles_dict_final_s$ID$, **liquid_particles_dict_resume_s$ID$ } if resumable else { **liquid_particles_dict_final_s$ID$ }\n\
+ fluid_file_import_s$ID$(dict=dict, path=path, framenr=framenr, file_format=file_format, file_name=file_particles_s$ID$)\n";
//////////////////////////////////////////////////////////////////////
// EXPORT
@@ -431,43 +433,39 @@ const std::string liquid_save_data =
"\n\
def liquid_save_data_$ID$(path, framenr, file_format, resumable):\n\
mantaMsg('Liquid save data')\n\
+ dict = { **fluid_data_dict_final_s$ID$, **fluid_data_dict_resume_s$ID$, **liquid_data_dict_final_s$ID$, **liquid_data_dict_resume_s$ID$ } if resumable else { **fluid_data_dict_final_s$ID$, **liquid_data_dict_final_s$ID$ }\n\
if not withMPSave or isWindows:\n\
- fluid_file_export_s$ID$(dict=liquid_data_dict_final_s$ID$, path=path, framenr=framenr, file_format=file_format)\n\
- if resumable:\n\
- fluid_file_export_s$ID$(dict=liquid_data_dict_resume_s$ID$, path=path, framenr=framenr, file_format=file_format)\n\
+ fluid_file_export_s$ID$(dict=dict, path=path, framenr=framenr, file_format=file_format, file_name=file_data_s$ID$)\n\
else:\n\
- fluid_cache_multiprocessing_start_$ID$(function=fluid_file_export_s$ID$, framenr=framenr, format_data=file_format, path_data=path, dict=liquid_data_dict_final_s$ID$, do_join=False)\n\
- if resumable:\n\
- fluid_cache_multiprocessing_start_$ID$(function=fluid_file_export_s$ID$, framenr=framenr, format_data=file_format, path_data=path, dict=liquid_data_dict_resume_s$ID$, do_join=False)\n";
+ fluid_cache_multiprocessing_start_$ID$(function=fluid_file_export_s$ID$, file_name=file_data_s$ID$, framenr=framenr, format_data=file_format, path_data=path, dict=dict, do_join=False)\n";
const std::string liquid_save_mesh =
"\n\
def liquid_save_mesh_$ID$(path, framenr, file_format):\n\
mantaMsg('Liquid save mesh')\n\
+ dict = liquid_mesh_dict_s$ID$\n\
if not withMPSave or isWindows:\n\
- fluid_file_export_s$ID$(dict=liquid_mesh_dict_s$ID$, path=path, framenr=framenr, file_format=file_format)\n\
+ fluid_file_export_s$ID$(dict=dict, path=path, framenr=framenr, file_format=file_format, file_name=file_mesh_s$ID$)\n\
else:\n\
- fluid_cache_multiprocessing_start_$ID$(function=fluid_file_export_s$ID$, framenr=framenr, format_data=file_format, path_data=path, dict=liquid_mesh_dict_s$ID$, do_join=False)\n\
+ fluid_cache_multiprocessing_start_$ID$(function=fluid_file_export_s$ID$, file_name=file_mesh_s$ID$, framenr=framenr, format_data=file_format, path_data=path, dict=dict, do_join=False)\n\
\n\
def liquid_save_meshvel_$ID$(path, framenr, file_format):\n\
mantaMsg('Liquid save mesh vel')\n\
+ dict = liquid_meshvel_dict_s$ID$\n\
if not withMPSave or isWindows:\n\
- fluid_file_export_s$ID$(dict=liquid_meshvel_dict_s$ID$, path=path, framenr=framenr, file_format=file_format)\n\
+ fluid_file_export_s$ID$(dict=dict, path=path, framenr=framenr, file_format=file_format)\n\
else:\n\
- fluid_cache_multiprocessing_start_$ID$(function=fluid_file_export_s$ID$, framenr=framenr, format_data=file_format, path_data=path, dict=liquid_meshvel_dict_s$ID$, do_join=False)\n";
+ fluid_cache_multiprocessing_start_$ID$(function=fluid_file_export_s$ID$, framenr=framenr, format_data=file_format, path_data=path, dict=dict, do_join=False)\n";
const std::string liquid_save_particles =
"\n\
def liquid_save_particles_$ID$(path, framenr, file_format, resumable):\n\
mantaMsg('Liquid save particles')\n\
+ dict = { **liquid_particles_dict_final_s$ID$, **liquid_particles_dict_resume_s$ID$ } if resumable else { **liquid_particles_dict_final_s$ID$ }\n\
if not withMPSave or isWindows:\n\
- fluid_file_export_s$ID$(dict=liquid_particles_dict_final_s$ID$, path=path, framenr=framenr, file_format=file_format)\n\
- if resumable:\n\
- fluid_file_export_s$ID$(dict=liquid_particles_dict_resume_s$ID$, path=path, framenr=framenr, file_format=file_format)\n\
+ fluid_file_export_s$ID$(dict=dict, path=path, framenr=framenr, file_format=file_format, file_name=file_particles_s$ID$)\n\
else:\n\
- fluid_cache_multiprocessing_start_$ID$(function=fluid_file_export_s$ID$, framenr=framenr, format_data=file_format, path_data=path, dict=liquid_particles_dict_final_s$ID$, do_join=False)\n\
- if resumable:\n\
- fluid_cache_multiprocessing_start_$ID$(function=fluid_file_export_s$ID$, framenr=framenr, format_data=file_format, path_data=path, dict=liquid_particles_dict_resume_s$ID$, do_join=False)\n";
+ fluid_cache_multiprocessing_start_$ID$(function=fluid_file_export_s$ID$, file_name=file_particles_s$ID$, framenr=framenr, format_data=file_format, path_data=path, dict=dict, do_join=False)\n";
//////////////////////////////////////////////////////////////////////
// STANDALONE MODE
@@ -477,7 +475,6 @@ const std::string liquid_standalone =
"\n\
# Helper function to call cache load functions\n\
def load(frame, cache_resumable):\n\
- fluid_load_data_$ID$(os.path.join(cache_dir, 'data'), frame, file_format_data, cache_resumable)\n\
liquid_load_data_$ID$(os.path.join(cache_dir, 'data'), frame, file_format_data, cache_resumable)\n\
if using_sndparts_s$ID$:\n\
liquid_load_particles_$ID$(os.path.join(cache_dir, 'particles'), frame, file_format_particles, cache_resumable)\n\
diff --git a/intern/mantaflow/intern/strings/smoke_script.h b/intern/mantaflow/intern/strings/smoke_script.h
index a592ad8644a..612d01b85ef 100644
--- a/intern/mantaflow/intern/strings/smoke_script.h
+++ b/intern/mantaflow/intern/strings/smoke_script.h
@@ -101,8 +101,8 @@ color_g_in_s$ID$ = None\n\
color_b_in_s$ID$ = None\n\
\n\
# Keep track of important objects in dict to load them later on\n\
-smoke_data_dict_final_s$ID$ = dict(density=density_s$ID$, shadow=shadow_s$ID$)\n\
-smoke_data_dict_resume_s$ID$ = dict(densityIn=densityIn_s$ID$, emission=emission_s$ID$)\n";
+smoke_data_dict_final_s$ID$ = { 'density' : density_s$ID$, 'shadow' : shadow_s$ID$ }\n\
+smoke_data_dict_resume_s$ID$ = { 'densityIn' : densityIn_s$ID$, 'emission' : emission_s$ID$ }\n";
const std::string smoke_alloc_noise =
"\n\
@@ -213,8 +213,8 @@ if 'heat_s$ID$' in globals(): del heat_s$ID$\n\
if 'heatIn_s$ID$' in globals(): del heatIn_s$ID$\n\
\n\
mantaMsg('Allocating heat')\n\
-heat_s$ID$ = s$ID$.create(RealGrid, name='$NAME_HEAT$')\n\
-heatIn_s$ID$ = s$ID$.create(RealGrid, name='$NAME_HEATIN$')\n\
+heat_s$ID$ = s$ID$.create(RealGrid, name='$NAME_TEMPERATURE$')\n\
+heatIn_s$ID$ = s$ID$.create(RealGrid, name='$NAME_TEMPERATUREIN$')\n\
\n\
# Add objects to dict to load them later on\n\
if 'smoke_data_dict_final_s$ID$' in globals():\n\
@@ -365,7 +365,7 @@ def smoke_step_$ID$():\n\
mantaMsg('Advecting velocity')\n\
advectSemiLagrange(flags=flags_s$ID$, vel=vel_s$ID$, grid=vel_s$ID$, order=2)\n\
\n\
- if doOpen_s$ID$ or using_outflow_s$ID$:\n\
+ if not domainClosed_s$ID$ or using_outflow_s$ID$:\n\
resetOutflow(flags=flags_s$ID$, real=density_s$ID$)\n\
\n\
mantaMsg('Vorticity')\n\
@@ -406,10 +406,10 @@ def smoke_step_$ID$():\n\
mantaMsg('Using preconditioner: ' + str(preconditioner_s$ID$))\n\
if using_guiding_s$ID$:\n\
mantaMsg('Guiding and pressure')\n\
- PD_fluid_guiding(vel=vel_s$ID$, velT=velT_s$ID$, flags=flags_s$ID$, weight=weightGuide_s$ID$, blurRadius=beta_sg$ID$, pressure=pressure_s$ID$, tau=tau_sg$ID$, sigma=sigma_sg$ID$, theta=theta_sg$ID$, preconditioner=preconditioner_s$ID$, zeroPressureFixing=not doOpen_s$ID$)\n\
+ PD_fluid_guiding(vel=vel_s$ID$, velT=velT_s$ID$, flags=flags_s$ID$, weight=weightGuide_s$ID$, blurRadius=beta_sg$ID$, pressure=pressure_s$ID$, tau=tau_sg$ID$, sigma=sigma_sg$ID$, theta=theta_sg$ID$, preconditioner=preconditioner_s$ID$, zeroPressureFixing=domainClosed_s$ID$)\n\
else:\n\
mantaMsg('Pressure')\n\
- solvePressure(flags=flags_s$ID$, vel=vel_s$ID$, pressure=pressure_s$ID$, preconditioner=preconditioner_s$ID$, zeroPressureFixing=not doOpen_s$ID$) # closed domains require pressure fixing\n\
+ solvePressure(flags=flags_s$ID$, vel=vel_s$ID$, pressure=pressure_s$ID$, preconditioner=preconditioner_s$ID$, zeroPressureFixing=domainClosed_s$ID$) # closed domains require pressure fixing\n\
\n\
def process_burn_$ID$():\n\
mantaMsg('Process burn')\n\
@@ -542,19 +542,19 @@ const std::string smoke_load_data =
"\n\
def smoke_load_data_$ID$(path, framenr, file_format, resumable):\n\
mantaMsg('Smoke load data')\n\
- fluid_file_import_s$ID$(dict=smoke_data_dict_final_s$ID$, path=path, framenr=framenr, file_format=file_format)\n\
- if resumable:\n\
- fluid_file_import_s$ID$(dict=smoke_data_dict_resume_s$ID$, path=path, framenr=framenr, file_format=file_format)\n";
+ dict = { **fluid_data_dict_final_s$ID$, **fluid_data_dict_resume_s$ID$, **smoke_data_dict_final_s$ID$, **smoke_data_dict_resume_s$ID$ } if resumable else { **fluid_data_dict_final_s$ID$, **smoke_data_dict_final_s$ID$ }\n\
+ fluid_file_import_s$ID$(dict=dict, path=path, framenr=framenr, file_format=file_format, file_name=file_data_s$ID$)\n\
+ \n\
+ copyVec3ToReal(source=vel_s$ID$, targetX=x_vel_s$ID$, targetY=y_vel_s$ID$, targetZ=z_vel_s$ID$)\n";
const std::string smoke_load_noise =
"\n\
def smoke_load_noise_$ID$(path, framenr, file_format, resumable):\n\
mantaMsg('Smoke load noise')\n\
- fluid_file_import_s$ID$(dict=smoke_noise_dict_final_s$ID$, path=path, framenr=framenr, file_format=file_format)\n\
+ dict = { **smoke_noise_dict_final_s$ID$, **smoke_noise_dict_resume_s$ID$ } if resumable else { **smoke_noise_dict_final_s$ID$ } \n\
+ fluid_file_import_s$ID$(dict=dict, path=path, framenr=framenr, file_format=file_format, file_name=file_noise_s$ID$)\n\
\n\
if resumable:\n\
- fluid_file_import_s$ID$(dict=smoke_noise_dict_resume_s$ID$, path=path, framenr=framenr, file_format=file_format)\n\
- \n\
# Fill up xyz texture grids, important when resuming a bake\n\
copyVec3ToReal(source=uvGrid0_s$ID$, targetX=texture_u_s$ID$, targetY=texture_v_s$ID$, targetZ=texture_w_s$ID$)\n\
copyVec3ToReal(source=uvGrid1_s$ID$, targetX=texture_u2_s$ID$, targetY=texture_v2_s$ID$, targetZ=texture_w2_s$ID$)\n";
@@ -568,28 +568,22 @@ const std::string smoke_save_data =
def smoke_save_data_$ID$(path, framenr, file_format, resumable):\n\
mantaMsg('Smoke save data')\n\
start_time = time.time()\n\
+ dict = { **fluid_data_dict_final_s$ID$, **fluid_data_dict_resume_s$ID$, **smoke_data_dict_final_s$ID$, **smoke_data_dict_resume_s$ID$ } if resumable else { **fluid_data_dict_final_s$ID$, **smoke_data_dict_final_s$ID$ } \n\
if not withMPSave or isWindows:\n\
- fluid_file_export_s$ID$(framenr=framenr, file_format=file_format, path=path, dict=smoke_data_dict_final_s$ID$,)\n\
- if resumable:\n\
- fluid_file_export_s$ID$(framenr=framenr, file_format=file_format, path=path, dict=smoke_data_dict_resume_s$ID$,)\n\
+ fluid_file_export_s$ID$(dict=dict, path=path, framenr=framenr, file_format=file_format, file_name=file_data_s$ID$)\n\
else:\n\
- fluid_cache_multiprocessing_start_$ID$(function=fluid_file_export_s$ID$, framenr=framenr, format_data=file_format, path_data=path, dict=smoke_data_dict_final_s$ID$, do_join=False)\n\
- if resumable:\n\
- fluid_cache_multiprocessing_start_$ID$(function=fluid_file_export_s$ID$, framenr=framenr, format_data=file_format, path_data=path, dict=smoke_data_dict_resume_s$ID$, do_join=False)\n\
+ fluid_cache_multiprocessing_start_$ID$(function=fluid_file_export_s$ID$, file_name=file_data_s$ID$, framenr=framenr, format_data=file_format, path_data=path, dict=dict, do_join=False)\n\
mantaMsg('--- Save: %s seconds ---' % (time.time() - start_time))\n";
const std::string smoke_save_noise =
"\n\
def smoke_save_noise_$ID$(path, framenr, file_format, resumable):\n\
mantaMsg('Smoke save noise')\n\
+ dict = { **smoke_noise_dict_final_s$ID$, **smoke_noise_dict_resume_s$ID$ } if resumable else { **smoke_noise_dict_final_s$ID$ } \n\
if not withMPSave or isWindows:\n\
- fluid_file_export_s$ID$(dict=smoke_noise_dict_final_s$ID$, framenr=framenr, file_format=file_format, path=path)\n\
- if resumable:\n\
- fluid_file_export_s$ID$(dict=smoke_noise_dict_resume_s$ID$, framenr=framenr, file_format=file_format, path=path)\n\
+ fluid_file_export_s$ID$(dict=dict, framenr=framenr, file_format=file_format, path=path, file_name=file_noise_s$ID$)\n\
else:\n\
- fluid_cache_multiprocessing_start_$ID$(function=fluid_file_export_s$ID$, framenr=framenr, format_data=file_format, path_data=path, dict=smoke_noise_dict_final_s$ID$, do_join=False)\n\
- if resumable:\n\
- fluid_cache_multiprocessing_start_$ID$(function=fluid_file_export_s$ID$, framenr=framenr, format_data=file_format, path_data=path, dict=smoke_noise_dict_resume_s$ID$, do_join=False)\n";
+ fluid_cache_multiprocessing_start_$ID$(function=fluid_file_export_s$ID$, file_name=file_noise_s$ID$, framenr=framenr, format_data=file_format, path_data=path, dict=dict, do_join=False)\n";
//////////////////////////////////////////////////////////////////////
// STANDALONE MODE
@@ -599,7 +593,6 @@ const std::string smoke_standalone =
"\n\
# Helper function to call cache load functions\n\
def load(frame, cache_resumable):\n\
- fluid_load_data_$ID$(os.path.join(cache_dir, 'data'), frame, file_format_data, cache_resumable)\n\
smoke_load_data_$ID$(os.path.join(cache_dir, 'data'), frame, file_format_data, cache_resumable)\n\
if using_noise_s$ID$:\n\
smoke_load_noise_$ID$(os.path.join(cache_dir, 'noise'), frame, file_format_noise, cache_resumable)\n\
diff --git a/intern/quadriflow/quadriflow_capi.cpp b/intern/quadriflow/quadriflow_capi.cpp
index 302c7a0ae30..53237289874 100644
--- a/intern/quadriflow/quadriflow_capi.cpp
+++ b/intern/quadriflow/quadriflow_capi.cpp
@@ -49,7 +49,7 @@ struct ObjVertex {
}
};
-struct ObjVertexHash : std::unary_function<ObjVertex, size_t> {
+struct ObjVertexHash {
std::size_t operator()(const ObjVertex &v) const
{
size_t hash = std::hash<uint32_t>()(v.p);
diff --git a/intern/rigidbody/RBI_api.h b/intern/rigidbody/RBI_api.h
index 9546b840419..d46cb5a7eed 100644
--- a/intern/rigidbody/RBI_api.h
+++ b/intern/rigidbody/RBI_api.h
@@ -175,7 +175,7 @@ void RB_body_set_linear_velocity(rbRigidBody *body, const float v_in[3]);
void RB_body_get_angular_velocity(rbRigidBody *body, float v_out[3]);
void RB_body_set_angular_velocity(rbRigidBody *body, const float v_in[3]);
-/* Linear/Angular Factor, used to lock translation/roation axes */
+/* Linear/Angular Factor, used to lock translation/rotation axes */
void RB_body_set_linear_factor(rbRigidBody *object, float x, float y, float z);
void RB_body_set_angular_factor(rbRigidBody *object, float x, float y, float z);