diff options
Diffstat (limited to 'intern/cycles')
75 files changed, 1479 insertions, 594 deletions
diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py index f669adb9f37..1afb321da3d 100644 --- a/intern/cycles/blender/addon/properties.py +++ b/intern/cycles/blender/addon/properties.py @@ -74,7 +74,7 @@ enum_panorama_types = ( "Similar to most fisheye modern lens, takes sensor dimensions into consideration"), ('MIRRORBALL', "Mirror Ball", "Uses the mirror ball mapping"), ('FISHEYE_LENS_POLYNOMIAL', "Fisheye Lens Polynomial", - "Defines the lens projection as polynomial to allow real world camera lenses to be mimicked."), + "Defines the lens projection as polynomial to allow real world camera lenses to be mimicked"), ) enum_curve_shape = ( @@ -667,6 +667,11 @@ class CyclesRenderSettings(bpy.types.PropertyGroup): description="Use special type BVH optimized for hair (uses more ram but renders faster)", default=True, ) + debug_use_compact_bvh: BoolProperty( + name="Use Compact BVH", + description="Use compact BVH structure (uses less ram but renders slower)", + default=True, + ) debug_bvh_time_steps: IntProperty( name="BVH Time Steps", description="Split BVH primitives by this number of time steps to speed up render time in cost of memory", @@ -896,27 +901,27 @@ class CyclesCameraSettings(bpy.types.PropertyGroup): fisheye_polynomial_k0: FloatProperty( name="Fisheye Polynomial K0", - description="Coefficient K0 of the lens polinomial", + description="Coefficient K0 of the lens polynomial", default=camera.default_fisheye_polynomial[0], precision=6, step=0.1, subtype='ANGLE', ) fisheye_polynomial_k1: FloatProperty( name="Fisheye Polynomial K1", - description="Coefficient K1 of the lens polinomial", + description="Coefficient K1 of the lens polynomial", default=camera.default_fisheye_polynomial[1], precision=6, step=0.1, subtype='ANGLE', ) fisheye_polynomial_k2: FloatProperty( name="Fisheye Polynomial K2", - description="Coefficient K2 of the lens polinomial", + description="Coefficient K2 of the lens polynomial", default=camera.default_fisheye_polynomial[2], precision=6, step=0.1, subtype='ANGLE', ) fisheye_polynomial_k3: FloatProperty( name="Fisheye Polynomial K3", - description="Coefficient K3 of the lens polinomial", + description="Coefficient K3 of the lens polynomial", default=camera.default_fisheye_polynomial[3], precision=6, step=0.1, subtype='ANGLE', ) fisheye_polynomial_k4: FloatProperty( name="Fisheye Polynomial K4", - description="Coefficient K4 of the lens polinomial", + description="Coefficient K4 of the lens polynomial", default=camera.default_fisheye_polynomial[4], precision=6, step=0.1, subtype='ANGLE', ) @@ -1447,6 +1452,19 @@ class CyclesPreferences(bpy.types.AddonPreferences): num += 1 return num + def has_multi_device(self): + import _cycles + compute_device_type = self.get_compute_device_type() + device_list = _cycles.available_devices(compute_device_type) + for device in device_list: + if device[1] == compute_device_type: + continue + for dev in self.devices: + if dev.use and dev.id == device[2]: + return True + + return False + def has_active_device(self): return self.get_num_gpu_devices() > 0 diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py index 5b600692152..e4b2fef87c3 100644 --- a/intern/cycles/blender/addon/ui.py +++ b/intern/cycles/blender/addon/ui.py @@ -118,6 +118,12 @@ def use_optix(context): return (get_device_type(context) == 'OPTIX' and cscene.device == 'GPU') +def use_multi_device(context): + cscene = context.scene.cycles + if cscene.device != 'GPU': + return False + return context.preferences.addons[__package__].preferences.has_multi_device() + def show_device_active(context): cscene = context.scene.cycles @@ -661,6 +667,10 @@ class CYCLES_RENDER_PT_performance_acceleration_structure(CyclesButtonsPanel, Pa bl_label = "Acceleration Structure" bl_parent_id = "CYCLES_RENDER_PT_performance" + @classmethod + def poll(cls, context): + return not use_optix(context) or use_multi_device(context) + def draw(self, context): import _cycles @@ -673,21 +683,33 @@ class CYCLES_RENDER_PT_performance_acceleration_structure(CyclesButtonsPanel, Pa col = layout.column() - use_embree = False + use_embree = _cycles.with_embree + if use_cpu(context): - use_embree = _cycles.with_embree - if not use_embree: + col.prop(cscene, "debug_use_spatial_splits") + if use_embree: + col.prop(cscene, "debug_use_compact_bvh") + else: + sub = col.column() + sub.active = not cscene.debug_use_spatial_splits + sub.prop(cscene, "debug_bvh_time_steps") + + col.prop(cscene, "debug_use_hair_bvh") + sub = col.column(align=True) sub.label(text="Cycles built without Embree support") sub.label(text="CPU raytracing performance will be poor") + else: + col.prop(cscene, "debug_use_spatial_splits") + sub = col.column() + sub.active = not cscene.debug_use_spatial_splits + sub.prop(cscene, "debug_bvh_time_steps") - col.prop(cscene, "debug_use_spatial_splits") - sub = col.column() - sub.active = not use_embree - sub.prop(cscene, "debug_use_hair_bvh") - sub = col.column() - sub.active = not cscene.debug_use_spatial_splits and not use_embree - sub.prop(cscene, "debug_bvh_time_steps") + col.prop(cscene, "debug_use_hair_bvh") + + # CPU is used in addition to a GPU + if use_multi_device(context) and use_embree: + col.prop(cscene, "debug_use_compact_bvh") class CYCLES_RENDER_PT_performance_final_render(CyclesButtonsPanel, Panel): diff --git a/intern/cycles/blender/curves.cpp b/intern/cycles/blender/curves.cpp index 65a02d041cc..102ddf5ee32 100644 --- a/intern/cycles/blender/curves.cpp +++ b/intern/cycles/blender/curves.cpp @@ -14,6 +14,8 @@ * limitations under the License. */ +#include <optional> + #include "blender/sync.h" #include "blender/util.h" @@ -624,15 +626,36 @@ void BlenderSync::sync_particle_hair( } } -#ifdef WITH_HAIR_NODES -static float4 hair_point_as_float4(BL::HairPoint b_point) +#ifdef WITH_NEW_CURVES_TYPE + +static std::optional<BL::FloatAttribute> find_curves_radius_attribute(BL::Curves b_curves) { - float4 mP = float3_to_float4(get_float3(b_point.co())); - mP.w = b_point.radius(); + for (BL::Attribute &b_attribute : b_curves.attributes) { + if (b_attribute.name() != "radius") { + continue; + } + if (b_attribute.domain() != BL::Attribute::domain_POINT) { + continue; + } + if (b_attribute.data_type() != BL::Attribute::data_type_FLOAT) { + continue; + } + return BL::FloatAttribute{b_attribute}; + } + return std::nullopt; +} + +static float4 hair_point_as_float4(BL::Curves b_curves, + std::optional<BL::FloatAttribute> b_attr_radius, + const int index) +{ + float4 mP = float3_to_float4(get_float3(b_curves.position_data[index].vector())); + mP.w = b_attr_radius ? b_attr_radius->data[index].value() : 0.0f; return mP; } -static float4 interpolate_hair_points(BL::Hair b_hair, +static float4 interpolate_hair_points(BL::Curves b_curves, + std::optional<BL::FloatAttribute> b_attr_radius, const int first_point_index, const int num_points, const float step) @@ -641,12 +664,12 @@ static float4 interpolate_hair_points(BL::Hair b_hair, const int point_a = clamp((int)curve_t, 0, num_points - 1); const int point_b = min(point_a + 1, num_points - 1); const float t = curve_t - (float)point_a; - return lerp(hair_point_as_float4(b_hair.points[first_point_index + point_a]), - hair_point_as_float4(b_hair.points[first_point_index + point_b]), + return lerp(hair_point_as_float4(b_curves, b_attr_radius, first_point_index + point_a), + hair_point_as_float4(b_curves, b_attr_radius, first_point_index + point_b), t); } -static void export_hair_curves(Scene *scene, Hair *hair, BL::Hair b_hair) +static void export_hair_curves(Scene *scene, Hair *hair, BL::Curves b_curves) { /* TODO: optimize so we can straight memcpy arrays from Blender? */ @@ -666,17 +689,19 @@ static void export_hair_curves(Scene *scene, Hair *hair, BL::Hair b_hair) } /* Reserve memory. */ - const int num_keys = b_hair.points.length(); - const int num_curves = b_hair.curves.length(); + const int num_keys = b_curves.points.length(); + const int num_curves = b_curves.curves.length(); hair->reserve_curves(num_curves, num_keys); + std::optional<BL::FloatAttribute> b_attr_radius = find_curves_radius_attribute(b_curves); + /* Export curves and points. */ vector<float> points_length; - for (BL::HairCurve &b_curve : b_hair.curves) { - const int first_point_index = b_curve.first_point_index(); - const int num_points = b_curve.num_points(); + for (int i = 0; i < num_curves; i++) { + const int first_point_index = b_curves.curve_offset_data[i].value(); + const int num_points = b_curves.curve_offset_data[i + 1].value() - first_point_index; float3 prev_co = zero_float3(); float length = 0.0f; @@ -687,10 +712,9 @@ static void export_hair_curves(Scene *scene, Hair *hair, BL::Hair b_hair) /* Position and radius. */ for (int i = 0; i < num_points; i++) { - BL::HairPoint b_point = b_hair.points[first_point_index + i]; - - const float3 co = get_float3(b_point.co()); - const float radius = b_point.radius(); + const float3 co = get_float3(b_curves.position_data[first_point_index + i].vector()); + const float radius = b_attr_radius ? b_attr_radius->data[first_point_index + i].value() : + 0.0f; hair->add_curve_key(co, radius); if (attr_intercept) { @@ -715,7 +739,7 @@ static void export_hair_curves(Scene *scene, Hair *hair, BL::Hair b_hair) /* Random number per curve. */ if (attr_random != NULL) { - attr_random->add(hash_uint2_to_float(b_curve.index(), 0)); + attr_random->add(hash_uint2_to_float(i, 0)); } /* Curve. */ @@ -724,7 +748,7 @@ static void export_hair_curves(Scene *scene, Hair *hair, BL::Hair b_hair) } } -static void export_hair_curves_motion(Hair *hair, BL::Hair b_hair, int motion_step) +static void export_hair_curves_motion(Hair *hair, BL::Curves b_curves, int motion_step) { /* Find or add attribute. */ Attribute *attr_mP = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); @@ -737,14 +761,17 @@ static void export_hair_curves_motion(Hair *hair, BL::Hair b_hair, int motion_st /* Export motion keys. */ const int num_keys = hair->get_curve_keys().size(); + const int num_curves = b_curves.curves.length(); float4 *mP = attr_mP->data_float4() + motion_step * num_keys; bool have_motion = false; int num_motion_keys = 0; int curve_index = 0; - for (BL::HairCurve &b_curve : b_hair.curves) { - const int first_point_index = b_curve.first_point_index(); - const int num_points = b_curve.num_points(); + std::optional<BL::FloatAttribute> b_attr_radius = find_curves_radius_attribute(b_curves); + + for (int i = 0; i < num_curves; i++) { + const int first_point_index = b_curves.curve_offset_data[i].value(); + const int num_points = b_curves.curve_offset_data[i + 1].value() - first_point_index; Hair::Curve curve = hair->get_curve(curve_index); curve_index++; @@ -755,7 +782,7 @@ static void export_hair_curves_motion(Hair *hair, BL::Hair b_hair, int motion_st int point_index = first_point_index + i; if (point_index < num_keys) { - mP[num_motion_keys] = hair_point_as_float4(b_hair.points[point_index]); + mP[num_motion_keys] = hair_point_as_float4(b_curves, b_attr_radius, point_index); num_motion_keys++; if (!have_motion) { @@ -774,7 +801,8 @@ static void export_hair_curves_motion(Hair *hair, BL::Hair b_hair, int motion_st const float step_size = curve.num_keys > 1 ? 1.0f / (curve.num_keys - 1) : 0.0f; for (int i = 0; i < curve.num_keys; i++) { const float step = i * step_size; - mP[num_motion_keys] = interpolate_hair_points(b_hair, first_point_index, num_points, step); + mP[num_motion_keys] = interpolate_hair_points( + b_curves, b_attr_radius, first_point_index, num_points, step); num_motion_keys++; } have_motion = true; @@ -791,12 +819,12 @@ static void export_hair_curves_motion(Hair *hair, BL::Hair b_hair, int motion_st void BlenderSync::sync_hair(Hair *hair, BObjectInfo &b_ob_info, bool motion, int motion_step) { /* Convert Blender hair to Cycles curves. */ - BL::Hair b_hair(b_ob_info.object_data); + BL::Curves b_curves(b_ob_info.object_data); if (motion) { - export_hair_curves_motion(hair, b_hair, motion_step); + export_hair_curves_motion(hair, b_curves, motion_step); } else { - export_hair_curves(scene, hair, b_hair); + export_hair_curves(scene, hair, b_curves); } } #else @@ -819,8 +847,8 @@ void BlenderSync::sync_hair(BL::Depsgraph b_depsgraph, BObjectInfo &b_ob_info, H new_hair.set_used_shaders(used_shaders); if (view_layer.use_hair) { -#ifdef WITH_HAIR_NODES - if (b_ob_info.object_data.is_a(&RNA_Hair)) { +#ifdef WITH_NEW_CURVES_TYPE + if (b_ob_info.object_data.is_a(&RNA_Curves)) { /* Hair object. */ sync_hair(&new_hair, b_ob_info, false); } @@ -873,8 +901,8 @@ void BlenderSync::sync_hair_motion(BL::Depsgraph b_depsgraph, /* Export deformed coordinates. */ if (ccl::BKE_object_is_deform_modified(b_ob_info, b_scene, preview)) { -#ifdef WITH_HAIR_NODES - if (b_ob_info.object_data.is_a(&RNA_Hair)) { +#ifdef WITH_NEW_CURVES_TYPE + if (b_ob_info.object_data.is_a(&RNA_Curves)) { /* Hair object. */ sync_hair(hair, b_ob_info, true, motion_step); return; diff --git a/intern/cycles/blender/geometry.cpp b/intern/cycles/blender/geometry.cpp index 78c803b7adb..a9b61f2578f 100644 --- a/intern/cycles/blender/geometry.cpp +++ b/intern/cycles/blender/geometry.cpp @@ -32,8 +32,8 @@ CCL_NAMESPACE_BEGIN static Geometry::Type determine_geom_type(BObjectInfo &b_ob_info, bool use_particle_hair) { -#ifdef WITH_HAIR_NODES - if (b_ob_info.object_data.is_a(&RNA_Hair) || use_particle_hair) { +#ifdef WITH_NEW_CURVES_TYPE + if (b_ob_info.object_data.is_a(&RNA_Curves) || use_particle_hair) { #else if (use_particle_hair) { #endif @@ -231,8 +231,8 @@ void BlenderSync::sync_geometry_motion(BL::Depsgraph &b_depsgraph, if (progress.get_cancel()) return; -#ifdef WITH_HAIR_NODES - if (b_ob_info.object_data.is_a(&RNA_Hair) || use_particle_hair) { +#ifdef WITH_NEW_CURVES_TYPE + if (b_ob_info.object_data.is_a(&RNA_Curves) || use_particle_hair) { #else if (use_particle_hair) { #endif diff --git a/intern/cycles/blender/object.cpp b/intern/cycles/blender/object.cpp index 65a04a39660..22acc09c538 100644 --- a/intern/cycles/blender/object.cpp +++ b/intern/cycles/blender/object.cpp @@ -72,7 +72,7 @@ bool BlenderSync::object_is_geometry(BObjectInfo &b_ob_info) BL::Object::type_enum type = b_ob_info.iter_object.type(); - if (type == BL::Object::type_VOLUME || type == BL::Object::type_HAIR || + if (type == BL::Object::type_VOLUME || type == BL::Object::type_CURVES || type == BL::Object::type_POINTCLOUD) { /* Will be exported attached to mesh. */ return true; @@ -97,7 +97,7 @@ bool BlenderSync::object_can_have_geometry(BL::Object &b_ob) case BL::Object::type_SURFACE: case BL::Object::type_META: case BL::Object::type_FONT: - case BL::Object::type_HAIR: + case BL::Object::type_CURVES: case BL::Object::type_POINTCLOUD: case BL::Object::type_VOLUME: return true; diff --git a/intern/cycles/blender/output_driver.cpp b/intern/cycles/blender/output_driver.cpp index d5cc0c60bae..f35b48493cb 100644 --- a/intern/cycles/blender/output_driver.cpp +++ b/intern/cycles/blender/output_driver.cpp @@ -51,8 +51,6 @@ bool BlenderOutputDriver::read_render_tile(const Tile &tile) BL::RenderLayer b_rlay = *b_single_rlay; - vector<float> pixels(static_cast<size_t>(tile.size.x) * tile.size.y * 4); - /* Copy each pass. * TODO:copy only the required ones for better performance? */ for (BL::RenderPass &b_pass : b_rlay.passes) { diff --git a/intern/cycles/blender/shader.cpp b/intern/cycles/blender/shader.cpp index 5604c2989fd..39e49ac3478 100644 --- a/intern/cycles/blender/shader.cpp +++ b/intern/cycles/blender/shader.cpp @@ -689,6 +689,9 @@ static ShaderNode *add_node(Scene *scene, else if (b_node.is_a(&RNA_ShaderNodeHairInfo)) { node = graph->create_node<HairInfoNode>(); } + else if (b_node.is_a(&RNA_ShaderNodePointInfo)) { + node = graph->create_node<PointInfoNode>(); + } else if (b_node.is_a(&RNA_ShaderNodeVolumeInfo)) { node = graph->create_node<VolumeInfoNode>(); } diff --git a/intern/cycles/blender/sync.cpp b/intern/cycles/blender/sync.cpp index 588e057b9ad..7e6f1535d66 100644 --- a/intern/cycles/blender/sync.cpp +++ b/intern/cycles/blender/sync.cpp @@ -787,6 +787,7 @@ SceneParams BlenderSync::get_scene_params(BL::Scene &b_scene, bool background) params.bvh_type = BVH_TYPE_DYNAMIC; params.use_bvh_spatial_split = RNA_boolean_get(&cscene, "debug_use_spatial_splits"); + params.use_bvh_compact_structure = RNA_boolean_get(&cscene, "debug_use_compact_bvh"); params.use_bvh_unaligned_nodes = RNA_boolean_get(&cscene, "debug_use_hair_bvh"); params.num_bvh_time_steps = RNA_int_get(&cscene, "debug_bvh_time_steps"); diff --git a/intern/cycles/bvh/embree.cpp b/intern/cycles/bvh/embree.cpp index 618dd9438d5..616b6273e6a 100644 --- a/intern/cycles/bvh/embree.cpp +++ b/intern/cycles/bvh/embree.cpp @@ -66,6 +66,26 @@ static_assert(Object::MAX_MOTION_STEPS == Geometry::MAX_MOTION_STEPS, * as well as filtering for volume objects happen here. * Cycles' own BVH does that directly inside the traversal calls. */ +static void rtc_filter_intersection_func(const RTCFilterFunctionNArguments *args) +{ + /* Current implementation in Cycles assumes only single-ray intersection queries. */ + assert(args->N == 1); + + RTCHit *hit = (RTCHit *)args->hit; + CCLIntersectContext *ctx = ((IntersectContext *)args->context)->userRayExt; + const KernelGlobalsCPU *kg = ctx->kg; + const Ray *cray = ctx->ray; + + if (kernel_embree_is_self_intersection(kg, hit, cray)) { + *args->valid = 0; + } +} + +/* This gets called by Embree at every valid ray/object intersection. + * Things like recording subsurface or shadow hits for later evaluation + * as well as filtering for volume objects happen here. + * Cycles' own BVH does that directly inside the traversal calls. + */ static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments *args) { /* Current implementation in Cycles assumes only single-ray intersection queries. */ @@ -75,12 +95,16 @@ static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments *args) RTCHit *hit = (RTCHit *)args->hit; CCLIntersectContext *ctx = ((IntersectContext *)args->context)->userRayExt; const KernelGlobalsCPU *kg = ctx->kg; + const Ray *cray = ctx->ray; switch (ctx->type) { case CCLIntersectContext::RAY_SHADOW_ALL: { Intersection current_isect; kernel_embree_convert_hit(kg, ray, hit, ¤t_isect); - + if (intersection_skip_self_shadow(cray->self, current_isect.object, current_isect.prim)) { + *args->valid = 0; + return; + } /* If no transparent shadows or max number of hits exceeded, all light is blocked. */ const int flags = intersection_get_shader_flags(kg, current_isect.prim, current_isect.type); if (!(flags & (SD_HAS_TRANSPARENT_SHADOW)) || ctx->num_hits >= ctx->max_hits) { @@ -160,6 +184,10 @@ static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments *args) break; } } + if (intersection_skip_self_local(cray->self, current_isect.prim)) { + *args->valid = 0; + return; + } /* No intersection information requested, just return a hit. */ if (ctx->max_hits == 0) { @@ -225,6 +253,11 @@ static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments *args) if (ctx->num_hits < ctx->max_hits) { Intersection current_isect; kernel_embree_convert_hit(kg, ray, hit, ¤t_isect); + if (intersection_skip_self(cray->self, current_isect.object, current_isect.prim)) { + *args->valid = 0; + return; + } + Intersection *isect = &ctx->isect_s[ctx->num_hits]; ++ctx->num_hits; *isect = current_isect; @@ -236,12 +269,15 @@ static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments *args) } /* This tells Embree to continue tracing. */ *args->valid = 0; - break; } + break; } case CCLIntersectContext::RAY_REGULAR: default: - /* Nothing to do here. */ + if (kernel_embree_is_self_intersection(kg, hit, cray)) { + *args->valid = 0; + return; + } break; } } @@ -257,6 +293,14 @@ static void rtc_filter_func_backface_cull(const RTCFilterFunctionNArguments *arg *args->valid = 0; return; } + + CCLIntersectContext *ctx = ((IntersectContext *)args->context)->userRayExt; + const KernelGlobalsCPU *kg = ctx->kg; + const Ray *cray = ctx->ray; + + if (kernel_embree_is_self_intersection(kg, hit, cray)) { + *args->valid = 0; + } } static void rtc_filter_occluded_func_backface_cull(const RTCFilterFunctionNArguments *args) @@ -355,10 +399,12 @@ void BVHEmbree::build(Progress &progress, Stats *stats, RTCDevice rtc_device_) } const bool dynamic = params.bvh_type == BVH_TYPE_DYNAMIC; + const bool compact = params.use_compact_structure; scene = rtcNewScene(rtc_device); const RTCSceneFlags scene_flags = (dynamic ? RTC_SCENE_FLAG_DYNAMIC : RTC_SCENE_FLAG_NONE) | - RTC_SCENE_FLAG_COMPACT | RTC_SCENE_FLAG_ROBUST; + (compact ? RTC_SCENE_FLAG_COMPACT : RTC_SCENE_FLAG_NONE) | + RTC_SCENE_FLAG_ROBUST; rtcSetSceneFlags(scene, scene_flags); build_quality = dynamic ? RTC_BUILD_QUALITY_LOW : (params.use_spatial_split ? RTC_BUILD_QUALITY_HIGH : @@ -503,6 +549,7 @@ void BVHEmbree::add_triangles(const Object *ob, const Mesh *mesh, int i) rtcSetGeometryUserData(geom_id, (void *)prim_offset); rtcSetGeometryOccludedFilterFunction(geom_id, rtc_filter_occluded_func); + rtcSetGeometryIntersectFilterFunction(geom_id, rtc_filter_intersection_func); rtcSetGeometryMask(geom_id, ob->visibility_for_tracing()); rtcCommitGeometry(geom_id); @@ -765,6 +812,7 @@ void BVHEmbree::add_curves(const Object *ob, const Hair *hair, int i) rtcSetGeometryUserData(geom_id, (void *)prim_offset); if (hair->curve_shape == CURVE_RIBBON) { + rtcSetGeometryIntersectFilterFunction(geom_id, rtc_filter_intersection_func); rtcSetGeometryOccludedFilterFunction(geom_id, rtc_filter_occluded_func); } else { diff --git a/intern/cycles/bvh/params.h b/intern/cycles/bvh/params.h index 16edf2e88e4..61fa5484ce0 100644 --- a/intern/cycles/bvh/params.h +++ b/intern/cycles/bvh/params.h @@ -97,6 +97,9 @@ class BVHParams { */ bool use_unaligned_nodes; + /* Use compact acceleration structure (Embree)*/ + bool use_compact_structure; + /* Split time range to this number of steps and create leaf node for each * of this time steps. * @@ -139,6 +142,7 @@ class BVHParams { top_level = false; bvh_layout = BVH_LAYOUT_BVH2; + use_compact_structure = true; use_unaligned_nodes = false; num_motion_curve_steps = 0; diff --git a/intern/cycles/cmake/external_libs.cmake b/intern/cycles/cmake/external_libs.cmake index f46d18a4926..8d9631e5b44 100644 --- a/intern/cycles/cmake/external_libs.cmake +++ b/intern/cycles/cmake/external_libs.cmake @@ -559,10 +559,10 @@ if(WITH_CYCLES_DEVICE_METAL) find_library(METAL_LIBRARY Metal) # This file was added in the 12.0 SDK, use it as a way to detect the version. - if (METAL_LIBRARY AND NOT EXISTS "${METAL_LIBRARY}/Headers/MTLFunctionStitching.h") + if(METAL_LIBRARY AND NOT EXISTS "${METAL_LIBRARY}/Headers/MTLFunctionStitching.h") message(STATUS "Metal version too old, must be SDK 12.0 or newer, disabling WITH_CYCLES_DEVICE_METAL") set(WITH_CYCLES_DEVICE_METAL OFF) - elseif (NOT METAL_LIBRARY) + elseif(NOT METAL_LIBRARY) message(STATUS "Metal not found, disabling WITH_CYCLES_DEVICE_METAL") set(WITH_CYCLES_DEVICE_METAL OFF) else() diff --git a/intern/cycles/device/hip/device_impl.cpp b/intern/cycles/device/hip/device_impl.cpp index 4f1cbabc89b..85ed3dc5b55 100644 --- a/intern/cycles/device/hip/device_impl.cpp +++ b/intern/cycles/device/hip/device_impl.cpp @@ -905,8 +905,8 @@ void HIPDevice::tex_alloc(device_texture &mem) address_mode = hipAddressModeClamp; break; case EXTENSION_CLIP: - // TODO : (Arya) setting this to Mode Clamp instead of Mode Border because it's unsupported - // in hip + /* TODO(@arya): setting this to Mode Clamp instead of Mode Border + * because it's unsupported in HIP. */ address_mode = hipAddressModeClamp; break; default: diff --git a/intern/cycles/device/metal/bvh.h b/intern/cycles/device/metal/bvh.h index cbc5ca7d2c3..58d71e3928f 100644 --- a/intern/cycles/device/metal/bvh.h +++ b/intern/cycles/device/metal/bvh.h @@ -58,6 +58,11 @@ class BVHMetal : public BVH { id<MTLCommandQueue> queue, Geometry *const geom, bool refit); + bool build_BLAS_pointcloud(Progress &progress, + id<MTLDevice> device, + id<MTLCommandQueue> queue, + Geometry *const geom, + bool refit); bool build_TLAS(Progress &progress, id<MTLDevice> device, id<MTLCommandQueue> queue, bool refit); }; diff --git a/intern/cycles/device/metal/bvh.mm b/intern/cycles/device/metal/bvh.mm index 1953102cb41..8b252f1a5ec 100644 --- a/intern/cycles/device/metal/bvh.mm +++ b/intern/cycles/device/metal/bvh.mm @@ -19,6 +19,7 @@ # include "scene/hair.h" # include "scene/mesh.h" # include "scene/object.h" +# include "scene/pointcloud.h" # include "util/progress.h" @@ -475,6 +476,220 @@ bool BVHMetal::build_BLAS_hair(Progress &progress, return false; } +bool BVHMetal::build_BLAS_pointcloud(Progress &progress, + id<MTLDevice> device, + id<MTLCommandQueue> queue, + Geometry *const geom, + bool refit) +{ + if (@available(macos 12.0, *)) { + /* Build BLAS for point cloud */ + PointCloud *pointcloud = static_cast<PointCloud *>(geom); + if (pointcloud->num_points() == 0) { + return false; + } + + /*------------------------------------------------*/ + BVH_status("Building pointcloud BLAS | %7d points | %s", + (int)pointcloud->num_points(), + geom->name.c_str()); + /*------------------------------------------------*/ + + const size_t num_points = pointcloud->get_points().size(); + const float3 *points = pointcloud->get_points().data(); + const float *radius = pointcloud->get_radius().data(); + + const bool use_fast_trace_bvh = (params.bvh_type == BVH_TYPE_STATIC); + + size_t num_motion_steps = 1; + Attribute *motion_keys = pointcloud->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); + if (motion_blur && pointcloud->get_use_motion_blur() && motion_keys) { + num_motion_steps = pointcloud->get_motion_steps(); + } + + const size_t num_aabbs = num_motion_steps; + + MTLResourceOptions storage_mode; + if (device.hasUnifiedMemory) { + storage_mode = MTLResourceStorageModeShared; + } + else { + storage_mode = MTLResourceStorageModeManaged; + } + + /* Allocate a GPU buffer for the AABB data and populate it */ + id<MTLBuffer> aabbBuf = [device + newBufferWithLength:num_aabbs * sizeof(MTLAxisAlignedBoundingBox) + options:storage_mode]; + MTLAxisAlignedBoundingBox *aabb_data = (MTLAxisAlignedBoundingBox *)[aabbBuf contents]; + + /* Get AABBs for each motion step */ + size_t center_step = (num_motion_steps - 1) / 2; + for (size_t step = 0; step < num_motion_steps; ++step) { + /* The center step for motion vertices is not stored in the attribute */ + if (step != center_step) { + size_t attr_offset = (step > center_step) ? step - 1 : step; + points = motion_keys->data_float3() + attr_offset * num_points; + } + + for (size_t j = 0; j < num_points; ++j) { + const PointCloud::Point point = pointcloud->get_point(j); + BoundBox bounds = BoundBox::empty; + point.bounds_grow(points, radius, bounds); + + const size_t index = step * num_points + j; + aabb_data[index].min = (MTLPackedFloat3 &)bounds.min; + aabb_data[index].max = (MTLPackedFloat3 &)bounds.max; + } + } + + if (storage_mode == MTLResourceStorageModeManaged) { + [aabbBuf didModifyRange:NSMakeRange(0, aabbBuf.length)]; + } + +# if 0 + for (size_t i=0; i<num_aabbs && i < 400; i++) { + MTLAxisAlignedBoundingBox& bb = aabb_data[i]; + printf(" %d: %.1f,%.1f,%.1f -- %.1f,%.1f,%.1f\n", int(i), bb.min.x, bb.min.y, bb.min.z, bb.max.x, bb.max.y, bb.max.z); + } +# endif + + MTLAccelerationStructureGeometryDescriptor *geomDesc; + if (motion_blur) { + std::vector<MTLMotionKeyframeData *> aabb_ptrs; + aabb_ptrs.reserve(num_motion_steps); + for (size_t step = 0; step < num_motion_steps; ++step) { + MTLMotionKeyframeData *k = [MTLMotionKeyframeData data]; + k.buffer = aabbBuf; + k.offset = step * num_points * sizeof(MTLAxisAlignedBoundingBox); + aabb_ptrs.push_back(k); + } + + MTLAccelerationStructureMotionBoundingBoxGeometryDescriptor *geomDescMotion = + [MTLAccelerationStructureMotionBoundingBoxGeometryDescriptor descriptor]; + geomDescMotion.boundingBoxBuffers = [NSArray arrayWithObjects:aabb_ptrs.data() + count:aabb_ptrs.size()]; + geomDescMotion.boundingBoxCount = num_points; + geomDescMotion.boundingBoxStride = sizeof(aabb_data[0]); + geomDescMotion.intersectionFunctionTableOffset = 2; + + /* Force a single any-hit call, so shadow record-all behavior works correctly */ + /* (Match optix behavior: unsigned int build_flags = + * OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;) */ + geomDescMotion.allowDuplicateIntersectionFunctionInvocation = false; + geomDescMotion.opaque = true; + geomDesc = geomDescMotion; + } + else { + MTLAccelerationStructureBoundingBoxGeometryDescriptor *geomDescNoMotion = + [MTLAccelerationStructureBoundingBoxGeometryDescriptor descriptor]; + geomDescNoMotion.boundingBoxBuffer = aabbBuf; + geomDescNoMotion.boundingBoxBufferOffset = 0; + geomDescNoMotion.boundingBoxCount = int(num_aabbs); + geomDescNoMotion.boundingBoxStride = sizeof(aabb_data[0]); + geomDescNoMotion.intersectionFunctionTableOffset = 2; + + /* Force a single any-hit call, so shadow record-all behavior works correctly */ + /* (Match optix behavior: unsigned int build_flags = + * OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;) */ + geomDescNoMotion.allowDuplicateIntersectionFunctionInvocation = false; + geomDescNoMotion.opaque = true; + geomDesc = geomDescNoMotion; + } + + MTLPrimitiveAccelerationStructureDescriptor *accelDesc = + [MTLPrimitiveAccelerationStructureDescriptor descriptor]; + accelDesc.geometryDescriptors = @[ geomDesc ]; + + if (motion_blur) { + accelDesc.motionStartTime = 0.0f; + accelDesc.motionEndTime = 1.0f; + accelDesc.motionStartBorderMode = MTLMotionBorderModeVanish; + accelDesc.motionEndBorderMode = MTLMotionBorderModeVanish; + accelDesc.motionKeyframeCount = num_motion_steps; + } + + if (!use_fast_trace_bvh) { + accelDesc.usage |= (MTLAccelerationStructureUsageRefit | + MTLAccelerationStructureUsagePreferFastBuild); + } + + MTLAccelerationStructureSizes accelSizes = [device + accelerationStructureSizesWithDescriptor:accelDesc]; + id<MTLAccelerationStructure> accel_uncompressed = [device + newAccelerationStructureWithSize:accelSizes.accelerationStructureSize]; + id<MTLBuffer> scratchBuf = [device newBufferWithLength:accelSizes.buildScratchBufferSize + options:MTLResourceStorageModePrivate]; + id<MTLBuffer> sizeBuf = [device newBufferWithLength:8 options:MTLResourceStorageModeShared]; + id<MTLCommandBuffer> accelCommands = [queue commandBuffer]; + id<MTLAccelerationStructureCommandEncoder> accelEnc = + [accelCommands accelerationStructureCommandEncoder]; + if (refit) { + [accelEnc refitAccelerationStructure:accel_struct + descriptor:accelDesc + destination:accel_uncompressed + scratchBuffer:scratchBuf + scratchBufferOffset:0]; + } + else { + [accelEnc buildAccelerationStructure:accel_uncompressed + descriptor:accelDesc + scratchBuffer:scratchBuf + scratchBufferOffset:0]; + } + if (use_fast_trace_bvh) { + [accelEnc writeCompactedAccelerationStructureSize:accel_uncompressed + toBuffer:sizeBuf + offset:0 + sizeDataType:MTLDataTypeULong]; + } + [accelEnc endEncoding]; + [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> command_buffer) { + /* free temp resources */ + [scratchBuf release]; + [aabbBuf release]; + + if (use_fast_trace_bvh) { + /* Compact the accel structure */ + uint64_t compressed_size = *(uint64_t *)sizeBuf.contents; + + dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{ + id<MTLCommandBuffer> accelCommands = [queue commandBuffer]; + id<MTLAccelerationStructureCommandEncoder> accelEnc = + [accelCommands accelerationStructureCommandEncoder]; + id<MTLAccelerationStructure> accel = [device + newAccelerationStructureWithSize:compressed_size]; + [accelEnc copyAndCompactAccelerationStructure:accel_uncompressed + toAccelerationStructure:accel]; + [accelEnc endEncoding]; + [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> command_buffer) { + uint64_t allocated_size = [accel allocatedSize]; + stats.mem_alloc(allocated_size); + accel_struct = accel; + [accel_uncompressed release]; + accel_struct_building = false; + }]; + [accelCommands commit]; + }); + } + else { + /* set our acceleration structure to the uncompressed structure */ + accel_struct = accel_uncompressed; + + uint64_t allocated_size = [accel_struct allocatedSize]; + stats.mem_alloc(allocated_size); + accel_struct_building = false; + } + [sizeBuf release]; + }]; + + accel_struct_building = true; + [accelCommands commit]; + return true; + } + return false; +} + bool BVHMetal::build_BLAS(Progress &progress, id<MTLDevice> device, id<MTLCommandQueue> queue, @@ -491,6 +706,8 @@ bool BVHMetal::build_BLAS(Progress &progress, return build_BLAS_mesh(progress, device, queue, geom, refit); case Geometry::HAIR: return build_BLAS_hair(progress, device, queue, geom, refit); + case Geometry::POINTCLOUD: + return build_BLAS_pointcloud(progress, device, queue, geom, refit); default: return false; } diff --git a/intern/cycles/device/metal/kernel.h b/intern/cycles/device/metal/kernel.h index 4874af1bfa6..a4bfb30436d 100644 --- a/intern/cycles/device/metal/kernel.h +++ b/intern/cycles/device/metal/kernel.h @@ -36,6 +36,8 @@ enum { METALRT_FUNC_CURVE_RIBBON_SHADOW, METALRT_FUNC_CURVE_ALL, METALRT_FUNC_CURVE_ALL_SHADOW, + METALRT_FUNC_POINT, + METALRT_FUNC_POINT_SHADOW, METALRT_FUNC_NUM }; diff --git a/intern/cycles/device/metal/kernel.mm b/intern/cycles/device/metal/kernel.mm index f948a8a0a0f..e9bd1cea5df 100644 --- a/intern/cycles/device/metal/kernel.mm +++ b/intern/cycles/device/metal/kernel.mm @@ -358,6 +358,8 @@ bool MetalDeviceKernels::load(MetalDevice *device, int kernel_type) "__intersection__curve_ribbon_shadow", "__intersection__curve_all", "__intersection__curve_all_shadow", + "__intersection__point", + "__intersection__point_shadow", }; assert(sizeof(function_names) / sizeof(function_names[0]) == METALRT_FUNC_NUM); @@ -400,37 +402,51 @@ bool MetalDeviceKernels::load(MetalDevice *device, int kernel_type) NSArray *function_list = nil; if (device->use_metalrt) { - id<MTLFunction> box_intersect_default = nil; - id<MTLFunction> box_intersect_shadow = nil; + id<MTLFunction> curve_intersect_default = nil; + id<MTLFunction> curve_intersect_shadow = nil; + id<MTLFunction> point_intersect_default = nil; + id<MTLFunction> point_intersect_shadow = nil; if (device->kernel_features & KERNEL_FEATURE_HAIR) { /* Add curve intersection programs. */ if (device->kernel_features & KERNEL_FEATURE_HAIR_THICK) { /* Slower programs for thick hair since that also slows down ribbons. * Ideally this should not be needed. */ - box_intersect_default = rt_intersection_funcs[kernel_type][METALRT_FUNC_CURVE_ALL]; - box_intersect_shadow = rt_intersection_funcs[kernel_type][METALRT_FUNC_CURVE_ALL_SHADOW]; + curve_intersect_default = rt_intersection_funcs[kernel_type][METALRT_FUNC_CURVE_ALL]; + curve_intersect_shadow = + rt_intersection_funcs[kernel_type][METALRT_FUNC_CURVE_ALL_SHADOW]; } else { - box_intersect_default = rt_intersection_funcs[kernel_type][METALRT_FUNC_CURVE_RIBBON]; - box_intersect_shadow = + curve_intersect_default = rt_intersection_funcs[kernel_type][METALRT_FUNC_CURVE_RIBBON]; + curve_intersect_shadow = rt_intersection_funcs[kernel_type][METALRT_FUNC_CURVE_RIBBON_SHADOW]; } } + if (device->kernel_features & KERNEL_FEATURE_POINTCLOUD) { + point_intersect_default = rt_intersection_funcs[kernel_type][METALRT_FUNC_POINT]; + point_intersect_shadow = rt_intersection_funcs[kernel_type][METALRT_FUNC_POINT_SHADOW]; + } table_functions[METALRT_TABLE_DEFAULT] = [NSArray arrayWithObjects:rt_intersection_funcs[kernel_type][METALRT_FUNC_DEFAULT_TRI], - box_intersect_default ? - box_intersect_default : + curve_intersect_default ? + curve_intersect_default : + rt_intersection_funcs[kernel_type][METALRT_FUNC_DEFAULT_BOX], + point_intersect_default ? + point_intersect_default : rt_intersection_funcs[kernel_type][METALRT_FUNC_DEFAULT_BOX], nil]; table_functions[METALRT_TABLE_SHADOW] = [NSArray arrayWithObjects:rt_intersection_funcs[kernel_type][METALRT_FUNC_SHADOW_TRI], - box_intersect_shadow ? - box_intersect_shadow : + curve_intersect_shadow ? + curve_intersect_shadow : + rt_intersection_funcs[kernel_type][METALRT_FUNC_SHADOW_BOX], + point_intersect_shadow ? + point_intersect_shadow : rt_intersection_funcs[kernel_type][METALRT_FUNC_SHADOW_BOX], nil]; table_functions[METALRT_TABLE_LOCAL] = [NSArray arrayWithObjects:rt_intersection_funcs[kernel_type][METALRT_FUNC_LOCAL_TRI], rt_intersection_funcs[kernel_type][METALRT_FUNC_LOCAL_BOX], + rt_intersection_funcs[kernel_type][METALRT_FUNC_LOCAL_BOX], nil]; NSMutableSet *unique_functions = [NSMutableSet diff --git a/intern/cycles/device/optix/device_impl.cpp b/intern/cycles/device/optix/device_impl.cpp index 009661b2dec..cb6c36d5ea6 100644 --- a/intern/cycles/device/optix/device_impl.cpp +++ b/intern/cycles/device/optix/device_impl.cpp @@ -226,7 +226,7 @@ bool OptiXDevice::load_kernels(const uint kernel_features) pipeline_options.usesMotionBlur = false; pipeline_options.traversableGraphFlags = OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_SINGLE_LEVEL_INSTANCING; - pipeline_options.numPayloadValues = 6; + pipeline_options.numPayloadValues = 8; pipeline_options.numAttributeValues = 2; /* u, v */ pipeline_options.exceptionFlags = OPTIX_EXCEPTION_FLAG_NONE; pipeline_options.pipelineLaunchParamsVariableName = "__params"; /* See globals.h */ diff --git a/intern/cycles/integrator/denoiser.cpp b/intern/cycles/integrator/denoiser.cpp index 2a5f99f358b..28cdeeb630a 100644 --- a/intern/cycles/integrator/denoiser.cpp +++ b/intern/cycles/integrator/denoiser.cpp @@ -125,20 +125,41 @@ static Device *find_best_device(Device *device, DenoiserType type) return best_device; } +static DeviceInfo find_best_denoiser_device_info(const vector<DeviceInfo> &device_infos, + DenoiserType denoiser_type) +{ + for (const DeviceInfo &device_info : device_infos) { + if ((device_info.denoisers & denoiser_type) == 0) { + continue; + } + + /* TODO(sergey): Use one of the already configured devices, so that OptiX denoising can happen + * on a physical CUDA device which is already used for rendering. */ + + /* TODO(sergey): Choose fastest device for denoising. */ + + return device_info; + } + + DeviceInfo none_device; + none_device.type = DEVICE_NONE; + return none_device; +} + static unique_ptr<Device> create_denoiser_device(Device *path_trace_device, - const uint device_type_mask) + const uint device_type_mask, + DenoiserType denoiser_type) { const vector<DeviceInfo> device_infos = Device::available_devices(device_type_mask); if (device_infos.empty()) { return nullptr; } - /* TODO(sergey): Use one of the already configured devices, so that OptiX denoising can happen on - * a physical CUDA device which is already used for rendering. */ - - /* TODO(sergey): Choose fastest device for denoising. */ - - const DeviceInfo denoiser_device_info = device_infos.front(); + const DeviceInfo denoiser_device_info = find_best_denoiser_device_info(device_infos, + denoiser_type); + if (denoiser_device_info.type == DEVICE_NONE) { + return nullptr; + } unique_ptr<Device> denoiser_device( Device::create(denoiser_device_info, path_trace_device->stats, path_trace_device->profiler)); @@ -186,7 +207,8 @@ Device *Denoiser::ensure_denoiser_device(Progress *progress) device_creation_attempted_ = true; const uint device_type_mask = get_device_type_mask(); - local_denoiser_device_ = create_denoiser_device(path_trace_device_, device_type_mask); + local_denoiser_device_ = create_denoiser_device( + path_trace_device_, device_type_mask, params_.type); denoiser_device_ = local_denoiser_device_.get(); return denoiser_device_; diff --git a/intern/cycles/integrator/denoiser_oidn.cpp b/intern/cycles/integrator/denoiser_oidn.cpp index a08aec513fc..4676e69c4fb 100644 --- a/intern/cycles/integrator/denoiser_oidn.cpp +++ b/intern/cycles/integrator/denoiser_oidn.cpp @@ -37,8 +37,6 @@ OIDNDenoiser::OIDNDenoiser(Device *path_trace_device, const DenoiseParams ¶m : Denoiser(path_trace_device, params) { DCHECK_EQ(params.type, DENOISER_OPENIMAGEDENOISE); - - DCHECK(openimagedenoise_supported()) << "OpenImageDenoiser is not supported on this platform."; } #ifdef WITH_OPENIMAGEDENOISE @@ -585,6 +583,9 @@ bool OIDNDenoiser::denoise_buffer(const BufferParams &buffer_params, const int num_samples, bool allow_inplace_modification) { + DCHECK(openimagedenoise_supported()) + << "OpenImageDenoiser is not supported on this platform or build."; + #ifdef WITH_OPENIMAGEDENOISE thread_scoped_lock lock(mutex_); @@ -635,4 +636,20 @@ uint OIDNDenoiser::get_device_type_mask() const return DEVICE_MASK_CPU; } +Device *OIDNDenoiser::ensure_denoiser_device(Progress *progress) +{ +#ifndef WITH_OPENIMAGEDENOISE + path_trace_device_->set_error("Build without OpenImageDenoiser"); + return nullptr; +#else + if (!openimagedenoise_supported()) { + path_trace_device_->set_error( + "OpenImageDenoiser is not supported on this CPU: missing SSE 4.1 support"); + return nullptr; + } + + return Denoiser::ensure_denoiser_device(progress); +#endif +} + CCL_NAMESPACE_END diff --git a/intern/cycles/integrator/denoiser_oidn.h b/intern/cycles/integrator/denoiser_oidn.h index a0ec3e26b9c..2b815be973e 100644 --- a/intern/cycles/integrator/denoiser_oidn.h +++ b/intern/cycles/integrator/denoiser_oidn.h @@ -38,6 +38,7 @@ class OIDNDenoiser : public Denoiser { protected: virtual uint get_device_type_mask() const override; + virtual Device *ensure_denoiser_device(Progress *progress) override; /* We only perform one denoising at a time, since OpenImageDenoise itself is multithreaded. * Use this mutex whenever images are passed to the OIDN and needs to be denoised. */ diff --git a/intern/cycles/integrator/pass_accessor.cpp b/intern/cycles/integrator/pass_accessor.cpp index 4479442df56..9fa5aab9ea9 100644 --- a/intern/cycles/integrator/pass_accessor.cpp +++ b/intern/cycles/integrator/pass_accessor.cpp @@ -141,6 +141,7 @@ bool PassAccessor::get_render_tile_pixels(const RenderBuffers *render_buffers, const PassType type = pass_access_info_.type; const PassMode mode = pass_access_info_.mode; const PassInfo pass_info = Pass::get_info(type, pass_access_info_.include_albedo); + int num_written_components = pass_info.num_components; if (pass_info.num_components == 1) { /* Single channel passes. */ @@ -188,8 +189,10 @@ bool PassAccessor::get_render_tile_pixels(const RenderBuffers *render_buffers, else if ((pass_info.divide_type != PASS_NONE || pass_info.direct_type != PASS_NONE || pass_info.indirect_type != PASS_NONE) && mode != PassMode::DENOISED) { - /* RGB lighting passes that need to divide out color and/or sum direct and indirect. */ + /* RGB lighting passes that need to divide out color and/or sum direct and indirect. + * These can also optionally write alpha like the combined pass. */ get_pass_light_path(render_buffers, buffer_params, destination); + num_written_components = 4; } else { /* Passes that need no special computation, or denoised passes that already @@ -215,7 +218,7 @@ bool PassAccessor::get_render_tile_pixels(const RenderBuffers *render_buffers, } } - pad_pixels(buffer_params, destination, pass_info.num_components); + pad_pixels(buffer_params, destination, num_written_components); return true; } diff --git a/intern/cycles/integrator/path_trace.cpp b/intern/cycles/integrator/path_trace.cpp index 0b55d1078a8..fd697836f52 100644 --- a/intern/cycles/integrator/path_trace.cpp +++ b/intern/cycles/integrator/path_trace.cpp @@ -820,8 +820,15 @@ void PathTrace::tile_buffer_read() return; } + /* Read buffers back from device. */ + tbb::parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) { + path_trace_work->copy_render_buffers_from_device(); + }); + + /* Read (subset of) passes from output driver. */ PathTraceTile tile(*this); if (output_driver_->read_render_tile(tile)) { + /* Copy buffers to device again. */ tbb::parallel_for_each(path_trace_works_, [](unique_ptr<PathTraceWork> &path_trace_work) { path_trace_work->copy_render_buffers_to_device(); }); diff --git a/intern/cycles/integrator/shader_eval.cpp b/intern/cycles/integrator/shader_eval.cpp index 95a1adeb016..0edd3810c39 100644 --- a/intern/cycles/integrator/shader_eval.cpp +++ b/intern/cycles/integrator/shader_eval.cpp @@ -157,7 +157,7 @@ bool ShaderEval::eval_gpu(Device *device, queue->init_execution(); /* Execute work on GPU in chunk, so we can cancel. - * TODO : query appropriate size from device.*/ + * TODO: query appropriate size from device. */ const int32_t chunk_size = 65536; device_ptr d_input = input.device_pointer; diff --git a/intern/cycles/kernel/bvh/bvh.h b/intern/cycles/kernel/bvh/bvh.h index 67804fb1d0d..1797bf60720 100644 --- a/intern/cycles/kernel/bvh/bvh.h +++ b/intern/cycles/kernel/bvh/bvh.h @@ -173,15 +173,16 @@ ccl_device_intersect bool scene_intersect(KernelGlobals kg, uint p3 = 0; uint p4 = visibility; uint p5 = PRIMITIVE_NONE; + uint p6 = ((uint64_t)ray) & 0xFFFFFFFF; + uint p7 = (((uint64_t)ray) >> 32) & 0xFFFFFFFF; uint ray_mask = visibility & 0xFF; - uint ray_flags = OPTIX_RAY_FLAG_NONE; + uint ray_flags = OPTIX_RAY_FLAG_ENFORCE_ANYHIT; if (0 == ray_mask && (visibility & ~0xFF) != 0) { ray_mask = 0xFF; - ray_flags = OPTIX_RAY_FLAG_ENFORCE_ANYHIT; } else if (visibility & PATH_RAY_SHADOW_OPAQUE) { - ray_flags = OPTIX_RAY_FLAG_TERMINATE_ON_FIRST_HIT; + ray_flags |= OPTIX_RAY_FLAG_TERMINATE_ON_FIRST_HIT; } optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0, @@ -200,7 +201,9 @@ ccl_device_intersect bool scene_intersect(KernelGlobals kg, p2, p3, p4, - p5); + p5, + p6, + p7); isect->t = __uint_as_float(p0); isect->u = __uint_as_float(p1); @@ -242,6 +245,7 @@ ccl_device_intersect bool scene_intersect(KernelGlobals kg, } MetalRTIntersectionPayload payload; + payload.self = ray->self; payload.u = 0.0f; payload.v = 0.0f; payload.visibility = visibility; @@ -309,6 +313,7 @@ ccl_device_intersect bool scene_intersect(KernelGlobals kg, CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_REGULAR); IntersectContext rtc_ctx(&ctx); RTCRayHit ray_hit; + ctx.ray = ray; kernel_embree_setup_rayhit(*ray, ray_hit, visibility); rtcIntersect1(kernel_data.bvh.scene, &rtc_ctx.context, &ray_hit); if (ray_hit.hit.geomID != RTC_INVALID_GEOMETRY_ID && @@ -356,6 +361,9 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg, uint p2 = pointer_pack_to_uint_0(local_isect); uint p3 = pointer_pack_to_uint_1(local_isect); uint p4 = local_object; + uint p6 = ((uint64_t)ray) & 0xFFFFFFFF; + uint p7 = (((uint64_t)ray) >> 32) & 0xFFFFFFFF; + /* Is set to zero on miss or if ray is aborted, so can be used as return value. */ uint p5 = max_hits; @@ -379,7 +387,9 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg, p2, p3, p4, - p5); + p5, + p6, + p7); return p5; # elif defined(__METALRT__) @@ -417,6 +427,7 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg, } MetalRTIntersectionLocalPayload payload; + payload.self = ray->self; payload.local_object = local_object; payload.max_hits = max_hits; payload.local_isect.num_hits = 0; @@ -460,6 +471,7 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg, kg, has_bvh ? CCLIntersectContext::RAY_SSS : CCLIntersectContext::RAY_LOCAL); ctx.lcg_state = lcg_state; ctx.max_hits = max_hits; + ctx.ray = ray; ctx.local_isect = local_isect; if (local_isect) { local_isect->num_hits = 0; @@ -532,6 +544,8 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg, uint p3 = max_hits; uint p4 = visibility; uint p5 = false; + uint p6 = ((uint64_t)ray) & 0xFFFFFFFF; + uint p7 = (((uint64_t)ray) >> 32) & 0xFFFFFFFF; uint ray_mask = visibility & 0xFF; if (0 == ray_mask && (visibility & ~0xFF) != 0) { @@ -555,7 +569,9 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg, p2, p3, p4, - p5); + p5, + p6, + p7); *num_recorded_hits = uint16_unpack_from_uint_0(p2); *throughput = __uint_as_float(p1); @@ -588,6 +604,7 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg, } MetalRTIntersectionShadowPayload payload; + payload.self = ray->self; payload.visibility = visibility; payload.max_hits = max_hits; payload.num_hits = 0; @@ -634,6 +651,7 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg, Intersection *isect_array = (Intersection *)state->shadow_isect; ctx.isect_s = isect_array; ctx.max_hits = max_hits; + ctx.ray = ray; IntersectContext rtc_ctx(&ctx); RTCRay rtc_ray; kernel_embree_setup_ray(*ray, rtc_ray, visibility); @@ -685,6 +703,8 @@ ccl_device_intersect bool scene_intersect_volume(KernelGlobals kg, uint p3 = 0; uint p4 = visibility; uint p5 = PRIMITIVE_NONE; + uint p6 = ((uint64_t)ray) & 0xFFFFFFFF; + uint p7 = (((uint64_t)ray) >> 32) & 0xFFFFFFFF; uint ray_mask = visibility & 0xFF; if (0 == ray_mask && (visibility & ~0xFF) != 0) { @@ -708,7 +728,9 @@ ccl_device_intersect bool scene_intersect_volume(KernelGlobals kg, p2, p3, p4, - p5); + p5, + p6, + p7); isect->t = __uint_as_float(p0); isect->u = __uint_as_float(p1); @@ -744,6 +766,7 @@ ccl_device_intersect bool scene_intersect_volume(KernelGlobals kg, } MetalRTIntersectionPayload payload; + payload.self = ray->self; payload.visibility = visibility; typename metalrt_intersector_type::result_type intersection; @@ -820,6 +843,7 @@ ccl_device_intersect uint scene_intersect_volume_all(KernelGlobals kg, ctx.isect_s = isect; ctx.max_hits = max_hits; ctx.num_hits = 0; + ctx.ray = ray; IntersectContext rtc_ctx(&ctx); RTCRay rtc_ray; kernel_embree_setup_ray(*ray, rtc_ray, visibility); diff --git a/intern/cycles/kernel/bvh/embree.h b/intern/cycles/kernel/bvh/embree.h index 9edd4f90a7e..19c4b9f6f3d 100644 --- a/intern/cycles/kernel/bvh/embree.h +++ b/intern/cycles/kernel/bvh/embree.h @@ -22,6 +22,8 @@ #include "kernel/device/cpu/compat.h" #include "kernel/device/cpu/globals.h" +#include "kernel/bvh/util.h" + #include "util/vector.h" CCL_NAMESPACE_BEGIN @@ -38,6 +40,9 @@ struct CCLIntersectContext { KernelGlobals kg; RayType type; + /* For avoiding self intersections */ + const Ray *ray; + /* for shadow rays */ Intersection *isect_s; uint max_hits; @@ -56,6 +61,7 @@ struct CCLIntersectContext { { kg = kg_; type = type_; + ray = NULL; max_hits = 1; num_hits = 0; num_recorded_hits = 0; @@ -102,7 +108,34 @@ ccl_device_inline void kernel_embree_setup_rayhit(const Ray &ray, { kernel_embree_setup_ray(ray, rayhit.ray, visibility); rayhit.hit.geomID = RTC_INVALID_GEOMETRY_ID; - rayhit.hit.primID = RTC_INVALID_GEOMETRY_ID; + rayhit.hit.instID[0] = RTC_INVALID_GEOMETRY_ID; +} + +ccl_device_inline bool kernel_embree_is_self_intersection(const KernelGlobals kg, + const RTCHit *hit, + const Ray *ray) +{ + bool status = false; + if (hit->instID[0] != RTC_INVALID_GEOMETRY_ID) { + const int oID = hit->instID[0] / 2; + if ((ray->self.object == oID) || (ray->self.light_object == oID)) { + RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData( + rtcGetGeometry(kernel_data.bvh.scene, hit->instID[0])); + const int pID = hit->primID + + (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID)); + status = intersection_skip_self_shadow(ray->self, oID, pID); + } + } + else { + const int oID = hit->geomID / 2; + if ((ray->self.object == oID) || (ray->self.light_object == oID)) { + const int pID = hit->primID + (intptr_t)rtcGetGeometryUserData( + rtcGetGeometry(kernel_data.bvh.scene, hit->geomID)); + status = intersection_skip_self_shadow(ray->self, oID, pID); + } + } + + return status; } ccl_device_inline void kernel_embree_convert_hit(KernelGlobals kg, diff --git a/intern/cycles/kernel/bvh/local.h b/intern/cycles/kernel/bvh/local.h index 4d0e6aac901..4ef6deef98d 100644 --- a/intern/cycles/kernel/bvh/local.h +++ b/intern/cycles/kernel/bvh/local.h @@ -157,7 +157,11 @@ ccl_device_inline } } + /* Skip self intersection. */ const int prim = kernel_tex_fetch(__prim_index, prim_addr); + if (intersection_skip_self_local(ray->self, prim)) { + continue; + } if (triangle_intersect_local(kg, local_isect, @@ -188,7 +192,11 @@ ccl_device_inline } } + /* Skip self intersection. */ const int prim = kernel_tex_fetch(__prim_index, prim_addr); + if (intersection_skip_self_local(ray->self, prim)) { + continue; + } if (motion_triangle_intersect_local(kg, local_isect, diff --git a/intern/cycles/kernel/bvh/metal.h b/intern/cycles/kernel/bvh/metal.h index 55456d15f50..5ab413d9314 100644 --- a/intern/cycles/kernel/bvh/metal.h +++ b/intern/cycles/kernel/bvh/metal.h @@ -15,6 +15,7 @@ */ struct MetalRTIntersectionPayload { + RaySelfPrimitives self; uint visibility; float u, v; int prim; @@ -25,6 +26,7 @@ struct MetalRTIntersectionPayload { }; struct MetalRTIntersectionLocalPayload { + RaySelfPrimitives self; uint local_object; uint lcg_state; short max_hits; @@ -34,6 +36,7 @@ struct MetalRTIntersectionLocalPayload { }; struct MetalRTIntersectionShadowPayload { + RaySelfPrimitives self; uint visibility; #if defined(__METALRT_MOTION__) float time; diff --git a/intern/cycles/kernel/bvh/shadow_all.h b/intern/cycles/kernel/bvh/shadow_all.h index 0fb86bfda77..59a7ba63045 100644 --- a/intern/cycles/kernel/bvh/shadow_all.h +++ b/intern/cycles/kernel/bvh/shadow_all.h @@ -160,6 +160,9 @@ ccl_device_inline kernel_tex_fetch(__prim_object, prim_addr) : object; const int prim = kernel_tex_fetch(__prim_index, prim_addr); + if (intersection_skip_self_shadow(ray->self, prim_object, prim)) { + continue; + } switch (type & PRIMITIVE_ALL) { case PRIMITIVE_TRIANGLE: { diff --git a/intern/cycles/kernel/bvh/traversal.h b/intern/cycles/kernel/bvh/traversal.h index dc2d1422df6..17cd357a069 100644 --- a/intern/cycles/kernel/bvh/traversal.h +++ b/intern/cycles/kernel/bvh/traversal.h @@ -133,35 +133,29 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg, --stack_ptr; /* primitive intersection */ - switch (type & PRIMITIVE_ALL) { - case PRIMITIVE_TRIANGLE: { - for (; prim_addr < prim_addr2; prim_addr++) { - kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); - - const int prim_object = (object == OBJECT_NONE) ? - kernel_tex_fetch(__prim_object, prim_addr) : - object; - const int prim = kernel_tex_fetch(__prim_index, prim_addr); + for (; prim_addr < prim_addr2; prim_addr++) { + kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); + + const int prim_object = (object == OBJECT_NONE) ? + kernel_tex_fetch(__prim_object, prim_addr) : + object; + const int prim = kernel_tex_fetch(__prim_index, prim_addr); + if (intersection_skip_self_shadow(ray->self, prim_object, prim)) { + continue; + } + switch (type & PRIMITIVE_ALL) { + case PRIMITIVE_TRIANGLE: { if (triangle_intersect( kg, isect, P, dir, isect->t, visibility, prim_object, prim, prim_addr)) { /* shadow ray early termination */ if (visibility & PATH_RAY_SHADOW_OPAQUE) return true; } + break; } - break; - } #if BVH_FEATURE(BVH_MOTION) - case PRIMITIVE_MOTION_TRIANGLE: { - for (; prim_addr < prim_addr2; prim_addr++) { - kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); - - const int prim_object = (object == OBJECT_NONE) ? - kernel_tex_fetch(__prim_object, prim_addr) : - object; - const int prim = kernel_tex_fetch(__prim_index, prim_addr); - + case PRIMITIVE_MOTION_TRIANGLE: { if (motion_triangle_intersect(kg, isect, P, @@ -176,28 +170,21 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg, if (visibility & PATH_RAY_SHADOW_OPAQUE) return true; } + break; } - break; - } #endif /* BVH_FEATURE(BVH_MOTION) */ #if BVH_FEATURE(BVH_HAIR) - case PRIMITIVE_CURVE_THICK: - case PRIMITIVE_MOTION_CURVE_THICK: - case PRIMITIVE_CURVE_RIBBON: - case PRIMITIVE_MOTION_CURVE_RIBBON: { - for (; prim_addr < prim_addr2; prim_addr++) { + case PRIMITIVE_CURVE_THICK: + case PRIMITIVE_MOTION_CURVE_THICK: + case PRIMITIVE_CURVE_RIBBON: + case PRIMITIVE_MOTION_CURVE_RIBBON: { if ((type & PRIMITIVE_MOTION) && kernel_data.bvh.use_bvh_steps) { const float2 prim_time = kernel_tex_fetch(__prim_time, prim_addr); if (ray->time < prim_time.x || ray->time > prim_time.y) { - continue; + break; } } - const int prim_object = (object == OBJECT_NONE) ? - kernel_tex_fetch(__prim_object, prim_addr) : - object; - const int prim = kernel_tex_fetch(__prim_index, prim_addr); - const int curve_type = kernel_tex_fetch(__prim_type, prim_addr); const bool hit = curve_intersect( kg, isect, P, dir, isect->t, prim_object, prim, ray->time, curve_type); @@ -206,26 +193,19 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg, if (visibility & PATH_RAY_SHADOW_OPAQUE) return true; } + break; } - break; - } #endif /* BVH_FEATURE(BVH_HAIR) */ #if BVH_FEATURE(BVH_POINTCLOUD) - case PRIMITIVE_POINT: - case PRIMITIVE_MOTION_POINT: { - for (; prim_addr < prim_addr2; prim_addr++) { + case PRIMITIVE_POINT: + case PRIMITIVE_MOTION_POINT: { if ((type & PRIMITIVE_MOTION) && kernel_data.bvh.use_bvh_steps) { const float2 prim_time = kernel_tex_fetch(__prim_time, prim_addr); if (ray->time < prim_time.x || ray->time > prim_time.y) { - continue; + break; } } - const int prim_object = (object == OBJECT_NONE) ? - kernel_tex_fetch(__prim_object, prim_addr) : - object; - const int prim = kernel_tex_fetch(__prim_index, prim_addr); - const int point_type = kernel_tex_fetch(__prim_type, prim_addr); const bool hit = point_intersect( kg, isect, P, dir, isect->t, prim_object, prim, ray->time, point_type); @@ -234,10 +214,10 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg, if (visibility & PATH_RAY_SHADOW_OPAQUE) return true; } + break; } - break; - } #endif /* BVH_FEATURE(BVH_POINTCLOUD) */ + } } } else { diff --git a/intern/cycles/kernel/bvh/util.h b/intern/cycles/kernel/bvh/util.h index bd79c6e19c6..39c3ecd78c0 100644 --- a/intern/cycles/kernel/bvh/util.h +++ b/intern/cycles/kernel/bvh/util.h @@ -21,54 +21,22 @@ CCL_NAMESPACE_BEGIN /* Ray offset to avoid self intersection. * * This function should be used to compute a modified ray start position for - * rays leaving from a surface. */ - + * rays leaving from a surface. This is from "A Fast and Robust Method for Avoiding + * Self-Intersection" see https://research.nvidia.com/publication/2019-03_A-Fast-and + */ ccl_device_inline float3 ray_offset(float3 P, float3 Ng) { -#ifdef __INTERSECTION_REFINE__ - const float epsilon_f = 1e-5f; - /* ideally this should match epsilon_f, but instancing and motion blur - * precision makes it problematic */ - const float epsilon_test = 1.0f; - const int epsilon_i = 32; - - float3 res; - - /* x component */ - if (fabsf(P.x) < epsilon_test) { - res.x = P.x + Ng.x * epsilon_f; - } - else { - uint ix = __float_as_uint(P.x); - ix += ((ix ^ __float_as_uint(Ng.x)) >> 31) ? -epsilon_i : epsilon_i; - res.x = __uint_as_float(ix); - } - - /* y component */ - if (fabsf(P.y) < epsilon_test) { - res.y = P.y + Ng.y * epsilon_f; - } - else { - uint iy = __float_as_uint(P.y); - iy += ((iy ^ __float_as_uint(Ng.y)) >> 31) ? -epsilon_i : epsilon_i; - res.y = __uint_as_float(iy); - } - - /* z component */ - if (fabsf(P.z) < epsilon_test) { - res.z = P.z + Ng.z * epsilon_f; - } - else { - uint iz = __float_as_uint(P.z); - iz += ((iz ^ __float_as_uint(Ng.z)) >> 31) ? -epsilon_i : epsilon_i; - res.z = __uint_as_float(iz); - } - - return res; -#else - const float epsilon_f = 1e-4f; - return P + epsilon_f * Ng; -#endif + const float int_scale = 256.0f; + int3 of_i = make_int3((int)(int_scale * Ng.x), (int)(int_scale * Ng.y), (int)(int_scale * Ng.z)); + + float3 p_i = make_float3(__int_as_float(__float_as_int(P.x) + ((P.x < 0) ? -of_i.x : of_i.x)), + __int_as_float(__float_as_int(P.y) + ((P.y < 0) ? -of_i.y : of_i.y)), + __int_as_float(__float_as_int(P.z) + ((P.z < 0) ? -of_i.z : of_i.z))); + const float origin = 1.0f / 32.0f; + const float float_scale = 1.0f / 65536.0f; + return make_float3(fabsf(P.x) < origin ? P.x + float_scale * Ng.x : p_i.x, + fabsf(P.y) < origin ? P.y + float_scale * Ng.y : p_i.y, + fabsf(P.z) < origin ? P.z + float_scale * Ng.z : p_i.z); } #if defined(__KERNEL_CPU__) @@ -227,4 +195,25 @@ ccl_device_inline float intersection_curve_shadow_transparency(KernelGlobals kg, return (1.0f - u) * f0 + u * f1; } +ccl_device_inline bool intersection_skip_self(ccl_private const RaySelfPrimitives &self, + const int object, + const int prim) +{ + return (self.prim == prim) && (self.object == object); +} + +ccl_device_inline bool intersection_skip_self_shadow(ccl_private const RaySelfPrimitives &self, + const int object, + const int prim) +{ + return ((self.prim == prim) && (self.object == object)) || + ((self.light_prim == prim) && (self.light_object == object)); +} + +ccl_device_inline bool intersection_skip_self_local(ccl_private const RaySelfPrimitives &self, + const int prim) +{ + return (self.prim == prim); +} + CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/bvh/volume.h b/intern/cycles/kernel/bvh/volume.h index c0746c8efc3..95bba4f071d 100644 --- a/intern/cycles/kernel/bvh/volume.h +++ b/intern/cycles/kernel/bvh/volume.h @@ -144,6 +144,9 @@ ccl_device_inline kernel_tex_fetch(__prim_object, prim_addr) : object; const int prim = kernel_tex_fetch(__prim_index, prim_addr); + if (intersection_skip_self(ray->self, prim_object, prim)) { + continue; + } int object_flag = kernel_tex_fetch(__object_flag, prim_object); if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { @@ -164,6 +167,9 @@ ccl_device_inline kernel_tex_fetch(__prim_object, prim_addr) : object; const int prim = kernel_tex_fetch(__prim_index, prim_addr); + if (intersection_skip_self(ray->self, prim_object, prim)) { + continue; + } int object_flag = kernel_tex_fetch(__object_flag, prim_object); if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { continue; diff --git a/intern/cycles/kernel/bvh/volume_all.h b/intern/cycles/kernel/bvh/volume_all.h index a88c9d95d46..9f53e987cf1 100644 --- a/intern/cycles/kernel/bvh/volume_all.h +++ b/intern/cycles/kernel/bvh/volume_all.h @@ -147,6 +147,9 @@ ccl_device_inline kernel_tex_fetch(__prim_object, prim_addr) : object; const int prim = kernel_tex_fetch(__prim_index, prim_addr); + if (intersection_skip_self(ray->self, prim_object, prim)) { + continue; + } int object_flag = kernel_tex_fetch(__object_flag, prim_object); if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { continue; @@ -188,6 +191,9 @@ ccl_device_inline kernel_tex_fetch(__prim_object, prim_addr) : object; const int prim = kernel_tex_fetch(__prim_index, prim_addr); + if (intersection_skip_self(ray->self, prim_object, prim)) { + continue; + } int object_flag = kernel_tex_fetch(__object_flag, prim_object); if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { continue; diff --git a/intern/cycles/kernel/device/metal/kernel.metal b/intern/cycles/kernel/device/metal/kernel.metal index deb7dafe55e..6b77940660f 100644 --- a/intern/cycles/kernel/device/metal/kernel.metal +++ b/intern/cycles/kernel/device/metal/kernel.metal @@ -40,6 +40,27 @@ struct TriangleIntersectionResult enum { METALRT_HIT_TRIANGLE, METALRT_HIT_BOUNDING_BOX }; +ccl_device_inline bool intersection_skip_self(ray_data const RaySelfPrimitives& self, + const int object, + const int prim) +{ + return (self.prim == prim) && (self.object == object); +} + +ccl_device_inline bool intersection_skip_self_shadow(ray_data const RaySelfPrimitives& self, + const int object, + const int prim) +{ + return ((self.prim == prim) && (self.object == object)) || + ((self.light_prim == prim) && (self.light_object == object)); +} + +ccl_device_inline bool intersection_skip_self_local(ray_data const RaySelfPrimitives& self, + const int prim) +{ + return (self.prim == prim); +} + template<typename TReturn, uint intersection_type> TReturn metalrt_local_hit(constant KernelParamsMetal &launch_params_metal, ray_data MetalKernelContext::MetalRTIntersectionLocalPayload &payload, @@ -53,8 +74,8 @@ TReturn metalrt_local_hit(constant KernelParamsMetal &launch_params_metal, #ifdef __BVH_LOCAL__ uint prim = primitive_id + kernel_tex_fetch(__object_prim_offset, object); - if (object != payload.local_object) { - /* Only intersect with matching object */ + if ((object != payload.local_object) || intersection_skip_self_local(payload.self, prim)) { + /* Only intersect with matching object and skip self-intersecton. */ result.accept = false; result.continue_search = true; return result; @@ -166,6 +187,11 @@ bool metalrt_shadow_all_hit(constant KernelParamsMetal &launch_params_metal, } # endif + if (intersection_skip_self_shadow(payload.self, object, prim)) { + /* continue search */ + return true; + } + float u = 0.0f, v = 0.0f; int type = 0; if (intersection_type == METALRT_HIT_TRIANGLE) { @@ -322,21 +348,35 @@ inline TReturnType metalrt_visibility_test(constant KernelParamsMetal &launch_pa } # endif -# ifdef __VISIBILITY_FLAG__ uint visibility = payload.visibility; +# ifdef __VISIBILITY_FLAG__ if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) { result.accept = false; result.continue_search = true; return result; } +# endif /* Shadow ray early termination. */ if (visibility & PATH_RAY_SHADOW_OPAQUE) { - result.accept = true; - result.continue_search = false; - return result; + if (intersection_skip_self_shadow(payload.self, object, prim)) { + result.accept = false; + result.continue_search = true; + return result; + } + else { + result.accept = true; + result.continue_search = false; + return result; + } + } + else { + if (intersection_skip_self(payload.self, object, prim)) { + result.accept = false; + result.continue_search = true; + return result; + } } -# endif result.accept = true; result.continue_search = true; @@ -576,6 +616,150 @@ __intersection__curve_all_shadow(constant KernelParamsMetal &launch_params_metal return result; } - #endif /* __HAIR__ */ + +#ifdef __POINTCLOUD__ +ccl_device_inline +void metalrt_intersection_point(constant KernelParamsMetal &launch_params_metal, + ray_data MetalKernelContext::MetalRTIntersectionPayload &payload, + const uint object, + const uint prim, + const uint type, + const float3 ray_origin, + const float3 ray_direction, + float time, + const float ray_tmax, + thread BoundingBoxIntersectionResult &result) +{ +# ifdef __VISIBILITY_FLAG__ + const uint visibility = payload.visibility; + if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) { + return; + } +# endif + + float3 P = ray_origin; + float3 dir = ray_direction; + + /* The direction is not normalized by default, but the point intersection routine expects that */ + float len; + dir = normalize_len(dir, &len); + + Intersection isect; + isect.t = ray_tmax; + /* Transform maximum distance into object space. */ + if (isect.t != FLT_MAX) + isect.t *= len; + + MetalKernelContext context(launch_params_metal); + if (context.point_intersect(NULL, &isect, P, dir, isect.t, object, prim, time, type)) { + result = metalrt_visibility_test<BoundingBoxIntersectionResult, METALRT_HIT_BOUNDING_BOX>( + launch_params_metal, payload, object, prim, isect.u); + if (result.accept) { + result.distance = isect.t / len; + payload.u = isect.u; + payload.v = isect.v; + payload.prim = prim; + payload.type = type; + } + } +} + +ccl_device_inline +void metalrt_intersection_point_shadow(constant KernelParamsMetal &launch_params_metal, + ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload, + const uint object, + const uint prim, + const uint type, + const float3 ray_origin, + const float3 ray_direction, + float time, + const float ray_tmax, + thread BoundingBoxIntersectionResult &result) +{ + const uint visibility = payload.visibility; + + float3 P = ray_origin; + float3 dir = ray_direction; + + /* The direction is not normalized by default, but the point intersection routine expects that */ + float len; + dir = normalize_len(dir, &len); + + Intersection isect; + isect.t = ray_tmax; + /* Transform maximum distance into object space */ + if (isect.t != FLT_MAX) + isect.t *= len; + + MetalKernelContext context(launch_params_metal); + if (context.point_intersect(NULL, &isect, P, dir, isect.t, object, prim, time, type)) { + result.continue_search = metalrt_shadow_all_hit<METALRT_HIT_BOUNDING_BOX>( + launch_params_metal, payload, object, prim, float2(isect.u, isect.v), ray_tmax); + result.accept = !result.continue_search; + + if (result.accept) { + result.distance = isect.t / len; + } + } +} + +[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] +BoundingBoxIntersectionResult +__intersection__point(constant KernelParamsMetal &launch_params_metal [[buffer(1)]], + ray_data MetalKernelContext::MetalRTIntersectionPayload &payload [[payload]], + const uint object [[user_instance_id]], + const uint primitive_id [[primitive_id]], + const float3 ray_origin [[origin]], + const float3 ray_direction [[direction]], + const float ray_tmax [[max_distance]]) +{ + const uint prim = primitive_id + kernel_tex_fetch(__object_prim_offset, object); + const int type = kernel_tex_fetch(__objects, object).primitive_type; + + BoundingBoxIntersectionResult result; + result.accept = false; + result.continue_search = true; + result.distance = ray_tmax; + + metalrt_intersection_point(launch_params_metal, payload, object, prim, type, ray_origin, ray_direction, +# if defined(__METALRT_MOTION__) + payload.time, +# else + 0.0f, +# endif + ray_tmax, result); + + return result; +} + +[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] +BoundingBoxIntersectionResult +__intersection__point_shadow(constant KernelParamsMetal &launch_params_metal [[buffer(1)]], + ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]], + const uint object [[user_instance_id]], + const uint primitive_id [[primitive_id]], + const float3 ray_origin [[origin]], + const float3 ray_direction [[direction]], + const float ray_tmax [[max_distance]]) +{ + const uint prim = primitive_id + kernel_tex_fetch(__object_prim_offset, object); + const int type = kernel_tex_fetch(__objects, object).primitive_type; + + BoundingBoxIntersectionResult result; + result.accept = false; + result.continue_search = true; + result.distance = ray_tmax; + + metalrt_intersection_point_shadow(launch_params_metal, payload, object, prim, type, ray_origin, ray_direction, +# if defined(__METALRT_MOTION__) + payload.time, +# else + 0.0f, +# endif + ray_tmax, result); + + return result; +} +#endif /* __POINTCLOUD__ */ #endif /* __METALRT__ */ diff --git a/intern/cycles/kernel/device/optix/kernel.cu b/intern/cycles/kernel/device/optix/kernel.cu index aa210b31a95..8e3d57bff8a 100644 --- a/intern/cycles/kernel/device/optix/kernel.cu +++ b/intern/cycles/kernel/device/optix/kernel.cu @@ -45,6 +45,11 @@ template<typename T> ccl_device_forceinline T *get_payload_ptr_2() return pointer_unpack_from_uint<T>(optixGetPayload_2(), optixGetPayload_3()); } +template<typename T> ccl_device_forceinline T *get_payload_ptr_6() +{ + return (T *)(((uint64_t)optixGetPayload_7() << 32) | optixGetPayload_6()); +} + ccl_device_forceinline int get_object_id() { #ifdef __OBJECT_MOTION__ @@ -111,6 +116,12 @@ extern "C" __global__ void __anyhit__kernel_optix_local_hit() return optixIgnoreIntersection(); } + const int prim = optixGetPrimitiveIndex(); + ccl_private Ray *const ray = get_payload_ptr_6<Ray>(); + if (intersection_skip_self_local(ray->self, prim)) { + return optixIgnoreIntersection(); + } + const uint max_hits = optixGetPayload_5(); if (max_hits == 0) { /* Special case for when no hit information is requested, just report that something was hit */ @@ -149,8 +160,6 @@ extern "C" __global__ void __anyhit__kernel_optix_local_hit() local_isect->num_hits = 1; } - const int prim = optixGetPrimitiveIndex(); - Intersection *isect = &local_isect->hits[hit]; isect->t = optixGetRayTmax(); isect->prim = prim; @@ -185,6 +194,11 @@ extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit() } # endif + ccl_private Ray *const ray = get_payload_ptr_6<Ray>(); + if (intersection_skip_self_shadow(ray->self, object, prim)) { + return optixIgnoreIntersection(); + } + float u = 0.0f, v = 0.0f; int type = 0; if (optixIsTriangleHit()) { @@ -314,6 +328,12 @@ extern "C" __global__ void __anyhit__kernel_optix_volume_test() if ((kernel_tex_fetch(__object_flag, object) & SD_OBJECT_HAS_VOLUME) == 0) { return optixIgnoreIntersection(); } + + const int prim = optixGetPrimitiveIndex(); + ccl_private Ray *const ray = get_payload_ptr_6<Ray>(); + if (intersection_skip_self(ray->self, object, prim)) { + return optixIgnoreIntersection(); + } } extern "C" __global__ void __anyhit__kernel_optix_visibility_test() @@ -330,18 +350,31 @@ extern "C" __global__ void __anyhit__kernel_optix_visibility_test() # endif #endif -#ifdef __VISIBILITY_FLAG__ const uint object = get_object_id(); const uint visibility = optixGetPayload_4(); +#ifdef __VISIBILITY_FLAG__ if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) { return optixIgnoreIntersection(); } +#endif + + const int prim = optixGetPrimitiveIndex(); + ccl_private Ray *const ray = get_payload_ptr_6<Ray>(); - /* Shadow ray early termination. */ if (visibility & PATH_RAY_SHADOW_OPAQUE) { - return optixTerminateRay(); + if (intersection_skip_self_shadow(ray->self, object, prim)) { + return optixIgnoreIntersection(); + } + else { + /* Shadow ray early termination. */ + return optixTerminateRay(); + } + } + else { + if (intersection_skip_self(ray->self, object, prim)) { + return optixIgnoreIntersection(); + } } -#endif } extern "C" __global__ void __closesthit__kernel_optix_hit() diff --git a/intern/cycles/kernel/film/read.h b/intern/cycles/kernel/film/read.h index 18a593a75b1..ba895fd8909 100644 --- a/intern/cycles/kernel/film/read.h +++ b/intern/cycles/kernel/film/read.h @@ -214,6 +214,21 @@ ccl_device_inline void film_get_pass_pixel_light_path( pixel[0] = f.x; pixel[1] = f.y; pixel[2] = f.z; + + /* Optional alpha channel. */ + if (kfilm_convert->num_components >= 4) { + if (kfilm_convert->pass_combined != PASS_UNUSED) { + float scale, scale_exposure; + film_get_scale_and_scale_exposure(kfilm_convert, buffer, &scale, &scale_exposure); + + ccl_global const float *in_combined = buffer + kfilm_convert->pass_combined; + const float alpha = in_combined[3] * scale; + pixel[3] = film_transparency_to_alpha(alpha); + } + else { + pixel[3] = 1.0f; + } + } } ccl_device_inline void film_get_pass_pixel_float3(ccl_global const KernelFilmConvert *ccl_restrict diff --git a/intern/cycles/kernel/geom/curve.h b/intern/cycles/kernel/geom/curve.h index 8a63f01643b..48ee8226e89 100644 --- a/intern/cycles/kernel/geom/curve.h +++ b/intern/cycles/kernel/geom/curve.h @@ -226,6 +226,18 @@ ccl_device float curve_thickness(KernelGlobals kg, ccl_private const ShaderData return r * 2.0f; } +/* Curve random */ + +ccl_device float curve_random(KernelGlobals kg, ccl_private const ShaderData *sd) +{ + if (sd->type & PRIMITIVE_CURVE) { + const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_CURVE_RANDOM); + return (desc.offset != ATTR_STD_NOT_FOUND) ? curve_attribute_float(kg, sd, desc, NULL, NULL) : + 0.0f; + } + return 0.0f; +} + /* Curve location for motion pass, linear interpolation between keys and * ignoring radius because we do the same for the motion keys */ diff --git a/intern/cycles/kernel/geom/motion_triangle_intersect.h b/intern/cycles/kernel/geom/motion_triangle_intersect.h index cb6d210d90f..a11cb88385b 100644 --- a/intern/cycles/kernel/geom/motion_triangle_intersect.h +++ b/intern/cycles/kernel/geom/motion_triangle_intersect.h @@ -29,46 +29,19 @@ CCL_NAMESPACE_BEGIN -/* Refine triangle intersection to more precise hit point. For rays that travel - * far the precision is often not so good, this reintersects the primitive from - * a closer distance. +/** + * Use the barycentric coordinates to get the intersection location */ - -ccl_device_inline float3 motion_triangle_refine(KernelGlobals kg, - ccl_private ShaderData *sd, - float3 P, - float3 D, - float t, - const int isect_object, - const int isect_prim, - float3 verts[3]) +ccl_device_inline float3 motion_triangle_point_from_uv(KernelGlobals kg, + ccl_private ShaderData *sd, + const int isect_object, + const int isect_prim, + const float u, + const float v, + float3 verts[3]) { -#ifdef __INTERSECTION_REFINE__ - if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { - if (UNLIKELY(t == 0.0f)) { - return P; - } - const Transform tfm = object_get_inverse_transform(kg, sd); - - P = transform_point(&tfm, P); - D = transform_direction(&tfm, D * t); - D = normalize_len(D, &t); - } - - P = P + D * t; - - /* Compute refined intersection distance. */ - const float3 e1 = verts[0] - verts[2]; - const float3 e2 = verts[1] - verts[2]; - const float3 s1 = cross(D, e2); - - const float invdivisor = 1.0f / dot(s1, e1); - const float3 d = P - verts[2]; - const float3 s2 = cross(d, e1); - float rt = dot(e2, s2) * invdivisor; - - /* Compute refined position. */ - P = P + D * rt; + float w = 1.0f - u - v; + float3 P = u * verts[0] + v * verts[1] + w * verts[2]; if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { const Transform tfm = object_get_transform(kg, sd); @@ -76,71 +49,8 @@ ccl_device_inline float3 motion_triangle_refine(KernelGlobals kg, } return P; -#else - return P + D * t; -#endif } -/* Same as above, except that t is assumed to be in object space - * for instancing. - */ - -#ifdef __BVH_LOCAL__ -# if defined(__KERNEL_CUDA__) && (defined(i386) || defined(_M_IX86)) -ccl_device_noinline -# else -ccl_device_inline -# endif - float3 - motion_triangle_refine_local(KernelGlobals kg, - ccl_private ShaderData *sd, - float3 P, - float3 D, - float t, - const int isect_object, - const int isect_prim, - float3 verts[3]) -{ -# if defined(__KERNEL_GPU_RAYTRACING__) - /* t is always in world space with OptiX and MetalRT. */ - return motion_triangle_refine(kg, sd, P, D, t, isect_object, isect_prim, verts); -# else -# ifdef __INTERSECTION_REFINE__ - if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { - const Transform tfm = object_get_inverse_transform(kg, sd); - - P = transform_point(&tfm, P); - D = transform_direction(&tfm, D); - D = normalize(D); - } - - P = P + D * t; - - /* compute refined intersection distance */ - const float3 e1 = verts[0] - verts[2]; - const float3 e2 = verts[1] - verts[2]; - const float3 s1 = cross(D, e2); - - const float invdivisor = 1.0f / dot(s1, e1); - const float3 d = P - verts[2]; - const float3 s2 = cross(d, e1); - float rt = dot(e2, s2) * invdivisor; - - P = P + D * rt; - - if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { - const Transform tfm = object_get_transform(kg, sd); - P = transform_point(&tfm, P); - } - - return P; -# else /* __INTERSECTION_REFINE__ */ - return P + D * t; -# endif /* __INTERSECTION_REFINE__ */ -# endif -} -#endif /* __BVH_LOCAL__ */ - /* Ray intersection. We simply compute the vertex positions at the given ray * time and do a ray intersection with the resulting triangle. */ diff --git a/intern/cycles/kernel/geom/motion_triangle_shader.h b/intern/cycles/kernel/geom/motion_triangle_shader.h index fc7c181882e..15730c83969 100644 --- a/intern/cycles/kernel/geom/motion_triangle_shader.h +++ b/intern/cycles/kernel/geom/motion_triangle_shader.h @@ -68,15 +68,7 @@ ccl_device_noinline void motion_triangle_shader_setup(KernelGlobals kg, verts[1] = (1.0f - t) * verts[1] + t * next_verts[1]; verts[2] = (1.0f - t) * verts[2] + t * next_verts[2]; /* Compute refined position. */ -#ifdef __BVH_LOCAL__ - if (is_local) { - sd->P = motion_triangle_refine_local(kg, sd, P, D, ray_t, isect_object, isect_prim, verts); - } - else -#endif /* __BVH_LOCAL__*/ - { - sd->P = motion_triangle_refine(kg, sd, P, D, ray_t, isect_object, isect_prim, verts); - } + sd->P = motion_triangle_point_from_uv(kg, sd, isect_object, isect_prim, sd->u, sd->v, verts); /* Compute face normal. */ float3 Ng; if (sd->object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) { diff --git a/intern/cycles/kernel/geom/point.h b/intern/cycles/kernel/geom/point.h index 52a1e77d71a..545b5c7fa43 100644 --- a/intern/cycles/kernel/geom/point.h +++ b/intern/cycles/kernel/geom/point.h @@ -81,7 +81,7 @@ ccl_device float3 point_attribute_float3(KernelGlobals kg, # endif if (desc.element == ATTR_ELEMENT_VERTEX) { - return float4_to_float3(kernel_tex_fetch(__attributes_float4, desc.offset + sd->prim)); + return kernel_tex_fetch(__attributes_float3, desc.offset + sd->prim); } else { return make_float3(0.0f, 0.0f, 0.0f); @@ -109,17 +109,59 @@ ccl_device float4 point_attribute_float4(KernelGlobals kg, } } +/* Point position */ + +ccl_device float3 point_position(KernelGlobals kg, ccl_private const ShaderData *sd) +{ + if (sd->type & PRIMITIVE_POINT) { + /* World space center. */ + float3 P = (sd->type & PRIMITIVE_MOTION) ? + float4_to_float3(motion_point(kg, sd->object, sd->prim, sd->time)) : + float4_to_float3(kernel_tex_fetch(__points, sd->prim)); + + if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { + object_position_transform(kg, sd, &P); + } + + return P; + } + + return zero_float3(); +} + /* Point radius */ ccl_device float point_radius(KernelGlobals kg, ccl_private const ShaderData *sd) { if (sd->type & PRIMITIVE_POINT) { - return kernel_tex_fetch(__points, sd->prim).w; + /* World space radius. */ + const float r = kernel_tex_fetch(__points, sd->prim).w; + + if (sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED) { + return r; + } + else { + float3 dir = make_float3(r, r, r); + object_dir_transform(kg, sd, &dir); + return average(dir); + } } return 0.0f; } +/* Point random */ + +ccl_device float point_random(KernelGlobals kg, ccl_private const ShaderData *sd) +{ + if (sd->type & PRIMITIVE_POINT) { + const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_POINT_RANDOM); + return (desc.offset != ATTR_STD_NOT_FOUND) ? point_attribute_float(kg, sd, desc, NULL, NULL) : + 0.0f; + } + return 0.0f; +} + /* Point location for motion pass, linear interpolation between keys and * ignoring radius because we do the same for the motion keys */ diff --git a/intern/cycles/kernel/geom/shader_data.h b/intern/cycles/kernel/geom/shader_data.h index f5055d8b285..fdf914d85e0 100644 --- a/intern/cycles/kernel/geom/shader_data.h +++ b/intern/cycles/kernel/geom/shader_data.h @@ -89,7 +89,7 @@ ccl_device_inline void shader_setup_from_ray(KernelGlobals kg, sd->shader = kernel_tex_fetch(__tri_shader, sd->prim); /* vectors */ - sd->P = triangle_refine(kg, sd, ray->P, ray->D, isect->t, isect->object, isect->prim); + sd->P = triangle_point_from_uv(kg, sd, isect->object, isect->prim, isect->u, isect->v); sd->Ng = Ng; sd->N = Ng; @@ -190,40 +190,46 @@ ccl_device_inline void shader_setup_from_sample(KernelGlobals kg, #ifdef __OBJECT_MOTION__ shader_setup_object_transforms(kg, sd, time); #endif - } - else if (lamp != LAMP_NONE) { - sd->lamp = lamp; - } - /* transform into world space */ - if (object_space) { - object_position_transform_auto(kg, sd, &sd->P); - object_normal_transform_auto(kg, sd, &sd->Ng); - sd->N = sd->Ng; - object_dir_transform_auto(kg, sd, &sd->I); - } + /* transform into world space */ + if (object_space) { + object_position_transform_auto(kg, sd, &sd->P); + object_normal_transform_auto(kg, sd, &sd->Ng); + sd->N = sd->Ng; + object_dir_transform_auto(kg, sd, &sd->I); + } - if (sd->type == PRIMITIVE_TRIANGLE) { - /* smooth normal */ - if (sd->shader & SHADER_SMOOTH_NORMAL) { - sd->N = triangle_smooth_normal(kg, Ng, sd->prim, sd->u, sd->v); + if (sd->type == PRIMITIVE_TRIANGLE) { + /* smooth normal */ + if (sd->shader & SHADER_SMOOTH_NORMAL) { + sd->N = triangle_smooth_normal(kg, Ng, sd->prim, sd->u, sd->v); - if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { - object_normal_transform_auto(kg, sd, &sd->N); + if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { + object_normal_transform_auto(kg, sd, &sd->N); + } } - } - /* dPdu/dPdv */ + /* dPdu/dPdv */ #ifdef __DPDU__ - triangle_dPdudv(kg, sd->prim, &sd->dPdu, &sd->dPdv); + triangle_dPdudv(kg, sd->prim, &sd->dPdu, &sd->dPdv); - if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { - object_dir_transform_auto(kg, sd, &sd->dPdu); - object_dir_transform_auto(kg, sd, &sd->dPdv); + if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { + object_dir_transform_auto(kg, sd, &sd->dPdu); + object_dir_transform_auto(kg, sd, &sd->dPdv); + } +#endif } + else { +#ifdef __DPDU__ + sd->dPdu = zero_float3(); + sd->dPdv = zero_float3(); #endif + } } else { + if (lamp != LAMP_NONE) { + sd->lamp = lamp; + } #ifdef __DPDU__ sd->dPdu = zero_float3(); sd->dPdv = zero_float3(); diff --git a/intern/cycles/kernel/geom/triangle_intersect.h b/intern/cycles/kernel/geom/triangle_intersect.h index 0169b40bc34..8458cf020a0 100644 --- a/intern/cycles/kernel/geom/triangle_intersect.h +++ b/intern/cycles/kernel/geom/triangle_intersect.h @@ -142,116 +142,23 @@ ccl_device_inline bool triangle_intersect_local(KernelGlobals kg, } #endif /* __BVH_LOCAL__ */ -/* Refine triangle intersection to more precise hit point. For rays that travel - * far the precision is often not so good, this reintersects the primitive from - * a closer distance. */ - -/* Reintersections uses the paper: - * - * Tomas Moeller - * Fast, minimum storage ray/triangle intersection - * http://www.cs.virginia.edu/~gfx/Courses/2003/ImageSynthesis/papers/Acceleration/Fast%20MinimumStorage%20RayTriangle%20Intersection.pdf +/** + * Use the barycentric coordinates to get the intersection location */ - -ccl_device_inline float3 triangle_refine(KernelGlobals kg, - ccl_private ShaderData *sd, - float3 P, - float3 D, - float t, - const int isect_object, - const int isect_prim) +ccl_device_inline float3 triangle_point_from_uv(KernelGlobals kg, + ccl_private ShaderData *sd, + const int isect_object, + const int isect_prim, + const float u, + const float v) { -#ifdef __INTERSECTION_REFINE__ - if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { - if (UNLIKELY(t == 0.0f)) { - return P; - } - const Transform tfm = object_get_inverse_transform(kg, sd); - - P = transform_point(&tfm, P); - D = transform_direction(&tfm, D * t); - D = normalize_len(D, &t); - } - - P = P + D * t; - const uint tri_vindex = kernel_tex_fetch(__tri_vindex, isect_prim).w; const packed_float3 tri_a = kernel_tex_fetch(__tri_verts, tri_vindex + 0), tri_b = kernel_tex_fetch(__tri_verts, tri_vindex + 1), tri_c = kernel_tex_fetch(__tri_verts, tri_vindex + 2); - float3 edge1 = make_float3(tri_a.x - tri_c.x, tri_a.y - tri_c.y, tri_a.z - tri_c.z); - float3 edge2 = make_float3(tri_b.x - tri_c.x, tri_b.y - tri_c.y, tri_b.z - tri_c.z); - float3 tvec = make_float3(P.x - tri_c.x, P.y - tri_c.y, P.z - tri_c.z); - float3 qvec = cross(tvec, edge1); - float3 pvec = cross(D, edge2); - float det = dot(edge1, pvec); - if (det != 0.0f) { - /* If determinant is zero it means ray lies in the plane of - * the triangle. It is possible in theory due to watertight - * nature of triangle intersection. For such cases we simply - * don't refine intersection hoping it'll go all fine. - */ - float rt = dot(edge2, qvec) / det; - P = P + D * rt; - } - - if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { - const Transform tfm = object_get_transform(kg, sd); - P = transform_point(&tfm, P); - } - - return P; -#else - return P + D * t; -#endif -} - -/* Same as above, except that t is assumed to be in object space for - * instancing. - */ -ccl_device_inline float3 triangle_refine_local(KernelGlobals kg, - ccl_private ShaderData *sd, - float3 P, - float3 D, - float t, - const int isect_object, - const int isect_prim) -{ -#if defined(__KERNEL_GPU_RAYTRACING__) - /* t is always in world space with OptiX and MetalRT. */ - return triangle_refine(kg, sd, P, D, t, isect_object, isect_prim); -#else - if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { - const Transform tfm = object_get_inverse_transform(kg, sd); - - P = transform_point(&tfm, P); - D = transform_direction(&tfm, D); - D = normalize(D); - } + float w = 1.0f - u - v; - P = P + D * t; - -# ifdef __INTERSECTION_REFINE__ - const uint tri_vindex = kernel_tex_fetch(__tri_vindex, isect_prim).w; - const packed_float3 tri_a = kernel_tex_fetch(__tri_verts, tri_vindex + 0), - tri_b = kernel_tex_fetch(__tri_verts, tri_vindex + 1), - tri_c = kernel_tex_fetch(__tri_verts, tri_vindex + 2); - float3 edge1 = make_float3(tri_a.x - tri_c.x, tri_a.y - tri_c.y, tri_a.z - tri_c.z); - float3 edge2 = make_float3(tri_b.x - tri_c.x, tri_b.y - tri_c.y, tri_b.z - tri_c.z); - float3 tvec = make_float3(P.x - tri_c.x, P.y - tri_c.y, P.z - tri_c.z); - float3 qvec = cross(tvec, edge1); - float3 pvec = cross(D, edge2); - float det = dot(edge1, pvec); - if (det != 0.0f) { - /* If determinant is zero it means ray lies in the plane of - * the triangle. It is possible in theory due to watertight - * nature of triangle intersection. For such cases we simply - * don't refine intersection hoping it'll go all fine. - */ - float rt = dot(edge2, qvec) / det; - P = P + D * rt; - } -# endif /* __INTERSECTION_REFINE__ */ + float3 P = u * tri_a + v * tri_b + w * tri_c; if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { const Transform tfm = object_get_transform(kg, sd); @@ -259,7 +166,6 @@ ccl_device_inline float3 triangle_refine_local(KernelGlobals kg, } return P; -#endif } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/intersect_closest.h b/intern/cycles/kernel/integrator/intersect_closest.h index df710dc1d82..4c5265189fa 100644 --- a/intern/cycles/kernel/integrator/intersect_closest.h +++ b/intern/cycles/kernel/integrator/intersect_closest.h @@ -328,6 +328,12 @@ ccl_device void integrator_intersect_closest(KernelGlobals kg, /* Scene Intersection. */ Intersection isect ccl_optional_struct_init; + isect.object = OBJECT_NONE; + isect.prim = PRIM_NONE; + ray.self.object = last_isect_object; + ray.self.prim = last_isect_prim; + ray.self.light_object = OBJECT_NONE; + ray.self.light_prim = PRIM_NONE; bool hit = scene_intersect(kg, &ray, visibility, &isect); /* TODO: remove this and do it in the various intersection functions instead. */ diff --git a/intern/cycles/kernel/integrator/intersect_shadow.h b/intern/cycles/kernel/integrator/intersect_shadow.h index 90422445fad..1ba8724826b 100644 --- a/intern/cycles/kernel/integrator/intersect_shadow.h +++ b/intern/cycles/kernel/integrator/intersect_shadow.h @@ -156,7 +156,10 @@ ccl_device void integrator_intersect_shadow(KernelGlobals kg, IntegratorShadowSt /* Read ray from integrator state into local memory. */ Ray ray ccl_optional_struct_init; integrator_state_read_shadow_ray(kg, state, &ray); - + ray.self.object = INTEGRATOR_STATE_ARRAY(state, shadow_isect, 0, object); + ray.self.prim = INTEGRATOR_STATE_ARRAY(state, shadow_isect, 0, prim); + ray.self.light_object = INTEGRATOR_STATE_ARRAY(state, shadow_isect, 1, object); + ray.self.light_prim = INTEGRATOR_STATE_ARRAY(state, shadow_isect, 1, prim); /* Compute visibility. */ const uint visibility = integrate_intersect_shadow_visibility(kg, state); diff --git a/intern/cycles/kernel/integrator/intersect_volume_stack.h b/intern/cycles/kernel/integrator/intersect_volume_stack.h index 9fa5ff63ad2..ee3d82ebacb 100644 --- a/intern/cycles/kernel/integrator/intersect_volume_stack.h +++ b/intern/cycles/kernel/integrator/intersect_volume_stack.h @@ -38,7 +38,10 @@ ccl_device void integrator_volume_stack_update_for_subsurface(KernelGlobals kg, Ray volume_ray ccl_optional_struct_init; volume_ray.P = from_P; volume_ray.D = normalize_len(to_P - from_P, &volume_ray.t); - + volume_ray.self.object = INTEGRATOR_STATE(state, isect, object); + volume_ray.self.prim = INTEGRATOR_STATE(state, isect, prim); + volume_ray.self.light_object = OBJECT_NONE; + volume_ray.self.light_prim = PRIM_NONE; /* Store to avoid global fetches on every intersection step. */ const uint volume_stack_size = kernel_data.volume_stack_size; @@ -68,7 +71,7 @@ ccl_device void integrator_volume_stack_update_for_subsurface(KernelGlobals kg, volume_stack_enter_exit(kg, state, stack_sd); /* Move ray forward. */ - volume_ray.P = ray_offset(stack_sd->P, -stack_sd->Ng); + volume_ray.P = stack_sd->P; if (volume_ray.t != FLT_MAX) { volume_ray.D = normalize_len(to_P - volume_ray.P, &volume_ray.t); } @@ -91,6 +94,10 @@ ccl_device void integrator_volume_stack_init(KernelGlobals kg, IntegratorState s * fewest hits. */ volume_ray.D = make_float3(0.0f, 0.0f, 1.0f); volume_ray.t = FLT_MAX; + volume_ray.self.object = OBJECT_NONE; + volume_ray.self.prim = PRIM_NONE; + volume_ray.self.light_object = OBJECT_NONE; + volume_ray.self.light_prim = PRIM_NONE; int stack_index = 0, enclosed_index = 0; @@ -203,7 +210,7 @@ ccl_device void integrator_volume_stack_init(KernelGlobals kg, IntegratorState s } /* Move ray forward. */ - volume_ray.P = ray_offset(stack_sd->P, -stack_sd->Ng); + volume_ray.P = stack_sd->P; ++step; } #endif diff --git a/intern/cycles/kernel/integrator/shade_light.h b/intern/cycles/kernel/integrator/shade_light.h index 97ca430752c..0a82c9cadef 100644 --- a/intern/cycles/kernel/integrator/shade_light.h +++ b/intern/cycles/kernel/integrator/shade_light.h @@ -37,8 +37,9 @@ ccl_device_inline void integrate_light(KernelGlobals kg, /* Advance ray beyond light. */ /* TODO: can we make this more numerically robust to avoid reintersecting the - * same light in some cases? */ - const float3 new_ray_P = ray_offset(ray_P + ray_D * isect.t, ray_D); + * same light in some cases? Ray should not intersect surface anymore as the + * object and prim ids will prevent self intersection. */ + const float3 new_ray_P = ray_P + ray_D * isect.t; INTEGRATOR_STATE_WRITE(state, ray, P) = new_ray_P; INTEGRATOR_STATE_WRITE(state, ray, t) -= isect.t; @@ -46,7 +47,7 @@ ccl_device_inline void integrate_light(KernelGlobals kg, const float mis_ray_t = INTEGRATOR_STATE(state, path, mis_ray_t); ray_P -= ray_D * mis_ray_t; isect.t += mis_ray_t; - INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) = mis_ray_t + isect.t; + INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) = isect.t; LightSample ls ccl_optional_struct_init; const bool use_light_sample = light_sample_from_intersection(kg, &isect, ray_P, ray_D, &ls); diff --git a/intern/cycles/kernel/integrator/shade_shadow.h b/intern/cycles/kernel/integrator/shade_shadow.h index a68fcaa7a64..3e8eba29ef7 100644 --- a/intern/cycles/kernel/integrator/shade_shadow.h +++ b/intern/cycles/kernel/integrator/shade_shadow.h @@ -83,7 +83,10 @@ ccl_device_inline void integrate_transparent_volume_shadow(KernelGlobals kg, /* Setup shader data. */ Ray ray ccl_optional_struct_init; integrator_state_read_shadow_ray(kg, state, &ray); - + ray.self.object = OBJECT_NONE; + ray.self.prim = PRIM_NONE; + ray.self.light_object = OBJECT_NONE; + ray.self.light_prim = PRIM_NONE; /* Modify ray position and length to match current segment. */ const float start_t = (hit == 0) ? 0.0f : INTEGRATOR_STATE_ARRAY(state, shadow_isect, hit - 1, t); @@ -149,7 +152,7 @@ ccl_device_inline bool integrate_transparent_shadow(KernelGlobals kg, const float last_hit_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, num_recorded_hits - 1, t); const float3 ray_P = INTEGRATOR_STATE(state, shadow_ray, P); const float3 ray_D = INTEGRATOR_STATE(state, shadow_ray, D); - INTEGRATOR_STATE_WRITE(state, shadow_ray, P) = ray_offset(ray_P + last_hit_t * ray_D, ray_D); + INTEGRATOR_STATE_WRITE(state, shadow_ray, P) = ray_P + last_hit_t * ray_D; INTEGRATOR_STATE_WRITE(state, shadow_ray, t) -= last_hit_t; } diff --git a/intern/cycles/kernel/integrator/shade_surface.h b/intern/cycles/kernel/integrator/shade_surface.h index 3d5b65458c7..10d3cbf7f57 100644 --- a/intern/cycles/kernel/integrator/shade_surface.h +++ b/intern/cycles/kernel/integrator/shade_surface.h @@ -182,23 +182,35 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg, /* Write shadow ray and associated state to global memory. */ integrator_state_write_shadow_ray(kg, shadow_state, &ray); + // Save memory by storing the light and object indices in the shadow_isect + INTEGRATOR_STATE_ARRAY_WRITE(shadow_state, shadow_isect, 0, object) = ray.self.object; + INTEGRATOR_STATE_ARRAY_WRITE(shadow_state, shadow_isect, 0, prim) = ray.self.prim; + INTEGRATOR_STATE_ARRAY_WRITE(shadow_state, shadow_isect, 1, object) = ray.self.light_object; + INTEGRATOR_STATE_ARRAY_WRITE(shadow_state, shadow_isect, 1, prim) = ray.self.light_prim; /* Copy state from main path to shadow path. */ const uint16_t bounce = INTEGRATOR_STATE(state, path, bounce); const uint16_t transparent_bounce = INTEGRATOR_STATE(state, path, transparent_bounce); uint32_t shadow_flag = INTEGRATOR_STATE(state, path, flag); shadow_flag |= (is_light) ? PATH_RAY_SHADOW_FOR_LIGHT : 0; - shadow_flag |= PATH_RAY_SURFACE_PASS; const float3 throughput = INTEGRATOR_STATE(state, path, throughput) * bsdf_eval_sum(&bsdf_eval); if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) { - const packed_float3 pass_diffuse_weight = - (bounce == 0) ? packed_float3(bsdf_eval_pass_diffuse_weight(&bsdf_eval)) : - INTEGRATOR_STATE(state, path, pass_diffuse_weight); - const packed_float3 pass_glossy_weight = (bounce == 0) ? - packed_float3( - bsdf_eval_pass_glossy_weight(&bsdf_eval)) : - INTEGRATOR_STATE(state, path, pass_glossy_weight); + packed_float3 pass_diffuse_weight; + packed_float3 pass_glossy_weight; + + if (shadow_flag & PATH_RAY_ANY_PASS) { + /* Indirect bounce, use weights from earlier surface or volume bounce. */ + pass_diffuse_weight = INTEGRATOR_STATE(state, path, pass_diffuse_weight); + pass_glossy_weight = INTEGRATOR_STATE(state, path, pass_glossy_weight); + } + else { + /* Direct light, use BSDFs at this bounce. */ + shadow_flag |= PATH_RAY_SURFACE_PASS; + pass_diffuse_weight = packed_float3(bsdf_eval_pass_diffuse_weight(&bsdf_eval)); + pass_glossy_weight = packed_float3(bsdf_eval_pass_glossy_weight(&bsdf_eval)); + } + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, pass_diffuse_weight) = pass_diffuse_weight; INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, pass_glossy_weight) = pass_glossy_weight; } @@ -266,13 +278,11 @@ ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce( } /* Setup ray. Note that clipping works through transparent bounces. */ - INTEGRATOR_STATE_WRITE(state, ray, P) = ray_offset(sd->P, - (label & LABEL_TRANSMIT) ? -sd->Ng : sd->Ng); + INTEGRATOR_STATE_WRITE(state, ray, P) = sd->P; INTEGRATOR_STATE_WRITE(state, ray, D) = normalize(bsdf_omega_in); INTEGRATOR_STATE_WRITE(state, ray, t) = (label & LABEL_TRANSPARENT) ? INTEGRATOR_STATE(state, ray, t) - sd->ray_length : FLT_MAX; - #ifdef __RAY_DIFFERENTIALS__ INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP); INTEGRATOR_STATE_WRITE(state, ray, dD) = differential_make_compact(bsdf_domega_in); @@ -316,7 +326,7 @@ ccl_device_forceinline bool integrate_surface_volume_only_bounce(IntegratorState } /* Setup ray position, direction stays unchanged. */ - INTEGRATOR_STATE_WRITE(state, ray, P) = ray_offset(sd->P, -sd->Ng); + INTEGRATOR_STATE_WRITE(state, ray, P) = sd->P; /* Clipping works through transparent. */ INTEGRATOR_STATE_WRITE(state, ray, t) -= sd->ray_length; @@ -360,10 +370,14 @@ ccl_device_forceinline void integrate_surface_ao(KernelGlobals kg, } Ray ray ccl_optional_struct_init; - ray.P = ray_offset(sd->P, sd->Ng); + ray.P = sd->P; ray.D = ao_D; ray.t = kernel_data.integrator.ao_bounces_distance; ray.time = sd->time; + ray.self.object = sd->object; + ray.self.prim = sd->prim; + ray.self.light_object = OBJECT_NONE; + ray.self.light_prim = PRIM_NONE; ray.dP = differential_zero_compact(); ray.dD = differential_zero_compact(); @@ -375,6 +389,10 @@ ccl_device_forceinline void integrate_surface_ao(KernelGlobals kg, /* Write shadow ray and associated state to global memory. */ integrator_state_write_shadow_ray(kg, shadow_state, &ray); + INTEGRATOR_STATE_ARRAY_WRITE(shadow_state, shadow_isect, 0, object) = ray.self.object; + INTEGRATOR_STATE_ARRAY_WRITE(shadow_state, shadow_isect, 0, prim) = ray.self.prim; + INTEGRATOR_STATE_ARRAY_WRITE(shadow_state, shadow_isect, 1, object) = ray.self.light_object; + INTEGRATOR_STATE_ARRAY_WRITE(shadow_state, shadow_isect, 1, prim) = ray.self.light_prim; /* Copy state from main path to shadow path. */ const uint16_t bounce = INTEGRATOR_STATE(state, path, bounce); diff --git a/intern/cycles/kernel/integrator/shade_volume.h b/intern/cycles/kernel/integrator/shade_volume.h index 712c22357b8..c59234553a7 100644 --- a/intern/cycles/kernel/integrator/shade_volume.h +++ b/intern/cycles/kernel/integrator/shade_volume.h @@ -791,22 +791,36 @@ ccl_device_forceinline void integrate_volume_direct_light( /* Write shadow ray and associated state to global memory. */ integrator_state_write_shadow_ray(kg, shadow_state, &ray); + INTEGRATOR_STATE_ARRAY_WRITE(shadow_state, shadow_isect, 0, object) = ray.self.object; + INTEGRATOR_STATE_ARRAY_WRITE(shadow_state, shadow_isect, 0, prim) = ray.self.prim; + INTEGRATOR_STATE_ARRAY_WRITE(shadow_state, shadow_isect, 1, object) = ray.self.light_object; + INTEGRATOR_STATE_ARRAY_WRITE(shadow_state, shadow_isect, 1, prim) = ray.self.light_prim; /* Copy state from main path to shadow path. */ const uint16_t bounce = INTEGRATOR_STATE(state, path, bounce); const uint16_t transparent_bounce = INTEGRATOR_STATE(state, path, transparent_bounce); uint32_t shadow_flag = INTEGRATOR_STATE(state, path, flag); shadow_flag |= (is_light) ? PATH_RAY_SHADOW_FOR_LIGHT : 0; - shadow_flag |= PATH_RAY_VOLUME_PASS; const float3 throughput_phase = throughput * bsdf_eval_sum(&phase_eval); if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) { - const packed_float3 pass_diffuse_weight = (bounce == 0) ? - packed_float3(one_float3()) : - INTEGRATOR_STATE( - state, path, pass_diffuse_weight); + packed_float3 pass_diffuse_weight; + packed_float3 pass_glossy_weight; + + if (shadow_flag & PATH_RAY_ANY_PASS) { + /* Indirect bounce, use weights from earlier surface or volume bounce. */ + pass_diffuse_weight = INTEGRATOR_STATE(state, path, pass_diffuse_weight); + pass_glossy_weight = INTEGRATOR_STATE(state, path, pass_glossy_weight); + } + else { + /* Direct light, no diffuse/glossy distinction needed for volumes. */ + shadow_flag |= PATH_RAY_VOLUME_PASS; + pass_diffuse_weight = packed_float3(one_float3()); + pass_glossy_weight = packed_float3(zero_float3()); + } + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, pass_diffuse_weight) = pass_diffuse_weight; - INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, pass_glossy_weight) = zero_float3(); + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, pass_glossy_weight) = pass_glossy_weight; } INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, render_pixel_index) = INTEGRATOR_STATE( @@ -873,11 +887,13 @@ ccl_device_forceinline bool integrate_volume_phase_scatter( INTEGRATOR_STATE_WRITE(state, ray, P) = sd->P; INTEGRATOR_STATE_WRITE(state, ray, D) = normalize(phase_omega_in); INTEGRATOR_STATE_WRITE(state, ray, t) = FLT_MAX; - # ifdef __RAY_DIFFERENTIALS__ INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP); INTEGRATOR_STATE_WRITE(state, ray, dD) = differential_make_compact(phase_domega_in); # endif + // Save memory by storing last hit prim and object in isect + INTEGRATOR_STATE_WRITE(state, isect, prim) = sd->prim; + INTEGRATOR_STATE_WRITE(state, isect, object) = sd->object; /* Update throughput. */ const float3 throughput = INTEGRATOR_STATE(state, path, throughput); diff --git a/intern/cycles/kernel/integrator/shadow_state_template.h b/intern/cycles/kernel/integrator/shadow_state_template.h index 625a429d3db..86fcabdcd82 100644 --- a/intern/cycles/kernel/integrator/shadow_state_template.h +++ b/intern/cycles/kernel/integrator/shadow_state_template.h @@ -61,6 +61,7 @@ KERNEL_STRUCT_MEMBER(shadow_ray, packed_float3, D, KERNEL_FEATURE_PATH_TRACING) KERNEL_STRUCT_MEMBER(shadow_ray, float, t, KERNEL_FEATURE_PATH_TRACING) KERNEL_STRUCT_MEMBER(shadow_ray, float, time, KERNEL_FEATURE_PATH_TRACING) KERNEL_STRUCT_MEMBER(shadow_ray, float, dP, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_MEMBER(shadow_ray, int, object, KERNEL_FEATURE_PATH_TRACING) KERNEL_STRUCT_END(shadow_ray) /*********************** Shadow Intersection result **************************/ diff --git a/intern/cycles/kernel/integrator/subsurface.h b/intern/cycles/kernel/integrator/subsurface.h index 59b0cd2596c..6c0f815afea 100644 --- a/intern/cycles/kernel/integrator/subsurface.h +++ b/intern/cycles/kernel/integrator/subsurface.h @@ -57,7 +57,6 @@ ccl_device int subsurface_bounce(KernelGlobals kg, /* Pass along object info, reusing isect to save memory. */ INTEGRATOR_STATE_WRITE(state, subsurface, Ng) = sd->Ng; - INTEGRATOR_STATE_WRITE(state, isect, object) = sd->object; uint32_t path_flag = (INTEGRATOR_STATE(state, path, flag) & ~PATH_RAY_CAMERA) | ((sc->type == CLOSURE_BSSRDF_BURLEY_ID) ? PATH_RAY_SUBSURFACE_DISK : @@ -165,10 +164,8 @@ ccl_device_inline bool subsurface_scatter(KernelGlobals kg, IntegratorState stat if (object_flag & SD_OBJECT_INTERSECTS_VOLUME) { float3 P = INTEGRATOR_STATE(state, ray, P); - const float3 Ng = INTEGRATOR_STATE(state, subsurface, Ng); - const float3 offset_P = ray_offset(P, -Ng); - integrator_volume_stack_update_for_subsurface(kg, state, offset_P, ray.P); + integrator_volume_stack_update_for_subsurface(kg, state, P, ray.P); } } # endif /* __VOLUME__ */ diff --git a/intern/cycles/kernel/integrator/subsurface_disk.h b/intern/cycles/kernel/integrator/subsurface_disk.h index cc6f5048cda..f5641d1fa5e 100644 --- a/intern/cycles/kernel/integrator/subsurface_disk.h +++ b/intern/cycles/kernel/integrator/subsurface_disk.h @@ -99,6 +99,10 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg, ray.dP = ray_dP; ray.dD = differential_zero_compact(); ray.time = time; + ray.self.object = OBJECT_NONE; + ray.self.prim = PRIM_NONE; + ray.self.light_object = OBJECT_NONE; + ray.self.light_prim = OBJECT_NONE; /* Intersect with the same object. if multiple intersections are found it * will use at most BSSRDF_MAX_HITS hits, a random subset of all hits. */ diff --git a/intern/cycles/kernel/integrator/subsurface_random_walk.h b/intern/cycles/kernel/integrator/subsurface_random_walk.h index 7a8b467e199..993c54d9050 100644 --- a/intern/cycles/kernel/integrator/subsurface_random_walk.h +++ b/intern/cycles/kernel/integrator/subsurface_random_walk.h @@ -195,6 +195,7 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg, const float time = INTEGRATOR_STATE(state, ray, time); const float3 Ng = INTEGRATOR_STATE(state, subsurface, Ng); const int object = INTEGRATOR_STATE(state, isect, object); + const int prim = INTEGRATOR_STATE(state, isect, prim); /* Sample diffuse surface scatter into the object. */ float3 D; @@ -205,12 +206,16 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg, } /* Setup ray. */ - ray.P = ray_offset(P, -Ng); + ray.P = P; ray.D = D; ray.t = FLT_MAX; ray.time = time; ray.dP = ray_dP; ray.dD = differential_zero_compact(); + ray.self.object = object; + ray.self.prim = prim; + ray.self.light_object = OBJECT_NONE; + ray.self.light_prim = PRIM_NONE; #ifndef __KERNEL_GPU_RAYTRACING__ /* Compute or fetch object transforms. */ @@ -377,7 +382,15 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg, * If yes, we will later use backwards guided sampling in order to have a decent * chance of connecting to it. * TODO: Maybe use less than 10 times the mean free path? */ - ray.t = (bounce == 0) ? max(t, 10.0f / (min3(sigma_t))) : t; + if (bounce == 0) { + ray.t = max(t, 10.0f / (min3(sigma_t))); + } + else { + ray.t = t; + /* After the first bounce the object can intersect the same surface again */ + ray.self.object = OBJECT_NONE; + ray.self.prim = PRIM_NONE; + } scene_intersect_local(kg, &ray, &ss_isect, object, NULL, 1); hit = (ss_isect.num_hits > 0); @@ -408,13 +421,6 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg, if (hit) { t = ray.t; } - else if (bounce == 0) { - /* Restore original position if nothing was hit after the first bounce, - * without the ray_offset() that was added to avoid self-intersection. - * Otherwise if that offset is relatively large compared to the scattering - * radius, we never go back up high enough to exit the surface. */ - ray.P = P; - } /* Advance to new scatter location. */ ray.P += t * ray.D; diff --git a/intern/cycles/kernel/light/light.h b/intern/cycles/kernel/light/light.h index 6e445f862db..d05fe47cc2c 100644 --- a/intern/cycles/kernel/light/light.h +++ b/intern/cycles/kernel/light/light.h @@ -113,22 +113,30 @@ ccl_device_inline bool light_sample(KernelGlobals kg, ls->P = make_float3(klight->co[0], klight->co[1], klight->co[2]); if (type == LIGHT_SPOT) { - ls->Ng = make_float3(klight->spot.dir[0], klight->spot.dir[1], klight->spot.dir[2]); - float radius = klight->spot.radius; + const float3 center = make_float3(klight->co[0], klight->co[1], klight->co[2]); + const float radius = klight->spot.radius; + const float3 dir = make_float3( + klight->spot.dir[0], klight->spot.dir[1], klight->spot.dir[2]); + /* disk oriented normal */ + const float3 lightN = normalize(P - center); + ls->P = center; if (radius > 0.0f) - /* sphere light */ - ls->P += disk_light_sample(ls->Ng, randu, randv) * radius; + /* disk light */ + ls->P += disk_light_sample(lightN, randu, randv) * radius; + + const float invarea = klight->spot.invarea; + ls->pdf = invarea; ls->D = normalize_len(ls->P - P, &ls->t); + /* we set the light normal to the outgoing direction to support texturing */ + ls->Ng = -ls->D; - float invarea = klight->spot.invarea; ls->eval_fac = (0.25f * M_1_PI_F) * invarea; - ls->pdf = invarea; /* spot light attenuation */ ls->eval_fac *= spot_light_attenuation( - ls->Ng, klight->spot.spot_angle, klight->spot.spot_smooth, -ls->D); + dir, klight->spot.spot_angle, klight->spot.spot_smooth, -ls->D); if (!in_volume_segment && ls->eval_fac == 0.0f) { return false; } @@ -137,32 +145,33 @@ ccl_device_inline bool light_sample(KernelGlobals kg, ls->u = uv.x; ls->v = uv.y; - ls->pdf *= lamp_light_pdf(kg, ls->Ng, -ls->D, ls->t); + ls->pdf *= lamp_light_pdf(kg, lightN, -ls->D, ls->t); } else if (type == LIGHT_POINT) { float3 center = make_float3(klight->co[0], klight->co[1], klight->co[2]); float radius = klight->spot.radius; + /* disk oriented normal */ + const float3 lightN = normalize(P - center); ls->P = center; - float pdf = 1.0; if (radius > 0.0f) { - ls->Ng = normalize(P - center); - ls->P += disk_light_sample(ls->Ng, randu, randv) * radius; - pdf = klight->spot.invarea; - ls->D = normalize_len(ls->P - P, &ls->t); - } - else { - ls->Ng = normalize(P - center); + ls->P += disk_light_sample(lightN, randu, randv) * radius; } + ls->pdf = klight->spot.invarea; ls->D = normalize_len(ls->P - P, &ls->t); - ls->pdf = pdf; + /* we set the light normal to the outgoing direction to support texturing */ + ls->Ng = -ls->D; + ls->eval_fac = M_1_PI_F * 0.25f * klight->spot.invarea; + if (!in_volume_segment && ls->eval_fac == 0.0f) { + return false; + } float2 uv = map_to_sphere(ls->Ng); ls->u = uv.x; ls->v = uv.y; - ls->pdf *= lamp_light_pdf(kg, ls->Ng, -ls->D, ls->t); + ls->pdf *= lamp_light_pdf(kg, lightN, -ls->D, ls->t); } else { /* area light */ @@ -263,14 +272,16 @@ ccl_device bool lights_intersect(KernelGlobals kg, if (type == LIGHT_SPOT) { /* Spot/Disk light. */ + const float mis_ray_t = INTEGRATOR_STATE(state, path, mis_ray_t); + const float3 ray_P = ray->P - ray->D * mis_ray_t; + const float3 lightP = make_float3(klight->co[0], klight->co[1], klight->co[2]); - const float3 lightN = make_float3( - klight->spot.dir[0], klight->spot.dir[1], klight->spot.dir[2]); const float radius = klight->spot.radius; if (radius == 0.0f) { continue; } - + /* disk oriented normal */ + const float3 lightN = normalize(ray_P - lightP); /* One sided. */ if (dot(ray->D, lightN) >= 0.0f) { continue; @@ -292,9 +303,10 @@ ccl_device bool lights_intersect(KernelGlobals kg, continue; } + /* disk oriented normal */ + const float3 lightN = normalize(ray_P - lightP); float3 P; - const float3 lsN = normalize(ray_P - lightP); - if (!ray_disk_intersect(ray->P, ray->D, ray->t, lightP, lsN, radius, &P, &t)) { + if (!ray_disk_intersect(ray->P, ray->D, ray->t, lightP, lightN, radius, &P, &t)) { continue; } } @@ -418,8 +430,8 @@ ccl_device bool light_sample_from_intersection(KernelGlobals kg, LightType type = (LightType)klight->type; ls->type = type; ls->shader = klight->shader_id; - ls->object = PRIM_NONE; - ls->prim = PRIM_NONE; + ls->object = isect->object; + ls->prim = isect->prim; ls->lamp = lamp; /* todo: missing texture coordinates */ ls->t = isect->t; @@ -427,7 +439,12 @@ ccl_device bool light_sample_from_intersection(KernelGlobals kg, ls->D = ray_D; if (type == LIGHT_SPOT) { - ls->Ng = make_float3(klight->spot.dir[0], klight->spot.dir[1], klight->spot.dir[2]); + const float3 center = make_float3(klight->co[0], klight->co[1], klight->co[2]); + const float3 dir = make_float3(klight->spot.dir[0], klight->spot.dir[1], klight->spot.dir[2]); + /* the normal of the oriented disk */ + const float3 lightN = normalize(ray_P - center); + /* we set the light normal to the outgoing direction to support texturing*/ + ls->Ng = -ls->D; float invarea = klight->spot.invarea; ls->eval_fac = (0.25f * M_1_PI_F) * invarea; @@ -435,7 +452,7 @@ ccl_device bool light_sample_from_intersection(KernelGlobals kg, /* spot light attenuation */ ls->eval_fac *= spot_light_attenuation( - ls->Ng, klight->spot.spot_angle, klight->spot.spot_smooth, -ls->D); + dir, klight->spot.spot_angle, klight->spot.spot_smooth, -ls->D); if (ls->eval_fac == 0.0f) { return false; @@ -447,23 +464,32 @@ ccl_device bool light_sample_from_intersection(KernelGlobals kg, /* compute pdf */ if (ls->t != FLT_MAX) - ls->pdf *= lamp_light_pdf(kg, ls->Ng, -ls->D, ls->t); + ls->pdf *= lamp_light_pdf(kg, lightN, -ls->D, ls->t); + else + ls->pdf = 0.f; } else if (type == LIGHT_POINT) { - float3 center = make_float3(klight->co[0], klight->co[1], klight->co[2]); + const float3 center = make_float3(klight->co[0], klight->co[1], klight->co[2]); + const float3 lighN = normalize(ray_P - center); + + /* we set the light normal to the outgoing direction to support texturing*/ + ls->Ng = -ls->D; - ls->Ng = normalize(ray_P - center); float invarea = klight->spot.invarea; ls->eval_fac = (0.25f * M_1_PI_F) * invarea; ls->pdf = invarea; + if (ls->eval_fac == 0.0f) { + return false; + } + float2 uv = map_to_sphere(ls->Ng); ls->u = uv.x; ls->v = uv.y; /* compute pdf */ if (ls->t != FLT_MAX) - ls->pdf *= lamp_light_pdf(kg, ls->Ng, -ls->D, ls->t); + ls->pdf *= lamp_light_pdf(kg, lighN, -ls->D, ls->t); else ls->pdf = 0.f; } @@ -921,4 +947,4 @@ ccl_device_inline bool light_distribution_sample_new_position(KernelGlobals kg, } } -CCL_NAMESPACE_END +CCL_NAMESPACE_END
\ No newline at end of file diff --git a/intern/cycles/kernel/light/sample.h b/intern/cycles/kernel/light/sample.h index 7dbc783b1bb..521ad2f7066 100644 --- a/intern/cycles/kernel/light/sample.h +++ b/intern/cycles/kernel/light/sample.h @@ -198,7 +198,7 @@ ccl_device_inline float3 shadow_ray_offset(KernelGlobals kg, float NL = dot(sd->N, L); bool transmit = (NL < 0.0f); float3 Ng = (transmit ? -sd->Ng : sd->Ng); - float3 P = ray_offset(sd->P, Ng); + float3 P = sd->P; if ((sd->type & PRIMITIVE_TRIANGLE) && (sd->shader & SHADER_SMOOTH_NORMAL)) { const float offset_cutoff = @@ -243,7 +243,7 @@ ccl_device_inline void shadow_ray_setup(ccl_private const ShaderData *ccl_restri } else { /* other lights, avoid self-intersection */ - ray->D = ray_offset(ls->P, ls->Ng) - P; + ray->D = ls->P - P; ray->D = normalize_len(ray->D, &ray->t); } } @@ -257,6 +257,12 @@ ccl_device_inline void shadow_ray_setup(ccl_private const ShaderData *ccl_restri ray->dP = differential_make_compact(sd->dP); ray->dD = differential_zero_compact(); ray->time = sd->time; + + /* Fill in intersection surface and light details. */ + ray->self.prim = sd->prim; + ray->self.object = sd->object; + ray->self.light_prim = ls->prim; + ray->self.light_object = ls->object; } /* Create shadow ray towards light sample. */ diff --git a/intern/cycles/kernel/osl/services.cpp b/intern/cycles/kernel/osl/services.cpp index a79fc323a13..d79e7dfa8a5 100644 --- a/intern/cycles/kernel/osl/services.cpp +++ b/intern/cycles/kernel/osl/services.cpp @@ -116,6 +116,8 @@ ustring OSLRenderServices::u_curve_tangent_normal("geom:curve_tangent_normal"); ustring OSLRenderServices::u_curve_random("geom:curve_random"); ustring OSLRenderServices::u_is_point("geom:is_point"); ustring OSLRenderServices::u_point_radius("geom:point_radius"); +ustring OSLRenderServices::u_point_position("geom:point_position"); +ustring OSLRenderServices::u_point_random("geom:point_random"); ustring OSLRenderServices::u_normal_map_normal("geom:normal_map_normal"); ustring OSLRenderServices::u_path_ray_length("path:ray_length"); ustring OSLRenderServices::u_path_ray_depth("path:ray_depth"); @@ -999,6 +1001,10 @@ bool OSLRenderServices::get_object_standard_attribute(const KernelGlobalsCPU *kg float3 f = curve_tangent_normal(kg, sd); return set_attribute_float3(f, type, derivatives, val); } + else if (name == u_curve_random) { + float f = curve_random(kg, sd); + return set_attribute_float(f, type, derivatives, val); + } /* point attributes */ else if (name == u_is_point) { float f = (sd->type & PRIMITIVE_POINT) != 0; @@ -1008,6 +1014,14 @@ bool OSLRenderServices::get_object_standard_attribute(const KernelGlobalsCPU *kg float f = point_radius(kg, sd); return set_attribute_float(f, type, derivatives, val); } + else if (name == u_point_position) { + float3 f = point_position(kg, sd); + return set_attribute_float3(f, type, derivatives, val); + } + else if (name == u_point_random) { + float f = point_random(kg, sd); + return set_attribute_float(f, type, derivatives, val); + } else if (name == u_normal_map_normal) { if (sd->type & PRIMITIVE_TRIANGLE) { float3 f = triangle_smooth_normal_unnormalized(kg, sd, sd->Ng, sd->prim, sd->u, sd->v); diff --git a/intern/cycles/kernel/osl/services.h b/intern/cycles/kernel/osl/services.h index 9526c92b8fb..96c71297186 100644 --- a/intern/cycles/kernel/osl/services.h +++ b/intern/cycles/kernel/osl/services.h @@ -298,7 +298,9 @@ class OSLRenderServices : public OSL::RendererServices { static ustring u_curve_tangent_normal; static ustring u_curve_random; static ustring u_is_point; + static ustring u_point_position; static ustring u_point_radius; + static ustring u_point_random; static ustring u_normal_map_normal; static ustring u_path_ray_length; static ustring u_path_ray_depth; diff --git a/intern/cycles/kernel/osl/shaders/CMakeLists.txt b/intern/cycles/kernel/osl/shaders/CMakeLists.txt index 4cafdb2a6d7..16a9b1cc012 100644 --- a/intern/cycles/kernel/osl/shaders/CMakeLists.txt +++ b/intern/cycles/kernel/osl/shaders/CMakeLists.txt @@ -49,6 +49,7 @@ set(SRC_OSL node_glossy_bsdf.osl node_gradient_texture.osl node_hair_info.osl + node_point_info.osl node_scatter_volume.osl node_absorption_volume.osl node_principled_volume.osl diff --git a/intern/cycles/kernel/osl/shaders/node_point_info.osl b/intern/cycles/kernel/osl/shaders/node_point_info.osl new file mode 100644 index 00000000000..58d8acbf269 --- /dev/null +++ b/intern/cycles/kernel/osl/shaders/node_point_info.osl @@ -0,0 +1,26 @@ +/* + * Copyright 2011-2022 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "stdcycles.h" + +shader node_point_info(output point Position = point(0.0, 0.0, 0.0), + output float Radius = 0.0, + output float Random = 0.0) +{ + getattribute("geom:point_position", Position); + getattribute("geom:point_radius", Radius); + getattribute("geom:point_random", Random); +} diff --git a/intern/cycles/kernel/svm/ao.h b/intern/cycles/kernel/svm/ao.h index 678f49c8ccd..dcb1a79717d 100644 --- a/intern/cycles/kernel/svm/ao.h +++ b/intern/cycles/kernel/svm/ao.h @@ -70,10 +70,14 @@ ccl_device float svm_ao( /* Create ray. */ Ray ray; - ray.P = ray_offset(sd->P, N); + ray.P = sd->P; ray.D = D.x * T + D.y * B + D.z * N; ray.t = max_dist; ray.time = sd->time; + ray.self.object = sd->object; + ray.self.prim = sd->prim; + ray.self.light_object = OBJECT_NONE; + ray.self.light_prim = PRIM_NONE; ray.dP = differential_zero_compact(); ray.dD = differential_zero_compact(); diff --git a/intern/cycles/kernel/svm/attribute.h b/intern/cycles/kernel/svm/attribute.h index e9de0164c7a..17301028528 100644 --- a/intern/cycles/kernel/svm/attribute.h +++ b/intern/cycles/kernel/svm/attribute.h @@ -87,7 +87,9 @@ ccl_device_noinline void svm_node_attr(KernelGlobals kg, if (node.y == ATTR_STD_GENERATED && desc.element == ATTR_ELEMENT_NONE) { /* No generated attribute, fall back to object coordinates. */ float3 f = sd->P; - object_inverse_position_transform(kg, sd, &f); + if (sd->object != OBJECT_NONE) { + object_inverse_position_transform(kg, sd, &f); + } if (type == NODE_ATTR_OUTPUT_FLOAT) { stack_store_float(stack, out_offset, average(f)); } @@ -179,7 +181,9 @@ ccl_device_noinline void svm_node_attr_bump_dx(KernelGlobals kg, if (node.y == ATTR_STD_GENERATED && desc.element == ATTR_ELEMENT_NONE) { /* No generated attribute, fall back to object coordinates. */ float3 f = sd->P + sd->dP.dx; - object_inverse_position_transform(kg, sd, &f); + if (sd->object != OBJECT_NONE) { + object_inverse_position_transform(kg, sd, &f); + } if (type == NODE_ATTR_OUTPUT_FLOAT) { stack_store_float(stack, out_offset, average(f)); } @@ -275,7 +279,9 @@ ccl_device_noinline void svm_node_attr_bump_dy(KernelGlobals kg, if (node.y == ATTR_STD_GENERATED && desc.element == ATTR_ELEMENT_NONE) { /* No generated attribute, fall back to object coordinates. */ float3 f = sd->P + sd->dP.dy; - object_inverse_position_transform(kg, sd, &f); + if (sd->object != OBJECT_NONE) { + object_inverse_position_transform(kg, sd, &f); + } if (type == NODE_ATTR_OUTPUT_FLOAT) { stack_store_float(stack, out_offset, average(f)); } diff --git a/intern/cycles/kernel/svm/bevel.h b/intern/cycles/kernel/svm/bevel.h index 46dfb6631da..98b663299da 100644 --- a/intern/cycles/kernel/svm/bevel.h +++ b/intern/cycles/kernel/svm/bevel.h @@ -196,6 +196,10 @@ ccl_device float3 svm_bevel( ray.dP = differential_zero_compact(); ray.dD = differential_zero_compact(); ray.time = sd->time; + ray.self.object = OBJECT_NONE; + ray.self.prim = PRIM_NONE; + ray.self.light_object = OBJECT_NONE; + ray.self.light_prim = PRIM_NONE; /* Intersect with the same object. if multiple intersections are found it * will use at most LOCAL_MAX_HITS hits, a random subset of all hits. */ @@ -207,15 +211,24 @@ ccl_device float3 svm_bevel( /* Quickly retrieve P and Ng without setting up ShaderData. */ float3 hit_P; if (sd->type == PRIMITIVE_TRIANGLE) { - hit_P = triangle_refine_local( - kg, sd, ray.P, ray.D, ray.t, isect.hits[hit].object, isect.hits[hit].prim); + hit_P = triangle_point_from_uv(kg, + sd, + isect.hits[hit].object, + isect.hits[hit].prim, + isect.hits[hit].u, + isect.hits[hit].v); } # ifdef __OBJECT_MOTION__ else if (sd->type == PRIMITIVE_MOTION_TRIANGLE) { float3 verts[3]; motion_triangle_vertices(kg, sd->object, isect.hits[hit].prim, sd->time, verts); - hit_P = motion_triangle_refine_local( - kg, sd, ray.P, ray.D, ray.t, isect.hits[hit].object, isect.hits[hit].prim, verts); + hit_P = motion_triangle_point_from_uv(kg, + sd, + isect.hits[hit].object, + isect.hits[hit].prim, + isect.hits[hit].u, + isect.hits[hit].v, + verts); } # endif /* __OBJECT_MOTION__ */ diff --git a/intern/cycles/kernel/svm/geometry.h b/intern/cycles/kernel/svm/geometry.h index 2bac58b0aa2..225348b1ac2 100644 --- a/intern/cycles/kernel/svm/geometry.h +++ b/intern/cycles/kernel/svm/geometry.h @@ -242,13 +242,6 @@ ccl_device_noinline void svm_node_hair_info(KernelGlobals kg, stack_store_float(stack, out_offset, data); break; } -# if 0 - case NODE_INFO_CURVE_FADE: { - data = sd->curve_transparency; - stack_store_float(stack, out_offset, data); - break; - } -# endif case NODE_INFO_CURVE_TANGENT_NORMAL: { data3 = curve_tangent_normal(kg, sd); stack_store_float3(stack, out_offset, data3); @@ -258,4 +251,28 @@ ccl_device_noinline void svm_node_hair_info(KernelGlobals kg, } #endif +#ifdef __POINTCLOUD__ + +/* Point Info */ + +ccl_device_noinline void svm_node_point_info(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint type, + uint out_offset) +{ + switch (type) { + case NODE_INFO_POINT_POSITION: + stack_store_float3(stack, out_offset, point_position(kg, sd)); + break; + case NODE_INFO_POINT_RADIUS: + stack_store_float(stack, out_offset, point_radius(kg, sd)); + break; + case NODE_INFO_POINT_RANDOM: + break; /* handled as attribute */ + } +} + +#endif + CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm.h b/intern/cycles/kernel/svm/svm.h index b226bc66771..35d4c3f2055 100644 --- a/intern/cycles/kernel/svm/svm.h +++ b/intern/cycles/kernel/svm/svm.h @@ -454,13 +454,14 @@ ccl_device void svm_eval_nodes(KernelGlobals kg, break; #if defined(__HAIR__) case NODE_HAIR_INFO: - IF_KERNEL_NODES_FEATURE(HAIR) - { - svm_node_hair_info(kg, sd, stack, node.y, node.z); - } + svm_node_hair_info(kg, sd, stack, node.y, node.z); + break; +#endif +#if defined(__POINTCLOUD__) + case NODE_POINT_INFO: + svm_node_point_info(kg, sd, stack, node.y, node.z); break; #endif - case NODE_TEXTURE_MAPPING: offset = svm_node_texture_mapping(kg, sd, stack, node.y, node.z, offset); break; diff --git a/intern/cycles/kernel/svm/types.h b/intern/cycles/kernel/svm/types.h index dd1b1f9bc28..16e9fd8862a 100644 --- a/intern/cycles/kernel/svm/types.h +++ b/intern/cycles/kernel/svm/types.h @@ -81,6 +81,7 @@ typedef enum ShaderNodeType { NODE_OBJECT_INFO, NODE_PARTICLE_INFO, NODE_HAIR_INFO, + NODE_POINT_INFO, NODE_TEXTURE_MAPPING, NODE_MAPPING, NODE_MIN_MAX, @@ -176,12 +177,16 @@ typedef enum NodeHairInfo { NODE_INFO_CURVE_INTERCEPT, NODE_INFO_CURVE_LENGTH, NODE_INFO_CURVE_THICKNESS, - /* Fade for minimum hair width transiency. */ - // NODE_INFO_CURVE_FADE, NODE_INFO_CURVE_TANGENT_NORMAL, NODE_INFO_CURVE_RANDOM, } NodeHairInfo; +typedef enum NodePointInfo { + NODE_INFO_POINT_POSITION, + NODE_INFO_POINT_RADIUS, + NODE_INFO_POINT_RANDOM, +} NodePointInfo; + typedef enum NodeLightPath { NODE_LP_camera = 0, NODE_LP_shadow, diff --git a/intern/cycles/kernel/types.h b/intern/cycles/kernel/types.h index 5d41abb53c4..d4cb22d4af4 100644 --- a/intern/cycles/kernel/types.h +++ b/intern/cycles/kernel/types.h @@ -512,12 +512,21 @@ typedef struct differential { /* Ray */ +typedef struct RaySelfPrimitives { + int prim; /* Primitive the ray is starting from */ + int object; /* Instance prim is a part of */ + int light_prim; /* Light primitive */ + int light_object; /* Light object */ +} RaySelfPrimitives; + typedef struct Ray { float3 P; /* origin */ float3 D; /* direction */ float t; /* length of the ray */ float time; /* time (for motion blur) */ + RaySelfPrimitives self; + #ifdef __RAY_DIFFERENTIALS__ float dP; float dD; @@ -1565,21 +1574,21 @@ enum KernelFeatureFlag : uint32_t { KERNEL_FEATURE_NODE_BSDF = (1U << 0U), KERNEL_FEATURE_NODE_EMISSION = (1U << 1U), KERNEL_FEATURE_NODE_VOLUME = (1U << 2U), - KERNEL_FEATURE_NODE_HAIR = (1U << 3U), - KERNEL_FEATURE_NODE_BUMP = (1U << 4U), - KERNEL_FEATURE_NODE_BUMP_STATE = (1U << 5U), - KERNEL_FEATURE_NODE_VORONOI_EXTRA = (1U << 6U), - KERNEL_FEATURE_NODE_RAYTRACE = (1U << 7U), - KERNEL_FEATURE_NODE_AOV = (1U << 8U), - KERNEL_FEATURE_NODE_LIGHT_PATH = (1U << 9U), + KERNEL_FEATURE_NODE_BUMP = (1U << 3U), + KERNEL_FEATURE_NODE_BUMP_STATE = (1U << 4U), + KERNEL_FEATURE_NODE_VORONOI_EXTRA = (1U << 5U), + KERNEL_FEATURE_NODE_RAYTRACE = (1U << 6U), + KERNEL_FEATURE_NODE_AOV = (1U << 7U), + KERNEL_FEATURE_NODE_LIGHT_PATH = (1U << 8U), /* Use denoising kernels and output denoising passes. */ - KERNEL_FEATURE_DENOISING = (1U << 10U), + KERNEL_FEATURE_DENOISING = (1U << 9U), /* Use path tracing kernels. */ - KERNEL_FEATURE_PATH_TRACING = (1U << 11U), + KERNEL_FEATURE_PATH_TRACING = (1U << 10U), /* BVH/sampling kernel features. */ + KERNEL_FEATURE_POINTCLOUD = (1U << 11U), KERNEL_FEATURE_HAIR = (1U << 12U), KERNEL_FEATURE_HAIR_THICK = (1U << 13U), KERNEL_FEATURE_OBJECT_MOTION = (1U << 14U), @@ -1616,9 +1625,6 @@ enum KernelFeatureFlag : uint32_t { KERNEL_FEATURE_AO_PASS = (1U << 25U), KERNEL_FEATURE_AO_ADDITIVE = (1U << 26U), KERNEL_FEATURE_AO = (KERNEL_FEATURE_AO_PASS | KERNEL_FEATURE_AO_ADDITIVE), - - /* Point clouds. */ - KERNEL_FEATURE_POINTCLOUD = (1U << 27U), }; /* Shader node feature mask, to specialize shader evaluation for kernels. */ @@ -1628,7 +1634,7 @@ enum KernelFeatureFlag : uint32_t { KERNEL_FEATURE_NODE_LIGHT_PATH) #define KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW \ (KERNEL_FEATURE_NODE_BSDF | KERNEL_FEATURE_NODE_EMISSION | KERNEL_FEATURE_NODE_VOLUME | \ - KERNEL_FEATURE_NODE_HAIR | KERNEL_FEATURE_NODE_BUMP | KERNEL_FEATURE_NODE_BUMP_STATE | \ + KERNEL_FEATURE_NODE_BUMP | KERNEL_FEATURE_NODE_BUMP_STATE | \ KERNEL_FEATURE_NODE_VORONOI_EXTRA | KERNEL_FEATURE_NODE_LIGHT_PATH) #define KERNEL_FEATURE_NODE_MASK_SURFACE \ (KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW | KERNEL_FEATURE_NODE_RAYTRACE | \ diff --git a/intern/cycles/scene/colorspace.cpp b/intern/cycles/scene/colorspace.cpp index c1a308fcbaa..f0b7eb724de 100644 --- a/intern/cycles/scene/colorspace.cpp +++ b/intern/cycles/scene/colorspace.cpp @@ -263,7 +263,9 @@ template<typename T> inline void cast_from_float4(T *data, float4 value) /* Slower versions for other all data types, which needs to convert to float and back. */ template<typename T, bool compress_as_srgb = false> -inline void processor_apply_pixels(const OCIO::Processor *processor, T *pixels, size_t num_pixels) +inline void processor_apply_pixels_rgba(const OCIO::Processor *processor, + T *pixels, + size_t num_pixels) { /* TODO: implement faster version for when we know the conversion * is a simple matrix transform between linear spaces. In that case @@ -310,25 +312,79 @@ inline void processor_apply_pixels(const OCIO::Processor *processor, T *pixels, } } } + +template<typename T, bool compress_as_srgb = false> +inline void processor_apply_pixels_grayscale(const OCIO::Processor *processor, + T *pixels, + size_t num_pixels) +{ + OCIO::ConstCPUProcessorRcPtr device_processor = processor->getDefaultCPUProcessor(); + + /* Process large images in chunks to keep temporary memory requirement down. */ + const size_t chunk_size = std::min((size_t)(16 * 1024 * 1024), num_pixels); + vector<float> float_pixels(chunk_size * 3); + + for (size_t j = 0; j < num_pixels; j += chunk_size) { + size_t width = std::min(chunk_size, num_pixels - j); + + /* Convert to 3 channels, since that's the minimum required by OpenColorIO. */ + { + const T *pixel = pixels + j; + float *fpixel = float_pixels.data(); + for (size_t i = 0; i < width; i++, pixel++, fpixel += 3) { + const float f = util_image_cast_to_float<T>(*pixel); + fpixel[0] = f; + fpixel[1] = f; + fpixel[2] = f; + } + } + + OCIO::PackedImageDesc desc((float *)float_pixels.data(), width, 1, 3); + device_processor->apply(desc); + + { + T *pixel = pixels + j; + const float *fpixel = float_pixels.data(); + for (size_t i = 0; i < width; i++, pixel++, fpixel += 3) { + float f = average(make_float3(fpixel[0], fpixel[1], fpixel[2])); + if (compress_as_srgb) { + f = color_linear_to_srgb(f); + } + *pixel = util_image_cast_from_float<T>(f); + } + } + } +} + #endif template<typename T> -void ColorSpaceManager::to_scene_linear(ustring colorspace, - T *pixels, - size_t num_pixels, - bool compress_as_srgb) +void ColorSpaceManager::to_scene_linear( + ustring colorspace, T *pixels, size_t num_pixels, bool is_rgba, bool compress_as_srgb) { #ifdef WITH_OCIO const OCIO::Processor *processor = (const OCIO::Processor *)get_processor(colorspace); if (processor) { - if (compress_as_srgb) { - /* Compress output as sRGB. */ - processor_apply_pixels<T, true>(processor, pixels, num_pixels); + if (is_rgba) { + if (compress_as_srgb) { + /* Compress output as sRGB. */ + processor_apply_pixels_rgba<T, true>(processor, pixels, num_pixels); + } + else { + /* Write output as scene linear directly. */ + processor_apply_pixels_rgba<T>(processor, pixels, num_pixels); + } } else { - /* Write output as scene linear directly. */ - processor_apply_pixels<T>(processor, pixels, num_pixels); + if (compress_as_srgb) { + /* Compress output as sRGB. */ + processor_apply_pixels_grayscale<T, true>(processor, pixels, num_pixels); + } + else { + /* Write output as scene linear directly. */ + processor_apply_pixels_grayscale<T>(processor, pixels, num_pixels); + } } } #else @@ -348,6 +404,11 @@ void ColorSpaceManager::to_scene_linear(ColorSpaceProcessor *processor_, if (processor) { OCIO::ConstCPUProcessorRcPtr device_processor = processor->getDefaultCPUProcessor(); + if (channels == 1) { + float3 rgb = make_float3(pixel[0], pixel[0], pixel[0]); + device_processor->applyRGB(&rgb.x); + pixel[0] = average(rgb); + } if (channels == 3) { device_processor->applyRGB(pixel); } @@ -390,9 +451,9 @@ void ColorSpaceManager::free_memory() } /* Template instantiations so we don't have to inline functions. */ -template void ColorSpaceManager::to_scene_linear(ustring, uchar *, size_t, bool); -template void ColorSpaceManager::to_scene_linear(ustring, ushort *, size_t, bool); -template void ColorSpaceManager::to_scene_linear(ustring, half *, size_t, bool); -template void ColorSpaceManager::to_scene_linear(ustring, float *, size_t, bool); +template void ColorSpaceManager::to_scene_linear(ustring, uchar *, size_t, bool, bool); +template void ColorSpaceManager::to_scene_linear(ustring, ushort *, size_t, bool, bool); +template void ColorSpaceManager::to_scene_linear(ustring, half *, size_t, bool, bool); +template void ColorSpaceManager::to_scene_linear(ustring, float *, size_t, bool, bool); CCL_NAMESPACE_END diff --git a/intern/cycles/scene/colorspace.h b/intern/cycles/scene/colorspace.h index 7f7bc604f07..f02c1231a44 100644 --- a/intern/cycles/scene/colorspace.h +++ b/intern/cycles/scene/colorspace.h @@ -43,10 +43,8 @@ class ColorSpaceManager { /* Convert pixels in the specified colorspace to scene linear color for * rendering. Must be a colorspace returned from detect_known_colorspace. */ template<typename T> - static void to_scene_linear(ustring colorspace, - T *pixels, - size_t num_pixels, - bool compress_as_srgb); + static void to_scene_linear( + ustring colorspace, T *pixels, size_t num_pixels, bool is_rgba, bool compress_as_srgb); /* Efficiently convert pixels to scene linear colorspace at render time, * for OSL where the image texture cache contains original pixels. The diff --git a/intern/cycles/scene/constant_fold.cpp b/intern/cycles/scene/constant_fold.cpp index a5fb68bf229..e9fb3426b70 100644 --- a/intern/cycles/scene/constant_fold.cpp +++ b/intern/cycles/scene/constant_fold.cpp @@ -441,9 +441,13 @@ void ConstantFolder::fold_mapping(NodeMappingType type) const if (is_zero(scale_in)) { make_zero(); } - else if ((is_zero(location_in) || type == NODE_MAPPING_TYPE_VECTOR || - type == NODE_MAPPING_TYPE_NORMAL) && - is_zero(rotation_in) && is_one(scale_in)) { + else if ( + /* Can't constant fold since we always need to normalize the output. */ + (type != NODE_MAPPING_TYPE_NORMAL) && + /* Check all use values are zero, note location is not used by vector and normal types. */ + (is_zero(location_in) || type == NODE_MAPPING_TYPE_VECTOR || + type == NODE_MAPPING_TYPE_NORMAL) && + is_zero(rotation_in) && is_one(scale_in)) { try_bypass_or_make_constant(vector_in); } } diff --git a/intern/cycles/scene/geometry.cpp b/intern/cycles/scene/geometry.cpp index 49d18d00dd7..90f1e1cb021 100644 --- a/intern/cycles/scene/geometry.cpp +++ b/intern/cycles/scene/geometry.cpp @@ -236,6 +236,7 @@ void Geometry::compute_bvh( BVHParams bparams; bparams.use_spatial_split = params->use_bvh_spatial_split; + bparams.use_compact_structure = params->use_bvh_compact_structure; bparams.bvh_layout = bvh_layout; bparams.use_unaligned_nodes = dscene->data.bvh.have_curves && params->use_bvh_unaligned_nodes; diff --git a/intern/cycles/scene/image.cpp b/intern/cycles/scene/image.cpp index 3595ca55a46..7aad46d253c 100644 --- a/intern/cycles/scene/image.cpp +++ b/intern/cycles/scene/image.cpp @@ -576,13 +576,13 @@ bool ImageManager::file_load_image(Image *img, int texture_limit) pixels[i * 4 + 3] = one; } } + } - if (img->metadata.colorspace != u_colorspace_raw && - img->metadata.colorspace != u_colorspace_srgb) { - /* Convert to scene linear. */ - ColorSpaceManager::to_scene_linear( - img->metadata.colorspace, pixels, num_pixels, img->metadata.compress_as_srgb); - } + if (img->metadata.colorspace != u_colorspace_raw && + img->metadata.colorspace != u_colorspace_srgb) { + /* Convert to scene linear. */ + ColorSpaceManager::to_scene_linear( + img->metadata.colorspace, pixels, num_pixels, is_rgba, img->metadata.compress_as_srgb); } /* Make sure we don't have buggy values. */ @@ -891,6 +891,10 @@ void ImageManager::device_free(Device *device) void ImageManager::collect_statistics(RenderStats *stats) { foreach (const Image *image, images) { + if (!image) { + /* Image may have been freed due to lack of users. */ + continue; + } stats->image.textures.add_entry( NamedSizeEntry(image->loader->name(), image->mem->memory_size())); } diff --git a/intern/cycles/scene/scene.cpp b/intern/cycles/scene/scene.cpp index 1963ebbbb19..b5b8eee24a7 100644 --- a/intern/cycles/scene/scene.cpp +++ b/intern/cycles/scene/scene.cpp @@ -570,7 +570,6 @@ static void log_kernel_features(const uint features) << "\n"; VLOG(2) << "Use Emission " << string_from_bool(features & KERNEL_FEATURE_NODE_EMISSION) << "\n"; VLOG(2) << "Use Volume " << string_from_bool(features & KERNEL_FEATURE_NODE_VOLUME) << "\n"; - VLOG(2) << "Use Hair " << string_from_bool(features & KERNEL_FEATURE_NODE_HAIR) << "\n"; VLOG(2) << "Use Bump " << string_from_bool(features & KERNEL_FEATURE_NODE_BUMP) << "\n"; VLOG(2) << "Use Voronoi " << string_from_bool(features & KERNEL_FEATURE_NODE_VORONOI_EXTRA) << "\n"; diff --git a/intern/cycles/scene/scene.h b/intern/cycles/scene/scene.h index ec935b41be6..77268837070 100644 --- a/intern/cycles/scene/scene.h +++ b/intern/cycles/scene/scene.h @@ -160,6 +160,7 @@ class SceneParams { BVHType bvh_type; bool use_bvh_spatial_split; + bool use_bvh_compact_structure; bool use_bvh_unaligned_nodes; int num_bvh_time_steps; int hair_subdivisions; @@ -174,6 +175,7 @@ class SceneParams { bvh_layout = BVH_LAYOUT_BVH2; bvh_type = BVH_TYPE_DYNAMIC; use_bvh_spatial_split = false; + use_bvh_compact_structure = true; use_bvh_unaligned_nodes = true; num_bvh_time_steps = 0; hair_subdivisions = 3; @@ -187,6 +189,7 @@ class SceneParams { return !(shadingsystem == params.shadingsystem && bvh_layout == params.bvh_layout && bvh_type == params.bvh_type && use_bvh_spatial_split == params.use_bvh_spatial_split && + use_bvh_compact_structure == params.use_bvh_compact_structure && use_bvh_unaligned_nodes == params.use_bvh_unaligned_nodes && num_bvh_time_steps == params.num_bvh_time_steps && hair_subdivisions == params.hair_subdivisions && hair_shape == params.hair_shape && diff --git a/intern/cycles/scene/shader_nodes.cpp b/intern/cycles/scene/shader_nodes.cpp index e8316ad41b4..34675be8e80 100644 --- a/intern/cycles/scene/shader_nodes.cpp +++ b/intern/cycles/scene/shader_nodes.cpp @@ -32,6 +32,7 @@ #include "util/color.h" #include "util/foreach.h" #include "util/log.h" +#include "util/string.h" #include "util/transform.h" #include "kernel/tables.h" @@ -462,8 +463,12 @@ void ImageTextureNode::compile(OSLCompiler &compiler) const ustring known_colorspace = metadata.colorspace; if (handle.svm_slot() == -1) { + /* OIIO currently does not support <UVTILE> substitutions natively. Replace with a format they + * understand. */ + std::string osl_filename = filename.string(); + string_replace(osl_filename, "<UVTILE>", "<U>_<V>"); compiler.parameter_texture( - "filename", filename, compress_as_srgb ? u_colorspace_raw : known_colorspace); + "filename", ustring(osl_filename), compress_as_srgb ? u_colorspace_raw : known_colorspace); } else { compiler.parameter_texture("filename", handle.svm_slot()); @@ -472,7 +477,8 @@ void ImageTextureNode::compile(OSLCompiler &compiler) const bool unassociate_alpha = !(ColorSpaceManager::colorspace_is_data(colorspace) || alpha_type == IMAGE_ALPHA_CHANNEL_PACKED || alpha_type == IMAGE_ALPHA_IGNORE); - const bool is_tiled = (filename.find("<UDIM>") != string::npos); + const bool is_tiled = (filename.find("<UDIM>") != string::npos || + filename.find("<UVTILE>") != string::npos); compiler.parameter(this, "projection"); compiler.parameter(this, "projection_blend"); @@ -4388,9 +4394,6 @@ NODE_DEFINE(HairInfoNode) SOCKET_OUT_FLOAT(size, "Length"); SOCKET_OUT_FLOAT(thickness, "Thickness"); SOCKET_OUT_NORMAL(tangent_normal, "Tangent Normal"); -#if 0 /* Output for minimum hair width transparency - deactivated. */ - SOCKET_OUT_FLOAT(fade, "Fade"); -#endif SOCKET_OUT_FLOAT(index, "Random"); return type; @@ -4448,12 +4451,7 @@ void HairInfoNode::compile(SVMCompiler &compiler) if (!out->links.empty()) { compiler.add_node(NODE_HAIR_INFO, NODE_INFO_CURVE_TANGENT_NORMAL, compiler.stack_assign(out)); } -#if 0 - out = output("Fade"); - if(!out->links.empty()) { - compiler.add_node(NODE_HAIR_INFO, NODE_INFO_CURVE_FADE, compiler.stack_assign(out)); - } -#endif + out = output("Random"); if (!out->links.empty()) { int attr = compiler.attribute(ATTR_STD_CURVE_RANDOM); @@ -4466,6 +4464,59 @@ void HairInfoNode::compile(OSLCompiler &compiler) compiler.add(this, "node_hair_info"); } +/* Point Info */ + +NODE_DEFINE(PointInfoNode) +{ + NodeType *type = NodeType::add("point_info", create, NodeType::SHADER); + + SOCKET_OUT_POINT(position, "Position"); + SOCKET_OUT_FLOAT(radius, "Radius"); + SOCKET_OUT_FLOAT(random, "Random"); + + return type; +} + +PointInfoNode::PointInfoNode() : ShaderNode(get_node_type()) +{ +} + +void PointInfoNode::attributes(Shader *shader, AttributeRequestSet *attributes) +{ + if (shader->has_surface_link()) { + if (!output("Random")->links.empty()) + attributes->add(ATTR_STD_POINT_RANDOM); + } + + ShaderNode::attributes(shader, attributes); +} + +void PointInfoNode::compile(SVMCompiler &compiler) +{ + ShaderOutput *out; + + out = output("Position"); + if (!out->links.empty()) { + compiler.add_node(NODE_POINT_INFO, NODE_INFO_POINT_POSITION, compiler.stack_assign(out)); + } + + out = output("Radius"); + if (!out->links.empty()) { + compiler.add_node(NODE_POINT_INFO, NODE_INFO_POINT_RADIUS, compiler.stack_assign(out)); + } + + out = output("Random"); + if (!out->links.empty()) { + int attr = compiler.attribute(ATTR_STD_POINT_RANDOM); + compiler.add_node(NODE_ATTR, attr, compiler.stack_assign(out), NODE_ATTR_OUTPUT_FLOAT); + } +} + +void PointInfoNode::compile(OSLCompiler &compiler) +{ + compiler.add(this, "node_point_info"); +} + /* Volume Info */ NODE_DEFINE(VolumeInfoNode) diff --git a/intern/cycles/scene/shader_nodes.h b/intern/cycles/scene/shader_nodes.h index 0faefd3041f..a8d5bdcf157 100644 --- a/intern/cycles/scene/shader_nodes.h +++ b/intern/cycles/scene/shader_nodes.h @@ -1005,9 +1005,20 @@ class HairInfoNode : public ShaderNode { { return true; } - virtual int get_feature() +}; + +class PointInfoNode : public ShaderNode { + public: + SHADER_NODE_CLASS(PointInfoNode) + + void attributes(Shader *shader, AttributeRequestSet *attributes); + bool has_attribute_dependency() + { + return true; + } + bool has_spatial_varying() { - return ShaderNode::get_feature() | KERNEL_FEATURE_NODE_HAIR; + return true; } }; diff --git a/intern/cycles/util/math.h b/intern/cycles/util/math.h index 18b60b70a4b..605a19aaef0 100644 --- a/intern/cycles/util/math.h +++ b/intern/cycles/util/math.h @@ -401,7 +401,7 @@ ccl_device_inline float fractf(float x) return x - floorf(x); } -/* Adapted from godot-engine math_funcs.h. */ +/* Adapted from `godot-engine` math_funcs.h. */ ccl_device_inline float wrapf(float value, float max, float min) { float range = max - min; |