diff options
author | Patrick Mours <pmours@nvidia.com> | 2019-08-21 13:06:26 +0300 |
---|---|---|
committer | Brecht Van Lommel <brechtvanlommel@gmail.com> | 2019-08-26 11:26:53 +0300 |
commit | db257e679a63b1a6a5e361a0b1906e89e8de50cf (patch) | |
tree | 09b822e4b7f3070ce111fc89f5b95f3752baf146 /intern/cycles | |
parent | edbb755dfe54f929c08aa6ec77c134462581fbfe (diff) |
Cycles: remove workaround to pass ray by value
CUDA now works correctly without the workaround, and it is more efficient to pass the ray by pointer than by value.
Ref D5363
Diffstat (limited to 'intern/cycles')
-rw-r--r-- | intern/cycles/kernel/bvh/bvh.h | 64 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_path.h | 2 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_shadow.h | 8 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_subsurface.h | 4 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_types.h | 5 | ||||
-rw-r--r-- | intern/cycles/kernel/osl/osl_services.cpp | 2 | ||||
-rw-r--r-- | intern/cycles/kernel/svm/svm_ao.h | 4 | ||||
-rw-r--r-- | intern/cycles/kernel/svm/svm_bevel.h | 2 |
8 files changed, 48 insertions, 43 deletions
diff --git a/intern/cycles/kernel/bvh/bvh.h b/intern/cycles/kernel/bvh/bvh.h index be0f05285e8..162b2fb5cdb 100644 --- a/intern/cycles/kernel/bvh/bvh.h +++ b/intern/cycles/kernel/bvh/bvh.h @@ -177,24 +177,23 @@ ccl_device_inline bool scene_intersect_valid(const Ray *ray) return isfinite_safe(ray->P.x) && isfinite_safe(ray->D.x); } -/* Note: ray is passed by value to work around a possible CUDA compiler bug. */ ccl_device_intersect bool scene_intersect(KernelGlobals *kg, - const Ray ray, + const Ray *ray, const uint visibility, Intersection *isect) { PROFILING_INIT(kg, PROFILING_INTERSECT); - if (!scene_intersect_valid(&ray)) { + if (!scene_intersect_valid(ray)) { return false; } #ifdef __EMBREE__ if (kernel_data.bvh.scene) { - isect->t = ray.t; + isect->t = ray->t; CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_REGULAR); IntersectContext rtc_ctx(&ctx); RTCRayHit ray_hit; - kernel_embree_setup_rayhit(ray, ray_hit, visibility); + kernel_embree_setup_rayhit(*ray, ray_hit, visibility); rtcIntersect1(kernel_data.bvh.scene, &rtc_ctx.context, &ray_hit); if (ray_hit.hit.geomID != RTC_INVALID_GEOMETRY_ID && ray_hit.hit.primID != RTC_INVALID_GEOMETRY_ID) { @@ -207,42 +206,43 @@ ccl_device_intersect bool scene_intersect(KernelGlobals *kg, #ifdef __OBJECT_MOTION__ if (kernel_data.bvh.have_motion) { # ifdef __HAIR__ - if (kernel_data.bvh.have_curves) - return bvh_intersect_hair_motion(kg, &ray, isect, visibility); + if (kernel_data.bvh.have_curves) { + return bvh_intersect_hair_motion(kg, ray, isect, visibility); + } # endif /* __HAIR__ */ - return bvh_intersect_motion(kg, &ray, isect, visibility); + return bvh_intersect_motion(kg, ray, isect, visibility); } #endif /* __OBJECT_MOTION__ */ #ifdef __HAIR__ - if (kernel_data.bvh.have_curves) - return bvh_intersect_hair(kg, &ray, isect, visibility); + if (kernel_data.bvh.have_curves) { + return bvh_intersect_hair(kg, ray, isect, visibility); + } #endif /* __HAIR__ */ #ifdef __KERNEL_CPU__ # ifdef __INSTANCING__ - if 
(kernel_data.bvh.have_instancing) - return bvh_intersect_instancing(kg, &ray, isect, visibility); + if (kernel_data.bvh.have_instancing) { + return bvh_intersect_instancing(kg, ray, isect, visibility); + } # endif /* __INSTANCING__ */ - - return bvh_intersect(kg, &ray, isect, visibility); + return bvh_intersect(kg, ray, isect, visibility); #else /* __KERNEL_CPU__ */ # ifdef __INSTANCING__ - return bvh_intersect_instancing(kg, &ray, isect, visibility); + return bvh_intersect_instancing(kg, ray, isect, visibility); # else - return bvh_intersect(kg, &ray, isect, visibility); + return bvh_intersect(kg, ray, isect, visibility); # endif /* __INSTANCING__ */ #endif /* __KERNEL_CPU__ */ } #ifdef __BVH_LOCAL__ -/* Note: ray is passed by value to work around a possible CUDA compiler bug. */ ccl_device_intersect bool scene_intersect_local(KernelGlobals *kg, - const Ray ray, + const Ray *ray, LocalIntersection *local_isect, int local_object, uint *lcg_state, @@ -250,7 +250,7 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals *kg, { PROFILING_INIT(kg, PROFILING_INTERSECT_LOCAL); - if (!scene_intersect_valid(&ray)) { + if (!scene_intersect_valid(ray)) { local_isect->num_hits = 0; return false; } @@ -264,19 +264,19 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals *kg, ctx.sss_object_id = local_object; IntersectContext rtc_ctx(&ctx); RTCRay rtc_ray; - kernel_embree_setup_ray(ray, rtc_ray, PATH_RAY_ALL_VISIBILITY); + kernel_embree_setup_ray(*ray, rtc_ray, PATH_RAY_ALL_VISIBILITY); /* Get the Embree scene for this intersection. 
*/ RTCGeometry geom = rtcGetGeometry(kernel_data.bvh.scene, local_object * 2); if (geom) { - float3 P = ray.P; - float3 dir = ray.D; - float3 idir = ray.D; + float3 P = ray->P; + float3 dir = ray->D; + float3 idir = ray->D; const int object_flag = kernel_tex_fetch(__object_flag, local_object); if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { Transform ob_itfm; rtc_ray.tfar = bvh_instance_motion_push( - kg, local_object, &ray, &P, &dir, &idir, ray.t, &ob_itfm); + kg, local_object, ray, &P, &dir, &idir, ray->t, &ob_itfm); /* bvh_instance_motion_push() returns the inverse transform but * it's not needed here. */ (void)ob_itfm; @@ -299,10 +299,10 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals *kg, # endif /* __EMBREE__ */ # ifdef __OBJECT_MOTION__ if (kernel_data.bvh.have_motion) { - return bvh_intersect_local_motion(kg, &ray, local_isect, local_object, lcg_state, max_hits); + return bvh_intersect_local_motion(kg, ray, local_isect, local_object, lcg_state, max_hits); } # endif /* __OBJECT_MOTION__ */ - return bvh_intersect_local(kg, &ray, local_isect, local_object, lcg_state, max_hits); + return bvh_intersect_local(kg, ray, local_isect, local_object, lcg_state, max_hits); } #endif @@ -377,15 +377,18 @@ ccl_device_intersect bool scene_intersect_volume(KernelGlobals *kg, if (!scene_intersect_valid(ray)) { return false; } + # ifdef __OBJECT_MOTION__ if (kernel_data.bvh.have_motion) { return bvh_intersect_volume_motion(kg, ray, isect, visibility); } # endif /* __OBJECT_MOTION__ */ + # ifdef __KERNEL_CPU__ # ifdef __INSTANCING__ - if (kernel_data.bvh.have_instancing) + if (kernel_data.bvh.have_instancing) { return bvh_intersect_volume_instancing(kg, ray, isect, visibility); + } # endif /* __INSTANCING__ */ return bvh_intersect_volume(kg, ray, isect, visibility); # else /* __KERNEL_CPU__ */ @@ -422,15 +425,18 @@ ccl_device_intersect uint scene_intersect_volume_all(KernelGlobals *kg, rtcOccluded1(kernel_data.bvh.scene, &rtc_ctx.context, &rtc_ray); return 
rtc_ray.tfar == -INFINITY; } -# endif +# endif /* __EMBREE__ */ + # ifdef __OBJECT_MOTION__ if (kernel_data.bvh.have_motion) { return bvh_intersect_volume_all_motion(kg, ray, isect, max_hits, visibility); } # endif /* __OBJECT_MOTION__ */ + # ifdef __INSTANCING__ - if (kernel_data.bvh.have_instancing) + if (kernel_data.bvh.have_instancing) { return bvh_intersect_volume_all_instancing(kg, ray, isect, max_hits, visibility); + } # endif /* __INSTANCING__ */ return bvh_intersect_volume_all(kg, ray, isect, max_hits, visibility); } diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h index c7e49930701..1e8d54a23bf 100644 --- a/intern/cycles/kernel/kernel_path.h +++ b/intern/cycles/kernel/kernel_path.h @@ -65,7 +65,7 @@ ccl_device_forceinline bool kernel_path_scene_intersect(KernelGlobals *kg, ray->t = kernel_data.background.ao_distance; } - bool hit = scene_intersect(kg, *ray, visibility, isect); + bool hit = scene_intersect(kg, ray, visibility, isect); #ifdef __KERNEL_DEBUG__ if (state->flag & PATH_RAY_CAMERA) { diff --git a/intern/cycles/kernel/kernel_shadow.h b/intern/cycles/kernel/kernel_shadow.h index 6640f64518a..c02d7d77faf 100644 --- a/intern/cycles/kernel/kernel_shadow.h +++ b/intern/cycles/kernel/kernel_shadow.h @@ -103,7 +103,7 @@ ccl_device bool shadow_blocked_opaque(KernelGlobals *kg, Intersection *isect, float3 *shadow) { - const bool blocked = scene_intersect(kg, *ray, visibility & PATH_RAY_SHADOW_OPAQUE, isect); + const bool blocked = scene_intersect(kg, ray, visibility & PATH_RAY_SHADOW_OPAQUE, isect); #ifdef __VOLUME__ if (!blocked && state->volume_stack[0].shader != SHADER_NONE) { /* Apply attenuation from current volume shader. 
*/ @@ -318,7 +318,7 @@ ccl_device bool shadow_blocked_transparent_stepped_loop(KernelGlobals *kg, if (bounce >= kernel_data.integrator.transparent_max_bounce) { return true; } - if (!scene_intersect(kg, *ray, visibility & PATH_RAY_SHADOW_TRANSPARENT, isect)) { + if (!scene_intersect(kg, ray, visibility & PATH_RAY_SHADOW_TRANSPARENT, isect)) { break; } if (!shader_transparent_shadow(kg, isect)) { @@ -374,7 +374,7 @@ ccl_device bool shadow_blocked_transparent_stepped(KernelGlobals *kg, Intersection *isect, float3 *shadow) { - bool blocked = scene_intersect(kg, *ray, visibility & PATH_RAY_SHADOW_OPAQUE, isect); + bool blocked = scene_intersect(kg, ray, visibility & PATH_RAY_SHADOW_OPAQUE, isect); bool is_transparent_isect = blocked ? shader_transparent_shadow(kg, isect) : false; return shadow_blocked_transparent_stepped_loop( kg, sd, shadow_sd, state, visibility, ray, isect, blocked, is_transparent_isect, shadow); @@ -433,7 +433,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, * TODO(sergey): Check why using record-all behavior causes slowdown in such * cases. Could that be caused by a higher spill pressure? */ - const bool blocked = scene_intersect(kg, *ray, visibility & PATH_RAY_SHADOW_OPAQUE, &isect); + const bool blocked = scene_intersect(kg, ray, visibility & PATH_RAY_SHADOW_OPAQUE, &isect); const bool is_transparent_isect = blocked ? shader_transparent_shadow(kg, &isect) : false; if (!blocked || !is_transparent_isect || max_hits + 1 >= SHADOW_STACK_MAX_HITS) { return shadow_blocked_transparent_stepped_loop( diff --git a/intern/cycles/kernel/kernel_subsurface.h b/intern/cycles/kernel/kernel_subsurface.h index 7510e50a962..8dc1904058d 100644 --- a/intern/cycles/kernel/kernel_subsurface.h +++ b/intern/cycles/kernel/kernel_subsurface.h @@ -222,7 +222,7 @@ ccl_device_inline int subsurface_scatter_disk(KernelGlobals *kg, /* intersect with the same object. 
if multiple intersections are found it * will use at most BSSRDF_MAX_HITS hits, a random subset of all hits */ - scene_intersect_local(kg, *ray, ss_isect, sd->object, lcg_state, BSSRDF_MAX_HITS); + scene_intersect_local(kg, ray, ss_isect, sd->object, lcg_state, BSSRDF_MAX_HITS); int num_eval_hits = min(ss_isect->num_hits, BSSRDF_MAX_HITS); for (int hit = 0; hit < num_eval_hits; hit++) { @@ -418,7 +418,7 @@ ccl_device_noinline bool subsurface_random_walk(KernelGlobals *kg, float t = -logf(1.0f - rdist) / sample_sigma_t; ray->t = t; - scene_intersect_local(kg, *ray, ss_isect, sd->object, NULL, 1); + scene_intersect_local(kg, ray, ss_isect, sd->object, NULL, 1); hit = (ss_isect->num_hits > 0); if (hit) { diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index 8fedde4fb7f..b3cb6ca7c19 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -649,9 +649,8 @@ typedef struct Ray { * is fixed. */ #ifndef __KERNEL_OPENCL_AMD__ - float3 P; /* origin */ - float3 D; /* direction */ - + float3 P; /* origin */ + float3 D; /* direction */ float t; /* length of the ray */ float time; /* time (for motion blur) */ #else diff --git a/intern/cycles/kernel/osl/osl_services.cpp b/intern/cycles/kernel/osl/osl_services.cpp index 222475b3778..415de9cd66b 100644 --- a/intern/cycles/kernel/osl/osl_services.cpp +++ b/intern/cycles/kernel/osl/osl_services.cpp @@ -1406,7 +1406,7 @@ bool OSLRenderServices::trace(TraceOpt &options, /* Raytrace, leaving out shadow opaque to avoid early exit. 
*/ uint visibility = PATH_RAY_ALL_VISIBILITY - PATH_RAY_SHADOW_OPAQUE; - return scene_intersect(kg, ray, visibility, &tracedata->isect); + return scene_intersect(kg, &ray, visibility, &tracedata->isect); } bool OSLRenderServices::getmessage(OSL::ShaderGlobals *sg, diff --git a/intern/cycles/kernel/svm/svm_ao.h b/intern/cycles/kernel/svm/svm_ao.h index 59260de1199..3a8f32ac9d2 100644 --- a/intern/cycles/kernel/svm/svm_ao.h +++ b/intern/cycles/kernel/svm/svm_ao.h @@ -66,13 +66,13 @@ ccl_device_noinline float svm_ao(KernelGlobals *kg, ray.dD = differential3_zero(); if (flags & NODE_AO_ONLY_LOCAL) { - if (!scene_intersect_local(kg, ray, NULL, sd->object, NULL, 0)) { + if (!scene_intersect_local(kg, &ray, NULL, sd->object, NULL, 0)) { unoccluded++; } } else { Intersection isect; - if (!scene_intersect(kg, ray, PATH_RAY_SHADOW_OPAQUE, &isect)) { + if (!scene_intersect(kg, &ray, PATH_RAY_SHADOW_OPAQUE, &isect)) { unoccluded++; } } diff --git a/intern/cycles/kernel/svm/svm_bevel.h b/intern/cycles/kernel/svm/svm_bevel.h index c1a10784f89..6045268918b 100644 --- a/intern/cycles/kernel/svm/svm_bevel.h +++ b/intern/cycles/kernel/svm/svm_bevel.h @@ -112,7 +112,7 @@ ccl_device_noinline float3 svm_bevel(KernelGlobals *kg, /* Intersect with the same object. if multiple intersections are found it * will use at most LOCAL_MAX_HITS hits, a random subset of all hits. */ - scene_intersect_local(kg, *ray, &isect, sd->object, &lcg_state, LOCAL_MAX_HITS); + scene_intersect_local(kg, ray, &isect, sd->object, &lcg_state, LOCAL_MAX_HITS); int num_eval_hits = min(isect.num_hits, LOCAL_MAX_HITS); |