diff options
author | Brecht Van Lommel <brecht@blender.org> | 2021-10-18 18:53:32 +0300 |
---|---|---|
committer | Brecht Van Lommel <brecht@blender.org> | 2021-10-18 20:02:10 +0300 |
commit | 2430f752797b83cd43892f656f5297fd6e0bb619 (patch) | |
tree | 24276e2bc3ff7d5f3cb8c41c1fa25aaa7ade9b0d /intern/cycles/kernel | |
parent | 3065d2609700d14100490a16c91152a6e71790e8 (diff) |
Cycles: reduce GPU state memory a little
* isect Ng is no longer needed for shadows; for the main path it is only needed for subsurface scattering (SSS)
* Reduce rng_offset and queued_kernel to 16 bits
Ref D12889
Diffstat (limited to 'intern/cycles/kernel')
8 files changed, 10 insertions, 35 deletions
diff --git a/intern/cycles/kernel/bvh/bvh_embree.h b/intern/cycles/kernel/bvh/bvh_embree.h index 7fa0cfdc510..4f85e8bee4b 100644 --- a/intern/cycles/kernel/bvh/bvh_embree.h +++ b/intern/cycles/kernel/bvh/bvh_embree.h @@ -107,7 +107,6 @@ ccl_device_inline void kernel_embree_convert_hit(KernelGlobals kg, Intersection *isect) { isect->t = ray->tfar; - isect->Ng = make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z); if (hit->instID[0] != RTC_INVALID_GEOMETRY_ID) { RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData( rtcGetGeometry(kernel_data.bvh.scene, hit->instID[0])); @@ -142,7 +141,6 @@ ccl_device_inline void kernel_embree_convert_sss_hit( isect->u = 1.0f - hit->v - hit->u; isect->v = hit->u; isect->t = ray->tfar; - isect->Ng = make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z); RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData( rtcGetGeometry(kernel_data.bvh.scene, object * 2)); isect->prim = hit->primID + diff --git a/intern/cycles/kernel/integrator/integrator_init_from_bake.h b/intern/cycles/kernel/integrator/integrator_init_from_bake.h index df3c2103c5b..9bc115150ff 100644 --- a/intern/cycles/kernel/integrator/integrator_init_from_bake.h +++ b/intern/cycles/kernel/integrator/integrator_init_from_bake.h @@ -180,9 +180,6 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg, isect.v = v; isect.t = 1.0f; isect.type = PRIMITIVE_TRIANGLE; -#ifdef __EMBREE__ - isect.Ng = Ng; -#endif integrator_state_write_isect(kg, state, &isect); /* Setup next kernel to execute. 
*/ diff --git a/intern/cycles/kernel/integrator/integrator_state_template.h b/intern/cycles/kernel/integrator/integrator_state_template.h index 0fe47cf13bc..d9801574d4f 100644 --- a/intern/cycles/kernel/integrator/integrator_state_template.h +++ b/intern/cycles/kernel/integrator/integrator_state_template.h @@ -40,13 +40,12 @@ KERNEL_STRUCT_MEMBER(path, uint16_t, volume_bounce, KERNEL_FEATURE_PATH_TRACING) KERNEL_STRUCT_MEMBER(path, uint16_t, volume_bounds_bounce, KERNEL_FEATURE_PATH_TRACING) /* Current transparent ray bounce depth. */ KERNEL_STRUCT_MEMBER(path, uint16_t, transparent_bounce, KERNEL_FEATURE_PATH_TRACING) -/* DeviceKernel bit indicating queued kernels. - * TODO: reduce size? */ -KERNEL_STRUCT_MEMBER(path, uint32_t, queued_kernel, KERNEL_FEATURE_PATH_TRACING) +/* DeviceKernel bit indicating queued kernels. */ +KERNEL_STRUCT_MEMBER(path, uint16_t, queued_kernel, KERNEL_FEATURE_PATH_TRACING) /* Random number generator seed. */ KERNEL_STRUCT_MEMBER(path, uint32_t, rng_hash, KERNEL_FEATURE_PATH_TRACING) /* Random number dimension offset. */ -KERNEL_STRUCT_MEMBER(path, uint32_t, rng_offset, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_MEMBER(path, uint16_t, rng_offset, KERNEL_FEATURE_PATH_TRACING) /* enum PathRayFlag */ KERNEL_STRUCT_MEMBER(path, uint32_t, flag, KERNEL_FEATURE_PATH_TRACING) /* Multiple importance sampling @@ -89,8 +88,6 @@ KERNEL_STRUCT_MEMBER(isect, float, v, KERNEL_FEATURE_PATH_TRACING) KERNEL_STRUCT_MEMBER(isect, int, prim, KERNEL_FEATURE_PATH_TRACING) KERNEL_STRUCT_MEMBER(isect, int, object, KERNEL_FEATURE_PATH_TRACING) KERNEL_STRUCT_MEMBER(isect, int, type, KERNEL_FEATURE_PATH_TRACING) -/* TODO: exclude for GPU. 
*/ -KERNEL_STRUCT_MEMBER(isect, float3, Ng, KERNEL_FEATURE_PATH_TRACING) KERNEL_STRUCT_END(isect) /*************** Subsurface closure state for subsurface kernel ***************/ @@ -99,6 +96,7 @@ KERNEL_STRUCT_BEGIN(subsurface) KERNEL_STRUCT_MEMBER(subsurface, float3, albedo, KERNEL_FEATURE_SUBSURFACE) KERNEL_STRUCT_MEMBER(subsurface, float3, radius, KERNEL_FEATURE_SUBSURFACE) KERNEL_STRUCT_MEMBER(subsurface, float, anisotropy, KERNEL_FEATURE_SUBSURFACE) +KERNEL_STRUCT_MEMBER(subsurface, float3, Ng, KERNEL_FEATURE_SUBSURFACE) KERNEL_STRUCT_END(subsurface) /********************************** Volume Stack ******************************/ @@ -117,9 +115,8 @@ KERNEL_STRUCT_BEGIN(shadow_path) KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, bounce, KERNEL_FEATURE_PATH_TRACING) /* Current transparent ray bounce depth. */ KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, transparent_bounce, KERNEL_FEATURE_PATH_TRACING) -/* DeviceKernel bit indicating queued kernels. - * TODO: reduce size? */ -KERNEL_STRUCT_MEMBER(shadow_path, uint32_t, queued_kernel, KERNEL_FEATURE_PATH_TRACING) +/* DeviceKernel bit indicating queued kernels. */ +KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, queued_kernel, KERNEL_FEATURE_PATH_TRACING) /* enum PathRayFlag */ KERNEL_STRUCT_MEMBER(shadow_path, uint32_t, flag, KERNEL_FEATURE_PATH_TRACING) /* Throughput. */ @@ -152,8 +149,6 @@ KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, float, v, KERNEL_FEATURE_PATH_TRACING) KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, int, prim, KERNEL_FEATURE_PATH_TRACING) KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, int, object, KERNEL_FEATURE_PATH_TRACING) KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, int, type, KERNEL_FEATURE_PATH_TRACING) -/* TODO: exclude for GPU. 
*/ -KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, float3, Ng, KERNEL_FEATURE_PATH_TRACING) KERNEL_STRUCT_END_ARRAY(shadow_isect, INTEGRATOR_SHADOW_ISECT_SIZE_CPU, INTEGRATOR_SHADOW_ISECT_SIZE_GPU) diff --git a/intern/cycles/kernel/integrator/integrator_state_util.h b/intern/cycles/kernel/integrator/integrator_state_util.h index bb372f9e984..18dcdff12ad 100644 --- a/intern/cycles/kernel/integrator/integrator_state_util.h +++ b/intern/cycles/kernel/integrator/integrator_state_util.h @@ -82,9 +82,6 @@ ccl_device_forceinline void integrator_state_write_isect( INTEGRATOR_STATE_WRITE(state, isect, object) = isect->object; INTEGRATOR_STATE_WRITE(state, isect, prim) = isect->prim; INTEGRATOR_STATE_WRITE(state, isect, type) = isect->type; -#ifdef __EMBREE__ - INTEGRATOR_STATE_WRITE(state, isect, Ng) = isect->Ng; -#endif } ccl_device_forceinline void integrator_state_read_isect( @@ -96,9 +93,6 @@ ccl_device_forceinline void integrator_state_read_isect( isect->u = INTEGRATOR_STATE(state, isect, u); isect->v = INTEGRATOR_STATE(state, isect, v); isect->t = INTEGRATOR_STATE(state, isect, t); -#ifdef __EMBREE__ - isect->Ng = INTEGRATOR_STATE(state, isect, Ng); -#endif } ccl_device_forceinline VolumeStack integrator_state_read_volume_stack(ConstIntegratorState state, @@ -136,9 +130,6 @@ ccl_device_forceinline void integrator_state_write_shadow_isect( INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, index, object) = isect->object; INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, index, prim) = isect->prim; INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, index, type) = isect->type; -#ifdef __EMBREE__ - INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, index, Ng) = isect->Ng; -#endif } ccl_device_forceinline void integrator_state_read_shadow_isect( @@ -150,9 +141,6 @@ ccl_device_forceinline void integrator_state_read_shadow_isect( isect->u = INTEGRATOR_STATE_ARRAY(state, shadow_isect, index, u); isect->v = INTEGRATOR_STATE_ARRAY(state, shadow_isect, index, v); isect->t = 
INTEGRATOR_STATE_ARRAY(state, shadow_isect, index, t); -#ifdef __EMBREE__ - isect->Ng = INTEGRATOR_STATE_ARRAY(state, shadow_isect, index, Ng); -#endif } ccl_device_forceinline void integrator_state_copy_volume_stack_to_shadow(KernelGlobals kg, diff --git a/intern/cycles/kernel/integrator/integrator_subsurface.h b/intern/cycles/kernel/integrator/integrator_subsurface.h index 448c99765e3..e9517a82453 100644 --- a/intern/cycles/kernel/integrator/integrator_subsurface.h +++ b/intern/cycles/kernel/integrator/integrator_subsurface.h @@ -56,7 +56,7 @@ ccl_device int subsurface_bounce(KernelGlobals kg, INTEGRATOR_STATE_WRITE(state, ray, dD) = differential_zero_compact(); /* Pass along object info, reusing isect to save memory. */ - INTEGRATOR_STATE_WRITE(state, isect, Ng) = sd->Ng; + INTEGRATOR_STATE_WRITE(state, subsurface, Ng) = sd->Ng; INTEGRATOR_STATE_WRITE(state, isect, object) = sd->object; uint32_t path_flag = (INTEGRATOR_STATE(state, path, flag) & ~PATH_RAY_CAMERA) | @@ -160,7 +160,7 @@ ccl_device_inline bool subsurface_scatter(KernelGlobals kg, IntegratorState stat if (object_flag & SD_OBJECT_INTERSECTS_VOLUME) { float3 P = INTEGRATOR_STATE(state, ray, P); - const float3 Ng = INTEGRATOR_STATE(state, isect, Ng); + const float3 Ng = INTEGRATOR_STATE(state, subsurface, Ng); const float3 offset_P = ray_offset(P, -Ng); integrator_volume_stack_update_for_subsurface(kg, state, offset_P, ray.P); diff --git a/intern/cycles/kernel/integrator/integrator_subsurface_disk.h b/intern/cycles/kernel/integrator/integrator_subsurface_disk.h index 1de05ea2696..e1cce13fb30 100644 --- a/intern/cycles/kernel/integrator/integrator_subsurface_disk.h +++ b/intern/cycles/kernel/integrator/integrator_subsurface_disk.h @@ -45,7 +45,7 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg, const float3 P = INTEGRATOR_STATE(state, ray, P); const float ray_dP = INTEGRATOR_STATE(state, ray, dP); const float time = INTEGRATOR_STATE(state, ray, time); - const float3 Ng = 
INTEGRATOR_STATE(state, isect, Ng); + const float3 Ng = INTEGRATOR_STATE(state, subsurface, Ng); const int object = INTEGRATOR_STATE(state, isect, object); /* Read subsurface scattering parameters. */ diff --git a/intern/cycles/kernel/integrator/integrator_subsurface_random_walk.h b/intern/cycles/kernel/integrator/integrator_subsurface_random_walk.h index 5365093decf..2ab6d0961e3 100644 --- a/intern/cycles/kernel/integrator/integrator_subsurface_random_walk.h +++ b/intern/cycles/kernel/integrator/integrator_subsurface_random_walk.h @@ -193,7 +193,7 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg, const float3 N = INTEGRATOR_STATE(state, ray, D); const float ray_dP = INTEGRATOR_STATE(state, ray, dP); const float time = INTEGRATOR_STATE(state, ray, time); - const float3 Ng = INTEGRATOR_STATE(state, isect, Ng); + const float3 Ng = INTEGRATOR_STATE(state, subsurface, Ng); const int object = INTEGRATOR_STATE(state, isect, object); /* Sample diffuse surface scatter into the object. */ diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index e478019b25c..3e276c24cdd 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -489,9 +489,6 @@ typedef struct Ray { /* Intersection */ typedef struct Intersection { -#ifdef __EMBREE__ - float3 Ng; -#endif float t, u, v; int prim; int object; |