Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/intern
diff options
context:
space:
mode:
author: Brecht Van Lommel <brecht@blender.org> 2021-10-18 18:53:32 +0300
committer: Brecht Van Lommel <brecht@blender.org> 2021-10-18 20:02:10 +0300
commit: 2430f752797b83cd43892f656f5297fd6e0bb619 (patch)
tree: 24276e2bc3ff7d5f3cb8c41c1fa25aaa7ade9b0d /intern
parent: 3065d2609700d14100490a16c91152a6e71790e8 (diff)
Cycles: reduce GPU state memory a little
* isect Ng is no longer needed for shadows; for the main path it is needed for SSS only.
* Reduce rng_offset and queued_kernel to 16 bits.

Ref D12889
Diffstat (limited to 'intern')
-rw-r--r-- intern/cycles/kernel/bvh/bvh_embree.h 2
-rw-r--r-- intern/cycles/kernel/integrator/integrator_init_from_bake.h 3
-rw-r--r-- intern/cycles/kernel/integrator/integrator_state_template.h 17
-rw-r--r-- intern/cycles/kernel/integrator/integrator_state_util.h 12
-rw-r--r-- intern/cycles/kernel/integrator/integrator_subsurface.h 4
-rw-r--r-- intern/cycles/kernel/integrator/integrator_subsurface_disk.h 2
-rw-r--r-- intern/cycles/kernel/integrator/integrator_subsurface_random_walk.h 2
-rw-r--r-- intern/cycles/kernel/kernel_types.h 3
8 files changed, 10 insertions, 35 deletions
diff --git a/intern/cycles/kernel/bvh/bvh_embree.h b/intern/cycles/kernel/bvh/bvh_embree.h
index 7fa0cfdc510..4f85e8bee4b 100644
--- a/intern/cycles/kernel/bvh/bvh_embree.h
+++ b/intern/cycles/kernel/bvh/bvh_embree.h
@@ -107,7 +107,6 @@ ccl_device_inline void kernel_embree_convert_hit(KernelGlobals kg,
Intersection *isect)
{
isect->t = ray->tfar;
- isect->Ng = make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z);
if (hit->instID[0] != RTC_INVALID_GEOMETRY_ID) {
RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(
rtcGetGeometry(kernel_data.bvh.scene, hit->instID[0]));
@@ -142,7 +141,6 @@ ccl_device_inline void kernel_embree_convert_sss_hit(
isect->u = 1.0f - hit->v - hit->u;
isect->v = hit->u;
isect->t = ray->tfar;
- isect->Ng = make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z);
RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(
rtcGetGeometry(kernel_data.bvh.scene, object * 2));
isect->prim = hit->primID +
diff --git a/intern/cycles/kernel/integrator/integrator_init_from_bake.h b/intern/cycles/kernel/integrator/integrator_init_from_bake.h
index df3c2103c5b..9bc115150ff 100644
--- a/intern/cycles/kernel/integrator/integrator_init_from_bake.h
+++ b/intern/cycles/kernel/integrator/integrator_init_from_bake.h
@@ -180,9 +180,6 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg,
isect.v = v;
isect.t = 1.0f;
isect.type = PRIMITIVE_TRIANGLE;
-#ifdef __EMBREE__
- isect.Ng = Ng;
-#endif
integrator_state_write_isect(kg, state, &isect);
/* Setup next kernel to execute. */
diff --git a/intern/cycles/kernel/integrator/integrator_state_template.h b/intern/cycles/kernel/integrator/integrator_state_template.h
index 0fe47cf13bc..d9801574d4f 100644
--- a/intern/cycles/kernel/integrator/integrator_state_template.h
+++ b/intern/cycles/kernel/integrator/integrator_state_template.h
@@ -40,13 +40,12 @@ KERNEL_STRUCT_MEMBER(path, uint16_t, volume_bounce, KERNEL_FEATURE_PATH_TRACING)
KERNEL_STRUCT_MEMBER(path, uint16_t, volume_bounds_bounce, KERNEL_FEATURE_PATH_TRACING)
/* Current transparent ray bounce depth. */
KERNEL_STRUCT_MEMBER(path, uint16_t, transparent_bounce, KERNEL_FEATURE_PATH_TRACING)
-/* DeviceKernel bit indicating queued kernels.
- * TODO: reduce size? */
-KERNEL_STRUCT_MEMBER(path, uint32_t, queued_kernel, KERNEL_FEATURE_PATH_TRACING)
+/* DeviceKernel bit indicating queued kernels. */
+KERNEL_STRUCT_MEMBER(path, uint16_t, queued_kernel, KERNEL_FEATURE_PATH_TRACING)
/* Random number generator seed. */
KERNEL_STRUCT_MEMBER(path, uint32_t, rng_hash, KERNEL_FEATURE_PATH_TRACING)
/* Random number dimension offset. */
-KERNEL_STRUCT_MEMBER(path, uint32_t, rng_offset, KERNEL_FEATURE_PATH_TRACING)
+KERNEL_STRUCT_MEMBER(path, uint16_t, rng_offset, KERNEL_FEATURE_PATH_TRACING)
/* enum PathRayFlag */
KERNEL_STRUCT_MEMBER(path, uint32_t, flag, KERNEL_FEATURE_PATH_TRACING)
/* Multiple importance sampling
@@ -89,8 +88,6 @@ KERNEL_STRUCT_MEMBER(isect, float, v, KERNEL_FEATURE_PATH_TRACING)
KERNEL_STRUCT_MEMBER(isect, int, prim, KERNEL_FEATURE_PATH_TRACING)
KERNEL_STRUCT_MEMBER(isect, int, object, KERNEL_FEATURE_PATH_TRACING)
KERNEL_STRUCT_MEMBER(isect, int, type, KERNEL_FEATURE_PATH_TRACING)
-/* TODO: exclude for GPU. */
-KERNEL_STRUCT_MEMBER(isect, float3, Ng, KERNEL_FEATURE_PATH_TRACING)
KERNEL_STRUCT_END(isect)
/*************** Subsurface closure state for subsurface kernel ***************/
@@ -99,6 +96,7 @@ KERNEL_STRUCT_BEGIN(subsurface)
KERNEL_STRUCT_MEMBER(subsurface, float3, albedo, KERNEL_FEATURE_SUBSURFACE)
KERNEL_STRUCT_MEMBER(subsurface, float3, radius, KERNEL_FEATURE_SUBSURFACE)
KERNEL_STRUCT_MEMBER(subsurface, float, anisotropy, KERNEL_FEATURE_SUBSURFACE)
+KERNEL_STRUCT_MEMBER(subsurface, float3, Ng, KERNEL_FEATURE_SUBSURFACE)
KERNEL_STRUCT_END(subsurface)
/********************************** Volume Stack ******************************/
@@ -117,9 +115,8 @@ KERNEL_STRUCT_BEGIN(shadow_path)
KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, bounce, KERNEL_FEATURE_PATH_TRACING)
/* Current transparent ray bounce depth. */
KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, transparent_bounce, KERNEL_FEATURE_PATH_TRACING)
-/* DeviceKernel bit indicating queued kernels.
- * TODO: reduce size? */
-KERNEL_STRUCT_MEMBER(shadow_path, uint32_t, queued_kernel, KERNEL_FEATURE_PATH_TRACING)
+/* DeviceKernel bit indicating queued kernels. */
+KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, queued_kernel, KERNEL_FEATURE_PATH_TRACING)
/* enum PathRayFlag */
KERNEL_STRUCT_MEMBER(shadow_path, uint32_t, flag, KERNEL_FEATURE_PATH_TRACING)
/* Throughput. */
@@ -152,8 +149,6 @@ KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, float, v, KERNEL_FEATURE_PATH_TRACING)
KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, int, prim, KERNEL_FEATURE_PATH_TRACING)
KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, int, object, KERNEL_FEATURE_PATH_TRACING)
KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, int, type, KERNEL_FEATURE_PATH_TRACING)
-/* TODO: exclude for GPU. */
-KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, float3, Ng, KERNEL_FEATURE_PATH_TRACING)
KERNEL_STRUCT_END_ARRAY(shadow_isect,
INTEGRATOR_SHADOW_ISECT_SIZE_CPU,
INTEGRATOR_SHADOW_ISECT_SIZE_GPU)
diff --git a/intern/cycles/kernel/integrator/integrator_state_util.h b/intern/cycles/kernel/integrator/integrator_state_util.h
index bb372f9e984..18dcdff12ad 100644
--- a/intern/cycles/kernel/integrator/integrator_state_util.h
+++ b/intern/cycles/kernel/integrator/integrator_state_util.h
@@ -82,9 +82,6 @@ ccl_device_forceinline void integrator_state_write_isect(
INTEGRATOR_STATE_WRITE(state, isect, object) = isect->object;
INTEGRATOR_STATE_WRITE(state, isect, prim) = isect->prim;
INTEGRATOR_STATE_WRITE(state, isect, type) = isect->type;
-#ifdef __EMBREE__
- INTEGRATOR_STATE_WRITE(state, isect, Ng) = isect->Ng;
-#endif
}
ccl_device_forceinline void integrator_state_read_isect(
@@ -96,9 +93,6 @@ ccl_device_forceinline void integrator_state_read_isect(
isect->u = INTEGRATOR_STATE(state, isect, u);
isect->v = INTEGRATOR_STATE(state, isect, v);
isect->t = INTEGRATOR_STATE(state, isect, t);
-#ifdef __EMBREE__
- isect->Ng = INTEGRATOR_STATE(state, isect, Ng);
-#endif
}
ccl_device_forceinline VolumeStack integrator_state_read_volume_stack(ConstIntegratorState state,
@@ -136,9 +130,6 @@ ccl_device_forceinline void integrator_state_write_shadow_isect(
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, index, object) = isect->object;
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, index, prim) = isect->prim;
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, index, type) = isect->type;
-#ifdef __EMBREE__
- INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, index, Ng) = isect->Ng;
-#endif
}
ccl_device_forceinline void integrator_state_read_shadow_isect(
@@ -150,9 +141,6 @@ ccl_device_forceinline void integrator_state_read_shadow_isect(
isect->u = INTEGRATOR_STATE_ARRAY(state, shadow_isect, index, u);
isect->v = INTEGRATOR_STATE_ARRAY(state, shadow_isect, index, v);
isect->t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, index, t);
-#ifdef __EMBREE__
- isect->Ng = INTEGRATOR_STATE_ARRAY(state, shadow_isect, index, Ng);
-#endif
}
ccl_device_forceinline void integrator_state_copy_volume_stack_to_shadow(KernelGlobals kg,
diff --git a/intern/cycles/kernel/integrator/integrator_subsurface.h b/intern/cycles/kernel/integrator/integrator_subsurface.h
index 448c99765e3..e9517a82453 100644
--- a/intern/cycles/kernel/integrator/integrator_subsurface.h
+++ b/intern/cycles/kernel/integrator/integrator_subsurface.h
@@ -56,7 +56,7 @@ ccl_device int subsurface_bounce(KernelGlobals kg,
INTEGRATOR_STATE_WRITE(state, ray, dD) = differential_zero_compact();
/* Pass along object info, reusing isect to save memory. */
- INTEGRATOR_STATE_WRITE(state, isect, Ng) = sd->Ng;
+ INTEGRATOR_STATE_WRITE(state, subsurface, Ng) = sd->Ng;
INTEGRATOR_STATE_WRITE(state, isect, object) = sd->object;
uint32_t path_flag = (INTEGRATOR_STATE(state, path, flag) & ~PATH_RAY_CAMERA) |
@@ -160,7 +160,7 @@ ccl_device_inline bool subsurface_scatter(KernelGlobals kg, IntegratorState stat
if (object_flag & SD_OBJECT_INTERSECTS_VOLUME) {
float3 P = INTEGRATOR_STATE(state, ray, P);
- const float3 Ng = INTEGRATOR_STATE(state, isect, Ng);
+ const float3 Ng = INTEGRATOR_STATE(state, subsurface, Ng);
const float3 offset_P = ray_offset(P, -Ng);
integrator_volume_stack_update_for_subsurface(kg, state, offset_P, ray.P);
diff --git a/intern/cycles/kernel/integrator/integrator_subsurface_disk.h b/intern/cycles/kernel/integrator/integrator_subsurface_disk.h
index 1de05ea2696..e1cce13fb30 100644
--- a/intern/cycles/kernel/integrator/integrator_subsurface_disk.h
+++ b/intern/cycles/kernel/integrator/integrator_subsurface_disk.h
@@ -45,7 +45,7 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg,
const float3 P = INTEGRATOR_STATE(state, ray, P);
const float ray_dP = INTEGRATOR_STATE(state, ray, dP);
const float time = INTEGRATOR_STATE(state, ray, time);
- const float3 Ng = INTEGRATOR_STATE(state, isect, Ng);
+ const float3 Ng = INTEGRATOR_STATE(state, subsurface, Ng);
const int object = INTEGRATOR_STATE(state, isect, object);
/* Read subsurface scattering parameters. */
diff --git a/intern/cycles/kernel/integrator/integrator_subsurface_random_walk.h b/intern/cycles/kernel/integrator/integrator_subsurface_random_walk.h
index 5365093decf..2ab6d0961e3 100644
--- a/intern/cycles/kernel/integrator/integrator_subsurface_random_walk.h
+++ b/intern/cycles/kernel/integrator/integrator_subsurface_random_walk.h
@@ -193,7 +193,7 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg,
const float3 N = INTEGRATOR_STATE(state, ray, D);
const float ray_dP = INTEGRATOR_STATE(state, ray, dP);
const float time = INTEGRATOR_STATE(state, ray, time);
- const float3 Ng = INTEGRATOR_STATE(state, isect, Ng);
+ const float3 Ng = INTEGRATOR_STATE(state, subsurface, Ng);
const int object = INTEGRATOR_STATE(state, isect, object);
/* Sample diffuse surface scatter into the object. */
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index e478019b25c..3e276c24cdd 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -489,9 +489,6 @@ typedef struct Ray {
/* Intersection */
typedef struct Intersection {
-#ifdef __EMBREE__
- float3 Ng;
-#endif
float t, u, v;
int prim;
int object;