From 063ad8635ec87a490d6fc02c937387a3c6673b08 Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Tue, 16 Nov 2021 14:03:59 +0100 Subject: Cycles: reduce triangle memory usage with packed_float3 Depends on D13243 Differential Revision: https://developer.blender.org/D13244 --- intern/cycles/scene/attribute.cpp | 6 ++- intern/cycles/scene/attribute.h | 7 +-- intern/cycles/scene/geometry.cpp | 91 +++++++++++++++++++++++++++++++-------- intern/cycles/scene/geometry.h | 4 +- intern/cycles/scene/mesh.cpp | 15 ++++--- intern/cycles/scene/mesh.h | 7 ++- intern/cycles/scene/scene.cpp | 1 + intern/cycles/scene/scene.h | 7 +-- 8 files changed, 101 insertions(+), 37 deletions(-) (limited to 'intern/cycles/scene') diff --git a/intern/cycles/scene/attribute.cpp b/intern/cycles/scene/attribute.cpp index 3401eea307f..6d15f3325f7 100644 --- a/intern/cycles/scene/attribute.cpp +++ b/intern/cycles/scene/attribute.cpp @@ -404,6 +404,10 @@ AttrKernelDataType Attribute::kernel_type(const Attribute &attr) return AttrKernelDataType::FLOAT2; } + if (attr.type == TypeFloat4 || attr.type == TypeRGBA || attr.type == TypeDesc::TypeMatrix) { + return AttrKernelDataType::FLOAT4; + } + return AttrKernelDataType::FLOAT3; } @@ -585,7 +589,7 @@ Attribute *AttributeSet::add(AttributeStandard std, ustring name) attr = add(name, TypeDesc::TypePoint, ATTR_ELEMENT_CURVE); break; case ATTR_STD_MOTION_VERTEX_POSITION: - attr = add(name, TypeDesc::TypePoint, ATTR_ELEMENT_CURVE_KEY_MOTION); + attr = add(name, TypeDesc::TypeFloat4, ATTR_ELEMENT_CURVE_KEY_MOTION); break; case ATTR_STD_CURVE_INTERCEPT: attr = add(name, TypeDesc::TypeFloat, ATTR_ELEMENT_CURVE_KEY); diff --git a/intern/cycles/scene/attribute.h b/intern/cycles/scene/attribute.h index 4a25a900c14..612a0b7c80d 100644 --- a/intern/cycles/scene/attribute.h +++ b/intern/cycles/scene/attribute.h @@ -47,12 +47,7 @@ struct Transform; * * The values of this enumeration are also used as flags to detect changes in AttributeSet. */ -enum AttrKernelDataType { - FLOAT = 0, - FLOAT2 = 1, - FLOAT3 = 2, - UCHAR4 = 3, -}; +enum AttrKernelDataType { FLOAT = 0, FLOAT2 = 1, FLOAT3 = 2, FLOAT4 = 3, UCHAR4 = 4, NUM = 5 }; /* Attribute * diff --git a/intern/cycles/scene/geometry.cpp b/intern/cycles/scene/geometry.cpp index 8a3fc522d22..bf426fc49f6 100644 --- a/intern/cycles/scene/geometry.cpp +++ b/intern/cycles/scene/geometry.cpp @@ -551,6 +551,7 @@ static void update_attribute_element_size(Geometry *geom, size_t *attr_float_size, size_t *attr_float2_size, size_t *attr_float3_size, + size_t *attr_float4_size, size_t *attr_uchar4_size) { if (mattr) { @@ -569,7 +570,10 @@ static void update_attribute_element_size(Geometry *geom, *attr_float2_size += size; } else if (mattr->type == TypeDesc::TypeMatrix) { - *attr_float3_size += size * 4; + *attr_float4_size += size * 4; + } + else if (mattr->type == TypeFloat4 || mattr->type == TypeRGBA) { + *attr_float4_size += size; } else { *attr_float3_size += size; @@ -582,8 +586,10 @@ void GeometryManager::update_attribute_element_offset(Geometry *geom, size_t &attr_float_offset, device_vector &attr_float2, size_t &attr_float2_offset, - device_vector &attr_float3, + device_vector &attr_float3, size_t &attr_float3_offset, + device_vector &attr_float4, + size_t &attr_float4_offset, device_vector &attr_uchar4, size_t &attr_uchar4_offset, Attribute *mattr, @@ -646,18 +652,30 @@ void GeometryManager::update_attribute_element_offset(Geometry *geom, } else if (mattr->type == TypeDesc::TypeMatrix) { Transform *tfm = mattr->data_transform(); - offset = attr_float3_offset; + offset = attr_float4_offset; - assert(attr_float3.size() >= offset + size * 3); + assert(attr_float4.size() >= offset + size * 3); if (mattr->modified) { for (size_t k = 0; k < size * 3; k++) { - attr_float3[offset + k] = (&tfm->x)[k]; + attr_float4[offset + k] = (&tfm->x)[k]; } } - attr_float3_offset += size * 3; + attr_float4_offset += size * 3; } - else { + else if (mattr->type == TypeFloat4 || mattr->type == TypeRGBA) { float4 *data = mattr->data_float4(); + offset = attr_float4_offset; + + assert(attr_float4.size() >= offset + size); + if (mattr->modified) { + for (size_t k = 0; k < size; k++) { + attr_float4[offset + k] = data[k]; + } + } + attr_float4_offset += size; + } + else { + float3 *data = mattr->data_float3(); offset = attr_float3_offset; assert(attr_float3.size() >= offset + size); @@ -783,6 +801,7 @@ void GeometryManager::device_update_attributes(Device *device, size_t attr_float_size = 0; size_t attr_float2_size = 0; size_t attr_float3_size = 0; + size_t attr_float4_size = 0; size_t attr_uchar4_size = 0; for (size_t i = 0; i < scene->geometry.size(); i++) { @@ -797,6 +816,7 @@ void GeometryManager::device_update_attributes(Device *device, &attr_float_size, &attr_float2_size, &attr_float3_size, + &attr_float4_size, &attr_uchar4_size); if (geom->is_mesh()) { @@ -809,6 +829,7 @@ void GeometryManager::device_update_attributes(Device *device, &attr_float_size, &attr_float2_size, &attr_float3_size, + &attr_float4_size, &attr_uchar4_size); } } @@ -824,6 +845,7 @@ void GeometryManager::device_update_attributes(Device *device, &attr_float_size, &attr_float2_size, &attr_float3_size, + &attr_float4_size, &attr_uchar4_size); } } @@ -831,19 +853,22 @@ void GeometryManager::device_update_attributes(Device *device, dscene->attributes_float.alloc(attr_float_size); dscene->attributes_float2.alloc(attr_float2_size); dscene->attributes_float3.alloc(attr_float3_size); + dscene->attributes_float4.alloc(attr_float4_size); dscene->attributes_uchar4.alloc(attr_uchar4_size); /* The order of those flags needs to match that of AttrKernelDataType. */ - const bool attributes_need_realloc[4] = { + const bool attributes_need_realloc[AttrKernelDataType::NUM] = { dscene->attributes_float.need_realloc(), dscene->attributes_float2.need_realloc(), dscene->attributes_float3.need_realloc(), + dscene->attributes_float4.need_realloc(), dscene->attributes_uchar4.need_realloc(), }; size_t attr_float_offset = 0; size_t attr_float2_offset = 0; size_t attr_float3_offset = 0; + size_t attr_float4_offset = 0; size_t attr_uchar4_offset = 0; /* Fill in attributes. */ @@ -868,6 +893,8 @@ void GeometryManager::device_update_attributes(Device *device, attr_float2_offset, dscene->attributes_float3, attr_float3_offset, + dscene->attributes_float4, + attr_float4_offset, dscene->attributes_uchar4, attr_uchar4_offset, attr, @@ -891,6 +918,8 @@ void GeometryManager::device_update_attributes(Device *device, attr_float2_offset, dscene->attributes_float3, attr_float3_offset, + dscene->attributes_float4, + attr_float4_offset, dscene->attributes_uchar4, attr_uchar4_offset, subd_attr, @@ -923,6 +952,8 @@ void GeometryManager::device_update_attributes(Device *device, attr_float2_offset, dscene->attributes_float3, attr_float3_offset, + dscene->attributes_float4, + attr_float4_offset, dscene->attributes_uchar4, attr_uchar4_offset, attr, @@ -954,6 +985,7 @@ void GeometryManager::device_update_attributes(Device *device, dscene->attributes_float.copy_to_device_if_modified(); dscene->attributes_float2.copy_to_device_if_modified(); dscene->attributes_float3.copy_to_device_if_modified(); + dscene->attributes_float4.copy_to_device_if_modified(); dscene->attributes_uchar4.copy_to_device_if_modified(); if (progress.get_cancel()) @@ -1080,9 +1112,9 @@ void GeometryManager::device_update_mesh(Device *, /* normals */ progress.set_status("Updating Mesh", "Computing normals"); - float4 *tri_verts = dscene->tri_verts.alloc(tri_size * 3); + packed_float3 *tri_verts = dscene->tri_verts.alloc(tri_size * 3); uint *tri_shader = dscene->tri_shader.alloc(tri_size); - float4 *vnormal = dscene->tri_vnormal.alloc(vert_size); + packed_float3 *vnormal = dscene->tri_vnormal.alloc(vert_size); uint4 *tri_vindex = dscene->tri_vindex.alloc(tri_size); uint *tri_patch = dscene->tri_patch.alloc(tri_size); float2 *tri_patch_uv = dscene->tri_patch_uv.alloc(vert_size); @@ -1293,18 +1325,21 @@ enum { ATTR_FLOAT_MODIFIED = (1 << 2), ATTR_FLOAT2_MODIFIED = (1 << 3), ATTR_FLOAT3_MODIFIED = (1 << 4), - ATTR_UCHAR4_MODIFIED = (1 << 5), + ATTR_FLOAT4_MODIFIED = (1 << 5), + ATTR_UCHAR4_MODIFIED = (1 << 6), - CURVE_DATA_NEED_REALLOC = (1 << 6), - MESH_DATA_NEED_REALLOC = (1 << 7), + CURVE_DATA_NEED_REALLOC = (1 << 7), + MESH_DATA_NEED_REALLOC = (1 << 8), - ATTR_FLOAT_NEEDS_REALLOC = (1 << 8), - ATTR_FLOAT2_NEEDS_REALLOC = (1 << 9), - ATTR_FLOAT3_NEEDS_REALLOC = (1 << 10), - ATTR_UCHAR4_NEEDS_REALLOC = (1 << 11), + ATTR_FLOAT_NEEDS_REALLOC = (1 << 9), + ATTR_FLOAT2_NEEDS_REALLOC = (1 << 10), + ATTR_FLOAT3_NEEDS_REALLOC = (1 << 11), + ATTR_FLOAT4_NEEDS_REALLOC = (1 << 12), + ATTR_UCHAR4_NEEDS_REALLOC = (1 << 13), ATTRS_NEED_REALLOC = (ATTR_FLOAT_NEEDS_REALLOC | ATTR_FLOAT2_NEEDS_REALLOC | - ATTR_FLOAT3_NEEDS_REALLOC | ATTR_UCHAR4_NEEDS_REALLOC), + ATTR_FLOAT3_NEEDS_REALLOC | ATTR_FLOAT4_NEEDS_REALLOC | + ATTR_UCHAR4_NEEDS_REALLOC), DEVICE_MESH_DATA_NEEDS_REALLOC = (MESH_DATA_NEED_REALLOC | ATTRS_NEED_REALLOC), DEVICE_CURVE_DATA_NEEDS_REALLOC = (CURVE_DATA_NEED_REALLOC | ATTRS_NEED_REALLOC), }; @@ -1332,10 +1367,17 @@ static void update_device_flags_attribute(uint32_t &device_update_flags, device_update_flags |= ATTR_FLOAT3_MODIFIED; break; } + case AttrKernelDataType::FLOAT4: { + device_update_flags |= ATTR_FLOAT4_MODIFIED; + break; + } case AttrKernelDataType::UCHAR4: { device_update_flags |= ATTR_UCHAR4_MODIFIED; break; } + case AttrKernelDataType::NUM: { + break; + } } } } @@ -1352,6 +1394,9 @@ static void update_attribute_realloc_flags(uint32_t &device_update_flags, if (attributes.modified(AttrKernelDataType::FLOAT3)) { device_update_flags |= ATTR_FLOAT3_NEEDS_REALLOC; } + if (attributes.modified(AttrKernelDataType::FLOAT4)) { + device_update_flags |= ATTR_FLOAT4_NEEDS_REALLOC; + } if (attributes.modified(AttrKernelDataType::UCHAR4)) { device_update_flags |= ATTR_UCHAR4_NEEDS_REALLOC; } @@ -1553,6 +1598,14 @@ void GeometryManager::device_update_preprocess(Device *device, Scene *scene, Pro dscene->attributes_float3.tag_modified(); } + if (device_update_flags & ATTR_FLOAT4_NEEDS_REALLOC) { + dscene->attributes_map.tag_realloc(); + dscene->attributes_float4.tag_realloc(); + } + else if (device_update_flags & ATTR_FLOAT4_MODIFIED) { + dscene->attributes_float4.tag_modified(); + } + if (device_update_flags & ATTR_UCHAR4_NEEDS_REALLOC) { dscene->attributes_map.tag_realloc(); dscene->attributes_uchar4.tag_realloc(); @@ -2014,6 +2067,7 @@ void GeometryManager::device_update(Device *device, dscene->attributes_float.clear_modified(); dscene->attributes_float2.clear_modified(); dscene->attributes_float3.clear_modified(); + dscene->attributes_float4.clear_modified(); dscene->attributes_uchar4.clear_modified(); } @@ -2041,6 +2095,7 @@ void GeometryManager::device_free(Device *device, DeviceScene *dscene, bool forc dscene->attributes_float.free_if_need_realloc(force_free); dscene->attributes_float2.free_if_need_realloc(force_free); dscene->attributes_float3.free_if_need_realloc(force_free); + dscene->attributes_float4.free_if_need_realloc(force_free); dscene->attributes_uchar4.free_if_need_realloc(force_free); /* Signal for shaders like displacement not to do ray tracing. */ diff --git a/intern/cycles/scene/geometry.h b/intern/cycles/scene/geometry.h index 335bcdcd0b7..91799d7fde8 100644 --- a/intern/cycles/scene/geometry.h +++ b/intern/cycles/scene/geometry.h @@ -257,8 +257,10 @@ class GeometryManager { size_t &attr_float_offset, device_vector &attr_float2, size_t &attr_float2_offset, - device_vector &attr_float3, + device_vector &attr_float3, size_t &attr_float3_offset, + device_vector &attr_float4, + size_t &attr_float4_offset, device_vector &attr_uchar4, size_t &attr_uchar4_offset, Attribute *mattr, diff --git a/intern/cycles/scene/mesh.cpp b/intern/cycles/scene/mesh.cpp index f47dab30869..e65b8462e34 100644 --- a/intern/cycles/scene/mesh.cpp +++ b/intern/cycles/scene/mesh.cpp @@ -707,7 +707,7 @@ void Mesh::pack_shaders(Scene *scene, uint *tri_shader) } } -void Mesh::pack_normals(float4 *vnormal) +void Mesh::pack_normals(packed_float3 *vnormal) { Attribute *attr_vN = attributes.find(ATTR_STD_VERTEX_NORMAL); if (attr_vN == NULL) { @@ -727,11 +727,14 @@ void Mesh::pack_normals(float4 *vnormal) if (do_transform) vNi = safe_normalize(transform_direction(&ntfm, vNi)); - vnormal[i] = make_float4(vNi.x, vNi.y, vNi.z, 0.0f); + vnormal[i] = make_float3(vNi.x, vNi.y, vNi.z); } } -void Mesh::pack_verts(float4 *tri_verts, uint4 *tri_vindex, uint *tri_patch, float2 *tri_patch_uv) +void Mesh::pack_verts(packed_float3 *tri_verts, + uint4 *tri_vindex, + uint *tri_patch, + float2 *tri_patch_uv) { size_t verts_size = verts.size(); @@ -752,9 +755,9 @@ void Mesh::pack_verts(float4 *tri_verts, uint4 *tri_vindex, uint *tri_patch, flo tri_patch[i] = (!get_num_subd_faces()) ? -1 : (triangle_patch[i] * 8 + patch_offset); - tri_verts[i * 3] = float3_to_float4(verts[t.v[0]]); - tri_verts[i * 3 + 1] = float3_to_float4(verts[t.v[1]]); - tri_verts[i * 3 + 2] = float3_to_float4(verts[t.v[2]]); + tri_verts[i * 3] = verts[t.v[0]]; + tri_verts[i * 3 + 1] = verts[t.v[1]]; + tri_verts[i * 3 + 2] = verts[t.v[2]]; } } diff --git a/intern/cycles/scene/mesh.h b/intern/cycles/scene/mesh.h index d13b3003164..254672d0620 100644 --- a/intern/cycles/scene/mesh.h +++ b/intern/cycles/scene/mesh.h @@ -223,8 +223,11 @@ class Mesh : public Geometry { void get_uv_tiles(ustring map, unordered_set &tiles) override; void pack_shaders(Scene *scene, uint *shader); - void pack_normals(float4 *vnormal); - void pack_verts(float4 *tri_verts, uint4 *tri_vindex, uint *tri_patch, float2 *tri_patch_uv); + void pack_normals(packed_float3 *vnormal); + void pack_verts(packed_float3 *tri_verts, + uint4 *tri_vindex, + uint *tri_patch, + float2 *tri_patch_uv); void pack_patches(uint *patch_data); PrimitiveType primitive_type() const override; diff --git a/intern/cycles/scene/scene.cpp b/intern/cycles/scene/scene.cpp index ef0ee0c6625..4230abe9a1b 100644 --- a/intern/cycles/scene/scene.cpp +++ b/intern/cycles/scene/scene.cpp @@ -74,6 +74,7 @@ DeviceScene::DeviceScene(Device *device) attributes_float(device, "__attributes_float", MEM_GLOBAL), attributes_float2(device, "__attributes_float2", MEM_GLOBAL), attributes_float3(device, "__attributes_float3", MEM_GLOBAL), + attributes_float4(device, "__attributes_float4", MEM_GLOBAL), attributes_uchar4(device, "__attributes_uchar4", MEM_GLOBAL), light_distribution(device, "__light_distribution", MEM_GLOBAL), lights(device, "__lights", MEM_GLOBAL), diff --git a/intern/cycles/scene/scene.h b/intern/cycles/scene/scene.h index fa7fc54602a..4af05349dd3 100644 --- a/intern/cycles/scene/scene.h +++ b/intern/cycles/scene/scene.h @@ -81,9 +81,9 @@ class DeviceScene { device_vector prim_time; /* mesh */ - device_vector tri_verts; + device_vector tri_verts; device_vector tri_shader; - device_vector tri_vnormal; + device_vector tri_vnormal; device_vector tri_vindex; device_vector tri_patch; device_vector tri_patch_uv; @@ -108,7 +108,8 @@ class DeviceScene { device_vector attributes_map; device_vector attributes_float; device_vector attributes_float2; - device_vector attributes_float3; + device_vector attributes_float3; + device_vector attributes_float4; device_vector attributes_uchar4; /* lights */ -- cgit v1.2.3