From 063ad8635ec87a490d6fc02c937387a3c6673b08 Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Tue, 16 Nov 2021 14:03:59 +0100 Subject: Cycles: reduce triangle memory usage with packed_float3 Depends on D13243 Differential Revision: https://developer.blender.org/D13244 --- intern/cycles/kernel/device/optix/kernel.cu | 6 +- intern/cycles/kernel/geom/attribute.h | 6 +- intern/cycles/kernel/geom/curve.h | 12 ++-- intern/cycles/kernel/geom/motion_curve.h | 12 ++-- intern/cycles/kernel/geom/motion_triangle.h | 24 +++---- intern/cycles/kernel/geom/patch.h | 4 +- intern/cycles/kernel/geom/primitive.h | 33 +++++++--- intern/cycles/kernel/geom/subd_triangle.h | 42 ++++++------ intern/cycles/kernel/geom/triangle.h | 76 ++++++++++----------- intern/cycles/kernel/geom/triangle_intersect.h | 32 ++++----- intern/cycles/kernel/geom/volume.h | 2 +- intern/cycles/kernel/textures.h | 7 +- intern/cycles/scene/attribute.cpp | 6 +- intern/cycles/scene/attribute.h | 7 +- intern/cycles/scene/geometry.cpp | 91 +++++++++++++++++++++----- intern/cycles/scene/geometry.h | 4 +- intern/cycles/scene/mesh.cpp | 15 +++-- intern/cycles/scene/mesh.h | 7 +- intern/cycles/scene/scene.cpp | 1 + intern/cycles/scene/scene.h | 7 +- 20 files changed, 237 insertions(+), 157 deletions(-) (limited to 'intern/cycles') diff --git a/intern/cycles/kernel/device/optix/kernel.cu b/intern/cycles/kernel/device/optix/kernel.cu index b987aa7a817..849710ffe61 100644 --- a/intern/cycles/kernel/device/optix/kernel.cu +++ b/intern/cycles/kernel/device/optix/kernel.cu @@ -159,9 +159,9 @@ extern "C" __global__ void __anyhit__kernel_optix_local_hit() /* Record geometric normal. */ const uint tri_vindex = kernel_tex_fetch(__tri_vindex, prim).w; - const float3 tri_a = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 0)); - const float3 tri_b = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 1)); - const float3 tri_c = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 2)); + const float3 tri_a = kernel_tex_fetch(__tri_verts, tri_vindex + 0); + const float3 tri_b = kernel_tex_fetch(__tri_verts, tri_vindex + 1); + const float3 tri_c = kernel_tex_fetch(__tri_verts, tri_vindex + 2); local_isect->Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a)); /* Continue tracing (without this the trace call would return after the first hit). */ diff --git a/intern/cycles/kernel/geom/attribute.h b/intern/cycles/kernel/geom/attribute.h index 848e0430caa..ae96e7b76ef 100644 --- a/intern/cycles/kernel/geom/attribute.h +++ b/intern/cycles/kernel/geom/attribute.h @@ -106,9 +106,9 @@ ccl_device Transform primitive_attribute_matrix(KernelGlobals kg, { Transform tfm; - tfm.x = kernel_tex_fetch(__attributes_float3, desc.offset + 0); - tfm.y = kernel_tex_fetch(__attributes_float3, desc.offset + 1); - tfm.z = kernel_tex_fetch(__attributes_float3, desc.offset + 2); + tfm.x = kernel_tex_fetch(__attributes_float4, desc.offset + 0); + tfm.y = kernel_tex_fetch(__attributes_float4, desc.offset + 1); + tfm.z = kernel_tex_fetch(__attributes_float4, desc.offset + 2); return tfm; } diff --git a/intern/cycles/kernel/geom/curve.h b/intern/cycles/kernel/geom/curve.h index 7271193eef8..4b6eecf9640 100644 --- a/intern/cycles/kernel/geom/curve.h +++ b/intern/cycles/kernel/geom/curve.h @@ -126,8 +126,8 @@ ccl_device float3 curve_attribute_float3(KernelGlobals kg, int k0 = curve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type); int k1 = k0 + 1; - float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + k0)); - float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + k1)); + float3 f0 = kernel_tex_fetch(__attributes_float3, desc.offset + k0); + float3 f1 = kernel_tex_fetch(__attributes_float3, desc.offset + k1); # ifdef __RAY_DIFFERENTIALS__ if (dx) @@ -149,7 +149,7 @@ ccl_device float3 curve_attribute_float3(KernelGlobals kg, if (desc.element & (ATTR_ELEMENT_CURVE | ATTR_ELEMENT_OBJECT | ATTR_ELEMENT_MESH)) { const int offset = (desc.element == ATTR_ELEMENT_CURVE) ? desc.offset + sd->prim : desc.offset; - return float4_to_float3(kernel_tex_fetch(__attributes_float3, offset)); + return kernel_tex_fetch(__attributes_float3, offset); } else { return make_float3(0.0f, 0.0f, 0.0f); @@ -168,8 +168,8 @@ ccl_device float4 curve_attribute_float4(KernelGlobals kg, int k0 = curve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type); int k1 = k0 + 1; - float4 f0 = kernel_tex_fetch(__attributes_float3, desc.offset + k0); - float4 f1 = kernel_tex_fetch(__attributes_float3, desc.offset + k1); + float4 f0 = kernel_tex_fetch(__attributes_float4, desc.offset + k0); + float4 f1 = kernel_tex_fetch(__attributes_float4, desc.offset + k1); # ifdef __RAY_DIFFERENTIALS__ if (dx) @@ -191,7 +191,7 @@ ccl_device float4 curve_attribute_float4(KernelGlobals kg, if (desc.element & (ATTR_ELEMENT_CURVE | ATTR_ELEMENT_OBJECT | ATTR_ELEMENT_MESH)) { const int offset = (desc.element == ATTR_ELEMENT_CURVE) ? desc.offset + sd->prim : desc.offset; - return kernel_tex_fetch(__attributes_float3, offset); + return kernel_tex_fetch(__attributes_float4, offset); } else { return make_float4(0.0f, 0.0f, 0.0f, 0.0f); diff --git a/intern/cycles/kernel/geom/motion_curve.h b/intern/cycles/kernel/geom/motion_curve.h index 2dd213d43f6..8358c94360f 100644 --- a/intern/cycles/kernel/geom/motion_curve.h +++ b/intern/cycles/kernel/geom/motion_curve.h @@ -48,8 +48,8 @@ ccl_device_inline void motion_curve_keys_for_step_linear(KernelGlobals kg, offset += step * numkeys; - keys[0] = kernel_tex_fetch(__attributes_float3, offset + k0); - keys[1] = kernel_tex_fetch(__attributes_float3, offset + k1); + keys[0] = kernel_tex_fetch(__attributes_float4, offset + k0); + keys[1] = kernel_tex_fetch(__attributes_float4, offset + k1); } } @@ -106,10 +106,10 @@ ccl_device_inline void motion_curve_keys_for_step(KernelGlobals kg, offset += step * numkeys; - keys[0] = kernel_tex_fetch(__attributes_float3, offset + k0); - keys[1] = kernel_tex_fetch(__attributes_float3, offset + k1); - keys[2] = kernel_tex_fetch(__attributes_float3, offset + k2); - keys[3] = kernel_tex_fetch(__attributes_float3, offset + k3); + keys[0] = kernel_tex_fetch(__attributes_float4, offset + k0); + keys[1] = kernel_tex_fetch(__attributes_float4, offset + k1); + keys[2] = kernel_tex_fetch(__attributes_float4, offset + k2); + keys[3] = kernel_tex_fetch(__attributes_float4, offset + k3); } } diff --git a/intern/cycles/kernel/geom/motion_triangle.h b/intern/cycles/kernel/geom/motion_triangle.h index 43f894938e0..62b7b630c89 100644 --- a/intern/cycles/kernel/geom/motion_triangle.h +++ b/intern/cycles/kernel/geom/motion_triangle.h @@ -43,9 +43,9 @@ ccl_device_inline void motion_triangle_verts_for_step(KernelGlobals kg, { if (step == numsteps) { /* center step: regular vertex location */ - verts[0] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 0)); - verts[1] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 1)); - verts[2] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 2)); + verts[0] = kernel_tex_fetch(__tri_verts, tri_vindex.w + 0); + verts[1] = kernel_tex_fetch(__tri_verts, tri_vindex.w + 1); + verts[2] = kernel_tex_fetch(__tri_verts, tri_vindex.w + 2); } else { /* center step not store in this array */ @@ -54,9 +54,9 @@ ccl_device_inline void motion_triangle_verts_for_step(KernelGlobals kg, offset += step * numverts; - verts[0] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.x)); - verts[1] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.y)); - verts[2] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.z)); + verts[0] = kernel_tex_fetch(__attributes_float3, offset + tri_vindex.x); + verts[1] = kernel_tex_fetch(__attributes_float3, offset + tri_vindex.y); + verts[2] = kernel_tex_fetch(__attributes_float3, offset + tri_vindex.z); } } @@ -70,9 +70,9 @@ ccl_device_inline void motion_triangle_normals_for_step(KernelGlobals kg, { if (step == numsteps) { /* center step: regular vertex location */ - normals[0] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.x)); - normals[1] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.y)); - normals[2] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.z)); + normals[0] = kernel_tex_fetch(__tri_vnormal, tri_vindex.x); + normals[1] = kernel_tex_fetch(__tri_vnormal, tri_vindex.y); + normals[2] = kernel_tex_fetch(__tri_vnormal, tri_vindex.z); } else { /* center step is not stored in this array */ @@ -81,9 +81,9 @@ ccl_device_inline void motion_triangle_normals_for_step(KernelGlobals kg, offset += step * numverts; - normals[0] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.x)); - normals[1] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.y)); - normals[2] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.z)); + normals[0] = kernel_tex_fetch(__attributes_float3, offset + tri_vindex.x); + normals[1] = kernel_tex_fetch(__attributes_float3, offset + tri_vindex.y); + normals[2] = kernel_tex_fetch(__attributes_float3, offset + tri_vindex.z); } } diff --git a/intern/cycles/kernel/geom/patch.h b/intern/cycles/kernel/geom/patch.h index 7d24937a41e..432618aa243 100644 --- a/intern/cycles/kernel/geom/patch.h +++ b/intern/cycles/kernel/geom/patch.h @@ -380,7 +380,7 @@ ccl_device float3 patch_eval_float3(KernelGlobals kg, *dv = make_float3(0.0f, 0.0f, 0.0f); for (int i = 0; i < num_control; i++) { - float3 v = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + indices[i])); + float3 v = kernel_tex_fetch(__attributes_float3, offset + indices[i]); val += v * weights[i]; if (du) @@ -417,7 +417,7 @@ ccl_device float4 patch_eval_float4(KernelGlobals kg, *dv = make_float4(0.0f, 0.0f, 0.0f, 0.0f); for (int i = 0; i < num_control; i++) { - float4 v = kernel_tex_fetch(__attributes_float3, offset + indices[i]); + float4 v = kernel_tex_fetch(__attributes_float4, offset + indices[i]); val += v * weights[i]; if (du) diff --git a/intern/cycles/kernel/geom/primitive.h b/intern/cycles/kernel/geom/primitive.h index 7a8921b6d6e..6d7b550d82f 100644 --- a/intern/cycles/kernel/geom/primitive.h +++ b/intern/cycles/kernel/geom/primitive.h @@ -284,18 +284,33 @@ ccl_device_inline float4 primitive_motion_vector(KernelGlobals kg, int numverts, numkeys; object_motion_info(kg, sd->object, NULL, &numverts, &numkeys); - /* lookup attributes */ - motion_pre = primitive_surface_attribute_float3(kg, sd, desc, NULL, NULL); - - desc.offset += (sd->type & PRIMITIVE_ALL_TRIANGLE) ? numverts : numkeys; - motion_post = primitive_surface_attribute_float3(kg, sd, desc, NULL, NULL); - #ifdef __HAIR__ - if (is_curve_primitive && (sd->object_flag & SD_OBJECT_HAS_VERTEX_MOTION) == 0) { - object_position_transform(kg, sd, &motion_pre); - object_position_transform(kg, sd, &motion_post); + if (is_curve_primitive) { + motion_pre = float4_to_float3(curve_attribute_float4(kg, sd, desc, NULL, NULL)); + desc.offset += numkeys; + motion_post = float4_to_float3(curve_attribute_float4(kg, sd, desc, NULL, NULL)); + + /* Curve */ + if ((sd->object_flag & SD_OBJECT_HAS_VERTEX_MOTION) == 0) { + object_position_transform(kg, sd, &motion_pre); + object_position_transform(kg, sd, &motion_post); + } } + else #endif + if (sd->type & PRIMITIVE_ALL_TRIANGLE) { + /* Triangle */ + if (subd_triangle_patch(kg, sd) == ~0) { + motion_pre = triangle_attribute_float3(kg, sd, desc, NULL, NULL); + desc.offset += numverts; + motion_post = triangle_attribute_float3(kg, sd, desc, NULL, NULL); + } + else { + motion_pre = subd_triangle_attribute_float3(kg, sd, desc, NULL, NULL); + desc.offset += numverts; + motion_post = subd_triangle_attribute_float3(kg, sd, desc, NULL, NULL); + } + } } /* object motion. note that depending on the mesh having motion vectors, this diff --git a/intern/cycles/kernel/geom/subd_triangle.h b/intern/cycles/kernel/geom/subd_triangle.h index 8a9a3f71231..e3b5c9afb91 100644 --- a/intern/cycles/kernel/geom/subd_triangle.h +++ b/intern/cycles/kernel/geom/subd_triangle.h @@ -443,8 +443,8 @@ ccl_device_noinline float3 subd_triangle_attribute_float3(KernelGlobals kg, if (dy) *dy = make_float3(0.0f, 0.0f, 0.0f); - return float4_to_float3( - kernel_tex_fetch(__attributes_float3, desc.offset + subd_triangle_patch_face(kg, patch))); + return kernel_tex_fetch(__attributes_float3, + desc.offset + subd_triangle_patch_face(kg, patch)); } else if (desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) { float2 uv[3]; @@ -452,10 +452,10 @@ ccl_device_noinline float3 subd_triangle_attribute_float3(KernelGlobals kg, uint4 v = subd_triangle_patch_indices(kg, patch); - float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + v.x)); - float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + v.y)); - float3 f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + v.z)); - float3 f3 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + v.w)); + float3 f0 = kernel_tex_fetch(__attributes_float3, desc.offset + v.x); + float3 f1 = kernel_tex_fetch(__attributes_float3, desc.offset + v.y); + float3 f2 = kernel_tex_fetch(__attributes_float3, desc.offset + v.z); + float3 f3 = kernel_tex_fetch(__attributes_float3, desc.offset + v.w); if (subd_triangle_patch_num_corners(kg, patch) != 4) { f1 = (f1 + f0) * 0.5f; @@ -484,10 +484,10 @@ ccl_device_noinline float3 subd_triangle_attribute_float3(KernelGlobals kg, float3 f0, f1, f2, f3; - f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[0] + desc.offset)); - f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[1] + desc.offset)); - f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[2] + desc.offset)); - f3 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[3] + desc.offset)); + f0 = kernel_tex_fetch(__attributes_float3, corners[0] + desc.offset); + f1 = kernel_tex_fetch(__attributes_float3, corners[1] + desc.offset); + f2 = kernel_tex_fetch(__attributes_float3, corners[2] + desc.offset); + f3 = kernel_tex_fetch(__attributes_float3, corners[3] + desc.offset); if (subd_triangle_patch_num_corners(kg, patch) != 4) { f1 = (f1 + f0) * 0.5f; @@ -513,7 +513,7 @@ ccl_device_noinline float3 subd_triangle_attribute_float3(KernelGlobals kg, if (dy) *dy = make_float3(0.0f, 0.0f, 0.0f); - return float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset)); + return kernel_tex_fetch(__attributes_float3, desc.offset); } else { if (dx) @@ -590,7 +590,7 @@ ccl_device_noinline float4 subd_triangle_attribute_float4(KernelGlobals kg, if (dy) *dy = make_float4(0.0f, 0.0f, 0.0f, 0.0f); - return kernel_tex_fetch(__attributes_float3, + return kernel_tex_fetch(__attributes_float4, desc.offset + subd_triangle_patch_face(kg, patch)); } else if (desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) { @@ -599,10 +599,10 @@ ccl_device_noinline float4 subd_triangle_attribute_float4(KernelGlobals kg, uint4 v = subd_triangle_patch_indices(kg, patch); - float4 f0 = kernel_tex_fetch(__attributes_float3, desc.offset + v.x); - float4 f1 = kernel_tex_fetch(__attributes_float3, desc.offset + v.y); - float4 f2 = kernel_tex_fetch(__attributes_float3, desc.offset + v.z); - float4 f3 = kernel_tex_fetch(__attributes_float3, desc.offset + v.w); + float4 f0 = kernel_tex_fetch(__attributes_float4, desc.offset + v.x); + float4 f1 = kernel_tex_fetch(__attributes_float4, desc.offset + v.y); + float4 f2 = kernel_tex_fetch(__attributes_float4, desc.offset + v.z); + float4 f3 = kernel_tex_fetch(__attributes_float4, desc.offset + v.w); if (subd_triangle_patch_num_corners(kg, patch) != 4) { f1 = (f1 + f0) * 0.5f; @@ -642,10 +642,10 @@ ccl_device_noinline float4 subd_triangle_attribute_float4(KernelGlobals kg, color_uchar4_to_float4(kernel_tex_fetch(__attributes_uchar4, corners[3] + desc.offset))); } else { - f0 = kernel_tex_fetch(__attributes_float3, corners[0] + desc.offset); - f1 = kernel_tex_fetch(__attributes_float3, corners[1] + desc.offset); - f2 = kernel_tex_fetch(__attributes_float3, corners[2] + desc.offset); - f3 = kernel_tex_fetch(__attributes_float3, corners[3] + desc.offset); + f0 = kernel_tex_fetch(__attributes_float4, corners[0] + desc.offset); + f1 = kernel_tex_fetch(__attributes_float4, corners[1] + desc.offset); + f2 = kernel_tex_fetch(__attributes_float4, corners[2] + desc.offset); + f3 = kernel_tex_fetch(__attributes_float4, corners[3] + desc.offset); } if (subd_triangle_patch_num_corners(kg, patch) != 4) { @@ -672,7 +672,7 @@ ccl_device_noinline float4 subd_triangle_attribute_float4(KernelGlobals kg, if (dy) *dy = make_float4(0.0f, 0.0f, 0.0f, 0.0f); - return kernel_tex_fetch(__attributes_float3, desc.offset); + return kernel_tex_fetch(__attributes_float4, desc.offset); } else { if (dx) diff --git a/intern/cycles/kernel/geom/triangle.h b/intern/cycles/kernel/geom/triangle.h index 233e901c7ca..854022b3369 100644 --- a/intern/cycles/kernel/geom/triangle.h +++ b/intern/cycles/kernel/geom/triangle.h @@ -29,9 +29,9 @@ ccl_device_inline float3 triangle_normal(KernelGlobals kg, ccl_private ShaderDat { /* load triangle vertices */ const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim); - const float3 v0 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 0)); - const float3 v1 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 1)); - const float3 v2 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 2)); + const float3 v0 = kernel_tex_fetch(__tri_verts, tri_vindex.w + 0); + const float3 v1 = kernel_tex_fetch(__tri_verts, tri_vindex.w + 1); + const float3 v2 = kernel_tex_fetch(__tri_verts, tri_vindex.w + 2); /* return normal */ if (sd->object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) { @@ -54,9 +54,9 @@ ccl_device_inline void triangle_point_normal(KernelGlobals kg, { /* load triangle vertices */ const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); - float3 v0 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 0)); - float3 v1 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 1)); - float3 v2 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 2)); + float3 v0 = kernel_tex_fetch(__tri_verts, tri_vindex.w + 0); + float3 v1 = kernel_tex_fetch(__tri_verts, tri_vindex.w + 1); + float3 v2 = kernel_tex_fetch(__tri_verts, tri_vindex.w + 2); /* compute point */ float t = 1.0f - u - v; *P = (u * v0 + v * v1 + t * v2); @@ -78,9 +78,9 @@ ccl_device_inline void triangle_point_normal(KernelGlobals kg, ccl_device_inline void triangle_vertices(KernelGlobals kg, int prim, float3 P[3]) { const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); - P[0] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 0)); - P[1] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 1)); - P[2] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 2)); + P[0] = kernel_tex_fetch(__tri_verts, tri_vindex.w + 0); + P[1] = kernel_tex_fetch(__tri_verts, tri_vindex.w + 1); + P[2] = kernel_tex_fetch(__tri_verts, tri_vindex.w + 2); } /* Triangle vertex locations and vertex normals */ @@ -91,12 +91,12 @@ ccl_device_inline void triangle_vertices_and_normals(KernelGlobals kg, float3 N[3]) { const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); - P[0] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 0)); - P[1] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 1)); - P[2] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 2)); - N[0] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.x)); - N[1] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.y)); - N[2] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.z)); + P[0] = kernel_tex_fetch(__tri_verts, tri_vindex.w + 0); + P[1] = kernel_tex_fetch(__tri_verts, tri_vindex.w + 1); + P[2] = kernel_tex_fetch(__tri_verts, tri_vindex.w + 2); + N[0] = kernel_tex_fetch(__tri_vnormal, tri_vindex.x); + N[1] = kernel_tex_fetch(__tri_vnormal, tri_vindex.y); + N[2] = kernel_tex_fetch(__tri_vnormal, tri_vindex.z); } /* Interpolate smooth vertex normal from vertices */ @@ -106,9 +106,9 @@ triangle_smooth_normal(KernelGlobals kg, float3 Ng, int prim, float u, float v) { /* load triangle vertices */ const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); - float3 n0 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.x)); - float3 n1 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.y)); - float3 n2 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.z)); + float3 n0 = kernel_tex_fetch(__tri_vnormal, tri_vindex.x); + float3 n1 = kernel_tex_fetch(__tri_vnormal, tri_vindex.y); + float3 n2 = kernel_tex_fetch(__tri_vnormal, tri_vindex.z); float3 N = safe_normalize((1.0f - u - v) * n2 + u * n0 + v * n1); @@ -120,9 +120,9 @@ ccl_device_inline float3 triangle_smooth_normal_unnormalized( { /* load triangle vertices */ const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); - float3 n0 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.x)); - float3 n1 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.y)); - float3 n2 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.z)); + float3 n0 = kernel_tex_fetch(__tri_vnormal, tri_vindex.x); + float3 n1 = kernel_tex_fetch(__tri_vnormal, tri_vindex.y); + float3 n2 = kernel_tex_fetch(__tri_vnormal, tri_vindex.z); /* ensure that the normals are in object space */ if (sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED) { @@ -145,9 +145,9 @@ ccl_device_inline void triangle_dPdudv(KernelGlobals kg, { /* fetch triangle vertex coordinates */ const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); - const float3 p0 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 0)); - const float3 p1 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 1)); - const float3 p2 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 2)); + const float3 p0 = kernel_tex_fetch(__tri_verts, tri_vindex.w + 0); + const float3 p1 = kernel_tex_fetch(__tri_verts, tri_vindex.w + 1); + const float3 p2 = kernel_tex_fetch(__tri_verts, tri_vindex.w + 2); /* compute derivatives of P w.r.t. uv */ *dPdu = (p0 - p2); @@ -267,15 +267,15 @@ ccl_device float3 triangle_attribute_float3(KernelGlobals kg, if (desc.element & (ATTR_ELEMENT_VERTEX | ATTR_ELEMENT_VERTEX_MOTION)) { const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim); - f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.x)); - f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.y)); - f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.z)); + f0 = kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.x); + f1 = kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.y); + f2 = kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.z); } else { const int tri = desc.offset + sd->prim * 3; - f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 0)); - f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 1)); - f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 2)); + f0 = kernel_tex_fetch(__attributes_float3, tri + 0); + f1 = kernel_tex_fetch(__attributes_float3, tri + 1); + f2 = kernel_tex_fetch(__attributes_float3, tri + 2); } #ifdef __RAY_DIFFERENTIALS__ @@ -298,7 +298,7 @@ ccl_device float3 triangle_attribute_float3(KernelGlobals kg, if (desc.element & (ATTR_ELEMENT_FACE | ATTR_ELEMENT_OBJECT | ATTR_ELEMENT_MESH)) { const int offset = (desc.element == ATTR_ELEMENT_FACE) ? desc.offset + sd->prim : desc.offset; - return float4_to_float3(kernel_tex_fetch(__attributes_float3, offset)); + return kernel_tex_fetch(__attributes_float3, offset); } else { return make_float3(0.0f, 0.0f, 0.0f); @@ -318,16 +318,16 @@ ccl_device float4 triangle_attribute_float4(KernelGlobals kg, if (desc.element & (ATTR_ELEMENT_VERTEX | ATTR_ELEMENT_VERTEX_MOTION)) { const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim); - f0 = kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.x); - f1 = kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.y); - f2 = kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.z); + f0 = kernel_tex_fetch(__attributes_float4, desc.offset + tri_vindex.x); + f1 = kernel_tex_fetch(__attributes_float4, desc.offset + tri_vindex.y); + f2 = kernel_tex_fetch(__attributes_float4, desc.offset + tri_vindex.z); } else { const int tri = desc.offset + sd->prim * 3; if (desc.element == ATTR_ELEMENT_CORNER) { - f0 = kernel_tex_fetch(__attributes_float3, tri + 0); - f1 = kernel_tex_fetch(__attributes_float3, tri + 1); - f2 = kernel_tex_fetch(__attributes_float3, tri + 2); + f0 = kernel_tex_fetch(__attributes_float4, tri + 0); + f1 = kernel_tex_fetch(__attributes_float4, tri + 1); + f2 = kernel_tex_fetch(__attributes_float4, tri + 2); } else { f0 = color_srgb_to_linear_v4( @@ -359,7 +359,7 @@ ccl_device float4 triangle_attribute_float4(KernelGlobals kg, if (desc.element & (ATTR_ELEMENT_FACE | ATTR_ELEMENT_OBJECT | ATTR_ELEMENT_MESH)) { const int offset = (desc.element == ATTR_ELEMENT_FACE) ? desc.offset + sd->prim : desc.offset; - return kernel_tex_fetch(__attributes_float3, offset); + return kernel_tex_fetch(__attributes_float4, offset); } else { return make_float4(0.0f, 0.0f, 0.0f, 0.0f); diff --git a/intern/cycles/kernel/geom/triangle_intersect.h b/intern/cycles/kernel/geom/triangle_intersect.h index faff8a85a93..720eceec4ed 100644 --- a/intern/cycles/kernel/geom/triangle_intersect.h +++ b/intern/cycles/kernel/geom/triangle_intersect.h @@ -40,7 +40,7 @@ ccl_device_inline bool triangle_intersect(KernelGlobals kg, #if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) const ssef *ssef_verts = (ssef *)&kg->__tri_verts.data[tri_vindex]; #else - const float4 tri_a = kernel_tex_fetch(__tri_verts, tri_vindex + 0), + const float3 tri_a = kernel_tex_fetch(__tri_verts, tri_vindex + 0), tri_b = kernel_tex_fetch(__tri_verts, tri_vindex + 1), tri_c = kernel_tex_fetch(__tri_verts, tri_vindex + 2); #endif @@ -51,9 +51,9 @@ ccl_device_inline bool triangle_intersect(KernelGlobals kg, #if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) ssef_verts, #else - float4_to_float3(tri_a), - float4_to_float3(tri_b), - float4_to_float3(tri_c), + tri_a, + tri_b, + tri_c, #endif &u, &v, @@ -109,9 +109,9 @@ ccl_device_inline bool triangle_intersect_local(KernelGlobals kg, # if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) const ssef *ssef_verts = (ssef *)&kg->__tri_verts.data[tri_vindex]; # else - const float3 tri_a = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 0)), - tri_b = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 1)), - tri_c = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 2)); + const float3 tri_a = kernel_tex_fetch(__tri_verts, tri_vindex + 0), + tri_b = kernel_tex_fetch(__tri_verts, tri_vindex + 1), + tri_c = kernel_tex_fetch(__tri_verts, tri_vindex + 2); # endif float t, u, v; if (!ray_triangle_intersect(P, @@ -179,9 +179,9 @@ ccl_device_inline bool triangle_intersect_local(KernelGlobals kg, /* Record geometric normal. */ # if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) - const float3 tri_a = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 0)), - tri_b = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 1)), - tri_c = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 2)); + const float3 tri_a = kernel_tex_fetch(__tri_verts, tri_vindex + 0), + tri_b = kernel_tex_fetch(__tri_verts, tri_vindex + 1), + tri_c = kernel_tex_fetch(__tri_verts, tri_vindex + 2); # endif local_isect->Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a)); @@ -223,9 +223,9 @@ ccl_device_inline float3 triangle_refine(KernelGlobals kg, P = P + D * t; const uint tri_vindex = kernel_tex_fetch(__tri_vindex, isect_prim).w; - const float4 tri_a = kernel_tex_fetch(__tri_verts, tri_vindex + 0), - tri_b = kernel_tex_fetch(__tri_verts, tri_vindex + 1), - tri_c = kernel_tex_fetch(__tri_verts, tri_vindex + 2); + const packed_float3 tri_a = kernel_tex_fetch(__tri_verts, tri_vindex + 0), + tri_b = kernel_tex_fetch(__tri_verts, tri_vindex + 1), + tri_c = kernel_tex_fetch(__tri_verts, tri_vindex + 2); float3 edge1 = make_float3(tri_a.x - tri_c.x, tri_a.y - tri_c.y, tri_a.z - tri_c.z); float3 edge2 = make_float3(tri_b.x - tri_c.x, tri_b.y - tri_c.y, tri_b.z - tri_c.z); float3 tvec = make_float3(P.x - tri_c.x, P.y - tri_c.y, P.z - tri_c.z); @@ -280,9 +280,9 @@ ccl_device_inline float3 triangle_refine_local(KernelGlobals kg, # ifdef __INTERSECTION_REFINE__ const uint tri_vindex = kernel_tex_fetch(__tri_vindex, isect_prim).w; - const float4 tri_a = kernel_tex_fetch(__tri_verts, tri_vindex + 0), - tri_b = kernel_tex_fetch(__tri_verts, tri_vindex + 1), - tri_c = kernel_tex_fetch(__tri_verts, tri_vindex + 2); + const packed_float3 tri_a = kernel_tex_fetch(__tri_verts, tri_vindex + 0), + tri_b = kernel_tex_fetch(__tri_verts, tri_vindex + 1), + tri_c = kernel_tex_fetch(__tri_verts, tri_vindex + 2); float3 edge1 = make_float3(tri_a.x - tri_c.x, tri_a.y - tri_c.y, tri_a.z - tri_c.z); float3 edge2 = make_float3(tri_b.x - tri_c.x, tri_b.y - tri_c.y, tri_b.z - tri_c.z); float3 tvec = make_float3(P.x - tri_c.x, P.y - tri_c.y, P.z - tri_c.z); diff --git a/intern/cycles/kernel/geom/volume.h b/intern/cycles/kernel/geom/volume.h index 4e83ad6acb3..387eb2646da 100644 --- a/intern/cycles/kernel/geom/volume.h +++ b/intern/cycles/kernel/geom/volume.h @@ -75,7 +75,7 @@ ccl_device float4 volume_attribute_float4(KernelGlobals kg, const AttributeDescriptor desc) { if (desc.element & (ATTR_ELEMENT_OBJECT | ATTR_ELEMENT_MESH)) { - return kernel_tex_fetch(__attributes_float3, desc.offset); + return kernel_tex_fetch(__attributes_float4, desc.offset); } else if (desc.element == ATTR_ELEMENT_VOXEL) { /* todo: optimize this so we don't have to transform both here and in diff --git a/intern/cycles/kernel/textures.h b/intern/cycles/kernel/textures.h index 464ecb183cb..2e3ae29a19a 100644 --- a/intern/cycles/kernel/textures.h +++ b/intern/cycles/kernel/textures.h @@ -40,11 +40,11 @@ KERNEL_TEX(DecomposedTransform, __camera_motion) /* triangles */ KERNEL_TEX(uint, __tri_shader) -KERNEL_TEX(float4, __tri_vnormal) +KERNEL_TEX(packed_float3, __tri_vnormal) KERNEL_TEX(uint4, __tri_vindex) KERNEL_TEX(uint, __tri_patch) KERNEL_TEX(float2, __tri_patch_uv) -KERNEL_TEX(float4, __tri_verts) +KERNEL_TEX(packed_float3, __tri_verts) /* curves */ KERNEL_TEX(KernelCurve, __curves) @@ -58,7 +58,8 @@ KERNEL_TEX(uint, __patches) KERNEL_TEX(uint4, __attributes_map) KERNEL_TEX(float, __attributes_float) KERNEL_TEX(float2, __attributes_float2) -KERNEL_TEX(float4, __attributes_float3) +KERNEL_TEX(packed_float3, __attributes_float3) +KERNEL_TEX(float4, __attributes_float4) KERNEL_TEX(uchar4, __attributes_uchar4) /* lights */ diff --git a/intern/cycles/scene/attribute.cpp b/intern/cycles/scene/attribute.cpp index 3401eea307f..6d15f3325f7 100644 --- a/intern/cycles/scene/attribute.cpp +++ b/intern/cycles/scene/attribute.cpp @@ -404,6 +404,10 @@ AttrKernelDataType Attribute::kernel_type(const Attribute &attr) return AttrKernelDataType::FLOAT2; } + if (attr.type == TypeFloat4 || attr.type == TypeRGBA || attr.type == TypeDesc::TypeMatrix) { + return AttrKernelDataType::FLOAT4; + } + return AttrKernelDataType::FLOAT3; } @@ -585,7 +589,7 @@ Attribute *AttributeSet::add(AttributeStandard std, ustring name) attr = add(name, TypeDesc::TypePoint, ATTR_ELEMENT_CURVE); break; case ATTR_STD_MOTION_VERTEX_POSITION: - attr = add(name, TypeDesc::TypePoint, ATTR_ELEMENT_CURVE_KEY_MOTION); + attr = add(name, TypeDesc::TypeFloat4, ATTR_ELEMENT_CURVE_KEY_MOTION); break; case ATTR_STD_CURVE_INTERCEPT: attr = add(name, TypeDesc::TypeFloat, ATTR_ELEMENT_CURVE_KEY); diff --git a/intern/cycles/scene/attribute.h b/intern/cycles/scene/attribute.h index 4a25a900c14..612a0b7c80d 100644 --- a/intern/cycles/scene/attribute.h +++ b/intern/cycles/scene/attribute.h @@ -47,12 +47,7 @@ struct Transform; * * The values of this enumeration are also used as flags to detect changes in AttributeSet. */ -enum AttrKernelDataType { - FLOAT = 0, - FLOAT2 = 1, - FLOAT3 = 2, - UCHAR4 = 3, -}; +enum AttrKernelDataType { FLOAT = 0, FLOAT2 = 1, FLOAT3 = 2, FLOAT4 = 3, UCHAR4 = 4, NUM = 5 }; /* Attribute * diff --git a/intern/cycles/scene/geometry.cpp b/intern/cycles/scene/geometry.cpp index 8a3fc522d22..bf426fc49f6 100644 --- a/intern/cycles/scene/geometry.cpp +++ b/intern/cycles/scene/geometry.cpp @@ -551,6 +551,7 @@ static void update_attribute_element_size(Geometry *geom, size_t *attr_float_size, size_t *attr_float2_size, size_t *attr_float3_size, + size_t *attr_float4_size, size_t *attr_uchar4_size) { if (mattr) { @@ -569,7 +570,10 @@ static void update_attribute_element_size(Geometry *geom, *attr_float2_size += size; } else if (mattr->type == TypeDesc::TypeMatrix) { - *attr_float3_size += size * 4; + *attr_float4_size += size * 4; + } + else if (mattr->type == TypeFloat4 || mattr->type == TypeRGBA) { + *attr_float4_size += size; } else { *attr_float3_size += size; @@ -582,8 +586,10 @@ void GeometryManager::update_attribute_element_offset(Geometry *geom, size_t &attr_float_offset, device_vector &attr_float2, size_t &attr_float2_offset, - device_vector &attr_float3, + device_vector &attr_float3, size_t &attr_float3_offset, + device_vector &attr_float4, + size_t &attr_float4_offset, device_vector &attr_uchar4, size_t &attr_uchar4_offset, Attribute *mattr, @@ -646,18 +652,30 @@ void GeometryManager::update_attribute_element_offset(Geometry *geom, } else if (mattr->type == TypeDesc::TypeMatrix) { Transform *tfm = mattr->data_transform(); - offset = attr_float3_offset; + offset = attr_float4_offset; - assert(attr_float3.size() >= offset + size * 3); + assert(attr_float4.size() >= offset + size * 3); if (mattr->modified) { for (size_t k = 0; k < size * 3; k++) { - attr_float3[offset + k] = (&tfm->x)[k]; + attr_float4[offset + k] = (&tfm->x)[k]; } } - attr_float3_offset += size * 3; + attr_float4_offset += size * 3; } - else { + else if (mattr->type == TypeFloat4 || mattr->type == TypeRGBA) { float4 *data = mattr->data_float4(); + offset = attr_float4_offset; + + assert(attr_float4.size() >= offset + size); + if (mattr->modified) { + for (size_t k = 0; k < size; k++) { + attr_float4[offset + k] = data[k]; + } + } + attr_float4_offset += size; + } + else { + float3 *data = mattr->data_float3(); offset = attr_float3_offset; assert(attr_float3.size() >= offset + size); @@ -783,6 +801,7 @@ void GeometryManager::device_update_attributes(Device *device, size_t attr_float_size = 0; size_t attr_float2_size = 0; size_t attr_float3_size = 0; + size_t attr_float4_size = 0; size_t attr_uchar4_size = 0; for (size_t i = 0; i < scene->geometry.size(); i++) { @@ -797,6 +816,7 @@ void GeometryManager::device_update_attributes(Device *device, &attr_float_size, &attr_float2_size, &attr_float3_size, + &attr_float4_size, &attr_uchar4_size); if (geom->is_mesh()) { @@ -809,6 +829,7 @@ void GeometryManager::device_update_attributes(Device *device, &attr_float_size, &attr_float2_size, &attr_float3_size, + &attr_float4_size, &attr_uchar4_size); } } @@ -824,6 +845,7 @@ void GeometryManager::device_update_attributes(Device *device, &attr_float_size, &attr_float2_size, &attr_float3_size, + &attr_float4_size, &attr_uchar4_size); } } @@ -831,19 +853,22 @@ void GeometryManager::device_update_attributes(Device *device, dscene->attributes_float.alloc(attr_float_size); dscene->attributes_float2.alloc(attr_float2_size); dscene->attributes_float3.alloc(attr_float3_size); + dscene->attributes_float4.alloc(attr_float4_size); dscene->attributes_uchar4.alloc(attr_uchar4_size); /* The order of those flags needs to match that of AttrKernelDataType. */ - const bool attributes_need_realloc[4] = { + const bool attributes_need_realloc[AttrKernelDataType::NUM] = { dscene->attributes_float.need_realloc(), dscene->attributes_float2.need_realloc(), dscene->attributes_float3.need_realloc(), + dscene->attributes_float4.need_realloc(), dscene->attributes_uchar4.need_realloc(), }; size_t attr_float_offset = 0; size_t attr_float2_offset = 0; size_t attr_float3_offset = 0; + size_t attr_float4_offset = 0; size_t attr_uchar4_offset = 0; /* Fill in attributes. */ @@ -868,6 +893,8 @@ void GeometryManager::device_update_attributes(Device *device, attr_float2_offset, dscene->attributes_float3, attr_float3_offset, + dscene->attributes_float4, + attr_float4_offset, dscene->attributes_uchar4, attr_uchar4_offset, attr, @@ -891,6 +918,8 @@ void GeometryManager::device_update_attributes(Device *device, attr_float2_offset, dscene->attributes_float3, attr_float3_offset, + dscene->attributes_float4, + attr_float4_offset, dscene->attributes_uchar4, attr_uchar4_offset, subd_attr, @@ -923,6 +952,8 @@ void GeometryManager::device_update_attributes(Device *device, attr_float2_offset, dscene->attributes_float3, attr_float3_offset, + dscene->attributes_float4, + attr_float4_offset, dscene->attributes_uchar4, attr_uchar4_offset, attr, @@ -954,6 +985,7 @@ void GeometryManager::device_update_attributes(Device *device, dscene->attributes_float.copy_to_device_if_modified(); dscene->attributes_float2.copy_to_device_if_modified(); dscene->attributes_float3.copy_to_device_if_modified(); + dscene->attributes_float4.copy_to_device_if_modified(); dscene->attributes_uchar4.copy_to_device_if_modified(); if (progress.get_cancel()) @@ -1080,9 +1112,9 @@ void GeometryManager::device_update_mesh(Device *, /* normals */ progress.set_status("Updating Mesh", "Computing normals"); - float4 *tri_verts = dscene->tri_verts.alloc(tri_size * 3); + packed_float3 *tri_verts = dscene->tri_verts.alloc(tri_size * 3); uint *tri_shader = dscene->tri_shader.alloc(tri_size); - float4 *vnormal = dscene->tri_vnormal.alloc(vert_size); + packed_float3 *vnormal = dscene->tri_vnormal.alloc(vert_size); uint4 *tri_vindex = dscene->tri_vindex.alloc(tri_size); uint *tri_patch = dscene->tri_patch.alloc(tri_size); float2 *tri_patch_uv = dscene->tri_patch_uv.alloc(vert_size); @@ -1293,18 +1325,21 @@ enum { ATTR_FLOAT_MODIFIED = (1 << 2), ATTR_FLOAT2_MODIFIED = (1 << 3), ATTR_FLOAT3_MODIFIED = (1 << 4), - ATTR_UCHAR4_MODIFIED = (1 << 5), + ATTR_FLOAT4_MODIFIED = (1 << 5), + ATTR_UCHAR4_MODIFIED = (1 << 6), - CURVE_DATA_NEED_REALLOC = (1 << 6), - MESH_DATA_NEED_REALLOC = (1 << 7), + CURVE_DATA_NEED_REALLOC = (1 << 7), + MESH_DATA_NEED_REALLOC = (1 << 8), - ATTR_FLOAT_NEEDS_REALLOC = (1 << 8), - ATTR_FLOAT2_NEEDS_REALLOC = (1 << 9), - ATTR_FLOAT3_NEEDS_REALLOC = (1 << 10), - ATTR_UCHAR4_NEEDS_REALLOC = (1 << 11), + ATTR_FLOAT_NEEDS_REALLOC = (1 << 9), + ATTR_FLOAT2_NEEDS_REALLOC = (1 << 10), + ATTR_FLOAT3_NEEDS_REALLOC = (1 << 11), + ATTR_FLOAT4_NEEDS_REALLOC = (1 << 12), + ATTR_UCHAR4_NEEDS_REALLOC = (1 << 13), ATTRS_NEED_REALLOC = (ATTR_FLOAT_NEEDS_REALLOC | ATTR_FLOAT2_NEEDS_REALLOC | - ATTR_FLOAT3_NEEDS_REALLOC | ATTR_UCHAR4_NEEDS_REALLOC), + ATTR_FLOAT3_NEEDS_REALLOC | ATTR_FLOAT4_NEEDS_REALLOC | + ATTR_UCHAR4_NEEDS_REALLOC), DEVICE_MESH_DATA_NEEDS_REALLOC = (MESH_DATA_NEED_REALLOC | ATTRS_NEED_REALLOC), DEVICE_CURVE_DATA_NEEDS_REALLOC = (CURVE_DATA_NEED_REALLOC | ATTRS_NEED_REALLOC), }; @@ -1332,10 +1367,17 @@ static void update_device_flags_attribute(uint32_t &device_update_flags, device_update_flags |= ATTR_FLOAT3_MODIFIED; break; } + case AttrKernelDataType::FLOAT4: { + device_update_flags |= ATTR_FLOAT4_MODIFIED; + break; + } case AttrKernelDataType::UCHAR4: { device_update_flags |= ATTR_UCHAR4_MODIFIED; break; } + case AttrKernelDataType::NUM: { + break; + } } } } @@ -1352,6 +1394,9 @@ static void update_attribute_realloc_flags(uint32_t &device_update_flags, if (attributes.modified(AttrKernelDataType::FLOAT3)) { device_update_flags |= ATTR_FLOAT3_NEEDS_REALLOC; } + if (attributes.modified(AttrKernelDataType::FLOAT4)) { + device_update_flags |= ATTR_FLOAT4_NEEDS_REALLOC; + } if (attributes.modified(AttrKernelDataType::UCHAR4)) { device_update_flags |= ATTR_UCHAR4_NEEDS_REALLOC; } @@ -1553,6 +1598,14 @@ void GeometryManager::device_update_preprocess(Device *device, Scene *scene, Pro dscene->attributes_float3.tag_modified(); } + if (device_update_flags & ATTR_FLOAT4_NEEDS_REALLOC) { + dscene->attributes_map.tag_realloc(); + dscene->attributes_float4.tag_realloc(); + } + else if (device_update_flags & ATTR_FLOAT4_MODIFIED) { + dscene->attributes_float4.tag_modified(); + } + if (device_update_flags & ATTR_UCHAR4_NEEDS_REALLOC) { dscene->attributes_map.tag_realloc(); dscene->attributes_uchar4.tag_realloc(); @@ -2014,6 +2067,7 @@ void GeometryManager::device_update(Device *device, dscene->attributes_float.clear_modified(); dscene->attributes_float2.clear_modified(); dscene->attributes_float3.clear_modified(); + dscene->attributes_float4.clear_modified(); dscene->attributes_uchar4.clear_modified(); } @@ -2041,6 +2095,7 @@ void GeometryManager::device_free(Device *device, DeviceScene *dscene, bool forc dscene->attributes_float.free_if_need_realloc(force_free); dscene->attributes_float2.free_if_need_realloc(force_free); dscene->attributes_float3.free_if_need_realloc(force_free); + dscene->attributes_float4.free_if_need_realloc(force_free); dscene->attributes_uchar4.free_if_need_realloc(force_free); /* Signal for shaders like displacement not to do ray tracing. */ diff --git a/intern/cycles/scene/geometry.h b/intern/cycles/scene/geometry.h index 335bcdcd0b7..91799d7fde8 100644 --- a/intern/cycles/scene/geometry.h +++ b/intern/cycles/scene/geometry.h @@ -257,8 +257,10 @@ class GeometryManager { size_t &attr_float_offset, device_vector &attr_float2, size_t &attr_float2_offset, - device_vector &attr_float3, + device_vector &attr_float3, size_t &attr_float3_offset, + device_vector &attr_float4, + size_t &attr_float4_offset, device_vector &attr_uchar4, size_t &attr_uchar4_offset, Attribute *mattr, diff --git a/intern/cycles/scene/mesh.cpp b/intern/cycles/scene/mesh.cpp index f47dab30869..e65b8462e34 100644 --- a/intern/cycles/scene/mesh.cpp +++ b/intern/cycles/scene/mesh.cpp @@ -707,7 +707,7 @@ void Mesh::pack_shaders(Scene *scene, uint *tri_shader) } } -void Mesh::pack_normals(float4 *vnormal) +void Mesh::pack_normals(packed_float3 *vnormal) { Attribute *attr_vN = attributes.find(ATTR_STD_VERTEX_NORMAL); if (attr_vN == NULL) { @@ -727,11 +727,14 @@ void Mesh::pack_normals(float4 *vnormal) if (do_transform) vNi = safe_normalize(transform_direction(&ntfm, vNi)); - vnormal[i] = make_float4(vNi.x, vNi.y, vNi.z, 0.0f); + vnormal[i] = make_float3(vNi.x, vNi.y, vNi.z); } } -void Mesh::pack_verts(float4 *tri_verts, uint4 *tri_vindex, uint *tri_patch, float2 *tri_patch_uv) +void Mesh::pack_verts(packed_float3 *tri_verts, + uint4 *tri_vindex, + uint *tri_patch, + float2 *tri_patch_uv) { size_t verts_size = verts.size(); @@ -752,9 +755,9 @@ void Mesh::pack_verts(float4 *tri_verts, uint4 *tri_vindex, uint *tri_patch, flo tri_patch[i] = (!get_num_subd_faces()) ? -1 : (triangle_patch[i] * 8 + patch_offset); - tri_verts[i * 3] = float3_to_float4(verts[t.v[0]]); - tri_verts[i * 3 + 1] = float3_to_float4(verts[t.v[1]]); - tri_verts[i * 3 + 2] = float3_to_float4(verts[t.v[2]]); + tri_verts[i * 3] = verts[t.v[0]]; + tri_verts[i * 3 + 1] = verts[t.v[1]]; + tri_verts[i * 3 + 2] = verts[t.v[2]]; } } diff --git a/intern/cycles/scene/mesh.h b/intern/cycles/scene/mesh.h index d13b3003164..254672d0620 100644 --- a/intern/cycles/scene/mesh.h +++ b/intern/cycles/scene/mesh.h @@ -223,8 +223,11 @@ class Mesh : public Geometry { void get_uv_tiles(ustring map, unordered_set &tiles) override; void pack_shaders(Scene *scene, uint *shader); - void pack_normals(float4 *vnormal); - void pack_verts(float4 *tri_verts, uint4 *tri_vindex, uint *tri_patch, float2 *tri_patch_uv); + void pack_normals(packed_float3 *vnormal); + void pack_verts(packed_float3 *tri_verts, + uint4 *tri_vindex, + uint *tri_patch, + float2 *tri_patch_uv); void pack_patches(uint *patch_data); PrimitiveType primitive_type() const override; diff --git a/intern/cycles/scene/scene.cpp b/intern/cycles/scene/scene.cpp index ef0ee0c6625..4230abe9a1b 100644 --- a/intern/cycles/scene/scene.cpp +++ b/intern/cycles/scene/scene.cpp @@ -74,6 +74,7 @@ DeviceScene::DeviceScene(Device *device) attributes_float(device, "__attributes_float", MEM_GLOBAL), attributes_float2(device, "__attributes_float2", MEM_GLOBAL), attributes_float3(device, "__attributes_float3", MEM_GLOBAL), + attributes_float4(device, "__attributes_float4", MEM_GLOBAL), attributes_uchar4(device, "__attributes_uchar4", MEM_GLOBAL), light_distribution(device, "__light_distribution", MEM_GLOBAL), lights(device, "__lights", MEM_GLOBAL), diff --git a/intern/cycles/scene/scene.h b/intern/cycles/scene/scene.h index fa7fc54602a..4af05349dd3 100644 --- a/intern/cycles/scene/scene.h +++ b/intern/cycles/scene/scene.h @@ -81,9 +81,9 @@ class DeviceScene { device_vector prim_time; /* mesh */ - device_vector tri_verts; + device_vector tri_verts; device_vector tri_shader; - device_vector tri_vnormal; + device_vector tri_vnormal; device_vector tri_vindex; device_vector tri_patch; device_vector tri_patch_uv; @@ -108,7 +108,8 @@ class DeviceScene { device_vector attributes_map; device_vector attributes_float; device_vector attributes_float2; - device_vector attributes_float3; + device_vector attributes_float3; + device_vector attributes_float4; device_vector attributes_uchar4; /* lights */ -- cgit v1.2.3