/*
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 *
 * The Original Code is Copyright (C) 2016 by Mike Erwin.
 * All rights reserved.
 */

/** \file blender/gpu/intern/gpu_vertex_format.c
 *  \ingroup gpu
 *
 * GPU vertex format
 */

#include "GPU_shader_interface.h"
#include "GPU_vertex_format.h"
#include "gpu_vertex_format_private.h"
#include <stddef.h>
#include <string.h>

#include "BLI_utildefines.h"

#define PACK_DEBUG 0

#if PACK_DEBUG
#  include <stdio.h>
#endif

void GPU_vertformat_clear(GPUVertFormat *format)
{
#if TRUST_NO_ONE
  memset(format, 0, sizeof(GPUVertFormat));
#else
  format->attr_len = 0;
  format->packed = false;
  format->name_offset = 0;
  format->name_len = 0;

  for (uint i = 0; i < GPU_VERT_ATTR_MAX_LEN; i++) {
    format->attrs[i].name_len = 0;
  }
#endif
}

void GPU_vertformat_copy(GPUVertFormat *dest, const GPUVertFormat *src)
{
  /* copy regular struct fields */
  memcpy(dest, src, sizeof(GPUVertFormat));

  /* Name pointers alias into the format's own names buffer,
   * so rebase them to point into dest rather than src. */
  for (uint i = 0; i < dest->attr_len; i++) {
    for (uint j = 0; j < dest->attrs[i].name_len; j++) {
      dest->attrs[i].name[j] = (char *)dest + (src->attrs[i].name[j] - ((char *)src));
    }
  }
}

static GLenum convert_comp_type_to_gl(GPUVertCompType type)
{
  static const GLenum table[] = {
    [GPU_COMP_I8] = GL_BYTE,
    [GPU_COMP_U8] = GL_UNSIGNED_BYTE,
    [GPU_COMP_I16] = GL_SHORT,
    [GPU_COMP_U16] = GL_UNSIGNED_SHORT,
    [GPU_COMP_I32] = GL_INT,
    [GPU_COMP_U32] = GL_UNSIGNED_INT,
    [GPU_COMP_F32] = GL_FLOAT,
    [GPU_COMP_I10] = GL_INT_2_10_10_10_REV,
  };
  return table[type];
}

static uint comp_sz(GPUVertCompType type)
{
#if TRUST_NO_ONE
  assert(type <= GPU_COMP_F32); /* other types have irregular sizes (not bytes) */
#endif
  const GLubyte sizes[] = {1, 1, 2, 2, 4, 4, 4};
  return sizes[type];
}

static uint attr_sz(const GPUVertAttr *a)
{
  if (a->comp_type == GPU_COMP_I10) {
    return 4; /* always packed as 10_10_10_2 */
  }
  return a->comp_len * comp_sz(a->comp_type);
}

static uint attr_align(const GPUVertAttr *a)
{
  if (a->comp_type == GPU_COMP_I10) {
    return 4; /* always packed as 10_10_10_2 */
  }
  uint c = comp_sz(a->comp_type);
  if (a->comp_len == 3 && c <= 2) {
    return 4 * c; /* AMD HW can't fetch these well, so pad it out (other vendors too?) */
  }
  else {
    return c; /* most fetches are ok if components are naturally aligned */
  }
}
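
/* Illustrative sketch (comment only, not compiled): how the helpers above
 * behave for a hypothetical 3 x GPU_COMP_U16 attribute:
 *
 *   GPUVertAttr a = {.comp_type = GPU_COMP_U16, .comp_len = 3};
 *   attr_sz(&a);    --> 3 * 2 == 6 bytes of data
 *   attr_align(&a); --> 8, padded out since comp_len == 3 and comp_sz() <= 2
 *
 * A GPU_COMP_I10 attribute reports 4 for both size and alignment, since its
 * components are packed into a single 10_10_10_2 word. */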
uint vertex_buffer_size(const GPUVertFormat *format, uint vertex_len)
{
#if TRUST_NO_ONE
  assert(format->packed && format->stride > 0);
#endif
  return format->stride * vertex_len;
}

static const char *copy_attr_name(GPUVertFormat *format, const char *name, const char *suffix)
{
  /* strncpy does 110% of what we need; let's do exactly 100% */
  char *name_copy = format->names + format->name_offset;
  uint available = GPU_VERT_ATTR_NAMES_BUF_LEN - format->name_offset;
  bool terminated = false;

  for (uint i = 0; i < available; ++i) {
    const char c = name[i];
    name_copy[i] = c;
    if (c == '\0') {
      if (suffix) {
        for (uint j = 0; j < available; ++j) {
          const char s = suffix[j];
          name_copy[i + j] = s;
          if (s == '\0') {
            terminated = true;
            format->name_offset += (i + j + 1);
            break;
          }
        }
      }
      else {
        terminated = true;
        format->name_offset += (i + 1);
      }
      break;
    }
  }
#if TRUST_NO_ONE
  assert(terminated);
  assert(format->name_offset <= GPU_VERT_ATTR_NAMES_BUF_LEN);
#else
  (void)terminated;
#endif
  return name_copy;
}

uint GPU_vertformat_attr_add(
        GPUVertFormat *format, const char *name,
        GPUVertCompType comp_type, uint comp_len, GPUVertFetchMode fetch_mode)
{
#if TRUST_NO_ONE
  assert(format->name_len < GPU_VERT_ATTR_MAX_LEN); /* there's room for more */
  assert(format->attr_len < GPU_VERT_ATTR_MAX_LEN); /* there's room for more */
  assert(!format->packed); /* packed means frozen/locked */
  assert((comp_len >= 1 && comp_len <= 4) || comp_len == 8 || comp_len == 12 || comp_len == 16);

  switch (comp_type) {
    case GPU_COMP_F32:
      /* float type can only be kept as float */
      assert(fetch_mode == GPU_FETCH_FLOAT);
      break;
    case GPU_COMP_I10:
      /* 10_10_10 format intended for normals (xyz) or colors (rgb)
       * extra component packed.w can be manually set to { -2, -1, 0, 1 } */
      assert(comp_len == 3 || comp_len == 4);
      assert(fetch_mode == GPU_FETCH_INT_TO_FLOAT_UNIT); /* not strictly required, may relax later */
      break;
    default:
      /* integer types can be kept as int or converted/normalized to float */
      assert(fetch_mode != GPU_FETCH_FLOAT);
      /* only support float matrices (see Batch_update_program_bindings) */
      assert(comp_len != 8 && comp_len != 12 && comp_len != 16);
  }
#endif
  format->name_len++; /* multiname support */

  const uint attr_id = format->attr_len++;
  GPUVertAttr *attr = &format->attrs[attr_id];

  attr->name[attr->name_len++] = copy_attr_name(format, name, NULL);
  attr->comp_type = comp_type;
  attr->gl_comp_type = convert_comp_type_to_gl(comp_type);
  attr->comp_len = (comp_type == GPU_COMP_I10) ? 4 : comp_len; /* system needs 10_10_10_2 to be 4 or BGRA */
  attr->sz = attr_sz(attr);
  attr->offset = 0; /* offsets & stride are calculated later (during pack) */
  attr->fetch_mode = fetch_mode;

  return attr_id;
}
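
/* Usage sketch (illustrative only; the attribute names are hypothetical):
 *
 *   GPUVertFormat format = {0};
 *   uint pos = GPU_vertformat_attr_add(&format, "pos", GPU_COMP_F32, 3, GPU_FETCH_FLOAT);
 *   uint nor = GPU_vertformat_attr_add(&format, "nor", GPU_COMP_I10, 4, GPU_FETCH_INT_TO_FLOAT_UNIT);
 *
 * The returned indices identify each attribute when filling vertex buffers.
 * Offsets and stride stay zero until VertexFormat_pack() runs. */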
void GPU_vertformat_alias_add(GPUVertFormat *format, const char *alias)
{
  GPUVertAttr *attr = &format->attrs[format->attr_len - 1];
#if TRUST_NO_ONE
  assert(format->name_len < GPU_VERT_ATTR_MAX_LEN); /* there's room for more */
  assert(attr->name_len < GPU_VERT_ATTR_MAX_NAMES);
#endif
  format->name_len++; /* multiname support */
  attr->name[attr->name_len++] = copy_attr_name(format, alias, NULL);
}

int GPU_vertformat_attr_id_get(const GPUVertFormat *format, const char *name)
{
  for (int i = 0; i < format->attr_len; i++) {
    const GPUVertAttr *attr = &format->attrs[i];
    for (int j = 0; j < attr->name_len; j++) {
      if (STREQ(name, attr->name[j])) {
        return i;
      }
    }
  }
  return -1;
}

void GPU_vertformat_triple_load(GPUVertFormat *format)
{
#if TRUST_NO_ONE
  assert(!format->packed);
  assert(format->attr_len * 3 < GPU_VERT_ATTR_MAX_LEN);
  assert(format->name_len + format->attr_len * 3 < GPU_VERT_ATTR_MAX_LEN);
#endif
  VertexFormat_pack(format);

  uint old_attr_len = format->attr_len;
  for (uint a_idx = 0; a_idx < old_attr_len; ++a_idx) {
    GPUVertAttr *attr = &format->attrs[a_idx];
    /* Duplicate attr twice */
    for (int i = 1; i < 3; ++i) {
      GPUVertAttr *dst_attr = &format->attrs[format->attr_len];
      memcpy(dst_attr, attr, sizeof(GPUVertAttr));
      /* Increase offset to the next vertex. */
      dst_attr->offset += format->stride * i;
      /* Only copy first name for now. */
      dst_attr->name_len = 0;
      dst_attr->name[dst_attr->name_len++] = copy_attr_name(format, attr->name[0], (i == 1) ? "1" : "2");
      format->attr_len++;
    }

#if TRUST_NO_ONE
    assert(attr->name_len < GPU_VERT_ATTR_MAX_NAMES);
#endif
    /* Add alias to first attr. */
    format->name_len++;
    attr->name[attr->name_len++] = copy_attr_name(format, attr->name[0], "0");
  }
}

static uint padding(uint offset, uint alignment)
{
  const uint mod = offset % alignment;
  return (mod == 0) ? 0 : (alignment - mod);
}

#if PACK_DEBUG
static void show_pack(uint a_idx, uint sz, uint pad)
{
  const char c = 'A' + a_idx;
  for (uint i = 0; i < pad; ++i) {
    putchar('-');
  }
  for (uint i = 0; i < sz; ++i) {
    putchar(c);
  }
}
#endif
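
/* Worked example for the pack routine below (hypothetical format): a 3-float
 * "pos" followed by a 10_10_10_2 "nor" yields
 *
 *   pos: offset 0, sz 12 --> nor: padding(12, 4) == 0, offset 12, sz 4
 *
 * so stride == 16 with no padding, since both attributes are 4-byte aligned.
 * With PACK_DEBUG enabled this prints "AAAAAAAAAAAABBBB". */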
void VertexFormat_pack(GPUVertFormat *format)
{
  /* For now, attributes are packed in the order they were added,
   * making sure each attribute is naturally aligned (add padding where necessary).
   * Later we can implement more efficient packing w/ reordering
   * (keep attribute ID order, adjust their offsets to reorder in buffer). */

  /* TODO: realloc just enough to hold the final combo string. And just enough to
   * hold used attributes, not all 16. */

  GPUVertAttr *a0 = &format->attrs[0];
  a0->offset = 0;
  uint offset = a0->sz;

#if PACK_DEBUG
  show_pack(0, a0->sz, 0);
#endif

  for (uint a_idx = 1; a_idx < format->attr_len; ++a_idx) {
    GPUVertAttr *a = &format->attrs[a_idx];
    uint mid_padding = padding(offset, attr_align(a));
    offset += mid_padding;
    a->offset = offset;
    offset += a->sz;

#if PACK_DEBUG
    show_pack(a_idx, a->sz, mid_padding);
#endif
  }

  uint end_padding = padding(offset, attr_align(a0));

#if PACK_DEBUG
  show_pack(0, 0, end_padding);
  putchar('\n');
#endif
  format->stride = offset + end_padding;
  format->packed = true;
}

static uint calc_input_component_size(const GPUShaderInput *input)
{
  int size = input->size;
  switch (input->gl_type) {
    case GL_FLOAT_VEC2:
    case GL_INT_VEC2:
    case GL_UNSIGNED_INT_VEC2:
      return size * 2;
    case GL_FLOAT_VEC3:
    case GL_INT_VEC3:
    case GL_UNSIGNED_INT_VEC3:
      return size * 3;
    case GL_FLOAT_VEC4:
    case GL_FLOAT_MAT2:
    case GL_INT_VEC4:
    case GL_UNSIGNED_INT_VEC4:
      return size * 4;
    case GL_FLOAT_MAT3:
      return size * 9;
    case GL_FLOAT_MAT4:
      return size * 16;
    case GL_FLOAT_MAT2x3:
    case GL_FLOAT_MAT3x2:
      return size * 6;
    case GL_FLOAT_MAT2x4:
    case GL_FLOAT_MAT4x2:
      return size * 8;
    case GL_FLOAT_MAT3x4:
    case GL_FLOAT_MAT4x3:
      return size * 12;
    default:
      return size;
  }
}

static void get_fetch_mode_and_comp_type(
        int gl_type,
        GPUVertCompType *r_comp_type,
        uint *r_gl_comp_type,
        GPUVertFetchMode *r_fetch_mode)
{
  switch (gl_type) {
    case GL_FLOAT:
    case GL_FLOAT_VEC2:
    case GL_FLOAT_VEC3:
    case GL_FLOAT_VEC4:
    case GL_FLOAT_MAT2:
    case GL_FLOAT_MAT3:
    case GL_FLOAT_MAT4:
    case GL_FLOAT_MAT2x3:
    case GL_FLOAT_MAT2x4:
    case GL_FLOAT_MAT3x2:
    case GL_FLOAT_MAT3x4:
    case GL_FLOAT_MAT4x2:
    case GL_FLOAT_MAT4x3:
      *r_comp_type = GPU_COMP_F32;
      *r_gl_comp_type = GL_FLOAT;
      *r_fetch_mode = GPU_FETCH_FLOAT;
      break;
    case GL_INT:
    case GL_INT_VEC2:
    case GL_INT_VEC3:
    case GL_INT_VEC4:
      *r_comp_type = GPU_COMP_I32;
      *r_gl_comp_type = GL_INT;
      *r_fetch_mode = GPU_FETCH_INT;
      break;
    case GL_UNSIGNED_INT:
    case GL_UNSIGNED_INT_VEC2:
    case GL_UNSIGNED_INT_VEC3:
    case GL_UNSIGNED_INT_VEC4:
      *r_comp_type = GPU_COMP_U32;
      *r_gl_comp_type = GL_UNSIGNED_INT;
      *r_fetch_mode = GPU_FETCH_INT;
      break;
    default:
      BLI_assert(0);
  }
}

void GPU_vertformat_from_interface(GPUVertFormat *format, const GPUShaderInterface *shaderface)
{
  const char *name_buffer = shaderface->name_buffer;

  for (int i = 0; i < GPU_NUM_SHADERINTERFACE_BUCKETS; i++) {
    const GPUShaderInput *input = shaderface->attr_buckets[i];
    if (input == NULL) {
      continue;
    }

    const GPUShaderInput *next = input;
    while (next != NULL) {
      input = next;
      next = input->next;

      format->name_len++; /* multiname support */
      format->attr_len++;

      GPUVertAttr *attr = &format->attrs[input->location];
      attr->name[attr->name_len++] = copy_attr_name(format, name_buffer + input->name_offset, NULL);
      attr->offset = 0; /* offsets & stride are calculated later (during pack) */
      attr->comp_len = calc_input_component_size(input);
      attr->sz = attr->comp_len * 4;
      get_fetch_mode_and_comp_type(input->gl_type, &attr->comp_type, &attr->gl_comp_type, &attr->fetch_mode);
    }
  }
}
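
/* Sketch (illustrative, assuming input->size == 1 for a non-array input): for
 * a shader interface with "in vec4 color;" at location 0, the loop above fills
 * attrs[0] with comp_len 4, sz 16, comp_type GPU_COMP_F32 and fetch mode
 * GPU_FETCH_FLOAT; the format still needs VertexFormat_pack() before use. */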
/* OpenGL ES packs in a different order than desktop GL but component conversion is the same.
 * Of the code here, only struct GPUPackedNormal needs to change. */

#define SIGNED_INT_10_MAX  511
#define SIGNED_INT_10_MIN -512

static int clampi(int x, int min_allowed, int max_allowed)
{
#if TRUST_NO_ONE
  assert(min_allowed <= max_allowed);
#endif
  if (x < min_allowed) {
    return min_allowed;
  }
  else if (x > max_allowed) {
    return max_allowed;
  }
  else {
    return x;
  }
}

static int quantize(float x)
{
  int qx = x * 511.0f;
  return clampi(qx, SIGNED_INT_10_MIN, SIGNED_INT_10_MAX);
}

static int convert_i16(short x)
{
  /* 16-bit signed --> 10-bit signed */
  /* TODO: round? */
  return x >> 6;
}

GPUPackedNormal GPU_normal_convert_i10_v3(const float data[3])
{
  GPUPackedNormal n = {
    .x = quantize(data[0]),
    .y = quantize(data[1]),
    .z = quantize(data[2]),
  };
  return n;
}

GPUPackedNormal GPU_normal_convert_i10_s3(const short data[3])
{
  GPUPackedNormal n = {
    .x = convert_i16(data[0]),
    .y = convert_i16(data[1]),
    .z = convert_i16(data[2]),
  };
  return n;
}
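
/* Usage sketch (illustrative): packing a unit normal into 10_10_10_2.
 *
 *   const float nor[3] = {0.0f, 0.0f, 1.0f};
 *   GPUPackedNormal packed = GPU_normal_convert_i10_v3(nor);
 *
 * Here packed.z == 511 (quantize() scales by 511 and clamps to the signed
 * 10-bit range); packed.w is zero-initialized and may be set manually. */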