/* * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software Foundation, * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * * The Original Code is Copyright (C) 2017 by Blender Foundation. * All rights reserved. */ /** \file * \ingroup draw * * \brief Contains procedural GPU hair drawing methods. */ #include "DRW_render.h" #include "BLI_string_utils.h" #include "BLI_utildefines.h" #include "DNA_customdata_types.h" #include "DNA_modifier_types.h" #include "DNA_particle_types.h" #include "BKE_anim.h" #include "GPU_batch.h" #include "GPU_shader.h" #include "GPU_vertex_buffer.h" #include "draw_hair_private.h" #ifndef __APPLE__ # define USE_TRANSFORM_FEEDBACK #endif typedef enum ParticleRefineShader { PART_REFINE_CATMULL_ROM = 0, PART_REFINE_MAX_SHADER, } ParticleRefineShader; #ifndef USE_TRANSFORM_FEEDBACK typedef struct ParticleRefineCall { struct ParticleRefineCall *next; GPUVertBuf *vbo; DRWShadingGroup *shgrp; uint vert_len; } ParticleRefineCall; static ParticleRefineCall *g_tf_calls = NULL; static int g_tf_id_offset; static int g_tf_target_width; static int g_tf_target_height; #endif static GPUVertBuf *g_dummy_vbo = NULL; static GPUTexture *g_dummy_texture = NULL; static GPUShader *g_refine_shaders[PART_REFINE_MAX_SHADER] = {NULL}; static DRWPass *g_tf_pass; /* XXX can be a problem with multiple DRWManager in the future */ extern char datatoc_common_hair_lib_glsl[]; extern char datatoc_common_hair_refine_vert_glsl[]; extern char datatoc_gpu_shader_3D_smooth_color_frag_glsl[]; static GPUShader *hair_refine_shader_get(ParticleRefineShader sh) { if (g_refine_shaders[sh]) { return g_refine_shaders[sh]; } char *vert_with_lib = BLI_string_joinN(datatoc_common_hair_lib_glsl, datatoc_common_hair_refine_vert_glsl); #ifdef USE_TRANSFORM_FEEDBACK const char *var_names[1] = {"finalColor"}; g_refine_shaders[sh] = DRW_shader_create_with_transform_feedback( vert_with_lib, NULL, "#define HAIR_PHASE_SUBDIV\n", GPU_SHADER_TFB_POINTS, var_names, 1); #else g_refine_shaders[sh] = DRW_shader_create(vert_with_lib, NULL, datatoc_gpu_shader_3D_smooth_color_frag_glsl, "#define HAIR_PHASE_SUBDIV\n" "#define TF_WORKAROUND\n"); #endif MEM_freeN(vert_with_lib); return g_refine_shaders[sh]; } void DRW_hair_init(void) { #ifdef USE_TRANSFORM_FEEDBACK g_tf_pass = DRW_pass_create("Update Hair Pass", 0); #else g_tf_pass = DRW_pass_create("Update Hair Pass", DRW_STATE_WRITE_COLOR); #endif if (g_dummy_vbo == NULL) { /* initialize vertex format */ GPUVertFormat format = {0}; uint dummy_id = GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_F32, 4, GPU_FETCH_FLOAT); g_dummy_vbo = GPU_vertbuf_create_with_format(&format); float vert[4] = {0.0f, 0.0f, 0.0f, 0.0f}; GPU_vertbuf_data_alloc(g_dummy_vbo, 1); GPU_vertbuf_attr_fill(g_dummy_vbo, dummy_id, vert); /* Create vbo immediately to bind to texture buffer. */ GPU_vertbuf_use(g_dummy_vbo); g_dummy_texture = GPU_texture_create_from_vertbuf(g_dummy_vbo); } } static DRWShadingGroup *drw_shgroup_create_hair_procedural_ex(Object *object, ParticleSystem *psys, ModifierData *md, DRWPass *hair_pass, DRWShadingGroup *shgrp_parent, struct GPUMaterial *gpu_mat, GPUShader *gpu_shader) { /* TODO(fclem): Pass the scene as parameter */ const DRWContextState *draw_ctx = DRW_context_state_get(); Scene *scene = draw_ctx->scene; float dupli_mat[4][4]; Object *dupli_parent = DRW_object_get_dupli_parent(object); DupliObject *dupli_object = DRW_object_get_dupli(object); int subdiv = scene->r.hair_subdiv; int thickness_res = (scene->r.hair_type == SCE_HAIR_SHAPE_STRAND) ? 1 : 2; ParticleHairCache *hair_cache; bool need_ft_update; if (psys) { /* Old particle hair. */ need_ft_update = particles_ensure_procedural_data( object, psys, md, &hair_cache, subdiv, thickness_res); } else { /* New hair object. */ need_ft_update = hair_ensure_procedural_data(object, &hair_cache, subdiv, thickness_res); } DRWShadingGroup *shgrp; if (shgrp_parent) { shgrp = DRW_shgroup_create_sub(shgrp_parent); } else if (gpu_mat) { shgrp = DRW_shgroup_material_create(gpu_mat, hair_pass); } else if (gpu_shader) { shgrp = DRW_shgroup_create(gpu_shader, hair_pass); } else { shgrp = NULL; BLI_assert(0); } if (shgrp == NULL) { return NULL; } /* TODO optimize this. Only bind the ones GPUMaterial needs. */ for (int i = 0; i < hair_cache->num_uv_layers; i++) { for (int n = 0; n < MAX_LAYER_NAME_CT && hair_cache->uv_layer_names[i][n][0] != '\0'; n++) { DRW_shgroup_uniform_texture(shgrp, hair_cache->uv_layer_names[i][n], hair_cache->uv_tex[i]); } } for (int i = 0; i < hair_cache->num_col_layers; i++) { for (int n = 0; n < MAX_LAYER_NAME_CT && hair_cache->col_layer_names[i][n][0] != '\0'; n++) { DRW_shgroup_uniform_texture( shgrp, hair_cache->col_layer_names[i][n], hair_cache->col_tex[i]); } } /* Fix issue with certain driver not drawing anything if there is no texture bound to * "ac", "au", "u" or "c". */ if (hair_cache->num_uv_layers == 0) { DRW_shgroup_uniform_texture(shgrp, "u", g_dummy_texture); DRW_shgroup_uniform_texture(shgrp, "au", g_dummy_texture); } if (hair_cache->num_col_layers == 0) { DRW_shgroup_uniform_texture(shgrp, "c", g_dummy_texture); DRW_shgroup_uniform_texture(shgrp, "ac", g_dummy_texture); } if (psys) { if ((dupli_parent != NULL) && (dupli_object != NULL)) { if (dupli_object->type & OB_DUPLICOLLECTION) { copy_m4_m4(dupli_mat, dupli_parent->obmat); } else { copy_m4_m4(dupli_mat, dupli_object->ob->obmat); invert_m4(dupli_mat); mul_m4_m4m4(dupli_mat, object->obmat, dupli_mat); } } else { unit_m4(dupli_mat); } } else { /* New hair object. */ copy_m4_m4(dupli_mat, object->obmat); } /* Get hair shape parameters. */ float hair_rad_shape, hair_rad_root, hair_rad_tip; bool hair_close_tip; if (psys) { /* Old particle hair. */ ParticleSettings *part = psys->part; hair_rad_shape = part->shape; hair_rad_root = part->rad_root * part->rad_scale * 0.5f; hair_rad_tip = part->rad_tip * part->rad_scale * 0.5f; hair_close_tip = (part->shape_flag & PART_SHAPE_CLOSE_TIP) != 0; } else { /* TODO: implement for new hair object. */ hair_rad_shape = 1.0f; hair_rad_root = 0.005f; hair_rad_tip = 0.0f; hair_close_tip = true; } DRW_shgroup_uniform_texture(shgrp, "hairPointBuffer", hair_cache->final[subdiv].proc_tex); DRW_shgroup_uniform_int(shgrp, "hairStrandsRes", &hair_cache->final[subdiv].strands_res, 1); DRW_shgroup_uniform_int_copy(shgrp, "hairThicknessRes", thickness_res); DRW_shgroup_uniform_float_copy(shgrp, "hairRadShape", hair_rad_shape); DRW_shgroup_uniform_vec4_copy(shgrp, "hairDupliMatrix[0]", dupli_mat[0]); DRW_shgroup_uniform_vec4_copy(shgrp, "hairDupliMatrix[1]", dupli_mat[1]); DRW_shgroup_uniform_vec4_copy(shgrp, "hairDupliMatrix[2]", dupli_mat[2]); DRW_shgroup_uniform_vec4_copy(shgrp, "hairDupliMatrix[3]", dupli_mat[3]); DRW_shgroup_uniform_float_copy(shgrp, "hairRadRoot", hair_rad_root); DRW_shgroup_uniform_float_copy(shgrp, "hairRadTip", hair_rad_tip); DRW_shgroup_uniform_bool_copy(shgrp, "hairCloseTip", hair_close_tip); /* TODO(fclem): Until we have a better way to cull the hair and render with orco, bypass * culling test. */ GPUBatch *geom = hair_cache->final[subdiv].proc_hairs[thickness_res - 1]; DRW_shgroup_call_no_cull(shgrp, geom, object); /* Transform Feedback subdiv. */ if (need_ft_update) { int final_points_len = hair_cache->final[subdiv].strands_res * hair_cache->strands_len; if (final_points_len) { GPUShader *tf_shader = hair_refine_shader_get(PART_REFINE_CATMULL_ROM); #ifdef USE_TRANSFORM_FEEDBACK DRWShadingGroup *tf_shgrp = DRW_shgroup_transform_feedback_create( tf_shader, g_tf_pass, hair_cache->final[subdiv].proc_buf); #else DRWShadingGroup *tf_shgrp = DRW_shgroup_create(tf_shader, g_tf_pass); ParticleRefineCall *pr_call = MEM_mallocN(sizeof(*pr_call), __func__); pr_call->next = g_tf_calls; pr_call->vbo = hair_cache->final[subdiv].proc_buf; pr_call->shgrp = tf_shgrp; pr_call->vert_len = final_points_len; g_tf_calls = pr_call; DRW_shgroup_uniform_int(tf_shgrp, "targetHeight", &g_tf_target_height, 1); DRW_shgroup_uniform_int(tf_shgrp, "targetWidth", &g_tf_target_width, 1); DRW_shgroup_uniform_int(tf_shgrp, "idOffset", &g_tf_id_offset, 1); #endif DRW_shgroup_uniform_texture(tf_shgrp, "hairPointBuffer", hair_cache->point_tex); DRW_shgroup_uniform_texture(tf_shgrp, "hairStrandBuffer", hair_cache->strand_tex); DRW_shgroup_uniform_texture(tf_shgrp, "hairStrandSegBuffer", hair_cache->strand_seg_tex); DRW_shgroup_uniform_int( tf_shgrp, "hairStrandsRes", &hair_cache->final[subdiv].strands_res, 1); DRW_shgroup_call_procedural_points(tf_shgrp, NULL, final_points_len); } } return shgrp; } DRWShadingGroup *DRW_shgroup_hair_create( Object *object, ParticleSystem *psys, ModifierData *md, DRWPass *hair_pass, GPUShader *shader) { return drw_shgroup_create_hair_procedural_ex(object, psys, md, hair_pass, NULL, NULL, shader); } DRWShadingGroup *DRW_shgroup_hair_create_sub(Object *object, ParticleSystem *psys, ModifierData *md, DRWShadingGroup *shgrp) { return drw_shgroup_create_hair_procedural_ex(object, psys, md, NULL, shgrp, NULL, NULL); } DRWShadingGroup *DRW_shgroup_material_hair_create(Object *object, ParticleSystem *psys, ModifierData *md, DRWPass *hair_pass, struct GPUMaterial *material) { return drw_shgroup_create_hair_procedural_ex(object, psys, md, hair_pass, NULL, material, NULL); } void DRW_hair_update(void) { #ifndef USE_TRANSFORM_FEEDBACK /** * Workaround to transform feedback not working on mac. * On some system it crashes (see T58489) and on some other it renders garbage (see T60171). * * So instead of using transform feedback we render to a texture, * read back the result to system memory and re-upload as VBO data. * It is really not ideal performance wise, but it is the simplest * and the most local workaround that still uses the power of the GPU. */ if (g_tf_calls == NULL) { return; } /* Search ideal buffer size. */ uint max_size = 0; for (ParticleRefineCall *pr_call = g_tf_calls; pr_call; pr_call = pr_call->next) { max_size = max_ii(max_size, pr_call->vert_len); } /* Create target Texture / Framebuffer */ /* Don't use max size as it can be really heavy and fail. * Do chunks of maximum 2048 * 2048 hair points. */ int width = 2048; int height = min_ii(width, 1 + max_size / width); GPUTexture *tex = DRW_texture_pool_query_2d(width, height, GPU_RGBA32F, (void *)DRW_hair_update); g_tf_target_height = height; g_tf_target_width = width; GPUFrameBuffer *fb = NULL; GPU_framebuffer_ensure_config(&fb, { GPU_ATTACHMENT_NONE, GPU_ATTACHMENT_TEXTURE(tex), }); float *data = MEM_mallocN(sizeof(float) * 4 * width * height, "tf fallback buffer"); GPU_framebuffer_bind(fb); while (g_tf_calls != NULL) { ParticleRefineCall *pr_call = g_tf_calls; g_tf_calls = g_tf_calls->next; g_tf_id_offset = 0; while (pr_call->vert_len > 0) { int max_read_px_len = min_ii(width * height, pr_call->vert_len); DRW_draw_pass_subset(g_tf_pass, pr_call->shgrp, pr_call->shgrp); /* Readback result to main memory. */ GPU_framebuffer_read_color(fb, 0, 0, width, height, 4, 0, data); /* Upload back to VBO. */ GPU_vertbuf_use(pr_call->vbo); glBufferSubData(GL_ARRAY_BUFFER, sizeof(float) * 4 * g_tf_id_offset, sizeof(float) * 4 * max_read_px_len, data); g_tf_id_offset += max_read_px_len; pr_call->vert_len -= max_read_px_len; } MEM_freeN(pr_call); } MEM_freeN(data); GPU_framebuffer_free(fb); #else /* TODO(fclem): replace by compute shader. */ /* Just render using transform feedback. */ DRW_draw_pass(g_tf_pass); #endif } void DRW_hair_free(void) { for (int i = 0; i < PART_REFINE_MAX_SHADER; i++) { DRW_SHADER_FREE_SAFE(g_refine_shaders[i]); } GPU_VERTBUF_DISCARD_SAFE(g_dummy_vbo); DRW_TEXTURE_FREE_SAFE(g_dummy_texture); }