diff options
author | Clément Foucault <foucault.clem@gmail.com> | 2020-09-12 07:10:11 +0300 |
---|---|---|
committer | Clément Foucault <foucault.clem@gmail.com> | 2020-09-12 16:29:54 +0300 |
commit | 136bdb561b4ce05788e7b654c7e734cc35664b91 (patch) | |
tree | 7b1dd88c0e36f02498a66f107b99252f6f1c76d5 /source/blender/gpu | |
parent | a442da62dc6ea14c43a7aba04a600c9ba7cd7f1b (diff) |
GPU: Add Image Load Store extension support
This wraps the functionality used to speed up EEVEE volumetrics.
This also touches the rendering code of EEVEE, as it should fix a misuse of
the GL barrier. The barrier changed type and location, and an unused barrier
was removed.
Diffstat (limited to 'source/blender/gpu')
-rw-r--r-- | source/blender/gpu/GPU_capabilities.h | 2 | ||||
-rw-r--r-- | source/blender/gpu/GPU_state.h | 10 | ||||
-rw-r--r-- | source/blender/gpu/GPU_texture.h | 4 | ||||
-rw-r--r-- | source/blender/gpu/intern/gpu_capabilities.cc | 5 | ||||
-rw-r--r-- | source/blender/gpu/intern/gpu_capabilities_private.hh | 3 | ||||
-rw-r--r-- | source/blender/gpu/intern/gpu_shader_interface.hh | 1 | ||||
-rw-r--r-- | source/blender/gpu/intern/gpu_state.cc | 12 | ||||
-rw-r--r-- | source/blender/gpu/intern/gpu_state_private.hh | 6 | ||||
-rw-r--r-- | source/blender/gpu/intern/gpu_texture.cc | 15 | ||||
-rw-r--r-- | source/blender/gpu/opengl/gl_backend.cc | 15 | ||||
-rw-r--r-- | source/blender/gpu/opengl/gl_context.hh | 2 | ||||
-rw-r--r-- | source/blender/gpu/opengl/gl_debug.cc | 20 | ||||
-rw-r--r-- | source/blender/gpu/opengl/gl_shader_interface.cc | 37 | ||||
-rw-r--r-- | source/blender/gpu/opengl/gl_state.cc | 95 | ||||
-rw-r--r-- | source/blender/gpu/opengl/gl_state.hh | 24 |
15 files changed, 238 insertions, 13 deletions
diff --git a/source/blender/gpu/GPU_capabilities.h b/source/blender/gpu/GPU_capabilities.h index b8a48735548..9d55fe73708 100644 --- a/source/blender/gpu/GPU_capabilities.h +++ b/source/blender/gpu/GPU_capabilities.h @@ -45,6 +45,8 @@ bool GPU_depth_blitting_workaround(void); bool GPU_use_main_context_workaround(void); bool GPU_crappy_amd_driver(void); +bool GPU_shader_image_load_store_support(void); + bool GPU_mem_stats_supported(void); void GPU_mem_stats_get(int *totalmem, int *freemem); diff --git a/source/blender/gpu/GPU_state.h b/source/blender/gpu/GPU_state.h index 5e872001267..aa32c6c75ba 100644 --- a/source/blender/gpu/GPU_state.h +++ b/source/blender/gpu/GPU_state.h @@ -35,6 +35,14 @@ typedef enum eGPUWriteMask { ENUM_OPERATORS(eGPUWriteMask) +typedef enum eGPUBarrier { + GPU_BARRIER_NONE = 0, + GPU_BARRIER_SHADER_IMAGE_ACCESS = (1 << 0), + GPU_BARRIER_TEXTURE_FETCH = (1 << 1), +} eGPUBarrier; + +ENUM_OPERATORS(eGPUBarrier) + /** * Defines the fixed pipeline blending equation. * SRC is the output color from the shader. 
@@ -152,6 +160,8 @@ eGPUStencilTest GPU_stencil_test_get(void); void GPU_flush(void); void GPU_finish(void); +void GPU_memory_barrier(eGPUBarrier barrier); + #ifdef __cplusplus } #endif diff --git a/source/blender/gpu/GPU_texture.h b/source/blender/gpu/GPU_texture.h index 2ce2ba093cf..fafa45fe0fe 100644 --- a/source/blender/gpu/GPU_texture.h +++ b/source/blender/gpu/GPU_texture.h @@ -243,6 +243,10 @@ void GPU_texture_bind_ex(GPUTexture *tex, eGPUSamplerState state, int unit, cons void GPU_texture_unbind(GPUTexture *tex); void GPU_texture_unbind_all(void); +void GPU_texture_image_bind(GPUTexture *tex, int unit); +void GPU_texture_image_unbind(GPUTexture *tex); +void GPU_texture_image_unbind_all(void); + void GPU_texture_copy(GPUTexture *dst, GPUTexture *src); void GPU_texture_generate_mipmap(GPUTexture *tex); diff --git a/source/blender/gpu/intern/gpu_capabilities.cc b/source/blender/gpu/intern/gpu_capabilities.cc index a79ce27ba63..63e29654e1c 100644 --- a/source/blender/gpu/intern/gpu_capabilities.cc +++ b/source/blender/gpu/intern/gpu_capabilities.cc @@ -102,6 +102,11 @@ bool GPU_crappy_amd_driver(void) return GCaps.broken_amd_driver; } +bool GPU_shader_image_load_store_support(void) +{ + return GCaps.shader_image_load_store_support; +} + /** \} */ /* -------------------------------------------------------------------- */ diff --git a/source/blender/gpu/intern/gpu_capabilities_private.hh b/source/blender/gpu/intern/gpu_capabilities_private.hh index a51525fa932..abe5b706a7d 100644 --- a/source/blender/gpu/intern/gpu_capabilities_private.hh +++ b/source/blender/gpu/intern/gpu_capabilities_private.hh @@ -42,6 +42,7 @@ struct GPUCapabilities { int max_textures_geom = 0; int max_textures_frag = 0; bool mem_stats_support = false; + bool shader_image_load_store_support = false; /* OpenGL related workarounds. 
*/ bool mip_render_workaround = false; bool depth_blitting_workaround = false; @@ -52,4 +53,4 @@ struct GPUCapabilities { extern GPUCapabilities GCaps; -} // namespace blender::gpu
\ No newline at end of file +} // namespace blender::gpu diff --git a/source/blender/gpu/intern/gpu_shader_interface.hh b/source/blender/gpu/intern/gpu_shader_interface.hh index f76339d3adb..fce6fda5f14 100644 --- a/source/blender/gpu/intern/gpu_shader_interface.hh +++ b/source/blender/gpu/intern/gpu_shader_interface.hh @@ -63,6 +63,7 @@ class ShaderInterface { /** Enabled bindpoints that needs to be fed with data. */ uint16_t enabled_attr_mask_ = 0; uint16_t enabled_ubo_mask_ = 0; + uint8_t enabled_ima_mask_ = 0; uint64_t enabled_tex_mask_ = 0; /** Location of builtin uniforms. Fast access, no lookup needed. */ int32_t builtins_[GPU_NUM_UNIFORMS]; diff --git a/source/blender/gpu/intern/gpu_state.cc b/source/blender/gpu/intern/gpu_state.cc index be523020e8a..01a07ee3e4f 100644 --- a/source/blender/gpu/intern/gpu_state.cc +++ b/source/blender/gpu/intern/gpu_state.cc @@ -30,7 +30,6 @@ #include "BKE_global.h" -#include "GPU_glew.h" #include "GPU_state.h" #include "gpu_context_private.hh" @@ -309,6 +308,17 @@ void GPU_finish(void) /** \} */ /* -------------------------------------------------------------------- */ +/** \name Synchronisation Utils + * \{ */ + +void GPU_memory_barrier(eGPUBarrier barrier) +{ + Context::get()->state_manager->issue_barrier(barrier); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ /** \name Default OpenGL State * * This is called on startup, for opengl offscreen render. 
diff --git a/source/blender/gpu/intern/gpu_state_private.hh b/source/blender/gpu/intern/gpu_state_private.hh index 9fee45e7bd4..a21093f00a2 100644 --- a/source/blender/gpu/intern/gpu_state_private.hh +++ b/source/blender/gpu/intern/gpu_state_private.hh @@ -163,10 +163,16 @@ class GPUStateManager { virtual void apply_state(void) = 0; + virtual void issue_barrier(eGPUBarrier barrier_bits) = 0; + virtual void texture_bind(Texture *tex, eGPUSamplerState sampler, int unit) = 0; virtual void texture_unbind(Texture *tex) = 0; virtual void texture_unbind_all(void) = 0; + virtual void image_bind(Texture *tex, int unit) = 0; + virtual void image_unbind(Texture *tex) = 0; + virtual void image_unbind_all(void) = 0; + virtual void texture_unpack_row_length_set(uint len) = 0; }; diff --git a/source/blender/gpu/intern/gpu_texture.cc b/source/blender/gpu/intern/gpu_texture.cc index b22fd53f0f6..09dbf04210a 100644 --- a/source/blender/gpu/intern/gpu_texture.cc +++ b/source/blender/gpu/intern/gpu_texture.cc @@ -418,6 +418,21 @@ void GPU_texture_unbind_all(void) Context::get()->state_manager->texture_unbind_all(); } +void GPU_texture_image_bind(GPUTexture *tex, int unit) +{ + Context::get()->state_manager->image_bind(unwrap(tex), unit); +} + +void GPU_texture_image_unbind(GPUTexture *tex) +{ + Context::get()->state_manager->image_unbind(unwrap(tex)); +} + +void GPU_texture_image_unbind_all(void) +{ + Context::get()->state_manager->image_unbind_all(); +} + void GPU_texture_generate_mipmap(GPUTexture *tex) { reinterpret_cast<Texture *>(tex)->generate_mipmap(); diff --git a/source/blender/gpu/opengl/gl_backend.cc b/source/blender/gpu/opengl/gl_backend.cc index edaa84cdcf8..46e048d7f7c 100644 --- a/source/blender/gpu/opengl/gl_backend.cc +++ b/source/blender/gpu/opengl/gl_backend.cc @@ -210,6 +210,7 @@ static void detect_workarounds(void) GLContext::debug_layer_workaround = true; GLContext::unused_fb_slot_workaround = true; /* Turn off extensions. 
*/ + GCaps.shader_image_load_store_support = false; GLContext::base_instance_support = false; GLContext::clear_texture_support = false; GLContext::copy_image_support = false; @@ -250,17 +251,20 @@ static void detect_workarounds(void) (strstr(version, "4.5.13399") || strstr(version, "4.5.13417") || strstr(version, "4.5.13422"))) { GLContext::unused_fb_slot_workaround = true; + GCaps.shader_image_load_store_support = false; GCaps.broken_amd_driver = true; } /* We have issues with this specific renderer. (see T74024) */ if (GPU_type_matches(GPU_DEVICE_ATI, GPU_OS_UNIX, GPU_DRIVER_OPENSOURCE) && strstr(renderer, "AMD VERDE")) { GLContext::unused_fb_slot_workaround = true; + GCaps.shader_image_load_store_support = false; GCaps.broken_amd_driver = true; } /* Fix slowdown on this particular driver. (see T77641) */ if (GPU_type_matches(GPU_DEVICE_ATI, GPU_OS_UNIX, GPU_DRIVER_OPENSOURCE) && strstr(version, "Mesa 19.3.4")) { + GCaps.shader_image_load_store_support = false; GCaps.broken_amd_driver = true; } /* There is an issue with the #glBlitFramebuffer on MacOS with radeon pro graphics. @@ -349,10 +353,10 @@ static void detect_workarounds(void) } /** Internal capabilities. */ -GLint GLContext::max_texture_3d_size; -GLint GLContext::max_cubemap_size; -GLint GLContext::max_ubo_size; -GLint GLContext::max_ubo_binds; +GLint GLContext::max_cubemap_size = 0; +GLint GLContext::max_texture_3d_size = 0; +GLint GLContext::max_ubo_binds = 0; +GLint GLContext::max_ubo_size = 0; /** Extensions. */ bool GLContext::base_instance_support = false; bool GLContext::clear_texture_support = false; @@ -383,6 +387,7 @@ void GLBackend::capabilities_init(void) glGetIntegerv(GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS, &GCaps.max_textures_geom); glGetIntegerv(GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS, &GCaps.max_textures); GCaps.mem_stats_support = GLEW_NVX_gpu_memory_info || GLEW_ATI_meminfo; + GCaps.shader_image_load_store_support = GLEW_ARB_shader_image_load_store; /* GL specific capabilities. 
*/ glGetIntegerv(GL_MAX_3D_TEXTURE_SIZE, &GLContext::max_texture_3d_size); glGetIntegerv(GL_MAX_CUBE_MAP_TEXTURE_SIZE, &GLContext::max_cubemap_size); @@ -413,4 +418,4 @@ void GLBackend::capabilities_init(void) /** \} */ -} // namespace blender::gpu
\ No newline at end of file +} // namespace blender::gpu diff --git a/source/blender/gpu/opengl/gl_context.hh b/source/blender/gpu/opengl/gl_context.hh index 9822c842ce7..4d9c2470db0 100644 --- a/source/blender/gpu/opengl/gl_context.hh +++ b/source/blender/gpu/opengl/gl_context.hh @@ -56,8 +56,8 @@ class GLSharedOrphanLists { class GLContext : public Context { public: /** Capabilities. */ - static GLint max_texture_3d_size; static GLint max_cubemap_size; + static GLint max_texture_3d_size; static GLint max_ubo_size; static GLint max_ubo_binds; /** Extensions. */ diff --git a/source/blender/gpu/opengl/gl_debug.cc b/source/blender/gpu/opengl/gl_debug.cc index db99e90d0ec..747d8ee2e3e 100644 --- a/source/blender/gpu/opengl/gl_debug.cc +++ b/source/blender/gpu/opengl/gl_debug.cc @@ -200,13 +200,16 @@ void check_gl_resources(const char *info) * be big enough to feed the data range the shader awaits. */ uint16_t ubo_needed = interface->enabled_ubo_mask_; ubo_needed &= ~ctx->bound_ubo_slots; - /* NOTE: This only check binding. To be valid, the bound texture needs to * be the same format/target the shader expects. */ uint64_t tex_needed = interface->enabled_tex_mask_; tex_needed &= ~GLContext::state_manager_active_get()->bound_texture_slots(); + /* NOTE: This only check binding. To be valid, the bound image needs to + * be the same format/target the shader expects. */ + uint8_t ima_needed = interface->enabled_ima_mask_; + ima_needed &= ~GLContext::state_manager_active_get()->bound_image_slots(); - if (ubo_needed == 0 && tex_needed == 0) { + if (ubo_needed == 0 && tex_needed == 0 && ima_needed == 0) { return; } @@ -223,6 +226,7 @@ void check_gl_resources(const char *info) for (int i = 0; tex_needed != 0; i++, tex_needed >>= 1) { if ((tex_needed & 1) != 0) { + /* FIXME: texture_get might return an image input instead. 
*/ const ShaderInput *tex_input = interface->texture_get(i); const char *tex_name = interface->input_name_get(tex_input); const char *sh_name = ctx->shader->name_get(); @@ -231,6 +235,18 @@ void check_gl_resources(const char *info) debug_callback(0, GL_DEBUG_TYPE_ERROR, 0, GL_DEBUG_SEVERITY_HIGH, 0, msg, NULL); } } + + for (int i = 0; ima_needed != 0; i++, ima_needed >>= 1) { + if ((ima_needed & 1) != 0) { + /* FIXME: texture_get might return a texture input instead. */ + const ShaderInput *tex_input = interface->texture_get(i); + const char *tex_name = interface->input_name_get(tex_input); + const char *sh_name = ctx->shader->name_get(); + char msg[256]; + SNPRINTF(msg, "Missing Image bind at slot %d : %s > %s : %s", i, sh_name, tex_name, info); + debug_callback(0, GL_DEBUG_TYPE_ERROR, 0, GL_DEBUG_SEVERITY_HIGH, 0, msg, NULL); + } + } } void raise_gl_error(const char *info) diff --git a/source/blender/gpu/opengl/gl_shader_interface.cc b/source/blender/gpu/opengl/gl_shader_interface.cc index d611efcd975..2d55c222e9c 100644 --- a/source/blender/gpu/opengl/gl_shader_interface.cc +++ b/source/blender/gpu/opengl/gl_shader_interface.cc @@ -100,6 +100,31 @@ static inline int sampler_binding(int32_t program, return -1; } } + +static inline int image_binding(int32_t program, + uint32_t uniform_index, + int32_t uniform_location, + int *image_len) +{ + /* Identify image uniforms and asign image units to them. */ + GLint type; + glGetActiveUniformsiv(program, 1, &uniform_index, GL_UNIFORM_TYPE, &type); + + switch (type) { + case GL_IMAGE_1D: + case GL_IMAGE_2D: + case GL_IMAGE_3D: { + /* For now just assign a consecutive index. In the future, we should set it in + * the shader using layout(binding = i) and query its value. 
*/ + int binding = *image_len; + glUniform1i(uniform_location, binding); + (*image_len)++; + return binding; + } + default: + return -1; + } +} /** \} */ /* -------------------------------------------------------------------- */ @@ -207,8 +232,8 @@ GLShaderInterface::GLShaderInterface(GLuint program) enabled_ubo_mask_ |= (1 << input->binding); } - /* Uniforms */ - for (int i = 0, sampler = 0; i < active_uniform_len; i++) { + /* Uniforms & samplers & images */ + for (int i = 0, sampler = 0, image = 0; i < active_uniform_len; i++) { if (BLI_BITMAP_TEST(uniforms_from_blocks, i)) { continue; } @@ -224,6 +249,12 @@ GLShaderInterface::GLShaderInterface(GLuint program) name_buffer_offset += this->set_input_name(input, name, name_len); enabled_tex_mask_ |= (input->binding != -1) ? (1lu << input->binding) : 0lu; + + if (input->binding == -1) { + input->binding = image_binding(program, i, input->location, &image); + + enabled_ima_mask_ |= (input->binding != -1) ? (1lu << input->binding) : 0lu; + } } /* Builtin Uniforms */ @@ -296,4 +327,4 @@ void GLShaderInterface::ref_remove(GLVaoCache *ref) /** \} */ -} // namespace blender::gpu
\ No newline at end of file +} // namespace blender::gpu diff --git a/source/blender/gpu/opengl/gl_state.cc b/source/blender/gpu/opengl/gl_state.cc index 1678760e9cd..93753768928 100644 --- a/source/blender/gpu/opengl/gl_state.cc +++ b/source/blender/gpu/opengl/gl_state.cc @@ -76,6 +76,7 @@ void GLStateManager::apply_state(void) this->set_state(this->state); this->set_mutable_state(this->mutable_state); this->texture_bind_apply(); + this->image_bind_apply(); active_fb->apply_state(); }; @@ -538,4 +539,98 @@ uint64_t GLStateManager::bound_texture_slots(void) /** \} */ +/* -------------------------------------------------------------------- */ +/** \name Image Binding (from image load store) + * \{ */ + +void GLStateManager::image_bind(Texture *tex_, int unit) +{ + /* Minimum support is 8 image in the fragment shader. No image for other stages. */ + BLI_assert(GPU_shader_image_load_store_support() && unit < 8); + GLTexture *tex = static_cast<GLTexture *>(tex_); + if (G.debug & G_DEBUG_GPU) { + tex->check_feedback_loop(); + } + images_[unit] = tex->tex_id_; + formats_[unit] = to_gl_internal_format(tex->format_); + tex->is_bound_ = true; + dirty_image_binds_ |= 1ULL << unit; +} + +void GLStateManager::image_unbind(Texture *tex_) +{ + GLTexture *tex = static_cast<GLTexture *>(tex_); + if (!tex->is_bound_) { + return; + } + + GLuint tex_id = tex->tex_id_; + for (int i = 0; i < ARRAY_SIZE(images_); i++) { + if (images_[i] == tex_id) { + images_[i] = 0; + dirty_image_binds_ |= 1ULL << i; + } + } + tex->is_bound_ = false; +} + +void GLStateManager::image_unbind_all(void) +{ + for (int i = 0; i < ARRAY_SIZE(images_); i++) { + if (images_[i] != 0) { + images_[i] = 0; + dirty_image_binds_ |= 1ULL << i; + } + } + this->image_bind_apply(); +} + +void GLStateManager::image_bind_apply(void) +{ + if (dirty_image_binds_ == 0) { + return; + } + uint32_t dirty_bind = dirty_image_binds_; + dirty_image_binds_ = 0; + + int first = bitscan_forward_uint(dirty_bind); + int last = 32 - 
bitscan_reverse_uint(dirty_bind); + int count = last - first; + + if (GLContext::multi_bind_support) { + glBindImageTextures(first, count, images_ + first); + } + else { + for (int unit = first; unit < last; unit++) { + if ((dirty_bind >> unit) & 1UL) { + glBindImageTexture(unit, images_[unit], 0, GL_TRUE, 0, GL_READ_WRITE, formats_[unit]); + } + } + } +} + +uint8_t GLStateManager::bound_image_slots(void) +{ + uint8_t bound_slots = 0; + for (int i = 0; i < ARRAY_SIZE(images_); i++) { + if (images_[i] != 0) { + bound_slots |= 1ULL << i; + } + } + return bound_slots; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Memory barrier + * \{ */ + +void GLStateManager::issue_barrier(eGPUBarrier barrier_bits) +{ + glMemoryBarrier(to_gl(barrier_bits)); +} + +/** \} */ + } // namespace blender::gpu diff --git a/source/blender/gpu/opengl/gl_state.hh b/source/blender/gpu/opengl/gl_state.hh index fb2ed3403f7..b636c08ec0d 100644 --- a/source/blender/gpu/opengl/gl_state.hh +++ b/source/blender/gpu/opengl/gl_state.hh @@ -64,19 +64,30 @@ class GLStateManager : public GPUStateManager { GLuint samplers_[64] = {0}; uint64_t dirty_texture_binds_ = 0; + GLuint images_[8] = {0}; + GLenum formats_[8] = {0}; + uint8_t dirty_image_binds_ = 0; + public: GLStateManager(); void apply_state(void) override; + void issue_barrier(eGPUBarrier barrier_bits) override; + void texture_bind(Texture *tex, eGPUSamplerState sampler, int unit) override; void texture_bind_temp(GLTexture *tex); void texture_unbind(Texture *tex) override; void texture_unbind_all(void) override; + void image_bind(Texture *tex, int unit) override; + void image_unbind(Texture *tex) override; + void image_unbind_all(void) override; + void texture_unpack_row_length_set(uint len) override; uint64_t bound_texture_slots(void); + uint8_t bound_image_slots(void); private: static void set_write_mask(const eGPUWriteMask value); @@ -95,9 +106,22 @@ class GLStateManager : public 
GPUStateManager { void set_mutable_state(const GPUStateMutable &state); void texture_bind_apply(void); + void image_bind_apply(void); MEM_CXX_CLASS_ALLOC_FUNCS("GLStateManager") }; +static inline GLbitfield to_gl(eGPUBarrier barrier_bits) +{ + GLbitfield barrier = 0; + if (barrier_bits & GPU_BARRIER_SHADER_IMAGE_ACCESS) { + barrier |= GL_SHADER_IMAGE_ACCESS_BARRIER_BIT; + } + if (barrier_bits & GPU_BARRIER_TEXTURE_FETCH) { + barrier |= GL_TEXTURE_FETCH_BARRIER_BIT; + } + return barrier; +} + } // namespace gpu } // namespace blender |