Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Clément Foucault <foucault.clem@gmail.com> 2020-09-12 07:10:11 +0300
committer Clément Foucault <foucault.clem@gmail.com> 2020-09-12 16:29:54 +0300
commit 136bdb561b4ce05788e7b654c7e734cc35664b91 (patch)
tree 7b1dd88c0e36f02498a66f107b99252f6f1c76d5
parent a442da62dc6ea14c43a7aba04a600c9ba7cd7f1b (diff)
GPU: Add Image Load Store extension support
This wraps the functionality used to speed up EEVEE volumetrics. This touches the rendering code of EEVEE as it should fix a misuse of the GL barrier. The barrier changed type and location, and an unused barrier was removed.
-rw-r--r--source/blender/draw/engines/eevee/eevee_private.h4
-rw-r--r--source/blender/draw/engines/eevee/eevee_volumes.c26
-rw-r--r--source/blender/draw/engines/eevee/shaders/volumetric_integration_frag.glsl4
-rw-r--r--source/blender/draw/intern/DRW_render.h3
-rw-r--r--source/blender/draw/intern/draw_manager.h2
-rw-r--r--source/blender/draw/intern/draw_manager_data.c16
-rw-r--r--source/blender/draw/intern/draw_manager_exec.c6
-rw-r--r--source/blender/gpu/GPU_capabilities.h2
-rw-r--r--source/blender/gpu/GPU_state.h10
-rw-r--r--source/blender/gpu/GPU_texture.h4
-rw-r--r--source/blender/gpu/intern/gpu_capabilities.cc5
-rw-r--r--source/blender/gpu/intern/gpu_capabilities_private.hh3
-rw-r--r--source/blender/gpu/intern/gpu_shader_interface.hh1
-rw-r--r--source/blender/gpu/intern/gpu_state.cc12
-rw-r--r--source/blender/gpu/intern/gpu_state_private.hh6
-rw-r--r--source/blender/gpu/intern/gpu_texture.cc15
-rw-r--r--source/blender/gpu/opengl/gl_backend.cc15
-rw-r--r--source/blender/gpu/opengl/gl_context.hh2
-rw-r--r--source/blender/gpu/opengl/gl_debug.cc20
-rw-r--r--source/blender/gpu/opengl/gl_shader_interface.cc37
-rw-r--r--source/blender/gpu/opengl/gl_state.cc95
-rw-r--r--source/blender/gpu/opengl/gl_state.hh24
22 files changed, 278 insertions, 34 deletions
diff --git a/source/blender/draw/engines/eevee/eevee_private.h b/source/blender/draw/engines/eevee/eevee_private.h
index 8216d2545ac..e731ed071b2 100644
--- a/source/blender/draw/engines/eevee/eevee_private.h
+++ b/source/blender/draw/engines/eevee/eevee_private.h
@@ -84,9 +84,7 @@ extern struct DrawEngineType draw_engine_eevee_type;
#define EEVEE_PROBE_MAX min_ii(MAX_PROBE, GPU_max_texture_layers() / 6)
#define EEVEE_VELOCITY_TILE_SIZE 32
-#define USE_VOLUME_OPTI \
- (GLEW_ARB_shader_image_load_store && GLEW_ARB_shading_language_420pack && \
- !GPU_crappy_amd_driver())
+#define USE_VOLUME_OPTI (GPU_shader_image_load_store_support())
#define SWAP_DOUBLE_BUFFERS() \
{ \
diff --git a/source/blender/draw/engines/eevee/eevee_volumes.c b/source/blender/draw/engines/eevee/eevee_volumes.c
index 69b916244b5..93701887b51 100644
--- a/source/blender/draw/engines/eevee/eevee_volumes.c
+++ b/source/blender/draw/engines/eevee/eevee_volumes.c
@@ -601,6 +601,10 @@ void EEVEE_volumes_cache_finish(EEVEE_ViewLayerData *sldata, EEVEE_Data *vedata)
DRW_shgroup_uniform_block(grp, "common_block", sldata->common_ubo);
DRW_shgroup_uniform_block(grp, "probe_block", sldata->probe_ubo);
DRW_shgroup_uniform_block(grp, "renderpass_block", sldata->renderpass_ubo.combined);
+ if (USE_VOLUME_OPTI) {
+ DRW_shgroup_uniform_image_ref(grp, "finalScattering_img", &txl->volume_scatter_history);
+ DRW_shgroup_uniform_image_ref(grp, "finalTransmittance_img", &txl->volume_transmit_history);
+ }
DRW_shgroup_call_procedural_triangles(
grp, NULL, USE_VOLUME_OPTI ? 1 : common_data->vol_tex_size[2]);
@@ -610,6 +614,7 @@ void EEVEE_volumes_cache_finish(EEVEE_ViewLayerData *sldata, EEVEE_Data *vedata)
DRW_shgroup_uniform_texture_ref(grp, "inScattering", &txl->volume_scatter);
DRW_shgroup_uniform_texture_ref(grp, "inTransmittance", &txl->volume_transmit);
DRW_shgroup_uniform_texture_ref(grp, "inSceneDepth", &e_data.depth_src);
+ DRW_shgroup_uniform_block(grp, "light_block", sldata->light_ubo);
DRW_shgroup_uniform_block(grp, "common_block", sldata->common_ubo);
DRW_shgroup_uniform_block(grp, "probe_block", sldata->probe_ubo);
DRW_shgroup_uniform_block(grp, "renderpass_block", sldata->renderpass_ubo.combined);
@@ -714,15 +719,7 @@ void EEVEE_volumes_compute(EEVEE_ViewLayerData *sldata, EEVEE_Data *vedata)
DRW_draw_pass(psl->volumetric_scatter_ps);
if (USE_VOLUME_OPTI) {
- int tex_scatter = GPU_texture_opengl_bindcode(txl->volume_scatter_history);
- int tex_transmit = GPU_texture_opengl_bindcode(txl->volume_transmit_history);
- /* TODO(fclem) Encapsulate these GL calls into DRWManager. */
- glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
- /* Subtlety here! we need to tell the GL that the texture is layered (GL_TRUE)
- * in order to bind the full 3D texture and not just a 2D slice. */
- glBindImageTexture(0, tex_scatter, 0, GL_TRUE, 0, GL_WRITE_ONLY, GL_R11F_G11F_B10F);
- glBindImageTexture(1, tex_transmit, 0, GL_TRUE, 0, GL_WRITE_ONLY, GL_R11F_G11F_B10F);
-
+ /* Avoid feedback loop assert. */
GPU_framebuffer_bind(fbl->volumetric_fb);
}
else {
@@ -731,13 +728,6 @@ void EEVEE_volumes_compute(EEVEE_ViewLayerData *sldata, EEVEE_Data *vedata)
DRW_draw_pass(psl->volumetric_integration_ps);
- if (USE_VOLUME_OPTI) {
- glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
-
- glBindImageTexture(0, 0, 0, GL_TRUE, 0, GL_WRITE_ONLY, GL_R11F_G11F_B10F);
- glBindImageTexture(1, 0, 0, GL_TRUE, 0, GL_WRITE_ONLY, GL_R11F_G11F_B10F);
- }
-
SWAP(struct GPUFrameBuffer *, fbl->volumetric_scat_fb, fbl->volumetric_integ_fb);
SWAP(GPUTexture *, txl->volume_scatter, txl->volume_scatter_history);
SWAP(GPUTexture *, txl->volume_transmit, txl->volume_transmit_history);
@@ -763,6 +753,10 @@ void EEVEE_volumes_resolve(EEVEE_ViewLayerData *UNUSED(sldata), EEVEE_Data *veda
DefaultTextureList *dtxl = DRW_viewport_texture_list_get();
e_data.depth_src = dtxl->depth;
+ if (USE_VOLUME_OPTI) {
+ GPU_memory_barrier(GPU_BARRIER_TEXTURE_FETCH);
+ }
+
/* Apply for opaque geometry. */
GPU_framebuffer_bind(fbl->main_color_fb);
DRW_draw_pass(psl->volumetric_resolve_ps);
diff --git a/source/blender/draw/engines/eevee/shaders/volumetric_integration_frag.glsl b/source/blender/draw/engines/eevee/shaders/volumetric_integration_frag.glsl
index f4276bd61bd..12b7d8acbea 100644
--- a/source/blender/draw/engines/eevee/shaders/volumetric_integration_frag.glsl
+++ b/source/blender/draw/engines/eevee/shaders/volumetric_integration_frag.glsl
@@ -11,8 +11,8 @@ uniform sampler3D volumeScattering; /* Result of the scatter step */
uniform sampler3D volumeExtinction;
#ifdef USE_VOLUME_OPTI
-uniform layout(binding = 0, r11f_g11f_b10f) writeonly restrict image3D finalScattering_img;
-uniform layout(binding = 1, r11f_g11f_b10f) writeonly restrict image3D finalTransmittance_img;
+uniform layout(r11f_g11f_b10f) writeonly restrict image3D finalScattering_img;
+uniform layout(r11f_g11f_b10f) writeonly restrict image3D finalTransmittance_img;
vec3 finalScattering;
vec3 finalTransmittance;
diff --git a/source/blender/draw/intern/DRW_render.h b/source/blender/draw/intern/DRW_render.h
index 8e3562216e9..30c6f0ad4dc 100644
--- a/source/blender/draw/intern/DRW_render.h
+++ b/source/blender/draw/intern/DRW_render.h
@@ -557,6 +557,9 @@ void DRW_shgroup_uniform_ivec4(DRWShadingGroup *shgroup,
int arraysize);
void DRW_shgroup_uniform_mat3(DRWShadingGroup *shgroup, const char *name, const float (*value)[3]);
void DRW_shgroup_uniform_mat4(DRWShadingGroup *shgroup, const char *name, const float (*value)[4]);
+/* Only to be used when image load store is supported (GPU_shader_image_load_store_support()). */
+void DRW_shgroup_uniform_image(DRWShadingGroup *shgroup, const char *name, const GPUTexture *tex);
+void DRW_shgroup_uniform_image_ref(DRWShadingGroup *shgroup, const char *name, GPUTexture **tex);
/* Store value instead of referencing it. */
void DRW_shgroup_uniform_int_copy(DRWShadingGroup *shgroup, const char *name, const int value);
void DRW_shgroup_uniform_ivec2_copy(DRWShadingGroup *shgroup, const char *name, const int *value);
diff --git a/source/blender/draw/intern/draw_manager.h b/source/blender/draw/intern/draw_manager.h
index c0bcb0e679f..9f6a970ea22 100644
--- a/source/blender/draw/intern/draw_manager.h
+++ b/source/blender/draw/intern/draw_manager.h
@@ -278,6 +278,8 @@ typedef enum {
DRW_UNIFORM_FLOAT_COPY,
DRW_UNIFORM_TEXTURE,
DRW_UNIFORM_TEXTURE_REF,
+ DRW_UNIFORM_IMAGE,
+ DRW_UNIFORM_IMAGE_REF,
DRW_UNIFORM_BLOCK,
DRW_UNIFORM_BLOCK_REF,
DRW_UNIFORM_TFEEDBACK_TARGET,
diff --git a/source/blender/draw/intern/draw_manager_data.c b/source/blender/draw/intern/draw_manager_data.c
index a4fc44e9571..81842f5d2ec 100644
--- a/source/blender/draw/intern/draw_manager_data.c
+++ b/source/blender/draw/intern/draw_manager_data.c
@@ -199,10 +199,12 @@ static void drw_shgroup_uniform_create_ex(DRWShadingGroup *shgroup,
case DRW_UNIFORM_BLOCK_REF:
uni->block_ref = (GPUUniformBuf **)value;
break;
+ case DRW_UNIFORM_IMAGE:
case DRW_UNIFORM_TEXTURE:
uni->texture = (GPUTexture *)value;
uni->sampler_state = sampler_state;
break;
+ case DRW_UNIFORM_IMAGE_REF:
case DRW_UNIFORM_TEXTURE_REF:
uni->texture_ref = (GPUTexture **)value;
uni->sampler_state = sampler_state;
@@ -261,6 +263,20 @@ void DRW_shgroup_uniform_texture_ref(DRWShadingGroup *shgroup, const char *name,
DRW_shgroup_uniform_texture_ref_ex(shgroup, name, tex, GPU_SAMPLER_MAX);
}
+void DRW_shgroup_uniform_image(DRWShadingGroup *shgroup, const char *name, const GPUTexture *tex)
+{
+ BLI_assert(tex != NULL);
+ int loc = GPU_shader_get_texture_binding(shgroup->shader, name);
+ drw_shgroup_uniform_create_ex(shgroup, loc, DRW_UNIFORM_IMAGE, tex, 0, 0, 1);
+}
+
+void DRW_shgroup_uniform_image_ref(DRWShadingGroup *shgroup, const char *name, GPUTexture **tex)
+{
+ BLI_assert(tex != NULL);
+ int loc = GPU_shader_get_texture_binding(shgroup->shader, name);
+ drw_shgroup_uniform_create_ex(shgroup, loc, DRW_UNIFORM_IMAGE_REF, tex, 0, 0, 1);
+}
+
void DRW_shgroup_uniform_block(DRWShadingGroup *shgroup,
const char *name,
const GPUUniformBuf *ubo)
diff --git a/source/blender/draw/intern/draw_manager_exec.c b/source/blender/draw/intern/draw_manager_exec.c
index 79d74e1f67d..84f618c1c15 100644
--- a/source/blender/draw/intern/draw_manager_exec.c
+++ b/source/blender/draw/intern/draw_manager_exec.c
@@ -596,6 +596,12 @@ static void draw_update_uniforms(DRWShadingGroup *shgroup,
case DRW_UNIFORM_TEXTURE_REF:
GPU_texture_bind_ex(*uni->texture_ref, uni->sampler_state, uni->location, false);
break;
+ case DRW_UNIFORM_IMAGE:
+ GPU_texture_image_bind(uni->texture, uni->location);
+ break;
+ case DRW_UNIFORM_IMAGE_REF:
+ GPU_texture_image_bind(*uni->texture_ref, uni->location);
+ break;
case DRW_UNIFORM_BLOCK:
GPU_uniformbuf_bind(uni->block, uni->location);
break;
diff --git a/source/blender/gpu/GPU_capabilities.h b/source/blender/gpu/GPU_capabilities.h
index b8a48735548..9d55fe73708 100644
--- a/source/blender/gpu/GPU_capabilities.h
+++ b/source/blender/gpu/GPU_capabilities.h
@@ -45,6 +45,8 @@ bool GPU_depth_blitting_workaround(void);
bool GPU_use_main_context_workaround(void);
bool GPU_crappy_amd_driver(void);
+bool GPU_shader_image_load_store_support(void);
+
bool GPU_mem_stats_supported(void);
void GPU_mem_stats_get(int *totalmem, int *freemem);
diff --git a/source/blender/gpu/GPU_state.h b/source/blender/gpu/GPU_state.h
index 5e872001267..aa32c6c75ba 100644
--- a/source/blender/gpu/GPU_state.h
+++ b/source/blender/gpu/GPU_state.h
@@ -35,6 +35,14 @@ typedef enum eGPUWriteMask {
ENUM_OPERATORS(eGPUWriteMask)
+typedef enum eGPUBarrier {
+ GPU_BARRIER_NONE = 0,
+ GPU_BARRIER_SHADER_IMAGE_ACCESS = (1 << 0),
+ GPU_BARRIER_TEXTURE_FETCH = (1 << 1),
+} eGPUBarrier;
+
+ENUM_OPERATORS(eGPUBarrier)
+
/**
* Defines the fixed pipeline blending equation.
* SRC is the output color from the shader.
@@ -152,6 +160,8 @@ eGPUStencilTest GPU_stencil_test_get(void);
void GPU_flush(void);
void GPU_finish(void);
+void GPU_memory_barrier(eGPUBarrier barrier);
+
#ifdef __cplusplus
}
#endif
diff --git a/source/blender/gpu/GPU_texture.h b/source/blender/gpu/GPU_texture.h
index 2ce2ba093cf..fafa45fe0fe 100644
--- a/source/blender/gpu/GPU_texture.h
+++ b/source/blender/gpu/GPU_texture.h
@@ -243,6 +243,10 @@ void GPU_texture_bind_ex(GPUTexture *tex, eGPUSamplerState state, int unit, cons
void GPU_texture_unbind(GPUTexture *tex);
void GPU_texture_unbind_all(void);
+void GPU_texture_image_bind(GPUTexture *tex, int unit);
+void GPU_texture_image_unbind(GPUTexture *tex);
+void GPU_texture_image_unbind_all(void);
+
void GPU_texture_copy(GPUTexture *dst, GPUTexture *src);
void GPU_texture_generate_mipmap(GPUTexture *tex);
diff --git a/source/blender/gpu/intern/gpu_capabilities.cc b/source/blender/gpu/intern/gpu_capabilities.cc
index a79ce27ba63..63e29654e1c 100644
--- a/source/blender/gpu/intern/gpu_capabilities.cc
+++ b/source/blender/gpu/intern/gpu_capabilities.cc
@@ -102,6 +102,11 @@ bool GPU_crappy_amd_driver(void)
return GCaps.broken_amd_driver;
}
+bool GPU_shader_image_load_store_support(void)
+{
+ return GCaps.shader_image_load_store_support;
+}
+
/** \} */
/* -------------------------------------------------------------------- */
diff --git a/source/blender/gpu/intern/gpu_capabilities_private.hh b/source/blender/gpu/intern/gpu_capabilities_private.hh
index a51525fa932..abe5b706a7d 100644
--- a/source/blender/gpu/intern/gpu_capabilities_private.hh
+++ b/source/blender/gpu/intern/gpu_capabilities_private.hh
@@ -42,6 +42,7 @@ struct GPUCapabilities {
int max_textures_geom = 0;
int max_textures_frag = 0;
bool mem_stats_support = false;
+ bool shader_image_load_store_support = false;
/* OpenGL related workarounds. */
bool mip_render_workaround = false;
bool depth_blitting_workaround = false;
@@ -52,4 +53,4 @@ struct GPUCapabilities {
extern GPUCapabilities GCaps;
-} // namespace blender::gpu \ No newline at end of file
+} // namespace blender::gpu
diff --git a/source/blender/gpu/intern/gpu_shader_interface.hh b/source/blender/gpu/intern/gpu_shader_interface.hh
index f76339d3adb..fce6fda5f14 100644
--- a/source/blender/gpu/intern/gpu_shader_interface.hh
+++ b/source/blender/gpu/intern/gpu_shader_interface.hh
@@ -63,6 +63,7 @@ class ShaderInterface {
/** Enabled bindpoints that needs to be fed with data. */
uint16_t enabled_attr_mask_ = 0;
uint16_t enabled_ubo_mask_ = 0;
+ uint8_t enabled_ima_mask_ = 0;
uint64_t enabled_tex_mask_ = 0;
/** Location of builtin uniforms. Fast access, no lookup needed. */
int32_t builtins_[GPU_NUM_UNIFORMS];
diff --git a/source/blender/gpu/intern/gpu_state.cc b/source/blender/gpu/intern/gpu_state.cc
index be523020e8a..01a07ee3e4f 100644
--- a/source/blender/gpu/intern/gpu_state.cc
+++ b/source/blender/gpu/intern/gpu_state.cc
@@ -30,7 +30,6 @@
#include "BKE_global.h"
-#include "GPU_glew.h"
#include "GPU_state.h"
#include "gpu_context_private.hh"
@@ -309,6 +308,17 @@ void GPU_finish(void)
/** \} */
/* -------------------------------------------------------------------- */
+/** \name Synchronisation Utils
+ * \{ */
+
+void GPU_memory_barrier(eGPUBarrier barrier)
+{
+ Context::get()->state_manager->issue_barrier(barrier);
+}
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
/** \name Default OpenGL State
*
* This is called on startup, for opengl offscreen render.
diff --git a/source/blender/gpu/intern/gpu_state_private.hh b/source/blender/gpu/intern/gpu_state_private.hh
index 9fee45e7bd4..a21093f00a2 100644
--- a/source/blender/gpu/intern/gpu_state_private.hh
+++ b/source/blender/gpu/intern/gpu_state_private.hh
@@ -163,10 +163,16 @@ class GPUStateManager {
virtual void apply_state(void) = 0;
+ virtual void issue_barrier(eGPUBarrier barrier_bits) = 0;
+
virtual void texture_bind(Texture *tex, eGPUSamplerState sampler, int unit) = 0;
virtual void texture_unbind(Texture *tex) = 0;
virtual void texture_unbind_all(void) = 0;
+ virtual void image_bind(Texture *tex, int unit) = 0;
+ virtual void image_unbind(Texture *tex) = 0;
+ virtual void image_unbind_all(void) = 0;
+
virtual void texture_unpack_row_length_set(uint len) = 0;
};
diff --git a/source/blender/gpu/intern/gpu_texture.cc b/source/blender/gpu/intern/gpu_texture.cc
index b22fd53f0f6..09dbf04210a 100644
--- a/source/blender/gpu/intern/gpu_texture.cc
+++ b/source/blender/gpu/intern/gpu_texture.cc
@@ -418,6 +418,21 @@ void GPU_texture_unbind_all(void)
Context::get()->state_manager->texture_unbind_all();
}
+void GPU_texture_image_bind(GPUTexture *tex, int unit)
+{
+ Context::get()->state_manager->image_bind(unwrap(tex), unit);
+}
+
+void GPU_texture_image_unbind(GPUTexture *tex)
+{
+ Context::get()->state_manager->image_unbind(unwrap(tex));
+}
+
+void GPU_texture_image_unbind_all(void)
+{
+ Context::get()->state_manager->image_unbind_all();
+}
+
void GPU_texture_generate_mipmap(GPUTexture *tex)
{
reinterpret_cast<Texture *>(tex)->generate_mipmap();
diff --git a/source/blender/gpu/opengl/gl_backend.cc b/source/blender/gpu/opengl/gl_backend.cc
index edaa84cdcf8..46e048d7f7c 100644
--- a/source/blender/gpu/opengl/gl_backend.cc
+++ b/source/blender/gpu/opengl/gl_backend.cc
@@ -210,6 +210,7 @@ static void detect_workarounds(void)
GLContext::debug_layer_workaround = true;
GLContext::unused_fb_slot_workaround = true;
/* Turn off extensions. */
+ GCaps.shader_image_load_store_support = false;
GLContext::base_instance_support = false;
GLContext::clear_texture_support = false;
GLContext::copy_image_support = false;
@@ -250,17 +251,20 @@ static void detect_workarounds(void)
(strstr(version, "4.5.13399") || strstr(version, "4.5.13417") ||
strstr(version, "4.5.13422"))) {
GLContext::unused_fb_slot_workaround = true;
+ GCaps.shader_image_load_store_support = false;
GCaps.broken_amd_driver = true;
}
/* We have issues with this specific renderer. (see T74024) */
if (GPU_type_matches(GPU_DEVICE_ATI, GPU_OS_UNIX, GPU_DRIVER_OPENSOURCE) &&
strstr(renderer, "AMD VERDE")) {
GLContext::unused_fb_slot_workaround = true;
+ GCaps.shader_image_load_store_support = false;
GCaps.broken_amd_driver = true;
}
/* Fix slowdown on this particular driver. (see T77641) */
if (GPU_type_matches(GPU_DEVICE_ATI, GPU_OS_UNIX, GPU_DRIVER_OPENSOURCE) &&
strstr(version, "Mesa 19.3.4")) {
+ GCaps.shader_image_load_store_support = false;
GCaps.broken_amd_driver = true;
}
/* There is an issue with the #glBlitFramebuffer on MacOS with radeon pro graphics.
@@ -349,10 +353,10 @@ static void detect_workarounds(void)
}
/** Internal capabilities. */
-GLint GLContext::max_texture_3d_size;
-GLint GLContext::max_cubemap_size;
-GLint GLContext::max_ubo_size;
-GLint GLContext::max_ubo_binds;
+GLint GLContext::max_cubemap_size = 0;
+GLint GLContext::max_texture_3d_size = 0;
+GLint GLContext::max_ubo_binds = 0;
+GLint GLContext::max_ubo_size = 0;
/** Extensions. */
bool GLContext::base_instance_support = false;
bool GLContext::clear_texture_support = false;
@@ -383,6 +387,7 @@ void GLBackend::capabilities_init(void)
glGetIntegerv(GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS, &GCaps.max_textures_geom);
glGetIntegerv(GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS, &GCaps.max_textures);
GCaps.mem_stats_support = GLEW_NVX_gpu_memory_info || GLEW_ATI_meminfo;
+ GCaps.shader_image_load_store_support = GLEW_ARB_shader_image_load_store;
/* GL specific capabilities. */
glGetIntegerv(GL_MAX_3D_TEXTURE_SIZE, &GLContext::max_texture_3d_size);
glGetIntegerv(GL_MAX_CUBE_MAP_TEXTURE_SIZE, &GLContext::max_cubemap_size);
@@ -413,4 +418,4 @@ void GLBackend::capabilities_init(void)
/** \} */
-} // namespace blender::gpu \ No newline at end of file
+} // namespace blender::gpu
diff --git a/source/blender/gpu/opengl/gl_context.hh b/source/blender/gpu/opengl/gl_context.hh
index 9822c842ce7..4d9c2470db0 100644
--- a/source/blender/gpu/opengl/gl_context.hh
+++ b/source/blender/gpu/opengl/gl_context.hh
@@ -56,8 +56,8 @@ class GLSharedOrphanLists {
class GLContext : public Context {
public:
/** Capabilities. */
- static GLint max_texture_3d_size;
static GLint max_cubemap_size;
+ static GLint max_texture_3d_size;
static GLint max_ubo_size;
static GLint max_ubo_binds;
/** Extensions. */
diff --git a/source/blender/gpu/opengl/gl_debug.cc b/source/blender/gpu/opengl/gl_debug.cc
index db99e90d0ec..747d8ee2e3e 100644
--- a/source/blender/gpu/opengl/gl_debug.cc
+++ b/source/blender/gpu/opengl/gl_debug.cc
@@ -200,13 +200,16 @@ void check_gl_resources(const char *info)
* be big enough to feed the data range the shader awaits. */
uint16_t ubo_needed = interface->enabled_ubo_mask_;
ubo_needed &= ~ctx->bound_ubo_slots;
-
/* NOTE: This only check binding. To be valid, the bound texture needs to
* be the same format/target the shader expects. */
uint64_t tex_needed = interface->enabled_tex_mask_;
tex_needed &= ~GLContext::state_manager_active_get()->bound_texture_slots();
+ /* NOTE: This only checks binding. To be valid, the bound image needs to
+ * be the same format/target the shader expects. */
+ uint8_t ima_needed = interface->enabled_ima_mask_;
+ ima_needed &= ~GLContext::state_manager_active_get()->bound_image_slots();
- if (ubo_needed == 0 && tex_needed == 0) {
+ if (ubo_needed == 0 && tex_needed == 0 && ima_needed == 0) {
return;
}
@@ -223,6 +226,7 @@ void check_gl_resources(const char *info)
for (int i = 0; tex_needed != 0; i++, tex_needed >>= 1) {
if ((tex_needed & 1) != 0) {
+ /* FIXME: texture_get might return an image input instead. */
const ShaderInput *tex_input = interface->texture_get(i);
const char *tex_name = interface->input_name_get(tex_input);
const char *sh_name = ctx->shader->name_get();
@@ -231,6 +235,18 @@ void check_gl_resources(const char *info)
debug_callback(0, GL_DEBUG_TYPE_ERROR, 0, GL_DEBUG_SEVERITY_HIGH, 0, msg, NULL);
}
}
+
+ for (int i = 0; ima_needed != 0; i++, ima_needed >>= 1) {
+ if ((ima_needed & 1) != 0) {
+ /* FIXME: texture_get might return a texture input instead. */
+ const ShaderInput *tex_input = interface->texture_get(i);
+ const char *tex_name = interface->input_name_get(tex_input);
+ const char *sh_name = ctx->shader->name_get();
+ char msg[256];
+ SNPRINTF(msg, "Missing Image bind at slot %d : %s > %s : %s", i, sh_name, tex_name, info);
+ debug_callback(0, GL_DEBUG_TYPE_ERROR, 0, GL_DEBUG_SEVERITY_HIGH, 0, msg, NULL);
+ }
+ }
}
void raise_gl_error(const char *info)
diff --git a/source/blender/gpu/opengl/gl_shader_interface.cc b/source/blender/gpu/opengl/gl_shader_interface.cc
index d611efcd975..2d55c222e9c 100644
--- a/source/blender/gpu/opengl/gl_shader_interface.cc
+++ b/source/blender/gpu/opengl/gl_shader_interface.cc
@@ -100,6 +100,31 @@ static inline int sampler_binding(int32_t program,
return -1;
}
}
+
+static inline int image_binding(int32_t program,
+ uint32_t uniform_index,
+ int32_t uniform_location,
+ int *image_len)
+{
+ /* Identify image uniforms and assign image units to them. */
+ GLint type;
+ glGetActiveUniformsiv(program, 1, &uniform_index, GL_UNIFORM_TYPE, &type);
+
+ switch (type) {
+ case GL_IMAGE_1D:
+ case GL_IMAGE_2D:
+ case GL_IMAGE_3D: {
+ /* For now just assign a consecutive index. In the future, we should set it in
+ * the shader using layout(binding = i) and query its value. */
+ int binding = *image_len;
+ glUniform1i(uniform_location, binding);
+ (*image_len)++;
+ return binding;
+ }
+ default:
+ return -1;
+ }
+}
/** \} */
/* -------------------------------------------------------------------- */
@@ -207,8 +232,8 @@ GLShaderInterface::GLShaderInterface(GLuint program)
enabled_ubo_mask_ |= (1 << input->binding);
}
- /* Uniforms */
- for (int i = 0, sampler = 0; i < active_uniform_len; i++) {
+ /* Uniforms & samplers & images */
+ for (int i = 0, sampler = 0, image = 0; i < active_uniform_len; i++) {
if (BLI_BITMAP_TEST(uniforms_from_blocks, i)) {
continue;
}
@@ -224,6 +249,12 @@ GLShaderInterface::GLShaderInterface(GLuint program)
name_buffer_offset += this->set_input_name(input, name, name_len);
enabled_tex_mask_ |= (input->binding != -1) ? (1lu << input->binding) : 0lu;
+
+ if (input->binding == -1) {
+ input->binding = image_binding(program, i, input->location, &image);
+
+ enabled_ima_mask_ |= (input->binding != -1) ? (1lu << input->binding) : 0lu;
+ }
}
/* Builtin Uniforms */
@@ -296,4 +327,4 @@ void GLShaderInterface::ref_remove(GLVaoCache *ref)
/** \} */
-} // namespace blender::gpu \ No newline at end of file
+} // namespace blender::gpu
diff --git a/source/blender/gpu/opengl/gl_state.cc b/source/blender/gpu/opengl/gl_state.cc
index 1678760e9cd..93753768928 100644
--- a/source/blender/gpu/opengl/gl_state.cc
+++ b/source/blender/gpu/opengl/gl_state.cc
@@ -76,6 +76,7 @@ void GLStateManager::apply_state(void)
this->set_state(this->state);
this->set_mutable_state(this->mutable_state);
this->texture_bind_apply();
+ this->image_bind_apply();
active_fb->apply_state();
};
@@ -538,4 +539,98 @@ uint64_t GLStateManager::bound_texture_slots(void)
/** \} */
+/* -------------------------------------------------------------------- */
+/** \name Image Binding (from image load store)
+ * \{ */
+
+void GLStateManager::image_bind(Texture *tex_, int unit)
+{
+ /* Minimum support is 8 images in the fragment shader. No image for other stages. */
+ BLI_assert(GPU_shader_image_load_store_support() && unit < 8);
+ GLTexture *tex = static_cast<GLTexture *>(tex_);
+ if (G.debug & G_DEBUG_GPU) {
+ tex->check_feedback_loop();
+ }
+ images_[unit] = tex->tex_id_;
+ formats_[unit] = to_gl_internal_format(tex->format_);
+ tex->is_bound_ = true;
+ dirty_image_binds_ |= 1ULL << unit;
+}
+
+void GLStateManager::image_unbind(Texture *tex_)
+{
+ GLTexture *tex = static_cast<GLTexture *>(tex_);
+ if (!tex->is_bound_) {
+ return;
+ }
+
+ GLuint tex_id = tex->tex_id_;
+ for (int i = 0; i < ARRAY_SIZE(images_); i++) {
+ if (images_[i] == tex_id) {
+ images_[i] = 0;
+ dirty_image_binds_ |= 1ULL << i;
+ }
+ }
+ tex->is_bound_ = false;
+}
+
+void GLStateManager::image_unbind_all(void)
+{
+ for (int i = 0; i < ARRAY_SIZE(images_); i++) {
+ if (images_[i] != 0) {
+ images_[i] = 0;
+ dirty_image_binds_ |= 1ULL << i;
+ }
+ }
+ this->image_bind_apply();
+}
+
+void GLStateManager::image_bind_apply(void)
+{
+ if (dirty_image_binds_ == 0) {
+ return;
+ }
+ uint32_t dirty_bind = dirty_image_binds_;
+ dirty_image_binds_ = 0;
+
+ int first = bitscan_forward_uint(dirty_bind);
+ int last = 32 - bitscan_reverse_uint(dirty_bind);
+ int count = last - first;
+
+ if (GLContext::multi_bind_support) {
+ glBindImageTextures(first, count, images_ + first);
+ }
+ else {
+ for (int unit = first; unit < last; unit++) {
+ if ((dirty_bind >> unit) & 1UL) {
+ glBindImageTexture(unit, images_[unit], 0, GL_TRUE, 0, GL_READ_WRITE, formats_[unit]);
+ }
+ }
+ }
+}
+
+uint8_t GLStateManager::bound_image_slots(void)
+{
+ uint8_t bound_slots = 0;
+ for (int i = 0; i < ARRAY_SIZE(images_); i++) {
+ if (images_[i] != 0) {
+ bound_slots |= 1ULL << i;
+ }
+ }
+ return bound_slots;
+}
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Memory barrier
+ * \{ */
+
+void GLStateManager::issue_barrier(eGPUBarrier barrier_bits)
+{
+ glMemoryBarrier(to_gl(barrier_bits));
+}
+
+/** \} */
+
} // namespace blender::gpu
diff --git a/source/blender/gpu/opengl/gl_state.hh b/source/blender/gpu/opengl/gl_state.hh
index fb2ed3403f7..b636c08ec0d 100644
--- a/source/blender/gpu/opengl/gl_state.hh
+++ b/source/blender/gpu/opengl/gl_state.hh
@@ -64,19 +64,30 @@ class GLStateManager : public GPUStateManager {
GLuint samplers_[64] = {0};
uint64_t dirty_texture_binds_ = 0;
+ GLuint images_[8] = {0};
+ GLenum formats_[8] = {0};
+ uint8_t dirty_image_binds_ = 0;
+
public:
GLStateManager();
void apply_state(void) override;
+ void issue_barrier(eGPUBarrier barrier_bits) override;
+
void texture_bind(Texture *tex, eGPUSamplerState sampler, int unit) override;
void texture_bind_temp(GLTexture *tex);
void texture_unbind(Texture *tex) override;
void texture_unbind_all(void) override;
+ void image_bind(Texture *tex, int unit) override;
+ void image_unbind(Texture *tex) override;
+ void image_unbind_all(void) override;
+
void texture_unpack_row_length_set(uint len) override;
uint64_t bound_texture_slots(void);
+ uint8_t bound_image_slots(void);
private:
static void set_write_mask(const eGPUWriteMask value);
@@ -95,9 +106,22 @@ class GLStateManager : public GPUStateManager {
void set_mutable_state(const GPUStateMutable &state);
void texture_bind_apply(void);
+ void image_bind_apply(void);
MEM_CXX_CLASS_ALLOC_FUNCS("GLStateManager")
};
+static inline GLbitfield to_gl(eGPUBarrier barrier_bits)
+{
+ GLbitfield barrier = 0;
+ if (barrier_bits & GPU_BARRIER_SHADER_IMAGE_ACCESS) {
+ barrier |= GL_SHADER_IMAGE_ACCESS_BARRIER_BIT;
+ }
+ if (barrier_bits & GPU_BARRIER_TEXTURE_FETCH) {
+ barrier |= GL_TEXTURE_FETCH_BARRIER_BIT;
+ }
+ return barrier;
+}
+
} // namespace gpu
} // namespace blender