From 87055dc71b0d50cd25660969b55cda7d44af6a12 Mon Sep 17 00:00:00 2001 From: Jeroen Bakker Date: Wed, 26 May 2021 16:49:17 +0200 Subject: GPU: Compute Pipeline. With the compute pipeline calculation can be offloaded to the GPU. This patch only adds the framework for compute. So no changes for users at this moment. NOTE: As this is an OpenGL4.3 feature it must always have a fallback. Use `GPU_compute_shader_support` to check if compute pipeline can be used. Check `gpu_shader_compute*` test cases for usage. This patch also adds support for shader storage buffer objects and device only vertex/index buffers. An alternative that had been discussed was adding this to the `GPUBatch`, this was eventually not chosen as it would lead to more code when used as part of a shading group. The idea is that we add an `eDRWCommandType` in the near future. Reviewed By: fclem Differential Revision: https://developer.blender.org/D10913 --- source/blender/draw/intern/draw_manager_shader.c | 1 + source/blender/gpu/CMakeLists.txt | 5 + source/blender/gpu/GPU_capabilities.h | 2 + source/blender/gpu/GPU_compute.h | 38 +++ source/blender/gpu/GPU_index_buffer.h | 12 + source/blender/gpu/GPU_shader.h | 7 + source/blender/gpu/GPU_state.h | 1 + source/blender/gpu/GPU_vertex_buffer.h | 10 + source/blender/gpu/intern/gpu_backend.hh | 1 + source/blender/gpu/intern/gpu_capabilities.cc | 10 + .../blender/gpu/intern/gpu_capabilities_private.hh | 2 + source/blender/gpu/intern/gpu_compute.cc | 41 +++ source/blender/gpu/intern/gpu_index_buffer.cc | 42 +++ .../blender/gpu/intern/gpu_index_buffer_private.hh | 12 +- source/blender/gpu/intern/gpu_shader.cc | 59 +++- source/blender/gpu/intern/gpu_shader_interface.cc | 9 + source/blender/gpu/intern/gpu_shader_interface.hh | 6 + source/blender/gpu/intern/gpu_shader_private.hh | 1 + source/blender/gpu/intern/gpu_vertex_buffer.cc | 15 + .../gpu/intern/gpu_vertex_buffer_private.hh | 3 + source/blender/gpu/opengl/gl_backend.cc | 2 + 
source/blender/gpu/opengl/gl_backend.hh | 7 + source/blender/gpu/opengl/gl_compute.cc | 35 +++ source/blender/gpu/opengl/gl_compute.hh | 30 ++ source/blender/gpu/opengl/gl_index_buffer.cc | 34 ++- source/blender/gpu/opengl/gl_index_buffer.hh | 7 + source/blender/gpu/opengl/gl_shader.cc | 38 ++- source/blender/gpu/opengl/gl_shader.hh | 4 +- source/blender/gpu/opengl/gl_shader_interface.cc | 45 ++- source/blender/gpu/opengl/gl_state.hh | 3 + source/blender/gpu/opengl/gl_texture.cc | 2 +- source/blender/gpu/opengl/gl_vertex_buffer.cc | 45 ++- source/blender/gpu/opengl/gl_vertex_buffer.hh | 8 + source/blender/gpu/tests/gpu_shader_test.cc | 301 +++++++++++++++++++++ 34 files changed, 819 insertions(+), 19 deletions(-) create mode 100644 source/blender/gpu/GPU_compute.h create mode 100644 source/blender/gpu/intern/gpu_compute.cc create mode 100644 source/blender/gpu/opengl/gl_compute.cc create mode 100644 source/blender/gpu/opengl/gl_compute.hh create mode 100644 source/blender/gpu/tests/gpu_shader_test.cc (limited to 'source') diff --git a/source/blender/draw/intern/draw_manager_shader.c b/source/blender/draw/intern/draw_manager_shader.c index 2aad1f10154..83d0030f89b 100644 --- a/source/blender/draw/intern/draw_manager_shader.c +++ b/source/blender/draw/intern/draw_manager_shader.c @@ -396,6 +396,7 @@ GPUShader *DRW_shader_create_with_transform_feedback(const char *vert, datatoc_gpu_shader_depth_only_frag_glsl, geom, NULL, + NULL, defines, prim_type, varying_names, diff --git a/source/blender/gpu/CMakeLists.txt b/source/blender/gpu/CMakeLists.txt index f1ffd7827b8..cf6009c2881 100644 --- a/source/blender/gpu/CMakeLists.txt +++ b/source/blender/gpu/CMakeLists.txt @@ -62,6 +62,7 @@ set(SRC intern/gpu_buffers.c intern/gpu_capabilities.cc intern/gpu_codegen.c + intern/gpu_compute.cc intern/gpu_context.cc intern/gpu_debug.cc intern/gpu_drawlist.cc @@ -91,6 +92,7 @@ set(SRC opengl/gl_backend.cc opengl/gl_batch.cc + opengl/gl_compute.cc opengl/gl_context.cc opengl/gl_debug.cc 
opengl/gl_debug_layer.cc @@ -113,6 +115,7 @@ set(SRC GPU_buffers.h GPU_capabilities.h GPU_common.h + GPU_compute.h GPU_context.h GPU_debug.h GPU_drawlist.h @@ -163,6 +166,7 @@ set(SRC opengl/gl_backend.hh opengl/gl_batch.hh + opengl/gl_compute.hh opengl/gl_context.hh opengl/gl_debug.hh opengl/gl_drawlist.hh @@ -390,6 +394,7 @@ if(WITH_GTESTS) if(WITH_OPENGL_DRAW_TESTS) set(TEST_SRC tests/gpu_testing.cc + tests/gpu_shader_test.cc tests/gpu_testing.hh ) diff --git a/source/blender/gpu/GPU_capabilities.h b/source/blender/gpu/GPU_capabilities.h index f54ecece659..45c656b49be 100644 --- a/source/blender/gpu/GPU_capabilities.h +++ b/source/blender/gpu/GPU_capabilities.h @@ -55,6 +55,8 @@ bool GPU_use_main_context_workaround(void); bool GPU_use_hq_normals_workaround(void); bool GPU_crappy_amd_driver(void); +bool GPU_compute_shader_support(void); +bool GPU_shader_storage_buffer_objects_support(void); bool GPU_shader_image_load_store_support(void); bool GPU_mem_stats_supported(void); diff --git a/source/blender/gpu/GPU_compute.h b/source/blender/gpu/GPU_compute.h new file mode 100644 index 00000000000..a048f72c0a0 --- /dev/null +++ b/source/blender/gpu/GPU_compute.h @@ -0,0 +1,38 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +/** \file + * \ingroup gpu + */ + +#pragma once + +#include "BLI_sys_types.h" + +#include "GPU_shader.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void GPU_compute_dispatch(GPUShader *shader, + uint groups_x_len, + uint groups_y_len, + uint groups_z_len); + +#ifdef __cplusplus +} +#endif diff --git a/source/blender/gpu/GPU_index_buffer.h b/source/blender/gpu/GPU_index_buffer.h index 76aab3c196b..bdacfe6fc0f 100644 --- a/source/blender/gpu/GPU_index_buffer.h +++ b/source/blender/gpu/GPU_index_buffer.h @@ -49,6 +49,7 @@ void GPU_indexbuf_init_ex(GPUIndexBufBuilder *, GPUPrimType, uint index_len, uin /* supports only GPU_PRIM_POINTS, GPU_PRIM_LINES and GPU_PRIM_TRIS. */ void GPU_indexbuf_init(GPUIndexBufBuilder *, GPUPrimType, uint prim_len, uint vertex_len); +GPUIndexBuf *GPU_indexbuf_build_on_device(uint index_len); void GPU_indexbuf_add_generic_vert(GPUIndexBufBuilder *, uint v); void GPU_indexbuf_add_primitive_restart(GPUIndexBufBuilder *); @@ -70,6 +71,8 @@ void GPU_indexbuf_set_tri_restart(GPUIndexBufBuilder *builder, uint elem); GPUIndexBuf *GPU_indexbuf_build(GPUIndexBufBuilder *); void GPU_indexbuf_build_in_place(GPUIndexBufBuilder *, GPUIndexBuf *); +void GPU_indexbuf_bind_as_ssbo(GPUIndexBuf *elem, int binding); + /* Create a sub-range of an existing index-buffer. */ GPUIndexBuf *GPU_indexbuf_create_subrange(GPUIndexBuf *elem_src, uint start, uint length); void GPU_indexbuf_create_subrange_in_place(GPUIndexBuf *elem, @@ -77,6 +80,15 @@ void GPU_indexbuf_create_subrange_in_place(GPUIndexBuf *elem, uint start, uint length); +/** + * (Download and) return a pointer containing the data of an index buffer. + * + * Note that the returned pointer is still owned by the driver. To get a + * local copy, use `GPU_indexbuf_unmap` after calling `GPU_indexbuf_read`. 
+ */ +const uint32_t *GPU_indexbuf_read(GPUIndexBuf *elem); +uint32_t *GPU_indexbuf_unmap(const GPUIndexBuf *elem, const uint32_t *mapped_data); + void GPU_indexbuf_discard(GPUIndexBuf *elem); bool GPU_indexbuf_is_init(GPUIndexBuf *elem); diff --git a/source/blender/gpu/GPU_shader.h b/source/blender/gpu/GPU_shader.h index 9824c7016dc..3923c920c9e 100644 --- a/source/blender/gpu/GPU_shader.h +++ b/source/blender/gpu/GPU_shader.h @@ -27,6 +27,7 @@ extern "C" { #endif +struct GPUIndexBuf; struct GPUVertBuf; /** Opaque type hiding #blender::gpu::Shader */ @@ -45,6 +46,10 @@ GPUShader *GPU_shader_create(const char *vertcode, const char *libcode, const char *defines, const char *shname); +GPUShader *GPU_shader_create_compute(const char *computecode, + const char *libcode, + const char *defines, + const char *shname); GPUShader *GPU_shader_create_from_python(const char *vertcode, const char *fragcode, const char *geomcode, @@ -53,6 +58,7 @@ GPUShader *GPU_shader_create_from_python(const char *vertcode, GPUShader *GPU_shader_create_ex(const char *vertcode, const char *fragcode, const char *geomcode, + const char *computecode, const char *libcode, const char *defines, const eGPUShaderTFBType tf_type, @@ -126,6 +132,7 @@ int GPU_shader_get_uniform(GPUShader *shader, const char *name); int GPU_shader_get_builtin_uniform(GPUShader *shader, int builtin); int GPU_shader_get_builtin_block(GPUShader *shader, int builtin); int GPU_shader_get_uniform_block(GPUShader *shader, const char *name); +int GPU_shader_get_ssbo(GPUShader *shader, const char *name); int GPU_shader_get_uniform_block_binding(GPUShader *shader, const char *name); int GPU_shader_get_texture_binding(GPUShader *shader, const char *name); diff --git a/source/blender/gpu/GPU_state.h b/source/blender/gpu/GPU_state.h index 0687f271670..a338728804c 100644 --- a/source/blender/gpu/GPU_state.h +++ b/source/blender/gpu/GPU_state.h @@ -39,6 +39,7 @@ typedef enum eGPUBarrier { GPU_BARRIER_NONE = 0, 
 GPU_BARRIER_SHADER_IMAGE_ACCESS = (1 << 0), GPU_BARRIER_TEXTURE_FETCH = (1 << 1), + GPU_BARRIER_SHADER_STORAGE = (1 << 2), } eGPUBarrier; ENUM_OPERATORS(eGPUBarrier, GPU_BARRIER_TEXTURE_FETCH) diff --git a/source/blender/gpu/GPU_vertex_buffer.h b/source/blender/gpu/GPU_vertex_buffer.h index aae58de533b..2c54016daa7 100644 --- a/source/blender/gpu/GPU_vertex_buffer.h +++ b/source/blender/gpu/GPU_vertex_buffer.h @@ -59,6 +59,7 @@ typedef enum { GPU_USAGE_STREAM, GPU_USAGE_STATIC, /* do not keep data in memory */ GPU_USAGE_DYNAMIC, + GPU_USAGE_DEVICE_ONLY, /* Do not do host->device data transfers. */ } GPUUsageType; /** Opaque type hiding blender::gpu::VertBuf. */ @@ -70,6 +71,14 @@ GPUVertBuf *GPU_vertbuf_create_with_format_ex(const GPUVertFormat *, GPUUsageTyp #define GPU_vertbuf_create_with_format(format) \ GPU_vertbuf_create_with_format_ex(format, GPU_USAGE_STATIC) +/** + * (Download and) return a pointer containing the data of a vertex buffer. + * + * Note that the returned pointer is still owned by the driver. To get a + * local copy, use `GPU_vertbuf_unmap` after calling `GPU_vertbuf_read`. + */ +const void *GPU_vertbuf_read(GPUVertBuf *verts); +void *GPU_vertbuf_unmap(const GPUVertBuf *verts, const void *mapped_data); void GPU_vertbuf_clear(GPUVertBuf *verts); void GPU_vertbuf_discard(GPUVertBuf *); @@ -138,6 +147,7 @@ uint GPU_vertbuf_get_vertex_len(const GPUVertBuf *verts); GPUVertBufStatus GPU_vertbuf_get_status(const GPUVertBuf *verts); void GPU_vertbuf_use(GPUVertBuf *); +void GPU_vertbuf_bind_as_ssbo(struct GPUVertBuf *verts, int binding); /* XXX do not use. 
*/ void GPU_vertbuf_update_sub(GPUVertBuf *verts, uint start, uint len, void *data); diff --git a/source/blender/gpu/intern/gpu_backend.hh b/source/blender/gpu/intern/gpu_backend.hh index 04ec82a9213..73792215569 100644 --- a/source/blender/gpu/intern/gpu_backend.hh +++ b/source/blender/gpu/intern/gpu_backend.hh @@ -47,6 +47,7 @@ class GPUBackend { static GPUBackend *get(void); virtual void samplers_update(void) = 0; + virtual void compute_dispatch(int groups_x_len, int groups_y_len, int groups_z_len) = 0; virtual Context *context_alloc(void *ghost_window) = 0; diff --git a/source/blender/gpu/intern/gpu_capabilities.cc b/source/blender/gpu/intern/gpu_capabilities.cc index d8764502800..bedc9ad3092 100644 --- a/source/blender/gpu/intern/gpu_capabilities.cc +++ b/source/blender/gpu/intern/gpu_capabilities.cc @@ -148,6 +148,16 @@ bool GPU_use_hq_normals_workaround(void) return GCaps.use_hq_normals_workaround; } +bool GPU_compute_shader_support(void) +{ + return GCaps.compute_shader_support; +} + +bool GPU_shader_storage_buffer_objects_support(void) +{ + return GCaps.shader_storage_buffer_objects_support; +} + bool GPU_shader_image_load_store_support(void) { return GCaps.shader_image_load_store_support; diff --git a/source/blender/gpu/intern/gpu_capabilities_private.hh b/source/blender/gpu/intern/gpu_capabilities_private.hh index 7c1d4590ce8..ee7ef1e69e6 100644 --- a/source/blender/gpu/intern/gpu_capabilities_private.hh +++ b/source/blender/gpu/intern/gpu_capabilities_private.hh @@ -51,6 +51,8 @@ struct GPUCapabilities { const char *(*extension_get)(int); bool mem_stats_support = false; + bool compute_shader_support = false; + bool shader_storage_buffer_objects_support = false; bool shader_image_load_store_support = false; /* OpenGL related workarounds. 
*/ bool mip_render_workaround = false; diff --git a/source/blender/gpu/intern/gpu_compute.cc b/source/blender/gpu/intern/gpu_compute.cc new file mode 100644 index 00000000000..7a8ae2acf9a --- /dev/null +++ b/source/blender/gpu/intern/gpu_compute.cc @@ -0,0 +1,41 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +/** \file + * \ingroup gpu + */ + +#include "GPU_compute.h" + +#include "gpu_backend.hh" + +#ifdef __cplusplus +extern "C" { +#endif + +void GPU_compute_dispatch(GPUShader *shader, + uint groups_x_len, + uint groups_y_len, + uint groups_z_len) +{ + blender::gpu::GPUBackend &gpu_backend = *blender::gpu::GPUBackend::get(); + GPU_shader_bind(shader); + gpu_backend.compute_dispatch(groups_x_len, groups_y_len, groups_z_len); +} + +#ifdef __cplusplus +} +#endif diff --git a/source/blender/gpu/intern/gpu_index_buffer.cc b/source/blender/gpu/intern/gpu_index_buffer.cc index 65932d2dbf4..20a26c0fe9d 100644 --- a/source/blender/gpu/intern/gpu_index_buffer.cc +++ b/source/blender/gpu/intern/gpu_index_buffer.cc @@ -31,6 +31,8 @@ #include "gpu_index_buffer_private.hh" +#include + #define KEEP_SINGLE_COPY 1 #define RESTART_INDEX 0xFFFFFFFF @@ -66,6 +68,14 @@ void GPU_indexbuf_init(GPUIndexBufBuilder *builder, GPU_indexbuf_init_ex(builder, prim_type, prim_len * (uint)verts_per_prim, 
vertex_len); } +GPUIndexBuf *GPU_indexbuf_build_on_device(uint index_len) +{ + GPUIndexBuf *elem_ = GPU_indexbuf_calloc(); + IndexBuf *elem = unwrap(elem_); + elem->init_build_on_device(index_len); + return elem_; +} + void GPU_indexbuf_add_generic_vert(GPUIndexBufBuilder *builder, uint v) { #if TRUST_NO_ONE @@ -241,6 +251,15 @@ void IndexBuf::init(uint indices_len, uint32_t *indices) #endif } +void IndexBuf::init_build_on_device(uint index_len) +{ + is_init_ = true; + index_start_ = 0; + index_len_ = index_len; + index_type_ = GPU_INDEX_U32; + data_ = nullptr; +} + void IndexBuf::init_subrange(IndexBuf *elem_src, uint start, uint length) { /* We don't support nested subranges. */ @@ -307,6 +326,14 @@ void IndexBuf::squeeze_indices_short(uint min_idx, uint max_idx) } } +uint32_t *IndexBuf::unmap(const uint32_t *mapped_memory) const +{ + size_t size = size_get(); + uint32_t *result = static_cast(MEM_mallocN(size, __func__)); + memcpy(result, mapped_memory, size); + return result; +} + } // namespace blender::gpu /** \} */ @@ -351,6 +378,16 @@ void GPU_indexbuf_create_subrange_in_place(GPUIndexBuf *elem, unwrap(elem)->init_subrange(unwrap(elem_src), start, length); } +const uint32_t *GPU_indexbuf_read(GPUIndexBuf *elem) +{ + return unwrap(elem)->read(); +} + +uint32_t *GPU_indexbuf_unmap(const GPUIndexBuf *elem, const uint32_t *mapped_buffer) +{ + return unwrap(elem)->unmap(mapped_buffer); +} + void GPU_indexbuf_discard(GPUIndexBuf *elem) { delete unwrap(elem); @@ -366,4 +403,9 @@ int GPU_indexbuf_primitive_len(GPUPrimType prim_type) return indices_per_primitive(prim_type); } +void GPU_indexbuf_bind_as_ssbo(GPUIndexBuf *elem, int binding) +{ + unwrap(elem)->bind_as_ssbo(binding); +} + /** \} */ diff --git a/source/blender/gpu/intern/gpu_index_buffer_private.hh b/source/blender/gpu/intern/gpu_index_buffer_private.hh index 2405db8664a..358258604bf 100644 --- a/source/blender/gpu/intern/gpu_index_buffer_private.hh +++ 
b/source/blender/gpu/intern/gpu_index_buffer_private.hh @@ -75,13 +75,14 @@ class IndexBuf { void init(uint indices_len, uint32_t *indices); void init_subrange(IndexBuf *elem_src, uint start, uint length); + void init_build_on_device(uint index_len); uint32_t index_len_get(void) const { return index_len_; } /* Return size in byte of the drawable data buffer range. Actual buffer size might be bigger. */ - size_t size_get(void) + size_t size_get(void) const { return index_len_ * to_bytesize(index_type_); }; @@ -91,6 +92,11 @@ class IndexBuf { return is_init_; }; + virtual void bind_as_ssbo(uint binding) = 0; + + virtual const uint32_t *read() const = 0; + uint32_t *unmap(const uint32_t *mapped_memory) const; + private: inline void squeeze_indices_short(uint min_idx, uint max_idx); inline uint index_range(uint *r_min, uint *r_max); @@ -105,6 +111,10 @@ static inline IndexBuf *unwrap(GPUIndexBuf *indexbuf) { return reinterpret_cast(indexbuf); } +static inline const IndexBuf *unwrap(const GPUIndexBuf *indexbuf) +{ + return reinterpret_cast(indexbuf); +} static inline int indices_per_primitive(GPUPrimType prim_type) { diff --git a/source/blender/gpu/intern/gpu_shader.cc b/source/blender/gpu/intern/gpu_shader.cc index aea27756708..265dec7c56a 100644 --- a/source/blender/gpu/intern/gpu_shader.cc +++ b/source/blender/gpu/intern/gpu_shader.cc @@ -290,6 +290,7 @@ static void standard_defines(Vector &sources) GPUShader *GPU_shader_create_ex(const char *vertcode, const char *fragcode, const char *geomcode, + const char *computecode, const char *libcode, const char *defines, const eGPUShaderTFBType tf_type, @@ -297,8 +298,10 @@ GPUShader *GPU_shader_create_ex(const char *vertcode, const int tf_count, const char *shname) { - /* At least a vertex shader and a fragment shader are required. */ - BLI_assert((fragcode != nullptr) && (vertcode != nullptr)); + /* At least a vertex shader and a fragment shader are required, or only a compute shader. 
*/ + BLI_assert(((fragcode != nullptr) && (vertcode != nullptr) && (computecode == nullptr)) || + ((fragcode == nullptr) && (vertcode == nullptr) && (geomcode == nullptr) && + (computecode != nullptr))); Shader *shader = GPUBackend::get()->shader_alloc(shname); @@ -349,6 +352,21 @@ GPUShader *GPU_shader_create_ex(const char *vertcode, shader->geometry_shader_from_glsl(sources); } + if (computecode) { + Vector sources; + standard_defines(sources); + sources.append("#define GPU_COMPUTE_SHADER\n"); + if (defines) { + sources.append(defines); + } + if (libcode) { + sources.append(libcode); + } + sources.append(computecode); + + shader->compute_shader_from_glsl(sources); + } + if (tf_names != nullptr && tf_count > 0) { BLI_assert(tf_type != GPU_SHADER_TFB_NONE); shader->transform_feedback_names_set(Span(tf_names, tf_count), tf_type); @@ -380,8 +398,33 @@ GPUShader *GPU_shader_create(const char *vertcode, const char *defines, const char *shname) { - return GPU_shader_create_ex( - vertcode, fragcode, geomcode, libcode, defines, GPU_SHADER_TFB_NONE, nullptr, 0, shname); + return GPU_shader_create_ex(vertcode, + fragcode, + geomcode, + nullptr, + libcode, + defines, + GPU_SHADER_TFB_NONE, + nullptr, + 0, + shname); +} + +GPUShader *GPU_shader_create_compute(const char *computecode, + const char *libcode, + const char *defines, + const char *shname) +{ + return GPU_shader_create_ex(nullptr, + nullptr, + nullptr, + computecode, + libcode, + defines, + GPU_SHADER_TFB_NONE, + nullptr, + 0, + shname); } GPUShader *GPU_shader_create_from_python(const char *vertcode, @@ -402,6 +445,7 @@ GPUShader *GPU_shader_create_from_python(const char *vertcode, GPUShader *sh = GPU_shader_create_ex(vertcode, fragcode, geomcode, + nullptr, libcode, defines, GPU_SHADER_TFB_NONE, @@ -567,6 +611,13 @@ int GPU_shader_get_builtin_block(GPUShader *shader, int builtin) return interface->ubo_builtin((GPUUniformBlockBuiltin)builtin); } +int GPU_shader_get_ssbo(GPUShader *shader, const char *name) +{ + 
ShaderInterface *interface = unwrap(shader)->interface; + const ShaderInput *ssbo = interface->ssbo_get(name); + return ssbo ? ssbo->location : -1; +} + /* DEPRECATED. */ int GPU_shader_get_uniform_block(GPUShader *shader, const char *name) { diff --git a/source/blender/gpu/intern/gpu_shader_interface.cc b/source/blender/gpu/intern/gpu_shader_interface.cc index c584c40eca8..ae94112b17b 100644 --- a/source/blender/gpu/intern/gpu_shader_interface.cc +++ b/source/blender/gpu/intern/gpu_shader_interface.cc @@ -80,6 +80,8 @@ void ShaderInterface::debug_print() Span attrs = Span(inputs_, attr_len_); Span ubos = Span(inputs_ + attr_len_, ubo_len_); Span uniforms = Span(inputs_ + attr_len_ + ubo_len_, uniform_len_); + Span ssbos = Span(inputs_ + attr_len_ + ubo_len_ + uniform_len_, + ssbo_len_); char *name_buf = name_buffer_; const char format[] = " | %.8x : %4d : %s\n"; @@ -117,6 +119,13 @@ void ShaderInterface::debug_print() } } + if (ssbos.size() > 0) { + printf("\n Shader Storage Objects :\n"); + } + for (const ShaderInput &ssbo : ssbos) { + printf(format, ssbo.name_hash, ssbo.binding, name_buf + ssbo.name_offset); + } + printf("\n"); } diff --git a/source/blender/gpu/intern/gpu_shader_interface.hh b/source/blender/gpu/intern/gpu_shader_interface.hh index aec58544111..ebed7b15170 100644 --- a/source/blender/gpu/intern/gpu_shader_interface.hh +++ b/source/blender/gpu/intern/gpu_shader_interface.hh @@ -60,6 +60,7 @@ class ShaderInterface { uint attr_len_ = 0; uint ubo_len_ = 0; uint uniform_len_ = 0; + uint ssbo_len_ = 0; /** Enabled bind-points that needs to be fed with data. 
*/ uint16_t enabled_attr_mask_ = 0; uint16_t enabled_ubo_mask_ = 0; @@ -99,6 +100,11 @@ class ShaderInterface { return input_lookup(inputs_ + attr_len_ + ubo_len_, uniform_len_, binding); } + inline const ShaderInput *ssbo_get(const char *name) const + { + return input_lookup(inputs_ + attr_len_ + ubo_len_ + uniform_len_, ssbo_len_, name); + } + inline const char *input_name_get(const ShaderInput *input) const { return name_buffer_ + input->name_offset; diff --git a/source/blender/gpu/intern/gpu_shader_private.hh b/source/blender/gpu/intern/gpu_shader_private.hh index d9327bbc0f4..281f01dbc22 100644 --- a/source/blender/gpu/intern/gpu_shader_private.hh +++ b/source/blender/gpu/intern/gpu_shader_private.hh @@ -49,6 +49,7 @@ class Shader { virtual void vertex_shader_from_glsl(MutableSpan sources) = 0; virtual void geometry_shader_from_glsl(MutableSpan sources) = 0; virtual void fragment_shader_from_glsl(MutableSpan sources) = 0; + virtual void compute_shader_from_glsl(MutableSpan sources) = 0; virtual bool finalize(void) = 0; virtual void transform_feedback_names_set(Span name_list, diff --git a/source/blender/gpu/intern/gpu_vertex_buffer.cc b/source/blender/gpu/intern/gpu_vertex_buffer.cc index 09b9eba9f95..3ecbb740a0c 100644 --- a/source/blender/gpu/intern/gpu_vertex_buffer.cc +++ b/source/blender/gpu/intern/gpu_vertex_buffer.cc @@ -149,6 +149,16 @@ GPUVertBuf *GPU_vertbuf_duplicate(GPUVertBuf *verts_) return wrap(unwrap(verts_)->duplicate()); } +const void *GPU_vertbuf_read(GPUVertBuf *verts) +{ + return unwrap(verts)->read(); +} + +void *GPU_vertbuf_unmap(const GPUVertBuf *verts, const void *mapped_data) +{ + return unwrap(verts)->unmap(mapped_data); +} + /** Same as discard but does not free. 
*/ void GPU_vertbuf_clear(GPUVertBuf *verts) { @@ -324,6 +334,11 @@ void GPU_vertbuf_use(GPUVertBuf *verts) unwrap(verts)->upload(); } +void GPU_vertbuf_bind_as_ssbo(struct GPUVertBuf *verts, int binding) +{ + unwrap(verts)->bind_as_ssbo(binding); +} + /* XXX this is just a wrapper for the use of the Hair refine workaround. * To be used with GPU_vertbuf_use(). */ void GPU_vertbuf_update_sub(GPUVertBuf *verts, uint start, uint len, void *data) diff --git a/source/blender/gpu/intern/gpu_vertex_buffer_private.hh b/source/blender/gpu/intern/gpu_vertex_buffer_private.hh index 67a09f6f83c..9531c2c1a5f 100644 --- a/source/blender/gpu/intern/gpu_vertex_buffer_private.hh +++ b/source/blender/gpu/intern/gpu_vertex_buffer_private.hh @@ -66,6 +66,7 @@ class VertBuf { void allocate(uint vert_len); void resize(uint vert_len); void upload(void); + virtual void bind_as_ssbo(uint binding) = 0; VertBuf *duplicate(void); @@ -96,6 +97,8 @@ class VertBuf { } virtual void update_sub(uint start, uint len, void *data) = 0; + virtual const void *read() const = 0; + virtual void *unmap(const void *mapped_data) const = 0; protected: virtual void acquire_data(void) = 0; diff --git a/source/blender/gpu/opengl/gl_backend.cc b/source/blender/gpu/opengl/gl_backend.cc index 31b6549fc3b..fb03a2c2d2a 100644 --- a/source/blender/gpu/opengl/gl_backend.cc +++ b/source/blender/gpu/opengl/gl_backend.cc @@ -437,6 +437,8 @@ void GLBackend::capabilities_init() GCaps.mem_stats_support = GLEW_NVX_gpu_memory_info || GLEW_ATI_meminfo; GCaps.shader_image_load_store_support = GLEW_ARB_shader_image_load_store; + GCaps.compute_shader_support = GLEW_ARB_compute_shader; + GCaps.shader_storage_buffer_objects_support = GLEW_ARB_shader_storage_buffer_object; /* GL specific capabilities. 
*/ glGetIntegerv(GL_MAX_3D_TEXTURE_SIZE, &GLContext::max_texture_3d_size); glGetIntegerv(GL_MAX_CUBE_MAP_TEXTURE_SIZE, &GLContext::max_cubemap_size); diff --git a/source/blender/gpu/opengl/gl_backend.hh b/source/blender/gpu/opengl/gl_backend.hh index 231e5811b45..e9dcdffced0 100644 --- a/source/blender/gpu/opengl/gl_backend.hh +++ b/source/blender/gpu/opengl/gl_backend.hh @@ -28,6 +28,7 @@ #include "BLI_vector.hh" #include "gl_batch.hh" +#include "gl_compute.hh" #include "gl_context.hh" #include "gl_drawlist.hh" #include "gl_framebuffer.hh" @@ -126,6 +127,12 @@ class GLBackend : public GPUBackend { return shared_orphan_list_; }; + void compute_dispatch(int groups_x_len, int groups_y_len, int groups_z_len) override + { + GLContext::get()->state_manager_active_get()->apply_state(); + GLCompute::dispatch(groups_x_len, groups_y_len, groups_z_len); + } + private: static void platform_init(void); static void platform_exit(void); diff --git a/source/blender/gpu/opengl/gl_compute.cc b/source/blender/gpu/opengl/gl_compute.cc new file mode 100644 index 00000000000..fa8317dde4a --- /dev/null +++ b/source/blender/gpu/opengl/gl_compute.cc @@ -0,0 +1,35 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +/** \file + * \ingroup gpu + */ + +#include "gl_compute.hh" + +#include "gl_debug.hh" + +#include "glew-mx.h" + +namespace blender::gpu { + +void GLCompute::dispatch(int group_x_len, int group_y_len, int group_z_len) +{ + glDispatchCompute(group_x_len, group_y_len, group_z_len); + debug::check_gl_error("Dispatch Compute"); +} + +} // namespace blender::gpu diff --git a/source/blender/gpu/opengl/gl_compute.hh b/source/blender/gpu/opengl/gl_compute.hh new file mode 100644 index 00000000000..2fd918ddd10 --- /dev/null +++ b/source/blender/gpu/opengl/gl_compute.hh @@ -0,0 +1,30 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +/** \file + * \ingroup gpu + */ + +#pragma once + +namespace blender::gpu { + +class GLCompute { + public: + static void dispatch(int group_x_len, int group_y_len, int group_z_len); +}; + +} // namespace blender::gpu diff --git a/source/blender/gpu/opengl/gl_index_buffer.cc b/source/blender/gpu/opengl/gl_index_buffer.cc index e2c18c5d0b9..e305f765ad9 100644 --- a/source/blender/gpu/opengl/gl_index_buffer.cc +++ b/source/blender/gpu/opengl/gl_index_buffer.cc @@ -40,17 +40,14 @@ void GLIndexBuf::bind() return; } - if (ibo_id_ == 0) { + const bool allocate_on_device = ibo_id_ == 0; + if (allocate_on_device) { glGenBuffers(1, &ibo_id_); - - if (data_ == nullptr) { - debug::raise_gl_error("Trying to use Index Buffer but the buffer contains no data"); - } } glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ibo_id_); - if (data_ != nullptr) { + if (data_ != nullptr || allocate_on_device) { size_t size = this->size_get(); /* Sends data to GPU. */ glBufferData(GL_ELEMENT_ARRAY_BUFFER, size, data_, GL_STATIC_DRAW); @@ -59,4 +56,29 @@ void GLIndexBuf::bind() } } +void GLIndexBuf::bind_as_ssbo(uint binding) +{ + bind(); + BLI_assert(ibo_id_ != 0); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, binding, ibo_id_); +} + +const uint32_t *GLIndexBuf::read() const +{ + BLI_assert(is_active()); + void *data = glMapBuffer(GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY); + uint32_t *result = static_cast(data); + return result; +} + +bool GLIndexBuf::is_active() const +{ + if (!ibo_id_) { + return false; + } + int active_ibo_id = 0; + glGetIntegerv(GL_ELEMENT_ARRAY_BUFFER_BINDING, &active_ibo_id); + return ibo_id_ == active_ibo_id; +} + } // namespace blender::gpu diff --git a/source/blender/gpu/opengl/gl_index_buffer.hh b/source/blender/gpu/opengl/gl_index_buffer.hh index b84934bb77f..0dbdaa6d398 100644 --- a/source/blender/gpu/opengl/gl_index_buffer.hh +++ b/source/blender/gpu/opengl/gl_index_buffer.hh @@ -34,6 +34,7 @@ namespace blender::gpu { class GLIndexBuf : public IndexBuf { friend class 
GLBatch; friend class GLDrawList; + friend class GLShader; /* For compute shaders. */ private: GLuint ibo_id_ = 0; @@ -42,6 +43,9 @@ class GLIndexBuf : public IndexBuf { ~GLIndexBuf(); void bind(void); + void bind_as_ssbo(uint binding) override; + + const uint32_t *read() const override; void *offset_ptr(uint additional_vertex_offset) const { @@ -57,6 +61,9 @@ class GLIndexBuf : public IndexBuf { return (index_type_ == GPU_INDEX_U16) ? 0xFFFFu : 0xFFFFFFFFu; } + private: + bool is_active() const; + MEM_CXX_CLASS_ALLOC_FUNCS("GLIndexBuf") }; diff --git a/source/blender/gpu/opengl/gl_shader.cc b/source/blender/gpu/opengl/gl_shader.cc index dd08a67517e..e77347d99eb 100644 --- a/source/blender/gpu/opengl/gl_shader.cc +++ b/source/blender/gpu/opengl/gl_shader.cc @@ -26,6 +26,7 @@ #include "BLI_string.h" #include "BLI_vector.hh" +#include "GPU_capabilities.h" #include "GPU_platform.h" #include "gl_backend.hh" @@ -63,6 +64,7 @@ GLShader::~GLShader() glDeleteShader(vert_shader_); glDeleteShader(geom_shader_); glDeleteShader(frag_shader_); + glDeleteShader(compute_shader_); glDeleteProgram(shader_program_); } @@ -72,7 +74,7 @@ GLShader::~GLShader() /** \name Shader stage creation * \{ */ -char *GLShader::glsl_patch_get() +static char *glsl_patch_default_get() { /** Used for shader patching. Init once. */ static char patch[512] = "\0"; @@ -111,6 +113,30 @@ char *GLShader::glsl_patch_get() return patch; } +static char *glsl_patch_compute_get() +{ + /** Used for shader patching. Init once. */ + static char patch[512] = "\0"; + if (patch[0] != '\0') { + return patch; + } + + size_t slen = 0; + /* Version need to go first. 
*/ + STR_CONCAT(patch, slen, "#version 430\n"); + STR_CONCAT(patch, slen, "#extension GL_ARB_compute_shader :enable\n"); + BLI_assert(slen < sizeof(patch)); + return patch; +} + +char *GLShader::glsl_patch_get(GLenum gl_stage) +{ + if (gl_stage == GL_COMPUTE_SHADER) { + return glsl_patch_compute_get(); + } + return glsl_patch_default_get(); +} + /* Create, compile and attach the shader stage to the shader program. */ GLuint GLShader::create_shader_stage(GLenum gl_stage, MutableSpan sources) { @@ -121,7 +147,7 @@ GLuint GLShader::create_shader_stage(GLenum gl_stage, MutableSpan } /* Patch the shader code using the first source slot. */ - sources[0] = glsl_patch_get(); + sources[0] = glsl_patch_get(gl_stage); glShaderSource(shader, sources.size(), sources.data(), nullptr); glCompileShader(shader); @@ -142,6 +168,9 @@ GLuint GLShader::create_shader_stage(GLenum gl_stage, MutableSpan case GL_FRAGMENT_SHADER: this->print_log(sources, log, "FragShader", !status); break; + case GL_COMPUTE_SHADER: + this->print_log(sources, log, "ComputeShader", !status); + break; } } } @@ -172,6 +201,11 @@ void GLShader::fragment_shader_from_glsl(MutableSpan sources) frag_shader_ = this->create_shader_stage(GL_FRAGMENT_SHADER, sources); } +void GLShader::compute_shader_from_glsl(MutableSpan sources) +{ + compute_shader_ = this->create_shader_stage(GL_COMPUTE_SHADER, sources); +} + bool GLShader::finalize() { if (compilation_failed_) { diff --git a/source/blender/gpu/opengl/gl_shader.hh b/source/blender/gpu/opengl/gl_shader.hh index 152eb2f068a..48aaaf2283d 100644 --- a/source/blender/gpu/opengl/gl_shader.hh +++ b/source/blender/gpu/opengl/gl_shader.hh @@ -43,6 +43,7 @@ class GLShader : public Shader { GLuint vert_shader_ = 0; GLuint geom_shader_ = 0; GLuint frag_shader_ = 0; + GLuint compute_shader_ = 0; /** True if any shader failed to compile. 
*/ bool compilation_failed_ = false; @@ -56,6 +57,7 @@ class GLShader : public Shader { void vertex_shader_from_glsl(MutableSpan sources) override; void geometry_shader_from_glsl(MutableSpan sources) override; void fragment_shader_from_glsl(MutableSpan sources) override; + void compute_shader_from_glsl(MutableSpan sources) override; bool finalize(void) override; void transform_feedback_names_set(Span name_list, @@ -75,7 +77,7 @@ class GLShader : public Shader { int program_handle_get(void) const override; private: - char *glsl_patch_get(void); + char *glsl_patch_get(GLenum gl_stage); GLuint create_shader_stage(GLenum gl_stage, MutableSpan sources); diff --git a/source/blender/gpu/opengl/gl_shader_interface.cc b/source/blender/gpu/opengl/gl_shader_interface.cc index 5870c645bf4..9cf072b2e8a 100644 --- a/source/blender/gpu/opengl/gl_shader_interface.cc +++ b/source/blender/gpu/opengl/gl_shader_interface.cc @@ -29,6 +29,8 @@ #include "gl_shader_interface.hh" +#include "GPU_capabilities.h" + namespace blender::gpu { /* -------------------------------------------------------------------- */ @@ -125,6 +127,18 @@ static inline int image_binding(int32_t program, return -1; } } + +static inline int ssbo_binding(int32_t program, uint32_t ssbo_index) +{ + GLint binding = -1; + GLenum property = GL_BUFFER_BINDING; + GLint values_written = 0; + glGetProgramResourceiv( + program, GL_SHADER_STORAGE_BLOCK, ssbo_index, 1, &property, 1, &values_written, &binding); + + return binding; +} + /** \} */ /* -------------------------------------------------------------------- */ @@ -149,6 +163,13 @@ GLShaderInterface::GLShaderInterface(GLuint program) glGetProgramiv(program, GL_ACTIVE_UNIFORMS, &active_uniform_len); uniform_len = active_uniform_len; + GLint max_ssbo_name_len = 0, ssbo_len = 0; + if (GPU_shader_storage_buffer_objects_support()) { + glGetProgramInterfaceiv(program, GL_SHADER_STORAGE_BLOCK, GL_ACTIVE_RESOURCES, &ssbo_len); + glGetProgramInterfaceiv( + program, 
GL_SHADER_STORAGE_BLOCK, GL_MAX_NAME_LENGTH, &max_ssbo_name_len); + } + BLI_assert(ubo_len <= 16 && "enabled_ubo_mask_ is uint16_t"); /* Work around driver bug with Intel HD 4600 on Windows 7/8, where @@ -162,6 +183,9 @@ GLShaderInterface::GLShaderInterface(GLuint program) if (uniform_len > 0 && max_uniform_name_len == 0) { max_uniform_name_len = 256; } + if (ssbo_len > 0 && max_ssbo_name_len == 0) { + max_ssbo_name_len = 256; + } /* GL_ACTIVE_UNIFORMS lied to us! Remove the UBO uniforms from the total before * allocating the uniform array. */ @@ -186,11 +210,12 @@ GLShaderInterface::GLShaderInterface(GLuint program) } MEM_freeN(ubo_uni_ids); - int input_tot_len = attr_len + ubo_len + uniform_len; + int input_tot_len = attr_len + ubo_len + uniform_len + ssbo_len; inputs_ = (ShaderInput *)MEM_callocN(sizeof(ShaderInput) * input_tot_len, __func__); const uint32_t name_buffer_len = attr_len * max_attr_name_len + ubo_len * max_ubo_name_len + - uniform_len * max_uniform_name_len; + uniform_len * max_uniform_name_len + + ssbo_len * max_ssbo_name_len; name_buffer_ = (char *)MEM_mallocN(name_buffer_len, "name_buffer"); uint32_t name_buffer_offset = 0; @@ -257,6 +282,22 @@ GLShaderInterface::GLShaderInterface(GLuint program) } } + /* SSBOs */ + for (int i = 0; i < ssbo_len; i++) { + char *name = name_buffer_ + name_buffer_offset; + GLsizei remaining_buffer = name_buffer_len - name_buffer_offset; + GLsizei name_len = 0; + glGetProgramResourceName( + program, GL_SHADER_STORAGE_BLOCK, i, remaining_buffer, &name_len, name); + + const GLint binding = ssbo_binding(program, i); + + ShaderInput *input = &inputs_[attr_len_ + ubo_len_ + uniform_len_ + ssbo_len_++]; + input->binding = input->location = binding; + + name_buffer_offset += this->set_input_name(input, name, name_len); + } + /* Builtin Uniforms */ for (int32_t u_int = 0; u_int < GPU_NUM_UNIFORMS; u_int++) { GPUUniformBuiltin u = static_cast(u_int); diff --git a/source/blender/gpu/opengl/gl_state.hh 
b/source/blender/gpu/opengl/gl_state.hh index 651c3c22afa..3b4b40b1d10 100644 --- a/source/blender/gpu/opengl/gl_state.hh +++ b/source/blender/gpu/opengl/gl_state.hh @@ -121,6 +121,9 @@ static inline GLbitfield to_gl(eGPUBarrier barrier_bits) if (barrier_bits & GPU_BARRIER_TEXTURE_FETCH) { barrier |= GL_TEXTURE_FETCH_BARRIER_BIT; } + if (barrier_bits & GPU_BARRIER_SHADER_STORAGE) { + barrier |= GL_SHADER_STORAGE_BARRIER_BIT; + } return barrier; } diff --git a/source/blender/gpu/opengl/gl_texture.cc b/source/blender/gpu/opengl/gl_texture.cc index b65686165d9..e2478a9976c 100644 --- a/source/blender/gpu/opengl/gl_texture.cc +++ b/source/blender/gpu/opengl/gl_texture.cc @@ -368,7 +368,7 @@ void GLTexture::copy_to(Texture *dst_) void *GLTexture::read(int mip, eGPUDataFormat type) { BLI_assert(!(format_flag_ & GPU_FORMAT_COMPRESSED)); - BLI_assert(mip <= mipmaps_); + BLI_assert(mip <= mipmaps_ || mip == 0); BLI_assert(validate_data_format(format_, type)); /* NOTE: mip_size_get() won't override any dimension that is equal to 0. */ diff --git a/source/blender/gpu/opengl/gl_vertex_buffer.cc b/source/blender/gpu/opengl/gl_vertex_buffer.cc index a56d5269fde..ce16a491528 100644 --- a/source/blender/gpu/opengl/gl_vertex_buffer.cc +++ b/source/blender/gpu/opengl/gl_vertex_buffer.cc @@ -29,6 +29,10 @@ namespace blender::gpu { void GLVertBuf::acquire_data() { + if (usage_ == GPU_USAGE_DEVICE_ONLY) { + return; + } + /* Discard previous data if any. */ MEM_SAFE_FREE(data); data = (uchar *)MEM_mallocN(sizeof(uchar) * this->size_alloc_get(), __func__); @@ -36,6 +40,10 @@ void GLVertBuf::acquire_data() void GLVertBuf::resize_data() { + if (usage_ == GPU_USAGE_DEVICE_ONLY) { + return; + } + data = (uchar *)MEM_reallocN(data, sizeof(uchar) * this->size_alloc_get()); } @@ -94,8 +102,10 @@ void GLVertBuf::bind() vbo_size_ = this->size_used_get(); /* Orphan the vbo to avoid sync then upload data. 
*/ glBufferData(GL_ARRAY_BUFFER, vbo_size_, nullptr, to_gl(usage_)); - glBufferSubData(GL_ARRAY_BUFFER, 0, vbo_size_, data); - + /* Do not transfer data from host to device when buffer is device only. */ + if (usage_ != GPU_USAGE_DEVICE_ONLY) { + glBufferSubData(GL_ARRAY_BUFFER, 0, vbo_size_, data); + } memory_usage += vbo_size_; if (usage_ == GPU_USAGE_STATIC) { @@ -106,6 +116,37 @@ void GLVertBuf::bind() } } +void GLVertBuf::bind_as_ssbo(uint binding) +{ + bind(); + BLI_assert(vbo_id_ != 0); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, binding, vbo_id_); +} + +const void *GLVertBuf::read() const +{ + BLI_assert(is_active()); + void *result = glMapBuffer(GL_ARRAY_BUFFER, GL_READ_ONLY); + return result; +} + +void *GLVertBuf::unmap(const void *mapped_data) const +{ + void *result = MEM_mallocN(vbo_size_, __func__); + memcpy(result, mapped_data, vbo_size_); + return result; +} + +bool GLVertBuf::is_active() const +{ + if (!vbo_id_) { + return false; + } + int active_vbo_id = 0; + glGetIntegerv(GL_ARRAY_BUFFER_BINDING, &active_vbo_id); + return vbo_id_ == active_vbo_id; +} + void GLVertBuf::update_sub(uint start, uint len, void *data) { glBufferSubData(GL_ARRAY_BUFFER, start, len, data); diff --git a/source/blender/gpu/opengl/gl_vertex_buffer.hh b/source/blender/gpu/opengl/gl_vertex_buffer.hh index e2bf6cd00e8..6c38a2225b3 100644 --- a/source/blender/gpu/opengl/gl_vertex_buffer.hh +++ b/source/blender/gpu/opengl/gl_vertex_buffer.hh @@ -47,12 +47,19 @@ class GLVertBuf : public VertBuf { void update_sub(uint start, uint len, void *data) override; + const void *read() const override; + void *unmap(const void *mapped_data) const override; + protected: void acquire_data(void) override; void resize_data(void) override; void release_data(void) override; void upload_data(void) override; void duplicate_data(VertBuf *dst) override; + void bind_as_ssbo(uint binding) override; + + private: + bool is_active() const; MEM_CXX_CLASS_ALLOC_FUNCS("GLVertBuf"); }; @@ -65,6 +72,7 @@ 
static inline GLenum to_gl(GPUUsageType type) case GPU_USAGE_DYNAMIC: return GL_DYNAMIC_DRAW; case GPU_USAGE_STATIC: + case GPU_USAGE_DEVICE_ONLY: return GL_STATIC_DRAW; default: BLI_assert(0); diff --git a/source/blender/gpu/tests/gpu_shader_test.cc b/source/blender/gpu/tests/gpu_shader_test.cc new file mode 100644 index 00000000000..e8645b89e41 --- /dev/null +++ b/source/blender/gpu/tests/gpu_shader_test.cc @@ -0,0 +1,301 @@ +/* Apache License, Version 2.0 */ + +#include "testing/testing.h" + +#include "GPU_capabilities.h" +#include "GPU_compute.h" +#include "GPU_index_buffer.h" +#include "GPU_shader.h" +#include "GPU_texture.h" +#include "GPU_vertex_buffer.h" +#include "GPU_vertex_format.h" + +#include "MEM_guardedalloc.h" + +#include "gpu_testing.hh" + +#include "GPU_glew.h" + +namespace blender::gpu::tests { + +TEST_F(GPUTest, gpu_shader_compute_2d) +{ + + if (!GPU_compute_shader_support()) { + /* We can't test as the platform does not support compute shaders. */ + std::cout << "Skipping compute shader test: platform not supported"; + return; + } + + static constexpr uint SIZE = 512; + + /* Build compute shader. */ + const char *compute_glsl = R"( + +layout(local_size_x = 1, local_size_y = 1) in; +layout(rgba32f, binding = 0) uniform image2D img_output; + +void main() { + vec4 pixel = vec4(1.0, 0.5, 0.2, 1.0); + imageStore(img_output, ivec2(gl_GlobalInvocationID.xy), pixel); +} + +)"; + + GPUShader *shader = GPU_shader_create_compute( + compute_glsl, nullptr, nullptr, "gpu_shader_compute_2d"); + EXPECT_NE(shader, nullptr); + + /* Create texture to store result and attach to shader. */ + GPUTexture *texture = GPU_texture_create_2d( + "gpu_shader_compute_2d", SIZE, SIZE, 0, GPU_RGBA32F, nullptr); + EXPECT_NE(texture, nullptr); + + GPU_shader_bind(shader); + GPU_texture_image_bind(texture, GPU_shader_get_texture_binding(shader, "img_output")); + + /* Dispatch compute task. */ + GPU_compute_dispatch(shader, SIZE, SIZE, 1); + + /* Check if compute has been done.
*/ + GPU_memory_barrier(GPU_BARRIER_TEXTURE_FETCH); + float *data = static_cast(GPU_texture_read(texture, GPU_DATA_FLOAT, 0)); + EXPECT_NE(data, nullptr); + for (int index = 0; index < SIZE * SIZE; index++) { + EXPECT_FLOAT_EQ(data[index * 4 + 0], 1.0f); + EXPECT_FLOAT_EQ(data[index * 4 + 1], 0.5f); + EXPECT_FLOAT_EQ(data[index * 4 + 2], 0.2f); + EXPECT_FLOAT_EQ(data[index * 4 + 3], 1.0f); + } + MEM_freeN(data); + + /* Cleanup. */ + GPU_shader_unbind(); + GPU_texture_unbind(texture); + GPU_texture_free(texture); + GPU_shader_free(shader); +} + +TEST_F(GPUTest, gpu_shader_compute_1d) +{ + + if (!GPU_compute_shader_support()) { + /* We can't test as the platform does not support compute shaders. */ + std::cout << "Skipping compute shader test: platform not supported"; + return; + } + + static constexpr uint SIZE = 10; + + /* Build compute shader. */ + const char *compute_glsl = R"( + +layout(local_size_x = 1) in; + +layout(rgba32f, binding = 1) uniform image1D outputVboData; + +void main() { + int index = int(gl_GlobalInvocationID.x); + vec4 pos = vec4(gl_GlobalInvocationID.x); + imageStore(outputVboData, index, pos); +} + +)"; + + GPUShader *shader = GPU_shader_create_compute( + compute_glsl, nullptr, nullptr, "gpu_shader_compute_1d"); + EXPECT_NE(shader, nullptr); + + /* Construct Texture. */ + GPUTexture *texture = GPU_texture_create_1d("gpu_shader_compute_1d", SIZE, 0, GPU_RGBA32F, NULL); + EXPECT_NE(texture, nullptr); + + GPU_shader_bind(shader); + GPU_texture_image_bind(texture, GPU_shader_get_texture_binding(shader, "outputVboData")); + + /* Dispatch compute task. */ + GPU_compute_dispatch(shader, SIZE, 1, 1); + + /* Check if compute has been done. */ + GPU_memory_barrier(GPU_BARRIER_TEXTURE_FETCH); + + /* Read the texture back to check the result.
*/ + float *data = static_cast(GPU_texture_read(texture, GPU_DATA_FLOAT, 0)); + EXPECT_NE(data, nullptr); + for (int index = 0; index < SIZE; index++) { + float expected_value = index; + EXPECT_FLOAT_EQ(data[index * 4 + 0], expected_value); + EXPECT_FLOAT_EQ(data[index * 4 + 1], expected_value); + EXPECT_FLOAT_EQ(data[index * 4 + 2], expected_value); + EXPECT_FLOAT_EQ(data[index * 4 + 3], expected_value); + } + MEM_freeN(data); + + /* Cleanup. */ + GPU_shader_unbind(); + GPU_texture_unbind(texture); + GPU_texture_free(texture); + GPU_shader_free(shader); +} + +TEST_F(GPUTest, gpu_shader_compute_vbo) +{ + + if (!GPU_compute_shader_support()) { + /* We can't test as the platform does not support compute shaders. */ + std::cout << "Skipping compute shader test: platform not supported"; + return; + } + + static constexpr uint SIZE = 128; + + /* Build compute shader. */ + const char *compute_glsl = R"( + +layout(local_size_x = 1) in; + +layout(std430, binding = 0) writeonly buffer outputVboData +{ + vec4 out_positions[]; +}; + +void main() { + uint index = gl_GlobalInvocationID.x; + vec4 pos = vec4(gl_GlobalInvocationID.x); + out_positions[index] = pos; +} + +)"; + + GPUShader *shader = GPU_shader_create_compute( + compute_glsl, nullptr, nullptr, "gpu_shader_compute_vbo"); + EXPECT_NE(shader, nullptr); + GPU_shader_bind(shader); + + /* Construct VBO. */ + static GPUVertFormat format = {0}; + GPU_vertformat_attr_add(&format, "pos", GPU_COMP_F32, 4, GPU_FETCH_FLOAT); + GPUVertBuf *vbo = GPU_vertbuf_create_with_format_ex(&format, GPU_USAGE_DEVICE_ONLY); + GPU_vertbuf_data_alloc(vbo, SIZE); + GPU_vertbuf_bind_as_ssbo(vbo, GPU_shader_get_ssbo(shader, "outputVboData")); + + /* Dispatch compute task. */ + GPU_compute_dispatch(shader, SIZE, 1, 1); + + /* Check if compute has been done. */ + GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE); + + /* Download the vertex buffer.
*/ + const float *data = static_cast(GPU_vertbuf_read(vbo)); + ASSERT_NE(data, nullptr); + for (int index = 0; index < SIZE; index++) { + float expected_value = index; + EXPECT_FLOAT_EQ(data[index * 4 + 0], expected_value); + EXPECT_FLOAT_EQ(data[index * 4 + 1], expected_value); + EXPECT_FLOAT_EQ(data[index * 4 + 2], expected_value); + EXPECT_FLOAT_EQ(data[index * 4 + 3], expected_value); + } + + /* Cleanup. */ + GPU_shader_unbind(); + GPU_vertbuf_discard(vbo); + GPU_shader_free(shader); +} + +TEST_F(GPUTest, gpu_shader_compute_ibo) +{ + + if (!GPU_compute_shader_support()) { + /* We can't test as the platform does not support compute shaders. */ + std::cout << "Skipping compute shader test: platform not supported"; + return; + } + + static constexpr uint SIZE = 128; + + /* Build compute shader. */ + const char *compute_glsl = R"( + +layout(local_size_x = 1) in; + +layout(std430, binding = 1) writeonly buffer outputIboData +{ + uint out_indexes[]; +}; + +void main() { + uint store_index = int(gl_GlobalInvocationID.x); + out_indexes[store_index] = store_index; +} + +)"; + + GPUShader *shader = GPU_shader_create_compute( + compute_glsl, nullptr, nullptr, "gpu_shader_compute_vbo"); + EXPECT_NE(shader, nullptr); + GPU_shader_bind(shader); + + /* Construct IBO. */ + GPUIndexBuf *ibo = GPU_indexbuf_build_on_device(SIZE); + GPU_indexbuf_bind_as_ssbo(ibo, GPU_shader_get_ssbo(shader, "outputIboData")); + + /* Dispatch compute task. */ + GPU_compute_dispatch(shader, SIZE, 1, 1); + + /* Check if compute has been done. */ + GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE); + + /* Download the index buffer. */ + const uint32_t *data = GPU_indexbuf_read(ibo); + ASSERT_NE(data, nullptr); + for (int index = 0; index < SIZE; index++) { + uint32_t expected = index; + EXPECT_EQ(data[index], expected); + } + + /* Cleanup.
*/ + GPU_shader_unbind(); + GPU_indexbuf_discard(ibo); + GPU_shader_free(shader); +} + +TEST_F(GPUTest, gpu_shader_ssbo_binding) +{ + if (!GPU_compute_shader_support()) { + /* We can't test as the platform does not support compute shaders. */ + std::cout << "Skipping compute shader test: platform not supported"; + return; + } + + /* Build compute shader. */ + const char *compute_glsl = R"( + +layout(local_size_x = 1) in; + +layout(std430, binding = 0) buffer ssboBinding0 +{ + int data0[]; +}; +layout(std430, binding = 1) buffer ssboBinding1 +{ + int data1[]; +}; + +void main() { +} + +)"; + + GPUShader *shader = GPU_shader_create_compute(compute_glsl, nullptr, nullptr, "gpu_shader_ssbo"); + EXPECT_NE(shader, nullptr); + GPU_shader_bind(shader); + + EXPECT_EQ(0, GPU_shader_get_ssbo(shader, "ssboBinding0")); + EXPECT_EQ(1, GPU_shader_get_ssbo(shader, "ssboBinding1")); + + /* Cleanup. */ + GPU_shader_unbind(); + GPU_shader_free(shader); +} + +} // namespace blender::gpu::tests -- cgit v1.2.3