diff options
author | Jeroen Bakker <jbakker> | 2021-05-26 17:49:17 +0300 |
---|---|---|
committer | Jeroen Bakker <jeroen@blender.org> | 2021-05-26 17:49:30 +0300 |
commit | 87055dc71b0d50cd25660969b55cda7d44af6a12 (patch) | |
tree | c30337eadc92f072f524d6d0670b6a1043e29074 /source/blender/gpu/intern | |
parent | e459a25e6cbe9321ad25f87843e2fe5a8a2306f9 (diff) |
GPU: Compute Pipeline.
With the compute pipeline, calculations can be offloaded to the GPU.
This patch only adds the framework for compute, so there are no changes for
users at this moment.
NOTE: As this is an OpenGL 4.3 feature, it must always have a fallback.
Use `GPU_compute_shader_support` to check if compute pipeline can be used.
Check `gpu_shader_compute*` test cases for usage.
This patch also adds support for shader storage buffer objects and device-only
vertex/index buffers.
An alternative that had been discussed was adding this to the `GPUBatch`; this
was eventually not chosen as it would lead to more code when used as part of a
shading group. The idea is that we add an `eDRWCommandType` in the near
future.
Reviewed By: fclem
Differential Revision: https://developer.blender.org/D10913
Diffstat (limited to 'source/blender/gpu/intern')
-rw-r--r-- | source/blender/gpu/intern/gpu_backend.hh | 1 | ||||
-rw-r--r-- | source/blender/gpu/intern/gpu_capabilities.cc | 10 | ||||
-rw-r--r-- | source/blender/gpu/intern/gpu_capabilities_private.hh | 2 | ||||
-rw-r--r-- | source/blender/gpu/intern/gpu_compute.cc | 41 | ||||
-rw-r--r-- | source/blender/gpu/intern/gpu_index_buffer.cc | 42 | ||||
-rw-r--r-- | source/blender/gpu/intern/gpu_index_buffer_private.hh | 12 | ||||
-rw-r--r-- | source/blender/gpu/intern/gpu_shader.cc | 59 | ||||
-rw-r--r-- | source/blender/gpu/intern/gpu_shader_interface.cc | 9 | ||||
-rw-r--r-- | source/blender/gpu/intern/gpu_shader_interface.hh | 6 | ||||
-rw-r--r-- | source/blender/gpu/intern/gpu_shader_private.hh | 1 | ||||
-rw-r--r-- | source/blender/gpu/intern/gpu_vertex_buffer.cc | 15 | ||||
-rw-r--r-- | source/blender/gpu/intern/gpu_vertex_buffer_private.hh | 3 |
12 files changed, 196 insertions, 5 deletions
diff --git a/source/blender/gpu/intern/gpu_backend.hh b/source/blender/gpu/intern/gpu_backend.hh index 04ec82a9213..73792215569 100644 --- a/source/blender/gpu/intern/gpu_backend.hh +++ b/source/blender/gpu/intern/gpu_backend.hh @@ -47,6 +47,7 @@ class GPUBackend { static GPUBackend *get(void); virtual void samplers_update(void) = 0; + virtual void compute_dispatch(int groups_x_len, int groups_y_len, int groups_z_len) = 0; virtual Context *context_alloc(void *ghost_window) = 0; diff --git a/source/blender/gpu/intern/gpu_capabilities.cc b/source/blender/gpu/intern/gpu_capabilities.cc index d8764502800..bedc9ad3092 100644 --- a/source/blender/gpu/intern/gpu_capabilities.cc +++ b/source/blender/gpu/intern/gpu_capabilities.cc @@ -148,6 +148,16 @@ bool GPU_use_hq_normals_workaround(void) return GCaps.use_hq_normals_workaround; } +bool GPU_compute_shader_support(void) +{ + return GCaps.compute_shader_support; +} + +bool GPU_shader_storage_buffer_objects_support(void) +{ + return GCaps.shader_storage_buffer_objects_support; +} + bool GPU_shader_image_load_store_support(void) { return GCaps.shader_image_load_store_support; diff --git a/source/blender/gpu/intern/gpu_capabilities_private.hh b/source/blender/gpu/intern/gpu_capabilities_private.hh index 7c1d4590ce8..ee7ef1e69e6 100644 --- a/source/blender/gpu/intern/gpu_capabilities_private.hh +++ b/source/blender/gpu/intern/gpu_capabilities_private.hh @@ -51,6 +51,8 @@ struct GPUCapabilities { const char *(*extension_get)(int); bool mem_stats_support = false; + bool compute_shader_support = false; + bool shader_storage_buffer_objects_support = false; bool shader_image_load_store_support = false; /* OpenGL related workarounds. 
*/ bool mip_render_workaround = false; diff --git a/source/blender/gpu/intern/gpu_compute.cc b/source/blender/gpu/intern/gpu_compute.cc new file mode 100644 index 00000000000..7a8ae2acf9a --- /dev/null +++ b/source/blender/gpu/intern/gpu_compute.cc @@ -0,0 +1,41 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +/** \file + * \ingroup gpu + */ + +#include "GPU_compute.h" + +#include "gpu_backend.hh" + +#ifdef __cplusplus +extern "C" { +#endif + +void GPU_compute_dispatch(GPUShader *shader, + uint groups_x_len, + uint groups_y_len, + uint groups_z_len) +{ + blender::gpu::GPUBackend &gpu_backend = *blender::gpu::GPUBackend::get(); + GPU_shader_bind(shader); + gpu_backend.compute_dispatch(groups_x_len, groups_y_len, groups_z_len); +} + +#ifdef __cplusplus +} +#endif diff --git a/source/blender/gpu/intern/gpu_index_buffer.cc b/source/blender/gpu/intern/gpu_index_buffer.cc index 65932d2dbf4..20a26c0fe9d 100644 --- a/source/blender/gpu/intern/gpu_index_buffer.cc +++ b/source/blender/gpu/intern/gpu_index_buffer.cc @@ -31,6 +31,8 @@ #include "gpu_index_buffer_private.hh" +#include <cstring> + #define KEEP_SINGLE_COPY 1 #define RESTART_INDEX 0xFFFFFFFF @@ -66,6 +68,14 @@ void GPU_indexbuf_init(GPUIndexBufBuilder *builder, GPU_indexbuf_init_ex(builder, prim_type, prim_len * 
(uint)verts_per_prim, vertex_len); } +GPUIndexBuf *GPU_indexbuf_build_on_device(uint index_len) +{ + GPUIndexBuf *elem_ = GPU_indexbuf_calloc(); + IndexBuf *elem = unwrap(elem_); + elem->init_build_on_device(index_len); + return elem_; +} + void GPU_indexbuf_add_generic_vert(GPUIndexBufBuilder *builder, uint v) { #if TRUST_NO_ONE @@ -241,6 +251,15 @@ void IndexBuf::init(uint indices_len, uint32_t *indices) #endif } +void IndexBuf::init_build_on_device(uint index_len) +{ + is_init_ = true; + index_start_ = 0; + index_len_ = index_len; + index_type_ = GPU_INDEX_U32; + data_ = nullptr; +} + void IndexBuf::init_subrange(IndexBuf *elem_src, uint start, uint length) { /* We don't support nested subranges. */ @@ -307,6 +326,14 @@ void IndexBuf::squeeze_indices_short(uint min_idx, uint max_idx) } } +uint32_t *IndexBuf::unmap(const uint32_t *mapped_memory) const +{ + size_t size = size_get(); + uint32_t *result = static_cast<uint32_t *>(MEM_mallocN(size, __func__)); + memcpy(result, mapped_memory, size); + return result; +} + } // namespace blender::gpu /** \} */ @@ -351,6 +378,16 @@ void GPU_indexbuf_create_subrange_in_place(GPUIndexBuf *elem, unwrap(elem)->init_subrange(unwrap(elem_src), start, length); } +const uint32_t *GPU_indexbuf_read(GPUIndexBuf *elem) +{ + return unwrap(elem)->read(); +} + +uint32_t *GPU_indexbuf_unmap(const GPUIndexBuf *elem, const uint32_t *mapped_buffer) +{ + return unwrap(elem)->unmap(mapped_buffer); +} + void GPU_indexbuf_discard(GPUIndexBuf *elem) { delete unwrap(elem); @@ -366,4 +403,9 @@ int GPU_indexbuf_primitive_len(GPUPrimType prim_type) return indices_per_primitive(prim_type); } +void GPU_indexbuf_bind_as_ssbo(GPUIndexBuf *elem, int binding) +{ + unwrap(elem)->bind_as_ssbo(binding); +} + /** \} */ diff --git a/source/blender/gpu/intern/gpu_index_buffer_private.hh b/source/blender/gpu/intern/gpu_index_buffer_private.hh index 2405db8664a..358258604bf 100644 --- a/source/blender/gpu/intern/gpu_index_buffer_private.hh +++ 
b/source/blender/gpu/intern/gpu_index_buffer_private.hh @@ -75,13 +75,14 @@ class IndexBuf { void init(uint indices_len, uint32_t *indices); void init_subrange(IndexBuf *elem_src, uint start, uint length); + void init_build_on_device(uint index_len); uint32_t index_len_get(void) const { return index_len_; } /* Return size in byte of the drawable data buffer range. Actual buffer size might be bigger. */ - size_t size_get(void) + size_t size_get(void) const { return index_len_ * to_bytesize(index_type_); }; @@ -91,6 +92,11 @@ class IndexBuf { return is_init_; }; + virtual void bind_as_ssbo(uint binding) = 0; + + virtual const uint32_t *read() const = 0; + uint32_t *unmap(const uint32_t *mapped_memory) const; + private: inline void squeeze_indices_short(uint min_idx, uint max_idx); inline uint index_range(uint *r_min, uint *r_max); @@ -105,6 +111,10 @@ static inline IndexBuf *unwrap(GPUIndexBuf *indexbuf) { return reinterpret_cast<IndexBuf *>(indexbuf); } +static inline const IndexBuf *unwrap(const GPUIndexBuf *indexbuf) +{ + return reinterpret_cast<const IndexBuf *>(indexbuf); +} static inline int indices_per_primitive(GPUPrimType prim_type) { diff --git a/source/blender/gpu/intern/gpu_shader.cc b/source/blender/gpu/intern/gpu_shader.cc index aea27756708..265dec7c56a 100644 --- a/source/blender/gpu/intern/gpu_shader.cc +++ b/source/blender/gpu/intern/gpu_shader.cc @@ -290,6 +290,7 @@ static void standard_defines(Vector<const char *> &sources) GPUShader *GPU_shader_create_ex(const char *vertcode, const char *fragcode, const char *geomcode, + const char *computecode, const char *libcode, const char *defines, const eGPUShaderTFBType tf_type, @@ -297,8 +298,10 @@ GPUShader *GPU_shader_create_ex(const char *vertcode, const int tf_count, const char *shname) { - /* At least a vertex shader and a fragment shader are required. 
*/ - BLI_assert((fragcode != nullptr) && (vertcode != nullptr)); + /* At least a vertex shader and a fragment shader are required, or only a compute shader. */ + BLI_assert(((fragcode != nullptr) && (vertcode != nullptr) && (computecode == nullptr)) || + ((fragcode == nullptr) && (vertcode == nullptr) && (geomcode == nullptr) && + (computecode != nullptr))); Shader *shader = GPUBackend::get()->shader_alloc(shname); @@ -349,6 +352,21 @@ GPUShader *GPU_shader_create_ex(const char *vertcode, shader->geometry_shader_from_glsl(sources); } + if (computecode) { + Vector<const char *> sources; + standard_defines(sources); + sources.append("#define GPU_COMPUTE_SHADER\n"); + if (defines) { + sources.append(defines); + } + if (libcode) { + sources.append(libcode); + } + sources.append(computecode); + + shader->compute_shader_from_glsl(sources); + } + if (tf_names != nullptr && tf_count > 0) { BLI_assert(tf_type != GPU_SHADER_TFB_NONE); shader->transform_feedback_names_set(Span<const char *>(tf_names, tf_count), tf_type); @@ -380,8 +398,33 @@ GPUShader *GPU_shader_create(const char *vertcode, const char *defines, const char *shname) { - return GPU_shader_create_ex( - vertcode, fragcode, geomcode, libcode, defines, GPU_SHADER_TFB_NONE, nullptr, 0, shname); + return GPU_shader_create_ex(vertcode, + fragcode, + geomcode, + nullptr, + libcode, + defines, + GPU_SHADER_TFB_NONE, + nullptr, + 0, + shname); +} + +GPUShader *GPU_shader_create_compute(const char *computecode, + const char *libcode, + const char *defines, + const char *shname) +{ + return GPU_shader_create_ex(nullptr, + nullptr, + nullptr, + computecode, + libcode, + defines, + GPU_SHADER_TFB_NONE, + nullptr, + 0, + shname); } GPUShader *GPU_shader_create_from_python(const char *vertcode, @@ -402,6 +445,7 @@ GPUShader *GPU_shader_create_from_python(const char *vertcode, GPUShader *sh = GPU_shader_create_ex(vertcode, fragcode, geomcode, + nullptr, libcode, defines, GPU_SHADER_TFB_NONE, @@ -567,6 +611,13 @@ int 
GPU_shader_get_builtin_block(GPUShader *shader, int builtin) return interface->ubo_builtin((GPUUniformBlockBuiltin)builtin); } +int GPU_shader_get_ssbo(GPUShader *shader, const char *name) +{ + ShaderInterface *interface = unwrap(shader)->interface; + const ShaderInput *ssbo = interface->ssbo_get(name); + return ssbo ? ssbo->location : -1; +} + /* DEPRECATED. */ int GPU_shader_get_uniform_block(GPUShader *shader, const char *name) { diff --git a/source/blender/gpu/intern/gpu_shader_interface.cc b/source/blender/gpu/intern/gpu_shader_interface.cc index c584c40eca8..ae94112b17b 100644 --- a/source/blender/gpu/intern/gpu_shader_interface.cc +++ b/source/blender/gpu/intern/gpu_shader_interface.cc @@ -80,6 +80,8 @@ void ShaderInterface::debug_print() Span<ShaderInput> attrs = Span<ShaderInput>(inputs_, attr_len_); Span<ShaderInput> ubos = Span<ShaderInput>(inputs_ + attr_len_, ubo_len_); Span<ShaderInput> uniforms = Span<ShaderInput>(inputs_ + attr_len_ + ubo_len_, uniform_len_); + Span<ShaderInput> ssbos = Span<ShaderInput>(inputs_ + attr_len_ + ubo_len_ + uniform_len_, + ssbo_len_); char *name_buf = name_buffer_; const char format[] = " | %.8x : %4d : %s\n"; @@ -117,6 +119,13 @@ void ShaderInterface::debug_print() } } + if (ssbos.size() > 0) { + printf("\n Shader Storage Objects :\n"); + } + for (const ShaderInput &ssbo : ssbos) { + printf(format, ssbo.name_hash, ssbo.binding, name_buf + ssbo.name_offset); + } + printf("\n"); } diff --git a/source/blender/gpu/intern/gpu_shader_interface.hh b/source/blender/gpu/intern/gpu_shader_interface.hh index aec58544111..ebed7b15170 100644 --- a/source/blender/gpu/intern/gpu_shader_interface.hh +++ b/source/blender/gpu/intern/gpu_shader_interface.hh @@ -60,6 +60,7 @@ class ShaderInterface { uint attr_len_ = 0; uint ubo_len_ = 0; uint uniform_len_ = 0; + uint ssbo_len_ = 0; /** Enabled bind-points that needs to be fed with data. 
*/ uint16_t enabled_attr_mask_ = 0; uint16_t enabled_ubo_mask_ = 0; @@ -99,6 +100,11 @@ class ShaderInterface { return input_lookup(inputs_ + attr_len_ + ubo_len_, uniform_len_, binding); } + inline const ShaderInput *ssbo_get(const char *name) const + { + return input_lookup(inputs_ + attr_len_ + ubo_len_ + uniform_len_, ssbo_len_, name); + } + inline const char *input_name_get(const ShaderInput *input) const { return name_buffer_ + input->name_offset; diff --git a/source/blender/gpu/intern/gpu_shader_private.hh b/source/blender/gpu/intern/gpu_shader_private.hh index d9327bbc0f4..281f01dbc22 100644 --- a/source/blender/gpu/intern/gpu_shader_private.hh +++ b/source/blender/gpu/intern/gpu_shader_private.hh @@ -49,6 +49,7 @@ class Shader { virtual void vertex_shader_from_glsl(MutableSpan<const char *> sources) = 0; virtual void geometry_shader_from_glsl(MutableSpan<const char *> sources) = 0; virtual void fragment_shader_from_glsl(MutableSpan<const char *> sources) = 0; + virtual void compute_shader_from_glsl(MutableSpan<const char *> sources) = 0; virtual bool finalize(void) = 0; virtual void transform_feedback_names_set(Span<const char *> name_list, diff --git a/source/blender/gpu/intern/gpu_vertex_buffer.cc b/source/blender/gpu/intern/gpu_vertex_buffer.cc index 09b9eba9f95..3ecbb740a0c 100644 --- a/source/blender/gpu/intern/gpu_vertex_buffer.cc +++ b/source/blender/gpu/intern/gpu_vertex_buffer.cc @@ -149,6 +149,16 @@ GPUVertBuf *GPU_vertbuf_duplicate(GPUVertBuf *verts_) return wrap(unwrap(verts_)->duplicate()); } +const void *GPU_vertbuf_read(GPUVertBuf *verts) +{ + return unwrap(verts)->read(); +} + +void *GPU_vertbuf_unmap(const GPUVertBuf *verts, const void *mapped_data) +{ + return unwrap(verts)->unmap(mapped_data); +} + /** Same as discard but does not free. 
*/ void GPU_vertbuf_clear(GPUVertBuf *verts) { @@ -324,6 +334,11 @@ void GPU_vertbuf_use(GPUVertBuf *verts) unwrap(verts)->upload(); } +void GPU_vertbuf_bind_as_ssbo(struct GPUVertBuf *verts, int binding) +{ + unwrap(verts)->bind_as_ssbo(binding); +} + /* XXX this is just a wrapper for the use of the Hair refine workaround. * To be used with GPU_vertbuf_use(). */ void GPU_vertbuf_update_sub(GPUVertBuf *verts, uint start, uint len, void *data) diff --git a/source/blender/gpu/intern/gpu_vertex_buffer_private.hh b/source/blender/gpu/intern/gpu_vertex_buffer_private.hh index 67a09f6f83c..9531c2c1a5f 100644 --- a/source/blender/gpu/intern/gpu_vertex_buffer_private.hh +++ b/source/blender/gpu/intern/gpu_vertex_buffer_private.hh @@ -66,6 +66,7 @@ class VertBuf { void allocate(uint vert_len); void resize(uint vert_len); void upload(void); + virtual void bind_as_ssbo(uint binding) = 0; VertBuf *duplicate(void); @@ -96,6 +97,8 @@ class VertBuf { } virtual void update_sub(uint start, uint len, void *data) = 0; + virtual const void *read() const = 0; + virtual void *unmap(const void *mapped_data) const = 0; protected: virtual void acquire_data(void) = 0; |