/*
 * Copyright 2021 Blender Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "blender/blender_gpu_display.h"

#include "device/device.h"
#include "util/util_logging.h"
#include "util/util_opengl.h"

extern "C" {
struct RenderEngine;

bool RE_engine_has_render_context(struct RenderEngine *engine);
void RE_engine_render_context_enable(struct RenderEngine *engine);
void RE_engine_render_context_disable(struct RenderEngine *engine);

bool DRW_opengl_context_release();
void DRW_opengl_context_activate(bool drw_state);

void *WM_opengl_context_create();
void WM_opengl_context_activate(void *gl_context);
void WM_opengl_context_dispose(void *gl_context);
void WM_opengl_context_release(void *context);
}

CCL_NAMESPACE_BEGIN

/* --------------------------------------------------------------------
 * BlenderDisplayShader.
 */

unique_ptr<BlenderDisplayShader> BlenderDisplayShader::create(BL::RenderEngine &b_engine,
                                                              BL::Scene &b_scene)
{
  if (b_engine.support_display_space_shader(b_scene)) {
    return make_unique<BlenderDisplaySpaceShader>(b_engine, b_scene);
  }

  return make_unique<BlenderFallbackDisplayShader>();
}

int BlenderDisplayShader::get_position_attrib_location()
{
  if (position_attribute_location_ == -1) {
    const uint shader_program = get_shader_program();
    position_attribute_location_ = glGetAttribLocation(shader_program, position_attribute_name);
  }
  return position_attribute_location_;
}

int BlenderDisplayShader::get_tex_coord_attrib_location()
{
  if (tex_coord_attribute_location_ == -1) {
    const uint shader_program = get_shader_program();
    tex_coord_attribute_location_ = glGetAttribLocation(shader_program, tex_coord_attribute_name);
  }
  return tex_coord_attribute_location_;
}

/* --------------------------------------------------------------------
 * BlenderFallbackDisplayShader.
 */

/* TODO: move shaders to a standalone .glsl file. */
static const char *FALLBACK_VERTEX_SHADER =
    "#version 330\n"
    "uniform vec2 fullscreen;\n"
    "in vec2 texCoord;\n"
    "in vec2 pos;\n"
    "out vec2 texCoord_interp;\n"
    "\n"
    "vec2 normalize_coordinates()\n"
    "{\n"
    "  return (vec2(2.0) * (pos / fullscreen)) - vec2(1.0);\n"
    "}\n"
    "\n"
    "void main()\n"
    "{\n"
    "  gl_Position = vec4(normalize_coordinates(), 0.0, 1.0);\n"
    "  texCoord_interp = texCoord;\n"
    "}\n\0";

static const char *FALLBACK_FRAGMENT_SHADER =
    "#version 330\n"
    "uniform sampler2D image_texture;\n"
    "in vec2 texCoord_interp;\n"
    "out vec4 fragColor;\n"
    "\n"
    "void main()\n"
    "{\n"
    "  fragColor = texture(image_texture, texCoord_interp);\n"
    "}\n\0";

static void shader_print_errors(const char *task, const char *log, const char *code)
{
  LOG(ERROR) << "Shader: " << task << " error:";
  LOG(ERROR) << "===== shader string ====";

  stringstream stream(code);
  string partial;

  int line = 1;
  while (getline(stream, partial, '\n')) {
    if (line < 10) {
      LOG(ERROR) << " " << line << " " << partial;
    }
    else {
      LOG(ERROR) << line << " " << partial;
    }
    line++;
  }

  LOG(ERROR) << log;
}

static int compile_fallback_shader(void)
{
  const struct Shader {
    const char *source;
    const GLenum type;
  } shaders[2] = {{FALLBACK_VERTEX_SHADER, GL_VERTEX_SHADER},
                  {FALLBACK_FRAGMENT_SHADER, GL_FRAGMENT_SHADER}};

  const GLuint program = glCreateProgram();

  for (int i = 0; i < 2; i++) {
    const GLuint shader = glCreateShader(shaders[i].type);

    string source_str = shaders[i].source;
    const char *c_str = source_str.c_str();

    glShaderSource(shader, 1, &c_str, NULL);
    glCompileShader(shader);

    GLint compile_status;
    glGetShaderiv(shader, GL_COMPILE_STATUS, &compile_status);

    if (!compile_status) {
      GLchar log[5000];
      GLsizei length = 0;
      glGetShaderInfoLog(shader, sizeof(log), &length, log);
      shader_print_errors("compile", log, c_str);
      return 0;
    }

    glAttachShader(program, shader);
  }

  /* Link output. */
  glBindFragDataLocation(program, 0, "fragColor");

  /* Link and error check. */
  glLinkProgram(program);

  /* TODO(sergey): Find a way to nicely de-duplicate the error checking. */
  GLint link_status;
  glGetProgramiv(program, GL_LINK_STATUS, &link_status);
  if (!link_status) {
    GLchar log[5000];
    GLsizei length = 0;
    /* TODO(sergey): Is it really program passed to glGetShaderInfoLog? */
    glGetShaderInfoLog(program, sizeof(log), &length, log);
    shader_print_errors("linking", log, FALLBACK_VERTEX_SHADER);
    shader_print_errors("linking", log, FALLBACK_FRAGMENT_SHADER);
    return 0;
  }

  return program;
}

void BlenderFallbackDisplayShader::bind(int width, int height)
{
  create_shader_if_needed();

  if (!shader_program_) {
    return;
  }

  glUseProgram(shader_program_);
  glUniform1i(image_texture_location_, 0);
  glUniform2f(fullscreen_location_, width, height);
}

void BlenderFallbackDisplayShader::unbind()
{
}

uint BlenderFallbackDisplayShader::get_shader_program()
{
  return shader_program_;
}

void BlenderFallbackDisplayShader::create_shader_if_needed()
{
  if (shader_program_ || shader_compile_attempted_) {
    return;
  }

  shader_compile_attempted_ = true;

  shader_program_ = compile_fallback_shader();
  if (!shader_program_) {
    return;
  }

  glUseProgram(shader_program_);

  image_texture_location_ = glGetUniformLocation(shader_program_, "image_texture");
  if (image_texture_location_ < 0) {
    LOG(ERROR) << "Shader doesn't contain the 'image_texture' uniform.";
    destroy_shader();
    return;
  }

  fullscreen_location_ = glGetUniformLocation(shader_program_, "fullscreen");
  if (fullscreen_location_ < 0) {
    LOG(ERROR) << "Shader doesn't contain the 'fullscreen' uniform.";
    destroy_shader();
    return;
  }
}

void BlenderFallbackDisplayShader::destroy_shader()
{
  glDeleteProgram(shader_program_);
  shader_program_ = 0;
}

/* --------------------------------------------------------------------
 * BlenderDisplaySpaceShader.
 */

BlenderDisplaySpaceShader::BlenderDisplaySpaceShader(BL::RenderEngine &b_engine,
                                                     BL::Scene &b_scene)
    : b_engine_(b_engine), b_scene_(b_scene)
{
  DCHECK(b_engine_.support_display_space_shader(b_scene_));
}

void BlenderDisplaySpaceShader::bind(int /*width*/, int /*height*/)
{
  b_engine_.bind_display_space_shader(b_scene_);
}

void BlenderDisplaySpaceShader::unbind()
{
  b_engine_.unbind_display_space_shader();
}

uint BlenderDisplaySpaceShader::get_shader_program()
{
  if (!shader_program_) {
    glGetIntegerv(GL_CURRENT_PROGRAM, reinterpret_cast<GLint *>(&shader_program_));
  }

  if (!shader_program_) {
    LOG(ERROR) << "Error retrieving shader program for display space shader.";
  }

  return shader_program_;
}

/* --------------------------------------------------------------------
 * BlenderGPUDisplay.
 */

BlenderGPUDisplay::BlenderGPUDisplay(BL::RenderEngine &b_engine, BL::Scene &b_scene)
    : b_engine_(b_engine), display_shader_(BlenderDisplayShader::create(b_engine, b_scene))
{
  /* Create context while on the main thread. */
  gl_context_create();
}

BlenderGPUDisplay::~BlenderGPUDisplay()
{
  gl_resources_destroy();
}

/* --------------------------------------------------------------------
 * Update procedure.
 */

bool BlenderGPUDisplay::do_update_begin(const GPUDisplayParams &params,
                                        int texture_width,
                                        int texture_height)
{
  /* Note that it's the responsibility of BlenderGPUDisplay to ensure updating and drawing
   * the texture does not happen at the same time. This is achieved indirectly.
   *
   * When enabling the OpenGL context, it uses an internal mutex lock DST.gl_context_lock.
   * This same lock is also held when do_draw() is called, which together ensure mutual
   * exclusion.
   *
   * This locking is not performed at the GPU display level, because that would cause lock
   * inversion. */
  if (!gl_context_enable()) {
    return false;
  }

  if (gl_render_sync_) {
    glWaitSync((GLsync)gl_render_sync_, 0, GL_TIMEOUT_IGNORED);
  }

  if (!gl_texture_resources_ensure()) {
    gl_context_disable();
    return false;
  }

  /* Update texture dimensions if needed. */
  if (texture_.width != texture_width || texture_.height != texture_height) {
    glActiveTexture(GL_TEXTURE0);
    glBindTexture(GL_TEXTURE_2D, texture_.gl_id);
    glTexImage2D(
        GL_TEXTURE_2D, 0, GL_RGBA16F, texture_width, texture_height, 0, GL_RGBA, GL_HALF_FLOAT, 0);
    texture_.width = texture_width;
    texture_.height = texture_height;
    glBindTexture(GL_TEXTURE_2D, 0);

    /* Texture did change, and no pixel storage was provided. Tag for an explicit zeroing out to
     * avoid undefined content. */
    texture_.need_clear = true;
  }

  /* Update PBO dimensions if needed.
   *
   * NOTE: Allocate the PBO for the size which will fit the final render resolution (as in,
   * at a resolution divider of 1). This way we don't need to recreate graphics interoperability
   * objects, which are costly and which are tied to the specific underlying buffer size.
   * The downside of this approach is that when graphics interoperability is not used we are
   * sending too much data to the GPU when the resolution divider is not 1. */
  /* TODO(sergey): Investigate whether keeping the PBO the exact size of the texture makes
   * non-interop mode faster. */
  const int buffer_width = params.full_size.x;
  const int buffer_height = params.full_size.y;
  if (texture_.buffer_width != buffer_width || texture_.buffer_height != buffer_height) {
    const size_t size_in_bytes = sizeof(half4) * buffer_width * buffer_height;
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, texture_.gl_pbo_id);
    glBufferData(GL_PIXEL_UNPACK_BUFFER, size_in_bytes, 0, GL_DYNAMIC_DRAW);
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);

    texture_.buffer_width = buffer_width;
    texture_.buffer_height = buffer_height;
  }

  /* New content will be provided to the texture in one way or another, so mark this in a
   * centralized place. */
  texture_.need_update = true;

  return true;
}

void BlenderGPUDisplay::do_update_end()
{
  gl_upload_sync_ = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
  glFlush();

  gl_context_disable();
}

/* --------------------------------------------------------------------
 * Texture update from CPU buffer.
 */

void BlenderGPUDisplay::do_copy_pixels_to_texture(
    const half4 *rgba_pixels, int texture_x, int texture_y, int pixels_width, int pixels_height)
{
  /* This call copies pixels to a Pixel Buffer Object (PBO), which is much cheaper from the CPU
   * time point of view than copying data directly to the OpenGL texture.
   *
   * The possible downside of this approach is that it might require a higher peak memory when
   * doing partial updates of the texture (although, in practice even partial updates might peak
   * with a full-frame buffer stored on the CPU if the GPU is currently occupied). */

  half4 *mapped_rgba_pixels = map_texture_buffer();
  if (!mapped_rgba_pixels) {
    return;
  }

  if (texture_x == 0 && texture_y == 0 && pixels_width == texture_.width &&
      pixels_height == texture_.height) {
    const size_t size_in_bytes = sizeof(half4) * texture_.width * texture_.height;
    memcpy(mapped_rgba_pixels, rgba_pixels, size_in_bytes);
  }
  else {
    const half4 *rgba_row = rgba_pixels;
    half4 *mapped_rgba_row = mapped_rgba_pixels + texture_y * texture_.width + texture_x;
    for (int y = 0; y < pixels_height;
         ++y, rgba_row += pixels_width, mapped_rgba_row += texture_.width) {
      memcpy(mapped_rgba_row, rgba_row, sizeof(half4) * pixels_width);
    }
  }

  unmap_texture_buffer();
}

/* --------------------------------------------------------------------
 * Texture buffer mapping.
 */
half4 *BlenderGPUDisplay::do_map_texture_buffer()
{
  glBindBuffer(GL_PIXEL_UNPACK_BUFFER, texture_.gl_pbo_id);

  half4 *mapped_rgba_pixels = reinterpret_cast<half4 *>(
      glMapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_WRITE_ONLY));
  if (!mapped_rgba_pixels) {
    LOG(ERROR) << "Error mapping BlenderGPUDisplay pixel buffer object.";
  }

  if (texture_.need_clear) {
    const int64_t texture_width = texture_.width;
    const int64_t texture_height = texture_.height;
    memset(reinterpret_cast<void *>(mapped_rgba_pixels),
           0,
           texture_width * texture_height * sizeof(half4));

    texture_.need_clear = false;
  }

  return mapped_rgba_pixels;
}

void BlenderGPUDisplay::do_unmap_texture_buffer()
{
  glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER);

  glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
}

/* --------------------------------------------------------------------
 * Graphics interoperability.
 */

DeviceGraphicsInteropDestination BlenderGPUDisplay::do_graphics_interop_get()
{
  DeviceGraphicsInteropDestination interop_dst;

  interop_dst.buffer_width = texture_.buffer_width;
  interop_dst.buffer_height = texture_.buffer_height;
  interop_dst.opengl_pbo_id = texture_.gl_pbo_id;

  interop_dst.need_clear = texture_.need_clear;
  texture_.need_clear = false;

  return interop_dst;
}

void BlenderGPUDisplay::graphics_interop_activate()
{
  gl_context_enable();
}

void BlenderGPUDisplay::graphics_interop_deactivate()
{
  gl_context_disable();
}

/* --------------------------------------------------------------------
 * Drawing.
 */

void BlenderGPUDisplay::clear()
{
  texture_.need_clear = true;
}

void BlenderGPUDisplay::set_zoom(float zoom_x, float zoom_y)
{
  zoom_ = make_float2(zoom_x, zoom_y);
}

void BlenderGPUDisplay::do_draw(const GPUDisplayParams &params)
{
  /* See do_update_begin() for why no locking is required here. */
  const bool transparent = true;  // TODO(sergey): Derive this from Film.

  if (texture_.need_clear) {
    /* Texture is requested to be cleared and was not yet cleared.
     * Do an early return, which should be equivalent to drawing an all-zero texture. */
    return;
  }

  if (!gl_draw_resources_ensure()) {
    return;
  }

  if (use_gl_context_) {
    gl_context_mutex_.lock();
  }

  if (gl_upload_sync_) {
    glWaitSync((GLsync)gl_upload_sync_, 0, GL_TIMEOUT_IGNORED);
  }

  if (transparent) {
    glEnable(GL_BLEND);
    glBlendFunc(GL_ONE, GL_ONE_MINUS_SRC_ALPHA);
  }

  display_shader_->bind(params.full_size.x, params.full_size.y);

  glActiveTexture(GL_TEXTURE0);
  glBindTexture(GL_TEXTURE_2D, texture_.gl_id);

  /* Trick to keep sharp rendering without jagged edges on all GPUs.
   *
   * The idea here is to enforce the driver to use linear interpolation when the image is not
   * zoomed in.
   * For the render result with a resolution divider in effect we always use nearest
   * interpolation.
   *
   * Use explicit MIN assignment to make sure the driver does not have undefined behavior at
   * zoom level 1. The MAG filter is always NEAREST. */
  const float zoomed_width = params.size.x * zoom_.x;
  const float zoomed_height = params.size.y * zoom_.y;
  if (texture_.width != params.size.x || texture_.height != params.size.y) {
    /* Resolution divider is different from 1, force nearest interpolation. */
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
  }
  else if (zoomed_width - params.size.x > 0.5f || zoomed_height - params.size.y > 0.5f) {
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
  }
  else {
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
  }

  glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer_);

  texture_update_if_needed();
  vertex_buffer_update(params);

  /* TODO(sergey): Does it make sense/is it possible to cache/reuse the VAO? */
  GLuint vertex_array_object;
  glGenVertexArrays(1, &vertex_array_object);
  glBindVertexArray(vertex_array_object);

  const int texcoord_attribute = display_shader_->get_tex_coord_attrib_location();
  const int position_attribute = display_shader_->get_position_attrib_location();

  glEnableVertexAttribArray(texcoord_attribute);
  glEnableVertexAttribArray(position_attribute);

  glVertexAttribPointer(
      texcoord_attribute, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (const GLvoid *)0);
  glVertexAttribPointer(position_attribute,
                        2,
                        GL_FLOAT,
                        GL_FALSE,
                        4 * sizeof(float),
                        (const GLvoid *)(sizeof(float) * 2));

  glDrawArrays(GL_TRIANGLE_FAN, 0, 4);

  glBindBuffer(GL_ARRAY_BUFFER, 0);
  glBindTexture(GL_TEXTURE_2D, 0);

  glDeleteVertexArrays(1, &vertex_array_object);

  display_shader_->unbind();

  if (transparent) {
    glDisable(GL_BLEND);
  }

  gl_render_sync_ = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
  glFlush();

  if (use_gl_context_) {
    gl_context_mutex_.unlock();
  }
}

void BlenderGPUDisplay::gl_context_create()
{
  /* When rendering in the viewport there is no render context available via the engine.
   * Check whether our own context is to be created here.
   *
   * NOTE: If the `b_engine_`'s context is not available, we are expected to be on the main
   * thread here. */
  use_gl_context_ = !RE_engine_has_render_context(
      reinterpret_cast<RenderEngine *>(b_engine_.ptr.data));

  if (use_gl_context_) {
    const bool drw_state = DRW_opengl_context_release();

    gl_context_ = WM_opengl_context_create();
    if (gl_context_) {
      /* On Windows an old context is restored after creation, and subsequent release of context
       * generates a Win32 error. Harmless for users, but annoying to have possibly misleading
       * error prints in the console. */
#ifndef _WIN32
      WM_opengl_context_release(gl_context_);
#endif
    }
    else {
      LOG(ERROR) << "Error creating OpenGL context.";
    }

    DRW_opengl_context_activate(drw_state);
  }
}

bool BlenderGPUDisplay::gl_context_enable()
{
  if (use_gl_context_) {
    if (!gl_context_) {
      return false;
    }
    gl_context_mutex_.lock();
    WM_opengl_context_activate(gl_context_);
    return true;
  }

  RE_engine_render_context_enable(reinterpret_cast<RenderEngine *>(b_engine_.ptr.data));
  return true;
}

void BlenderGPUDisplay::gl_context_disable()
{
  if (use_gl_context_) {
    if (gl_context_) {
      WM_opengl_context_release(gl_context_);
      gl_context_mutex_.unlock();
    }
    return;
  }

  RE_engine_render_context_disable(reinterpret_cast<RenderEngine *>(b_engine_.ptr.data));
}

void BlenderGPUDisplay::gl_context_dispose()
{
  if (gl_context_) {
    const bool drw_state = DRW_opengl_context_release();

    WM_opengl_context_activate(gl_context_);
    WM_opengl_context_dispose(gl_context_);

    DRW_opengl_context_activate(drw_state);
  }
}

bool BlenderGPUDisplay::gl_draw_resources_ensure()
{
  if (!texture_.gl_id) {
    /* If there is no texture allocated, there is nothing to draw. Inform the draw call that it
     * can not continue. Note that this is not an unrecoverable error, so once the texture is
     * known we will come back here and create all the GPU resources needed for drawing. */
    return false;
  }

  if (gl_draw_resource_creation_attempted_) {
    return gl_draw_resources_created_;
  }
  gl_draw_resource_creation_attempted_ = true;

  if (!vertex_buffer_) {
    glGenBuffers(1, &vertex_buffer_);
    if (!vertex_buffer_) {
      LOG(ERROR) << "Error creating vertex buffer.";
      return false;
    }
  }

  gl_draw_resources_created_ = true;

  return true;
}

void BlenderGPUDisplay::gl_resources_destroy()
{
  gl_context_enable();

  if (vertex_buffer_ != 0) {
    glDeleteBuffers(1, &vertex_buffer_);
  }

  if (texture_.gl_pbo_id) {
    glDeleteBuffers(1, &texture_.gl_pbo_id);
    texture_.gl_pbo_id = 0;
  }

  if (texture_.gl_id) {
    glDeleteTextures(1, &texture_.gl_id);
    texture_.gl_id = 0;
  }

  gl_context_disable();

  gl_context_dispose();
}

bool BlenderGPUDisplay::gl_texture_resources_ensure()
{
  if (texture_.creation_attempted) {
    return texture_.is_created;
  }
  texture_.creation_attempted = true;

  DCHECK(!texture_.gl_id);
  DCHECK(!texture_.gl_pbo_id);

  /* Create texture. */
  glGenTextures(1, &texture_.gl_id);
  if (!texture_.gl_id) {
    LOG(ERROR) << "Error creating texture.";
    return false;
  }

  /* Configure the texture. */
  glActiveTexture(GL_TEXTURE0);
  glBindTexture(GL_TEXTURE_2D, texture_.gl_id);
  glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
  glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
  glBindTexture(GL_TEXTURE_2D, 0);

  /* Create PBO for the texture. */
  glGenBuffers(1, &texture_.gl_pbo_id);
  if (!texture_.gl_pbo_id) {
    LOG(ERROR) << "Error creating texture pixel buffer object.";
    return false;
  }

  /* Creation finished with a success. */
  texture_.is_created = true;

  return true;
}

void BlenderGPUDisplay::texture_update_if_needed()
{
  if (!texture_.need_update) {
    return;
  }

  glBindBuffer(GL_PIXEL_UNPACK_BUFFER, texture_.gl_pbo_id);
  glTexSubImage2D(
      GL_TEXTURE_2D, 0, 0, 0, texture_.width, texture_.height, GL_RGBA, GL_HALF_FLOAT, 0);
  glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);

  texture_.need_update = false;
}

void BlenderGPUDisplay::vertex_buffer_update(const GPUDisplayParams &params)
{
  /* Invalidate old contents - avoids stalling if the buffer is still waiting in the queue to be
   * rendered. */
  glBufferData(GL_ARRAY_BUFFER, 16 * sizeof(float), NULL, GL_STREAM_DRAW);

  float *vpointer = reinterpret_cast<float *>(glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY));
  if (!vpointer) {
    return;
  }

  vpointer[0] = 0.0f;
  vpointer[1] = 0.0f;
  vpointer[2] = params.offset.x;
  vpointer[3] = params.offset.y;

  vpointer[4] = 1.0f;
  vpointer[5] = 0.0f;
  vpointer[6] = (float)params.size.x + params.offset.x;
  vpointer[7] = params.offset.y;

  vpointer[8] = 1.0f;
  vpointer[9] = 1.0f;
  vpointer[10] = (float)params.size.x + params.offset.x;
  vpointer[11] = (float)params.size.y + params.offset.y;

  vpointer[12] = 0.0f;
  vpointer[13] = 1.0f;
  vpointer[14] = params.offset.x;
  vpointer[15] = (float)params.size.y + params.offset.y;

  glUnmapBuffer(GL_ARRAY_BUFFER);
}

CCL_NAMESPACE_END