diff options
Diffstat (limited to 'intern')
55 files changed, 5341 insertions, 692 deletions
diff --git a/intern/cycles/app/CMakeLists.txt b/intern/cycles/app/CMakeLists.txt index 75ff7114dd7..3248ef0dcda 100644 --- a/intern/cycles/app/CMakeLists.txt +++ b/intern/cycles/app/CMakeLists.txt @@ -33,15 +33,19 @@ else() endif() if(WITH_CYCLES_STANDALONE AND WITH_CYCLES_STANDALONE_GUI) - list(APPEND LIBRARIES ${GLUT_LIBRARIES}) + add_definitions(${GL_DEFINITIONS}) + list(APPEND INC_SYS + ${GLEW_INCLUDE_DIR} + ${SDL2_INCLUDE_DIRS} + ) + list(APPEND LIBRARIES + ${CYCLES_GL_LIBRARIES} + ${SDL2_LIBRARIES} + ) endif() -list(APPEND LIBRARIES ${CYCLES_GL_LIBRARIES}) - # Common configuration. -add_definitions(${GL_DEFINITIONS}) - include_directories(${INC}) include_directories(SYSTEM ${INC_SYS}) @@ -55,6 +59,18 @@ if(WITH_CYCLES_STANDALONE) oiio_output_driver.cpp oiio_output_driver.h ) + + if(WITH_CYCLES_STANDALONE_GUI) + list(APPEND SRC + opengl/display_driver.cpp + opengl/display_driver.h + opengl/shader.cpp + opengl/shader.h + opengl/window.cpp + opengl/window.h + ) + endif() + add_executable(cycles ${SRC} ${INC} ${INC_SYS}) unset(SRC) @@ -69,6 +85,10 @@ if(WITH_CYCLES_STANDALONE) # OpenImageDenoise uses BNNS from the Accelerate framework. 
set_property(TARGET cycles APPEND_STRING PROPERTY LINK_FLAGS " -framework Accelerate") endif() + if(WITH_CYCLES_STANDALONE_GUI) + set_property(TARGET cycles APPEND_STRING PROPERTY LINK_FLAGS + " -framework Cocoa -framework CoreAudio -framework AudioUnit -framework AudioToolbox -framework ForceFeedback -framework CoreVideo") + endif() endif() if(UNIX AND NOT APPLE) diff --git a/intern/cycles/app/cycles_standalone.cpp b/intern/cycles/app/cycles_standalone.cpp index 0e425ac3d8f..ef20f64debd 100644 --- a/intern/cycles/app/cycles_standalone.cpp +++ b/intern/cycles/app/cycles_standalone.cpp @@ -27,11 +27,10 @@ #include "app/oiio_output_driver.h" #ifdef WITH_CYCLES_STANDALONE_GUI -# include "util/view.h" +# include "opengl/display_driver.h" +# include "opengl/window.h" #endif -#include "app/cycles_xml.h" - CCL_NAMESPACE_BEGIN struct Options { @@ -117,7 +116,14 @@ static void session_init() options.output_pass = "combined"; options.session = new Session(options.session_params, options.scene_params); - if (!options.output_filepath.empty()) { +#ifdef WITH_CYCLES_STANDALONE_GUI + if (!options.session_params.background) { + options.session->set_display_driver(make_unique<OpenGLDisplayDriver>( + window_opengl_context_enable, window_opengl_context_disable)); + } + else +#endif + if (!options.output_filepath.empty()) { options.session->set_output_driver(make_unique<OIIOOutputDriver>( options.output_filepath, options.output_pass, session_print)); } @@ -126,7 +132,7 @@ static void session_init() options.session->progress.set_update_callback(function_bind(&session_print_status)); #ifdef WITH_CYCLES_STANDALONE_GUI else - options.session->progress.set_update_callback(function_bind(&view_redraw)); + options.session->progress.set_update_callback(function_bind(&window_redraw)); #endif /* load scene */ @@ -191,10 +197,10 @@ static void display_info(Progress &progress) sample_time, interactive.c_str()); - view_display_info(str.c_str()); + window_display_info(str.c_str()); if 
(options.show_help) - view_display_help(); + window_display_help(); } static void display() @@ -525,15 +531,15 @@ int main(int argc, const char **argv) string title = "Cycles: " + path_filename(options.filepath); /* init/exit are callback so they run while GL is initialized */ - view_main_loop(title.c_str(), - options.width, - options.height, - session_init, - session_exit, - resize, - display, - keyboard, - motion); + window_main_loop(title.c_str(), + options.width, + options.height, + session_init, + session_exit, + resize, + display, + keyboard, + motion); } #endif diff --git a/intern/cycles/app/opengl/display_driver.cpp b/intern/cycles/app/opengl/display_driver.cpp new file mode 100644 index 00000000000..8b99f3b6feb --- /dev/null +++ b/intern/cycles/app/opengl/display_driver.cpp @@ -0,0 +1,385 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2022 Blender Foundation */ + +#include "app/opengl/display_driver.h" +#include "app/opengl/shader.h" + +#include "util/log.h" +#include "util/string.h" + +#include <GL/glew.h> +#include <SDL.h> + +CCL_NAMESPACE_BEGIN + +/* -------------------------------------------------------------------- + * OpenGLDisplayDriver. + */ + +OpenGLDisplayDriver::OpenGLDisplayDriver(const function<bool()> &gl_context_enable, + const function<void()> &gl_context_disable) + : gl_context_enable_(gl_context_enable), gl_context_disable_(gl_context_disable) +{ +} + +OpenGLDisplayDriver::~OpenGLDisplayDriver() +{ +} + +/* -------------------------------------------------------------------- + * Update procedure. + */ + +void OpenGLDisplayDriver::next_tile_begin() +{ + /* Assuming no tiles used in interactive display. */ +} + +bool OpenGLDisplayDriver::update_begin(const Params &params, int texture_width, int texture_height) +{ + /* Note that it's the responsibility of OpenGLDisplayDriver to ensure updating and drawing + * the texture does not happen at the same time. This is achieved indirectly.
+ * + * When enabling the OpenGL context, it uses an internal mutex lock DST.gl_context_lock. + * This same lock is also held when do_draw() is called, which together ensure mutual + * exclusion. + * + * This locking is not performed on the Cycles side, because that would cause lock inversion. */ + if (!gl_context_enable_()) { + return false; + } + + if (gl_render_sync_) { + glWaitSync((GLsync)gl_render_sync_, 0, GL_TIMEOUT_IGNORED); + } + + if (!gl_texture_resources_ensure()) { + gl_context_disable_(); + return false; + } + + /* Update texture dimensions if needed. */ + if (texture_.width != texture_width || texture_.height != texture_height) { + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, texture_.gl_id); + glTexImage2D( + GL_TEXTURE_2D, 0, GL_RGBA16F, texture_width, texture_height, 0, GL_RGBA, GL_HALF_FLOAT, 0); + texture_.width = texture_width; + texture_.height = texture_height; + glBindTexture(GL_TEXTURE_2D, 0); + + /* Texture did change, and no pixel storage was provided. Tag for an explicit zeroing out to + * avoid undefined content. */ + texture_.need_clear = true; + } + + /* Update PBO dimensions if needed. + * + * NOTE: Allocate the PBO for the size which will fit the final render resolution (as in, + * at a resolution divider 1. This was we don't need to recreate graphics interoperability + * objects which are costly and which are tied to the specific underlying buffer size. + * The downside of this approach is that when graphics interoperability is not used we are + * sending too much data to GPU when resolution divider is not 1. 
*/ + const int buffer_width = params.full_size.x; + const int buffer_height = params.full_size.y; + if (texture_.buffer_width != buffer_width || texture_.buffer_height != buffer_height) { + const size_t size_in_bytes = sizeof(half4) * buffer_width * buffer_height; + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, texture_.gl_pbo_id); + glBufferData(GL_PIXEL_UNPACK_BUFFER, size_in_bytes, 0, GL_DYNAMIC_DRAW); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); + + texture_.buffer_width = buffer_width; + texture_.buffer_height = buffer_height; + } + + /* New content will be provided to the texture in one way or another, so mark this in a + * centralized place. */ + texture_.need_update = true; + + return true; +} + +void OpenGLDisplayDriver::update_end() +{ + gl_upload_sync_ = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); + glFlush(); + + gl_context_disable_(); +} + +/* -------------------------------------------------------------------- + * Texture buffer mapping. + */ + +half4 *OpenGLDisplayDriver::map_texture_buffer() +{ + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, texture_.gl_pbo_id); + + half4 *mapped_rgba_pixels = reinterpret_cast<half4 *>( + glMapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_WRITE_ONLY)); + if (!mapped_rgba_pixels) { + LOG(ERROR) << "Error mapping OpenGLDisplayDriver pixel buffer object."; + } + + if (texture_.need_clear) { + const int64_t texture_width = texture_.width; + const int64_t texture_height = texture_.height; + memset(reinterpret_cast<void *>(mapped_rgba_pixels), + 0, + texture_width * texture_height * sizeof(half4)); + texture_.need_clear = false; + } + + return mapped_rgba_pixels; +} + +void OpenGLDisplayDriver::unmap_texture_buffer() +{ + glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER); + + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); +} + +/* -------------------------------------------------------------------- + * Graphics interoperability. 
+ */ + +OpenGLDisplayDriver::GraphicsInterop OpenGLDisplayDriver::graphics_interop_get() +{ + GraphicsInterop interop_dst; + + interop_dst.buffer_width = texture_.buffer_width; + interop_dst.buffer_height = texture_.buffer_height; + interop_dst.opengl_pbo_id = texture_.gl_pbo_id; + + interop_dst.need_clear = texture_.need_clear; + texture_.need_clear = false; + + return interop_dst; +} + +void OpenGLDisplayDriver::graphics_interop_activate() +{ + gl_context_enable_(); +} + +void OpenGLDisplayDriver::graphics_interop_deactivate() +{ + gl_context_disable_(); +} + +/* -------------------------------------------------------------------- + * Drawing. + */ + +void OpenGLDisplayDriver::clear() +{ + texture_.need_clear = true; +} + +void OpenGLDisplayDriver::draw(const Params &params) +{ + /* See do_update_begin() for why no locking is required here. */ + if (texture_.need_clear) { + /* Texture is requested to be cleared and was not yet cleared. + * Do early return which should be equivalent of drawing all-zero texture. */ + return; + } + + if (!gl_draw_resources_ensure()) { + return; + } + + if (gl_upload_sync_) { + glWaitSync((GLsync)gl_upload_sync_, 0, GL_TIMEOUT_IGNORED); + } + + glEnable(GL_BLEND); + glBlendFunc(GL_ONE, GL_ONE_MINUS_SRC_ALPHA); + + display_shader_.bind(params.full_size.x, params.full_size.y); + + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, texture_.gl_id); + + if (texture_.width != params.size.x || texture_.height != params.size.y) { + /* Resolution divider is different from 1, force nearest interpolation.
*/ + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + } + else { + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + } + + glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer_); + + texture_update_if_needed(); + vertex_buffer_update(params); + + GLuint vertex_array_object; + glGenVertexArrays(1, &vertex_array_object); + glBindVertexArray(vertex_array_object); + + const int texcoord_attribute = display_shader_.get_tex_coord_attrib_location(); + const int position_attribute = display_shader_.get_position_attrib_location(); + + glEnableVertexAttribArray(texcoord_attribute); + glEnableVertexAttribArray(position_attribute); + + glVertexAttribPointer( + texcoord_attribute, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (const GLvoid *)0); + glVertexAttribPointer(position_attribute, + 2, + GL_FLOAT, + GL_FALSE, + 4 * sizeof(float), + (const GLvoid *)(sizeof(float) * 2)); + + glDrawArrays(GL_TRIANGLE_FAN, 0, 4); + + glBindBuffer(GL_ARRAY_BUFFER, 0); + glBindTexture(GL_TEXTURE_2D, 0); + + glDeleteVertexArrays(1, &vertex_array_object); + + display_shader_.unbind(); + + glDisable(GL_BLEND); + + gl_render_sync_ = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); + glFlush(); +} + +bool OpenGLDisplayDriver::gl_draw_resources_ensure() +{ + if (!texture_.gl_id) { + /* If there is no texture allocated, there is nothing to draw. Inform the draw call that it can + * can not continue. Note that this is not an unrecoverable error, so once the texture is known + * we will come back here and create all the GPU resources needed for draw. 
*/ + return false; + } + + if (gl_draw_resource_creation_attempted_) { + return gl_draw_resources_created_; + } + gl_draw_resource_creation_attempted_ = true; + + if (!vertex_buffer_) { + glGenBuffers(1, &vertex_buffer_); + if (!vertex_buffer_) { + LOG(ERROR) << "Error creating vertex buffer."; + return false; + } + } + + gl_draw_resources_created_ = true; + + return true; +} + +void OpenGLDisplayDriver::gl_resources_destroy() +{ + gl_context_enable_(); + + if (vertex_buffer_ != 0) { + glDeleteBuffers(1, &vertex_buffer_); + } + + if (texture_.gl_pbo_id) { + glDeleteBuffers(1, &texture_.gl_pbo_id); + texture_.gl_pbo_id = 0; + } + + if (texture_.gl_id) { + glDeleteTextures(1, &texture_.gl_id); + texture_.gl_id = 0; + } + + gl_context_disable_(); +} + +bool OpenGLDisplayDriver::gl_texture_resources_ensure() +{ + if (texture_.creation_attempted) { + return texture_.is_created; + } + texture_.creation_attempted = true; + + DCHECK(!texture_.gl_id); + DCHECK(!texture_.gl_pbo_id); + + /* Create texture. */ + glGenTextures(1, &texture_.gl_id); + if (!texture_.gl_id) { + LOG(ERROR) << "Error creating texture."; + return false; + } + + /* Configure the texture. */ + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, texture_.gl_id); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glBindTexture(GL_TEXTURE_2D, 0); + + /* Create PBO for the texture. */ + glGenBuffers(1, &texture_.gl_pbo_id); + if (!texture_.gl_pbo_id) { + LOG(ERROR) << "Error creating texture pixel buffer object."; + return false; + } + + /* Creation finished with a success. 
+ */ + texture_.is_created = true; + + return true; +} + +void OpenGLDisplayDriver::texture_update_if_needed() +{ + if (!texture_.need_update) { + return; + } + + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, texture_.gl_pbo_id); + glTexSubImage2D( + GL_TEXTURE_2D, 0, 0, 0, texture_.width, texture_.height, GL_RGBA, GL_HALF_FLOAT, 0); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); + + texture_.need_update = false; +} + +void OpenGLDisplayDriver::vertex_buffer_update(const Params &params) +{ + /* Invalidate old contents - avoids stalling if the buffer is still waiting in queue to be + * rendered. */ + glBufferData(GL_ARRAY_BUFFER, 16 * sizeof(float), NULL, GL_STREAM_DRAW); + + float *vpointer = reinterpret_cast<float *>(glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY)); + if (!vpointer) { + return; + } + + vpointer[0] = 0.0f; + vpointer[1] = 0.0f; + vpointer[2] = params.full_offset.x; + vpointer[3] = params.full_offset.y; + + vpointer[4] = 1.0f; + vpointer[5] = 0.0f; + vpointer[6] = (float)params.size.x + params.full_offset.x; + vpointer[7] = params.full_offset.y; + + vpointer[8] = 1.0f; + vpointer[9] = 1.0f; + vpointer[10] = (float)params.size.x + params.full_offset.x; + vpointer[11] = (float)params.size.y + params.full_offset.y; + + vpointer[12] = 0.0f; + vpointer[13] = 1.0f; + vpointer[14] = params.full_offset.x; + vpointer[15] = (float)params.size.y + params.full_offset.y; + + glUnmapBuffer(GL_ARRAY_BUFFER); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/app/opengl/display_driver.h b/intern/cycles/app/opengl/display_driver.h new file mode 100644 index 00000000000..92578412d68 --- /dev/null +++ b/intern/cycles/app/opengl/display_driver.h @@ -0,0 +1,117 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2022 Blender Foundation */ + +#pragma once + +#include <atomic> + +#include "app/opengl/shader.h" + +#include "session/display_driver.h" + +#include "util/function.h" +#include "util/unique_ptr.h" + +CCL_NAMESPACE_BEGIN + +class OpenGLDisplayDriver : public DisplayDriver { +
public: + /* Callbacks for enabling and disabling the OpenGL context. Must be provided to support enabling + * the context on the Cycles render thread independent of the main thread. */ + OpenGLDisplayDriver(const function<bool()> &gl_context_enable, + const function<void()> &gl_context_disable); + ~OpenGLDisplayDriver(); + + virtual void graphics_interop_activate() override; + virtual void graphics_interop_deactivate() override; + + virtual void clear() override; + + void set_zoom(float zoom_x, float zoom_y); + + protected: + virtual void next_tile_begin() override; + + virtual bool update_begin(const Params &params, int texture_width, int texture_height) override; + virtual void update_end() override; + + virtual half4 *map_texture_buffer() override; + virtual void unmap_texture_buffer() override; + + virtual GraphicsInterop graphics_interop_get() override; + + virtual void draw(const Params &params) override; + + /* Make sure texture is allocated and its initial configuration is performed. */ + bool gl_texture_resources_ensure(); + + /* Ensure all runtime GPU resources needed for drawing are allocated. + * Returns true if all resources needed for drawing are available. */ + bool gl_draw_resources_ensure(); + + /* Destroy all GPU resources which are being used by this object. */ + void gl_resources_destroy(); + + /* Update GPU texture dimensions and content if needed (new pixel data was provided). + * + * NOTE: The texture needs to be bound. */ + void texture_update_if_needed(); + + /* Update vertex buffer with new coordinates of vertex positions and texture coordinates. + * This buffer is used to render texture in the viewport. + * + * NOTE: The buffer needs to be bound. */ + void vertex_buffer_update(const Params &params); + + /* Texture which contains pixels of the render result. */ + struct { + /* Indicates whether texture creation was attempted and succeeded. + * Used to avoid multiple attempts of texture creation on GPU issues or GPU context + * misconfiguration.
*/ + bool creation_attempted = false; + bool is_created = false; + + /* OpenGL resource IDs of the texture itself and Pixel Buffer Object (PBO) used to write + * pixels to it. + * + * NOTE: Allocated on the engine's context. */ + uint gl_id = 0; + uint gl_pbo_id = 0; + + /* Is true when new data was written to the PBO, meaning, the texture might need to be resized + * and new data is to be uploaded to the GPU. */ + bool need_update = false; + + /* Content of the texture is to be filled with zeroes. */ + std::atomic<bool> need_clear = true; + + /* Dimensions of the texture in pixels. */ + int width = 0; + int height = 0; + + /* Dimensions of the underlying PBO. */ + int buffer_width = 0; + int buffer_height = 0; + } texture_; + + OpenGLShader display_shader_; + + /* Special track of whether GPU resources were attempted to be created, to avoid attempts of + * their re-creation on failure on every redraw. */ + bool gl_draw_resource_creation_attempted_ = false; + bool gl_draw_resources_created_ = false; + + /* Vertex buffer which hold vertices of a triangle fan which is textures with the texture + * holding the render result. */ + uint vertex_buffer_ = 0; + + void *gl_render_sync_ = nullptr; + void *gl_upload_sync_ = nullptr; + + float2 zoom_ = make_float2(1.0f, 1.0f); + + function<bool()> gl_context_enable_ = nullptr; + function<void()> gl_context_disable_ = nullptr; +}; + +CCL_NAMESPACE_END diff --git a/intern/cycles/app/opengl/shader.cpp b/intern/cycles/app/opengl/shader.cpp new file mode 100644 index 00000000000..9db9ea7fce9 --- /dev/null +++ b/intern/cycles/app/opengl/shader.cpp @@ -0,0 +1,197 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2022 Blender Foundation */ + +#include "app/opengl/shader.h" + +#include "util/log.h" +#include "util/string.h" + +#include <GL/glew.h> + +CCL_NAMESPACE_BEGIN + +/* -------------------------------------------------------------------- + * OpenGLShader. 
+ */ + +static const char *VERTEX_SHADER = + "#version 330\n" + "uniform vec2 fullscreen;\n" + "in vec2 texCoord;\n" + "in vec2 pos;\n" + "out vec2 texCoord_interp;\n" + "\n" + "vec2 normalize_coordinates()\n" + "{\n" + " return (vec2(2.0) * (pos / fullscreen)) - vec2(1.0);\n" + "}\n" + "\n" + "void main()\n" + "{\n" + " gl_Position = vec4(normalize_coordinates(), 0.0, 1.0);\n" + " texCoord_interp = texCoord;\n" + "}\n\0"; + +static const char *FRAGMENT_SHADER = + "#version 330\n" + "uniform sampler2D image_texture;\n" + "in vec2 texCoord_interp;\n" + "out vec4 fragColor;\n" + "\n" + "void main()\n" + "{\n" + " vec4 rgba = texture(image_texture, texCoord_interp);\n" + /* Harcoded Rec.709 gamma, should use OpenColorIO eventually. */ + " fragColor = pow(rgba, vec4(0.45, 0.45, 0.45, 1.0));\n" + "}\n\0"; + +static void shader_print_errors(const char *task, const char *log, const char *code) +{ + LOG(ERROR) << "Shader: " << task << " error:"; + LOG(ERROR) << "===== shader string ===="; + + stringstream stream(code); + string partial; + + int line = 1; + while (getline(stream, partial, '\n')) { + if (line < 10) { + LOG(ERROR) << " " << line << " " << partial; + } + else { + LOG(ERROR) << line << " " << partial; + } + line++; + } + LOG(ERROR) << log; +} + +static int compile_shader_program(void) +{ + const struct Shader { + const char *source; + const GLenum type; + } shaders[2] = {{VERTEX_SHADER, GL_VERTEX_SHADER}, {FRAGMENT_SHADER, GL_FRAGMENT_SHADER}}; + + const GLuint program = glCreateProgram(); + + for (int i = 0; i < 2; i++) { + const GLuint shader = glCreateShader(shaders[i].type); + + string source_str = shaders[i].source; + const char *c_str = source_str.c_str(); + + glShaderSource(shader, 1, &c_str, NULL); + glCompileShader(shader); + + GLint compile_status; + glGetShaderiv(shader, GL_COMPILE_STATUS, &compile_status); + + if (!compile_status) { + GLchar log[5000]; + GLsizei length = 0; + glGetShaderInfoLog(shader, sizeof(log), &length, log); + 
shader_print_errors("compile", log, c_str); + return 0; + } + + glAttachShader(program, shader); + } + + /* Link output. */ + glBindFragDataLocation(program, 0, "fragColor"); + + /* Link and error check. */ + glLinkProgram(program); + + GLint link_status; + glGetProgramiv(program, GL_LINK_STATUS, &link_status); + if (!link_status) { + GLchar log[5000]; + GLsizei length = 0; + glGetShaderInfoLog(program, sizeof(log), &length, log); + shader_print_errors("linking", log, VERTEX_SHADER); + shader_print_errors("linking", log, FRAGMENT_SHADER); + return 0; + } + + return program; +} + +int OpenGLShader::get_position_attrib_location() +{ + if (position_attribute_location_ == -1) { + const uint shader_program = get_shader_program(); + position_attribute_location_ = glGetAttribLocation(shader_program, position_attribute_name); + } + return position_attribute_location_; +} + +int OpenGLShader::get_tex_coord_attrib_location() +{ + if (tex_coord_attribute_location_ == -1) { + const uint shader_program = get_shader_program(); + tex_coord_attribute_location_ = glGetAttribLocation(shader_program, tex_coord_attribute_name); + } + return tex_coord_attribute_location_; +} + +void OpenGLShader::bind(int width, int height) +{ + create_shader_if_needed(); + + if (!shader_program_) { + return; + } + + glUseProgram(shader_program_); + glUniform1i(image_texture_location_, 0); + glUniform2f(fullscreen_location_, width, height); +} + +void OpenGLShader::unbind() +{ +} + +uint OpenGLShader::get_shader_program() +{ + return shader_program_; +} + +void OpenGLShader::create_shader_if_needed() +{ + if (shader_program_ || shader_compile_attempted_) { + return; + } + + shader_compile_attempted_ = true; + + shader_program_ = compile_shader_program(); + if (!shader_program_) { + return; + } + + glUseProgram(shader_program_); + + image_texture_location_ = glGetUniformLocation(shader_program_, "image_texture"); + if (image_texture_location_ < 0) { + LOG(ERROR) << "Shader doesn't contain the 
'image_texture' uniform."; + destroy_shader(); + return; + } + + fullscreen_location_ = glGetUniformLocation(shader_program_, "fullscreen"); + if (fullscreen_location_ < 0) { + LOG(ERROR) << "Shader doesn't contain the 'fullscreen' uniform."; + destroy_shader(); + return; + } +} + +void OpenGLShader::destroy_shader() +{ + glDeleteProgram(shader_program_); + shader_program_ = 0; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/app/opengl/shader.h b/intern/cycles/app/opengl/shader.h new file mode 100644 index 00000000000..6ca121ca6ff --- /dev/null +++ b/intern/cycles/app/opengl/shader.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2022 OpenGL Foundation */ + +#pragma once + +#include "util/types.h" + +CCL_NAMESPACE_BEGIN + +class OpenGLShader { + public: + static constexpr const char *position_attribute_name = "pos"; + static constexpr const char *tex_coord_attribute_name = "texCoord"; + + OpenGLShader() = default; + virtual ~OpenGLShader() = default; + + /* Get attribute location for position and texture coordinate respectively. + * NOTE: The shader needs to be bound to have access to those. */ + int get_position_attrib_location(); + int get_tex_coord_attrib_location(); + + void bind(int width, int height); + void unbind(); + + protected: + uint get_shader_program(); + + void create_shader_if_needed(); + void destroy_shader(); + + /* Cached values of various OpenGL resources. */ + int position_attribute_location_ = -1; + int tex_coord_attribute_location_ = -1; + + uint shader_program_ = 0; + int image_texture_location_ = -1; + int fullscreen_location_ = -1; + + /* Shader compilation attempted. Which means, that if the shader program is 0 then compilation or + * linking has failed. Do not attempt to re-compile the shader. 
*/ + bool shader_compile_attempted_ = false; +}; + +CCL_NAMESPACE_END diff --git a/intern/cycles/app/opengl/window.cpp b/intern/cycles/app/opengl/window.cpp new file mode 100644 index 00000000000..7351ae3eecd --- /dev/null +++ b/intern/cycles/app/opengl/window.cpp @@ -0,0 +1,352 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2022 Blender Foundation */ + +#include <stdio.h> +#include <stdlib.h> + +#include "app/opengl/window.h" + +#include "util/string.h" +#include "util/thread.h" +#include "util/time.h" +#include "util/version.h" + +#include <GL/glew.h> +#include <SDL.h> + +CCL_NAMESPACE_BEGIN + +/* structs */ + +struct Window { + WindowInitFunc initf = nullptr; + WindowExitFunc exitf = nullptr; + WindowResizeFunc resize = nullptr; + WindowDisplayFunc display = nullptr; + WindowKeyboardFunc keyboard = nullptr; + WindowMotionFunc motion = nullptr; + + bool first_display = true; + bool redraw = false; + + int mouseX = 0, mouseY = 0; + int mouseBut0 = 0, mouseBut2 = 0; + + int width = 0, height = 0; + + SDL_Window *window = nullptr; + SDL_GLContext gl_context = nullptr; + thread_mutex gl_context_mutex; +} V; + +/* public */ + +static void window_display_text(int x, int y, const char *text) +{ +/* Not currently supported, need to add text rendering support. 
*/ +#if 0 + const char *c; + + glRasterPos3f(x, y, 0); + glPixelStorei(GL_UNPACK_ALIGNMENT, 1); + + printf("display %s\n", text); + + for (c = text; *c != '\0'; c++) { + const uint8_t *bitmap = helvetica10_character_map[*c]; + glBitmap(bitmap[0], + helvetica10_height, + helvetica10_x_offset, + helvetica10_y_offset, + bitmap[0], + 0.0f, + bitmap + 1); + } +#else + static string last_text = ""; + + if (text != last_text) { + printf("%s\n", text); + last_text = text; + } +#endif +} + +void window_display_info(const char *info) +{ + const int height = 20; + +#if 0 + glEnable(GL_BLEND); + glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); + glColor4f(0.1f, 0.1f, 0.1f, 0.8f); + glRectf(0.0f, V.height - height, V.width, V.height); + glDisable(GL_BLEND); + + glColor3f(0.5f, 0.5f, 0.5f); +#endif + + window_display_text(10, 7 + V.height - height, info); + +#if 0 + glColor3f(1.0f, 1.0f, 1.0f); +#endif +} + +void window_display_help() +{ + const int w = (int)((float)V.width / 1.15f); + const int h = (int)((float)V.height / 1.15f); + + const int x1 = (V.width - w) / 2; +#if 0 + const int x2 = x1 + w; +#endif + + const int y1 = (V.height - h) / 2; + const int y2 = y1 + h; + +#if 0 + glEnable(GL_BLEND); + glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); + glColor4f(0.5f, 0.5f, 0.5f, 0.8f); + glRectf(x1, y1, x2, y2); + glDisable(GL_BLEND); + + glColor3f(0.8f, 0.8f, 0.8f); +#endif + + string info = string("Cycles Renderer ") + CYCLES_VERSION_STRING; + + window_display_text(x1 + 20, y2 - 20, info.c_str()); + window_display_text(x1 + 20, y2 - 40, "(C) 2011-2016 Blender Foundation"); + window_display_text(x1 + 20, y2 - 80, "Controls:"); + window_display_text(x1 + 20, y2 - 100, "h: Info/Help"); + window_display_text(x1 + 20, y2 - 120, "r: Reset"); + window_display_text(x1 + 20, y2 - 140, "p: Pause"); + window_display_text(x1 + 20, y2 - 160, "esc: Cancel"); + window_display_text(x1 + 20, y2 - 180, "q: Quit program"); + + window_display_text(x1 + 20, y2 - 210, "i: Interactive mode"); + 
window_display_text(x1 + 20, y2 - 230, "Left mouse: Move camera"); + window_display_text(x1 + 20, y2 - 250, "Right mouse: Rotate camera"); + window_display_text(x1 + 20, y2 - 270, "W/A/S/D: Move camera"); + window_display_text(x1 + 20, y2 - 290, "0/1/2/3: Set max bounces"); + +#if 0 + glColor3f(1.0f, 1.0f, 1.0f); +#endif +} + +static void window_display() +{ + if (V.first_display) { + if (V.initf) { + V.initf(); + } + if (V.exitf) { + atexit(V.exitf); + } + + V.first_display = false; + } + + window_opengl_context_enable(); + + glViewport(0, 0, V.width, V.height); + + glMatrixMode(GL_PROJECTION); + glLoadIdentity(); + + glMatrixMode(GL_MODELVIEW); + glLoadIdentity(); + + glClearColor(0.05f, 0.05f, 0.05f, 0.0f); + glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); + + glMatrixMode(GL_PROJECTION); + glLoadIdentity(); + glOrtho(0, V.width, 0, V.height, -1, 1); + + glMatrixMode(GL_MODELVIEW); + glLoadIdentity(); + + glRasterPos3f(0, 0, 0); + + if (V.display) + V.display(); + + SDL_GL_SwapWindow(V.window); + window_opengl_context_disable(); +} + +static void window_reshape(int width, int height) +{ + if (V.width != width || V.height != height) { + if (V.resize) { + V.resize(width, height); + } + } + + V.width = width; + V.height = height; +} + +static bool window_keyboard(unsigned char key) +{ + if (V.keyboard) + V.keyboard(key); + + if (key == 'q') { + if (V.exitf) + V.exitf(); + return true; + } + + return false; +} + +static void window_mouse(int button, int state, int x, int y) +{ + if (button == SDL_BUTTON_LEFT) { + if (state == SDL_MOUSEBUTTONDOWN) { + V.mouseX = x; + V.mouseY = y; + V.mouseBut0 = 1; + } + else if (state == SDL_MOUSEBUTTONUP) { + V.mouseBut0 = 0; + } + } + else if (button == SDL_BUTTON_RIGHT) { + if (state == SDL_MOUSEBUTTONDOWN) { + V.mouseX = x; + V.mouseY = y; + V.mouseBut2 = 1; + } + else if (state == SDL_MOUSEBUTTONUP) { + V.mouseBut2 = 0; + } + } +} + +static void window_motion(int x, int y) +{ + const int but = V.mouseBut0 ? 
0 : 2; + const int distX = x - V.mouseX; + const int distY = y - V.mouseY; + + if (V.motion) + V.motion(distX, distY, but); + + V.mouseX = x; + V.mouseY = y; +} + +bool window_opengl_context_enable() +{ + V.gl_context_mutex.lock(); + SDL_GL_MakeCurrent(V.window, V.gl_context); + return true; +} + +void window_opengl_context_disable() +{ + SDL_GL_MakeCurrent(V.window, nullptr); + V.gl_context_mutex.unlock(); +} + +void window_main_loop(const char *title, + int width, + int height, + WindowInitFunc initf, + WindowExitFunc exitf, + WindowResizeFunc resize, + WindowDisplayFunc display, + WindowKeyboardFunc keyboard, + WindowMotionFunc motion) +{ + V.width = width; + V.height = height; + V.first_display = true; + V.redraw = false; + V.initf = initf; + V.exitf = exitf; + V.resize = resize; + V.display = display; + V.keyboard = keyboard; + V.motion = motion; + + SDL_Init(SDL_INIT_VIDEO); + SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_CORE); + SDL_GL_SetAttribute(SDL_GL_SHARE_WITH_CURRENT_CONTEXT, 1); + V.window = SDL_CreateWindow(title, + SDL_WINDOWPOS_UNDEFINED, + SDL_WINDOWPOS_UNDEFINED, + width, + height, + SDL_WINDOW_RESIZABLE | SDL_WINDOW_OPENGL | SDL_WINDOW_SHOWN); + if (V.window == nullptr) { + fprintf(stderr, "Failed to create window: %s\n", SDL_GetError()); + return; + } + + SDL_RaiseWindow(V.window); + + V.gl_context = SDL_GL_CreateContext(V.window); + glewInit(); + SDL_GL_MakeCurrent(V.window, nullptr); + + window_reshape(width, height); + window_display(); + + while (true) { + bool quit = false; + SDL_Event event; + while (!quit && SDL_PollEvent(&event)) { + if (event.type == SDL_TEXTINPUT) { + quit = window_keyboard(event.text.text[0]); + } + else if (event.type == SDL_MOUSEMOTION) { + window_motion(event.motion.x, event.motion.y); + } + else if (event.type == SDL_MOUSEBUTTONDOWN || event.type == SDL_MOUSEBUTTONUP) { + window_mouse(event.button.button, event.button.state, event.button.x, event.button.y); + } + else if (event.type == 
SDL_WINDOWEVENT) { + if (event.window.event == SDL_WINDOWEVENT_RESIZED || + event.window.event == SDL_WINDOWEVENT_SIZE_CHANGED) { + window_reshape(event.window.data1, event.window.data2); + } + } + else if (event.type == SDL_QUIT) { + if (V.exitf) { + V.exitf(); + } + quit = true; + } + } + + if (quit) { + break; + } + + if (V.redraw) { + V.redraw = false; + window_display(); + } + + SDL_WaitEventTimeout(NULL, 100); + } + + SDL_GL_DeleteContext(V.gl_context); + SDL_DestroyWindow(V.window); + SDL_Quit(); +} + +void window_redraw() +{ + V.redraw = true; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/app/opengl/window.h b/intern/cycles/app/opengl/window.h new file mode 100644 index 00000000000..531b5cab3fc --- /dev/null +++ b/intern/cycles/app/opengl/window.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2022 Blender Foundation */ + +#pragma once + +/* Functions to display a simple OpenGL window using SDL, simplified to the + * bare minimum we need to reduce boilerplate code in test apps. 
*/ + +CCL_NAMESPACE_BEGIN + +typedef void (*WindowInitFunc)(); +typedef void (*WindowExitFunc)(); +typedef void (*WindowResizeFunc)(int width, int height); +typedef void (*WindowDisplayFunc)(); +typedef void (*WindowKeyboardFunc)(unsigned char key); +typedef void (*WindowMotionFunc)(int x, int y, int button); + +void window_main_loop(const char *title, + int width, + int height, + WindowInitFunc initf, + WindowExitFunc exitf, + WindowResizeFunc resize, + WindowDisplayFunc display, + WindowKeyboardFunc keyboard, + WindowMotionFunc motion); + +void window_display_info(const char *info); +void window_display_help(); +void window_redraw(); + +bool window_opengl_context_enable(); +void window_opengl_context_disable(); + +CCL_NAMESPACE_END diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py index 4e62bae6fe3..e3e5734c6b6 100644 --- a/intern/cycles/blender/addon/properties.py +++ b/intern/cycles/blender/addon/properties.py @@ -1514,9 +1514,12 @@ class CyclesPreferences(bpy.types.AddonPreferences): row.prop(self, "peer_memory") if compute_device_type == 'METAL': - row = layout.row() - row.use_property_split = True - row.prop(self, "use_metalrt") + import platform + # MetalRT only works on Apple Silicon at present, pending argument encoding fixes on AMD + if platform.machine() == 'arm64': + row = layout.row() + row.use_property_split = True + row.prop(self, "use_metalrt") def draw(self, context): diff --git a/intern/cycles/blender/session.cpp b/intern/cycles/blender/session.cpp index 8917c703700..5e9066da5de 100644 --- a/intern/cycles/blender/session.cpp +++ b/intern/cycles/blender/session.cpp @@ -493,8 +493,13 @@ void BlenderSession::render_frame_finish() session->set_output_driver(nullptr); session->full_buffer_written_cb = function_null; - /* The display driver holds OpenGL resources which belong to an OpenGL context held by the render - * engine on Blender side. Force destruction of those resources. 
*/ + /* The display driver is the source of drawing context for both drawing and possible graphics + * interop objects in the path trace. Once the frame is finished the OpenGL context might be + * freed from the Blender side. Need to ensure that all GPU resources are freed prior to that + * point. + * Ideally would only do this when OpenGL context is actually destroyed, but there is no way to + * know when this happens (at least in the code at the time when this comment was written). + * The penalty of re-creating resources on every frame is unlikely to be noticed. */ display_driver_ = nullptr; session->set_display_driver(nullptr); diff --git a/intern/cycles/blender/shader.cpp b/intern/cycles/blender/shader.cpp index 9de507966d8..ec50ad9db9a 100644 --- a/intern/cycles/blender/shader.cpp +++ b/intern/cycles/blender/shader.cpp @@ -32,7 +32,8 @@ typedef map<string, ConvertNode *> ProxyMap; void BlenderSync::find_shader(BL::ID &id, array<Node *> &used_shaders, Shader *default_shader) { - Shader *shader = (id) ? shader_map.find(id) : default_shader; + Shader *synced_shader = (id) ? shader_map.find(id) : nullptr; + Shader *shader = (synced_shader) ? 
synced_shader : default_shader; used_shaders.push_back_slow(shader); shader->tag_used(scene); @@ -1573,18 +1574,13 @@ void BlenderSync::sync_lights(BL::Depsgraph &b_depsgraph, bool update_all) } } -void BlenderSync::sync_shaders(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d) +void BlenderSync::sync_shaders(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d, bool update_all) { - /* for auto refresh images */ - ImageManager *image_manager = scene->image_manager; - const int frame = b_scene.frame_current(); - const bool auto_refresh_update = image_manager->set_animation_frame_update(frame); - shader_map.pre_sync(); - sync_world(b_depsgraph, b_v3d, auto_refresh_update); - sync_lights(b_depsgraph, auto_refresh_update); - sync_materials(b_depsgraph, auto_refresh_update); + sync_world(b_depsgraph, b_v3d, update_all); + sync_lights(b_depsgraph, update_all); + sync_materials(b_depsgraph, update_all); } CCL_NAMESPACE_END diff --git a/intern/cycles/blender/sync.cpp b/intern/cycles/blender/sync.cpp index 0b11af2dbf9..d4949a5ff30 100644 --- a/intern/cycles/blender/sync.cpp +++ b/intern/cycles/blender/sync.cpp @@ -246,7 +246,12 @@ void BlenderSync::sync_data(BL::RenderSettings &b_render, int height, void **python_thread_state) { - if (!has_updates_) { + /* For auto refresh images. 
*/ + ImageManager *image_manager = scene->image_manager; + const int frame = b_scene.frame_current(); + const bool auto_refresh_update = image_manager->set_animation_frame_update(frame); + + if (!has_updates_ && !auto_refresh_update) { return; } @@ -261,7 +266,7 @@ void BlenderSync::sync_data(BL::RenderSettings &b_render, sync_view_layer(b_view_layer); sync_integrator(b_view_layer, background); sync_film(b_view_layer, b_v3d); - sync_shaders(b_depsgraph, b_v3d); + sync_shaders(b_depsgraph, b_v3d, auto_refresh_update); sync_images(); geometry_synced.clear(); /* use for objects and motion sync */ diff --git a/intern/cycles/blender/sync.h b/intern/cycles/blender/sync.h index d92efb80a5d..5cc18452ac1 100644 --- a/intern/cycles/blender/sync.h +++ b/intern/cycles/blender/sync.h @@ -114,7 +114,7 @@ class BlenderSync { /* Shader */ array<Node *> find_used_shaders(BL::Object &b_ob); void sync_world(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d, bool update_all); - void sync_shaders(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d); + void sync_shaders(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d, bool update_all); void sync_nodes(Shader *shader, BL::ShaderNodeTree &b_ntree); /* Object */ diff --git a/intern/cycles/cmake/external_libs.cmake b/intern/cycles/cmake/external_libs.cmake index c964fbe0d72..6ad64d684c0 100644 --- a/intern/cycles/cmake/external_libs.cmake +++ b/intern/cycles/cmake/external_libs.cmake @@ -479,26 +479,22 @@ else() endif() ########################################################################### -# GLUT +# SDL ########################################################################### if(WITH_CYCLES_STANDALONE AND WITH_CYCLES_STANDALONE_GUI) - if(MSVC AND EXISTS ${_cycles_lib_dir}) - add_definitions(-DFREEGLUT_STATIC -DFREEGLUT_LIB_PRAGMAS=0) - set(GLUT_LIBRARIES "${_cycles_lib_dir}/opengl/lib/freeglut_static.lib") - set(GLUT_INCLUDE_DIR "${_cycles_lib_dir}/opengl/include") - else() - find_package(GLUT) + # We can't use the version from 
the Blender precompiled libraries because + # it does not include the video subsystem. + find_package(SDL2) - if(NOT GLUT_FOUND) - set(WITH_CYCLES_STANDALONE_GUI OFF) - message(STATUS "GLUT not found, disabling Cycles standalone GUI") - endif() + if(NOT SDL2_FOUND) + set(WITH_CYCLES_STANDALONE_GUI OFF) + message(STATUS "SDL not found, disabling Cycles standalone GUI") endif() include_directories( SYSTEM - ${GLUT_INCLUDE_DIR} + ${SDL2_INCLUDE_DIRS} ) endif() diff --git a/intern/cycles/device/cpu/device_impl.cpp b/intern/cycles/device/cpu/device_impl.cpp index 158a789db5a..612c391f7d5 100644 --- a/intern/cycles/device/cpu/device_impl.cpp +++ b/intern/cycles/device/cpu/device_impl.cpp @@ -191,7 +191,7 @@ device_ptr CPUDevice::mem_alloc_sub_ptr(device_memory &mem, size_t offset, size_ void CPUDevice::const_copy_to(const char *name, void *host, size_t size) { -#if WITH_EMBREE +#ifdef WITH_EMBREE if (strcmp(name, "__data") == 0) { assert(size <= sizeof(KernelData)); diff --git a/intern/cycles/device/hip/device_impl.h b/intern/cycles/device/hip/device_impl.h index 00269ac287c..9afef3789af 100644 --- a/intern/cycles/device/hip/device_impl.h +++ b/intern/cycles/device/hip/device_impl.h @@ -12,8 +12,6 @@ # ifdef WITH_HIP_DYNLOAD # include "hipew.h" -# else -# include "util/opengl.h" # endif CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/device/metal/device_impl.mm b/intern/cycles/device/metal/device_impl.mm index 593c9c3cf06..c01f51fb506 100644 --- a/intern/cycles/device/metal/device_impl.mm +++ b/intern/cycles/device/metal/device_impl.mm @@ -77,11 +77,11 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile } case METAL_GPU_APPLE: { max_threads_per_threadgroup = 512; + use_metalrt = info.use_metalrt; break; } } - use_metalrt = info.use_metalrt; if (auto metalrt = getenv("CYCLES_METALRT")) { use_metalrt = (atoi(metalrt) != 0); } diff --git a/intern/cycles/integrator/path_trace.cpp b/intern/cycles/integrator/path_trace.cpp index 
220d4c9ffa2..eb12b0a6a11 100644 --- a/intern/cycles/integrator/path_trace.cpp +++ b/intern/cycles/integrator/path_trace.cpp @@ -54,14 +54,7 @@ PathTrace::PathTrace(Device *device, PathTrace::~PathTrace() { - /* Destroy any GPU resource which was used for graphics interop. - * Need to have access to the PathTraceDisplay as it is the only source of drawing context which - * is used for interop. */ - if (display_) { - for (auto &&path_trace_work : path_trace_works_) { - path_trace_work->destroy_gpu_resources(display_.get()); - } - } + destroy_gpu_resources(); } void PathTrace::load_kernels() @@ -559,6 +552,11 @@ void PathTrace::set_output_driver(unique_ptr<OutputDriver> driver) void PathTrace::set_display_driver(unique_ptr<DisplayDriver> driver) { + /* The display driver is the source of the drawing context which might be used by + * path trace works. Make sure there is no graphics interop using resources from + * the old display, as it might no longer be available after this call. */ + destroy_gpu_resources(); + if (driver) { display_ = make_unique<PathTraceDisplay>(move(driver)); } @@ -1075,6 +1073,18 @@ bool PathTrace::has_denoised_result() const return render_state_.has_denoised_result; } +void PathTrace::destroy_gpu_resources() +{ + /* Destroy any GPU resource which was used for graphics interop. + * Need to have access to the PathTraceDisplay as it is the only source of drawing context which + * is used for interop. */ + if (display_) { + for (auto &&path_trace_work : path_trace_works_) { + path_trace_work->destroy_gpu_resources(display_.get()); + } + } +} + /* -------------------------------------------------------------------- * Report generation. 
*/ diff --git a/intern/cycles/integrator/path_trace.h b/intern/cycles/integrator/path_trace.h index 1be5ce847bc..a470a6e1402 100644 --- a/intern/cycles/integrator/path_trace.h +++ b/intern/cycles/integrator/path_trace.h @@ -226,6 +226,9 @@ class PathTrace { void progress_set_status(const string &status, const string &substatus = ""); + /* Destroy GPU resources (such as graphics interop) used by work. */ + void destroy_gpu_resources(); + /* Pointer to a device which is configured to be used for path tracing. If multiple devices * are configured this is a `MultiDevice`. */ Device *device_ = nullptr; diff --git a/intern/cycles/integrator/render_scheduler.cpp b/intern/cycles/integrator/render_scheduler.cpp index fe4697e082b..90a5a01320b 100644 --- a/intern/cycles/integrator/render_scheduler.cpp +++ b/intern/cycles/integrator/render_scheduler.cpp @@ -244,7 +244,7 @@ void RenderScheduler::render_work_reschedule_on_cancel(RenderWork &render_work) render_work.tile.write = tile_write; render_work.full.write = full_write; - /* Do not write tile if it has zero samples it it, treat it similarly to all other tiles which + /* Do not write tile if it has zero samples in it, treat it similarly to all other tiles which * got canceled. */ if (!state_.tile_result_was_written && has_rendered_samples) { render_work.tile.write = true; diff --git a/intern/cycles/integrator/render_scheduler.h b/intern/cycles/integrator/render_scheduler.h index 404f65e98a1..dce876d44bd 100644 --- a/intern/cycles/integrator/render_scheduler.h +++ b/intern/cycles/integrator/render_scheduler.h @@ -124,7 +124,7 @@ class RenderScheduler { /* Get sample up to which rendering has been done. * This is an absolute 0-based value. * - * For example, if start sample is 10 and and 5 samples were rendered, then this call will + * For example, if start sample is 10 and 5 samples were rendered, then this call will * return 14. * * If there were no samples rendered, then the behavior is undefined. 
*/ @@ -132,7 +132,7 @@ class RenderScheduler { /* Get number of samples rendered within the current scheduling session. * - * For example, if start sample is 10 and and 5 samples were rendered, then this call will + * For example, if start sample is 10 and 5 samples were rendered, then this call will * return 5. * * Note that this is based on the scheduling information. In practice this means that if someone diff --git a/intern/cycles/kernel/bvh/util.h b/intern/cycles/kernel/bvh/util.h index 1fd3a3f2850..71045157372 100644 --- a/intern/cycles/kernel/bvh/util.h +++ b/intern/cycles/kernel/bvh/util.h @@ -5,27 +5,6 @@ CCL_NAMESPACE_BEGIN -/* Ray offset to avoid self intersection. - * - * This function should be used to compute a modified ray start position for - * rays leaving from a surface. This is from "A Fast and Robust Method for Avoiding - * Self-Intersection" see https://research.nvidia.com/publication/2019-03_A-Fast-and - */ -ccl_device_inline float3 ray_offset(float3 P, float3 Ng) -{ - const float int_scale = 256.0f; - int3 of_i = make_int3((int)(int_scale * Ng.x), (int)(int_scale * Ng.y), (int)(int_scale * Ng.z)); - - float3 p_i = make_float3(__int_as_float(__float_as_int(P.x) + ((P.x < 0) ? -of_i.x : of_i.x)), - __int_as_float(__float_as_int(P.y) + ((P.y < 0) ? -of_i.y : of_i.y)), - __int_as_float(__float_as_int(P.z) + ((P.z < 0) ? -of_i.z : of_i.z))); - const float origin = 1.0f / 32.0f; - const float float_scale = 1.0f / 65536.0f; - return make_float3(fabsf(P.x) < origin ? P.x + float_scale * Ng.x : p_i.x, - fabsf(P.y) < origin ? P.y + float_scale * Ng.y : p_i.y, - fabsf(P.z) < origin ? 
P.z + float_scale * Ng.z : p_i.z); -} - #if defined(__KERNEL_CPU__) ccl_device int intersections_compare(const void *a, const void *b) { diff --git a/intern/cycles/kernel/geom/point.h b/intern/cycles/kernel/geom/point.h index f7c6cb86c5e..041ecb3c2cf 100644 --- a/intern/cycles/kernel/geom/point.h +++ b/intern/cycles/kernel/geom/point.h @@ -128,9 +128,10 @@ ccl_device float point_radius(KernelGlobals kg, ccl_private const ShaderData *sd return r; } else { - float3 dir = make_float3(r, r, r); + const float normalized_r = r * (1.0f / M_SQRT3_F); + float3 dir = make_float3(normalized_r, normalized_r, normalized_r); object_dir_transform(kg, sd, &dir); - return average(dir); + return len(dir); } } diff --git a/intern/cycles/kernel/integrator/init_from_bake.h b/intern/cycles/kernel/integrator/init_from_bake.h index e616123e9e7..b84059d6676 100644 --- a/intern/cycles/kernel/integrator/init_from_bake.h +++ b/intern/cycles/kernel/integrator/init_from_bake.h @@ -30,6 +30,50 @@ ccl_device_inline float bake_clamp_mirror_repeat(float u, float max) return ((((int)fu) & 1) ? 1.0f - u : u) * max; } +/* Offset towards center of triangle to avoid ray-tracing precision issues. */ +ccl_device const float2 bake_offset_towards_center(KernelGlobals kg, + const int prim, + const float u, + const float v) +{ + float3 tri_verts[3]; + triangle_vertices(kg, prim, tri_verts); + + /* Empirically determined values, by no means perfect. */ + const float position_offset = 1e-4f; + const float uv_offset = 1e-5f; + + /* Offset position towards center, amount relative to absolute size of position coordinates. */ + const float3 P = u * tri_verts[0] + v * tri_verts[1] + (1.0f - u - v) * tri_verts[2]; + const float3 center = (tri_verts[0] + tri_verts[1] + tri_verts[2]) / 3.0f; + const float3 to_center = center - P; + + const float3 offset_P = P + normalize(to_center) * + min(len(to_center), max(max3(fabs(P)), 1.0f) * position_offset); + + /* Compute barycentric coordinates at new position. 
*/ + const float3 v1 = tri_verts[1] - tri_verts[0]; + const float3 v2 = tri_verts[2] - tri_verts[0]; + const float3 vP = offset_P - tri_verts[0]; + + const float d11 = dot(v1, v1); + const float d12 = dot(v1, v2); + const float d22 = dot(v2, v2); + const float dP1 = dot(vP, v1); + const float dP2 = dot(vP, v2); + + const float denom = d11 * d22 - d12 * d12; + if (denom == 0.0f) { + return make_float2(0.0f, 0.0f); + } + + const float offset_v = clamp((d22 * dP1 - d12 * dP2) / denom, uv_offset, 1.0f - uv_offset); + const float offset_w = clamp((d11 * dP2 - d12 * dP1) / denom, uv_offset, 1.0f - uv_offset); + const float offset_u = clamp(1.0f - offset_v - offset_w, uv_offset, 1.0f - uv_offset); + + return make_float2(offset_u, offset_v); +} + /* Return false to indicate that this pixel is finished. * Used by CPU implementation to not attempt to sample pixel for multiple samples once its known * that the pixel did converge. */ @@ -87,7 +131,7 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg, /* Initialize path state for path integration. */ path_state_init_integrator(kg, state, sample, rng_hash); - /* Barycentric UV with sub-pixel offset. */ + /* Barycentric UV. */ float u = primitive[2]; float v = primitive[3]; @@ -96,6 +140,14 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg, float dvdx = differential[2]; float dvdy = differential[3]; + /* Exactly at vertex? Nudge inwards to avoid self-intersection. */ + if ((u == 0.0f || u == 1.0f) && (v == 0.0f || v == 1.0f)) { + const float2 uv = bake_offset_towards_center(kg, prim, u, v); + u = uv.x; + v = uv.y; + } + + /* Sub-pixel offset. 
*/ if (sample > 0) { u = bake_clamp_mirror_repeat(u + dudx * (filter_x - 0.5f) + dudy * (filter_y - 0.5f), 1.0f); v = bake_clamp_mirror_repeat(v + dvdx * (filter_x - 0.5f) + dvdy * (filter_y - 0.5f), diff --git a/intern/cycles/kernel/integrator/shade_surface.h b/intern/cycles/kernel/integrator/shade_surface.h index f548d91031d..d2442755646 100644 --- a/intern/cycles/kernel/integrator/shade_surface.h +++ b/intern/cycles/kernel/integrator/shade_surface.h @@ -352,12 +352,8 @@ ccl_device_forceinline void integrate_surface_ao(KernelGlobals kg, float ao_pdf; sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf); - if (!(dot(sd->Ng, ao_D) > 0.0f && ao_pdf != 0.0f)) { - return; - } - Ray ray ccl_optional_struct_init; - ray.P = sd->P; + ray.P = shadow_ray_offset(kg, sd, ao_D); ray.D = ao_D; ray.t = kernel_data.integrator.ao_bounces_distance; ray.time = sd->time; diff --git a/intern/cycles/kernel/osl/services.cpp b/intern/cycles/kernel/osl/services.cpp index 16e76b37b0b..85bdb47600e 100644 --- a/intern/cycles/kernel/osl/services.cpp +++ b/intern/cycles/kernel/osl/services.cpp @@ -1638,12 +1638,16 @@ bool OSLRenderServices::trace(TraceOpt &options, ray.D = TO_FLOAT3(R); ray.t = (options.maxdist == 1.0e30f) ? FLT_MAX : options.maxdist - options.mindist; ray.time = sd->time; + ray.self.object = OBJECT_NONE; + ray.self.prim = PRIM_NONE; + ray.self.light_object = OBJECT_NONE; + ray.self.light_prim = PRIM_NONE; if (options.mindist == 0.0f) { /* avoid self-intersections */ if (ray.P == sd->P) { - bool transmit = (dot(sd->Ng, ray.D) < 0.0f); - ray.P = ray_offset(sd->P, (transmit) ? 
-sd->Ng : sd->Ng); + ray.self.object = sd->object; + ray.self.prim = sd->prim; } } else { diff --git a/intern/cycles/kernel/svm/light_path.h b/intern/cycles/kernel/svm/light_path.h index dce0f83da68..7c2189d3608 100644 --- a/intern/cycles/kernel/svm/light_path.h +++ b/intern/cycles/kernel/svm/light_path.h @@ -58,8 +58,8 @@ ccl_device_noinline void svm_node_light_path(KernelGlobals kg, info = (float)integrator_state_bounce(state, path_flag); } - /* For background, light emission and shadow evaluation we from a - * surface or volume we are effective one bounce further. */ + /* For background, light emission and shadow evaluation from a + * surface or volume we are effectively one bounce further. */ if (path_flag & (PATH_RAY_SHADOW | PATH_RAY_EMISSION)) { info += 1.0f; } diff --git a/intern/cycles/scene/shader.cpp b/intern/cycles/scene/shader.cpp index dde250d5d78..8a08f2a5be9 100644 --- a/intern/cycles/scene/shader.cpp +++ b/intern/cycles/scene/shader.cpp @@ -817,28 +817,28 @@ void ShaderManager::init_xyz_transforms() Transform xyz_to_rgb; if (config->hasRole("aces_interchange")) { - /* Standard OpenColorIO role, defined as ACES2065-1. */ - const Transform xyz_E_to_aces = make_transform(1.0498110175f, - 0.0f, - -0.0000974845f, - 0.0f, - -0.4959030231f, - 1.3733130458f, - 0.0982400361f, - 0.0f, - 0.0f, - 0.0f, - 0.9912520182f, - 0.0f); - const Transform xyz_D65_to_E = make_transform( - 1.0521111f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.9184170f, 0.0f); - + /* Standard OpenColorIO role, defined as ACES AP0 (ACES2065-1). */ Transform aces_to_rgb; if (!to_scene_linear_transform(config, "aces_interchange", aces_to_rgb)) { return; } - xyz_to_rgb = aces_to_rgb * xyz_E_to_aces * xyz_D65_to_E; + /* This is the OpenColorIO builtin transform: + * UTILITY - ACES-AP0_to_CIE-XYZ-D65_BFD. 
*/ + const Transform ACES_AP0_to_xyz_D65 = make_transform(0.938280f, + -0.004451f, + 0.016628f, + 0.000000f, + 0.337369f, + 0.729522f, + -0.066890f, + 0.000000f, + 0.001174f, + -0.003711f, + 1.091595f, + 0.000000f); + const Transform xyz_to_aces = transform_inverse(ACES_AP0_to_xyz_D65); + xyz_to_rgb = aces_to_rgb * xyz_to_aces; } else if (config->hasRole("XYZ")) { /* Custom role used before the standard existed. */ diff --git a/intern/cycles/util/CMakeLists.txt b/intern/cycles/util/CMakeLists.txt index 0e348b1ac0f..fddac1dbbcf 100644 --- a/intern/cycles/util/CMakeLists.txt +++ b/intern/cycles/util/CMakeLists.txt @@ -7,7 +7,6 @@ set(INC ) set(INC_SYS - ${GLEW_INCLUDE_DIR} ) set(SRC @@ -34,14 +33,6 @@ set(LIB ${TBB_LIBRARIES} ) -if(WITH_CYCLES_STANDALONE) - if(WITH_CYCLES_STANDALONE_GUI) - list(APPEND SRC - view.cpp - ) - endif() -endif() - set(SRC_HEADERS algorithm.h aligned_malloc.h @@ -142,7 +133,6 @@ set(SRC_HEADERS unique_ptr.h vector.h version.h - view.h windows.h xml.h ) diff --git a/intern/cycles/util/math.h b/intern/cycles/util/math.h index ed9f230398d..555a5304764 100644 --- a/intern/cycles/util/math.h +++ b/intern/cycles/util/math.h @@ -67,6 +67,9 @@ CCL_NAMESPACE_BEGIN #ifndef M_SQRT2_F # define M_SQRT2_F (1.4142135623730950f) /* sqrt(2) */ #endif +#ifndef M_SQRT3_F +# define M_SQRT3_F (1.7320508075688772f) /* sqrt(3) */ +#endif #ifndef M_LN2_F # define M_LN2_F (0.6931471805599453f) /* ln(2) */ #endif diff --git a/intern/cycles/util/view.cpp b/intern/cycles/util/view.cpp deleted file mode 100644 index 475f8dbcee8..00000000000 --- a/intern/cycles/util/view.cpp +++ /dev/null @@ -1,269 +0,0 @@ -/* SPDX-License-Identifier: Apache-2.0 - * Copyright 2011-2022 Blender Foundation */ - -#include <stdio.h> -#include <stdlib.h> - -#include "util/opengl.h" -#include "util/string.h" -#include "util/time.h" -#include "util/version.h" -#include "util/view.h" - -#ifdef __APPLE__ -# include <GLUT/glut.h> -#else -# include <GL/glut.h> -#endif - -CCL_NAMESPACE_BEGIN - -/* 
structs */ - -struct View { - ViewInitFunc initf; - ViewExitFunc exitf; - ViewResizeFunc resize; - ViewDisplayFunc display; - ViewKeyboardFunc keyboard; - ViewMotionFunc motion; - - bool first_display; - bool redraw; - - int mouseX, mouseY; - int mouseBut0, mouseBut2; - - int width, height; -} V; - -/* public */ - -static void view_display_text(int x, int y, const char *text) -{ - const char *c; - - glRasterPos3f(x, y, 0); - - for (c = text; *c != '\0'; c++) - glutBitmapCharacter(GLUT_BITMAP_HELVETICA_10, *c); -} - -void view_display_info(const char *info) -{ - const int height = 20; - - glEnable(GL_BLEND); - glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); - glColor4f(0.1f, 0.1f, 0.1f, 0.8f); - glRectf(0.0f, V.height - height, V.width, V.height); - glDisable(GL_BLEND); - - glColor3f(0.5f, 0.5f, 0.5f); - - view_display_text(10, 7 + V.height - height, info); - - glColor3f(1.0f, 1.0f, 1.0f); -} - -void view_display_help() -{ - const int w = (int)((float)V.width / 1.15f); - const int h = (int)((float)V.height / 1.15f); - - const int x1 = (V.width - w) / 2; - const int x2 = x1 + w; - - const int y1 = (V.height - h) / 2; - const int y2 = y1 + h; - - glEnable(GL_BLEND); - glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); - glColor4f(0.5f, 0.5f, 0.5f, 0.8f); - glRectf(x1, y1, x2, y2); - glDisable(GL_BLEND); - - glColor3f(0.8f, 0.8f, 0.8f); - - string info = string("Cycles Renderer ") + CYCLES_VERSION_STRING; - - view_display_text(x1 + 20, y2 - 20, info.c_str()); - view_display_text(x1 + 20, y2 - 40, "(C) 2011-2022 Blender Foundation"); - view_display_text(x1 + 20, y2 - 80, "Controls:"); - view_display_text(x1 + 20, y2 - 100, "h: Info/Help"); - view_display_text(x1 + 20, y2 - 120, "r: Reset"); - view_display_text(x1 + 20, y2 - 140, "p: Pause"); - view_display_text(x1 + 20, y2 - 160, "esc: Cancel"); - view_display_text(x1 + 20, y2 - 180, "q: Quit program"); - - view_display_text(x1 + 20, y2 - 210, "i: Interactive mode"); - view_display_text(x1 + 20, y2 - 230, "Left 
mouse: Move camera"); - view_display_text(x1 + 20, y2 - 250, "Right mouse: Rotate camera"); - view_display_text(x1 + 20, y2 - 270, "W/A/S/D: Move camera"); - view_display_text(x1 + 20, y2 - 290, "0/1/2/3: Set max bounces"); - - glColor3f(1.0f, 1.0f, 1.0f); -} - -static void view_display() -{ - if (V.first_display) { - if (V.initf) - V.initf(); - if (V.exitf) - atexit(V.exitf); - - V.first_display = false; - } - - glClearColor(0.05f, 0.05f, 0.05f, 0.0f); - glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); - - glMatrixMode(GL_PROJECTION); - glLoadIdentity(); - glOrtho(0, V.width, 0, V.height, -1, 1); - - glMatrixMode(GL_MODELVIEW); - glLoadIdentity(); - - glRasterPos3f(0, 0, 0); - - if (V.display) - V.display(); - - glutSwapBuffers(); -} - -static void view_reshape(int width, int height) -{ - if (width <= 0 || height <= 0) - return; - - V.width = width; - V.height = height; - - glViewport(0, 0, width, height); - - glMatrixMode(GL_PROJECTION); - glLoadIdentity(); - - glMatrixMode(GL_MODELVIEW); - glLoadIdentity(); - - if (V.resize) - V.resize(width, height); -} - -static void view_keyboard(unsigned char key, int x, int y) -{ - if (V.keyboard) - V.keyboard(key); - - if (key == 'm') - printf("mouse %d %d\n", x, y); - if (key == 'q') { - if (V.exitf) - V.exitf(); - exit(0); - } -} - -static void view_mouse(int button, int state, int x, int y) -{ - if (button == 0) { - if (state == GLUT_DOWN) { - V.mouseX = x; - V.mouseY = y; - V.mouseBut0 = 1; - } - else if (state == GLUT_UP) { - V.mouseBut0 = 0; - } - } - else if (button == 2) { - if (state == GLUT_DOWN) { - V.mouseX = x; - V.mouseY = y; - V.mouseBut2 = 1; - } - else if (state == GLUT_UP) { - V.mouseBut2 = 0; - } - } -} - -static void view_motion(int x, int y) -{ - const int but = V.mouseBut0 ? 
0 : 2; - const int distX = x - V.mouseX; - const int distY = y - V.mouseY; - - if (V.motion) - V.motion(distX, distY, but); - - V.mouseX = x; - V.mouseY = y; -} - -static void view_idle() -{ - if (V.redraw) { - V.redraw = false; - glutPostRedisplay(); - } - - time_sleep(0.1); -} - -void view_main_loop(const char *title, - int width, - int height, - ViewInitFunc initf, - ViewExitFunc exitf, - ViewResizeFunc resize, - ViewDisplayFunc display, - ViewKeyboardFunc keyboard, - ViewMotionFunc motion) -{ - const char *name = "app"; - char *argv = (char *)name; - int argc = 1; - - memset(&V, 0, sizeof(V)); - V.width = width; - V.height = height; - V.first_display = true; - V.redraw = false; - V.initf = initf; - V.exitf = exitf; - V.resize = resize; - V.display = display; - V.keyboard = keyboard; - V.motion = motion; - - glutInit(&argc, &argv); - glutInitWindowSize(width, height); - glutInitWindowPosition(0, 0); - glutInitDisplayMode(GLUT_RGB | GLUT_DOUBLE | GLUT_DEPTH); - glutCreateWindow(title); - - glewInit(); - - view_reshape(width, height); - - glutDisplayFunc(view_display); - glutIdleFunc(view_idle); - glutReshapeFunc(view_reshape); - glutKeyboardFunc(view_keyboard); - glutMouseFunc(view_mouse); - glutMotionFunc(view_motion); - - glutMainLoop(); -} - -void view_redraw() -{ - V.redraw = true; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/util/view.h b/intern/cycles/util/view.h deleted file mode 100644 index 51c242c21f7..00000000000 --- a/intern/cycles/util/view.h +++ /dev/null @@ -1,35 +0,0 @@ -/* SPDX-License-Identifier: Apache-2.0 - * Copyright 2011-2022 Blender Foundation */ - -#ifndef __UTIL_VIEW_H__ -#define __UTIL_VIEW_H__ - -/* Functions to display a simple OpenGL window using GLUT, simplified to the - * bare minimum we need to reduce boilerplate code in tests apps. 
*/ - -CCL_NAMESPACE_BEGIN - -typedef void (*ViewInitFunc)(); -typedef void (*ViewExitFunc)(); -typedef void (*ViewResizeFunc)(int width, int height); -typedef void (*ViewDisplayFunc)(); -typedef void (*ViewKeyboardFunc)(unsigned char key); -typedef void (*ViewMotionFunc)(int x, int y, int button); - -void view_main_loop(const char *title, - int width, - int height, - ViewInitFunc initf, - ViewExitFunc exitf, - ViewResizeFunc resize, - ViewDisplayFunc display, - ViewKeyboardFunc keyboard, - ViewMotionFunc motion); - -void view_display_info(const char *info); -void view_display_help(); -void view_redraw(); - -CCL_NAMESPACE_END - -#endif /*__UTIL_VIEW_H__*/ diff --git a/intern/ffmpeg/tests/ffmpeg_codecs.cc b/intern/ffmpeg/tests/ffmpeg_codecs.cc index bb6e13579e5..d0c40736884 100644 --- a/intern/ffmpeg/tests/ffmpeg_codecs.cc +++ b/intern/ffmpeg/tests/ffmpeg_codecs.cc @@ -4,12 +4,13 @@ extern "C" { #include <libavcodec/avcodec.h> +#include <libavutil/channel_layout.h> #include <libavutil/log.h> } namespace { -bool test_vcodec(AVCodec *codec, AVPixelFormat pixelformat) +bool test_vcodec(const AVCodec *codec, AVPixelFormat pixelformat) { av_log_set_level(AV_LOG_QUIET); bool result = false; @@ -30,7 +31,7 @@ bool test_vcodec(AVCodec *codec, AVPixelFormat pixelformat) } return result; } -bool test_acodec(AVCodec *codec, AVSampleFormat fmt) +bool test_acodec(const AVCodec *codec, AVSampleFormat fmt) { av_log_set_level(AV_LOG_QUIET); bool result = false; @@ -54,7 +55,7 @@ bool test_acodec(AVCodec *codec, AVSampleFormat fmt) bool test_codec_video_by_codecid(AVCodecID codec_id, AVPixelFormat pixelformat) { bool result = false; - AVCodec *codec = avcodec_find_encoder(codec_id); + const AVCodec *codec = avcodec_find_encoder(codec_id); if (codec) result = test_vcodec(codec, pixelformat); return result; @@ -63,7 +64,7 @@ bool test_codec_video_by_codecid(AVCodecID codec_id, AVPixelFormat pixelformat) bool test_codec_video_by_name(const char *codecname, AVPixelFormat pixelformat) { 
bool result = false; - AVCodec *codec = avcodec_find_encoder_by_name(codecname); + const AVCodec *codec = avcodec_find_encoder_by_name(codecname); if (codec) result = test_vcodec(codec, pixelformat); return result; @@ -72,7 +73,7 @@ bool test_codec_video_by_name(const char *codecname, AVPixelFormat pixelformat) bool test_codec_audio_by_codecid(AVCodecID codec_id, AVSampleFormat fmt) { bool result = false; - AVCodec *codec = avcodec_find_encoder(codec_id); + const AVCodec *codec = avcodec_find_encoder(codec_id); if (codec) result = test_acodec(codec, fmt); return result; @@ -81,7 +82,7 @@ bool test_codec_audio_by_codecid(AVCodecID codec_id, AVSampleFormat fmt) bool test_codec_audio_by_name(const char *codecname, AVSampleFormat fmt) { bool result = false; - AVCodec *codec = avcodec_find_encoder_by_name(codecname); + const AVCodec *codec = avcodec_find_encoder_by_name(codecname); if (codec) result = test_acodec(codec, fmt); return result; diff --git a/intern/ghost/GHOST_C-api.h b/intern/ghost/GHOST_C-api.h index 441c7315f1a..4e48a908c00 100644 --- a/intern/ghost/GHOST_C-api.h +++ b/intern/ghost/GHOST_C-api.h @@ -249,6 +249,16 @@ extern GHOST_TSuccess GHOST_EndFullScreen(GHOST_SystemHandle systemhandle); */ extern int GHOST_GetFullScreen(GHOST_SystemHandle systemhandle); +/** + * Get the Window under the cursor. + * \param x: The x-coordinate of the cursor. + * \param y: The y-coordinate of the cursor. + * \return The window under the cursor or nullptr if none.
+ */ +extern GHOST_WindowHandle GHOST_GetWindowUnderCursor(GHOST_SystemHandle systemhandle, + int32_t x, + int32_t y); + /*************************************************************************************** * Event management functionality ***************************************************************************************/ diff --git a/intern/ghost/GHOST_ISystem.h b/intern/ghost/GHOST_ISystem.h index 837ec25d0f8..ed193ee7e5d 100644 --- a/intern/ghost/GHOST_ISystem.h +++ b/intern/ghost/GHOST_ISystem.h @@ -309,6 +309,14 @@ class GHOST_ISystem { */ virtual void useWindowFocus(const bool use_focus) = 0; + /** + * Get the Window under the cursor. + * \param x: The x-coordinate of the cursor. + * \param y: The y-coordinate of the cursor. + * \return The window under the cursor or nullptr if none. + */ + virtual GHOST_IWindow *getWindowUnderCursor(int32_t x, int32_t y) = 0; + /*************************************************************************************** * Event management functionality ***************************************************************************************/ diff --git a/intern/ghost/intern/GHOST_C-api.cpp b/intern/ghost/intern/GHOST_C-api.cpp index a47d2468937..e3d01c24283 100644 --- a/intern/ghost/intern/GHOST_C-api.cpp +++ b/intern/ghost/intern/GHOST_C-api.cpp @@ -233,6 +233,16 @@ int GHOST_GetFullScreen(GHOST_SystemHandle systemhandle) return (int)system->getFullScreen(); } +GHOST_WindowHandle GHOST_GetWindowUnderCursor(GHOST_SystemHandle systemhandle, + int32_t x, + int32_t y) +{ + GHOST_ISystem *system = (GHOST_ISystem *)systemhandle; + GHOST_IWindow *window = system->getWindowUnderCursor(x, y); + + return (GHOST_WindowHandle)window; +} + bool GHOST_ProcessEvents(GHOST_SystemHandle systemhandle, bool waitForEvent) { GHOST_ISystem *system = (GHOST_ISystem *)systemhandle; diff --git a/intern/ghost/intern/GHOST_ContextD3D.cpp b/intern/ghost/intern/GHOST_ContextD3D.cpp index 11f15fd1ee3..ded76daa145 100644 --- 
a/intern/ghost/intern/GHOST_ContextD3D.cpp +++ b/intern/ghost/intern/GHOST_ContextD3D.cpp @@ -110,9 +110,11 @@ class GHOST_SharedOpenGLResource { struct SharedData { HANDLE device; GLuint fbo; - HANDLE render_buf{nullptr}; + HANDLE render_target{nullptr}; } m_shared; + enum RenderTarget { TARGET_RENDERBUF, TARGET_TEX2D }; + public: GHOST_SharedOpenGLResource(ID3D11Device *device, ID3D11DeviceContext *device_ctx, @@ -179,37 +181,64 @@ class GHOST_SharedOpenGLResource { } if (m_is_initialized) { - if (m_shared.render_buf) { - wglDXUnregisterObjectNV(m_shared.device, m_shared.render_buf); + if (m_shared.render_target +#if 1 + /* TODO: #wglDXUnregisterObjectNV() causes an access violation on AMD when the shared + * resource is a GL texture. Since there is currently no good alternative, just skip + * unregistering the shared resource. */ + && !m_use_gl_texture2d +#endif + ) { + wglDXUnregisterObjectNV(m_shared.device, m_shared.render_target); } if (m_shared.device) { wglDXCloseDeviceNV(m_shared.device); } glDeleteFramebuffers(1, &m_shared.fbo); - glDeleteRenderbuffers(1, &m_gl_render_buf); + if (m_use_gl_texture2d) { + glDeleteTextures(1, &m_gl_render_target); + } + else { + glDeleteRenderbuffers(1, &m_gl_render_target); + } } } - void reregisterSharedObject() + /* Returns true if the shared object was successfully registered, false otherwise. 
*/ + bool reregisterSharedObject(RenderTarget target) { - if (m_shared.render_buf) { - wglDXUnregisterObjectNV(m_shared.device, m_shared.render_buf); + if (m_shared.render_target) { + wglDXUnregisterObjectNV(m_shared.device, m_shared.render_target); } if (!m_render_target_tex) { - return; + return false; } - m_shared.render_buf = wglDXRegisterObjectNV(m_shared.device, - m_render_target_tex, - m_gl_render_buf, - GL_RENDERBUFFER, - WGL_ACCESS_READ_WRITE_NV); + if (target == TARGET_TEX2D) { + glTexImage2D(GL_TEXTURE_2D, + 0, + GL_RGBA8, + m_cur_width, + m_cur_height, + 0, + GL_RGBA, + GL_UNSIGNED_BYTE, + nullptr); + } - if (!m_shared.render_buf) { + m_shared.render_target = wglDXRegisterObjectNV(m_shared.device, + m_render_target_tex, + m_gl_render_target, + (target == TARGET_TEX2D) ? GL_TEXTURE_2D : + GL_RENDERBUFFER, + WGL_ACCESS_READ_WRITE_NV); + if (!m_shared.render_target) { fprintf(stderr, "Error registering shared object using wglDXRegisterObjectNV()\n"); - return; + return false; } + + return true; } GHOST_TSuccess initialize() @@ -221,16 +250,33 @@ class GHOST_SharedOpenGLResource { } /* Build the renderbuffer. */ - glGenRenderbuffers(1, &m_gl_render_buf); - glBindRenderbuffer(GL_RENDERBUFFER, m_gl_render_buf); + glGenRenderbuffers(1, &m_gl_render_target); + glBindRenderbuffer(GL_RENDERBUFFER, m_gl_render_target); + + if (!reregisterSharedObject(TARGET_RENDERBUF)) { + glBindRenderbuffer(GL_RENDERBUFFER, 0); + if (m_gl_render_target) { + glDeleteRenderbuffers(1, &m_gl_render_target); + } + /* Fall back to texture 2d. 
*/ + m_use_gl_texture2d = true; + glGenTextures(1, &m_gl_render_target); + glBindTexture(GL_TEXTURE_2D, m_gl_render_target); - reregisterSharedObject(); + reregisterSharedObject(TARGET_TEX2D); + } /* Build the framebuffer */ glGenFramebuffers(1, &m_shared.fbo); glBindFramebuffer(GL_FRAMEBUFFER, m_shared.fbo); - glFramebufferRenderbuffer( - GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, m_gl_render_buf); + if (m_use_gl_texture2d) { + glFramebufferTexture2D( + GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_gl_render_target, 0); + } + else { + glFramebufferRenderbuffer( + GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, m_gl_render_target); + } m_is_initialized = true; return GHOST_kSuccess; @@ -245,7 +291,7 @@ class GHOST_SharedOpenGLResource { if ((m_cur_width != width) || (m_cur_height != height)) { m_cur_width = width; m_cur_height = height; - reregisterSharedObject(); + reregisterSharedObject(m_use_gl_texture2d ? TARGET_TEX2D : TARGET_RENDERBUF); } } @@ -293,18 +339,19 @@ class GHOST_SharedOpenGLResource { private: void beginGLOnly() { - wglDXLockObjectsNV(m_shared.device, 1, &m_shared.render_buf); + wglDXLockObjectsNV(m_shared.device, 1, &m_shared.render_target); } void endGLOnly() { - wglDXUnlockObjectsNV(m_shared.device, 1, &m_shared.render_buf); + wglDXUnlockObjectsNV(m_shared.device, 1, &m_shared.render_target); } ID3D11Device *m_device; ID3D11DeviceContext *m_device_ctx; - GLuint m_gl_render_buf; + GLuint m_gl_render_target; unsigned int m_cur_width, m_cur_height; bool m_is_initialized{false}; + bool m_use_gl_texture2d{false}; }; GHOST_SharedOpenGLResource *GHOST_ContextD3D::createSharedOpenGLResource( diff --git a/intern/ghost/intern/GHOST_System.cpp b/intern/ghost/intern/GHOST_System.cpp index e9c63502f66..3df85e18bc7 100644 --- a/intern/ghost/intern/GHOST_System.cpp +++ b/intern/ghost/intern/GHOST_System.cpp @@ -189,6 +189,25 @@ bool GHOST_System::getFullScreen(void) return fullScreen; } +GHOST_IWindow 
*GHOST_System::getWindowUnderCursor(int32_t x, int32_t y) +{ + /* TODO: This solution should follow the order of the activated windows (Z-order). + * It is imperfect but usable in most cases. */ + for (GHOST_IWindow *iwindow : m_windowManager->getWindows()) { + if (iwindow->getState() == GHOST_kWindowStateMinimized) { + continue; + } + + GHOST_Rect bounds; + iwindow->getClientBounds(bounds); + if (bounds.isInside(x, y)) { + return iwindow; + } + } + + return NULL; +} + void GHOST_System::dispatchEvents() { #ifdef WITH_INPUT_NDOF diff --git a/intern/ghost/intern/GHOST_System.h b/intern/ghost/intern/GHOST_System.h index 8602dd94e8c..0e1e3f734ae 100644 --- a/intern/ghost/intern/GHOST_System.h +++ b/intern/ghost/intern/GHOST_System.h @@ -157,6 +157,14 @@ class GHOST_System : public GHOST_ISystem { void useWindowFocus(const bool use_focus); bool m_windowFocus; + /** + * Get the Window under the cursor. + * \param x: The x-coordinate of the cursor. + * \param y: The y-coordinate of the cursor. + * \return The window under the cursor or nullptr if none. + */ + GHOST_IWindow *getWindowUnderCursor(int32_t x, int32_t y); + /*************************************************************************************** * Event management functionality ***************************************************************************************/ diff --git a/intern/ghost/intern/GHOST_SystemCocoa.h b/intern/ghost/intern/GHOST_SystemCocoa.h index a601d00561a..8b6dfb4efed 100644 --- a/intern/ghost/intern/GHOST_SystemCocoa.h +++ b/intern/ghost/intern/GHOST_SystemCocoa.h @@ -109,6 +109,14 @@ class GHOST_SystemCocoa : public GHOST_System { */ GHOST_TSuccess disposeContext(GHOST_IContext *context); + /** + * Get the Window under the cursor. + * \param x: The x-coordinate of the cursor. + * \param y: The y-coordinate of the cursor. + * \return The window under the cursor or nullptr if none. 
+ */ + GHOST_IWindow *getWindowUnderCursor(int32_t x, int32_t y); + /*************************************************************************************** * Event management functionality ***************************************************************************************/ diff --git a/intern/ghost/intern/GHOST_SystemCocoa.mm b/intern/ghost/intern/GHOST_SystemCocoa.mm index 51d4083b436..f0db6b6fdfc 100644 --- a/intern/ghost/intern/GHOST_SystemCocoa.mm +++ b/intern/ghost/intern/GHOST_SystemCocoa.mm @@ -307,6 +307,7 @@ static GHOST_TKey convertKey(int rawCode, unichar recvChar, UInt16 keyAction) case ']': return GHOST_kKeyRightBracket; case '`': + case '<': /* The position of '`' is equivalent to this symbol in the French layout. */ return GHOST_kKeyAccentGrave; default: return GHOST_kKeyUnknown; @@ -771,6 +772,20 @@ GHOST_TSuccess GHOST_SystemCocoa::disposeContext(GHOST_IContext *context) return GHOST_kSuccess; } +GHOST_IWindow *GHOST_SystemCocoa::getWindowUnderCursor(int32_t x, int32_t y) +{ + NSPoint scr_co = NSMakePoint(x, y); + + int windowNumberAtPoint = [NSWindow windowNumberAtPoint:scr_co belowWindowWithWindowNumber:0]; + NSWindow *nswindow = [NSApp windowWithWindowNumber:windowNumberAtPoint]; + + if (nswindow == nil) { + return nil; + } + + return m_windowManager->getWindowAssociatedWithOSWindow((void *)nswindow); +} + /** * \note : returns coordinates in Cocoa screen coordinates */ diff --git a/intern/ghost/intern/GHOST_SystemNULL.h b/intern/ghost/intern/GHOST_SystemNULL.h index 7b92d1d13a1..48973a00573 100644 --- a/intern/ghost/intern/GHOST_SystemNULL.h +++ b/intern/ghost/intern/GHOST_SystemNULL.h @@ -114,4 +114,9 @@ class GHOST_SystemNULL : public GHOST_System { type, ((glSettings.flags & GHOST_glStereoVisual) != 0)); } + + GHOST_IWindow *getWindowUnderCursor(int32_t x, int32_t y) + { + return NULL; + } }; diff --git a/intern/ghost/intern/GHOST_SystemWin32.cpp b/intern/ghost/intern/GHOST_SystemWin32.cpp index 1c26935ed64..e588c7485b4 100644 --- 
a/intern/ghost/intern/GHOST_SystemWin32.cpp +++ b/intern/ghost/intern/GHOST_SystemWin32.cpp @@ -53,9 +53,6 @@ #ifndef VK_COMMA # define VK_COMMA 0xBC #endif // VK_COMMA -#ifndef VK_QUOTE -# define VK_QUOTE 0xDE -#endif // VK_QUOTE #ifndef VK_BACK_QUOTE # define VK_BACK_QUOTE 0xC0 #endif // VK_BACK_QUOTE @@ -635,14 +632,32 @@ GHOST_TKey GHOST_SystemWin32::hardKey(RAWINPUT const &raw, GHOST_TKey GHOST_SystemWin32::processSpecialKey(short vKey, short scanCode) const { GHOST_TKey key = GHOST_kKeyUnknown; - switch (PRIMARYLANGID(m_langId)) { - case LANG_FRENCH: - if (vKey == VK_OEM_8) - key = GHOST_kKeyF13; // oem key; used purely for shortcuts . + char ch = (char)MapVirtualKeyA(vKey, MAPVK_VK_TO_CHAR); + switch (ch) { + case u'\"': + case u'\'': + key = GHOST_kKeyQuote; break; - case LANG_ENGLISH: - if (SUBLANGID(m_langId) == SUBLANG_ENGLISH_UK && vKey == VK_OEM_8) // "`¬" - key = GHOST_kKeyAccentGrave; + case u'.': + key = GHOST_kKeyNumpadPeriod; + break; + case u'/': + key = GHOST_kKeySlash; + break; + case u'`': + case u'²': + key = GHOST_kKeyAccentGrave; + break; + default: + if (vKey == VK_OEM_7) { + key = GHOST_kKeyQuote; + } + else if (vKey == VK_OEM_8) { + if (PRIMARYLANGID(m_langId) == LANG_FRENCH) { + /* OEM key; used purely for shortcuts. 
*/ + key = GHOST_kKeyF13; + } + } break; } @@ -777,9 +792,6 @@ GHOST_TKey GHOST_SystemWin32::convertKey(short vKey, short scanCode, short exten case VK_CLOSE_BRACKET: key = GHOST_kKeyRightBracket; break; - case VK_QUOTE: - key = GHOST_kKeyQuote; - break; case VK_GR_LESS: key = GHOST_kKeyGrLess; break; @@ -821,9 +833,6 @@ GHOST_TKey GHOST_SystemWin32::convertKey(short vKey, short scanCode, short exten case VK_CAPITAL: key = GHOST_kKeyCapsLock; break; - case VK_OEM_8: - key = ((GHOST_SystemWin32 *)getSystem())->processSpecialKey(vKey, scanCode); - break; case VK_MEDIA_PLAY_PAUSE: key = GHOST_kKeyMediaPlay; break; @@ -836,8 +845,10 @@ GHOST_TKey GHOST_SystemWin32::convertKey(short vKey, short scanCode, short exten case VK_MEDIA_NEXT_TRACK: key = GHOST_kKeyMediaLast; break; + case VK_OEM_7: + case VK_OEM_8: default: - key = GHOST_kKeyUnknown; + key = ((GHOST_SystemWin32 *)getSystem())->processSpecialKey(vKey, scanCode); break; } } diff --git a/intern/ghost/intern/GHOST_XrContext.cpp b/intern/ghost/intern/GHOST_XrContext.cpp index 5d8feb8e48a..2ac3d9ec2a5 100644 --- a/intern/ghost/intern/GHOST_XrContext.cpp +++ b/intern/ghost/intern/GHOST_XrContext.cpp @@ -412,6 +412,9 @@ void GHOST_XrContext::getExtensionsToEnable( /* Interaction profile extensions. */ try_ext.push_back(XR_EXT_HP_MIXED_REALITY_CONTROLLER_EXTENSION_NAME); try_ext.push_back(XR_HTC_VIVE_COSMOS_CONTROLLER_INTERACTION_EXTENSION_NAME); +#ifdef XR_HTC_VIVE_FOCUS3_CONTROLLER_INTERACTION_EXTENSION_NAME + try_ext.push_back(XR_HTC_VIVE_FOCUS3_CONTROLLER_INTERACTION_EXTENSION_NAME); +#endif try_ext.push_back(XR_HUAWEI_CONTROLLER_INTERACTION_EXTENSION_NAME); /* Controller model extension. 
*/ diff --git a/intern/opencolorio/CMakeLists.txt b/intern/opencolorio/CMakeLists.txt index dfccb9301ac..be6ccc5c2c5 100644 --- a/intern/opencolorio/CMakeLists.txt +++ b/intern/opencolorio/CMakeLists.txt @@ -7,6 +7,7 @@ set(INC ../guardedalloc ../../source/blender/blenlib ../../source/blender/gpu + ../../source/blender/gpu/intern ../../source/blender/makesdna ) @@ -20,6 +21,7 @@ set(SRC ocio_capi.h ocio_impl.h + ocio_shader_shared.hh ) set(LIB @@ -56,8 +58,38 @@ if(WITH_OPENCOLORIO) ) endif() - data_to_c_simple(gpu_shader_display_transform.glsl SRC) - data_to_c_simple(gpu_shader_display_transform_vertex.glsl SRC) + set(GLSL_SRC + gpu_shader_display_transform_vert.glsl + gpu_shader_display_transform_frag.glsl + + ocio_shader_shared.hh + ) + + set(GLSL_C) + foreach(GLSL_FILE ${GLSL_SRC}) + data_to_c_simple(${GLSL_FILE} GLSL_C) + endforeach() + + blender_add_lib(bf_ocio_shaders "${GLSL_C}" "" "" "") + + list(APPEND LIB + bf_ocio_shaders + ) + + set(GLSL_SOURCE_CONTENT "") + foreach(GLSL_FILE ${GLSL_SRC}) + get_filename_component(GLSL_FILE_NAME ${GLSL_FILE} NAME) + string(REPLACE "." 
"_" GLSL_FILE_NAME_UNDERSCORES ${GLSL_FILE_NAME}) + string(APPEND GLSL_SOURCE_CONTENT "SHADER_SOURCE\(datatoc_${GLSL_FILE_NAME_UNDERSCORES}, \"${GLSL_FILE_NAME}\", \"${GLSL_FILE}\"\)\n") + endforeach() + + set(glsl_source_list_file "${CMAKE_CURRENT_BINARY_DIR}/glsl_ocio_source_list.h") + file(GENERATE OUTPUT ${glsl_source_list_file} CONTENT "${GLSL_SOURCE_CONTENT}") + list(APPEND SRC ${glsl_source_list_file}) + list(APPEND INC ${CMAKE_CURRENT_BINARY_DIR}) + + target_include_directories(bf_ocio_shaders PUBLIC ${CMAKE_CURRENT_BINARY_DIR}) + endif() diff --git a/intern/opencolorio/gpu_shader_display_transform.glsl b/intern/opencolorio/gpu_shader_display_transform_frag.glsl index f5a7a7bf45d..3c2352c13ba 100644 --- a/intern/opencolorio/gpu_shader_display_transform.glsl +++ b/intern/opencolorio/gpu_shader_display_transform_frag.glsl @@ -1,39 +1,10 @@ /* Blender OpenColorIO implementation */ -uniform sampler2D image_texture; -uniform sampler2D overlay_texture; - -uniform float dither; -uniform float scale; -uniform float exponent; -uniform bool predivide; -uniform bool overlay; +/* -------------------------------------------------------------------- */ +/** \name Curve Mapping Implementation + * \{ */ #ifdef USE_CURVE_MAPPING -uniform sampler1D curve_mapping_texture; - -layout(std140) uniform OCIO_GPUCurveMappingParameters -{ - /* Curve mapping parameters - * - * See documentation for OCIO_CurveMappingSettings to get fields descriptions. - * (this ones pretty much copies stuff from C structure.) 
- */ - vec4 curve_mapping_mintable; - vec4 curve_mapping_range; - vec4 curve_mapping_ext_in_x; - vec4 curve_mapping_ext_in_y; - vec4 curve_mapping_ext_out_x; - vec4 curve_mapping_ext_out_y; - vec4 curve_mapping_first_x; - vec4 curve_mapping_first_y; - vec4 curve_mapping_last_x; - vec4 curve_mapping_last_y; - vec4 curve_mapping_black; - vec4 curve_mapping_bwmul; - int curve_mapping_lut_size; - int curve_mapping_use_extend_extrapolate; -}; float read_curve_mapping(int table, int index) { @@ -43,27 +14,27 @@ float read_curve_mapping(int table, int index) float curvemap_calc_extend(int table, float x, vec2 first, vec2 last) { if (x <= first[0]) { - if (curve_mapping_use_extend_extrapolate == 0) { + if (curve_mapping.use_extend_extrapolate == 0) { /* horizontal extrapolation */ return first[1]; } else { - float fac = (curve_mapping_ext_in_x[table] != 0.0) ? - ((x - first[0]) / curve_mapping_ext_in_x[table]) : + float fac = (curve_mapping.ext_in_x[table] != 0.0) ? + ((x - first[0]) / curve_mapping.ext_in_x[table]) : 10000.0; - return first[1] + curve_mapping_ext_in_y[table] * fac; + return first[1] + curve_mapping.ext_in_y[table] * fac; } } else if (x >= last[0]) { - if (curve_mapping_use_extend_extrapolate == 0) { + if (curve_mapping.use_extend_extrapolate == 0) { /* horizontal extrapolation */ return last[1]; } else { - float fac = (curve_mapping_ext_out_x[table] != 0.0) ? - ((x - last[0]) / curve_mapping_ext_out_x[table]) : + float fac = (curve_mapping.ext_out_x[table] != 0.0) ? 
+ ((x - last[0]) / curve_mapping.ext_out_x[table]) : -10000.0; - return last[1] + curve_mapping_ext_out_y[table] * fac; + return last[1] + curve_mapping.ext_out_y[table] * fac; } } return 0.0; @@ -71,10 +42,10 @@ float curvemap_calc_extend(int table, float x, vec2 first, vec2 last) float curvemap_evaluateF(int table, float value) { - float mintable_ = curve_mapping_mintable[table]; - float range = curve_mapping_range[table]; + float mintable_ = curve_mapping.mintable[table]; + float range = curve_mapping.range[table]; float mintable = 0.0; - int CM_TABLE = curve_mapping_lut_size - 1; + int CM_TABLE = curve_mapping.lut_size - 1; float fi; int i; @@ -87,8 +58,8 @@ float curvemap_evaluateF(int table, float value) if (fi < 0.0 || fi > float(CM_TABLE)) { return curvemap_calc_extend(table, value, - vec2(curve_mapping_first_x[table], curve_mapping_first_y[table]), - vec2(curve_mapping_last_x[table], curve_mapping_last_y[table])); + vec2(curve_mapping.first_x[table], curve_mapping.first_y[table]), + vec2(curve_mapping.last_x[table], curve_mapping.last_y[table])); } else { if (i < 0) { @@ -106,7 +77,7 @@ float curvemap_evaluateF(int table, float value) vec4 curvemapping_evaluate_premulRGBF(vec4 col) { - col.rgb = (col.rgb - curve_mapping_black.rgb) * curve_mapping_bwmul.rgb; + col.rgb = (col.rgb - curve_mapping.black.rgb) * curve_mapping.bwmul.rgb; vec4 result; result.r = curvemap_evaluateF(0, col.r); @@ -115,8 +86,15 @@ vec4 curvemapping_evaluate_premulRGBF(vec4 col) result.a = col.a; return result; } + #endif /* USE_CURVE_MAPPING */ +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Dithering + * \{ */ + /* Using a triangle distribution which gives a more final uniform noise. * See Banding in Games:A Noisy Rant(revision 5) Mikkel Gjøl, Playdead (slide 27) */ /* GPUs are rounding before writing to framebuffer so we center the distribution around 0.0. 
*/ @@ -135,23 +113,33 @@ float dither_random_value(vec2 co) vec2 round_to_pixel(sampler2D tex, vec2 uv) { - vec2 size = textureSize(tex, 0); - return vec2(ivec2(uv * size)) / size; + vec2 size = vec2(textureSize(tex, 0)); + return floor(uv * size) / size; } vec4 apply_dither(vec4 col, vec2 uv) { - col.rgb += dither_random_value(uv) * 0.0033 * dither; + col.rgb += dither_random_value(uv) * 0.0033 * parameters.dither; return col; } -vec4 OCIO_ProcessColor(vec4 col, vec4 col_overlay, vec2 noise_uv) +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Main Processing + * \{ */ + +/* Prototypes: Implementation is generated and defined after. */ +vec4 OCIO_to_scene_linear(vec4 pixel); +vec4 OCIO_to_display(vec4 pixel); + +vec4 OCIO_ProcessColor(vec4 col, vec4 col_overlay) { #ifdef USE_CURVE_MAPPING col = curvemapping_evaluate_premulRGBF(col); #endif - if (predivide) { + if (parameters.use_predivide) { if (col.a > 0.0 && col.a < 1.0) { col.rgb *= 1.0 / col.a; } @@ -166,7 +154,7 @@ vec4 OCIO_ProcessColor(vec4 col, vec4 col_overlay, vec2 noise_uv) col = OCIO_to_scene_linear(col); /* Apply exposure in scene linear. */ - col.rgb *= scale; + col.rgb *= parameters.scale; /* Convert to display space. */ col = OCIO_to_display(col); @@ -177,34 +165,31 @@ vec4 OCIO_ProcessColor(vec4 col, vec4 col_overlay, vec2 noise_uv) * i.e: The linear color space w.r.t. display chromaticity and radiometry. * We separate the colormanagement process into two steps to be able to * merge UI using alpha blending in the correct color space. */ - if (overlay) { - col.rgb = pow(col.rgb, vec3(exponent * 2.2)); + if (parameters.use_overlay) { + col.rgb = pow(col.rgb, vec3(parameters.exponent * 2.2)); col = clamp(col, 0.0, 1.0); col *= 1.0 - col_overlay.a; col += col_overlay; /* Assumed unassociated alpha.
*/ col.rgb = pow(col.rgb, vec3(1.0 / 2.2)); } else { - col.rgb = pow(col.rgb, vec3(exponent)); + col.rgb = pow(col.rgb, vec3(parameters.exponent)); } - if (dither > 0.0) { + if (parameters.dither > 0.0) { + vec2 noise_uv = round_to_pixel(image_texture, texCoord_interp.st); col = apply_dither(col, noise_uv); } return col; } -/* ------------------------------------------------------------------------ */ - -in vec2 texCoord_interp; -out vec4 fragColor; +/** \} */ void main() { vec4 col = texture(image_texture, texCoord_interp.st); vec4 col_overlay = texture(overlay_texture, texCoord_interp.st); - vec2 noise_uv = round_to_pixel(image_texture, texCoord_interp.st); - fragColor = OCIO_ProcessColor(col, col_overlay, noise_uv); + fragColor = OCIO_ProcessColor(col, col_overlay); } diff --git a/intern/opencolorio/gpu_shader_display_transform_vertex.glsl b/intern/opencolorio/gpu_shader_display_transform_vert.glsl index 8cf9628b06b..06788be11de 100644 --- a/intern/opencolorio/gpu_shader_display_transform_vertex.glsl +++ b/intern/opencolorio/gpu_shader_display_transform_vert.glsl @@ -1,10 +1,4 @@ -uniform mat4 ModelViewProjectionMatrix; - -in vec2 texCoord; -in vec2 pos; -out vec2 texCoord_interp; - void main() { gl_Position = ModelViewProjectionMatrix * vec4(pos.xy, 0.0f, 1.0f); diff --git a/intern/opencolorio/ocio_impl.cc b/intern/opencolorio/ocio_impl.cc index 11e2a5f7371..a02a37522b9 100644 --- a/intern/opencolorio/ocio_impl.cc +++ b/intern/opencolorio/ocio_impl.cc @@ -320,16 +320,18 @@ void OCIOImpl::configGetXYZtoRGB(OCIO_ConstConfigRcPtr *config_, float xyz_to_rg } if (config->hasRole("aces_interchange")) { - /* Standard OpenColorIO role, defined as ACES2065-1. 
*/ - const float xyz_E_to_aces[3][3] = {{1.0498110175f, -0.4959030231f, 0.0f}, - {0.0f, 1.3733130458f, 0.0f}, - {-0.0000974845f, 0.0982400361f, 0.9912520182f}}; - const float xyz_D65_to_E[3][3] = { - {1.0521111f, 0.0f, 0.0f}, {0.0f, 1.0f, 0.0f}, {0.0f, 0.0f, 0.9184170f}}; - + /* Standard OpenColorIO role, defined as ACES AP0 (ACES2065-1). */ float aces_to_rgb[3][3]; if (to_scene_linear_matrix(config, "aces_interchange", aces_to_rgb)) { - mul_m3_series(xyz_to_rgb, aces_to_rgb, xyz_E_to_aces, xyz_D65_to_E); + /* This is the OpenColorIO builtin transform: + * UTILITY - ACES-AP0_to_CIE-XYZ-D65_BFD. */ + const float ACES_AP0_to_xyz_D65[3][3] = {{0.938280f, 0.337369f, 0.001174f}, + {-0.004451f, 0.729522f, -0.003711f}, + {0.016628f, -0.066890f, 1.091595f}}; + float xyz_to_aces[3][3]; + invert_m3_m3(xyz_to_aces, ACES_AP0_to_xyz_D65); + + mul_m3_m3m3(xyz_to_rgb, aces_to_rgb, xyz_to_aces); } } else if (config->hasRole("XYZ")) { diff --git a/intern/opencolorio/ocio_impl_glsl.cc b/intern/opencolorio/ocio_impl_glsl.cc index 09803cd8038..e3d44ae9d55 100644 --- a/intern/opencolorio/ocio_impl_glsl.cc +++ b/intern/opencolorio/ocio_impl_glsl.cc @@ -21,14 +21,14 @@ #include "GPU_shader.h" #include "GPU_uniform_buffer.h" +#include "gpu_shader_create_info.hh" + using namespace OCIO_NAMESPACE; #include "MEM_guardedalloc.h" #include "ocio_impl.h" - -extern "C" char datatoc_gpu_shader_display_transform_glsl[]; -extern "C" char datatoc_gpu_shader_display_transform_vertex_glsl[]; +#include "ocio_shader_shared.hh" /* **** OpenGL drawing routines using GLSL for color space transform ***** */ @@ -39,41 +39,19 @@ enum OCIO_GPUTextureSlots { TEXTURE_SLOT_LUTS_OFFSET = 3, }; -/* Curve mapping parameters - * - * See documentation for OCIO_CurveMappingSettings to get fields descriptions. - * (this ones pretty much copies stuff from C structure.) 
- */ -struct OCIO_GPUCurveMappingParameters { - float curve_mapping_mintable[4]; - float curve_mapping_range[4]; - float curve_mapping_ext_in_x[4]; - float curve_mapping_ext_in_y[4]; - float curve_mapping_ext_out_x[4]; - float curve_mapping_ext_out_y[4]; - float curve_mapping_first_x[4]; - float curve_mapping_first_y[4]; - float curve_mapping_last_x[4]; - float curve_mapping_last_y[4]; - float curve_mapping_black[4]; - float curve_mapping_bwmul[4]; - int curve_mapping_lut_size; - int curve_mapping_use_extend_extrapolate; - int _pad[2]; - /** WARNING: Needs to be 16byte aligned. Used as UBO data. */ +enum OCIO_GPUUniformBufSlots { + UNIFORMBUF_SLOT_DISPLAY = 0, + UNIFORMBUF_SLOT_CURVEMAP = 1, + UNIFORMBUF_SLOT_LUTS = 2, }; struct OCIO_GPUShader { /* GPU shader. */ struct GPUShader *shader = nullptr; - /** Uniform locations. */ - int scale_loc = 0; - int exponent_loc = 0; - int dither_loc = 0; - int overlay_loc = 0; - int predivide_loc = 0; - int ubo_bind = 0; + /** Uniform parameters. */ + OCIO_GPUParameters parameters = {}; + GPUUniformBuf *parameters_buffer = nullptr; /* Destructor. */ ~OCIO_GPUShader() @@ -81,6 +59,9 @@ struct OCIO_GPUShader { if (shader) { GPU_shader_free(shader); } + if (parameters_buffer) { + GPU_uniformbuf_free(parameters_buffer); + } } }; @@ -103,6 +84,7 @@ struct OCIO_GPUTextures { /* Uniforms */ std::vector<OCIO_GPUUniform> uniforms; + GPUUniformBuf *uniforms_buffer = nullptr; /* Destructor. */ ~OCIO_GPUTextures() @@ -113,6 +95,9 @@ struct OCIO_GPUTextures { if (dummy) { GPU_texture_free(dummy); } + if (uniforms_buffer) { + GPU_uniformbuf_free(uniforms_buffer); + } } }; @@ -165,97 +150,134 @@ static bool createGPUShader(OCIO_GPUShader &shader, const GpuShaderDescRcPtr &shaderdesc_to_display, const bool use_curve_mapping) { - std::ostringstream os; - { - /* Fragment shader */ + using namespace blender::gpu::shader; - /* Work around OpenColorIO not supporting latest GLSL yet. 
*/ - os << "#define texture2D texture\n"; - os << "#define texture3D texture\n"; + std::string source; + source += shaderdesc_to_scene_linear->getShaderText(); + source += "\n"; + source += shaderdesc_to_display->getShaderText(); + source += "\n"; - if (use_curve_mapping) { - os << "#define USE_CURVE_MAPPING\n"; + { + /* Replace all uniform declarations by a comment. + * This avoids double declarations from the backend. */ + size_t index = 0; + while (true) { + index = source.find("uniform ", index); + if (index == -1) { + break; + } + source.replace(index, 2, "//"); + index += 2; } - - os << shaderdesc_to_scene_linear->getShaderText() << "\n"; - os << shaderdesc_to_display->getShaderText() << "\n"; - - os << datatoc_gpu_shader_display_transform_glsl; } - shader.shader = GPU_shader_create(datatoc_gpu_shader_display_transform_vertex_glsl, - os.str().c_str(), - nullptr, - nullptr, - nullptr, - "OCIOShader"); - - if (shader.shader == nullptr) { - return false; - } - - shader.scale_loc = GPU_shader_get_uniform(shader.shader, "scale"); - shader.exponent_loc = GPU_shader_get_uniform(shader.shader, "exponent"); - shader.dither_loc = GPU_shader_get_uniform(shader.shader, "dither"); - shader.overlay_loc = GPU_shader_get_uniform(shader.shader, "overlay"); - shader.predivide_loc = GPU_shader_get_uniform(shader.shader, "predivide"); - shader.ubo_bind = GPU_shader_get_uniform_block_binding(shader.shader, - "OCIO_GPUCurveMappingParameters"); - - GPU_shader_bind(shader.shader); - - /* Set texture bind point uniform once. This is saved by the shader. */ - GPUShader *sh = shader.shader; - GPU_shader_uniform_int(sh, GPU_shader_get_uniform(sh, "image_texture"), TEXTURE_SLOT_IMAGE); - GPU_shader_uniform_int(sh, GPU_shader_get_uniform(sh, "overlay_texture"), TEXTURE_SLOT_OVERLAY); + StageInterfaceInfo iface("OCIO_Interface", ""); + iface.smooth(Type::VEC2, "texCoord_interp"); + + ShaderCreateInfo info("OCIO_Display"); + /* Work around OpenColorIO not supporting latest GLSL yet. 
*/ + info.define("texture2D", "texture"); + info.define("texture3D", "texture"); + info.typedef_source("ocio_shader_shared.hh"); + info.sampler(TEXTURE_SLOT_IMAGE, ImageType::FLOAT_2D, "image_texture"); + info.sampler(TEXTURE_SLOT_OVERLAY, ImageType::FLOAT_2D, "overlay_texture"); + info.uniform_buf(UNIFORMBUF_SLOT_DISPLAY, "OCIO_GPUParameters", "parameters"); + info.push_constant(Type::MAT4, "ModelViewProjectionMatrix"); + info.vertex_in(0, Type::VEC2, "pos"); + info.vertex_in(1, Type::VEC2, "texCoord"); + info.vertex_out(iface); + info.fragment_out(0, Type::VEC4, "fragColor"); + info.vertex_source("gpu_shader_display_transform_vert.glsl"); + info.fragment_source("gpu_shader_display_transform_frag.glsl"); + info.fragment_source_generated = source; if (use_curve_mapping) { - GPU_shader_uniform_int( - sh, GPU_shader_get_uniform(sh, "curve_mapping_texture"), TEXTURE_SLOT_CURVE_MAPPING); + info.define("USE_CURVE_MAPPING"); + info.uniform_buf(UNIFORMBUF_SLOT_CURVEMAP, "OCIO_GPUCurveMappingParameters", "curve_mapping"); + info.sampler(TEXTURE_SLOT_CURVE_MAPPING, ImageType::FLOAT_1D, "curve_mapping_texture"); } /* Set LUT textures. */ - for (int i = 0; i < textures.luts.size(); i++) { - GPU_shader_uniform_int(sh, - GPU_shader_get_uniform(sh, textures.luts[i].sampler_name.c_str()), - TEXTURE_SLOT_LUTS_OFFSET + i); - } + int slot = TEXTURE_SLOT_LUTS_OFFSET; + for (OCIO_GPULutTexture &texture : textures.luts) { + ImageType type = GPU_texture_dimensions(texture.texture) == 2 ? ImageType::FLOAT_2D : + ImageType::FLOAT_3D; + info.sampler(slot++, type, texture.sampler_name.c_str()); + } + + /* Set LUT uniforms. */ + if (!textures.uniforms.empty()) { + /* NOTE: For simplicity, we pad everything to size of vec4 avoiding sorting and alignment + * issues. It is unlikely that this becomes a real issue. 
*/ + size_t ubo_size = textures.uniforms.size() * sizeof(float) * 4; + void *ubo_data_buf = malloc(ubo_size); + + uint32_t *ubo_data = reinterpret_cast<uint32_t *>(ubo_data_buf); + + std::stringstream ss; + ss << "struct OCIO_GPULutParameters {\n"; + + int index = 0; + for (OCIO_GPUUniform &uniform : textures.uniforms) { + index += 1; + const GpuShaderDesc::UniformData &data = uniform.data; + const char *name = uniform.name.c_str(); + char prefix = ' '; + int vec_len; + switch (data.m_type) { + case UNIFORM_DOUBLE: { + vec_len = 1; + float value = float(data.m_getDouble()); + memcpy(ubo_data, &value, sizeof(float)); + break; + } + case UNIFORM_BOOL: { + prefix = 'b'; + vec_len = 1; + int value = int(data.m_getBool()); + memcpy(ubo_data, &value, sizeof(int)); + break; + } + case UNIFORM_FLOAT3: + vec_len = 3; + memcpy(ubo_data, data.m_getFloat3().data(), sizeof(float) * 3); + break; + case UNIFORM_VECTOR_FLOAT: + vec_len = data.m_vectorFloat.m_getSize(); + memcpy(ubo_data, data.m_vectorFloat.m_getVector(), sizeof(float) * vec_len); + break; + case UNIFORM_VECTOR_INT: + prefix = 'i'; + vec_len = data.m_vectorInt.m_getSize(); + memcpy(ubo_data, data.m_vectorInt.m_getVector(), sizeof(int) * vec_len); + break; + default: + continue; + } + /* Align every member to 16bytes. */ + ubo_data += 4; + /* Use a generic variable name because some GLSL compilers can interpret the preprocessor + * define as recursive. */ + ss << " " << prefix << "vec4 var" << index << ";\n"; + /* Use a define to keep the generated code working. */ + blender::StringRef suffix = blender::StringRefNull("xyzw").substr(0, vec_len); + ss << "#define " << name << " lut_parameters.var" << index << "." << suffix << "\n"; + } + ss << "};\n"; + info.typedef_source_generated = ss.str(); - /* Set uniforms. 
*/ - for (OCIO_GPUUniform &uniform : textures.uniforms) { - const GpuShaderDesc::UniformData &data = uniform.data; - const char *name = uniform.name.c_str(); + info.uniform_buf(UNIFORMBUF_SLOT_LUTS, "OCIO_GPULutParameters", "lut_parameters"); - if (data.m_getDouble) { - GPU_shader_uniform_1f(sh, name, (float)data.m_getDouble()); - } - else if (data.m_getBool) { - GPU_shader_uniform_1f(sh, name, (float)(data.m_getBool() ? 1.0f : 0.0f)); - } - else if (data.m_getFloat3) { - GPU_shader_uniform_3f(sh, - name, - (float)data.m_getFloat3()[0], - (float)data.m_getFloat3()[1], - (float)data.m_getFloat3()[2]); - } - else if (data.m_vectorFloat.m_getSize && data.m_vectorFloat.m_getVector) { - GPU_shader_uniform_vector(sh, - GPU_shader_get_uniform(sh, name), - (int)data.m_vectorFloat.m_getSize(), - 1, - (float *)data.m_vectorFloat.m_getVector()); - } - else if (data.m_vectorInt.m_getSize && data.m_vectorInt.m_getVector) { - GPU_shader_uniform_vector_int(sh, - GPU_shader_get_uniform(sh, name), - (int)data.m_vectorInt.m_getSize(), - 1, - (int *)data.m_vectorInt.m_getVector()); - } + textures.uniforms_buffer = GPU_uniformbuf_create_ex( + ubo_size, ubo_data_buf, "OCIO_LutParameters"); + + free(ubo_data_buf); } - return true; + shader.shader = GPU_shader_create_from_info(reinterpret_cast<GPUShaderCreateInfo *>(&info)); + + return (shader.shader != nullptr); } /** \} */ @@ -302,7 +324,7 @@ static bool addGPULut2D(OCIO_GPUTextures &textures, GPU_R16F; OCIO_GPULutTexture lut; - lut.texture = GPU_texture_create_2d(texture_name, width, height, 0, format, values); + lut.texture = GPU_texture_create_2d(texture_name, width, height, 1, format, values); if (lut.texture == nullptr) { return false; } @@ -334,7 +356,7 @@ static bool addGPULut3D(OCIO_GPUTextures &textures, OCIO_GPULutTexture lut; lut.texture = GPU_texture_create_3d( - texture_name, edgelen, edgelen, edgelen, 0, GPU_RGB16F, GPU_DATA_FLOAT, values); + texture_name, edgelen, edgelen, edgelen, 1, GPU_RGB16F, GPU_DATA_FLOAT, values); 
if (lut.texture == nullptr) { return false; } @@ -438,27 +460,65 @@ static void updateGPUCurveMapping(OCIO_GPUCurveMappping &curvemap, /* Update uniforms. */ OCIO_GPUCurveMappingParameters data; for (int i = 0; i < 4; i++) { - data.curve_mapping_range[i] = curve_mapping_settings->range[i]; - data.curve_mapping_mintable[i] = curve_mapping_settings->mintable[i]; - data.curve_mapping_ext_in_x[i] = curve_mapping_settings->ext_in_x[i]; - data.curve_mapping_ext_in_y[i] = curve_mapping_settings->ext_in_y[i]; - data.curve_mapping_ext_out_x[i] = curve_mapping_settings->ext_out_x[i]; - data.curve_mapping_ext_out_y[i] = curve_mapping_settings->ext_out_y[i]; - data.curve_mapping_first_x[i] = curve_mapping_settings->first_x[i]; - data.curve_mapping_first_y[i] = curve_mapping_settings->first_y[i]; - data.curve_mapping_last_x[i] = curve_mapping_settings->last_x[i]; - data.curve_mapping_last_y[i] = curve_mapping_settings->last_y[i]; + data.range[i] = curve_mapping_settings->range[i]; + data.mintable[i] = curve_mapping_settings->mintable[i]; + data.ext_in_x[i] = curve_mapping_settings->ext_in_x[i]; + data.ext_in_y[i] = curve_mapping_settings->ext_in_y[i]; + data.ext_out_x[i] = curve_mapping_settings->ext_out_x[i]; + data.ext_out_y[i] = curve_mapping_settings->ext_out_y[i]; + data.first_x[i] = curve_mapping_settings->first_x[i]; + data.first_y[i] = curve_mapping_settings->first_y[i]; + data.last_x[i] = curve_mapping_settings->last_x[i]; + data.last_y[i] = curve_mapping_settings->last_y[i]; } for (int i = 0; i < 3; i++) { - data.curve_mapping_black[i] = curve_mapping_settings->black[i]; - data.curve_mapping_bwmul[i] = curve_mapping_settings->bwmul[i]; + data.black[i] = curve_mapping_settings->black[i]; + data.bwmul[i] = curve_mapping_settings->bwmul[i]; } - data.curve_mapping_lut_size = curve_mapping_settings->lut_size; - data.curve_mapping_use_extend_extrapolate = curve_mapping_settings->use_extend_extrapolate; + data.lut_size = curve_mapping_settings->lut_size; + 
data.use_extend_extrapolate = curve_mapping_settings->use_extend_extrapolate; GPU_uniformbuf_update(curvemap.buffer, &data); } +static void updateGPUDisplayParameters(OCIO_GPUShader &shader, + float scale, + float exponent, + float dither, + bool use_predivide, + bool use_overlay) +{ + bool do_update = false; + if (shader.parameters_buffer == nullptr) { + shader.parameters_buffer = GPU_uniformbuf_create(sizeof(OCIO_GPUParameters)); + do_update = true; + } + OCIO_GPUParameters &data = shader.parameters; + if (data.scale != scale) { + data.scale = scale; + do_update = true; + } + if (data.exponent != exponent) { + data.exponent = exponent; + do_update = true; + } + if (data.dither != dither) { + data.dither = dither; + do_update = true; + } + if (bool(data.use_predivide) != use_predivide) { + data.use_predivide = use_predivide; + do_update = true; + } + if (bool(data.use_overlay) != use_overlay) { + data.use_overlay = use_overlay; + do_update = true; + } + if (do_update) { + GPU_uniformbuf_update(shader.parameters_buffer, &data); + } +} + /** \} */ /* -------------------------------------------------------------------- */ @@ -609,7 +669,7 @@ bool OCIOImpl::gpuDisplayShaderBind(OCIO_ConstConfigRcPtr *config, /* Update and bind curve mapping data. 
*/ if (curve_mapping_settings) { updateGPUCurveMapping(curvemap, curve_mapping_settings); - GPU_uniformbuf_bind(curvemap.buffer, shader.ubo_bind); + GPU_uniformbuf_bind(curvemap.buffer, UNIFORMBUF_SLOT_CURVEMAP); GPU_texture_bind(curvemap.texture, TEXTURE_SLOT_CURVE_MAPPING); } @@ -623,17 +683,16 @@ bool OCIOImpl::gpuDisplayShaderBind(OCIO_ConstConfigRcPtr *config, GPU_texture_bind(textures.luts[i].texture, TEXTURE_SLOT_LUTS_OFFSET + i); } + if (textures.uniforms_buffer) { + GPU_uniformbuf_bind(textures.uniforms_buffer, UNIFORMBUF_SLOT_LUTS); + } + + updateGPUDisplayParameters(shader, scale, exponent, dither, use_predivide, use_overlay); + GPU_uniformbuf_bind(shader.parameters_buffer, UNIFORMBUF_SLOT_DISPLAY); + /* TODO(fclem): remove remains of IMM. */ immBindShader(shader.shader); - /* Bind Shader and set uniforms. */ - // GPU_shader_bind(shader.shader); - GPU_shader_uniform_float(shader.shader, shader.scale_loc, scale); - GPU_shader_uniform_float(shader.shader, shader.exponent_loc, exponent); - GPU_shader_uniform_float(shader.shader, shader.dither_loc, dither); - GPU_shader_uniform_int(shader.shader, shader.overlay_loc, use_overlay); - GPU_shader_uniform_int(shader.shader, shader.predivide_loc, use_predivide); - return true; } diff --git a/intern/opencolorio/ocio_shader_shared.hh b/intern/opencolorio/ocio_shader_shared.hh new file mode 100644 index 00000000000..c7045217196 --- /dev/null +++ b/intern/opencolorio/ocio_shader_shared.hh @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. All rights reserved. */ + +#ifndef GPU_SHADER +# include "GPU_shader_shared_utils.h" +#endif + +struct OCIO_GPUCurveMappingParameters { + /* Curve mapping parameters + * + * See documentation for OCIO_CurveMappingSettings to get fields descriptions. + * (this ones pretty much copies stuff from C structure.) 
+ */ + float4 mintable; + float4 range; + float4 ext_in_x; + float4 ext_in_y; + float4 ext_out_x; + float4 ext_out_y; + float4 first_x; + float4 first_y; + float4 last_x; + float4 last_y; + float4 black; + float4 bwmul; + int lut_size; + int use_extend_extrapolate; + int _pad0; + int _pad1; +}; + +struct OCIO_GPUParameters { + float dither; + float scale; + float exponent; + bool1 use_predivide; + bool1 use_overlay; + int _pad0; + int _pad1; + int _pad2; +}; diff --git a/intern/opensubdiv/CMakeLists.txt b/intern/opensubdiv/CMakeLists.txt index 38ce9791b5a..bb3aa16a9fe 100644 --- a/intern/opensubdiv/CMakeLists.txt +++ b/intern/opensubdiv/CMakeLists.txt @@ -66,6 +66,8 @@ if(WITH_OPENSUBDIV) internal/evaluator/evaluator_capi.cc internal/evaluator/evaluator_impl.cc internal/evaluator/evaluator_impl.h + internal/evaluator/gl_compute_evaluator.cc + internal/evaluator/gl_compute_evaluator.h internal/evaluator/patch_map.cc internal/evaluator/patch_map.h @@ -104,6 +106,8 @@ if(WITH_OPENSUBDIV) add_definitions(-DNOMINMAX) add_definitions(-D_USE_MATH_DEFINES) endif() + + data_to_c_simple(internal/evaluator/shaders/glsl_compute_kernel.glsl SRC) else() list(APPEND SRC stub/opensubdiv_stub.cc diff --git a/intern/opensubdiv/internal/evaluator/eval_output_gpu.h b/intern/opensubdiv/internal/evaluator/eval_output_gpu.h index 783efd484aa..dc137e4322e 100644 --- a/intern/opensubdiv/internal/evaluator/eval_output_gpu.h +++ b/intern/opensubdiv/internal/evaluator/eval_output_gpu.h @@ -20,13 +20,11 @@ #define OPENSUBDIV_EVAL_OUTPUT_GPU_H_ #include "internal/evaluator/eval_output.h" +#include "internal/evaluator/gl_compute_evaluator.h" -#include <opensubdiv/osd/glComputeEvaluator.h> #include <opensubdiv/osd/glPatchTable.h> #include <opensubdiv/osd/glVertexBuffer.h> -using OpenSubdiv::Osd::GLComputeEvaluator; -using OpenSubdiv::Osd::GLStencilTableSSBO; using OpenSubdiv::Osd::GLVertexBuffer; namespace blender { diff --git a/intern/opensubdiv/internal/evaluator/gl_compute_evaluator.cc 
b/intern/opensubdiv/internal/evaluator/gl_compute_evaluator.cc new file mode 100644 index 00000000000..acf628c7035 --- /dev/null +++ b/intern/opensubdiv/internal/evaluator/gl_compute_evaluator.cc @@ -0,0 +1,647 @@ +// +// Copyright 2015 Pixar +// +// Licensed under the Apache License, Version 2.0 (the "Apache License") +// with the following modification; you may not use this file except in +// compliance with the Apache License and the following modification to it: +// Section 6. Trademarks. is deleted and replaced with: +// +// 6. Trademarks. This License does not grant permission to use the trade +// names, trademarks, service marks, or product names of the Licensor +// and its affiliates, except as required to comply with Section 4(c) of +// the License and to reproduce the content of the NOTICE file. +// +// You may obtain a copy of the Apache License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the Apache License with the above modification is +// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the Apache License for the specific +// language governing permissions and limitations under the Apache License. 
+// + +#include "gl_compute_evaluator.h" + +#include <GL/glew.h> + +#include <opensubdiv/far/error.h> +#include <opensubdiv/far/patchDescriptor.h> +#include <opensubdiv/far/stencilTable.h> +#include <opensubdiv/osd/glslPatchShaderSource.h> + +#include <cassert> +#include <cmath> +#include <sstream> +#include <string> +#include <vector> + +using OpenSubdiv::Far::LimitStencilTable; +using OpenSubdiv::Far::StencilTable; +using OpenSubdiv::Osd::BufferDescriptor; +using OpenSubdiv::Osd::PatchArray; +using OpenSubdiv::Osd::PatchArrayVector; + +extern "C" char datatoc_glsl_compute_kernel_glsl[]; + +namespace blender { +namespace opensubdiv { + +template<class T> GLuint createSSBO(std::vector<T> const &src) +{ + if (src.empty()) { + return 0; + } + + GLuint devicePtr = 0; + +#if defined(GL_ARB_direct_state_access) + if (GLEW_ARB_direct_state_access) { + glCreateBuffers(1, &devicePtr); + glNamedBufferData(devicePtr, src.size() * sizeof(T), &src.at(0), GL_STATIC_DRAW); + } + else +#endif + { + GLint prev = 0; + glGetIntegerv(GL_SHADER_STORAGE_BUFFER_BINDING, &prev); + glGenBuffers(1, &devicePtr); + glBindBuffer(GL_SHADER_STORAGE_BUFFER, devicePtr); + glBufferData(GL_SHADER_STORAGE_BUFFER, src.size() * sizeof(T), &src.at(0), GL_STATIC_DRAW); + glBindBuffer(GL_SHADER_STORAGE_BUFFER, prev); + } + + return devicePtr; +} + +GLStencilTableSSBO::GLStencilTableSSBO(StencilTable const *stencilTable) +{ + _numStencils = stencilTable->GetNumStencils(); + if (_numStencils > 0) { + _sizes = createSSBO(stencilTable->GetSizes()); + _offsets = createSSBO(stencilTable->GetOffsets()); + _indices = createSSBO(stencilTable->GetControlIndices()); + _weights = createSSBO(stencilTable->GetWeights()); + _duWeights = _dvWeights = 0; + _duuWeights = _duvWeights = _dvvWeights = 0; + } + else { + _sizes = _offsets = _indices = _weights = 0; + _duWeights = _dvWeights = 0; + _duuWeights = _duvWeights = _dvvWeights = 0; + } +} + +GLStencilTableSSBO::GLStencilTableSSBO(LimitStencilTable const 
*limitStencilTable) +{ + _numStencils = limitStencilTable->GetNumStencils(); + if (_numStencils > 0) { + _sizes = createSSBO(limitStencilTable->GetSizes()); + _offsets = createSSBO(limitStencilTable->GetOffsets()); + _indices = createSSBO(limitStencilTable->GetControlIndices()); + _weights = createSSBO(limitStencilTable->GetWeights()); + _duWeights = createSSBO(limitStencilTable->GetDuWeights()); + _dvWeights = createSSBO(limitStencilTable->GetDvWeights()); + _duuWeights = createSSBO(limitStencilTable->GetDuuWeights()); + _duvWeights = createSSBO(limitStencilTable->GetDuvWeights()); + _dvvWeights = createSSBO(limitStencilTable->GetDvvWeights()); + } + else { + _sizes = _offsets = _indices = _weights = 0; + _duWeights = _dvWeights = 0; + _duuWeights = _duvWeights = _dvvWeights = 0; + } +} + +GLStencilTableSSBO::~GLStencilTableSSBO() +{ + if (_sizes) + glDeleteBuffers(1, &_sizes); + if (_offsets) + glDeleteBuffers(1, &_offsets); + if (_indices) + glDeleteBuffers(1, &_indices); + if (_weights) + glDeleteBuffers(1, &_weights); + if (_duWeights) + glDeleteBuffers(1, &_duWeights); + if (_dvWeights) + glDeleteBuffers(1, &_dvWeights); + if (_duuWeights) + glDeleteBuffers(1, &_duuWeights); + if (_duvWeights) + glDeleteBuffers(1, &_duvWeights); + if (_dvvWeights) + glDeleteBuffers(1, &_dvvWeights); +} + +// --------------------------------------------------------------------------- + +GLComputeEvaluator::GLComputeEvaluator() : _workGroupSize(64), _patchArraysSSBO(0) +{ + memset((void *)&_stencilKernel, 0, sizeof(_stencilKernel)); + memset((void *)&_patchKernel, 0, sizeof(_patchKernel)); +} + +GLComputeEvaluator::~GLComputeEvaluator() +{ + if (_patchArraysSSBO) { + glDeleteBuffers(1, &_patchArraysSSBO); + } +} + +static GLuint compileKernel(BufferDescriptor const &srcDesc, + BufferDescriptor const &dstDesc, + BufferDescriptor const &duDesc, + BufferDescriptor const &dvDesc, + BufferDescriptor const &duuDesc, + BufferDescriptor const &duvDesc, + BufferDescriptor const 
&dvvDesc, + const char *kernelDefine, + int workGroupSize) +{ + GLuint program = glCreateProgram(); + + GLuint shader = glCreateShader(GL_COMPUTE_SHADER); + + std::string patchBasisShaderSource = + OpenSubdiv::Osd::GLSLPatchShaderSource::GetPatchBasisShaderSource(); + const char *patchBasisShaderSourceDefine = "#define OSD_PATCH_BASIS_GLSL\n"; + + std::ostringstream defines; + defines << "#define LENGTH " << srcDesc.length << "\n" + << "#define SRC_STRIDE " << srcDesc.stride << "\n" + << "#define DST_STRIDE " << dstDesc.stride << "\n" + << "#define WORK_GROUP_SIZE " << workGroupSize << "\n" + << kernelDefine << "\n" + << patchBasisShaderSourceDefine << "\n"; + + bool deriv1 = (duDesc.length > 0 || dvDesc.length > 0); + bool deriv2 = (duuDesc.length > 0 || duvDesc.length > 0 || dvvDesc.length > 0); + if (deriv1) { + defines << "#define OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES\n"; + } + if (deriv2) { + defines << "#define OPENSUBDIV_GLSL_COMPUTE_USE_2ND_DERIVATIVES\n"; + } + + std::string defineStr = defines.str(); + + const char *shaderSources[4] = {"#version 430\n", 0, 0, 0}; + + shaderSources[1] = defineStr.c_str(); + shaderSources[2] = patchBasisShaderSource.c_str(); + shaderSources[3] = datatoc_glsl_compute_kernel_glsl; + glShaderSource(shader, 4, shaderSources, NULL); + glCompileShader(shader); + glAttachShader(program, shader); + + GLint linked = 0; + glLinkProgram(program); + glGetProgramiv(program, GL_LINK_STATUS, &linked); + + if (linked == GL_FALSE) { + char buffer[1024]; + glGetShaderInfoLog(shader, 1024, NULL, buffer); + OpenSubdiv::Far::Error(OpenSubdiv::Far::FAR_RUNTIME_ERROR, buffer); + + glGetProgramInfoLog(program, 1024, NULL, buffer); + OpenSubdiv::Far::Error(OpenSubdiv::Far::FAR_RUNTIME_ERROR, buffer); + + glDeleteProgram(program); + return 0; + } + + glDeleteShader(shader); + + return program; +} + +bool GLComputeEvaluator::Compile(BufferDescriptor const &srcDesc, + BufferDescriptor const &dstDesc, + BufferDescriptor const &duDesc, + 
BufferDescriptor const &dvDesc, + BufferDescriptor const &duuDesc, + BufferDescriptor const &duvDesc, + BufferDescriptor const &dvvDesc) +{ + + // create a stencil kernel + if (!_stencilKernel.Compile( + srcDesc, dstDesc, duDesc, dvDesc, duuDesc, duvDesc, dvvDesc, _workGroupSize)) { + return false; + } + + // create a patch kernel + if (!_patchKernel.Compile( + srcDesc, dstDesc, duDesc, dvDesc, duuDesc, duvDesc, dvvDesc, _workGroupSize)) { + return false; + } + + // create a patch arrays buffer + if (!_patchArraysSSBO) { + glGenBuffers(1, &_patchArraysSSBO); + } + + return true; +} + +/* static */ +void GLComputeEvaluator::Synchronize(void * /*kernel*/) +{ + // XXX: this is currently just for the performance measuring purpose. + // need to be reimplemented by fence and sync. + glFinish(); +} + +int GLComputeEvaluator::GetDispatchSize(int count) const +{ + return (count + _workGroupSize - 1) / _workGroupSize; +} + +void GLComputeEvaluator::DispatchCompute(int totalDispatchSize) const +{ + int maxWorkGroupCount[2] = {0, 0}; + + glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 0, &maxWorkGroupCount[0]); + glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 1, &maxWorkGroupCount[1]); + + const GLuint maxResX = static_cast<GLuint>(maxWorkGroupCount[0]); + + const int dispatchSize = GetDispatchSize(totalDispatchSize); + GLuint dispatchRX = static_cast<GLuint>(dispatchSize); + GLuint dispatchRY = 1u; + if (dispatchRX > maxResX) { + /* Since there are some limitations with regards to the maximum work group size (could be as + * low as 64k elements per call), we split the number elements into a "2d" number, with the + * final index being computed as `res_x + res_y * max_work_group_size`. Even with a maximum + * work group size of 64k, that still leaves us with roughly `64k * 64k = 4` billion elements + * total, which should be enough. If not, we could also use the 3rd dimension. 
*/ + /* TODO(fclem): We could dispatch fewer groups if we compute the prime factorization and + * get the smallest rect fitting the requirements. */ + dispatchRX = dispatchRY = std::ceil(std::sqrt(dispatchSize)); + /* Avoid a completely empty dispatch line caused by rounding. */ + if ((dispatchRX * (dispatchRY - 1)) >= dispatchSize) { + dispatchRY -= 1; + } + } + + /* X and Y dimensions may have different limits so the above computation may not be right, but + * even with the standard 64k minimum on all dimensions we still have a lot of room. Therefore, + * we presume it all fits. */ + assert(dispatchRY < static_cast<GLuint>(maxWorkGroupCount[1])); + + glDispatchCompute(dispatchRX, dispatchRY, 1); +} + +bool GLComputeEvaluator::EvalStencils(GLuint srcBuffer, + BufferDescriptor const &srcDesc, + GLuint dstBuffer, + BufferDescriptor const &dstDesc, + GLuint duBuffer, + BufferDescriptor const &duDesc, + GLuint dvBuffer, + BufferDescriptor const &dvDesc, + GLuint sizesBuffer, + GLuint offsetsBuffer, + GLuint indicesBuffer, + GLuint weightsBuffer, + GLuint duWeightsBuffer, + GLuint dvWeightsBuffer, + int start, + int end) const +{ + + return EvalStencils(srcBuffer, + srcDesc, + dstBuffer, + dstDesc, + duBuffer, + duDesc, + dvBuffer, + dvDesc, + 0, + BufferDescriptor(), + 0, + BufferDescriptor(), + 0, + BufferDescriptor(), + sizesBuffer, + offsetsBuffer, + indicesBuffer, + weightsBuffer, + duWeightsBuffer, + dvWeightsBuffer, + 0, + 0, + 0, + start, + end); +} + +bool GLComputeEvaluator::EvalStencils(GLuint srcBuffer, + BufferDescriptor const &srcDesc, + GLuint dstBuffer, + BufferDescriptor const &dstDesc, + GLuint duBuffer, + BufferDescriptor const &duDesc, + GLuint dvBuffer, + BufferDescriptor const &dvDesc, + GLuint duuBuffer, + BufferDescriptor const &duuDesc, + GLuint duvBuffer, + BufferDescriptor const &duvDesc, + GLuint dvvBuffer, + BufferDescriptor const &dvvDesc, + GLuint sizesBuffer, + GLuint offsetsBuffer, + GLuint indicesBuffer, + GLuint weightsBuffer, + 
GLuint duWeightsBuffer, + GLuint dvWeightsBuffer, + GLuint duuWeightsBuffer, + GLuint duvWeightsBuffer, + GLuint dvvWeightsBuffer, + int start, + int end) const +{ + + if (!_stencilKernel.program) + return false; + int count = end - start; + if (count <= 0) { + return true; + } + + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, srcBuffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, dstBuffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, duBuffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, dvBuffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 10, duuBuffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 11, duvBuffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 12, dvvBuffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, sizesBuffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, offsetsBuffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 6, indicesBuffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 7, weightsBuffer); + if (duWeightsBuffer) + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 8, duWeightsBuffer); + if (dvWeightsBuffer) + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 9, dvWeightsBuffer); + if (duuWeightsBuffer) + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 13, duuWeightsBuffer); + if (duvWeightsBuffer) + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 14, duvWeightsBuffer); + if (dvvWeightsBuffer) + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 15, dvvWeightsBuffer); + + glUseProgram(_stencilKernel.program); + + glUniform1i(_stencilKernel.uniformStart, start); + glUniform1i(_stencilKernel.uniformEnd, end); + glUniform1i(_stencilKernel.uniformSrcOffset, srcDesc.offset); + glUniform1i(_stencilKernel.uniformDstOffset, dstDesc.offset); + if (_stencilKernel.uniformDuDesc > 0) { + glUniform3i(_stencilKernel.uniformDuDesc, duDesc.offset, duDesc.length, duDesc.stride); + } + if (_stencilKernel.uniformDvDesc > 0) { + glUniform3i(_stencilKernel.uniformDvDesc, dvDesc.offset, dvDesc.length, dvDesc.stride); + } + if (_stencilKernel.uniformDuuDesc > 0) { + 
glUniform3i(_stencilKernel.uniformDuuDesc, duuDesc.offset, duuDesc.length, duuDesc.stride); + } + if (_stencilKernel.uniformDuvDesc > 0) { + glUniform3i(_stencilKernel.uniformDuvDesc, duvDesc.offset, duvDesc.length, duvDesc.stride); + } + if (_stencilKernel.uniformDvvDesc > 0) { + glUniform3i(_stencilKernel.uniformDvvDesc, dvvDesc.offset, dvvDesc.length, dvvDesc.stride); + } + + DispatchCompute(count); + + glUseProgram(0); + + glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT); + for (int i = 0; i < 16; ++i) { + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, i, 0); + } + + return true; +} + +bool GLComputeEvaluator::EvalPatches(GLuint srcBuffer, + BufferDescriptor const &srcDesc, + GLuint dstBuffer, + BufferDescriptor const &dstDesc, + GLuint duBuffer, + BufferDescriptor const &duDesc, + GLuint dvBuffer, + BufferDescriptor const &dvDesc, + int numPatchCoords, + GLuint patchCoordsBuffer, + const PatchArrayVector &patchArrays, + GLuint patchIndexBuffer, + GLuint patchParamsBuffer) const +{ + + return EvalPatches(srcBuffer, + srcDesc, + dstBuffer, + dstDesc, + duBuffer, + duDesc, + dvBuffer, + dvDesc, + 0, + BufferDescriptor(), + 0, + BufferDescriptor(), + 0, + BufferDescriptor(), + numPatchCoords, + patchCoordsBuffer, + patchArrays, + patchIndexBuffer, + patchParamsBuffer); +} + +bool GLComputeEvaluator::EvalPatches(GLuint srcBuffer, + BufferDescriptor const &srcDesc, + GLuint dstBuffer, + BufferDescriptor const &dstDesc, + GLuint duBuffer, + BufferDescriptor const &duDesc, + GLuint dvBuffer, + BufferDescriptor const &dvDesc, + GLuint duuBuffer, + BufferDescriptor const &duuDesc, + GLuint duvBuffer, + BufferDescriptor const &duvDesc, + GLuint dvvBuffer, + BufferDescriptor const &dvvDesc, + int numPatchCoords, + GLuint patchCoordsBuffer, + const PatchArrayVector &patchArrays, + GLuint patchIndexBuffer, + GLuint patchParamsBuffer) const +{ + + if (!_patchKernel.program) + return false; + + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, srcBuffer); + 
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, dstBuffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, duBuffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, dvBuffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 10, duuBuffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 11, duvBuffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 12, dvvBuffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, patchCoordsBuffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 6, patchIndexBuffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 7, patchParamsBuffer); + + glUseProgram(_patchKernel.program); + + glUniform1i(_patchKernel.uniformSrcOffset, srcDesc.offset); + glUniform1i(_patchKernel.uniformDstOffset, dstDesc.offset); + + int patchArraySize = sizeof(PatchArray); + glBindBuffer(GL_SHADER_STORAGE_BUFFER, _patchArraysSSBO); + glBufferData( + GL_SHADER_STORAGE_BUFFER, patchArrays.size() * patchArraySize, NULL, GL_STATIC_DRAW); + for (int i = 0; i < (int)patchArrays.size(); ++i) { + glBufferSubData( + GL_SHADER_STORAGE_BUFFER, i * patchArraySize, sizeof(PatchArray), &patchArrays[i]); + } + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, _patchArraysSSBO); + + if (_patchKernel.uniformDuDesc > 0) { + glUniform3i(_patchKernel.uniformDuDesc, duDesc.offset, duDesc.length, duDesc.stride); + } + if (_patchKernel.uniformDvDesc > 0) { + glUniform3i(_patchKernel.uniformDvDesc, dvDesc.offset, dvDesc.length, dvDesc.stride); + } + if (_patchKernel.uniformDuuDesc > 0) { + glUniform3i(_patchKernel.uniformDuuDesc, duuDesc.offset, duuDesc.length, duuDesc.stride); + } + if (_patchKernel.uniformDuvDesc > 0) { + glUniform3i(_patchKernel.uniformDuvDesc, duvDesc.offset, duvDesc.length, duvDesc.stride); + } + if (_patchKernel.uniformDvvDesc > 0) { + glUniform3i(_patchKernel.uniformDvvDesc, dvvDesc.offset, dvvDesc.length, dvvDesc.stride); + } + + DispatchCompute(numPatchCoords); + + glUseProgram(0); + + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, 0); + 
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, 0); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, 0); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, 0); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, 0); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, 0); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 6, 0); + + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 10, 0); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 11, 0); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 12, 0); + + return true; +} +// --------------------------------------------------------------------------- + +GLComputeEvaluator::_StencilKernel::_StencilKernel() : program(0) +{ +} +GLComputeEvaluator::_StencilKernel::~_StencilKernel() +{ + if (program) { + glDeleteProgram(program); + } +} + +bool GLComputeEvaluator::_StencilKernel::Compile(BufferDescriptor const &srcDesc, + BufferDescriptor const &dstDesc, + BufferDescriptor const &duDesc, + BufferDescriptor const &dvDesc, + BufferDescriptor const &duuDesc, + BufferDescriptor const &duvDesc, + BufferDescriptor const &dvvDesc, + int workGroupSize) +{ + // create stencil kernel + if (program) { + glDeleteProgram(program); + } + + const char *kernelDefine = "#define OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_STENCILS\n"; + + program = compileKernel( + srcDesc, dstDesc, duDesc, dvDesc, duuDesc, duvDesc, dvvDesc, kernelDefine, workGroupSize); + if (program == 0) + return false; + + // cache uniform locations (TODO: use uniform block) + uniformStart = glGetUniformLocation(program, "batchStart"); + uniformEnd = glGetUniformLocation(program, "batchEnd"); + uniformSrcOffset = glGetUniformLocation(program, "srcOffset"); + uniformDstOffset = glGetUniformLocation(program, "dstOffset"); + uniformDuDesc = glGetUniformLocation(program, "duDesc"); + uniformDvDesc = glGetUniformLocation(program, "dvDesc"); + uniformDuuDesc = glGetUniformLocation(program, "duuDesc"); + uniformDuvDesc = glGetUniformLocation(program, "duvDesc"); + uniformDvvDesc = glGetUniformLocation(program, 
"dvvDesc"); + + return true; +} + +// --------------------------------------------------------------------------- + +GLComputeEvaluator::_PatchKernel::_PatchKernel() : program(0) +{ +} +GLComputeEvaluator::_PatchKernel::~_PatchKernel() +{ + if (program) { + glDeleteProgram(program); + } +} + +bool GLComputeEvaluator::_PatchKernel::Compile(BufferDescriptor const &srcDesc, + BufferDescriptor const &dstDesc, + BufferDescriptor const &duDesc, + BufferDescriptor const &dvDesc, + BufferDescriptor const &duuDesc, + BufferDescriptor const &duvDesc, + BufferDescriptor const &dvvDesc, + int workGroupSize) +{ + // create stencil kernel + if (program) { + glDeleteProgram(program); + } + + const char *kernelDefine = "#define OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_PATCHES\n"; + + program = compileKernel( + srcDesc, dstDesc, duDesc, dvDesc, duuDesc, duvDesc, dvvDesc, kernelDefine, workGroupSize); + if (program == 0) + return false; + + // cache uniform locations + uniformSrcOffset = glGetUniformLocation(program, "srcOffset"); + uniformDstOffset = glGetUniformLocation(program, "dstOffset"); + uniformPatchArray = glGetUniformLocation(program, "patchArray"); + uniformDuDesc = glGetUniformLocation(program, "duDesc"); + uniformDvDesc = glGetUniformLocation(program, "dvDesc"); + uniformDuuDesc = glGetUniformLocation(program, "duuDesc"); + uniformDuvDesc = glGetUniformLocation(program, "duvDesc"); + uniformDvvDesc = glGetUniformLocation(program, "dvvDesc"); + + return true; +} + +} // namespace opensubdiv +} // namespace blender diff --git a/intern/opensubdiv/internal/evaluator/gl_compute_evaluator.h b/intern/opensubdiv/internal/evaluator/gl_compute_evaluator.h new file mode 100644 index 00000000000..85c12f73b08 --- /dev/null +++ b/intern/opensubdiv/internal/evaluator/gl_compute_evaluator.h @@ -0,0 +1,2465 @@ +// +// Copyright 2015 Pixar +// +// Licensed under the Apache License, Version 2.0 (the "Apache License") +// with the following modification; you may not use this file except in +// 
compliance with the Apache License and the following modification to it: +// Section 6. Trademarks. is deleted and replaced with: +// +// 6. Trademarks. This License does not grant permission to use the trade +// names, trademarks, service marks, or product names of the Licensor +// and its affiliates, except as required to comply with Section 4(c) of +// the License and to reproduce the content of the NOTICE file. +// +// You may obtain a copy of the Apache License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the Apache License with the above modification is +// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the Apache License for the specific +// language governing permissions and limitations under the Apache License. +// + +#ifndef OPENSUBDIV_GL_COMPUTE_EVALUATOR_H_ +#define OPENSUBDIV_GL_COMPUTE_EVALUATOR_H_ + +#include <opensubdiv/osd/bufferDescriptor.h> +#include <opensubdiv/osd/opengl.h> +#include <opensubdiv/osd/types.h> +#include <opensubdiv/version.h> + +namespace OpenSubdiv { +namespace OPENSUBDIV_VERSION { +namespace Far { +class LimitStencilTable; +class StencilTable; +} // namespace Far +} // namespace OPENSUBDIV_VERSION +} // namespace OpenSubdiv + +namespace blender { +namespace opensubdiv { + +/// \brief GL stencil table (Shader Storage buffer) +/// +/// This class is a GLSL SSBO representation of OpenSubdiv::Far::StencilTable. 
+/// +/// GLSLComputeKernel consumes this table to apply stencils +/// +class GLStencilTableSSBO { + public: + static GLStencilTableSSBO *Create(OpenSubdiv::Far::StencilTable const *stencilTable, + void *deviceContext = NULL) + { + (void)deviceContext; // unused + return new GLStencilTableSSBO(stencilTable); + } + static GLStencilTableSSBO *Create(OpenSubdiv::Far::LimitStencilTable const *limitStencilTable, + void *deviceContext = NULL) + { + (void)deviceContext; // unused + return new GLStencilTableSSBO(limitStencilTable); + } + + explicit GLStencilTableSSBO(OpenSubdiv::Far::StencilTable const *stencilTable); + explicit GLStencilTableSSBO(OpenSubdiv::Far::LimitStencilTable const *limitStencilTable); + ~GLStencilTableSSBO(); + + // interfaces needed for GLSLComputeKernel + GLuint GetSizesBuffer() const + { + return _sizes; + } + GLuint GetOffsetsBuffer() const + { + return _offsets; + } + GLuint GetIndicesBuffer() const + { + return _indices; + } + GLuint GetWeightsBuffer() const + { + return _weights; + } + GLuint GetDuWeightsBuffer() const + { + return _duWeights; + } + GLuint GetDvWeightsBuffer() const + { + return _dvWeights; + } + GLuint GetDuuWeightsBuffer() const + { + return _duuWeights; + } + GLuint GetDuvWeightsBuffer() const + { + return _duvWeights; + } + GLuint GetDvvWeightsBuffer() const + { + return _dvvWeights; + } + int GetNumStencils() const + { + return _numStencils; + } + + private: + GLuint _sizes; + GLuint _offsets; + GLuint _indices; + GLuint _weights; + GLuint _duWeights; + GLuint _dvWeights; + GLuint _duuWeights; + GLuint _duvWeights; + GLuint _dvvWeights; + int _numStencils; +}; + +// --------------------------------------------------------------------------- + +class GLComputeEvaluator { + public: + typedef bool Instantiatable; + static GLComputeEvaluator *Create(OpenSubdiv::Osd::BufferDescriptor const &srcDesc, + OpenSubdiv::Osd::BufferDescriptor const &dstDesc, + OpenSubdiv::Osd::BufferDescriptor const &duDesc, + 
OpenSubdiv::Osd::BufferDescriptor const &dvDesc, + void *deviceContext = NULL) + { + return Create(srcDesc, + dstDesc, + duDesc, + dvDesc, + OpenSubdiv::Osd::BufferDescriptor(), + OpenSubdiv::Osd::BufferDescriptor(), + OpenSubdiv::Osd::BufferDescriptor(), + deviceContext); + } + + static GLComputeEvaluator *Create(OpenSubdiv::Osd::BufferDescriptor const &srcDesc, + OpenSubdiv::Osd::BufferDescriptor const &dstDesc, + OpenSubdiv::Osd::BufferDescriptor const &duDesc, + OpenSubdiv::Osd::BufferDescriptor const &dvDesc, + OpenSubdiv::Osd::BufferDescriptor const &duuDesc, + OpenSubdiv::Osd::BufferDescriptor const &duvDesc, + OpenSubdiv::Osd::BufferDescriptor const &dvvDesc, + void *deviceContext = NULL) + { + (void)deviceContext; // not used + GLComputeEvaluator *instance = new GLComputeEvaluator(); + if (instance->Compile(srcDesc, dstDesc, duDesc, dvDesc, duuDesc, duvDesc, dvvDesc)) + return instance; + delete instance; + return NULL; + } + + /// Constructor. + GLComputeEvaluator(); + + /// Destructor. note that the GL context must be made current. + ~GLComputeEvaluator(); + + /// ---------------------------------------------------------------------- + /// + /// Stencil evaluations with StencilTable + /// + /// ---------------------------------------------------------------------- + + /// \brief Generic static stencil function. This function has a same + /// signature as other device kernels have so that it can be called + /// transparently from OsdMesh template interface. + /// + /// @param srcBuffer Input primvar buffer. + /// must have BindVBO() method returning a GL + /// buffer object of source data + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param stencilTable stencil table to be applied. 
The table must have + /// SSBO interfaces. + /// + /// @param instance cached compiled instance. Clients are supposed to + /// pre-compile an instance of this class and provide + /// to this function. If it's null the kernel still + /// compute by instantiating on-demand kernel although + /// it may cause a performance problem. + /// + /// @param deviceContext not used in the GLSL kernel + /// + template<typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE> + static bool EvalStencils(SRC_BUFFER *srcBuffer, + OpenSubdiv::Osd::BufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dstDesc, + STENCIL_TABLE const *stencilTable, + GLComputeEvaluator const *instance, + void *deviceContext = NULL) + { + + if (instance) { + return instance->EvalStencils(srcBuffer, srcDesc, dstBuffer, dstDesc, stencilTable); + } + else { + // Create an instance on demand (slow) + (void)deviceContext; // unused + instance = Create(srcDesc, + dstDesc, + OpenSubdiv::Osd::BufferDescriptor(), + OpenSubdiv::Osd::BufferDescriptor()); + if (instance) { + bool r = instance->EvalStencils(srcBuffer, srcDesc, dstBuffer, dstDesc, stencilTable); + delete instance; + return r; + } + return false; + } + } + + /// \brief Generic static stencil function. This function has a same + /// signature as other device kernels have so that it can be called + /// transparently from OsdMesh template interface. + /// + /// @param srcBuffer Input primvar buffer. 
+ /// must have BindVBO() method returning a GL + /// buffer object of source data + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dstDesc vertex buffer descriptor for the dstBuffer + /// + /// @param duBuffer Output buffer derivative wrt u + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param duDesc vertex buffer descriptor for the duBuffer + /// + /// @param dvBuffer Output buffer derivative wrt v + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dvDesc vertex buffer descriptor for the dvBuffer + /// + /// @param stencilTable stencil table to be applied. The table must have + /// SSBO interfaces. + /// + /// @param instance cached compiled instance. Clients are supposed to + /// pre-compile an instance of this class and provide + /// to this function. If it's null the kernel still + /// compute by instantiating on-demand kernel although + /// it may cause a performance problem. 
+ /// + /// @param deviceContext not used in the GLSL kernel + /// + template<typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE> + static bool EvalStencils(SRC_BUFFER *srcBuffer, + OpenSubdiv::Osd::BufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dstDesc, + DST_BUFFER *duBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duDesc, + DST_BUFFER *dvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dvDesc, + STENCIL_TABLE const *stencilTable, + GLComputeEvaluator const *instance, + void *deviceContext = NULL) + { + + if (instance) { + return instance->EvalStencils(srcBuffer, + srcDesc, + dstBuffer, + dstDesc, + duBuffer, + duDesc, + dvBuffer, + dvDesc, + stencilTable); + } + else { + // Create an instance on demand (slow) + (void)deviceContext; // unused + instance = Create(srcDesc, dstDesc, duDesc, dvDesc); + if (instance) { + bool r = instance->EvalStencils(srcBuffer, + srcDesc, + dstBuffer, + dstDesc, + duBuffer, + duDesc, + dvBuffer, + dvDesc, + stencilTable); + delete instance; + return r; + } + return false; + } + } + + /// \brief Generic static stencil function. This function has a same + /// signature as other device kernels have so that it can be called + /// transparently from OsdMesh template interface. + /// + /// @param srcBuffer Input primvar buffer. 
+ /// must have BindVBO() method returning a GL + /// buffer object of source data + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dstDesc vertex buffer descriptor for the dstBuffer + /// + /// @param duBuffer Output buffer derivative wrt u + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param duDesc vertex buffer descriptor for the duBuffer + /// + /// @param dvBuffer Output buffer derivative wrt v + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dvDesc vertex buffer descriptor for the dvBuffer + /// + /// @param duuBuffer Output buffer 2nd derivative wrt u + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param duuDesc vertex buffer descriptor for the duuBuffer + /// + /// @param duvBuffer Output buffer 2nd derivative wrt u and v + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param duvDesc vertex buffer descriptor for the duvBuffer + /// + /// @param dvvBuffer Output buffer 2nd derivative wrt v + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dvvDesc vertex buffer descriptor for the dvvBuffer + /// + /// @param stencilTable stencil table to be applied. The table must have + /// SSBO interfaces. + /// + /// @param instance cached compiled instance. Clients are supposed to + /// pre-compile an instance of this class and provide + /// to this function. If it's null the kernel still + /// compute by instantiating on-demand kernel although + /// it may cause a performance problem. 
+ /// + /// @param deviceContext not used in the GLSL kernel + /// + template<typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE> + static bool EvalStencils(SRC_BUFFER *srcBuffer, + OpenSubdiv::Osd::BufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dstDesc, + DST_BUFFER *duBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duDesc, + DST_BUFFER *dvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dvDesc, + DST_BUFFER *duuBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duuDesc, + DST_BUFFER *duvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duvDesc, + DST_BUFFER *dvvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dvvDesc, + STENCIL_TABLE const *stencilTable, + GLComputeEvaluator const *instance, + void *deviceContext = NULL) + { + + if (instance) { + return instance->EvalStencils(srcBuffer, + srcDesc, + dstBuffer, + dstDesc, + duBuffer, + duDesc, + dvBuffer, + dvDesc, + duuBuffer, + duuDesc, + duvBuffer, + duvDesc, + dvvBuffer, + dvvDesc, + stencilTable); + } + else { + // Create an instance on demand (slow) + (void)deviceContext; // unused + instance = Create(srcDesc, dstDesc, duDesc, dvDesc, duuDesc, duvDesc, dvvDesc); + if (instance) { + bool r = instance->EvalStencils(srcBuffer, + srcDesc, + dstBuffer, + dstDesc, + duBuffer, + duDesc, + dvBuffer, + dvDesc, + duuBuffer, + duuDesc, + duvBuffer, + duvDesc, + dvvBuffer, + dvvDesc, + stencilTable); + delete instance; + return r; + } + return false; + } + } + + /// \brief Generic stencil function. + /// + /// @param srcBuffer Input primvar buffer. 
+ /// must have BindVBO() method returning a GL + /// buffer object of source data + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param stencilTable stencil table to be applied. The table must have + /// SSBO interfaces. + /// + template<typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE> + bool EvalStencils(SRC_BUFFER *srcBuffer, + OpenSubdiv::Osd::BufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dstDesc, + STENCIL_TABLE const *stencilTable) const + { + return EvalStencils(srcBuffer->BindVBO(), + srcDesc, + dstBuffer->BindVBO(), + dstDesc, + 0, + OpenSubdiv::Osd::BufferDescriptor(), + 0, + OpenSubdiv::Osd::BufferDescriptor(), + stencilTable->GetSizesBuffer(), + stencilTable->GetOffsetsBuffer(), + stencilTable->GetIndicesBuffer(), + stencilTable->GetWeightsBuffer(), + 0, + 0, + /* start = */ 0, + /* end = */ stencilTable->GetNumStencils()); + } + + /// \brief Generic stencil function. + /// + /// @param srcBuffer Input primvar buffer. 
+ /// must have BindVBO() method returning a GL + /// buffer object of source data + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dstDesc vertex buffer descriptor for the dstBuffer + /// + /// @param duBuffer Output buffer derivative wrt u + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param duDesc vertex buffer descriptor for the duBuffer + /// + /// @param dvBuffer Output buffer derivative wrt v + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dvDesc vertex buffer descriptor for the dvBuffer + /// + /// @param stencilTable stencil table to be applied. The table must have + /// SSBO interfaces. + /// + template<typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE> + bool EvalStencils(SRC_BUFFER *srcBuffer, + OpenSubdiv::Osd::BufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dstDesc, + DST_BUFFER *duBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duDesc, + DST_BUFFER *dvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dvDesc, + STENCIL_TABLE const *stencilTable) const + { + return EvalStencils(srcBuffer->BindVBO(), + srcDesc, + dstBuffer->BindVBO(), + dstDesc, + duBuffer->BindVBO(), + duDesc, + dvBuffer->BindVBO(), + dvDesc, + stencilTable->GetSizesBuffer(), + stencilTable->GetOffsetsBuffer(), + stencilTable->GetIndicesBuffer(), + stencilTable->GetWeightsBuffer(), + stencilTable->GetDuWeightsBuffer(), + stencilTable->GetDvWeightsBuffer(), + /* start = */ 0, + /* end = */ stencilTable->GetNumStencils()); + } + + /// \brief Generic stencil function. + /// + /// @param srcBuffer Input primvar buffer. 
+ /// must have BindVBO() method returning a GL + /// buffer object of source data + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dstDesc vertex buffer descriptor for the dstBuffer + /// + /// @param duBuffer Output buffer derivative wrt u + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param duDesc vertex buffer descriptor for the duBuffer + /// + /// @param dvBuffer Output buffer derivative wrt v + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dvDesc vertex buffer descriptor for the dvBuffer + /// + /// @param duuBuffer Output buffer 2nd derivative wrt u + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param duuDesc vertex buffer descriptor for the duuBuffer + /// + /// @param duvBuffer Output buffer 2nd derivative wrt u and v + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param duvDesc vertex buffer descriptor for the duvBuffer + /// + /// @param dvvBuffer Output buffer 2nd derivative wrt v + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dvvDesc vertex buffer descriptor for the dvvBuffer + /// + /// @param stencilTable stencil table to be applied. The table must have + /// SSBO interfaces. 
+ /// + template<typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE> + bool EvalStencils(SRC_BUFFER *srcBuffer, + OpenSubdiv::Osd::BufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dstDesc, + DST_BUFFER *duBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duDesc, + DST_BUFFER *dvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dvDesc, + DST_BUFFER *duuBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duuDesc, + DST_BUFFER *duvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duvDesc, + DST_BUFFER *dvvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dvvDesc, + STENCIL_TABLE const *stencilTable) const + { + return EvalStencils(srcBuffer->BindVBO(), + srcDesc, + dstBuffer->BindVBO(), + dstDesc, + duBuffer->BindVBO(), + duDesc, + dvBuffer->BindVBO(), + dvDesc, + duuBuffer->BindVBO(), + duuDesc, + duvBuffer->BindVBO(), + duvDesc, + dvvBuffer->BindVBO(), + dvvDesc, + stencilTable->GetSizesBuffer(), + stencilTable->GetOffsetsBuffer(), + stencilTable->GetIndicesBuffer(), + stencilTable->GetWeightsBuffer(), + stencilTable->GetDuWeightsBuffer(), + stencilTable->GetDvWeightsBuffer(), + stencilTable->GetDuuWeightsBuffer(), + stencilTable->GetDuvWeightsBuffer(), + stencilTable->GetDvvWeightsBuffer(), + /* start = */ 0, + /* end = */ stencilTable->GetNumStencils()); + } + + /// \brief Dispatch the GLSL compute kernel on GPU asynchronously + /// returns false if the kernel hasn't been compiled yet. 
+ /// + /// @param srcBuffer GL buffer of input primvar source data + /// + /// @param srcDesc vertex buffer descriptor for the srcBuffer + /// + /// @param dstBuffer GL buffer of output primvar destination data + /// + /// @param dstDesc vertex buffer descriptor for the dstBuffer + /// + /// @param duBuffer GL buffer of output derivative wrt u + /// + /// @param duDesc vertex buffer descriptor for the duBuffer + /// + /// @param dvBuffer GL buffer of output derivative wrt v + /// + /// @param dvDesc vertex buffer descriptor for the dvBuffer + /// + /// @param sizesBuffer GL buffer of the sizes in the stencil table + /// + /// @param offsetsBuffer GL buffer of the offsets in the stencil table + /// + /// @param indicesBuffer GL buffer of the indices in the stencil table + /// + /// @param weightsBuffer GL buffer of the weights in the stencil table + /// + /// @param duWeightsBuffer GL buffer of the du weights in the stencil table + /// + /// @param dvWeightsBuffer GL buffer of the dv weights in the stencil table + /// + /// @param start start index of stencil table + /// + /// @param end end index of stencil table + /// + bool EvalStencils(GLuint srcBuffer, + OpenSubdiv::Osd::BufferDescriptor const &srcDesc, + GLuint dstBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dstDesc, + GLuint duBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duDesc, + GLuint dvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dvDesc, + GLuint sizesBuffer, + GLuint offsetsBuffer, + GLuint indicesBuffer, + GLuint weightsBuffer, + GLuint duWeightsBuffer, + GLuint dvWeightsBuffer, + int start, + int end) const; + + /// \brief Dispatch the GLSL compute kernel on GPU asynchronously + /// returns false if the kernel hasn't been compiled yet. 
+ /// + /// @param srcBuffer GL buffer of input primvar source data + /// + /// @param srcDesc vertex buffer descriptor for the srcBuffer + /// + /// @param dstBuffer GL buffer of output primvar destination data + /// + /// @param dstDesc vertex buffer descriptor for the dstBuffer + /// + /// @param duBuffer GL buffer of output derivative wrt u + /// + /// @param duDesc vertex buffer descriptor for the duBuffer + /// + /// @param dvBuffer GL buffer of output derivative wrt v + /// + /// @param dvDesc vertex buffer descriptor for the dvBuffer + /// + /// @param duuBuffer GL buffer of output 2nd derivative wrt u + /// + /// @param duuDesc vertex buffer descriptor for the duuBuffer + /// + /// @param duvBuffer GL buffer of output 2nd derivative wrt u and v + /// + /// @param duvDesc vertex buffer descriptor for the duvBuffer + /// + /// @param dvvBuffer GL buffer of output 2nd derivative wrt v + /// + /// @param dvvDesc vertex buffer descriptor for the dvvBuffer + /// + /// @param sizesBuffer GL buffer of the sizes in the stencil table + /// + /// @param offsetsBuffer GL buffer of the offsets in the stencil table + /// + /// @param indicesBuffer GL buffer of the indices in the stencil table + /// + /// @param weightsBuffer GL buffer of the weights in the stencil table + /// + /// @param duWeightsBuffer GL buffer of the du weights in the stencil table + /// + /// @param dvWeightsBuffer GL buffer of the dv weights in the stencil table + /// + /// @param duuWeightsBuffer GL buffer of the duu weights in the stencil table + /// + /// @param duvWeightsBuffer GL buffer of the duv weights in the stencil table + /// + /// @param dvvWeightsBuffer GL buffer of the dvv weights in the stencil table + /// + /// @param start start index of stencil table + /// + /// @param end end index of stencil table + /// + bool EvalStencils(GLuint srcBuffer, + OpenSubdiv::Osd::BufferDescriptor const &srcDesc, + GLuint dstBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dstDesc, + GLuint 
duBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duDesc, + GLuint dvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dvDesc, + GLuint duuBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duuDesc, + GLuint duvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duvDesc, + GLuint dvvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dvvDesc, + GLuint sizesBuffer, + GLuint offsetsBuffer, + GLuint indicesBuffer, + GLuint weightsBuffer, + GLuint duWeightsBuffer, + GLuint dvWeightsBuffer, + GLuint duuWeightsBuffer, + GLuint duvWeightsBuffer, + GLuint dvvWeightsBuffer, + int start, + int end) const; + + /// ---------------------------------------------------------------------- + /// + /// Limit evaluations with PatchTable + /// + /// ---------------------------------------------------------------------- + + /// \brief Generic limit eval function. This function has a same + /// signature as other device kernels have so that it can be called + /// in the same way. + /// + /// @param srcBuffer Input primvar buffer. + /// must have BindVBO() method returning a GL + /// buffer object of source data + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param numPatchCoords number of patchCoords. + /// + /// @param patchCoords array of locations to be evaluated. + /// must have BindVBO() method returning an + /// array of PatchCoord struct in VBO. + /// + /// @param patchTable GLPatchTable or equivalent + /// + /// @param instance cached compiled instance. Clients are supposed to + /// pre-compile an instance of this class and provide + /// to this function. If it's null the kernel still + /// compute by instantiating on-demand kernel although + /// it may cause a performance problem. 
+ /// + /// @param deviceContext not used in the GLXFB evaluator + /// + template<typename SRC_BUFFER, + typename DST_BUFFER, + typename PATCHCOORD_BUFFER, + typename PATCH_TABLE> + static bool EvalPatches(SRC_BUFFER *srcBuffer, + OpenSubdiv::Osd::BufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dstDesc, + int numPatchCoords, + PATCHCOORD_BUFFER *patchCoords, + PATCH_TABLE *patchTable, + GLComputeEvaluator const *instance, + void *deviceContext = NULL) + { + + if (instance) { + return instance->EvalPatches( + srcBuffer, srcDesc, dstBuffer, dstDesc, numPatchCoords, patchCoords, patchTable); + } + else { + // Create an instance on demand (slow) + (void)deviceContext; // unused + instance = Create(srcDesc, + dstDesc, + OpenSubdiv::Osd::BufferDescriptor(), + OpenSubdiv::Osd::BufferDescriptor()); + if (instance) { + bool r = instance->EvalPatches( + srcBuffer, srcDesc, dstBuffer, dstDesc, numPatchCoords, patchCoords, patchTable); + delete instance; + return r; + } + return false; + } + } + + /// \brief Generic limit eval function. This function has a same + /// signature as other device kernels have so that it can be called + /// in the same way. + /// + /// @param srcBuffer Input primvar buffer. 
+ /// must have BindVBO() method returning a GL + /// buffer object of source data + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param duBuffer Output buffer derivative wrt u + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param duDesc vertex buffer descriptor for the duBuffer + /// + /// @param dvBuffer Output buffer derivative wrt v + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dvDesc vertex buffer descriptor for the dvBuffer + /// + /// @param numPatchCoords number of patchCoords. + /// + /// @param patchCoords array of locations to be evaluated. + /// must have BindVBO() method returning an + /// array of PatchCoord struct in VBO. + /// + /// @param patchTable GLPatchTable or equivalent + /// + /// @param instance cached compiled instance. Clients are supposed to + /// pre-compile an instance of this class and provide + /// to this function. If it's null the kernel still + /// compute by instantiating on-demand kernel although + /// it may cause a performance problem. 
+ /// + /// @param deviceContext not used in the GLXFB evaluator + /// + template<typename SRC_BUFFER, + typename DST_BUFFER, + typename PATCHCOORD_BUFFER, + typename PATCH_TABLE> + static bool EvalPatches(SRC_BUFFER *srcBuffer, + OpenSubdiv::Osd::BufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dstDesc, + DST_BUFFER *duBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duDesc, + DST_BUFFER *dvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dvDesc, + int numPatchCoords, + PATCHCOORD_BUFFER *patchCoords, + PATCH_TABLE *patchTable, + GLComputeEvaluator const *instance, + void *deviceContext = NULL) + { + + if (instance) { + return instance->EvalPatches(srcBuffer, + srcDesc, + dstBuffer, + dstDesc, + duBuffer, + duDesc, + dvBuffer, + dvDesc, + numPatchCoords, + patchCoords, + patchTable); + } + else { + // Create an instance on demand (slow) + (void)deviceContext; // unused + instance = Create(srcDesc, dstDesc, duDesc, dvDesc); + if (instance) { + bool r = instance->EvalPatches(srcBuffer, + srcDesc, + dstBuffer, + dstDesc, + duBuffer, + duDesc, + dvBuffer, + dvDesc, + numPatchCoords, + patchCoords, + patchTable); + delete instance; + return r; + } + return false; + } + } + + /// \brief Generic limit eval function. This function has a same + /// signature as other device kernels have so that it can be called + /// in the same way. + /// + /// @param srcBuffer Input primvar buffer. 
+ /// must have BindVBO() method returning a GL + /// buffer object of source data + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param duBuffer Output buffer derivative wrt u + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param duDesc vertex buffer descriptor for the duBuffer + /// + /// @param dvBuffer Output buffer derivative wrt v + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dvDesc vertex buffer descriptor for the dvBuffer + /// + /// @param duuBuffer Output buffer 2nd derivative wrt u + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param duuDesc vertex buffer descriptor for the duuBuffer + /// + /// @param duvBuffer Output buffer 2nd derivative wrt u and v + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param duvDesc vertex buffer descriptor for the duvBuffer + /// + /// @param dvvBuffer Output buffer 2nd derivative wrt v + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dvvDesc vertex buffer descriptor for the dvvBuffer + /// + /// @param numPatchCoords number of patchCoords. + /// + /// @param patchCoords array of locations to be evaluated. + /// must have BindVBO() method returning an + /// array of PatchCoord struct in VBO. + /// + /// @param patchTable GLPatchTable or equivalent + /// + /// @param instance cached compiled instance. Clients are supposed to + /// pre-compile an instance of this class and provide + /// to this function. 
If it's null the kernel still + /// compute by instantiating on-demand kernel although + /// it may cause a performance problem. + /// + /// @param deviceContext not used in the GLXFB evaluator + /// + template<typename SRC_BUFFER, + typename DST_BUFFER, + typename PATCHCOORD_BUFFER, + typename PATCH_TABLE> + static bool EvalPatches(SRC_BUFFER *srcBuffer, + OpenSubdiv::Osd::BufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dstDesc, + DST_BUFFER *duBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duDesc, + DST_BUFFER *dvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dvDesc, + DST_BUFFER *duuBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duuDesc, + DST_BUFFER *duvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duvDesc, + DST_BUFFER *dvvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dvvDesc, + int numPatchCoords, + PATCHCOORD_BUFFER *patchCoords, + PATCH_TABLE *patchTable, + GLComputeEvaluator const *instance, + void *deviceContext = NULL) + { + + if (instance) { + return instance->EvalPatches(srcBuffer, + srcDesc, + dstBuffer, + dstDesc, + duBuffer, + duDesc, + dvBuffer, + dvDesc, + duuBuffer, + duuDesc, + duvBuffer, + duvDesc, + dvvBuffer, + dvvDesc, + numPatchCoords, + patchCoords, + patchTable); + } + else { + // Create an instance on demand (slow) + (void)deviceContext; // unused + instance = Create(srcDesc, dstDesc, duDesc, dvDesc, duuDesc, duvDesc, dvvDesc); + if (instance) { + bool r = instance->EvalPatches(srcBuffer, + srcDesc, + dstBuffer, + dstDesc, + duBuffer, + duDesc, + dvBuffer, + dvDesc, + duuBuffer, + duuDesc, + duvBuffer, + duvDesc, + dvvBuffer, + dvvDesc, + numPatchCoords, + patchCoords, + patchTable); + delete instance; + return r; + } + return false; + } + } + + /// \brief Generic limit eval function. This function has a same + /// signature as other device kernels have so that it can be called + /// in the same way. + /// + /// @param srcBuffer Input primvar buffer. 
+ /// must have BindVBO() method returning a GL + /// buffer object of source data + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param numPatchCoords number of patchCoords. + /// + /// @param patchCoords array of locations to be evaluated. + /// must have BindVBO() method returning an + /// array of PatchCoord struct in VBO. + /// + /// @param patchTable GLPatchTable or equivalent + /// + template<typename SRC_BUFFER, + typename DST_BUFFER, + typename PATCHCOORD_BUFFER, + typename PATCH_TABLE> + bool EvalPatches(SRC_BUFFER *srcBuffer, + OpenSubdiv::Osd::BufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dstDesc, + int numPatchCoords, + PATCHCOORD_BUFFER *patchCoords, + PATCH_TABLE *patchTable) const + { + + return EvalPatches(srcBuffer->BindVBO(), + srcDesc, + dstBuffer->BindVBO(), + dstDesc, + 0, + OpenSubdiv::Osd::BufferDescriptor(), + 0, + OpenSubdiv::Osd::BufferDescriptor(), + numPatchCoords, + patchCoords->BindVBO(), + patchTable->GetPatchArrays(), + patchTable->GetPatchIndexBuffer(), + patchTable->GetPatchParamBuffer()); + } + + /// \brief Generic limit eval function with derivatives. This function has + /// a same signature as other device kernels have so that it can be + /// called in the same way. + /// + /// @param srcBuffer Input primvar buffer. 
+ /// must have BindVBO() method returning a GL + /// buffer object of source data + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param duBuffer Output buffer derivative wrt u + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param duDesc vertex buffer descriptor for the duBuffer + /// + /// @param dvBuffer Output buffer derivative wrt v + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dvDesc vertex buffer descriptor for the dvBuffer + /// + /// @param numPatchCoords number of patchCoords. + /// + /// @param patchCoords array of locations to be evaluated. + /// + /// @param patchTable GLPatchTable or equivalent + /// + template<typename SRC_BUFFER, + typename DST_BUFFER, + typename PATCHCOORD_BUFFER, + typename PATCH_TABLE> + bool EvalPatches(SRC_BUFFER *srcBuffer, + OpenSubdiv::Osd::BufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dstDesc, + DST_BUFFER *duBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duDesc, + DST_BUFFER *dvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dvDesc, + int numPatchCoords, + PATCHCOORD_BUFFER *patchCoords, + PATCH_TABLE *patchTable) const + { + + return EvalPatches(srcBuffer->BindVBO(), + srcDesc, + dstBuffer->BindVBO(), + dstDesc, + duBuffer->BindVBO(), + duDesc, + dvBuffer->BindVBO(), + dvDesc, + numPatchCoords, + patchCoords->BindVBO(), + patchTable->GetPatchArrays(), + patchTable->GetPatchIndexBuffer(), + patchTable->GetPatchParamBuffer()); + } + + /// \brief Generic limit eval function with derivatives. This function has + /// a same signature as other device kernels have so that it can be + /// called in the same way. 
+ /// + /// @param srcBuffer Input primvar buffer. + /// must have BindVBO() method returning a GL + /// buffer object of source data + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param duBuffer Output buffer derivative wrt u + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param duDesc vertex buffer descriptor for the duBuffer + /// + /// @param dvBuffer Output buffer derivative wrt v + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dvDesc vertex buffer descriptor for the dvBuffer + /// + /// @param duuBuffer Output buffer 2nd derivative wrt u + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param duuDesc vertex buffer descriptor for the duuBuffer + /// + /// @param duvBuffer Output buffer 2nd derivative wrt u and v + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param duvDesc vertex buffer descriptor for the duvBuffer + /// + /// @param dvvBuffer Output buffer 2nd derivative wrt v + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dvvDesc vertex buffer descriptor for the dvvBuffer + /// + /// @param numPatchCoords number of patchCoords. + /// + /// @param patchCoords array of locations to be evaluated. 
+ /// + /// @param patchTable GLPatchTable or equivalent + /// + template<typename SRC_BUFFER, + typename DST_BUFFER, + typename PATCHCOORD_BUFFER, + typename PATCH_TABLE> + bool EvalPatches(SRC_BUFFER *srcBuffer, + OpenSubdiv::Osd::BufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dstDesc, + DST_BUFFER *duBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duDesc, + DST_BUFFER *dvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dvDesc, + DST_BUFFER *duuBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duuDesc, + DST_BUFFER *duvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duvDesc, + DST_BUFFER *dvvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dvvDesc, + int numPatchCoords, + PATCHCOORD_BUFFER *patchCoords, + PATCH_TABLE *patchTable) const + { + + return EvalPatches(srcBuffer->BindVBO(), + srcDesc, + dstBuffer->BindVBO(), + dstDesc, + duBuffer->BindVBO(), + duDesc, + dvBuffer->BindVBO(), + dvDesc, + duuBuffer->BindVBO(), + duuDesc, + duvBuffer->BindVBO(), + duvDesc, + dvvBuffer->BindVBO(), + dvvDesc, + numPatchCoords, + patchCoords->BindVBO(), + patchTable->GetPatchArrays(), + patchTable->GetPatchIndexBuffer(), + patchTable->GetPatchParamBuffer()); + } + + bool EvalPatches(GLuint srcBuffer, + OpenSubdiv::Osd::BufferDescriptor const &srcDesc, + GLuint dstBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dstDesc, + GLuint duBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duDesc, + GLuint dvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dvDesc, + int numPatchCoords, + GLuint patchCoordsBuffer, + const OpenSubdiv::Osd::PatchArrayVector &patchArrays, + GLuint patchIndexBuffer, + GLuint patchParamsBuffer) const; + + bool EvalPatches(GLuint srcBuffer, + OpenSubdiv::Osd::BufferDescriptor const &srcDesc, + GLuint dstBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dstDesc, + GLuint duBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duDesc, + GLuint dvBuffer, + OpenSubdiv::Osd::BufferDescriptor const 
&dvDesc, + GLuint duuBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duuDesc, + GLuint duvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duvDesc, + GLuint dvvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dvvDesc, + int numPatchCoords, + GLuint patchCoordsBuffer, + const OpenSubdiv::Osd::PatchArrayVector &patchArrays, + GLuint patchIndexBuffer, + GLuint patchParamsBuffer) const; + + /// \brief Generic limit eval function. This function has a same + /// signature as other device kernels have so that it can be called + /// in the same way. + /// + /// @param srcBuffer Input primvar buffer. + /// must have BindVBO() method returning a GL + /// buffer object of source data + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param numPatchCoords number of patchCoords. + /// + /// @param patchCoords array of locations to be evaluated. + /// must have BindVBO() method returning an + /// array of PatchCoord struct in VBO. + /// + /// @param patchTable GLPatchTable or equivalent + /// + /// @param instance cached compiled instance. Clients are supposed to + /// pre-compile an instance of this class and provide + /// to this function. If it's null the kernel still + /// compute by instantiating on-demand kernel although + /// it may cause a performance problem. 
+ /// + /// @param deviceContext not used in the GLXFB evaluator + /// + template<typename SRC_BUFFER, + typename DST_BUFFER, + typename PATCHCOORD_BUFFER, + typename PATCH_TABLE> + static bool EvalPatchesVarying(SRC_BUFFER *srcBuffer, + OpenSubdiv::Osd::BufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dstDesc, + int numPatchCoords, + PATCHCOORD_BUFFER *patchCoords, + PATCH_TABLE *patchTable, + GLComputeEvaluator const *instance, + void *deviceContext = NULL) + { + + if (instance) { + return instance->EvalPatchesVarying( + srcBuffer, srcDesc, dstBuffer, dstDesc, numPatchCoords, patchCoords, patchTable); + } + else { + // Create an instance on demand (slow) + (void)deviceContext; // unused + instance = Create(srcDesc, + dstDesc, + OpenSubdiv::Osd::BufferDescriptor(), + OpenSubdiv::Osd::BufferDescriptor()); + if (instance) { + bool r = instance->EvalPatchesVarying( + srcBuffer, srcDesc, dstBuffer, dstDesc, numPatchCoords, patchCoords, patchTable); + delete instance; + return r; + } + return false; + } + } + + /// \brief Generic limit eval function. This function has a same + /// signature as other device kernels have so that it can be called + /// in the same way. + /// + /// @param srcBuffer Input primvar buffer. + /// must have BindVBO() method returning a GL + /// buffer object of source data + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param numPatchCoords number of patchCoords. + /// + /// @param patchCoords array of locations to be evaluated. + /// must have BindVBO() method returning an + /// array of PatchCoord struct in VBO. 
+ /// + /// @param patchTable GLPatchTable or equivalent + /// + template<typename SRC_BUFFER, + typename DST_BUFFER, + typename PATCHCOORD_BUFFER, + typename PATCH_TABLE> + bool EvalPatchesVarying(SRC_BUFFER *srcBuffer, + OpenSubdiv::Osd::BufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dstDesc, + int numPatchCoords, + PATCHCOORD_BUFFER *patchCoords, + PATCH_TABLE *patchTable) const + { + + return EvalPatches(srcBuffer->BindVBO(), + srcDesc, + dstBuffer->BindVBO(), + dstDesc, + 0, + OpenSubdiv::Osd::BufferDescriptor(), + 0, + OpenSubdiv::Osd::BufferDescriptor(), + numPatchCoords, + patchCoords->BindVBO(), + patchTable->GetVaryingPatchArrays(), + patchTable->GetVaryingPatchIndexBuffer(), + patchTable->GetPatchParamBuffer()); + } + + /// \brief Generic limit eval function. This function has a same + /// signature as other device kernels have so that it can be called + /// in the same way. + /// + /// @param srcBuffer Input primvar buffer. + /// must have BindVBO() method returning a GL + /// buffer object of source data + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param duBuffer Output buffer derivative wrt u + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param duDesc vertex buffer descriptor for the duBuffer + /// + /// @param dvBuffer Output buffer derivative wrt v + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dvDesc vertex buffer descriptor for the dvBuffer + /// + /// @param numPatchCoords number of patchCoords. + /// + /// @param patchCoords array of locations to be evaluated. 
+ /// must have BindVBO() method returning an + /// array of PatchCoord struct in VBO. + /// + /// @param patchTable GLPatchTable or equivalent + /// + /// @param instance cached compiled instance. Clients are supposed to + /// pre-compile an instance of this class and provide + /// to this function. If it's null the kernel still + /// compute by instantiating on-demand kernel although + /// it may cause a performance problem. + /// + /// @param deviceContext not used in the GLXFB evaluator + /// + template<typename SRC_BUFFER, + typename DST_BUFFER, + typename PATCHCOORD_BUFFER, + typename PATCH_TABLE> + static bool EvalPatchesVarying(SRC_BUFFER *srcBuffer, + OpenSubdiv::Osd::BufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dstDesc, + DST_BUFFER *duBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duDesc, + DST_BUFFER *dvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dvDesc, + int numPatchCoords, + PATCHCOORD_BUFFER *patchCoords, + PATCH_TABLE *patchTable, + GLComputeEvaluator const *instance, + void *deviceContext = NULL) + { + + if (instance) { + return instance->EvalPatchesVarying(srcBuffer, + srcDesc, + dstBuffer, + dstDesc, + duBuffer, + duDesc, + dvBuffer, + dvDesc, + numPatchCoords, + patchCoords, + patchTable); + } + else { + // Create an instance on demand (slow) + (void)deviceContext; // unused + instance = Create(srcDesc, dstDesc, duDesc, dvDesc); + if (instance) { + bool r = instance->EvalPatchesVarying(srcBuffer, + srcDesc, + dstBuffer, + dstDesc, + duBuffer, + duDesc, + dvBuffer, + dvDesc, + numPatchCoords, + patchCoords, + patchTable); + delete instance; + return r; + } + return false; + } + } + + /// \brief Generic limit eval function. This function has a same + /// signature as other device kernels have so that it can be called + /// in the same way. + /// + /// @param srcBuffer Input primvar buffer. 
+ /// must have BindVBO() method returning a GL + /// buffer object of source data + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param duBuffer Output buffer derivative wrt u + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param duDesc vertex buffer descriptor for the duBuffer + /// + /// @param dvBuffer Output buffer derivative wrt v + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dvDesc vertex buffer descriptor for the dvBuffer + /// + /// @param numPatchCoords number of patchCoords. + /// + /// @param patchCoords array of locations to be evaluated. + /// must have BindVBO() method returning an + /// array of PatchCoord struct in VBO. + /// + /// @param patchTable GLPatchTable or equivalent + /// + template<typename SRC_BUFFER, + typename DST_BUFFER, + typename PATCHCOORD_BUFFER, + typename PATCH_TABLE> + bool EvalPatchesVarying(SRC_BUFFER *srcBuffer, + OpenSubdiv::Osd::BufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dstDesc, + DST_BUFFER *duBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duDesc, + DST_BUFFER *dvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dvDesc, + int numPatchCoords, + PATCHCOORD_BUFFER *patchCoords, + PATCH_TABLE *patchTable) const + { + + return EvalPatches(srcBuffer->BindVBO(), + srcDesc, + dstBuffer->BindVBO(), + dstDesc, + duBuffer->BindVBO(), + duDesc, + dvBuffer->BindVBO(), + dvDesc, + numPatchCoords, + patchCoords->BindVBO(), + patchTable->GetVaryingPatchArrays(), + patchTable->GetVaryingPatchIndexBuffer(), + patchTable->GetPatchParamBuffer()); + } + + /// \brief Generic limit eval function. 
This function has a same + /// signature as other device kernels have so that it can be called + /// in the same way. + /// + /// @param srcBuffer Input primvar buffer. + /// must have BindVBO() method returning a GL + /// buffer object of source data + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param duBuffer Output buffer derivative wrt u + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param duDesc vertex buffer descriptor for the duBuffer + /// + /// @param dvBuffer Output buffer derivative wrt v + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dvDesc vertex buffer descriptor for the dvBuffer + /// + /// @param duuBuffer Output buffer 2nd derivative wrt u + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param duuDesc vertex buffer descriptor for the duuBuffer + /// + /// @param duvBuffer Output buffer 2nd derivative wrt u and v + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param duvDesc vertex buffer descriptor for the duvBuffer + /// + /// @param dvvBuffer Output buffer 2nd derivative wrt v + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dvvDesc vertex buffer descriptor for the dvvBuffer + /// + /// @param numPatchCoords number of patchCoords. + /// + /// @param patchCoords array of locations to be evaluated. + /// must have BindVBO() method returning an + /// array of PatchCoord struct in VBO. + /// + /// @param patchTable GLPatchTable or equivalent + /// + /// @param instance cached compiled instance. 
Clients are supposed to + /// pre-compile an instance of this class and provide + /// to this function. If it's null the kernel still + /// compute by instantiating on-demand kernel although + /// it may cause a performance problem. + /// + /// @param deviceContext not used in the GLXFB evaluator + /// + template<typename SRC_BUFFER, + typename DST_BUFFER, + typename PATCHCOORD_BUFFER, + typename PATCH_TABLE> + static bool EvalPatchesVarying(SRC_BUFFER *srcBuffer, + OpenSubdiv::Osd::BufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dstDesc, + DST_BUFFER *duBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duDesc, + DST_BUFFER *dvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dvDesc, + DST_BUFFER *duuBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duuDesc, + DST_BUFFER *duvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duvDesc, + DST_BUFFER *dvvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dvvDesc, + int numPatchCoords, + PATCHCOORD_BUFFER *patchCoords, + PATCH_TABLE *patchTable, + GLComputeEvaluator const *instance, + void *deviceContext = NULL) + { + + if (instance) { + return instance->EvalPatchesVarying(srcBuffer, + srcDesc, + dstBuffer, + dstDesc, + duBuffer, + duDesc, + dvBuffer, + dvDesc, + duuBuffer, + duuDesc, + duvBuffer, + duvDesc, + dvvBuffer, + dvvDesc, + numPatchCoords, + patchCoords, + patchTable); + } + else { + // Create an instance on demand (slow) + (void)deviceContext; // unused + instance = Create(srcDesc, dstDesc, duDesc, dvDesc, duuDesc, duvDesc, dvvDesc); + if (instance) { + bool r = instance->EvalPatchesVarying(srcBuffer, + srcDesc, + dstBuffer, + dstDesc, + duBuffer, + duDesc, + dvBuffer, + dvDesc, + duuBuffer, + duuDesc, + duvBuffer, + duvDesc, + dvvBuffer, + dvvDesc, + numPatchCoords, + patchCoords, + patchTable); + delete instance; + return r; + } + return false; + } + } + + /// \brief Generic limit eval function. 
This function has a same + /// signature as other device kernels have so that it can be called + /// in the same way. + /// + /// @param srcBuffer Input primvar buffer. + /// must have BindVBO() method returning a GL + /// buffer object of source data + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param duBuffer Output buffer derivative wrt u + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param duDesc vertex buffer descriptor for the duBuffer + /// + /// @param dvBuffer Output buffer derivative wrt v + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dvDesc vertex buffer descriptor for the dvBuffer + /// + /// @param duuBuffer Output buffer 2nd derivative wrt u + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param duuDesc vertex buffer descriptor for the duuBuffer + /// + /// @param duvBuffer Output buffer 2nd derivative wrt u and v + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param duvDesc vertex buffer descriptor for the duvBuffer + /// + /// @param dvvBuffer Output buffer 2nd derivative wrt v + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dvvDesc vertex buffer descriptor for the dvvBuffer + /// + /// @param numPatchCoords number of patchCoords. + /// + /// @param patchCoords array of locations to be evaluated. + /// must have BindVBO() method returning an + /// array of PatchCoord struct in VBO. 
+ /// + /// @param patchTable GLPatchTable or equivalent + /// + template<typename SRC_BUFFER, + typename DST_BUFFER, + typename PATCHCOORD_BUFFER, + typename PATCH_TABLE> + bool EvalPatchesVarying(SRC_BUFFER *srcBuffer, + OpenSubdiv::Osd::BufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dstDesc, + DST_BUFFER *duBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duDesc, + DST_BUFFER *dvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dvDesc, + DST_BUFFER *duuBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duuDesc, + DST_BUFFER *duvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duvDesc, + DST_BUFFER *dvvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dvvDesc, + int numPatchCoords, + PATCHCOORD_BUFFER *patchCoords, + PATCH_TABLE *patchTable) const + { + + return EvalPatches(srcBuffer->BindVBO(), + srcDesc, + dstBuffer->BindVBO(), + dstDesc, + duBuffer->BindVBO(), + duDesc, + dvBuffer->BindVBO(), + dvDesc, + duuBuffer->BindVBO(), + duuDesc, + duvBuffer->BindVBO(), + duvDesc, + dvvBuffer->BindVBO(), + dvvDesc, + numPatchCoords, + patchCoords->BindVBO(), + patchTable->GetVaryingPatchArrays(), + patchTable->GetVaryingPatchIndexBuffer(), + patchTable->GetPatchParamBuffer()); + } + + /// \brief Generic limit eval function. This function has a same + /// signature as other device kernels have so that it can be called + /// in the same way. + /// + /// @param srcBuffer Input primvar buffer. + /// must have BindVBO() method returning a GL + /// buffer object of source data + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param numPatchCoords number of patchCoords. + /// + /// @param patchCoords array of locations to be evaluated. 
+ /// must have BindVBO() method returning an + /// array of PatchCoord struct in VBO. + /// + /// @param patchTable GLPatchTable or equivalent + /// + /// @param fvarChannel face-varying channel + /// + /// @param instance cached compiled instance. Clients are supposed to + /// pre-compile an instance of this class and provide + /// to this function. If it's null the kernel still + /// compute by instantiating on-demand kernel although + /// it may cause a performance problem. + /// + /// @param deviceContext not used in the GLXFB evaluator + /// + template<typename SRC_BUFFER, + typename DST_BUFFER, + typename PATCHCOORD_BUFFER, + typename PATCH_TABLE> + static bool EvalPatchesFaceVarying(SRC_BUFFER *srcBuffer, + OpenSubdiv::Osd::BufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dstDesc, + int numPatchCoords, + PATCHCOORD_BUFFER *patchCoords, + PATCH_TABLE *patchTable, + int fvarChannel, + GLComputeEvaluator const *instance, + void *deviceContext = NULL) + { + + if (instance) { + return instance->EvalPatchesFaceVarying(srcBuffer, + srcDesc, + dstBuffer, + dstDesc, + numPatchCoords, + patchCoords, + patchTable, + fvarChannel); + } + else { + // Create an instance on demand (slow) + (void)deviceContext; // unused + instance = Create(srcDesc, + dstDesc, + OpenSubdiv::Osd::BufferDescriptor(), + OpenSubdiv::Osd::BufferDescriptor()); + if (instance) { + bool r = instance->EvalPatchesFaceVarying(srcBuffer, + srcDesc, + dstBuffer, + dstDesc, + numPatchCoords, + patchCoords, + patchTable, + fvarChannel); + delete instance; + return r; + } + return false; + } + } + + /// \brief Generic limit eval function. This function has a same + /// signature as other device kernels have so that it can be called + /// in the same way. + /// + /// @param srcBuffer Input primvar buffer. 
+ /// must have BindVBO() method returning a GL + /// buffer object of source data + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param numPatchCoords number of patchCoords. + /// + /// @param patchCoords array of locations to be evaluated. + /// must have BindVBO() method returning an + /// array of PatchCoord struct in VBO. + /// + /// @param patchTable GLPatchTable or equivalent + /// + /// @param fvarChannel face-varying channel + /// + template<typename SRC_BUFFER, + typename DST_BUFFER, + typename PATCHCOORD_BUFFER, + typename PATCH_TABLE> + bool EvalPatchesFaceVarying(SRC_BUFFER *srcBuffer, + OpenSubdiv::Osd::BufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dstDesc, + int numPatchCoords, + PATCHCOORD_BUFFER *patchCoords, + PATCH_TABLE *patchTable, + int fvarChannel = 0) const + { + + return EvalPatches(srcBuffer->BindVBO(), + srcDesc, + dstBuffer->BindVBO(), + dstDesc, + 0, + OpenSubdiv::Osd::BufferDescriptor(), + 0, + OpenSubdiv::Osd::BufferDescriptor(), + numPatchCoords, + patchCoords->BindVBO(), + patchTable->GetFVarPatchArrays(fvarChannel), + patchTable->GetFVarPatchIndexBuffer(fvarChannel), + patchTable->GetFVarPatchParamBuffer(fvarChannel)); + } + + /// \brief Generic limit eval function. This function has a same + /// signature as other device kernels have so that it can be called + /// in the same way. + /// + /// @param srcBuffer Input primvar buffer. 
+ /// must have BindVBO() method returning a GL + /// buffer object of source data + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param duBuffer Output buffer derivative wrt u + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param duDesc vertex buffer descriptor for the duBuffer + /// + /// @param dvBuffer Output buffer derivative wrt v + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dvDesc vertex buffer descriptor for the dvBuffer + /// + /// @param numPatchCoords number of patchCoords. + /// + /// @param patchCoords array of locations to be evaluated. + /// must have BindVBO() method returning an + /// array of PatchCoord struct in VBO. + /// + /// @param patchTable GLPatchTable or equivalent + /// + /// @param fvarChannel face-varying channel + /// + /// @param instance cached compiled instance. Clients are supposed to + /// pre-compile an instance of this class and provide + /// to this function. If it's null the kernel still + /// compute by instantiating on-demand kernel although + /// it may cause a performance problem. 
+ /// + /// @param deviceContext not used in the GLXFB evaluator + /// + template<typename SRC_BUFFER, + typename DST_BUFFER, + typename PATCHCOORD_BUFFER, + typename PATCH_TABLE> + static bool EvalPatchesFaceVarying(SRC_BUFFER *srcBuffer, + OpenSubdiv::Osd::BufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dstDesc, + DST_BUFFER *duBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duDesc, + DST_BUFFER *dvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dvDesc, + int numPatchCoords, + PATCHCOORD_BUFFER *patchCoords, + PATCH_TABLE *patchTable, + int fvarChannel, + GLComputeEvaluator const *instance, + void *deviceContext = NULL) + { + + if (instance) { + return instance->EvalPatchesFaceVarying(srcBuffer, + srcDesc, + dstBuffer, + dstDesc, + duBuffer, + duDesc, + dvBuffer, + dvDesc, + numPatchCoords, + patchCoords, + patchTable, + fvarChannel); + } + else { + // Create an instance on demand (slow) + (void)deviceContext; // unused + instance = Create(srcDesc, dstDesc, duDesc, dvDesc); + if (instance) { + bool r = instance->EvalPatchesFaceVarying(srcBuffer, + srcDesc, + dstBuffer, + dstDesc, + duBuffer, + duDesc, + dvBuffer, + dvDesc, + numPatchCoords, + patchCoords, + patchTable, + fvarChannel); + delete instance; + return r; + } + return false; + } + } + + /// \brief Generic limit eval function. This function has a same + /// signature as other device kernels have so that it can be called + /// in the same way. + /// + /// @param srcBuffer Input primvar buffer. 
+ /// must have BindVBO() method returning a GL + /// buffer object of source data + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param duBuffer Output buffer derivative wrt u + /// must have BindVBO() method returning a GL + /// object of destination data + /// + /// @param duDesc vertex buffer descriptor for the duBuffer + /// + /// @param dvBuffer Output buffer derivative wrt v + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dvDesc vertex buffer descriptor for the dvBuffer + /// + /// @param numPatchCoords number of patchCoords. + /// + /// @param patchCoords array of locations to be evaluated. + /// must have BindVBO() method returning an + /// array of PatchCoord struct in VBO. 
+ /// + /// @param patchTable GLPatchTable or equivalent + /// + /// @param fvarChannel face-varying channel + /// + template<typename SRC_BUFFER, + typename DST_BUFFER, + typename PATCHCOORD_BUFFER, + typename PATCH_TABLE> + bool EvalPatchesFaceVarying(SRC_BUFFER *srcBuffer, + OpenSubdiv::Osd::BufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dstDesc, + DST_BUFFER *duBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duDesc, + DST_BUFFER *dvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dvDesc, + int numPatchCoords, + PATCHCOORD_BUFFER *patchCoords, + PATCH_TABLE *patchTable, + int fvarChannel = 0) const + { + + return EvalPatches(srcBuffer->BindVBO(), + srcDesc, + dstBuffer->BindVBO(), + dstDesc, + duBuffer->BindVBO(), + duDesc, + dvBuffer->BindVBO(), + dvDesc, + numPatchCoords, + patchCoords->BindVBO(), + patchTable->GetFVarPatchArrays(fvarChannel), + patchTable->GetFVarPatchIndexBuffer(fvarChannel), + patchTable->GetFVarPatchParamBuffer(fvarChannel)); + } + + /// \brief Generic limit eval function. This function has a same + /// signature as other device kernels have so that it can be called + /// in the same way. + /// + /// @param srcBuffer Input primvar buffer. 
+  ///                         must have BindVBO() method returning a GL
+  ///                         buffer object of source data
+  ///
+  /// @param srcDesc          vertex buffer descriptor for the input buffer
+  ///
+  /// @param dstBuffer        Output primvar buffer
+  ///                         must have BindVBO() method returning a GL
+  ///                         buffer object of destination data
+  ///
+  /// @param dstDesc          vertex buffer descriptor for the output buffer
+  ///
+  /// @param duBuffer         Output buffer derivative wrt u
+  ///                         must have BindVBO() method returning a GL
+  ///                         buffer object of destination data
+  ///
+  /// @param duDesc           vertex buffer descriptor for the duBuffer
+  ///
+  /// @param dvBuffer         Output buffer derivative wrt v
+  ///                         must have BindVBO() method returning a GL
+  ///                         buffer object of destination data
+  ///
+  /// @param dvDesc           vertex buffer descriptor for the dvBuffer
+  ///
+  /// @param duuBuffer        Output buffer 2nd derivative wrt u
+  ///                         must have BindVBO() method returning a GL
+  ///                         buffer object of destination data
+  ///
+  /// @param duuDesc          vertex buffer descriptor for the duuBuffer
+  ///
+  /// @param duvBuffer        Output buffer 2nd derivative wrt u and v
+  ///                         must have BindVBO() method returning a GL
+  ///                         buffer object of destination data
+  ///
+  /// @param duvDesc          vertex buffer descriptor for the duvBuffer
+  ///
+  /// @param dvvBuffer        Output buffer 2nd derivative wrt v
+  ///                         must have BindVBO() method returning a GL
+  ///                         buffer object of destination data
+  ///
+  /// @param dvvDesc          vertex buffer descriptor for the dvvBuffer
+  ///
+  /// @param numPatchCoords   number of patchCoords.
+  ///
+  /// @param patchCoords      array of locations to be evaluated.
+  ///                         must have BindVBO() method returning an
+  ///                         array of PatchCoord struct in VBO.
+  ///
+  /// @param patchTable       GLPatchTable or equivalent
+  ///
+  /// @param fvarChannel      face-varying channel
+  ///
+  /// @param instance         cached compiled instance. Clients are supposed to
+  ///                         pre-compile an instance of this class and provide
+  ///                         it to this function. If it is null, the patches
+  ///                         are still evaluated by creating an evaluator on
+  ///                         demand, although that may cause a performance
+  ///                         problem.
+  ///
+  /// @param deviceContext    not used by the GL compute evaluator
+  ///
+  /// @return the result of the instance-level EvalPatchesFaceVarying(), or
+  ///         false when no evaluator could be created on demand.
+  ///
+  template<typename SRC_BUFFER,
+           typename DST_BUFFER,
+           typename PATCHCOORD_BUFFER,
+           typename PATCH_TABLE>
+  static bool EvalPatchesFaceVarying(SRC_BUFFER *srcBuffer,
+                                     OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
+                                     DST_BUFFER *dstBuffer,
+                                     OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
+                                     DST_BUFFER *duBuffer,
+                                     OpenSubdiv::Osd::BufferDescriptor const &duDesc,
+                                     DST_BUFFER *dvBuffer,
+                                     OpenSubdiv::Osd::BufferDescriptor const &dvDesc,
+                                     DST_BUFFER *duuBuffer,
+                                     OpenSubdiv::Osd::BufferDescriptor const &duuDesc,
+                                     DST_BUFFER *duvBuffer,
+                                     OpenSubdiv::Osd::BufferDescriptor const &duvDesc,
+                                     DST_BUFFER *dvvBuffer,
+                                     OpenSubdiv::Osd::BufferDescriptor const &dvvDesc,
+                                     int numPatchCoords,
+                                     PATCHCOORD_BUFFER *patchCoords,
+                                     PATCH_TABLE *patchTable,
+                                     int fvarChannel,
+                                     GLComputeEvaluator const *instance,
+                                     void *deviceContext = NULL)
+  {
+    (void)deviceContext;  // unused
+
+    // Prefer the caller's pre-compiled evaluator; otherwise create one that
+    // lives only for the duration of this call (slow).
+    GLComputeEvaluator const *evaluator = instance;
+    if (evaluator == NULL) {
+      evaluator = Create(srcDesc, dstDesc, duDesc, dvDesc, duuDesc, duvDesc, dvvDesc);
+      if (evaluator == NULL) {
+        return false;
+      }
+    }
+
+    const bool succeeded = evaluator->EvalPatchesFaceVarying(srcBuffer,
+                                                             srcDesc,
+                                                             dstBuffer,
+                                                             dstDesc,
+                                                             duBuffer,
+                                                             duDesc,
+                                                             dvBuffer,
+                                                             dvDesc,
+                                                             duuBuffer,
+                                                             duuDesc,
+                                                             duvBuffer,
+                                                             duvDesc,
+                                                             dvvBuffer,
+                                                             dvvDesc,
+                                                             numPatchCoords,
+                                                             patchCoords,
+                                                             patchTable,
+                                                             fvarChannel);
+
+    // Only the evaluator created above belongs to this function; a
+    // caller-provided instance is left untouched.
+    if (instance == NULL) {
+      delete evaluator;
+    }
+    return succeeded;
+  }
+
+  /// \brief Generic limit eval function.
+  /// This function has the same
+  /// signature as other device kernels have so that it can be called
+  /// in the same way.
+  ///
+  /// @param srcBuffer        Input primvar buffer.
+  ///                         must have BindVBO() method returning a GL
+  ///                         buffer object of source data
+  ///
+  /// @param srcDesc          vertex buffer descriptor for the input buffer
+  ///
+  /// @param dstBuffer        Output primvar buffer
+  ///                         must have BindVBO() method returning a GL
+  ///                         buffer object of destination data
+  ///
+  /// @param dstDesc          vertex buffer descriptor for the output buffer
+  ///
+  /// @param duBuffer         Output buffer derivative wrt u
+  ///                         must have BindVBO() method returning a GL
+  ///                         buffer object of destination data
+  ///
+  /// @param duDesc           vertex buffer descriptor for the duBuffer
+  ///
+  /// @param dvBuffer         Output buffer derivative wrt v
+  ///                         must have BindVBO() method returning a GL
+  ///                         buffer object of destination data
+  ///
+  /// @param dvDesc           vertex buffer descriptor for the dvBuffer
+  ///
+  /// @param duuBuffer        Output buffer 2nd derivative wrt u
+  ///                         must have BindVBO() method returning a GL
+  ///                         buffer object of destination data
+  ///
+  /// @param duuDesc          vertex buffer descriptor for the duuBuffer
+  ///
+  /// @param duvBuffer        Output buffer 2nd derivative wrt u and v
+  ///                         must have BindVBO() method returning a GL
+  ///                         buffer object of destination data
+  ///
+  /// @param duvDesc          vertex buffer descriptor for the duvBuffer
+  ///
+  /// @param dvvBuffer        Output buffer 2nd derivative wrt v
+  ///                         must have BindVBO() method returning a GL
+  ///                         buffer object of destination data
+  ///
+  /// @param dvvDesc          vertex buffer descriptor for the dvvBuffer
+  ///
+  /// @param numPatchCoords   number of patchCoords.
+  ///
+  /// @param patchCoords      array of locations to be evaluated.
+  ///                         must have BindVBO() method returning an
+  ///                         array of PatchCoord struct in VBO.
+  ///
+  /// @param patchTable       GLPatchTable or equivalent
+  ///
+  /// @param fvarChannel      face-varying channel
+  ///
+  template<typename SRC_BUFFER,
+           typename DST_BUFFER,
+           typename PATCHCOORD_BUFFER,
+           typename PATCH_TABLE>
+  bool EvalPatchesFaceVarying(SRC_BUFFER *srcBuffer,
+                              OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
+                              DST_BUFFER *dstBuffer,
+                              OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
+                              DST_BUFFER *duBuffer,
+                              OpenSubdiv::Osd::BufferDescriptor const &duDesc,
+                              DST_BUFFER *dvBuffer,
+                              OpenSubdiv::Osd::BufferDescriptor const &dvDesc,
+                              DST_BUFFER *duuBuffer,
+                              OpenSubdiv::Osd::BufferDescriptor const &duuDesc,
+                              DST_BUFFER *duvBuffer,
+                              OpenSubdiv::Osd::BufferDescriptor const &duvDesc,
+                              DST_BUFFER *dvvBuffer,
+                              OpenSubdiv::Osd::BufferDescriptor const &dvvDesc,
+                              int numPatchCoords,
+                              PATCHCOORD_BUFFER *patchCoords,
+                              PATCH_TABLE *patchTable,
+                              int fvarChannel = 0) const
+  {
+    // Forward to EvalPatches with the bound VBOs and the face-varying tables of `fvarChannel`.
+    return EvalPatches(srcBuffer->BindVBO(),
+                       srcDesc,
+                       dstBuffer->BindVBO(),
+                       dstDesc,
+                       duBuffer->BindVBO(),
+                       duDesc,
+                       dvBuffer->BindVBO(),
+                       dvDesc,
+                       duuBuffer->BindVBO(),
+                       duuDesc,
+                       duvBuffer->BindVBO(),
+                       duvDesc,
+                       dvvBuffer->BindVBO(),
+                       dvvDesc,
+                       numPatchCoords,
+                       patchCoords->BindVBO(),
+                       patchTable->GetFVarPatchArrays(fvarChannel),
+                       patchTable->GetFVarPatchIndexBuffer(fvarChannel),
+                       patchTable->GetFVarPatchParamBuffer(fvarChannel));
+  }
+
+  /// ----------------------------------------------------------------------
+  ///
+  ///   Other methods
+  ///
+  /// ----------------------------------------------------------------------
+
+  /// Configure the GLSL kernel. A valid GL context must be made current before
+  /// calling this function. Returns false if it fails to compile the kernel.
+  bool Compile(
+      OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
+      OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
+      OpenSubdiv::Osd::BufferDescriptor const &duDesc = OpenSubdiv::Osd::BufferDescriptor(),
+      OpenSubdiv::Osd::BufferDescriptor const &dvDesc = OpenSubdiv::Osd::BufferDescriptor(),
+      OpenSubdiv::Osd::BufferDescriptor const &duuDesc = OpenSubdiv::Osd::BufferDescriptor(),
+      OpenSubdiv::Osd::BufferDescriptor const &duvDesc = OpenSubdiv::Osd::BufferDescriptor(),
+      OpenSubdiv::Osd::BufferDescriptor const &dvvDesc = OpenSubdiv::Osd::BufferDescriptor());
+
+  /// Wait until the dispatched kernel finishes.
+  static void Synchronize(void *deviceContext);
+
+ private:
+  // Program and uniform handles of the stencil kernel.
+  // NOTE(review): the uniform* members presumably hold the locations of the
+  // identically named GLSL uniforms - confirm against the implementation file.
+  struct _StencilKernel {
+    _StencilKernel();
+    ~_StencilKernel();
+    bool Compile(OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
+                 OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
+                 OpenSubdiv::Osd::BufferDescriptor const &duDesc,
+                 OpenSubdiv::Osd::BufferDescriptor const &dvDesc,
+                 OpenSubdiv::Osd::BufferDescriptor const &duuDesc,
+                 OpenSubdiv::Osd::BufferDescriptor const &duvDesc,
+                 OpenSubdiv::Osd::BufferDescriptor const &dvvDesc,
+                 int workGroupSize);
+    GLuint program;
+    GLuint uniformStart;
+    GLuint uniformEnd;
+    GLuint uniformSrcOffset;
+    GLuint uniformDstOffset;
+    GLuint uniformDuDesc;
+    GLuint uniformDvDesc;
+    GLuint uniformDuuDesc;
+    GLuint uniformDuvDesc;
+    GLuint uniformDvvDesc;
+  } _stencilKernel;
+
+  // Program and uniform handles of the patch kernel (same layout as the
+  // stencil kernel, with a patch-array handle instead of a start/end pair).
+  struct _PatchKernel {
+    _PatchKernel();
+    ~_PatchKernel();
+    bool Compile(OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
+                 OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
+                 OpenSubdiv::Osd::BufferDescriptor const &duDesc,
+                 OpenSubdiv::Osd::BufferDescriptor const &dvDesc,
+                 OpenSubdiv::Osd::BufferDescriptor const &duuDesc,
+                 OpenSubdiv::Osd::BufferDescriptor const &duvDesc,
+                 OpenSubdiv::Osd::BufferDescriptor const &dvvDesc,
+                 int workGroupSize);
+    GLuint program;
+    GLuint uniformSrcOffset;
+    GLuint uniformDstOffset;
+    GLuint uniformPatchArray;
+    GLuint uniformDuDesc;
+    GLuint uniformDvDesc;
+    GLuint uniformDuuDesc;
+    GLuint uniformDuvDesc;
+    GLuint uniformDvvDesc;
+  } _patchKernel;
+
+  int _workGroupSize;
+  GLuint _patchArraysSSBO;
+
+  int GetDispatchSize(int count) const;
+
+  void DispatchCompute(int totalDispatchSize) const;
+};
+}  // namespace opensubdiv
+}  // namespace blender
+
+#endif  // OPENSUBDIV_GL_COMPUTE_EVALUATOR_H_
diff --git a/intern/opensubdiv/internal/evaluator/shaders/glsl_compute_kernel.glsl b/intern/opensubdiv/internal/evaluator/shaders/glsl_compute_kernel.glsl
new file mode 100644
index 00000000000..2f60aee0999
--- /dev/null
+++ b/intern/opensubdiv/internal/evaluator/shaders/glsl_compute_kernel.glsl
@@ -0,0 +1,383 @@
+//
+//   Copyright 2013 Pixar
+//
+//   Licensed under the Apache License, Version 2.0 (the "Apache License")
+//   with the following modification; you may not use this file except in
+//   compliance with the Apache License and the following modification to it:
+//   Section 6. Trademarks. is deleted and replaced with:
+//
+//   6. Trademarks. This License does not grant permission to use the trade
+//      names, trademarks, service marks, or product names of the Licensor
+//      and its affiliates, except as required to comply with Section 4(c) of
+//      the License and to reproduce the content of the NOTICE file.
+//
+//   You may obtain a copy of the Apache License at
+//
+//       http://www.apache.org/licenses/LICENSE-2.0
+//
+//   Unless required by applicable law or agreed to in writing, software
+//   distributed under the Apache License with the above modification is
+//   distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+//   KIND, either express or implied. See the Apache License for the specific
+//   language governing permissions and limitations under the Apache License.
+// + +//------------------------------------------------------------------------------ + +layout(local_size_x = WORK_GROUP_SIZE, local_size_y = 1, local_size_z = 1) in; +layout(std430) buffer; + +// source and destination buffers + +uniform int srcOffset = 0; +uniform int dstOffset = 0; +layout(binding = 0) buffer src_buffer +{ + float srcVertexBuffer[]; +}; +layout(binding = 1) buffer dst_buffer +{ + float dstVertexBuffer[]; +}; + + // derivative buffers (if needed) + +#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES) +uniform ivec3 duDesc; +uniform ivec3 dvDesc; +layout(binding = 2) buffer du_buffer +{ + float duBuffer[]; +}; +layout(binding = 3) buffer dv_buffer +{ + float dvBuffer[]; +}; +#endif + +#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_2ND_DERIVATIVES) +uniform ivec3 duuDesc; +uniform ivec3 duvDesc; +uniform ivec3 dvvDesc; +layout(binding = 10) buffer duu_buffer +{ + float duuBuffer[]; +}; +layout(binding = 11) buffer duv_buffer +{ + float duvBuffer[]; +}; +layout(binding = 12) buffer dvv_buffer +{ + float dvvBuffer[]; +}; +#endif + + // stencil buffers + +#if defined(OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_STENCILS) + +uniform int batchStart = 0; +uniform int batchEnd = 0; +layout(binding = 4) buffer stencilSizes +{ + int _sizes[]; +}; +layout(binding = 5) buffer stencilOffsets +{ + int _offsets[]; +}; +layout(binding = 6) buffer stencilIndices +{ + int _indices[]; +}; +layout(binding = 7) buffer stencilWeights +{ + float _weights[]; +}; + +# if defined(OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES) +layout(binding = 8) buffer stencilDuWeights +{ + float _duWeights[]; +}; +layout(binding = 9) buffer stencilDvWeights +{ + float _dvWeights[]; +}; +# endif + +# if defined(OPENSUBDIV_GLSL_COMPUTE_USE_2ND_DERIVATIVES) +layout(binding = 13) buffer stencilDuuWeights +{ + float _duuWeights[]; +}; +layout(binding = 14) buffer stencilDuvWeights +{ + float _duvWeights[]; +}; +layout(binding = 15) buffer stencilDvvWeights +{ + float _dvvWeights[]; +}; +# endif + +uint 
getGlobalInvocationIndex() +{ + uint invocations_per_row = gl_WorkGroupSize.x * gl_NumWorkGroups.x; + return gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * invocations_per_row; +} + +#endif + +// patch buffers + +#if defined(OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_PATCHES) + +layout(binding = 4) buffer patchArray_buffer +{ + OsdPatchArray patchArrayBuffer[]; +}; +layout(binding = 5) buffer patchCoord_buffer +{ + OsdPatchCoord patchCoords[]; +}; +layout(binding = 6) buffer patchIndex_buffer +{ + int patchIndexBuffer[]; +}; +layout(binding = 7) buffer patchParam_buffer +{ + OsdPatchParam patchParamBuffer[]; +}; + +OsdPatchCoord GetPatchCoord(int coordIndex) +{ + return patchCoords[coordIndex]; +} + +OsdPatchArray GetPatchArray(int arrayIndex) +{ + return patchArrayBuffer[arrayIndex]; +} + +OsdPatchParam GetPatchParam(int patchIndex) +{ + return patchParamBuffer[patchIndex]; +} + +#endif + +//------------------------------------------------------------------------------ + +struct Vertex { + float vertexData[LENGTH]; +}; + +void clear(out Vertex v) +{ + for (int i = 0; i < LENGTH; ++i) { + v.vertexData[i] = 0; + } +} + +Vertex readVertex(int index) +{ + Vertex v; + int vertexIndex = srcOffset + index * SRC_STRIDE; + for (int i = 0; i < LENGTH; ++i) { + v.vertexData[i] = srcVertexBuffer[vertexIndex + i]; + } + return v; +} + +void writeVertex(int index, Vertex v) +{ + int vertexIndex = dstOffset + index * DST_STRIDE; + for (int i = 0; i < LENGTH; ++i) { + dstVertexBuffer[vertexIndex + i] = v.vertexData[i]; + } +} + +void addWithWeight(inout Vertex v, const Vertex src, float weight) +{ + for (int i = 0; i < LENGTH; ++i) { + v.vertexData[i] += weight * src.vertexData[i]; + } +} + +#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES) +void writeDu(int index, Vertex du) +{ + int duIndex = duDesc.x + index * duDesc.z; + for (int i = 0; i < LENGTH; ++i) { + duBuffer[duIndex + i] = du.vertexData[i]; + } +} + +void writeDv(int index, Vertex dv) +{ + int dvIndex = dvDesc.x 
+ index * dvDesc.z; + for (int i = 0; i < LENGTH; ++i) { + dvBuffer[dvIndex + i] = dv.vertexData[i]; + } +} +#endif + +#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_2ND_DERIVATIVES) +void writeDuu(int index, Vertex duu) +{ + int duuIndex = duuDesc.x + index * duuDesc.z; + for (int i = 0; i < LENGTH; ++i) { + duuBuffer[duuIndex + i] = duu.vertexData[i]; + } +} + +void writeDuv(int index, Vertex duv) +{ + int duvIndex = duvDesc.x + index * duvDesc.z; + for (int i = 0; i < LENGTH; ++i) { + duvBuffer[duvIndex + i] = duv.vertexData[i]; + } +} + +void writeDvv(int index, Vertex dvv) +{ + int dvvIndex = dvvDesc.x + index * dvvDesc.z; + for (int i = 0; i < LENGTH; ++i) { + dvvBuffer[dvvIndex + i] = dvv.vertexData[i]; + } +} +#endif + +//------------------------------------------------------------------------------ +#if defined(OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_STENCILS) + +void main() +{ + int current = int(getGlobalInvocationIndex()) + batchStart; + + if (current >= batchEnd) { + return; + } + + Vertex dst; + clear(dst); + + int offset = _offsets[current], size = _sizes[current]; + + for (int stencil = 0; stencil < size; ++stencil) { + int vindex = offset + stencil; + addWithWeight(dst, readVertex(_indices[vindex]), _weights[vindex]); + } + + writeVertex(current, dst); + +# if defined(OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES) + Vertex du, dv; + clear(du); + clear(dv); + for (int i = 0; i < size; ++i) { + // expects the compiler optimizes readVertex out here. + Vertex src = readVertex(_indices[offset + i]); + addWithWeight(du, src, _duWeights[offset + i]); + addWithWeight(dv, src, _dvWeights[offset + i]); + } + + if (duDesc.y > 0) { // length + writeDu(current, du); + } + if (dvDesc.y > 0) { + writeDv(current, dv); + } +# endif +# if defined(OPENSUBDIV_GLSL_COMPUTE_USE_2ND_DERIVATIVES) + Vertex duu, duv, dvv; + clear(duu); + clear(duv); + clear(dvv); + for (int i = 0; i < size; ++i) { + // expects the compiler optimizes readVertex out here. 
+ Vertex src = readVertex(_indices[offset + i]); + addWithWeight(duu, src, _duuWeights[offset + i]); + addWithWeight(duv, src, _duvWeights[offset + i]); + addWithWeight(dvv, src, _dvvWeights[offset + i]); + } + + if (duuDesc.y > 0) { // length + writeDuu(current, duu); + } + if (duvDesc.y > 0) { + writeDuv(current, duv); + } + if (dvvDesc.y > 0) { + writeDvv(current, dvv); + } +# endif +} + +#endif + +//------------------------------------------------------------------------------ +#if defined(OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_PATCHES) + +// PERFORMANCE: stride could be constant, but not as significant as length + +void main() +{ + + int current = int(gl_GlobalInvocationID.x); + + OsdPatchCoord coord = GetPatchCoord(current); + OsdPatchArray array = GetPatchArray(coord.arrayIndex); + OsdPatchParam param = GetPatchParam(coord.patchIndex); + + int patchType = OsdPatchParamIsRegular(param) ? array.regDesc : array.desc; + + float wP[20], wDu[20], wDv[20], wDuu[20], wDuv[20], wDvv[20]; + int nPoints = OsdEvaluatePatchBasis( + patchType, param, coord.s, coord.t, wP, wDu, wDv, wDuu, wDuv, wDvv); + + Vertex dst, du, dv, duu, duv, dvv; + clear(dst); + clear(du); + clear(dv); + clear(duu); + clear(duv); + clear(dvv); + + int indexBase = array.indexBase + array.stride * (coord.patchIndex - array.primitiveIdBase); + + for (int cv = 0; cv < nPoints; ++cv) { + int index = patchIndexBuffer[indexBase + cv]; + addWithWeight(dst, readVertex(index), wP[cv]); + addWithWeight(du, readVertex(index), wDu[cv]); + addWithWeight(dv, readVertex(index), wDv[cv]); + addWithWeight(duu, readVertex(index), wDuu[cv]); + addWithWeight(duv, readVertex(index), wDuv[cv]); + addWithWeight(dvv, readVertex(index), wDvv[cv]); + } + writeVertex(current, dst); + +# if defined(OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES) + if (duDesc.y > 0) { // length + writeDu(current, du); + } + if (dvDesc.y > 0) { + writeDv(current, dv); + } +# endif +# if defined(OPENSUBDIV_GLSL_COMPUTE_USE_2ND_DERIVATIVES) + if 
(duuDesc.y > 0) { // length + writeDuu(current, duu); + } + if (duvDesc.y > 0) { // length + writeDuv(current, duv); + } + if (dvvDesc.y > 0) { + writeDvv(current, dvv); + } +# endif +} + +#endif |