diff options
author | Brecht Van Lommel <brecht@blender.org> | 2021-09-14 16:37:47 +0300 |
---|---|---|
committer | Brecht Van Lommel <brecht@blender.org> | 2021-09-30 21:48:08 +0300 |
commit | a754e35198d852ea34e2b82cd2b126538e6f5a3b (patch) | |
tree | 9118b3fa19ab70aa1b50440ce62e5d028d940cfd /intern | |
parent | ac582056e2e70f3b0d91ff69d0307dd357e2e2ed (diff) |
Cycles: refactor API for GPU display
* Split GPUDisplay into two classes. PathTraceDisplay to implement the Cycles side,
and DisplayDriver to implement the host application side. The DisplayDriver is now
a fully abstract base class, embedded in the PathTraceDisplay.
* Move copy_pixels_to_texture implementation out of the host side into the Cycles side,
since it can be implemented in terms of the texture buffer mapping.
* Move definition of DeviceGraphicsInteropDestination into display driver header, so
that we do not need to expose private device headers in the public API.
* Add more detailed comments about how the DisplayDriver should be implemented.
The "driver" terminology might not be obvious, but is also used in other renderers.
Differential Revision: https://developer.blender.org/D12626
Diffstat (limited to 'intern')
26 files changed, 432 insertions, 341 deletions
diff --git a/intern/cycles/blender/CMakeLists.txt b/intern/cycles/blender/CMakeLists.txt index 64d226cb9ec..2660eee017b 100644 --- a/intern/cycles/blender/CMakeLists.txt +++ b/intern/cycles/blender/CMakeLists.txt @@ -31,9 +31,9 @@ set(INC_SYS set(SRC blender_camera.cpp blender_device.cpp + blender_display_driver.cpp blender_image.cpp blender_geometry.cpp - blender_gpu_display.cpp blender_light.cpp blender_mesh.cpp blender_object.cpp @@ -51,7 +51,7 @@ set(SRC CCL_api.h blender_device.h - blender_gpu_display.h + blender_display_driver.h blender_id_map.h blender_image.h blender_object_cull.h diff --git a/intern/cycles/blender/blender_gpu_display.cpp b/intern/cycles/blender/blender_display_driver.cpp index 5a4567deac3..5267f41eef7 100644 --- a/intern/cycles/blender/blender_gpu_display.cpp +++ b/intern/cycles/blender/blender_display_driver.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "blender/blender_gpu_display.h" +#include "blender/blender_display_driver.h" #include "device/device.h" #include "util/util_logging.h" @@ -273,17 +273,17 @@ uint BlenderDisplaySpaceShader::get_shader_program() } /* -------------------------------------------------------------------- - * BlenderGPUDisplay. + * BlenderDisplayDriver. */ -BlenderGPUDisplay::BlenderGPUDisplay(BL::RenderEngine &b_engine, BL::Scene &b_scene) +BlenderDisplayDriver::BlenderDisplayDriver(BL::RenderEngine &b_engine, BL::Scene &b_scene) : b_engine_(b_engine), display_shader_(BlenderDisplayShader::create(b_engine, b_scene)) { /* Create context while on the main thread. */ gl_context_create(); } -BlenderGPUDisplay::~BlenderGPUDisplay() +BlenderDisplayDriver::~BlenderDisplayDriver() { gl_resources_destroy(); } @@ -292,19 +292,18 @@ BlenderGPUDisplay::~BlenderGPUDisplay() * Update procedure. 
*/ -bool BlenderGPUDisplay::do_update_begin(const GPUDisplayParams &params, +bool BlenderDisplayDriver::update_begin(const Params &params, int texture_width, int texture_height) { - /* Note that it's the responsibility of BlenderGPUDisplay to ensure updating and drawing + /* Note that it's the responsibility of BlenderDisplayDriver to ensure updating and drawing * the texture does not happen at the same time. This is achieved indirectly. * * When enabling the OpenGL context, it uses an internal mutex lock DST.gl_context_lock. * This same lock is also held when do_draw() is called, which together ensure mutual * exclusion. * - * This locking is not performed at the GPU display level, because that would cause lock - * inversion. */ + * This locking is not performed on the Cycles side, because that would cause lock inversion. */ if (!gl_context_enable()) { return false; } @@ -361,7 +360,7 @@ bool BlenderGPUDisplay::do_update_begin(const GPUDisplayParams &params, return true; } -void BlenderGPUDisplay::do_update_end() +void BlenderDisplayDriver::update_end() { gl_upload_sync_ = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); glFlush(); @@ -370,53 +369,17 @@ void BlenderGPUDisplay::do_update_end() } /* -------------------------------------------------------------------- - * Texture update from CPU buffer. - */ - -void BlenderGPUDisplay::do_copy_pixels_to_texture( - const half4 *rgba_pixels, int texture_x, int texture_y, int pixels_width, int pixels_height) -{ - /* This call copies pixels to a Pixel Buffer Object (PBO) which is much cheaper from CPU time - * point of view than to copy data directly to the OpenGL texture. - * - * The possible downside of this approach is that it might require a higher peak memory when - * doing partial updates of the texture (although, in practice even partial updates might peak - * with a full-frame buffer stored on the CPU if the GPU is currently occupied. 
*/ - - half4 *mapped_rgba_pixels = map_texture_buffer(); - if (!mapped_rgba_pixels) { - return; - } - - if (texture_x == 0 && texture_y == 0 && pixels_width == texture_.width && - pixels_height == texture_.height) { - const size_t size_in_bytes = sizeof(half4) * texture_.width * texture_.height; - memcpy(mapped_rgba_pixels, rgba_pixels, size_in_bytes); - } - else { - const half4 *rgba_row = rgba_pixels; - half4 *mapped_rgba_row = mapped_rgba_pixels + texture_y * texture_.width + texture_x; - for (int y = 0; y < pixels_height; - ++y, rgba_row += pixels_width, mapped_rgba_row += texture_.width) { - memcpy(mapped_rgba_row, rgba_row, sizeof(half4) * pixels_width); - } - } - - unmap_texture_buffer(); -} - -/* -------------------------------------------------------------------- * Texture buffer mapping. */ -half4 *BlenderGPUDisplay::do_map_texture_buffer() +half4 *BlenderDisplayDriver::map_texture_buffer() { glBindBuffer(GL_PIXEL_UNPACK_BUFFER, texture_.gl_pbo_id); half4 *mapped_rgba_pixels = reinterpret_cast<half4 *>( glMapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_WRITE_ONLY)); if (!mapped_rgba_pixels) { - LOG(ERROR) << "Error mapping BlenderGPUDisplay pixel buffer object."; + LOG(ERROR) << "Error mapping BlenderDisplayDriver pixel buffer object."; } if (texture_.need_clear) { @@ -431,7 +394,7 @@ half4 *BlenderGPUDisplay::do_map_texture_buffer() return mapped_rgba_pixels; } -void BlenderGPUDisplay::do_unmap_texture_buffer() +void BlenderDisplayDriver::unmap_texture_buffer() { glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER); @@ -442,9 +405,9 @@ void BlenderGPUDisplay::do_unmap_texture_buffer() * Graphics interoperability. 
*/ -DeviceGraphicsInteropDestination BlenderGPUDisplay::do_graphics_interop_get() +BlenderDisplayDriver::GraphicsInterop BlenderDisplayDriver::graphics_interop_get() { - DeviceGraphicsInteropDestination interop_dst; + GraphicsInterop interop_dst; interop_dst.buffer_width = texture_.buffer_width; interop_dst.buffer_height = texture_.buffer_height; @@ -456,12 +419,12 @@ DeviceGraphicsInteropDestination BlenderGPUDisplay::do_graphics_interop_get() return interop_dst; } -void BlenderGPUDisplay::graphics_interop_activate() +void BlenderDisplayDriver::graphics_interop_activate() { gl_context_enable(); } -void BlenderGPUDisplay::graphics_interop_deactivate() +void BlenderDisplayDriver::graphics_interop_deactivate() { gl_context_disable(); } @@ -470,17 +433,17 @@ void BlenderGPUDisplay::graphics_interop_deactivate() * Drawing. */ -void BlenderGPUDisplay::clear() +void BlenderDisplayDriver::clear() { texture_.need_clear = true; } -void BlenderGPUDisplay::set_zoom(float zoom_x, float zoom_y) +void BlenderDisplayDriver::set_zoom(float zoom_x, float zoom_y) { zoom_ = make_float2(zoom_x, zoom_y); } -void BlenderGPUDisplay::do_draw(const GPUDisplayParams &params) +void BlenderDisplayDriver::draw(const Params &params) { /* See do_update_begin() for why no locking is required here. */ const bool transparent = true; // TODO(sergey): Derive this from Film. @@ -584,7 +547,7 @@ void BlenderGPUDisplay::do_draw(const GPUDisplayParams &params) } } -void BlenderGPUDisplay::gl_context_create() +void BlenderDisplayDriver::gl_context_create() { /* When rendering in viewport there is no render context available via engine. * Check whether own context is to be created here. 
@@ -613,7 +576,7 @@ void BlenderGPUDisplay::gl_context_create() } } -bool BlenderGPUDisplay::gl_context_enable() +bool BlenderDisplayDriver::gl_context_enable() { if (use_gl_context_) { if (!gl_context_) { @@ -628,7 +591,7 @@ bool BlenderGPUDisplay::gl_context_enable() return true; } -void BlenderGPUDisplay::gl_context_disable() +void BlenderDisplayDriver::gl_context_disable() { if (use_gl_context_) { if (gl_context_) { @@ -641,7 +604,7 @@ void BlenderGPUDisplay::gl_context_disable() RE_engine_render_context_disable(reinterpret_cast<RenderEngine *>(b_engine_.ptr.data)); } -void BlenderGPUDisplay::gl_context_dispose() +void BlenderDisplayDriver::gl_context_dispose() { if (gl_context_) { const bool drw_state = DRW_opengl_context_release(); @@ -653,7 +616,7 @@ void BlenderGPUDisplay::gl_context_dispose() } } -bool BlenderGPUDisplay::gl_draw_resources_ensure() +bool BlenderDisplayDriver::gl_draw_resources_ensure() { if (!texture_.gl_id) { /* If there is no texture allocated, there is nothing to draw. 
Inform the draw call that it can @@ -680,7 +643,7 @@ bool BlenderGPUDisplay::gl_draw_resources_ensure() return true; } -void BlenderGPUDisplay::gl_resources_destroy() +void BlenderDisplayDriver::gl_resources_destroy() { gl_context_enable(); @@ -703,7 +666,7 @@ void BlenderGPUDisplay::gl_resources_destroy() gl_context_dispose(); } -bool BlenderGPUDisplay::gl_texture_resources_ensure() +bool BlenderDisplayDriver::gl_texture_resources_ensure() { if (texture_.creation_attempted) { return texture_.is_created; @@ -740,7 +703,7 @@ bool BlenderGPUDisplay::gl_texture_resources_ensure() return true; } -void BlenderGPUDisplay::texture_update_if_needed() +void BlenderDisplayDriver::texture_update_if_needed() { if (!texture_.need_update) { return; @@ -754,7 +717,7 @@ void BlenderGPUDisplay::texture_update_if_needed() texture_.need_update = false; } -void BlenderGPUDisplay::vertex_buffer_update(const GPUDisplayParams &params) +void BlenderDisplayDriver::vertex_buffer_update(const Params &params) { /* Invalidate old contents - avoids stalling if the buffer is still waiting in queue to be * rendered. 
*/ @@ -767,23 +730,23 @@ void BlenderGPUDisplay::vertex_buffer_update(const GPUDisplayParams &params) vpointer[0] = 0.0f; vpointer[1] = 0.0f; - vpointer[2] = params.offset.x; - vpointer[3] = params.offset.y; + vpointer[2] = params.full_offset.x; + vpointer[3] = params.full_offset.y; vpointer[4] = 1.0f; vpointer[5] = 0.0f; - vpointer[6] = (float)params.size.x + params.offset.x; - vpointer[7] = params.offset.y; + vpointer[6] = (float)params.size.x + params.full_offset.x; + vpointer[7] = params.full_offset.y; vpointer[8] = 1.0f; vpointer[9] = 1.0f; - vpointer[10] = (float)params.size.x + params.offset.x; - vpointer[11] = (float)params.size.y + params.offset.y; + vpointer[10] = (float)params.size.x + params.full_offset.x; + vpointer[11] = (float)params.size.y + params.full_offset.y; vpointer[12] = 0.0f; vpointer[13] = 1.0f; - vpointer[14] = params.offset.x; - vpointer[15] = (float)params.size.y + params.offset.y; + vpointer[14] = params.full_offset.x; + vpointer[15] = (float)params.size.y + params.full_offset.y; glUnmapBuffer(GL_ARRAY_BUFFER); } diff --git a/intern/cycles/blender/blender_gpu_display.h b/intern/cycles/blender/blender_display_driver.h index 89420567037..558997c6b4f 100644 --- a/intern/cycles/blender/blender_gpu_display.h +++ b/intern/cycles/blender/blender_display_driver.h @@ -22,12 +22,14 @@ #include "RNA_blender_cpp.h" -#include "render/gpu_display.h" +#include "render/display_driver.h" + +#include "util/util_thread.h" #include "util/util_unique_ptr.h" CCL_NAMESPACE_BEGIN -/* Base class of shader used for GPU display rendering. */ +/* Base class of shader used for display driver rendering. */ class BlenderDisplayShader { public: static constexpr const char *position_attribute_name = "pos"; @@ -96,11 +98,11 @@ class BlenderDisplaySpaceShader : public BlenderDisplayShader { uint shader_program_ = 0; }; -/* GPU display implementation which is specific for Blender viewport integration. 
*/ -class BlenderGPUDisplay : public GPUDisplay { +/* Display driver implementation which is specific for Blender viewport integration. */ +class BlenderDisplayDriver : public DisplayDriver { public: - BlenderGPUDisplay(BL::RenderEngine &b_engine, BL::Scene &b_scene); - ~BlenderGPUDisplay(); + BlenderDisplayDriver(BL::RenderEngine &b_engine, BL::Scene &b_scene); + ~BlenderDisplayDriver(); virtual void graphics_interop_activate() override; virtual void graphics_interop_deactivate() override; @@ -110,22 +112,15 @@ class BlenderGPUDisplay : public GPUDisplay { void set_zoom(float zoom_x, float zoom_y); protected: - virtual bool do_update_begin(const GPUDisplayParams &params, - int texture_width, - int texture_height) override; - virtual void do_update_end() override; + virtual bool update_begin(const Params &params, int texture_width, int texture_height) override; + virtual void update_end() override; - virtual void do_copy_pixels_to_texture(const half4 *rgba_pixels, - int texture_x, - int texture_y, - int pixels_width, - int pixels_height) override; - virtual void do_draw(const GPUDisplayParams &params) override; + virtual half4 *map_texture_buffer() override; + virtual void unmap_texture_buffer() override; - virtual half4 *do_map_texture_buffer() override; - virtual void do_unmap_texture_buffer() override; + virtual GraphicsInterop graphics_interop_get() override; - virtual DeviceGraphicsInteropDestination do_graphics_interop_get() override; + virtual void draw(const Params &params) override; /* Helper function which allocates new GPU context. */ void gl_context_create(); @@ -152,13 +147,13 @@ class BlenderGPUDisplay : public GPUDisplay { * This buffer is used to render texture in the viewport. * * NOTE: The buffer needs to be bound. */ - void vertex_buffer_update(const GPUDisplayParams &params); + void vertex_buffer_update(const Params &params); BL::RenderEngine b_engine_; /* OpenGL context which is used the render engine doesn't have its own. 
*/ void *gl_context_ = nullptr; - /* The when Blender RenderEngine side context is not available and the GPUDisplay is to create + /* The when Blender RenderEngine side context is not available and the DisplayDriver is to create * its own context. */ bool use_gl_context_ = false; /* Mutex used to guard the `gl_context_`. */ diff --git a/intern/cycles/blender/blender_session.cpp b/intern/cycles/blender/blender_session.cpp index 88edc7eafe7..1a42456eda0 100644 --- a/intern/cycles/blender/blender_session.cpp +++ b/intern/cycles/blender/blender_session.cpp @@ -42,7 +42,7 @@ #include "util/util_progress.h" #include "util/util_time.h" -#include "blender/blender_gpu_display.h" +#include "blender/blender_display_driver.h" #include "blender/blender_session.h" #include "blender/blender_sync.h" #include "blender/blender_util.h" @@ -159,9 +159,10 @@ void BlenderSession::create_session() /* Create GPU display. */ if (!b_engine.is_preview() && !headless) { - unique_ptr<BlenderGPUDisplay> gpu_display = make_unique<BlenderGPUDisplay>(b_engine, b_scene); - gpu_display_ = gpu_display.get(); - session->set_gpu_display(move(gpu_display)); + unique_ptr<BlenderDisplayDriver> display_driver = make_unique<BlenderDisplayDriver>(b_engine, + b_scene); + display_driver_ = display_driver.get(); + session->set_display_driver(move(display_driver)); } /* Viewport and preview (as in, material preview) does not do tiled rendering, so can inform @@ -446,7 +447,7 @@ void BlenderSession::render(BL::Depsgraph &b_depsgraph_) /* Use final write for preview renders, otherwise render result wouldn't be be updated on Blender * side. */ - /* TODO(sergey): Investigate whether GPUDisplay can be used for the preview as well. */ + /* TODO(sergey): Investigate whether DisplayDriver can be used for the preview as well. 
*/ if (b_engine.is_preview()) { session->update_render_tile_cb = [&]() { write_render_tile(); }; } @@ -708,7 +709,7 @@ void BlenderSession::bake(BL::Depsgraph &b_depsgraph_, session->read_render_tile_cb = [&]() { read_render_tile(); }; session->write_render_tile_cb = [&]() { write_render_tile(); }; - session->set_gpu_display(nullptr); + session->set_display_driver(nullptr); if (!session->progress.get_cancel()) { /* Sync scene. */ @@ -895,7 +896,7 @@ void BlenderSession::draw(BL::SpaceImageEditor &space_image) } BL::Array<float, 2> zoom = space_image.zoom(); - gpu_display_->set_zoom(zoom[0], zoom[1]); + display_driver_->set_zoom(zoom[0], zoom[1]); session->draw(); } diff --git a/intern/cycles/blender/blender_session.h b/intern/cycles/blender/blender_session.h index 58683ee07a1..1ca8fdf87d0 100644 --- a/intern/cycles/blender/blender_session.h +++ b/intern/cycles/blender/blender_session.h @@ -29,7 +29,7 @@ CCL_NAMESPACE_BEGIN -class BlenderGPUDisplay; +class BlenderDisplayDriver; class BlenderSync; class ImageMetaData; class Scene; @@ -164,8 +164,8 @@ class BlenderSession { int last_pass_index = -1; } draw_state_; - /* NOTE: The BlenderSession references the GPU display. */ - BlenderGPUDisplay *gpu_display_ = nullptr; + /* NOTE: The BlenderSession references the display driver. 
*/ + BlenderDisplayDriver *display_driver_ = nullptr; vector<string> full_buffer_files_; }; diff --git a/intern/cycles/device/cuda/graphics_interop.cpp b/intern/cycles/device/cuda/graphics_interop.cpp index e8ca8b90eae..30efefd9b6b 100644 --- a/intern/cycles/device/cuda/graphics_interop.cpp +++ b/intern/cycles/device/cuda/graphics_interop.cpp @@ -37,14 +37,15 @@ CUDADeviceGraphicsInterop::~CUDADeviceGraphicsInterop() } } -void CUDADeviceGraphicsInterop::set_destination( - const DeviceGraphicsInteropDestination &destination) +void CUDADeviceGraphicsInterop::set_display_interop( + const DisplayDriver::GraphicsInterop &display_interop) { - const int64_t new_buffer_area = int64_t(destination.buffer_width) * destination.buffer_height; + const int64_t new_buffer_area = int64_t(display_interop.buffer_width) * + display_interop.buffer_height; - need_clear_ = destination.need_clear; + need_clear_ = display_interop.need_clear; - if (opengl_pbo_id_ == destination.opengl_pbo_id && buffer_area_ == new_buffer_area) { + if (opengl_pbo_id_ == display_interop.opengl_pbo_id && buffer_area_ == new_buffer_area) { return; } @@ -55,12 +56,12 @@ void CUDADeviceGraphicsInterop::set_destination( } const CUresult result = cuGraphicsGLRegisterBuffer( - &cu_graphics_resource_, destination.opengl_pbo_id, CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE); + &cu_graphics_resource_, display_interop.opengl_pbo_id, CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE); if (result != CUDA_SUCCESS) { LOG(ERROR) << "Error registering OpenGL buffer: " << cuewErrorString(result); } - opengl_pbo_id_ = destination.opengl_pbo_id; + opengl_pbo_id_ = display_interop.opengl_pbo_id; buffer_area_ = new_buffer_area; } diff --git a/intern/cycles/device/cuda/graphics_interop.h b/intern/cycles/device/cuda/graphics_interop.h index 8a70c8aa71d..ec480f20c86 100644 --- a/intern/cycles/device/cuda/graphics_interop.h +++ b/intern/cycles/device/cuda/graphics_interop.h @@ -41,7 +41,7 @@ class CUDADeviceGraphicsInterop : public DeviceGraphicsInterop { 
CUDADeviceGraphicsInterop &operator=(const CUDADeviceGraphicsInterop &other) = delete; CUDADeviceGraphicsInterop &operator=(CUDADeviceGraphicsInterop &&other) = delete; - virtual void set_destination(const DeviceGraphicsInteropDestination &destination) override; + virtual void set_display_interop(const DisplayDriver::GraphicsInterop &display_interop) override; virtual device_ptr map() override; virtual void unmap() override; diff --git a/intern/cycles/device/device_graphics_interop.h b/intern/cycles/device/device_graphics_interop.h index 671b1c189d7..eaf76077141 100644 --- a/intern/cycles/device/device_graphics_interop.h +++ b/intern/cycles/device/device_graphics_interop.h @@ -16,25 +16,12 @@ #pragma once +#include "render/display_driver.h" + #include "util/util_types.h" CCL_NAMESPACE_BEGIN -/* Information about interoperability destination. - * Is provided by the GPUDisplay. */ -class DeviceGraphicsInteropDestination { - public: - /* Dimensions of the buffer, in pixels. */ - int buffer_width = 0; - int buffer_height = 0; - - /* OpenGL pixel buffer object. */ - int opengl_pbo_id = 0; - - /* Clear the entire destination before doing partial write to it. */ - bool need_clear = false; -}; - /* Device-side graphics interoperability support. * * Takes care of holding all the handlers needed by the device to implement interoperability with @@ -46,7 +33,7 @@ class DeviceGraphicsInterop { /* Update this device-side graphics interoperability object with the given destination resource * information. 
*/ - virtual void set_destination(const DeviceGraphicsInteropDestination &destination) = 0; + virtual void set_display_interop(const DisplayDriver::GraphicsInterop &display_interop) = 0; virtual device_ptr map() = 0; virtual void unmap() = 0; diff --git a/intern/cycles/device/hip/graphics_interop.cpp b/intern/cycles/device/hip/graphics_interop.cpp index add6dbed5e1..0d5d71019b3 100644 --- a/intern/cycles/device/hip/graphics_interop.cpp +++ b/intern/cycles/device/hip/graphics_interop.cpp @@ -37,11 +37,15 @@ HIPDeviceGraphicsInterop::~HIPDeviceGraphicsInterop() } } -void HIPDeviceGraphicsInterop::set_destination(const DeviceGraphicsInteropDestination &destination) +void HIPDeviceGraphicsInterop::set_display_interop( + const DisplayDriver::GraphicsInterop &display_interop) { - const int64_t new_buffer_area = int64_t(destination.buffer_width) * destination.buffer_height; + const int64_t new_buffer_area = int64_t(display_interop.buffer_width) * + display_interop.buffer_height; - if (opengl_pbo_id_ == destination.opengl_pbo_id && buffer_area_ == new_buffer_area) { + need_clear_ = display_interop.need_clear; + + if (opengl_pbo_id_ == display_interop.opengl_pbo_id && buffer_area_ == new_buffer_area) { return; } @@ -52,12 +56,12 @@ void HIPDeviceGraphicsInterop::set_destination(const DeviceGraphicsInteropDestin } const hipError_t result = hipGraphicsGLRegisterBuffer( - &hip_graphics_resource_, destination.opengl_pbo_id, hipGraphicsRegisterFlagsNone); + &hip_graphics_resource_, display_interop.opengl_pbo_id, hipGraphicsRegisterFlagsNone); if (result != hipSuccess) { LOG(ERROR) << "Error registering OpenGL buffer: " << hipewErrorString(result); } - opengl_pbo_id_ = destination.opengl_pbo_id; + opengl_pbo_id_ = display_interop.opengl_pbo_id; buffer_area_ = new_buffer_area; } @@ -77,6 +81,14 @@ device_ptr HIPDeviceGraphicsInterop::map() hip_device_assert( device_, hipGraphicsResourceGetMappedPointer(&hip_buffer, &bytes, hip_graphics_resource_)); + if (need_clear_) { + 
hip_device_assert( + device_, + hipMemsetD8Async(static_cast<hipDeviceptr_t>(hip_buffer), 0, bytes, queue_->stream())); + + need_clear_ = false; + } + return static_cast<device_ptr>(hip_buffer); } diff --git a/intern/cycles/device/hip/graphics_interop.h b/intern/cycles/device/hip/graphics_interop.h index adcaa13a2d7..2b2d287ff6c 100644 --- a/intern/cycles/device/hip/graphics_interop.h +++ b/intern/cycles/device/hip/graphics_interop.h @@ -39,7 +39,7 @@ class HIPDeviceGraphicsInterop : public DeviceGraphicsInterop { HIPDeviceGraphicsInterop &operator=(const HIPDeviceGraphicsInterop &other) = delete; HIPDeviceGraphicsInterop &operator=(HIPDeviceGraphicsInterop &&other) = delete; - virtual void set_destination(const DeviceGraphicsInteropDestination &destination) override; + virtual void set_display_interop(const DisplayDriver::GraphicsInterop &display_interop) override; virtual device_ptr map() override; virtual void unmap() override; @@ -53,6 +53,9 @@ class HIPDeviceGraphicsInterop : public DeviceGraphicsInterop { /* Buffer area in pixels of the corresponding PBO. */ int64_t buffer_area_ = 0; + /* The destination was requested to be cleared. 
*/ + bool need_clear_ = false; + hipGraphicsResource hip_graphics_resource_ = nullptr; }; diff --git a/intern/cycles/integrator/CMakeLists.txt b/intern/cycles/integrator/CMakeLists.txt index bfabd35d7c3..8acd72f0508 100644 --- a/intern/cycles/integrator/CMakeLists.txt +++ b/intern/cycles/integrator/CMakeLists.txt @@ -27,6 +27,7 @@ set(SRC pass_accessor.cpp pass_accessor_cpu.cpp pass_accessor_gpu.cpp + path_trace_display.cpp path_trace_work.cpp path_trace_work_cpu.cpp path_trace_work_gpu.cpp @@ -47,6 +48,7 @@ set(SRC_HEADERS pass_accessor.h pass_accessor_cpu.h pass_accessor_gpu.h + path_trace_display.h path_trace_work.h path_trace_work_cpu.h path_trace_work_gpu.h diff --git a/intern/cycles/integrator/path_trace.cpp b/intern/cycles/integrator/path_trace.cpp index 9633d3b87d3..36cd7314b4c 100644 --- a/intern/cycles/integrator/path_trace.cpp +++ b/intern/cycles/integrator/path_trace.cpp @@ -19,8 +19,8 @@ #include "device/cpu/device.h" #include "device/device.h" #include "integrator/pass_accessor.h" +#include "integrator/path_trace_display.h" #include "integrator/render_scheduler.h" -#include "render/gpu_display.h" #include "render/pass.h" #include "render/scene.h" #include "render/tile.h" @@ -67,11 +67,11 @@ PathTrace::PathTrace(Device *device, PathTrace::~PathTrace() { /* Destroy any GPU resource which was used for graphics interop. - * Need to have access to the GPUDisplay as it is the only source of drawing context which is - * used for interop. */ - if (gpu_display_) { + * Need to have access to the PathTraceDisplay as it is the only source of drawing context which + * is used for interop. */ + if (display_) { for (auto &&path_trace_work : path_trace_works_) { - path_trace_work->destroy_gpu_resources(gpu_display_.get()); + path_trace_work->destroy_gpu_resources(display_.get()); } } } @@ -94,7 +94,7 @@ bool PathTrace::ready_to_reset() { /* The logic here is optimized for the best feedback in the viewport, which implies having a GPU * display. 
Of there is no such display, the logic here will break. */ - DCHECK(gpu_display_); + DCHECK(display_); /* The logic here tries to provide behavior which feels the most interactive feel to artists. * General idea is to be able to reset as quickly as possible, while still providing interactive @@ -126,8 +126,8 @@ void PathTrace::reset(const BufferParams &full_params, const BufferParams &big_t /* NOTE: GPU display checks for buffer modification and avoids unnecessary re-allocation. * It is requires to inform about reset whenever it happens, so that the redraw state tracking is * properly updated. */ - if (gpu_display_) { - gpu_display_->reset(full_params); + if (display_) { + display_->reset(full_params); } render_state_.has_denoised_result = false; @@ -535,25 +535,30 @@ void PathTrace::denoise(const RenderWork &render_work) render_scheduler_.report_denoise_time(render_work, time_dt() - start_time); } -void PathTrace::set_gpu_display(unique_ptr<GPUDisplay> gpu_display) +void PathTrace::set_display_driver(unique_ptr<DisplayDriver> driver) { - gpu_display_ = move(gpu_display); + if (driver) { + display_ = make_unique<PathTraceDisplay>(move(driver)); + } + else { + display_ = nullptr; + } } -void PathTrace::clear_gpu_display() +void PathTrace::clear_display() { - if (gpu_display_) { - gpu_display_->clear(); + if (display_) { + display_->clear(); } } void PathTrace::draw() { - if (!gpu_display_) { + if (!display_) { return; } - did_draw_after_reset_ |= gpu_display_->draw(); + did_draw_after_reset_ |= display_->draw(); } void PathTrace::update_display(const RenderWork &render_work) @@ -562,13 +567,13 @@ void PathTrace::update_display(const RenderWork &render_work) return; } - if (!gpu_display_ && !tile_buffer_update_cb) { + if (!display_ && !tile_buffer_update_cb) { VLOG(3) << "Ignore display update."; return; } if (full_params_.width == 0 || full_params_.height == 0) { - VLOG(3) << "Skipping GPUDisplay update due to 0 size of the render buffer."; + VLOG(3) << "Skipping 
PathTraceDisplay update due to 0 size of the render buffer."; return; } @@ -580,13 +585,13 @@ void PathTrace::update_display(const RenderWork &render_work) tile_buffer_update_cb(); } - if (gpu_display_) { + if (display_) { VLOG(3) << "Perform copy to GPUDisplay work."; const int resolution_divider = render_work.resolution_divider; const int texture_width = max(1, full_params_.width / resolution_divider); const int texture_height = max(1, full_params_.height / resolution_divider); - if (!gpu_display_->update_begin(texture_width, texture_height)) { + if (!display_->update_begin(texture_width, texture_height)) { LOG(ERROR) << "Error beginning GPUDisplay update."; return; } @@ -600,10 +605,10 @@ void PathTrace::update_display(const RenderWork &render_work) * all works in parallel. */ const int num_samples = get_num_samples_in_buffer(); for (auto &&path_trace_work : path_trace_works_) { - path_trace_work->copy_to_gpu_display(gpu_display_.get(), pass_mode, num_samples); + path_trace_work->copy_to_display(display_.get(), pass_mode, num_samples); } - gpu_display_->update_end(); + display_->update_end(); } render_scheduler_.report_display_update_time(render_work, time_dt() - start_time); diff --git a/intern/cycles/integrator/path_trace.h b/intern/cycles/integrator/path_trace.h index f507c2d7e0a..46eb0435c91 100644 --- a/intern/cycles/integrator/path_trace.h +++ b/intern/cycles/integrator/path_trace.h @@ -31,12 +31,13 @@ CCL_NAMESPACE_BEGIN class AdaptiveSampling; class Device; class DeviceScene; +class DisplayDriver; class Film; class RenderBuffers; class RenderScheduler; class RenderWork; +class PathTraceDisplay; class Progress; -class GPUDisplay; class TileManager; /* PathTrace class takes care of kernel graph and scheduling on a (multi)device. It takes care of @@ -98,13 +99,13 @@ class PathTrace { * Use this to configure the adaptive sampler before rendering any samples. 
*/ void set_adaptive_sampling(const AdaptiveSampling &adaptive_sampling); - /* Set GPU display which takes care of drawing the render result. */ - void set_gpu_display(unique_ptr<GPUDisplay> gpu_display); + /* Set display driver which takes care of drawing the render result. */ + void set_display_driver(unique_ptr<DisplayDriver> driver); - /* Clear the GPU display by filling it in with all zeroes. */ - void clear_gpu_display(); + /* Clear the display buffer by filling it in with all zeroes. */ + void clear_display(); - /* Perform drawing of the current state of the GPUDisplay. */ + /* Perform drawing of the current state of the DisplayDriver. */ void draw(); /* Cancel rendering process as soon as possible, without waiting for full tile to be sampled. @@ -252,7 +253,7 @@ class PathTrace { RenderScheduler &render_scheduler_; TileManager &tile_manager_; - unique_ptr<GPUDisplay> gpu_display_; + unique_ptr<PathTraceDisplay> display_; /* Per-compute device descriptors of work which is responsible for path tracing on its configured * device. */ diff --git a/intern/cycles/render/gpu_display.cpp b/intern/cycles/integrator/path_trace_display.cpp index a8f0cc50583..28f0a7f7745 100644 --- a/intern/cycles/render/gpu_display.cpp +++ b/intern/cycles/integrator/path_trace_display.cpp @@ -14,20 +14,25 @@ * limitations under the License. 
*/ -#include "render/gpu_display.h" +#include "integrator/path_trace_display.h" #include "render/buffers.h" + #include "util/util_logging.h" CCL_NAMESPACE_BEGIN -void GPUDisplay::reset(const BufferParams &buffer_params) +PathTraceDisplay::PathTraceDisplay(unique_ptr<DisplayDriver> driver) : driver_(move(driver)) +{ +} + +void PathTraceDisplay::reset(const BufferParams &buffer_params) { thread_scoped_lock lock(mutex_); - const GPUDisplayParams old_params = params_; + const DisplayDriver::Params old_params = params_; - params_.offset = make_int2(buffer_params.full_x, buffer_params.full_y); + params_.full_offset = make_int2(buffer_params.full_x, buffer_params.full_y); params_.full_size = make_int2(buffer_params.full_width, buffer_params.full_height); params_.size = make_int2(buffer_params.width, buffer_params.height); @@ -44,7 +49,7 @@ void GPUDisplay::reset(const BufferParams &buffer_params) texture_state_.is_outdated = true; } -void GPUDisplay::mark_texture_updated() +void PathTraceDisplay::mark_texture_updated() { texture_state_.is_outdated = false; texture_state_.is_usable = true; @@ -54,7 +59,7 @@ void GPUDisplay::mark_texture_updated() * Update procedure. */ -bool GPUDisplay::update_begin(int texture_width, int texture_height) +bool PathTraceDisplay::update_begin(int texture_width, int texture_height) { DCHECK(!update_state_.is_active); @@ -66,15 +71,15 @@ bool GPUDisplay::update_begin(int texture_width, int texture_height) /* Get parameters within a mutex lock, to avoid reset() modifying them at the same time. * The update itself is non-blocking however, for better performance and to avoid * potential deadlocks due to locks held by the subclass. 
*/ - GPUDisplayParams params; + DisplayDriver::Params params; { thread_scoped_lock lock(mutex_); params = params_; texture_state_.size = make_int2(texture_width, texture_height); } - if (!do_update_begin(params, texture_width, texture_height)) { - LOG(ERROR) << "GPUDisplay implementation could not begin update."; + if (!driver_->update_begin(params, texture_width, texture_height)) { + LOG(ERROR) << "PathTraceDisplay implementation could not begin update."; return false; } @@ -83,7 +88,7 @@ bool GPUDisplay::update_begin(int texture_width, int texture_height) return true; } -void GPUDisplay::update_end() +void PathTraceDisplay::update_end() { DCHECK(update_state_.is_active); @@ -92,12 +97,12 @@ void GPUDisplay::update_end() return; } - do_update_end(); + driver_->update_end(); update_state_.is_active = false; } -int2 GPUDisplay::get_texture_size() const +int2 PathTraceDisplay::get_texture_size() const { return texture_state_.size; } @@ -106,25 +111,54 @@ int2 GPUDisplay::get_texture_size() const * Texture update from CPU buffer. */ -void GPUDisplay::copy_pixels_to_texture( +void PathTraceDisplay::copy_pixels_to_texture( const half4 *rgba_pixels, int texture_x, int texture_y, int pixels_width, int pixels_height) { DCHECK(update_state_.is_active); if (!update_state_.is_active) { - LOG(ERROR) << "Attempt to copy pixels data outside of GPUDisplay update."; + LOG(ERROR) << "Attempt to copy pixels data outside of PathTraceDisplay update."; return; } mark_texture_updated(); - do_copy_pixels_to_texture(rgba_pixels, texture_x, texture_y, pixels_width, pixels_height); + + /* This call copies pixels to a mapped texture buffer which is typically much cheaper from CPU + * time point of view than to copy data directly to a texture. 
+ * + * The possible downside of this approach is that it might require a higher peak memory when + * doing partial updates of the texture (although, in practice even partial updates might peak + * with a full-frame buffer stored on the CPU if the GPU is currently occupied). */ + half4 *mapped_rgba_pixels = map_texture_buffer(); + if (!mapped_rgba_pixels) { + return; + } + + const int texture_width = texture_state_.size.x; + const int texture_height = texture_state_.size.y; + + if (texture_x == 0 && texture_y == 0 && pixels_width == texture_width && + pixels_height == texture_height) { + const size_t size_in_bytes = sizeof(half4) * texture_width * texture_height; + memcpy(mapped_rgba_pixels, rgba_pixels, size_in_bytes); + } + else { + const half4 *rgba_row = rgba_pixels; + half4 *mapped_rgba_row = mapped_rgba_pixels + texture_y * texture_width + texture_x; + for (int y = 0; y < pixels_height; + ++y, rgba_row += pixels_width, mapped_rgba_row += texture_width) { + memcpy(mapped_rgba_row, rgba_row, sizeof(half4) * pixels_width); + } + } + + unmap_texture_buffer(); } /* -------------------------------------------------------------------- * Texture buffer mapping. 
*/ -half4 *GPUDisplay::map_texture_buffer() +half4 *PathTraceDisplay::map_texture_buffer() { DCHECK(!texture_buffer_state_.is_mapped); DCHECK(update_state_.is_active); @@ -135,11 +169,11 @@ half4 *GPUDisplay::map_texture_buffer() } if (!update_state_.is_active) { - LOG(ERROR) << "Attempt to copy pixels data outside of GPUDisplay update."; + LOG(ERROR) << "Attempt to copy pixels data outside of PathTraceDisplay update."; return nullptr; } - half4 *mapped_rgba_pixels = do_map_texture_buffer(); + half4 *mapped_rgba_pixels = driver_->map_texture_buffer(); if (mapped_rgba_pixels) { texture_buffer_state_.is_mapped = true; @@ -148,7 +182,7 @@ half4 *GPUDisplay::map_texture_buffer() return mapped_rgba_pixels; } -void GPUDisplay::unmap_texture_buffer() +void PathTraceDisplay::unmap_texture_buffer() { DCHECK(texture_buffer_state_.is_mapped); @@ -160,14 +194,14 @@ void GPUDisplay::unmap_texture_buffer() texture_buffer_state_.is_mapped = false; mark_texture_updated(); - do_unmap_texture_buffer(); + driver_->unmap_texture_buffer(); } /* -------------------------------------------------------------------- * Graphics interoperability. 
*/ -DeviceGraphicsInteropDestination GPUDisplay::graphics_interop_get() +DisplayDriver::GraphicsInterop PathTraceDisplay::graphics_interop_get() { DCHECK(!texture_buffer_state_.is_mapped); DCHECK(update_state_.is_active); @@ -175,38 +209,45 @@ DeviceGraphicsInteropDestination GPUDisplay::graphics_interop_get() if (texture_buffer_state_.is_mapped) { LOG(ERROR) << "Attempt to use graphics interoperability mode while the texture buffer is mapped."; - return DeviceGraphicsInteropDestination(); + return DisplayDriver::GraphicsInterop(); } if (!update_state_.is_active) { - LOG(ERROR) << "Attempt to use graphics interoperability outside of GPUDisplay update."; - return DeviceGraphicsInteropDestination(); + LOG(ERROR) << "Attempt to use graphics interoperability outside of PathTraceDisplay update."; + return DisplayDriver::GraphicsInterop(); } /* Assume that interop will write new values to the texture. */ mark_texture_updated(); - return do_graphics_interop_get(); + return driver_->graphics_interop_get(); } -void GPUDisplay::graphics_interop_activate() +void PathTraceDisplay::graphics_interop_activate() { + driver_->graphics_interop_activate(); } -void GPUDisplay::graphics_interop_deactivate() +void PathTraceDisplay::graphics_interop_deactivate() { + driver_->graphics_interop_deactivate(); } /* -------------------------------------------------------------------- * Drawing. */ -bool GPUDisplay::draw() +void PathTraceDisplay::clear() +{ + driver_->clear(); +} + +bool PathTraceDisplay::draw() { /* Get parameters within a mutex lock, to avoid reset() modifying them at the same time. * The drawing itself is non-blocking however, for better performance and to avoid * potential deadlocks due to locks held by the subclass. 
*/ - GPUDisplayParams params; + DisplayDriver::Params params; bool is_usable; bool is_outdated; @@ -218,7 +259,7 @@ bool GPUDisplay::draw() } if (is_usable) { - do_draw(params); + driver_->draw(params); } return !is_outdated; diff --git a/intern/cycles/render/gpu_display.h b/intern/cycles/integrator/path_trace_display.h index 3c3cfaea513..24aaa0df6b1 100644 --- a/intern/cycles/render/gpu_display.h +++ b/intern/cycles/integrator/path_trace_display.h @@ -16,52 +16,30 @@ #pragma once -#include "device/device_graphics_interop.h" +#include "render/display_driver.h" + #include "util/util_half.h" #include "util/util_thread.h" #include "util/util_types.h" +#include "util/util_unique_ptr.h" CCL_NAMESPACE_BEGIN class BufferParams; -/* GPUDisplay class takes care of drawing render result in a viewport. The render result is stored - * in a GPU-side texture, which is updated from a path tracer and drawn by an application. +/* PathTraceDisplay is used for efficient render buffer display. * - * The base GPUDisplay does some special texture state tracking, which allows render Session to - * make decisions on whether reset for an updated state is possible or not. This state should only - * be tracked in a base class and a particular implementation should not worry about it. + * The host applications implements a DisplayDriver, storing a render pass in a GPU-side + * textures. This texture is continuously updated by the path tracer and drawn by the host + * application. * - * The subclasses should only implement the pure virtual methods, which allows them to not worry - * about parent method calls, which helps them to be as small and reliable as possible. */ - -class GPUDisplayParams { - public: - /* Offset of the display within a viewport. - * For example, set to a lower-bottom corner of border render in Blender's viewport. */ - int2 offset = make_int2(0, 0); - - /* Full viewport size. - * - * NOTE: Is not affected by the resolution divider. 
*/ - int2 full_size = make_int2(0, 0); - - /* Effective viewport size. - * In the case of border render, size of the border rectangle. - * - * NOTE: Is not affected by the resolution divider. */ - int2 size = make_int2(0, 0); - - bool modified(const GPUDisplayParams &other) const - { - return !(offset == other.offset && full_size == other.full_size && size == other.size); - } -}; + * PathTraceDisplay is a wrapper around the DisplayDriver, adding thread safety, state tracking + * and error checking. */ -class GPUDisplay { +class PathTraceDisplay { public: - GPUDisplay() = default; - virtual ~GPUDisplay() = default; + PathTraceDisplay(unique_ptr<DisplayDriver> driver); + virtual ~PathTraceDisplay() = default; /* Reset the display for the new state of render session. Is called whenever session is reset, * which happens on changes like viewport navigation or viewport dimension change. @@ -69,11 +47,6 @@ class GPUDisplay { * This call will configure parameters for a changed buffer and reset the texture state. */ void reset(const BufferParams &buffer_params); - const GPUDisplayParams &get_params() const - { - return params_; - } - /* -------------------------------------------------------------------- * Update procedure. * @@ -94,7 +67,8 @@ class GPUDisplay { /* -------------------------------------------------------------------- * Texture update from CPU buffer. * - * NOTE: The GPUDisplay should be marked for an update being in process with `update_begin()`. + * NOTE: The PathTraceDisplay should be marked for an update being in process with + * `update_begin()`. * * Most portable implementation, which must be supported by all platforms. Might not be the most * efficient one. @@ -115,7 +89,8 @@ class GPUDisplay { * This functionality is used to update GPU-side texture content without need to maintain CPU * side buffer on the caller. * - * NOTE: The GPUDisplay should be marked for an update being in process with `update_begin()`. 
+ * NOTE: The PathTraceDisplay should be marked for an update being in process with + * `update_begin()`. * * NOTE: Texture buffer can not be mapped while graphics interoperability is active. This means * that `map_texture_buffer()` is not allowed between `graphics_interop_begin()` and @@ -145,14 +120,14 @@ class GPUDisplay { * that `graphics_interop_get()` is not allowed between `map_texture_buffer()` and * `unmap_texture_buffer()` calls. */ - /* Get GPUDisplay graphics interoperability information which acts as a destination for the + /* Get PathTraceDisplay graphics interoperability information which acts as a destination for the * device API. */ - DeviceGraphicsInteropDestination graphics_interop_get(); + DisplayDriver::GraphicsInterop graphics_interop_get(); /* (De)activate GPU display for graphics interoperability outside of regular display update * routines. */ - virtual void graphics_interop_activate(); - virtual void graphics_interop_deactivate(); + void graphics_interop_activate(); + void graphics_interop_deactivate(); /* -------------------------------------------------------------------- * Drawing. @@ -168,42 +143,21 @@ class GPUDisplay { * after clear will write new pixel values for an updating area, leaving everything else zeroed. * * If the GPU display supports graphics interoperability then the zeroing the display is to be - * delegated to the device via the `DeviceGraphicsInteropDestination`. */ - virtual void clear() = 0; + * delegated to the device via the `DisplayDriver::GraphicsInterop`. */ + void clear(); /* Draw the current state of the texture. * * Returns true if this call did draw an updated state of the texture. */ bool draw(); - protected: - /* Implementation-specific calls which subclasses are to implement. - * These `do_foo()` method corresponds to their `foo()` calls, but they are purely virtual to - * simplify their particular implementation. 
*/ - virtual bool do_update_begin(const GPUDisplayParams &params, - int texture_width, - int texture_height) = 0; - virtual void do_update_end() = 0; - - virtual void do_copy_pixels_to_texture(const half4 *rgba_pixels, - int texture_x, - int texture_y, - int pixels_width, - int pixels_height) = 0; - - virtual half4 *do_map_texture_buffer() = 0; - virtual void do_unmap_texture_buffer() = 0; - - /* Note that this might be called in parallel to do_update_begin() and do_update_end(), - * the subclass is responsible for appropriate mutex locks to avoid multiple threads - * editing and drawing the texture at the same time. */ - virtual void do_draw(const GPUDisplayParams &params) = 0; - - virtual DeviceGraphicsInteropDestination do_graphics_interop_get() = 0; - private: + /* Display driver implemented by the host application. */ + unique_ptr<DisplayDriver> driver_; + + /* Current display parameters */ thread_mutex mutex_; - GPUDisplayParams params_; + DisplayDriver::Params params_; /* Mark texture as its content has been updated.
* Used from places which knows that the texture content has been brought up-to-date, so that the diff --git a/intern/cycles/integrator/path_trace_work.cpp b/intern/cycles/integrator/path_trace_work.cpp index d9634acac10..c29177907c9 100644 --- a/intern/cycles/integrator/path_trace_work.cpp +++ b/intern/cycles/integrator/path_trace_work.cpp @@ -16,12 +16,12 @@ #include "device/device.h" +#include "integrator/path_trace_display.h" #include "integrator/path_trace_work.h" #include "integrator/path_trace_work_cpu.h" #include "integrator/path_trace_work_gpu.h" #include "render/buffers.h" #include "render/film.h" -#include "render/gpu_display.h" #include "render/scene.h" #include "kernel/kernel_types.h" @@ -185,12 +185,12 @@ PassAccessor::PassAccessInfo PathTraceWork::get_display_pass_access_info(PassMod return pass_access_info; } -PassAccessor::Destination PathTraceWork::get_gpu_display_destination_template( - const GPUDisplay *gpu_display) const +PassAccessor::Destination PathTraceWork::get_display_destination_template( + const PathTraceDisplay *display) const { PassAccessor::Destination destination(film_->get_display_pass()); - const int2 display_texture_size = gpu_display->get_texture_size(); + const int2 display_texture_size = display->get_texture_size(); const int texture_x = effective_buffer_params_.full_x - effective_full_params_.full_x; const int texture_y = effective_buffer_params_.full_y - effective_full_params_.full_y; diff --git a/intern/cycles/integrator/path_trace_work.h b/intern/cycles/integrator/path_trace_work.h index e1be1655edd..404165b7c55 100644 --- a/intern/cycles/integrator/path_trace_work.h +++ b/intern/cycles/integrator/path_trace_work.h @@ -28,7 +28,7 @@ class BufferParams; class Device; class DeviceScene; class Film; -class GPUDisplay; +class PathTraceDisplay; class RenderBuffers; class PathTraceWork { @@ -83,11 +83,9 @@ class PathTraceWork { * noisy pass mode will be passed here when it is known that the buffer does not have denoised * passes 
yet (because denoiser did not run). If the denoised pass is requested and denoiser is * not used then this function will fall-back to the noisy pass instead. */ - virtual void copy_to_gpu_display(GPUDisplay *gpu_display, - PassMode pass_mode, - int num_samples) = 0; + virtual void copy_to_display(PathTraceDisplay *display, PassMode pass_mode, int num_samples) = 0; - virtual void destroy_gpu_resources(GPUDisplay *gpu_display) = 0; + virtual void destroy_gpu_resources(PathTraceDisplay *display) = 0; /* Copy data from/to given render buffers. * Will copy pixels from a corresponding place (from multi-device point of view) of the render @@ -162,8 +160,8 @@ class PathTraceWork { /* Get destination which offset and stride are configured so that writing to it will write to a * proper location of GPU display texture, taking current tile and device slice into account. */ - PassAccessor::Destination get_gpu_display_destination_template( - const GPUDisplay *gpu_display) const; + PassAccessor::Destination get_display_destination_template( + const PathTraceDisplay *display) const; /* Device which will be used for path tracing. * Note that it is an actual render device (and never is a multi-device). 
*/ diff --git a/intern/cycles/integrator/path_trace_work_cpu.cpp b/intern/cycles/integrator/path_trace_work_cpu.cpp index 14658d4d1ce..18a5365453d 100644 --- a/intern/cycles/integrator/path_trace_work_cpu.cpp +++ b/intern/cycles/integrator/path_trace_work_cpu.cpp @@ -22,9 +22,9 @@ #include "kernel/kernel_path_state.h" #include "integrator/pass_accessor_cpu.h" +#include "integrator/path_trace_display.h" #include "render/buffers.h" -#include "render/gpu_display.h" #include "render/scene.h" #include "util/util_atomic.h" @@ -161,14 +161,14 @@ void PathTraceWorkCPU::render_samples_full_pipeline(KernelGlobals *kernel_global } } -void PathTraceWorkCPU::copy_to_gpu_display(GPUDisplay *gpu_display, - PassMode pass_mode, - int num_samples) +void PathTraceWorkCPU::copy_to_display(PathTraceDisplay *display, + PassMode pass_mode, + int num_samples) { - half4 *rgba_half = gpu_display->map_texture_buffer(); + half4 *rgba_half = display->map_texture_buffer(); if (!rgba_half) { - /* TODO(sergey): Look into using copy_to_gpu_display() if mapping failed. Might be needed for - * some implementations of GPUDisplay which can not map memory? */ + /* TODO(sergey): Look into using copy_to_display() if mapping failed. Might be needed for + * some implementations of PathTraceDisplay which can not map memory? 
*/ return; } @@ -178,7 +178,7 @@ void PathTraceWorkCPU::copy_to_gpu_display(GPUDisplay *gpu_display, const PassAccessorCPU pass_accessor(pass_access_info, kfilm.exposure, num_samples); - PassAccessor::Destination destination = get_gpu_display_destination_template(gpu_display); + PassAccessor::Destination destination = get_display_destination_template(display); destination.pixels_half_rgba = rgba_half; tbb::task_arena local_arena = local_tbb_arena_create(device_); @@ -186,10 +186,10 @@ void PathTraceWorkCPU::copy_to_gpu_display(GPUDisplay *gpu_display, pass_accessor.get_render_tile_pixels(buffers_.get(), effective_buffer_params_, destination); }); - gpu_display->unmap_texture_buffer(); + display->unmap_texture_buffer(); } -void PathTraceWorkCPU::destroy_gpu_resources(GPUDisplay * /*gpu_display*/) +void PathTraceWorkCPU::destroy_gpu_resources(PathTraceDisplay * /*display*/) { } diff --git a/intern/cycles/integrator/path_trace_work_cpu.h b/intern/cycles/integrator/path_trace_work_cpu.h index ab729bbf879..d011e8d05bd 100644 --- a/intern/cycles/integrator/path_trace_work_cpu.h +++ b/intern/cycles/integrator/path_trace_work_cpu.h @@ -50,10 +50,10 @@ class PathTraceWorkCPU : public PathTraceWork { int start_sample, int samples_num) override; - virtual void copy_to_gpu_display(GPUDisplay *gpu_display, - PassMode pass_mode, - int num_samples) override; - virtual void destroy_gpu_resources(GPUDisplay *gpu_display) override; + virtual void copy_to_display(PathTraceDisplay *display, + PassMode pass_mode, + int num_samples) override; + virtual void destroy_gpu_resources(PathTraceDisplay *display) override; virtual bool copy_render_buffers_from_device() override; virtual bool copy_render_buffers_to_device() override; diff --git a/intern/cycles/integrator/path_trace_work_gpu.cpp b/intern/cycles/integrator/path_trace_work_gpu.cpp index e41d8d1d252..17c49f244d2 100644 --- a/intern/cycles/integrator/path_trace_work_gpu.cpp +++ b/intern/cycles/integrator/path_trace_work_gpu.cpp @@ 
-15,12 +15,12 @@ */ #include "integrator/path_trace_work_gpu.h" +#include "integrator/path_trace_display.h" #include "device/device.h" #include "integrator/pass_accessor_gpu.h" #include "render/buffers.h" -#include "render/gpu_display.h" #include "render/scene.h" #include "util/util_logging.h" #include "util/util_tbb.h" @@ -46,7 +46,7 @@ PathTraceWorkGPU::PathTraceWorkGPU(Device *device, queued_paths_(device, "queued_paths", MEM_READ_WRITE), num_queued_paths_(device, "num_queued_paths", MEM_READ_WRITE), work_tiles_(device, "work_tiles", MEM_READ_WRITE), - gpu_display_rgba_half_(device, "display buffer half", MEM_READ_WRITE), + display_rgba_half_(device, "display buffer half", MEM_READ_WRITE), max_num_paths_(queue_->num_concurrent_states(sizeof(IntegratorStateCPU))), min_num_active_paths_(queue_->num_concurrent_busy_states()), max_active_path_index_(0) @@ -652,7 +652,7 @@ int PathTraceWorkGPU::get_num_active_paths() bool PathTraceWorkGPU::should_use_graphics_interop() { /* There are few aspects with the graphics interop when using multiple devices caused by the fact - * that the GPUDisplay has a single texture: + * that the PathTraceDisplay has a single texture: * * CUDA will return `CUDA_ERROR_NOT_SUPPORTED` from `cuGraphicsGLRegisterBuffer()` when * attempting to register OpenGL PBO which has been mapped. 
Which makes sense, because @@ -678,9 +678,9 @@ bool PathTraceWorkGPU::should_use_graphics_interop() return interop_use_; } -void PathTraceWorkGPU::copy_to_gpu_display(GPUDisplay *gpu_display, - PassMode pass_mode, - int num_samples) +void PathTraceWorkGPU::copy_to_display(PathTraceDisplay *display, + PassMode pass_mode, + int num_samples) { if (device_->have_error()) { /* Don't attempt to update GPU display if the device has errors: the error state will make @@ -694,7 +694,7 @@ void PathTraceWorkGPU::copy_to_gpu_display(GPUDisplay *gpu_display, } if (should_use_graphics_interop()) { - if (copy_to_gpu_display_interop(gpu_display, pass_mode, num_samples)) { + if (copy_to_display_interop(display, pass_mode, num_samples)) { return; } @@ -703,12 +703,12 @@ void PathTraceWorkGPU::copy_to_gpu_display(GPUDisplay *gpu_display, interop_use_ = false; } - copy_to_gpu_display_naive(gpu_display, pass_mode, num_samples); + copy_to_display_naive(display, pass_mode, num_samples); } -void PathTraceWorkGPU::copy_to_gpu_display_naive(GPUDisplay *gpu_display, - PassMode pass_mode, - int num_samples) +void PathTraceWorkGPU::copy_to_display_naive(PathTraceDisplay *display, + PassMode pass_mode, + int num_samples) { const int full_x = effective_buffer_params_.full_x; const int full_y = effective_buffer_params_.full_y; @@ -725,44 +725,42 @@ void PathTraceWorkGPU::copy_to_gpu_display_naive(GPUDisplay *gpu_display, * NOTE: allocation happens to the final resolution so that no re-allocation happens on every * change of the resolution divider. However, if the display becomes smaller, shrink the * allocated memory as well. 
*/ - if (gpu_display_rgba_half_.data_width != final_width || - gpu_display_rgba_half_.data_height != final_height) { - gpu_display_rgba_half_.alloc(final_width, final_height); + if (display_rgba_half_.data_width != final_width || + display_rgba_half_.data_height != final_height) { + display_rgba_half_.alloc(final_width, final_height); /* TODO(sergey): There should be a way to make sure device-side memory is allocated without * transferring zeroes to the device. */ - queue_->zero_to_device(gpu_display_rgba_half_); + queue_->zero_to_device(display_rgba_half_); } PassAccessor::Destination destination(film_->get_display_pass()); - destination.d_pixels_half_rgba = gpu_display_rgba_half_.device_pointer; + destination.d_pixels_half_rgba = display_rgba_half_.device_pointer; get_render_tile_film_pixels(destination, pass_mode, num_samples); - queue_->copy_from_device(gpu_display_rgba_half_); + queue_->copy_from_device(display_rgba_half_); queue_->synchronize(); - gpu_display->copy_pixels_to_texture( - gpu_display_rgba_half_.data(), texture_x, texture_y, width, height); + display->copy_pixels_to_texture(display_rgba_half_.data(), texture_x, texture_y, width, height); } -bool PathTraceWorkGPU::copy_to_gpu_display_interop(GPUDisplay *gpu_display, - PassMode pass_mode, - int num_samples) +bool PathTraceWorkGPU::copy_to_display_interop(PathTraceDisplay *display, + PassMode pass_mode, + int num_samples) { if (!device_graphics_interop_) { device_graphics_interop_ = queue_->graphics_interop_create(); } - const DeviceGraphicsInteropDestination graphics_interop_dst = - gpu_display->graphics_interop_get(); - device_graphics_interop_->set_destination(graphics_interop_dst); + const DisplayDriver::GraphicsInterop graphics_interop_dst = display->graphics_interop_get(); + device_graphics_interop_->set_display_interop(graphics_interop_dst); const device_ptr d_rgba_half = device_graphics_interop_->map(); if (!d_rgba_half) { return false; } - PassAccessor::Destination destination = 
get_gpu_display_destination_template(gpu_display); + PassAccessor::Destination destination = get_display_destination_template(display); destination.d_pixels_half_rgba = d_rgba_half; get_render_tile_film_pixels(destination, pass_mode, num_samples); @@ -772,14 +770,14 @@ bool PathTraceWorkGPU::copy_to_gpu_display_interop(GPUDisplay *gpu_display, return true; } -void PathTraceWorkGPU::destroy_gpu_resources(GPUDisplay *gpu_display) +void PathTraceWorkGPU::destroy_gpu_resources(PathTraceDisplay *display) { if (!device_graphics_interop_) { return; } - gpu_display->graphics_interop_activate(); + display->graphics_interop_activate(); device_graphics_interop_ = nullptr; - gpu_display->graphics_interop_deactivate(); + display->graphics_interop_deactivate(); } void PathTraceWorkGPU::get_render_tile_film_pixels(const PassAccessor::Destination &destination, diff --git a/intern/cycles/integrator/path_trace_work_gpu.h b/intern/cycles/integrator/path_trace_work_gpu.h index 38788122b0d..9212537d2fd 100644 --- a/intern/cycles/integrator/path_trace_work_gpu.h +++ b/intern/cycles/integrator/path_trace_work_gpu.h @@ -48,10 +48,10 @@ class PathTraceWorkGPU : public PathTraceWork { int start_sample, int samples_num) override; - virtual void copy_to_gpu_display(GPUDisplay *gpu_display, - PassMode pass_mode, - int num_samples) override; - virtual void destroy_gpu_resources(GPUDisplay *gpu_display) override; + virtual void copy_to_display(PathTraceDisplay *display, + PassMode pass_mode, + int num_samples) override; + virtual void destroy_gpu_resources(PathTraceDisplay *display) override; virtual bool copy_render_buffers_from_device() override; virtual bool copy_render_buffers_to_device() override; @@ -88,16 +88,16 @@ class PathTraceWorkGPU : public PathTraceWork { int get_num_active_paths(); - /* Check whether graphics interop can be used for the GPUDisplay update. */ + /* Check whether graphics interop can be used for the PathTraceDisplay update. 
*/ bool should_use_graphics_interop(); - /* Naive implementation of the `copy_to_gpu_display()` which performs film conversion on the - * device, then copies pixels to the host and pushes them to the `gpu_display`. */ - void copy_to_gpu_display_naive(GPUDisplay *gpu_display, PassMode pass_mode, int num_samples); + /* Naive implementation of the `copy_to_display()` which performs film conversion on the + * device, then copies pixels to the host and pushes them to the `display`. */ + void copy_to_display_naive(PathTraceDisplay *display, PassMode pass_mode, int num_samples); - /* Implementation of `copy_to_gpu_display()` which uses driver's OpenGL/GPU interoperability + /* Implementation of `copy_to_display()` which uses driver's OpenGL/GPU interoperability * functionality, avoiding copy of pixels to the host. */ - bool copy_to_gpu_display_interop(GPUDisplay *gpu_display, PassMode pass_mode, int num_samples); + bool copy_to_display_interop(PathTraceDisplay *display, PassMode pass_mode, int num_samples); /* Synchronously run film conversion kernel and store display result in the given destination. */ void get_render_tile_film_pixels(const PassAccessor::Destination &destination, @@ -139,9 +139,9 @@ class PathTraceWorkGPU : public PathTraceWork { /* Temporary buffer for passing work tiles to kernel. */ device_vector<KernelWorkTile> work_tiles_; - /* Temporary buffer used by the copy_to_gpu_display() whenever graphics interoperability is not + /* Temporary buffer used by the copy_to_display() whenever graphics interoperability is not * available. Is allocated on-demand. 
*/ - device_vector<half4> gpu_display_rgba_half_; + device_vector<half4> display_rgba_half_; unique_ptr<DeviceGraphicsInterop> device_graphics_interop_; diff --git a/intern/cycles/integrator/render_scheduler.h b/intern/cycles/integrator/render_scheduler.h index 6ed368a2dc8..c4ab15e54ba 100644 --- a/intern/cycles/integrator/render_scheduler.h +++ b/intern/cycles/integrator/render_scheduler.h @@ -344,7 +344,7 @@ class RenderScheduler { /* Number of rendered samples on top of the start sample. */ int num_rendered_samples = 0; - /* Point in time the latest GPUDisplay work has been scheduled. */ + /* Point in time the latest PathTraceDisplay work has been scheduled. */ double last_display_update_time = 0.0; /* Value of -1 means display was never updated. */ int last_display_update_sample = -1; diff --git a/intern/cycles/render/CMakeLists.txt b/intern/cycles/render/CMakeLists.txt index 6edb5261b32..ce1a9e5f430 100644 --- a/intern/cycles/render/CMakeLists.txt +++ b/intern/cycles/render/CMakeLists.txt @@ -35,7 +35,6 @@ set(SRC denoising.cpp film.cpp geometry.cpp - gpu_display.cpp graph.cpp hair.cpp image.cpp @@ -78,9 +77,9 @@ set(SRC_HEADERS colorspace.h constant_fold.h denoising.h + display_driver.h film.h geometry.h - gpu_display.h graph.h hair.h image.h diff --git a/intern/cycles/render/display_driver.h b/intern/cycles/render/display_driver.h new file mode 100644 index 00000000000..85f305034d7 --- /dev/null +++ b/intern/cycles/render/display_driver.h @@ -0,0 +1,131 @@ +/* + * Copyright 2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "util/util_half.h" +#include "util/util_types.h" + +CCL_NAMESPACE_BEGIN + +/* Display driver for efficient interactive display of renders. + * + * Host applications implement this interface for viewport rendering. For best performance, we + * recommend: + * - Allocating a texture on the GPU to be interactively updated + * - Using the graphics interop mechanism to avoid CPU-GPU copying overhead + * - Using a dedicated or thread-safe graphics API context for updates, to avoid + * blocking the host application. + */ +class DisplayDriver { + public: + DisplayDriver() = default; + virtual ~DisplayDriver() = default; + + /* Render buffer parameters. */ + struct Params { + public: + /* Render resolution, ignoring progressive resolution changes. + * The texture buffer should be allocated with this size. */ + int2 size = make_int2(0, 0); + + /* For border rendering, the full resolution of the render, and the offset within that larger + * render. */ + int2 full_size = make_int2(0, 0); + int2 full_offset = make_int2(0, 0); + + bool modified(const Params &other) const + { + return !(full_offset == other.full_offset && full_size == other.full_size && + size == other.size); + } + }; + + /* Update the render from the rendering thread. + * + * Cycles periodically updates the render to be displayed. For multithreaded updates with + * potentially multiple rendering devices, it will call these methods as follows. + * + * if (driver.update_begin(params, width, height)) { + * parallel_for_each(rendering_device) { + * buffer = driver.map_texture_buffer(); + * if (buffer) { + * fill(buffer); + * driver.unmap_texture_buffer(); + * } + * } + * driver.update_end(); + * } + * + * The parameters may dynamically change due to camera changes in the scene, and resources should + * be re-allocated accordingly. 
+ * + * The width and height passed to update_begin() are the effective render resolution taking into + * account progressive resolution changes, which may be equal to or smaller than the params.size. + * For efficiency, changes in this resolution should be handled without re-allocating resources, + * but rather by using a subset of the full resolution buffer. */ + virtual bool update_begin(const Params &params, int width, int height) = 0; + virtual void update_end() = 0; + + virtual half4 *map_texture_buffer() = 0; + virtual void unmap_texture_buffer() = 0; + + /* Optionally return a handle to a native graphics API texture buffer. If supported, + * the rendering device may write directly to this buffer instead of calling + * map_texture_buffer() and unmap_texture_buffer(). */ + class GraphicsInterop { + public: + /* Dimensions of the buffer, in pixels. */ + int buffer_width = 0; + int buffer_height = 0; + + /* OpenGL pixel buffer object. */ + int opengl_pbo_id = 0; + + /* Clear the entire buffer before doing partial write to it. */ + bool need_clear = false; + }; + + virtual GraphicsInterop graphics_interop_get() + { + return GraphicsInterop(); + } + + /* (De)activate graphics context required for editing or deleting the graphics interop + * object. + * + * For example, destruction of the CUDA object associated with an OpenGL requires the + * OpenGL context to be active. */ + virtual void graphics_interop_activate(){}; + virtual void graphics_interop_deactivate(){}; + + /* Clear the display buffer by filling it with zeros. */ + virtual void clear() = 0; + + /* Draw the render using the native graphics API. + * + * Note that this may be called in parallel to updates. The implementation is responsible for + * mutex locking or other mechanisms to avoid conflicts. + * + * The parameters may have changed since the last update. The implementation is responsible for + * deciding to skip or adjust render display for such changes.
+ * + * Host application drawing the render buffer should use Session.draw(), which will + * call this method. */ + virtual void draw(const Params &params) = 0; +}; + +CCL_NAMESPACE_END diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp index 269d67e8bda..c191b9a9b4a 100644 --- a/intern/cycles/render/session.cpp +++ b/intern/cycles/render/session.cpp @@ -25,7 +25,7 @@ #include "render/bake.h" #include "render/buffers.h" #include "render/camera.h" -#include "render/gpu_display.h" +#include "render/display_driver.h" #include "render/graph.h" #include "render/integrator.h" #include "render/light.h" @@ -162,7 +162,7 @@ bool Session::ready_to_reset() void Session::run_main_render_loop() { - path_trace_->clear_gpu_display(); + path_trace_->clear_display(); while (true) { RenderWork render_work = run_update_for_next_iteration(); @@ -514,9 +514,9 @@ void Session::set_pause(bool pause) } } -void Session::set_gpu_display(unique_ptr<GPUDisplay> gpu_display) +void Session::set_display_driver(unique_ptr<DisplayDriver> driver) { - path_trace_->set_gpu_display(move(gpu_display)); + path_trace_->set_display_driver(move(driver)); } double Session::get_estimated_remaining_time() const diff --git a/intern/cycles/render/session.h b/intern/cycles/render/session.h index e3056e7778b..607e40c47c1 100644 --- a/intern/cycles/render/session.h +++ b/intern/cycles/render/session.h @@ -35,9 +35,9 @@ CCL_NAMESPACE_BEGIN class BufferParams; class Device; class DeviceScene; +class DisplayDriver; class PathTrace; class Progress; -class GPUDisplay; class RenderBuffers; class Scene; class SceneParams; @@ -143,7 +143,7 @@ class Session { void set_samples(int samples); void set_time_limit(double time_limit); - void set_gpu_display(unique_ptr<GPUDisplay> gpu_display); + void set_display_driver(unique_ptr<DisplayDriver> driver); double get_estimated_remaining_time() const; |