Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/videolan/dav1d.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Niklas Haas <git@haasn.xyz> 2019-09-15 05:38:31 +0300
committer: Jean-Baptiste Kempf <jb@videolan.org> 2019-09-28 01:31:02 +0300
commit: 490a1420f34765f6b1aa9610e23aea247bec2dcc (patch)
tree: edba1e1a015ea6217777dd31ce677cfcbd32fb2e /examples
parent: 3f35ef1f312dd9f8eaa91c4c4d94f5605bfebdc4 (diff)
dav1dplay: initial support for --zerocopy
Right now this just allocates a new buffer for every frame, uses it, then discards it immediately. This is not optimal: either dav1d should start reusing buffers internally, or we need to pool them in dav1dplay. As it stands, this is not really a performance gain. I'll have to investigate why, but my suspicion is that seeing any gains might require reusing buffers somewhere. Note: Thrashing buffers is not as bad as it initially seems. Not only does libplacebo pool and reuse GPU memory and buffer state objects internally, but this also absolves us from having to do any manual polling to figure out when the buffer is reusable again. Creating, using and immediately destroying buffers actually isn't as bad an approach as it might otherwise seem. It's entirely possible that this is only bad because of lock contention. As said, I'll have to investigate further...
Diffstat (limited to 'examples')
-rw-r--r-- examples/dav1dplay.c 150
1 files changed, 141 insertions, 9 deletions
diff --git a/examples/dav1dplay.c b/examples/dav1dplay.c
index d711b53..56c8acd 100644
--- a/examples/dav1dplay.c
+++ b/examples/dav1dplay.c
@@ -51,6 +51,7 @@ typedef struct {
const char *inputfile;
int highquality;
int untimed;
+ int zerocopy;
} Dav1dPlaySettings;
#define WINDOW_WIDTH 910
@@ -161,7 +162,11 @@ typedef struct rdr_info
// Callback to the render function that renders a previously sent frame
void (*render)(void *cookie, const Dav1dPlaySettings *settings);
// Callback to the send frame function
- int (*update_frame)(void *cookie, Dav1dPicture *dav1d_pic);
+ int (*update_frame)(void *cookie, Dav1dPicture *dav1d_pic,
+ const Dav1dPlaySettings *settings);
+ // Callback for alloc/release pictures (optional)
+ int (*alloc_pic)(Dav1dPicture *pic, void *cookie);
+ void (*release_pic)(Dav1dPicture *pic, void *cookie);
} Dav1dPlayRenderInfo;
#ifdef HAVE_PLACEBO_VULKAN
@@ -389,7 +394,8 @@ static void placebo_render(void *cookie, const Dav1dPlaySettings *settings)
SDL_UnlockMutex(rd_priv_ctx->lock);
}
-static int placebo_upload_planes(void *cookie, Dav1dPicture *dav1d_pic)
+static int placebo_upload_planes(void *cookie, Dav1dPicture *dav1d_pic,
+ const Dav1dPlaySettings *settings)
{
Dav1dPlayRendererPrivateContext *rd_priv_ctx = cookie;
assert(rd_priv_ctx != NULL);
@@ -417,7 +423,6 @@ static int placebo_upload_planes(void *cookie, Dav1dPicture *dav1d_pic)
.height = height,
.pixel_stride = 1,
.row_stride = dav1d_pic->stride[0],
- .pixels = dav1d_pic->data[0],
.component_size = {8},
.component_map = {0},
};
@@ -428,7 +433,6 @@ static int placebo_upload_planes(void *cookie, Dav1dPicture *dav1d_pic)
.height = height/2,
.pixel_stride = 1,
.row_stride = dav1d_pic->stride[1],
- .pixels = dav1d_pic->data[1],
.component_size = {8},
.component_map = {1},
};
@@ -439,11 +443,23 @@ static int placebo_upload_planes(void *cookie, Dav1dPicture *dav1d_pic)
.height = height/2,
.pixel_stride = 1,
.row_stride = dav1d_pic->stride[1],
- .pixels = dav1d_pic->data[2],
.component_size = {8},
.component_map = {2},
};
+ if (settings->zerocopy) {
+ const struct pl_buf *buf = dav1d_pic->allocator_data;
+ assert(buf);
+ data_y.buf = data_u.buf = data_v.buf = buf;
+ data_y.buf_offset = (uintptr_t) dav1d_pic->data[0] - (uintptr_t) buf->data;
+ data_u.buf_offset = (uintptr_t) dav1d_pic->data[1] - (uintptr_t) buf->data;
+ data_v.buf_offset = (uintptr_t) dav1d_pic->data[2] - (uintptr_t) buf->data;
+ } else {
+ data_y.pixels = dav1d_pic->data[0];
+ data_u.pixels = dav1d_pic->data[1];
+ data_v.pixels = dav1d_pic->data[2];
+ }
+
bool ok = true;
ok &= pl_upload_plane(rd_priv_ctx->vk->gpu, &(rd_priv_ctx->y_plane), &(rd_priv_ctx->y_tex), &data_y);
ok &= pl_upload_plane(rd_priv_ctx->vk->gpu, &(rd_priv_ctx->u_plane), &(rd_priv_ctx->u_tex), &data_u);
@@ -460,11 +476,106 @@ static int placebo_upload_planes(void *cookie, Dav1dPicture *dav1d_pic)
return !ok;
}
+// Round x up to the next multiple of align; align must be a power of 2
+#define ALIGN2(x, align) (((x) + (align) - 1) & ~((align) - 1))
+
+static int placebo_alloc_pic(Dav1dPicture *const p, void *cookie) // picture alloc callback: returns 0 on success, DAV1D_ERR(ENOMEM) on failure
+{
+ Dav1dPlayRendererPrivateContext *rd_priv_ctx = cookie; // cookie is the renderer's private context
+ assert(rd_priv_ctx != NULL);
+ SDL_LockMutex(rd_priv_ctx->lock); // held until the err: label at the end of the function
+
+ const struct pl_gpu *gpu = rd_priv_ctx->vk->gpu;
+ int ret = DAV1D_ERR(ENOMEM); // default result; only overwritten with 0 once all planes are set up
+
+ // Layout computation copied from dav1d_default_picture_alloc
+ const int hbd = p->p.bpc > 8; // 1 for bit depths above 8; used below as a shift that doubles byte widths
+ const int aligned_w = ALIGN2(p->p.w, 128); // width rounded up to a multiple of 128
+ const int aligned_h = ALIGN2(p->p.h, 128); // height rounded up to a multiple of 128
+ const int has_chroma = p->p.layout != DAV1D_PIXEL_LAYOUT_I400;
+ const int ss_ver = p->p.layout == DAV1D_PIXEL_LAYOUT_I420; // chroma height shift: 1 only for I420
+ const int ss_hor = p->p.layout != DAV1D_PIXEL_LAYOUT_I444; // chroma width shift: 1 for every layout except I444
+ p->stride[0] = aligned_w << hbd;
+ p->stride[1] = has_chroma ? (aligned_w >> ss_hor) << hbd : 0; // no chroma planes -> zero stride
+
+ // Align strides up to multiples of the GPU performance hints
+ p->stride[0] = ALIGN2(p->stride[0], gpu->limits.align_tex_xfer_stride);
+ p->stride[1] = ALIGN2(p->stride[1], gpu->limits.align_tex_xfer_stride);
+
+ // Aligning offsets to 4 also implicitly aligns to the texel size (1 or 2)
+ size_t off_align = ALIGN2(gpu->limits.align_tex_xfer_offset, 4);
+ const size_t y_sz = ALIGN2(p->stride[0] * aligned_h, off_align); // luma plane size in bytes, padded to off_align
+ const size_t uv_sz = ALIGN2(p->stride[1] * (aligned_h >> ss_ver), off_align); // size of one chroma plane, padded to off_align
+
+ // The extra DAV1D_PICTURE_ALIGNMENTs are to brute force plane alignment,
+ // even in the case that the driver gives us insane alignments
+ const size_t pic_size = y_sz + 2 * uv_sz; // one luma plane plus two chroma planes
+ const size_t total_size = pic_size + DAV1D_PICTURE_ALIGNMENT * 4; // slack for the three ALIGN2 round-ups of the plane pointers below
+
+ // Reject allocations larger than the GPU's reported transfer size limit
+ if (total_size > gpu->limits.max_xfer_size) {
+ printf("alloc of %zu bytes exceeds limits\n", total_size);
+ goto err;
+ }
+
+ const struct pl_buf *buf = pl_buf_create(gpu, &(struct pl_buf_params) { // a single buffer backs all three planes
+ .type = PL_BUF_TEX_TRANSFER,
+ .host_mapped = true,
+ .size = total_size,
+ .memory_type = PL_BUF_MEM_HOST,
+ .user_data = p,
+ });
+
+ if (!buf) {
+ printf("alloc of GPU mapped buffer failed\n");
+ goto err;
+ }
+
+ assert(buf->data); // presumably non-NULL because host_mapped was requested -- confirm against libplacebo docs
+ uintptr_t base = (uintptr_t) buf->data, data[3]; // data[] holds the plane start addresses as integers
+ data[0] = ALIGN2(base, DAV1D_PICTURE_ALIGNMENT);
+ data[1] = ALIGN2(data[0] + y_sz, DAV1D_PICTURE_ALIGNMENT);
+ data[2] = ALIGN2(data[1] + uv_sz, DAV1D_PICTURE_ALIGNMENT);
+
+ // Sanity check: each plane's offset from the buffer start should be a
+ // multiple of off_align; a mismatch only prints a warning, it is not an error
+ if (data[0] - base != ALIGN2(data[0] - base, off_align) ||
+ data[1] - base != ALIGN2(data[1] - base, off_align) ||
+ data[2] - base != ALIGN2(data[2] - base, off_align))
+ {
+ printf("GPU buffer horribly misaligned, expect slowdown!\n");
+ }
+
+ p->allocator_data = (void *) buf; // read back by placebo_upload_planes and destroyed by placebo_release_pic
+ p->data[0] = (void *) data[0];
+ p->data[1] = (void *) data[1];
+ p->data[2] = (void *) data[2];
+ ret = 0;
+
+ // fall through: success shares the unlock-and-return path with the error cases
+err:
+ SDL_UnlockMutex(rd_priv_ctx->lock);
+ return ret;
+}
+
+static void placebo_release_pic(Dav1dPicture *pic, void *cookie) // picture release callback paired with placebo_alloc_pic
+{
+ Dav1dPlayRendererPrivateContext *rd_priv_ctx = cookie; // cookie is the renderer's private context
+ assert(rd_priv_ctx != NULL);
+ assert(pic->allocator_data); // must hold the pl_buf stored by placebo_alloc_pic
+
+ SDL_LockMutex(rd_priv_ctx->lock); // same lock that placebo_alloc_pic takes
+ const struct pl_gpu *gpu = rd_priv_ctx->vk->gpu;
+ pl_buf_destroy(gpu, (const struct pl_buf **) &pic->allocator_data); // destroys the buffer backing all three planes
+ SDL_UnlockMutex(rd_priv_ctx->lock);
+}
+
static const Dav1dPlayRenderInfo renderer_info = {
.create_renderer = placebo_renderer_create,
.destroy_renderer = placebo_renderer_destroy,
.render = placebo_render,
- .update_frame = placebo_upload_planes
+ .update_frame = placebo_upload_planes,
+ .alloc_pic = placebo_alloc_pic,
+ .release_pic = placebo_release_pic,
};
#else
@@ -540,7 +651,8 @@ static void sdl_render(void *cookie, const Dav1dPlaySettings *settings)
SDL_UnlockMutex(rd_priv_ctx->lock);
}
-static int sdl_update_texture(void *cookie, Dav1dPicture *dav1d_pic)
+static int sdl_update_texture(void *cookie, Dav1dPicture *dav1d_pic,
+ const Dav1dPlaySettings *settings)
{
Dav1dPlayRendererPrivateContext *rd_priv_ctx = cookie;
assert(rd_priv_ctx != NULL);
@@ -655,6 +767,7 @@ static void dp_settings_print_usage(const char *const app,
" --framethreads $num: number of frame threads (default: 1)\n"
" --tilethreads $num: number of tile threads (default: 1)\n"
" --highquality: enable high quality rendering\n"
+ " --zerocopy/-z: enable zero copy upload path\n"
" --version/-v: print version and exit\n");
exit(1);
}
@@ -678,7 +791,7 @@ static void dp_rd_ctx_parse_args(Dav1dPlayRenderContext *rd_ctx,
Dav1dSettings *lib_settings = &rd_ctx->lib_settings;
// Short options
- static const char short_opts[] = "i:vu";
+ static const char short_opts[] = "i:vuz";
enum {
ARG_FRAME_THREADS = 256,
@@ -694,6 +807,7 @@ static void dp_rd_ctx_parse_args(Dav1dPlayRenderContext *rd_ctx,
{ "framethreads", 1, NULL, ARG_FRAME_THREADS },
{ "tilethreads", 1, NULL, ARG_TILE_THREADS },
{ "highquality", 0, NULL, ARG_HIGH_QUALITY },
+ { "zerocopy", 0, NULL, 'z' },
{ NULL, 0, NULL, 0 },
};
@@ -714,6 +828,12 @@ static void dp_rd_ctx_parse_args(Dav1dPlayRenderContext *rd_ctx,
fprintf(stderr, "warning: --highquality requires libplacebo\n");
#endif
break;
+ case 'z':
+ settings->zerocopy = true;
+#ifndef HAVE_PLACEBO_VULKAN
+ fprintf(stderr, "warning: --zerocopy requires libplacebo\n");
+#endif
+ break;
case ARG_FRAME_THREADS:
lib_settings->n_frame_threads =
parse_unsigned(optarg, ARG_FRAME_THREADS, argv[0]);
@@ -829,7 +949,7 @@ static void dp_rd_ctx_post_event(Dav1dPlayRenderContext *rd_ctx, uint32_t code)
static void dp_rd_ctx_update_with_dav1d_picture(Dav1dPlayRenderContext *rd_ctx,
Dav1dPicture *dav1d_pic)
{
- renderer_info.update_frame(rd_ctx->rd_priv, dav1d_pic);
+ renderer_info.update_frame(rd_ctx->rd_priv, dav1d_pic, &rd_ctx->settings);
rd_ctx->current_pts = dav1d_pic->m.timestamp;
}
@@ -1068,6 +1188,18 @@ int main(int argc, char **argv)
// Parse and validate arguments
dp_rd_ctx_parse_args(rd_ctx, argc, argv);
+ if (rd_ctx->settings.zerocopy) {
+ if (renderer_info.alloc_pic) {
+ rd_ctx->lib_settings.allocator = (Dav1dPicAllocator) {
+ .cookie = rd_ctx->rd_priv,
+ .alloc_picture_callback = renderer_info.alloc_pic,
+ .release_picture_callback = renderer_info.release_pic,
+ };
+ } else {
+ fprintf(stderr, "--zerocopy unsupported by compiled renderer\n");
+ }
+ }
+
// Start decoder thread
decoder_thread = SDL_CreateThread(decoder_thread_main, "Decoder thread", rd_ctx);