From a0c0900e470fde0d6db360e555620476c2323895 Mon Sep 17 00:00:00 2001 From: Michael Niedermayer Date: Sat, 27 Apr 2013 22:24:14 +0200 Subject: ffv1dec: Support frame threading with gop > 1 This is about 20-30% faster than slice threading Signed-off-by: Michael Niedermayer --- libavcodec/ffv1.h | 1 + libavcodec/ffv1dec.c | 106 ++++++++++++++++++++++++++++++++++++++++++++++----- libavcodec/pthread.c | 2 +- 3 files changed, 98 insertions(+), 11 deletions(-) diff --git a/libavcodec/ffv1.h b/libavcodec/ffv1.h index 781ddf2bea..914eb103d7 100644 --- a/libavcodec/ffv1.h +++ b/libavcodec/ffv1.h @@ -91,6 +91,7 @@ typedef struct FFV1Context { int flags; int picture_number; ThreadFrame picture, last_picture; + struct FFV1Context *fsrc; AVFrame *cur; int plane_count; diff --git a/libavcodec/ffv1dec.c b/libavcodec/ffv1dec.c index d623233ea8..eb2a1f0fea 100644 --- a/libavcodec/ffv1dec.c +++ b/libavcodec/ffv1dec.c @@ -326,6 +326,42 @@ static int decode_slice(AVCodecContext *c, void *arg) int width, height, x, y, ret; const int ps = av_pix_fmt_desc_get(c->pix_fmt)->comp[0].step_minus1 + 1; AVFrame * const p = f->cur; + int i, si; + + for( si=0; fs != f->slice_context[si]; si ++) + ; + + if(f->fsrc && !p->key_frame) + ff_thread_await_progress(&f->last_picture, si, 0); + + if(f->fsrc && !p->key_frame) { + FFV1Context *fssrc = f->fsrc->slice_context[si]; + FFV1Context *fsdst = f->slice_context[si]; + av_assert1(fsdst->plane_count == fssrc->plane_count); + av_assert1(fsdst == fs); + + if (!p->key_frame) + fsdst->slice_damaged |= fssrc->slice_damaged; + + for (i = 0; i < f->plane_count; i++) { + PlaneContext *psrc = &fssrc->plane[i]; + PlaneContext *pdst = &fsdst->plane[i]; + + av_free(pdst->state); + av_free(pdst->vlc_state); + memcpy(pdst, psrc, sizeof(*pdst)); + pdst->state = NULL; + pdst->vlc_state = NULL; + + if (fssrc->ac) { + pdst->state = av_malloc(CONTEXT_SIZE * psrc->context_count); + memcpy(pdst->state, psrc->state, CONTEXT_SIZE * psrc->context_count); + } else { + pdst->vlc_state = av_malloc(sizeof(*pdst->vlc_state) * psrc->context_count); + memcpy(pdst->vlc_state, psrc->vlc_state, sizeof(*pdst->vlc_state) * psrc->context_count); + } + } + } if (f->version > 2) { if (ffv1_init_slice_state(f, fs) < 0) @@ -386,6 +422,8 @@ static int decode_slice(AVCodecContext *c, void *arg) emms_c(); + ff_thread_report_progress(&f->picture, si, 0); + return 0; } @@ -724,6 +762,8 @@ static av_cold int decode_init(AVCodecContext *avctx) if ((ret = ffv1_init_slice_contexts(f)) < 0) return ret; + avctx->internal->allocate_progress = 1; + return 0; } @@ -744,6 +784,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPac f->cur = p = f->picture.f; + f->avctx = avctx; ff_init_range_decoder(c, buf, buf_size); ff_build_rac_states(c, 0.05 * (1LL << 32), 256 - 8); @@ -770,6 +811,8 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPac av_log(avctx, AV_LOG_DEBUG, "ver:%d keyframe:%d coder:%d ec:%d slices:%d bps:%d\n", f->version, p->key_frame, f->ac, f->ec, f->slice_count, f->avctx->bits_per_raw_sample); + ff_thread_finish_setup(avctx); + buf_p = buf + buf_size; for (i = f->slice_count - 1; i >= 0; i--) { FFV1Context *fs = f->slice_context[i]; @@ -822,6 +865,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPac if (fs->slice_damaged && f->last_picture.f->data[0]) { const uint8_t *src[4]; uint8_t *dst[4]; + ff_thread_await_progress(&f->last_picture, INT_MAX, 0); for (j = 0; j < 4; j++) { int sh = (j==1 || j==2) ? f->chroma_h_shift : 0; int sv = (j==1 || j==2) ? f->chroma_v_shift : 0; @@ -837,6 +881,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPac fs->slice_height); } } + ff_thread_report_progress(&f->picture, INT_MAX, 0); f->picture_number++; @@ -854,18 +899,58 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPac static int init_thread_copy(AVCodecContext *avctx) { FFV1Context *f = avctx->priv_data; - int ret, i; - for (i = 0; i < f->quant_table_count; i++) { - void *p = f->initial_states[i]; - f->initial_states[i] = av_malloc(f->context_count[i] * sizeof(*f->initial_states[i])); - if (!f->initial_states[i]) - return AVERROR(ENOMEM); - memcpy(f->initial_states[i], p, f->context_count[i] * sizeof(*f->initial_states[i])); + f->picture.f = NULL; + f->last_picture.f = NULL; + f->sample_buffer = NULL; + f->quant_table_count = 0; + f->slice_count = 0; + + return 0; +} + +static int update_thread_context(AVCodecContext *dst, const AVCodecContext *src) +{ + FFV1Context *fsrc = src->priv_data; + FFV1Context *fdst = dst->priv_data; + int i, ret; + + if (dst == src) + return 0; + + if (!fdst->quant_table_count) { + memcpy(fdst, fsrc, sizeof(*fdst)); + + for (i = 0; i < fdst->quant_table_count; i++) { + fdst->initial_states[i] = av_malloc(fdst->context_count[i] * sizeof(*fdst->initial_states[i])); + memcpy(fdst->initial_states[i], fsrc->initial_states[i], fdst->context_count[i] * sizeof(*fdst->initial_states[i])); + } + + fdst->picture.f = av_frame_alloc(); + fdst->last_picture.f = av_frame_alloc(); + + if ((ret = ffv1_init_slice_contexts(fdst)) < 0) + return ret; } - if ((ret = ffv1_init_slice_contexts(f)) < 0) - return ret; + av_assert1(fdst->slice_count == fsrc->slice_count); + + fdst->key_frame_ok = fsrc->key_frame_ok; + + ff_thread_release_buffer(dst, &fdst->picture); + if (fsrc->picture.f->data[0]) { + if ((ret = ff_thread_ref_frame(&fdst->picture, &fsrc->picture)) < 0) + return ret; + } + for (i = 0; i < fdst->slice_count; i++) { + FFV1Context *fsdst = fdst->slice_context[i]; + FFV1Context *fssrc = fsrc->slice_context[i]; + + fsdst->slice_damaged = fssrc->slice_damaged; + } + + fdst->fsrc = fsrc; + return 0; } @@ -878,7 +963,8 @@ AVCodec ff_ffv1_decoder = { .close = ffv1_close, .decode = decode_frame, .init_thread_copy = init_thread_copy, + .update_thread_context = update_thread_context, .capabilities = CODEC_CAP_DR1 /*| CODEC_CAP_DRAW_HORIZ_BAND*/ | - CODEC_CAP_SLICE_THREADS, + CODEC_CAP_FRAME_THREADS | CODEC_CAP_SLICE_THREADS, .long_name = NULL_IF_CONFIG_SMALL("FFmpeg video codec #1"), }; diff --git a/libavcodec/pthread.c b/libavcodec/pthread.c index 516e5fcfd0..6942c1ebf1 100644 --- a/libavcodec/pthread.c +++ b/libavcodec/pthread.c @@ -652,7 +652,7 @@ int ff_thread_decode_frame(AVCodecContext *avctx, */ if (fctx->delaying) { - if (fctx->next_decoding >= (avctx->thread_count-1)) fctx->delaying = 0; + if (fctx->next_decoding >= (avctx->thread_count-1-(avctx->codec_id == AV_CODEC_ID_FFV1))) fctx->delaying = 0; *got_picture_ptr=0; if (avpkt->size) -- cgit v1.2.3