diff options
author | Roman Arzumanyan <rarzumanyan@nvidia.com> | 2022-09-10 11:05:56 +0300 |
---|---|---|
committer | Timo Rothenpieler <timo@rothenpieler.org> | 2022-09-13 23:59:30 +0300 |
commit | cc81ab283c72921a23f7dc149c6a2b386eaf77c6 (patch) | |
tree | 31cd9da00d55a59d453a8d327ce6d0bfbf325fb2 /libavfilter/vf_colorspace_cuda.c | |
parent | c177108ae1144fd4e6cedb4a702260dbaa179825 (diff) |
libavfilter: add vf_colorrange_cuda, CUDA-accelerated color conversion filter
Signed-off-by: Timo Rothenpieler <timo@rothenpieler.org>
Diffstat (limited to 'libavfilter/vf_colorspace_cuda.c')
-rw-r--r-- | libavfilter/vf_colorspace_cuda.c | 435 |
1 files changed, 435 insertions, 0 deletions
diff --git a/libavfilter/vf_colorspace_cuda.c b/libavfilter/vf_colorspace_cuda.c new file mode 100644 index 0000000000..131c4ad72b --- /dev/null +++ b/libavfilter/vf_colorspace_cuda.c @@ -0,0 +1,435 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include <string.h> + +#include "libavutil/avstring.h" +#include "libavutil/common.h" +#include "libavutil/cuda_check.h" +#include "libavutil/hwcontext.h" +#include "libavutil/hwcontext_cuda_internal.h" +#include "libavutil/internal.h" +#include "libavutil/opt.h" +#include "libavutil/pixdesc.h" + +#include "avfilter.h" +#include "formats.h" +#include "internal.h" +#include "scale_eval.h" +#include "video.h" + +#include "cuda/load_helper.h" + +static const enum AVPixelFormat supported_formats[] = { + AV_PIX_FMT_NV12, + AV_PIX_FMT_YUV420P, + AV_PIX_FMT_YUV444P, +}; + +#define DIV_UP(a, b) (((a) + (b)-1) / (b)) +#define BLOCKX 32 +#define BLOCKY 16 + +#define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, s->hwctx->internal->cuda_dl, x) + +typedef struct CUDAColorspaceContext { + const AVClass* class; + + AVCUDADeviceContext* hwctx; + AVBufferRef* frames_ctx; + AVFrame* own_frame; + AVFrame* tmp_frame; + + CUcontext cu_ctx; + CUstream cu_stream; + CUmodule cu_module; + CUfunction cu_convert[AVCOL_RANGE_NB]; + + enum AVPixelFormat pix_fmt; + enum AVColorRange range; + + int num_planes; +} CUDAColorspaceContext; + +static av_cold int cudacolorspace_init(AVFilterContext* ctx) +{ + CUDAColorspaceContext* s = ctx->priv; + + s->own_frame = av_frame_alloc(); + if (!s->own_frame) + return AVERROR(ENOMEM); + + s->tmp_frame = av_frame_alloc(); + if (!s->tmp_frame) + return AVERROR(ENOMEM); + + return 0; +} + +static av_cold void cudacolorspace_uninit(AVFilterContext* ctx) +{ + CUDAColorspaceContext* s = ctx->priv; + + if (s->hwctx && s->cu_module) { + CudaFunctions* cu = s->hwctx->internal->cuda_dl; + CUcontext dummy; + + CHECK_CU(cu->cuCtxPushCurrent(s->hwctx->cuda_ctx)); + CHECK_CU(cu->cuModuleUnload(s->cu_module)); + s->cu_module = NULL; + CHECK_CU(cu->cuCtxPopCurrent(&dummy)); + } + + av_frame_free(&s->own_frame); + av_buffer_unref(&s->frames_ctx); + av_frame_free(&s->tmp_frame); +} + +static av_cold int init_hwframe_ctx(CUDAColorspaceContext* s, AVBufferRef* device_ctx, + int width, int height) +{ + AVBufferRef* out_ref = NULL; + AVHWFramesContext* out_ctx; + int ret; + + out_ref = av_hwframe_ctx_alloc(device_ctx); + if (!out_ref) + return AVERROR(ENOMEM); + + out_ctx = (AVHWFramesContext*)out_ref->data; + + out_ctx->format = AV_PIX_FMT_CUDA; + out_ctx->sw_format = s->pix_fmt; + out_ctx->width = FFALIGN(width, 32); + out_ctx->height = FFALIGN(height, 32); + + ret = av_hwframe_ctx_init(out_ref); + if (ret < 0) + goto fail; + + av_frame_unref(s->own_frame); + ret = av_hwframe_get_buffer(out_ref, s->own_frame, 0); + if (ret < 0) + goto fail; + + s->own_frame->width = width; + s->own_frame->height = height; + + av_buffer_unref(&s->frames_ctx); + s->frames_ctx = out_ref; + + return 0; +fail: + av_buffer_unref(&out_ref); + return ret; +} + +static int format_is_supported(enum AVPixelFormat fmt) +{ + for (int i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) + if (fmt == supported_formats[i]) + return 1; + + return 0; +} + +static av_cold int init_processing_chain(AVFilterContext* ctx, int width, + int height) +{ + CUDAColorspaceContext* s = ctx->priv; + AVHWFramesContext* in_frames_ctx; + + int ret; + + if (!ctx->inputs[0]->hw_frames_ctx) { + av_log(ctx, AV_LOG_ERROR, "No hw context provided on input\n"); + return AVERROR(EINVAL); + } + + in_frames_ctx = (AVHWFramesContext*)ctx->inputs[0]->hw_frames_ctx->data; + s->pix_fmt = in_frames_ctx->sw_format; + + if (!format_is_supported(s->pix_fmt)) { + av_log(ctx, AV_LOG_ERROR, "Unsupported pixel format: %s\n", + av_get_pix_fmt_name(s->pix_fmt)); + return AVERROR(EINVAL); + } + + if ((AVCOL_RANGE_MPEG != s->range) && (AVCOL_RANGE_JPEG != s->range)) { + av_log(ctx, AV_LOG_ERROR, "Unsupported color range\n"); + return AVERROR(EINVAL); + } + + s->num_planes = av_pix_fmt_count_planes(s->pix_fmt); + + ret = init_hwframe_ctx(s, in_frames_ctx->device_ref, width, height); + if (ret < 0) + return ret; + + ctx->outputs[0]->hw_frames_ctx = av_buffer_ref(s->frames_ctx); + if (!ctx->outputs[0]->hw_frames_ctx) + return AVERROR(ENOMEM); + + return 0; +} + +static av_cold int cudacolorspace_load_functions(AVFilterContext* ctx) +{ + CUDAColorspaceContext* s = ctx->priv; + CUcontext dummy, cuda_ctx = s->hwctx->cuda_ctx; + CudaFunctions* cu = s->hwctx->internal->cuda_dl; + int ret; + + extern const unsigned char ff_vf_colorspace_cuda_ptx_data[]; + extern const unsigned int ff_vf_colorspace_cuda_ptx_len; + + ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx)); + if (ret < 0) + return ret; + + ret = ff_cuda_load_module(ctx, s->hwctx, &s->cu_module, + ff_vf_colorspace_cuda_ptx_data, + ff_vf_colorspace_cuda_ptx_len); + if (ret < 0) + goto fail; + + ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_convert[AVCOL_RANGE_MPEG], s->cu_module, "to_mpeg_cuda")); + if (ret < 0) + goto fail; + + ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_convert[AVCOL_RANGE_JPEG], s->cu_module, "to_jpeg_cuda")); + if (ret < 0) + goto fail; + +fail: + CHECK_CU(cu->cuCtxPopCurrent(&dummy)); + return ret; +} + +static av_cold int cudacolorspace_config_props(AVFilterLink* outlink) +{ + AVFilterContext* ctx = outlink->src; + AVFilterLink* inlink = outlink->src->inputs[0]; + CUDAColorspaceContext* s = ctx->priv; + AVHWFramesContext* frames_ctx = + (AVHWFramesContext*)inlink->hw_frames_ctx->data; + AVCUDADeviceContext* device_hwctx = frames_ctx->device_ctx->hwctx; + int ret; + + s->hwctx = device_hwctx; + s->cu_stream = s->hwctx->stream; + + outlink->w = inlink->w; + outlink->h = inlink->h; + + ret = init_processing_chain(ctx, inlink->w, inlink->h); + if (ret < 0) + return ret; + + if (inlink->sample_aspect_ratio.num) { + outlink->sample_aspect_ratio = av_mul_q( + (AVRational){outlink->h * inlink->w, outlink->w * inlink->h}, + inlink->sample_aspect_ratio); + } else { + outlink->sample_aspect_ratio = inlink->sample_aspect_ratio; + } + + ret = cudacolorspace_load_functions(ctx); + if (ret < 0) + return ret; + + return ret; +} + +static int conv_cuda_convert(AVFilterContext* ctx, AVFrame* out, AVFrame* in) +{ + CUDAColorspaceContext* s = ctx->priv; + CudaFunctions* cu = s->hwctx->internal->cuda_dl; + CUcontext dummy, cuda_ctx = s->hwctx->cuda_ctx; + int ret; + + ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx)); + if (ret < 0) + return ret; + + out->color_range = s->range; + + for (int i = 0; i < s->num_planes; i++) { + int width = in->width, height = in->height, comp_id = (i > 0); + + switch (s->pix_fmt) { + case AV_PIX_FMT_YUV444P: + break; + case AV_PIX_FMT_YUV420P: + width = comp_id ? in->width / 2 : in->width; + case AV_PIX_FMT_NV12: + height = comp_id ? in->height / 2 : in->height; + break; + default: + av_log(ctx, AV_LOG_ERROR, "Unsupported pixel format: %s\n", + av_get_pix_fmt_name(s->pix_fmt)); + return AVERROR(EINVAL); + } + + if (!s->cu_convert[out->color_range]) { + av_log(ctx, AV_LOG_ERROR, "Unsupported color range\n"); + return AVERROR(EINVAL); + } + + if (in->color_range != out->color_range) { + void* args[] = {&in->data[i], &out->data[i], &in->linesize[i], + &comp_id}; + ret = CHECK_CU(cu->cuLaunchKernel( + s->cu_convert[out->color_range], DIV_UP(width, BLOCKX), + DIV_UP(height, BLOCKY), 1, BLOCKX, BLOCKY, 1, 0, s->cu_stream, + args, NULL)); + } else { + ret = av_hwframe_transfer_data(out, in, 0); + if (ret < 0) + return ret; + } + } + + CHECK_CU(cu->cuCtxPopCurrent(&dummy)); + return ret; +} + +static int cudacolorspace_conv(AVFilterContext* ctx, AVFrame* out, AVFrame* in) +{ + CUDAColorspaceContext* s = ctx->priv; + AVFilterLink* outlink = ctx->outputs[0]; + AVFrame* src = in; + int ret; + + ret = conv_cuda_convert(ctx, s->own_frame, src); + if (ret < 0) + return ret; + + src = s->own_frame; + ret = av_hwframe_get_buffer(src->hw_frames_ctx, s->tmp_frame, 0); + if (ret < 0) + return ret; + + av_frame_move_ref(out, s->own_frame); + av_frame_move_ref(s->own_frame, s->tmp_frame); + + s->own_frame->width = outlink->w; + s->own_frame->height = outlink->h; + + ret = av_frame_copy_props(out, in); + if (ret < 0) + return ret; + + return 0; +} + +static int cudacolorspace_filter_frame(AVFilterLink* link, AVFrame* in) +{ + AVFilterContext* ctx = link->dst; + CUDAColorspaceContext* s = ctx->priv; + AVFilterLink* outlink = ctx->outputs[0]; + CudaFunctions* cu = s->hwctx->internal->cuda_dl; + + AVFrame* out = NULL; + CUcontext dummy; + int ret = 0; + + out = av_frame_alloc(); + if (!out) { + ret = AVERROR(ENOMEM); + goto fail; + } + + ret = CHECK_CU(cu->cuCtxPushCurrent(s->hwctx->cuda_ctx)); + if (ret < 0) + goto fail; + + ret = cudacolorspace_conv(ctx, out, in); + + CHECK_CU(cu->cuCtxPopCurrent(&dummy)); + if (ret < 0) + goto fail; + + av_reduce(&out->sample_aspect_ratio.num, &out->sample_aspect_ratio.den, + (int64_t)in->sample_aspect_ratio.num * outlink->h * link->w, + (int64_t)in->sample_aspect_ratio.den * outlink->w * link->h, + INT_MAX); + + av_frame_free(&in); + return ff_filter_frame(outlink, out); +fail: + av_frame_free(&in); + av_frame_free(&out); + return ret; +} + +#define OFFSET(x) offsetof(CUDAColorspaceContext, x) +#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) +static const AVOption options[] = { + {"range", "Output video range", OFFSET(range), AV_OPT_TYPE_INT, { .i64 = AVCOL_RANGE_UNSPECIFIED }, AVCOL_RANGE_UNSPECIFIED, AVCOL_RANGE_NB - 1, FLAGS, "range"}, + {"tv", "Limited range", 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, "range"}, + {"mpeg", "Limited range", 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, "range"}, + {"pc", "Full range", 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, "range"}, + {"jpeg", "Full range", 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, "range"}, + {NULL}, +}; + +static const AVClass cudacolorspace_class = { + .class_name = "colorspace_cuda", + .item_name = av_default_item_name, + .option = options, + .version = LIBAVUTIL_VERSION_INT, +}; + +static const AVFilterPad cudacolorspace_inputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .filter_frame = cudacolorspace_filter_frame, + }, +}; + +static const AVFilterPad cudacolorspace_outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = cudacolorspace_config_props, + }, +}; + +const AVFilter ff_vf_colorspace_cuda = { + .name = "colorspace_cuda", + .description = NULL_IF_CONFIG_SMALL("CUDA accelerated video color converter"), + + .init = cudacolorspace_init, + .uninit = cudacolorspace_uninit, + + .priv_size = sizeof(CUDAColorspaceContext), + .priv_class = &cudacolorspace_class, + + FILTER_INPUTS(cudacolorspace_inputs), + FILTER_OUTPUTS(cudacolorspace_outputs), + + FILTER_SINGLE_PIXFMT(AV_PIX_FMT_CUDA), + + .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, +}; |