diff options
563 files changed, 28670 insertions, 14807 deletions
diff --git a/.gitignore b/.gitignore index eac1758ed9..3e8ed0046e 100644 --- a/.gitignore +++ b/.gitignore @@ -16,6 +16,9 @@ ffmpeg ffplay ffprobe ffserver +libavcodec/*_tablegen +libavcodec/*_tables.c +libavcodec/*_tables.h libavcodec/libavcodec* libavcore/libavcore* libavdevice/libavdevice* @@ -9,12 +9,26 @@ version <next>: - mpeg2 aspect ratio dection fixed - libxvid aspect pickiness fixed - Frame multithreaded decoding +- E-AC-3 audio encoder +- ac3enc: add channel coupling support +- floating-point sample format support to the ac3, eac3, dca, aac, and vorbis decoders. +- H264/MPEG frame-level multi-threading +- All av_metadata_* functions renamed to av_dict_* and moved to libavutil + + +version 0.7_beta2: + - Lots of deprecated API cruft removed - fft and imdct optimizations for AVX (Sandy Bridge) processors - showinfo filter added - DPX image encoder - SMPTE 302M AES3 audio decoder - Apple Core Audio Format muxer +- 9bit and 10bit H.264 decoding +- 9bit and 10bit FFV1 encoding / decoding +- split filter added +- select filter added +- sdl output device added version 0.7_beta1: @@ -2,6 +2,11 @@ include config.mak SRC_DIR = $(SRC_PATH_BARE) +vpath %.c $(SRC_DIR) +vpath %.h $(SRC_DIR) +vpath %.S $(SRC_DIR) +vpath %.asm $(SRC_DIR) +vpath %.v $(SRC_DIR) vpath %.texi $(SRC_PATH_BARE) PROGS-$(CONFIG_FFMPEG) += ffmpeg @@ -24,6 +29,8 @@ ALLPROGS = $(BASENAMES:%=%$(EXESUF)) ALLPROGS_G = $(BASENAMES:%=%_g$(EXESUF)) ALLMANPAGES = $(BASENAMES:%=%.1) +ALLFFLIBS = avcodec avdevice avfilter avformat avutil postproc swscale + FFLIBS-$(CONFIG_AVDEVICE) += avdevice FFLIBS-$(CONFIG_AVFILTER) += avfilter FFLIBS-$(CONFIG_AVFORMAT) += avformat @@ -43,15 +50,9 @@ FF_LDFLAGS := $(FFLDFLAGS) FF_EXTRALIBS := $(FFEXTRALIBS) FF_DEP_LIBS := $(DEP_LIBS) -ALL_TARGETS-$(CONFIG_DOC) += documentation +all-$(CONFIG_DOC): documentation -ifdef PROGS -INSTALL_TARGETS-yes += install-progs install-data -INSTALL_TARGETS-$(CONFIG_DOC) += install-man -endif -INSTALL_PROGS_TARGETS-$(CONFIG_SHARED) = 
install-libs - -all: $(FF_DEP_LIBS) $(PROGS) $(ALL_TARGETS-yes) +all: $(FF_DEP_LIBS) $(PROGS) $(PROGS): %$(EXESUF): %_g$(EXESUF) $(CP) $< $@ @@ -80,12 +81,15 @@ endef $(foreach D,$(FFLIBS),$(eval $(call DOSUBDIR,lib$(D)))) +ffplay.o: CFLAGS += $(SDL_CFLAGS) ffplay_g$(EXESUF): FF_EXTRALIBS += $(SDL_LIBS) ffserver_g$(EXESUF): FF_LDFLAGS += $(FFSERVERLDFLAGS) %_g$(EXESUF): %.o cmdutils.o $(FF_DEP_LIBS) $(LD) $(FF_LDFLAGS) -o $@ $< cmdutils.o $(FF_EXTRALIBS) +alltools: $(TOOLS) + tools/%$(EXESUF): tools/%.o $(LD) $(FF_LDFLAGS) -o $@ $< $(FF_EXTRALIBS) @@ -95,8 +99,6 @@ tools/%.o: tools/%.c -include $(wildcard tools/*.d) -include $(wildcard tests/*.d) -ffplay.o: CFLAGS += $(SDL_CFLAGS) - VERSION_SH = $(SRC_PATH_BARE)/version.sh GIT_LOG = $(SRC_PATH_BARE)/.git/logs/HEAD @@ -110,8 +112,6 @@ version.h .version: # force version.sh to run whenever version might have changed -include .version -alltools: $(TOOLS) - DOCS = $(addprefix doc/, developer.html faq.html general.html libavfilter.html) $(HTMLPAGES) $(MANPAGES) $(PODPAGES) documentation: $(DOCS) @@ -123,7 +123,7 @@ TEXIDEP = awk '/^@include/ { printf "$@: $(@D)/%s\n", $$2 }' <$< >$(@:%=%.d) doc/%.html: TAG = HTML doc/%.html: doc/%.texi $(SRC_PATH_BARE)/doc/t2h.init $(Q)$(TEXIDEP) - $(M)cd doc && texi2html -monolithic --init-file $(SRC_PATH_BARE)/doc/t2h.init $(<:doc/%=%) + $(M)texi2html -monolithic --init-file $(SRC_PATH_BARE)/doc/t2h.init --output $@ $< doc/%.pod: TAG = POD doc/%.pod: doc/%.texi @@ -134,9 +134,19 @@ doc/%.1: TAG = MAN doc/%.1: doc/%.pod $(M)pod2man --section=1 --center=" " --release=" " $< > $@ -install: $(INSTALL_TARGETS-yes) +ifdef PROGS +install: install-progs install-data +endif + +install: install-libs install-headers -install-progs: $(PROGS) $(INSTALL_PROGS_TARGETS-yes) +install-libs: install-libs-yes + +install-progs-yes: +install-progs-$(CONFIG_DOC): install-man +install-progs-$(CONFIG_SHARED): install-libs + +install-progs: install-progs-yes $(PROGS) $(Q)mkdir -p "$(BINDIR)" $(INSTALL) -c -m 
755 $(PROGS) "$(BINDIR)" @@ -148,7 +158,7 @@ install-man: $(MANPAGES) $(Q)mkdir -p "$(MANDIR)/man1" $(INSTALL) -m 644 $(MANPAGES) "$(MANDIR)/man1" -uninstall: uninstall-progs uninstall-data uninstall-man +uninstall: uninstall-libs uninstall-headers uninstall-progs uninstall-data uninstall-man uninstall-progs: $(RM) $(addprefix "$(BINDIR)/", $(ALLPROGS)) @@ -174,7 +184,7 @@ clean:: testclean distclean:: $(RM) $(DISTCLEANSUFFIXES) - $(RM) version.h config.* libavutil/avconfig.h + $(RM) config.* .version version.h libavutil/avconfig.h config: $(SRC_PATH)/configure $(value FFMPEG_CONFIGURATION) @@ -186,25 +196,18 @@ check: test fulltest test: codectest lavftest lavfitest seektest FFSERVER_REFFILE = $(SRC_PATH)/tests/ffserver.regression.ref -SEEK_REFFILE = $(SRC_PATH)/tests/seek.regression.ref codectest: fate-codec lavftest: fate-lavf lavfitest: fate-lavfi seektest: fate-seek -AREF = tests/data/acodec.ref.wav -VREF = tests/data/vsynth1.ref.yuv +AREF = fate-acodec-aref +VREF = fate-vsynth1-vref fate-vsynth2-vref REFS = $(AREF) $(VREF) -$(REFS): TAG = GEN - $(VREF): ffmpeg$(EXESUF) tests/vsynth1/00.pgm tests/vsynth2/00.pgm - $(M)$(SRC_PATH)/tests/codec-regression.sh vref vsynth1 tests/vsynth1 "$(TARGET_EXEC)" "$(TARGET_PATH)" - $(Q)$(SRC_PATH)/tests/codec-regression.sh vref vsynth2 tests/vsynth2 "$(TARGET_EXEC)" "$(TARGET_PATH)" - $(AREF): ffmpeg$(EXESUF) tests/data/asynth1.sw - $(M)$(SRC_PATH)/tests/codec-regression.sh aref acodec tests/acodec "$(TARGET_EXEC)" "$(TARGET_PATH)" ffservertest: ffserver$(EXESUF) tests/vsynth1/00.pgm tests/data/asynth1.sw @echo @@ -215,15 +218,15 @@ ffservertest: ffserver$(EXESUF) tests/vsynth1/00.pgm tests/data/asynth1.sw tests/vsynth1/00.pgm: tests/videogen$(HOSTEXESUF) @mkdir -p tests/vsynth1 - $(M)$(BUILD_ROOT)/$< 'tests/vsynth1/' + $(M)./$< 'tests/vsynth1/' tests/vsynth2/00.pgm: tests/rotozoom$(HOSTEXESUF) @mkdir -p tests/vsynth2 - $(M)$(BUILD_ROOT)/$< 'tests/vsynth2/' $(SRC_PATH)/tests/lena.pnm + $(M)./$< 'tests/vsynth2/' 
$(SRC_PATH)/tests/lena.pnm tests/data/asynth1.sw: tests/audiogen$(HOSTEXESUF) @mkdir -p tests/data - $(M)$(BUILD_ROOT)/$< $@ + $(M)./$< $@ tests/data/asynth1.sw tests/vsynth%/00.pgm: TAG = GEN @@ -258,8 +261,8 @@ FATE = $(FATE_ACODEC) \ $(FATE_LAVFI) \ $(FATE_SEEK) \ -$(FATE_ACODEC): $(AREF) -$(FATE_VCODEC): $(VREF) +$(filter-out %-aref,$(FATE_ACODEC)): $(AREF) +$(filter-out %-vref,$(FATE_VCODEC)): $(VREF) $(FATE_LAVF): $(REFS) $(FATE_LAVFI): $(REFS) tools/lavfi-showfiltfmts$(EXESUF) $(FATE_SEEK): fate-codec fate-lavf tests/seek_test$(EXESUF) @@ -300,4 +303,5 @@ $(FATE): ffmpeg$(EXESUF) $(FATE_UTILS:%=tests/%$(HOSTEXESUF)) fate-list: @printf '%s\n' $(sort $(FATE)) -.PHONY: documentation *test regtest-* alltools check config +.PHONY: all alltools *clean check config documentation examples install* +.PHONY: *test testprogs uninstall* diff --git a/cmdutils.c b/cmdutils.c index b7eabaf782..83fc1ccc3b 100644 --- a/cmdutils.c +++ b/cmdutils.c @@ -76,7 +76,8 @@ void uninit_opts(void) av_freep(&avformat_opts->key); av_freep(&avformat_opts); #if CONFIG_SWSCALE - av_freep(&sws_opts); + sws_freeContext(sws_opts); + sws_opts = NULL; #endif for (i = 0; i < opt_name_count; i++) { av_freep(&opt_names[i]); @@ -216,7 +217,7 @@ static inline void prepare_app_arguments(int *argc_ptr, char ***argv_ptr) #endif /* WIN32 && !__MINGW32CE__ */ void parse_options(int argc, char **argv, const OptionDef *options, - void (* parse_arg_function)(const char*)) + int (* parse_arg_function)(const char *opt, const char *arg)) { const char *opt, *arg; int optindex, handleoptions=1; @@ -272,21 +273,19 @@ unknown_opt: *po->u.int64_arg = parse_number_or_die(opt, arg, OPT_INT64, INT64_MIN, INT64_MAX); } else if (po->flags & OPT_FLOAT) { *po->u.float_arg = parse_number_or_die(opt, arg, OPT_FLOAT, -INFINITY, INFINITY); - } else if (po->flags & OPT_FUNC2) { - if (po->u.func2_arg(opt, arg) < 0) { + } else if (po->u.func_arg) { + if (po->u.func_arg(opt, arg) < 0) { fprintf(stderr, "%s: failed to set value 
'%s' for option '%s'\n", argv[0], arg, opt); exit(1); } - } else if (po->flags & OPT_DUMMY) { - /* Do nothing for this option */ - } else { - po->u.func_arg(arg); } if(po->flags & OPT_EXIT) exit(0); } else { - if (parse_arg_function) - parse_arg_function(opt); + if (parse_arg_function) { + if (parse_arg_function(NULL, opt) < 0) + exit(1); + } } } } @@ -301,7 +300,7 @@ int opt_default(const char *opt, const char *arg){ AVInputFormat *iformat = NULL; while ((p = av_codec_next(p))) { - AVClass *c = p->priv_class; + const AVClass *c = p->priv_class; if (c && av_find_opt(&c, opt, NULL, 0, 0)) break; } @@ -411,13 +410,25 @@ int opt_timelimit(const char *opt, const char *arg) return 0; } +static void *alloc_priv_context(int size, const AVClass *class) +{ + void *p = av_mallocz(size); + if (p) { + *(const AVClass **)p = class; + av_opt_set_defaults(p); + } + return p; +} + void set_context_opts(void *ctx, void *opts_ctx, int flags, AVCodec *codec) { int i; void *priv_ctx=NULL; if(!strcmp("AVCodecContext", (*(AVClass**)ctx)->class_name)){ AVCodecContext *avctx= ctx; - if(codec && codec->priv_class && avctx->priv_data){ + if(codec && codec->priv_class){ + if(!avctx->priv_data && codec->priv_data_size) + avctx->priv_data= alloc_priv_context(codec->priv_data_size, codec->priv_class); priv_ctx= avctx->priv_data; } } else if (!strcmp("AVFormatContext", (*(AVClass**)ctx)->class_name)) { @@ -837,6 +848,23 @@ FILE *get_preset_file(char *filename, size_t filename_size, av_strlcpy(filename, preset_name, filename_size); f = fopen(filename, "r"); } else { +#ifdef _WIN32 + char datadir[MAX_PATH], *ls; + base[2] = NULL; + + if (GetModuleFileNameA(GetModuleHandleA(NULL), datadir, sizeof(datadir) - 1)) + { + for (ls = datadir; ls < datadir + strlen(datadir); ls++) + if (*ls == '\\') *ls = '/'; + + if (ls = strrchr(datadir, '/')) + { + *ls = 0; + strncat(datadir, "/ffpresets", sizeof(datadir) - 1 - strlen(datadir)); + base[2] = datadir; + } + } +#endif for (i = 0; i < 3 && !f; i++) { if 
(!base[i]) continue; @@ -897,6 +925,7 @@ int get_filtered_video_frame(AVFilterContext *ctx, AVFrame *frame, { int ret; AVFilterBufferRef *picref; + *picref_ptr = NULL; if ((ret = avfilter_request_frame(ctx->inputs[0])) < 0) return ret; diff --git a/cmdutils.h b/cmdutils.h index d460200f4e..5fd398d054 100644 --- a/cmdutils.h +++ b/cmdutils.h @@ -22,11 +22,17 @@ #ifndef FFMPEG_CMDUTILS_H #define FFMPEG_CMDUTILS_H -#include <inttypes.h> +#include <stdint.h> + #include "libavcodec/avcodec.h" +#include "libavfilter/avfilter.h" #include "libavformat/avformat.h" #include "libswscale/swscale.h" +#ifdef __MINGW32__ +#undef main /* We don't want SDL to override our main() */ +#endif + /** * program name, defined by the program for show_version(). */ @@ -119,17 +125,14 @@ typedef struct { #define OPT_INT 0x0080 #define OPT_FLOAT 0x0100 #define OPT_SUBTITLE 0x0200 -#define OPT_FUNC2 0x0400 -#define OPT_INT64 0x0800 -#define OPT_EXIT 0x1000 -#define OPT_DATA 0x2000 -#define OPT_DUMMY 0x4000 +#define OPT_INT64 0x0400 +#define OPT_EXIT 0x0800 +#define OPT_DATA 0x1000 union { - void (*func_arg)(const char *); //FIXME passing error code as int return would be nicer then exit() in the func int *int_arg; char **str_arg; float *float_arg; - int (*func2_arg)(const char *, const char *); + int (*func_arg)(const char *, const char *); int64_t *int64_arg; } u; const char *help; @@ -147,7 +150,7 @@ void show_help_options(const OptionDef *options, const char *msg, int mask, int * not have to be processed. */ void parse_options(int argc, char **argv, const OptionDef *options, - void (* parse_arg_function)(const char*)); + int (* parse_arg_function)(const char *opt, const char *arg)); void set_context_opts(void *ctx, void *opts_ctx, int flags, AVCodec *codec); @@ -241,7 +244,8 @@ int read_file(const char *filename, char **bufptr, size_t *size); * If is_path is non-zero, look for the file in the path preset_name. 
* Otherwise search for a file named arg.ffpreset in the directories * $FFMPEG_DATADIR (if set), $HOME/.ffmpeg, and in the datadir defined - * at configuration time, in that order. If no such file is found and + * at configuration time or in a "ffpresets" folder along the executable + * on win32, in that order. If no such file is found and * codec_name is defined, then search for a file named * codec_name-preset_name.ffpreset in the above-mentioned directories. * @@ -255,9 +259,6 @@ int read_file(const char *filename, char **bufptr, size_t *size); FILE *get_preset_file(char *filename, size_t filename_size, const char *preset_name, int is_path, const char *codec_name); -#if CONFIG_AVFILTER -#include "libavfilter/avfilter.h" - typedef struct { enum PixelFormat pix_fmt; } FFSinkContext; @@ -273,6 +274,4 @@ extern AVFilter ffsink; int get_filtered_video_frame(AVFilterContext *sink, AVFrame *frame, AVFilterBufferRef **picref, AVRational *pts_tb); -#endif /* CONFIG_AVFILTER */ - #endif /* FFMPEG_CMDUTILS_H */ diff --git a/cmdutils_common_opts.h b/cmdutils_common_opts.h index da309977bd..9b5e5d22cd 100644 --- a/cmdutils_common_opts.h +++ b/cmdutils_common_opts.h @@ -10,4 +10,4 @@ { "protocols", OPT_EXIT, {(void*)show_protocols}, "show available protocols" }, { "filters", OPT_EXIT, {(void*)show_filters }, "show available filters" }, { "pix_fmts" , OPT_EXIT, {(void*)show_pix_fmts }, "show available pixel formats" }, - { "loglevel", HAS_ARG | OPT_FUNC2, {(void*)opt_loglevel}, "set libav* logging level", "loglevel" }, + { "loglevel", HAS_ARG, {(void*)opt_loglevel}, "set libav* logging level", "loglevel" }, diff --git a/common.mak b/common.mak index 2b7baa2e15..a293987272 100644 --- a/common.mak +++ b/common.mak @@ -6,17 +6,6 @@ all: all-yes ifndef SUBDIR -vpath %.c $(SRC_DIR) -vpath %.h $(SRC_DIR) -vpath %.S $(SRC_DIR) -vpath %.asm $(SRC_DIR) -vpath %.v $(SRC_DIR) - -ifeq ($(SRC_DIR),$(SRC_PATH_BARE)) -BUILD_ROOT_REL = . -else -BUILD_ROOT_REL = .. 
-endif ifndef V Q = @ @@ -31,9 +20,7 @@ $(foreach VAR,$(SILENT),$(eval override $(VAR) = @$($(VAR)))) $(eval INSTALL = @$(call ECHO,INSTALL,$$(^:$(SRC_DIR)/%=%)); $(INSTALL)) endif -ALLFFLIBS = avcodec avdevice avfilter avformat avutil postproc swscale - -IFLAGS := -I$(BUILD_ROOT_REL) -I$(SRC_PATH) +IFLAGS := -I. -I$(SRC_PATH) CPPFLAGS := $(IFLAGS) $(CPPFLAGS) CFLAGS += $(ECFLAGS) YASMFLAGS += $(IFLAGS) -Pconfig.asm @@ -51,8 +38,6 @@ HOSTCFLAGS += $(IFLAGS) %.ho: %.h $(CC) $(CPPFLAGS) $(CFLAGS) -Wno-unused -c -o $@ -x c $< -%$(EXESUF): %.c - %.ver: %.v $(Q)sed 's/$$MAJOR/$($(basename $(@F))_VERSION_MAJOR)/' $^ > $@ @@ -62,13 +47,6 @@ HOSTCFLAGS += $(IFLAGS) %.h: @: -install: install-libs install-headers -install-libs: install-libs-yes - -uninstall: uninstall-libs uninstall-headers - -.PHONY: all depend dep *clean install* uninstall* examples testprogs - # Disable suffix rules. Most of the builtin rules are suffix rules, # so this saves some time on slow systems. .SUFFIXES: @@ -79,13 +57,12 @@ endif OBJS-$(HAVE_MMX) += $(MMX-OBJS-yes) -CFLAGS += $(CFLAGS-yes) OBJS += $(OBJS-yes) FFLIBS := $(FFLIBS-yes) $(FFLIBS) TESTPROGS += $(TESTPROGS-yes) FFEXTRALIBS := $(addprefix -l,$(addsuffix $(BUILDSUF),$(FFLIBS))) $(EXTRALIBS) -FFLDFLAGS := $(addprefix -L$(BUILD_ROOT)/lib,$(ALLFFLIBS)) $(LDFLAGS) +FFLDFLAGS := $(addprefix -Llib,$(ALLFFLIBS)) $(LDFLAGS) EXAMPLES := $(addprefix $(SUBDIR),$(addsuffix -example$(EXESUF),$(EXAMPLES))) OBJS := $(addprefix $(SUBDIR),$(sort $(OBJS))) @@ -94,7 +71,7 @@ TESTPROGS := $(addprefix $(SUBDIR),$(addsuffix -test$(EXESUF),$(TESTPROGS))) HOSTOBJS := $(addprefix $(SUBDIR),$(addsuffix .o,$(HOSTPROGS))) HOSTPROGS := $(addprefix $(SUBDIR),$(addsuffix $(HOSTEXESUF),$(HOSTPROGS))) -DEP_LIBS := $(foreach NAME,$(FFLIBS),$(BUILD_ROOT_REL)/lib$(NAME)/$($(CONFIG_SHARED:yes=S)LIBNAME)) +DEP_LIBS := $(foreach NAME,$(FFLIBS),lib$(NAME)/$($(CONFIG_SHARED:yes=S)LIBNAME)) ALLHEADERS := $(subst $(SRC_DIR)/,$(SUBDIR),$(wildcard $(SRC_DIR)/*.h 
$(SRC_DIR)/$(ARCH)/*.h)) SKIPHEADERS += $(addprefix $(ARCH)/,$(ARCH_HEADERS)) @@ -106,8 +106,8 @@ Configuration options: --disable-lpc disable LPC code --disable-mdct disable MDCT code --disable-rdft disable RDFT code - --disable-vaapi disable VAAPI code - --disable-vdpau disable VDPAU code + --enable-vaapi enable VAAPI code + --enable-vdpau enable VDPAU code --disable-dxva2 disable DXVA2 code --enable-runtime-cpudetect detect cpu capabilities at runtime (bigger binary) --enable-hardcoded-tables use hardcoded tables instead of runtime generation @@ -192,7 +192,6 @@ External library support: --enable-zlib enable zlib [autodetect] Advanced options (experts only): - --source-path=PATH path to source code [$source_path] --cross-prefix=PREFIX use PREFIX for compilation tools [$cross_prefix] --enable-cross-compile assume a cross-compiler is used --sysroot=PATH root of cross-build tree @@ -956,6 +955,7 @@ CONFIG_LIST=" mdct memalign_hack mlib + mpegaudiodsp network nonfree pic @@ -1023,6 +1023,7 @@ ARCH_EXT_LIST=' ppc4xx sse ssse3 + vfpv3 vis ' @@ -1075,8 +1076,6 @@ HAVE_LIST=" isatty kbhit ldbrx - libdc1394_1 - libdc1394_2 llrint llrintf local_aligned_16 @@ -1183,13 +1182,13 @@ CMDLINE_SET=" host_ldflags host_libs host_os + install ld logfile malloc_prefix nm pkg_config samples - source_path strip sysinclude sysroot @@ -1212,6 +1211,7 @@ armv6t2_deps="arm" armvfp_deps="arm" iwmmxt_deps="arm" neon_deps="arm" +vfpv3_deps="armvfp" mmi_deps="mips" @@ -1243,6 +1243,7 @@ symver_if_any="symver_asm_label symver_gnu_asm" dct_select="rdft" mdct_select="fft" rdft_select="fft" +mpegaudiodsp_select="dct" # decoders / encoders / hardware accelerators aac_decoder_select="mdct sinewin" @@ -1265,6 +1266,7 @@ dca_decoder_select="mdct" dnxhd_encoder_select="aandct" dxa_decoder_select="zlib" eac3_decoder_select="ac3_decoder" +eac3_encoder_select="mdct ac3dsp" eamad_decoder_select="aandct" eatgq_decoder_select="aandct" eatqi_decoder_select="aandct" @@ -1296,11 +1298,16 @@ 
ljpeg_encoder_select="aandct" loco_decoder_select="golomb" mjpeg_encoder_select="aandct" mlp_decoder_select="mlp_parser" -mp1float_decoder_select="dct" -mp2float_decoder_select="dct" -mp3adufloat_decoder_select="dct" -mp3float_decoder_select="dct" -mp3on4float_decoder_select="dct" +mp1_decoder_select="mpegaudiodsp" +mp2_decoder_select="mpegaudiodsp" +mp3adu_decoder_select="mpegaudiodsp" +mp3_decoder_select="mpegaudiodsp" +mp3on4_decoder_select="mpegaudiodsp" +mp1float_decoder_select="mpegaudiodsp" +mp2float_decoder_select="mpegaudiodsp" +mp3adufloat_decoder_select="mpegaudiodsp" +mp3float_decoder_select="mpegaudiodsp" +mp3on4float_decoder_select="mpegaudiodsp" mpeg1video_encoder_select="aandct" mpeg2video_encoder_select="aandct" mpeg4_decoder_select="h263_decoder mpeg4video_parser" @@ -1330,7 +1337,7 @@ nellymoser_encoder_select="mdct sinewin" png_decoder_select="zlib" png_encoder_select="zlib" qcelp_decoder_select="lsp" -qdm2_decoder_select="mdct rdft" +qdm2_decoder_select="mdct rdft mpegaudiodsp" ra_144_encoder_select="lpc" rv10_decoder_select="h263_decoder" rv10_encoder_select="h263_encoder" @@ -1455,12 +1462,15 @@ w64_demuxer_deps="wav_demuxer" alsa_indev_deps="alsa_asoundlib_h snd_pcm_htimestamp" alsa_outdev_deps="alsa_asoundlib_h" bktr_indev_deps_any="dev_bktr_ioctl_bt848_h machine_ioctl_bt848_h dev_video_bktr_ioctl_bt848_h dev_ic_bt8xx_h" +dshow_indev_deps="IBaseFilter" +dshow_indev_extralibs="-lpsapi -lole32 -lstrmiids -luuid" dv1394_indev_deps="dv1394 dv_demuxer" fbdev_indev_deps="linux_fb_h" jack_indev_deps="jack_jack_h sem_timedwait" libdc1394_indev_deps="libdc1394" oss_indev_deps_any="soundcard_h sys_soundcard_h" oss_outdev_deps_any="soundcard_h sys_soundcard_h" +sdl_outdev_deps="sdl" sndio_indev_deps="sndio_h" sndio_outdev_deps="sndio_h" v4l_indev_deps="linux_videodev_h" @@ -1479,7 +1489,7 @@ mmst_protocol_deps="network" rtmp_protocol_select="tcp_protocol" rtp_protocol_select="udp_protocol" tcp_protocol_deps="network" -udp_protocol_deps="network 
pthreads" +udp_protocol_deps="network" # filters blackframe_filter_deps="gpl" @@ -1633,6 +1643,7 @@ ar_default="ar" cc_default="gcc" cc_version=\"unknown\" host_cc_default="gcc" +install="install" ln_s="ln -sf" nm_default="nm" objformat="elf" @@ -1691,7 +1702,7 @@ LIB_INSTALL_EXTRA_CMD='$$(RANLIB) "$(LIBDIR)/$(LIBNAME)"' CC_O='-o $@' -host_cflags='-D_ISOC99_SOURCE -O3 -g -Wall' +host_cflags='-D_ISOC99_SOURCE -O3 -g' host_libs='-lm' target_path='$(CURDIR)' @@ -1702,13 +1713,12 @@ DEPEND_CMD='$(DEPCC) $(DEPFLAGS) $< | sed -e "/^\#.*/d" -e "s,^[[:space:]]*$(*F) DEPFLAGS='$(CPPFLAGS) $(CFLAGS) -MM' # find source path -source_path="$(dirname "$0")" -enable source_path_used if test -f configure; then source_path="$(pwd)" disable source_path_used else - source_path="$(cd "$source_path"; pwd)" + source_path=$(cd $(dirname "$0"); pwd) + enable source_path_used echo "$source_path" | grep -q '[[:blank:]]' && die "Out of tree builds are impossible with whitespace in source path." test -e "$source_path/config.h" && @@ -2354,6 +2364,7 @@ check_cc -D_LARGEFILE_SOURCE <<EOF && add_cppflags -D_LARGEFILE_SOURCE EOF check_host_cflags -std=c99 +check_host_cflags -Wall case "$arch" in alpha|ia64|mips|parisc|sparc) @@ -2478,11 +2489,13 @@ case $target_os in objformat="win32" enable dos_paths check_cflags -fno-common + add_cppflags -U__STRICT_ANSI__ ;; *-dos|freedos|opendos) network_extralibs="-lsocket" objformat="coff" enable dos_paths + add_cppflags -U__STRICT_ANSI__ ;; linux) add_cppflags -D_POSIX_C_SOURCE=200112 -D_XOPEN_SOURCE=600 @@ -2495,6 +2508,8 @@ case $target_os in os/2*) strip="lxlite -CS" ln_s="cp -f" + objformat="aout" + add_cppflags -D_GNU_SOURCE add_ldflags -Zomf -Zbin-files -Zargs-wild -Zmap SHFLAGS='$(SUBDIR)$(NAME).def -Zdll -Zomf' FFSERVERLDFLAGS="" @@ -2516,6 +2531,7 @@ case $target_os in enable dos_paths ;; gnu/kfreebsd) + add_cppflags -D_POSIX_C_SOURCE=200112 -D_XOPEN_SOURCE=600 -D_BSD_SOURCE ;; gnu) ;; @@ -2644,6 +2660,7 @@ EOF enabled armvfp && check_asm armvfp 
'"fadds s0, s0, s0"' enabled iwmmxt && check_asm iwmmxt '"wunpckelub wr6, wr4"' enabled neon && check_asm neon '"vadd.i16 q0, q0, q0"' + enabled vfpv3 && check_asm vfpv3 '"vmov.f32 s0, #1.0"' enabled_all armv6t2 shared !pic && enable_pic @@ -2750,7 +2767,7 @@ EOF check_yasm "pextrd [eax], xmm0, 1" && enable yasm || die "yasm not found, use --disable-yasm for a crippled build" - check_yasm "vpaddw xmm0, xmm0, xmm0" || disable avx + check_yasm "vextractf128 xmm0, ymm0, 0" || disable avx fi case "$cpu" in @@ -2843,11 +2860,6 @@ check_header X11/extensions/XvMClib.h check_struct dxva2api.h DXVA_PictureParameters wDecodedPictureIndex -if ! enabled_any memalign memalign_hack posix_memalign malloc_aligned && - enabled_any $need_memalign ; then - die "Error, no aligned memory allocator but SSE enabled, disable it or use --enable-memalign-hack." -fi - disabled zlib || check_lib zlib.h zlibVersion -lz || disable zlib disabled bzlib || check_lib2 bzlib.h BZ2_bzlibVersion -lbz2 || disable bzlib @@ -2899,6 +2911,7 @@ check_mathfunc truncf enabled avisynth && require2 vfw32 "windows.h vfw.h" AVIFileInit -lavifil32 enabled libcelt && require libcelt celt/celt.h celt_decode -lcelt0 enabled frei0r && { check_header frei0r.h || die "ERROR: frei0r.h header not found"; } +enabled libdc1394 && require_pkg_config libdc1394-2 dc1394/dc1394.h dc1394_new enabled libdirac && require_pkg_config dirac \ "libdirac_decoder/dirac_parser.h libdirac_encoder/dirac_encoder.h" \ "dirac_decoder_init dirac_encoder_init" @@ -2909,7 +2922,7 @@ enabled libmp3lame && require "libmp3lame >= 3.98.3" lame/lame.h lame_set_VBR_q enabled libnut && require libnut libnut.h nut_demuxer_init -lnut enabled libopencore_amrnb && require libopencore_amrnb opencore-amrnb/interf_dec.h Decoder_Interface_init -lopencore-amrnb enabled libopencore_amrwb && require libopencore_amrwb opencore-amrwb/dec_if.h D_IF_init -lopencore-amrwb -enabled libopencv && require_pkg_config opencv opencv/cv.h cvCreateImageHeader +enabled 
libopencv && require_pkg_config opencv opencv/cxcore.h cvCreateImageHeader enabled libopenjpeg && require libopenjpeg openjpeg.h opj_version -lopenjpeg enabled librtmp && require_pkg_config librtmp librtmp/rtmp.h RTMP_Socket enabled libschroedinger && require_pkg_config schroedinger-1.0 schroedinger/schro.h schro_init @@ -2924,36 +2937,28 @@ enabled libvpx && { enabled libvpx_encoder && { check_lib2 "vpx/vpx_encoder.h vpx/vp8cx.h" "vpx_codec_enc_init_ver VPX_CQ" -lvpx || die "ERROR: libvpx encoder version must be >=0.9.6"; } } enabled libx264 && require libx264 x264.h x264_encoder_encode -lx264 && - { check_cpp_condition x264.h "X264_BUILD >= 99" || - die "ERROR: libx264 version must be >= 0.99."; } + { check_cpp_condition x264.h "X264_BUILD >= 115" || + die "ERROR: libx264 version must be >= 0.115."; } enabled libxavs && require libxavs xavs.h xavs_encoder_encode -lxavs enabled libxvid && require libxvid xvid.h xvid_global -lxvidcore enabled mlib && require mediaLib mlib_types.h mlib_VectorSub_S16_U8_Mod -lmlib -# libdc1394 check -if enabled libdc1394; then - { check_lib dc1394/dc1394.h dc1394_new -ldc1394 -lraw1394 && - enable libdc1394_2; } || - { check_lib libdc1394/dc1394_control.h dc1394_create_handle -ldc1394_control -lraw1394 && - enable libdc1394_1; } || - die "ERROR: No version of libdc1394 found " -fi - SDL_CONFIG="${cross_prefix}sdl-config" -if "${SDL_CONFIG}" --version > /dev/null 2>&1; then +if check_pkg_config sdl SDL_version.h SDL_Linked_Version; then + check_cpp_condition SDL.h "(SDL_MAJOR_VERSION<<16 | SDL_MINOR_VERSION<<8 | SDL_PATCHLEVEL) >= 0x010201" $sdl_cflags && + enable sdl && + check_struct SDL.h SDL_VideoInfo current_w $sdl_cflags && enable sdl_video_size +else + if "${SDL_CONFIG}" --version > /dev/null 2>&1; then sdl_cflags=$("${SDL_CONFIG}" --cflags) sdl_libs=$("${SDL_CONFIG}" --libs) - check_func_headers SDL.h SDL_Init $sdl_cflags $sdl_libs && + check_func_headers SDL_version.h SDL_Linked_Version $sdl_cflags $sdl_libs && 
check_cpp_condition SDL.h "(SDL_MAJOR_VERSION<<16 | SDL_MINOR_VERSION<<8 | SDL_PATCHLEVEL) >= 0x010201" $sdl_cflags && enable sdl && check_struct SDL.h SDL_VideoInfo current_w $sdl_cflags && enable sdl_video_size -else - if check_pkg_config sdl SDL_version.h SDL_Linked_Version; then - check_cpp_condition SDL.h "(SDL_MAJOR_VERSION<<16 | SDL_MINOR_VERSION<<8 | SDL_PATCHLEVEL) >= 0x010201" $sdl_cflags && - enable sdl && - check_struct SDL.h SDL_VideoInfo current_w $sdl_cflags && enable sdl_video_size - fi + fi fi +enabled sdl && add_cflags $sdl_cflags && add_extralibs $sdl_libs texi2html -version > /dev/null 2>&1 && enable texi2html || disable texi2html @@ -2967,6 +2972,8 @@ check_func_headers "windows.h vfw.h" capCreateCaptureWindow "$vfwcap_indev_extra # w32api 3.12 had it defined wrong check_cpp_condition vfw.h "WM_CAP_DRIVER_CONNECT > WM_USER" && enable vfwcap_defines +check_type "dshow.h" IBaseFilter + # check for ioctl_meteor.h, ioctl_bt848.h and alternatives { check_header dev/bktr/ioctl_meteor.h && check_header dev/bktr/ioctl_bt848.h; } || @@ -3036,7 +3043,7 @@ enabled extra_warnings && check_cflags -Winline # add some linker flags check_ldflags -Wl,--warn-common -check_ldflags '-Wl,-rpath-link,\$(BUILD_ROOT)/libpostproc -Wl,-rpath-link,\$(BUILD_ROOT)/libswscale -Wl,-rpath-link,\$(BUILD_ROOT)/libavfilter -Wl,-rpath-link,\$(BUILD_ROOT)/libavdevice -Wl,-rpath-link,\$(BUILD_ROOT)/libavformat -Wl,-rpath-link,\$(BUILD_ROOT)/libavcodec -Wl,-rpath-link,\$(BUILD_ROOT)/libavutil' +check_ldflags -Wl,-rpath-link,libpostproc -Wl,-rpath-link,libswscale -Wl,-rpath-link,libavfilter -Wl,-rpath-link,libavdevice -Wl,-rpath-link,libavformat -Wl,-rpath-link,libavcodec -Wl,-rpath-link,libavutil test_ldflags -Wl,-Bsymbolic && append SHFLAGS -Wl,-Bsymbolic echo "X{};" > $TMPV @@ -3138,6 +3145,9 @@ check_deps $CONFIG_LIST \ enabled asm || { arch=c; disable $ARCH_LIST $ARCH_EXT_LIST; } +! 
enabled_any memalign posix_memalign malloc_aligned && + enabled_any $need_memalign && enable memalign_hack + echo "install prefix $prefix" echo "source path $source_path" echo "C compiler $cc" @@ -3151,13 +3161,14 @@ fi echo "big-endian ${bigendian-no}" echo "runtime cpu detection ${runtime_cpudetect-no}" if enabled x86; then - echo "yasm ${yasm-no}" + echo "${yasmexe} ${yasm-no}" echo "MMX enabled ${mmx-no}" echo "MMX2 enabled ${mmx2-no}" echo "3DNow! enabled ${amd3dnow-no}" echo "3DNow! extended enabled ${amd3dnowext-no}" echo "SSE enabled ${sse-no}" echo "SSSE3 enabled ${ssse3-no}" + echo "AVX enabled ${avx-no}" echo "CMOV enabled ${cmov-no}" echo "CMOV is fast ${fast_cmov-no}" echo "EBX available ${ebx_available-no}" @@ -3302,7 +3313,6 @@ DATADIR=\$(DESTDIR)$datadir MANDIR=\$(DESTDIR)$mandir SRC_PATH="$source_path" SRC_PATH_BARE=$source_path -BUILD_ROOT="$PWD" CC_IDENT=$cc_ident ARCH=$arch CC=$cc @@ -3349,7 +3359,7 @@ SDL_LIBS=$sdl_libs SDL_CFLAGS=$sdl_cflags LIB_INSTALL_EXTRA_CMD=$LIB_INSTALL_EXTRA_CMD EXTRALIBS=$extralibs -INSTALL=install +INSTALL=$install LIBTARGET=${LIBTARGET} SLIBNAME=${SLIBNAME} SLIBNAME_WITH_VERSION=${SLIBNAME_WITH_VERSION} diff --git a/doc/APIchanges b/doc/APIchanges index 6635ec1f30..e8dc0642fa 100644 --- a/doc/APIchanges +++ b/doc/APIchanges @@ -13,33 +13,107 @@ libavutil: 2011-04-18 API changes, most recent first: -2011-05-10 - 188dea1 - lavc 53.3.0 - avcodec.h +2011-06-12 - xxxxxxx - lavfi 2.18.0 - avcodec.h + Add avfilter_get_video_buffer_ref_from_frame() function in + libavfilter/avcodec.h. + +2011-06-12 - xxxxxxx - lavfi 2.17.0 - avfiltergraph.h + Add avfilter_inout_alloc() and avfilter_inout_free() functions. + +2011-06-12 - xxxxxxx - lavfi 2.16.0 - avfilter_graph_parse() + Change avfilter_graph_parse() signature. + +2011-06-xx - xxxxxxx - lavu 51.6.0 - opt.h + Add av_opt_flag_is_set(). 
+ +2011-06-10 - c381960 - lavfi 2.15.0 - avfilter_get_audio_buffer_ref_from_arrays + Add avfilter_get_audio_buffer_ref_from_arrays() to avfilter.h. + +2011-06-09 - d9f80ea - lavu 51.8.0 - AVMetadata + Move AVMetadata from lavf to lavu and rename it to + AVDictionary -- new installed header dict.h. + All av_metadata_* functions renamed to av_dict_*. + +2011-06-07 - a6703fa - lavu 51.8.0 - av_get_bytes_per_sample() + Add av_get_bytes_per_sample() in libavutil/samplefmt.h. + Deprecate av_get_bits_per_sample_fmt(). + +2011-06-xx - b39b062 - lavu 51.8.0 - opt.h + Add av_opt_free convenience function. + +2011-06-06 - 95a0242 - lavfi 2.14.0 - AVFilterBufferRefAudioProps + Remove AVFilterBufferRefAudioProps.size, and use nb_samples in + avfilter_get_audio_buffer() and avfilter_default_get_audio_buffer() in + place of size. + +2011-06-06 - 0bc2cca - lavu 51.6.0 - av_samples_alloc() + Switch nb_channels and nb_samples parameters order in + av_samples_alloc(). + +2011-06-06 - e1c7414 - lavu 51.5.0 - av_samples_* + Change the data layout created by av_samples_fill_arrays() and + av_samples_alloc(). + +2011-06-06 - 27bcf55 - lavfi 2.13.0 - vsrc_buffer.h + Make av_vsrc_buffer_add_video_buffer_ref() accept an additional + flags parameter in input. + +2011-06-03 - e977ca2 - lavfi 2.12.0 - avfilter_link_free() + Add avfilter_link_free() function. + +2011-06-02 - 5ad38d9 - lavu 51.4.0 - av_force_cpu_flags() + Add av_force_cpu_flags() in libavutil/cpu.h. + +2011-05-28 - e71f260 - lavu 51.3.0 - pixdesc.h + Add av_get_pix_fmt_name() in libavutil/pixdesc.h, and deprecate + avcodec_get_pix_fmt_name() in libavcodec/avcodec.h in its favor. + +2011-05-25 - 30315a8 - lavf 53.3.0 - avformat.h + Add fps_probe_size to AVFormatContext. + +2011-05-22 - 5ecdfd0 - lavf 53.2.0 - avformat.h + Introduce avformat_alloc_output_context2() and deprecate + avformat_alloc_output_context(). + +2011-05-22 - 83db719 - lavfi 2.10.0 - vsrc_buffer.h + Make libavfilter/vsrc_buffer.h public. 
+ +2011-05-19 - c000a9f - lavfi 2.8.0 - avcodec.h + Add av_vsrc_buffer_add_frame() to libavfilter/avcodec.h. + +2011-05-14 - 9fdf772 - lavfi 2.6.0 - avcodec.h + Add avfilter_get_video_buffer_ref_from_frame() to libavfilter/avcodec.h. + +2011-05-18 - 64150ff - lavc 53.7.0 - AVCodecContext.request_sample_fmt + Add request_sample_fmt field to AVCodecContext. + +2011-05-10 - 188dea1 - lavc 53.6.0 - avcodec.h Deprecate AVLPCType and the following fields in AVCodecContext: lpc_coeff_precision, prediction_order_method, min_partition_order, max_partition_order, lpc_type, lpc_passes. Corresponding FLAC encoder options should be used instead. -2011-05-07 - xxxxxxx - lavfi 2.5.0 - avcodec.h +2011-05-07 - 9fdf772 - lavfi 2.5.0 - avcodec.h Add libavfilter/avcodec.h header and avfilter_copy_frame_props() function. -2011-05-07 - xxxxxxx - lavc 53.5.0 - AVFrame +2011-05-07 - 18ded93 - lavc 53.5.0 - AVFrame Add format field to AVFrame. -2011-05-07 - xxxxxxx - lavc 53.4.0 - AVFrame +2011-05-07 - 22333a6 - lavc 53.4.0 - AVFrame Add width and height fields to AVFrame. -2011-05-01 - xxxxxxx - lavfi 2.4.0 - avfilter.h +2011-05-01 - 35fe66a - lavfi 2.4.0 - avfilter.h Rename AVFilterBufferRefVideoProps.pixel_aspect to sample_aspect_ratio. -2011-05-01 - xxxxxxx - lavc 53.3.0 - AVFrame +2011-05-01 - 77e9dee - lavc 53.3.0 - AVFrame Add a sample_aspect_ratio field to AVFrame. -2011-05-01 - xxxxxxx - lavc 53.2.0 - AVFrame +2011-05-01 - 1ba5727 - lavc 53.2.0 - AVFrame Add a pkt_pos field to AVFrame. -2011-04-xx - xxxxxxx - lavu 51.2.0 - mem.h +2011-04-29 - 35ceaa7 - lavu 51.2.0 - mem.h Add av_dynarray_add function for adding an element to a dynamic array. 
@@ -94,9 +168,6 @@ API changes, most recent first: 333e894 deprecate url_open_protocol e230705 deprecate url_poll and URLPollEntry -2011-04-10 - lavu 50.40.0 - pixfmt.h - Add PIX_FMT_BGR48LE and PIX_FMT_BGR48BE pixel formats - 2011-04-08 - lavf 52.106.0 - avformat.h Minor avformat.h cleanup: a9bf9d8 deprecate av_guess_image2_codec @@ -145,7 +216,7 @@ API changes, most recent first: 2011-03-25 - 34b47d7 - lavc 52.115.0 - AVCodecContext.audio_service_type Add audio_service_type field to AVCodecContext. -2011-XX-XX - XXXXXXX - lavu XX.XXX.X - pixfmt.h +2011-03-17 - e309fdc - lavu 50.40.0 - pixfmt.h Add PIX_FMT_BGR48LE and PIX_FMT_BGR48BE pixel formats 2011-03-02 - 863c471 - lavf 52.103.0 - av_pkt_dump2, av_pkt_dump_log2 @@ -214,10 +285,10 @@ API changes, most recent first: 2011-02-02 - dfd2a00 - lavu 50.37.0 - log.h Make av_dlog public. -2011-01-31 - X - lavfi 1.76.0 - vsrc_buffer +2011-01-31 - 7b3ea55 - lavfi 1.76.0 - vsrc_buffer Add sample_aspect_ratio fields to vsrc_buffer arguments -2011-01-31 - X - lavfi 1.75.0 - AVFilterLink sample_aspect_ratio +2011-01-31 - 910b5b8 - lavfi 1.75.0 - AVFilterLink sample_aspect_ratio Add sample_aspect_ratio field to AVFilterLink. 2011-01-15 - r26374 - lavfi 1.74.0 - AVFilterBufferRefAudioProps diff --git a/doc/decoders.texi b/doc/decoders.texi new file mode 100644 index 0000000000..18ac2fa8db --- /dev/null +++ b/doc/decoders.texi @@ -0,0 +1,50 @@ +@chapter Decoders +@c man begin DECODERS + +Decoders are configured elements in FFmpeg which allow the decoding of +multimedia streams. + +When you configure your FFmpeg build, all the supported native decoders +are enabled by default. Decoders requiring an external library must be enabled +manually via the corresponding @code{--enable-lib} option. You can list all +available decoders using the configure option @code{--list-decoders}. 
+ +You can disable all the decoders with the configure option +@code{--disable-decoders} and selectively enable / disable single decoders +with the options @code{--enable-decoder=@var{DECODER}} / +@code{--disable-decoder=@var{DECODER}}. + +The option @code{-codecs} of the ff* tools will display the list of +enabled decoders. + +@c man end DECODERS + +@chapter Video Decoders +@c man begin VIDEO DECODERS + +A description of some of the currently available video decoders +follows. + +@section rawvideo + +Rawvideo decoder. + +This decoder decodes rawvideo streams. + +@subsection Options + +@table @option +@item top @var{top_field_first} +Specify the assumed field type of the input video. +@table @option +@item -1 +the video is assumed to be progressive (default) +@item 0 +bottom-field-first is assumed +@item 1 +top-field-first is assumed +@end table + +@end table + +@c man end VIDEO DECODERS diff --git a/doc/developer.texi b/doc/developer.texi index 4ff3f0380b..69c2951620 100644 --- a/doc/developer.texi +++ b/doc/developer.texi @@ -392,6 +392,8 @@ send a reminder by email. Your patch should eventually be dealt with. improves readability. @item Consider to add a regression test for your code. +@item + If you added YASM code please check that things still work with --disable-yasm @end enumerate @section Patch review process diff --git a/doc/encoders.texi b/doc/encoders.texi index 2855d89f7a..a54f726397 100644 --- a/doc/encoders.texi +++ b/doc/encoders.texi @@ -17,11 +17,15 @@ with the options @code{--enable-encoder=@var{ENCODER}} / The option @code{-codecs} of the ff* tools will display the list of enabled encoders. -A description of some of the currently available encoders follows. +@c man end ENCODERS + +@chapter Audio Encoders +@c man begin AUDIO ENCODERS -@section Audio Encoders +A description of some of the currently available audio encoders +follows. -@subsection ac3 and ac3_fixed +@section ac3 and ac3_fixed AC-3 audio encoders. 
@@ -36,7 +40,7 @@ quality audio for a given bitrate. The @var{ac3_fixed} encoder is not the default codec for any of the output formats, so it must be specified explicitly using the option @code{-acodec ac3_fixed} in order to use it. -@subheading AC-3 Metadata +@subsection AC-3 Metadata The AC-3 metadata options are used to set parameters that describe the audio, but in most cases do not affect the audio encoding itself. Some of the options @@ -55,7 +59,7 @@ documents. @item @uref{http://www.dolby.com/uploadedFiles/zz-_Shared_Assets/English_PDFs/Professional/46_DDEncodingGuidelines.pdf,Dolby Digital Professional Encoding Guidelines} @end itemize -@subsubheading Metadata Control Options +@subsubsection Metadata Control Options @table @option @@ -72,7 +76,7 @@ Metadata values can be changed before encoding each frame. @end table -@subsubheading Downmix Levels +@subsubsection Downmix Levels @table @option @@ -106,7 +110,7 @@ Silence Surround Channel(s) @end table -@subsubheading Audio Production Information +@subsubsection Audio Production Information Audio Production Information is optional information describing the mixing environment. Either none or both of the fields are written to the bitstream. @@ -140,7 +144,7 @@ Small Room @end table -@subsubheading Other Metadata Options +@subsubsection Other Metadata Options @table @option @@ -195,7 +199,7 @@ Original Source (default) @end table -@subsubheading Extended Bitstream Information +@subsection Extended Bitstream Information The extended bitstream options are part of the Alternate Bit Stream Syntax as specified in Annex D of the A/52:2010 standard. It is grouped into 2 parts. If any one parameter in a group is specified, all values in that group will be @@ -205,7 +209,7 @@ will use these values instead of the ones specified in the @code{center_mixlev} and @code{surround_mixlev} options if it supports the Alternate Bit Stream Syntax. 
-@subsubheading Extended Bitstream Information - Part 1 +@subsubsection Extended Bitstream Information - Part 1 @table @option @@ -302,7 +306,7 @@ Silence Surround Channel(s) @end table -@subsubheading Extended Bitstream Information - Part 2 +@subsubsection Extended Bitstream Information - Part 2 @table @option @@ -353,7 +357,7 @@ HDCD A/D Converter @end table -@subheading Other AC-3 Encoding Options +@subsection Other AC-3 Encoding Options @table @option @@ -365,4 +369,223 @@ is highly recommended that it be left as enabled except for testing purposes. @end table -@c man end ENCODERS +@subsection Floating-Point-Only AC-3 Encoding Options + +These options are only valid for the floating-point encoder and do not exist +for the fixed-point encoder due to the corresponding features not being +implemented in fixed-point. + +@table @option + +@item -channel_coupling @var{boolean} +Enables/Disables use of channel coupling, which is an optional AC-3 feature +that increases quality by combining high frequency information from multiple +channels into a single channel. The per-channel high frequency information is +sent with less accuracy in both the frequency and time domains. This allows +more bits to be used for lower frequencies while preserving enough information +to reconstruct the high frequencies. This option is enabled by default for the +floating-point encoder and should generally be left as enabled except for +testing purposes or to increase encoding speed. +@table @option +@item -1 +@itemx auto +Selected by Encoder (default) +@item 0 +@itemx off +Disable Channel Coupling +@item 1 +@itemx on +Enable Channel Coupling +@end table + +@item -cpl_start_band @var{number} +Coupling Start Band. Sets the channel coupling start band, from 1 to 15. If a +value higher than the bandwidth is used, it will be reduced to 1 less than the +coupling end band. 
If @var{auto} is used, the start band will be determined by +the encoder based on the bit rate, sample rate, and channel layout. This option +has no effect if channel coupling is disabled. +@table @option +@item -1 +@itemx auto +Selected by Encoder (default) +@end table + +@end table + +@c man end AUDIO ENCODERS + +@chapter Video Encoders +@c man begin VIDEO ENCODERS + +A description of some of the currently available video encoders +follows. + +@section libvpx + +VP8 format supported through libvpx. + +Requires the presence of the libvpx headers and library during configuration. +You need to explicitly configure the build with @code{--enable-libvpx}. + +@subsection Options + +Mapping from FFmpeg to libvpx options with conversion notes in parentheses. + +@table @option + +@item threads +g_threads + +@item profile +g_profile + +@item vb +rc_target_bitrate + +@item g +kf_max_dist + +@item keyint_min +kf_min_dist + +@item qmin +rc_min_quantizer + +@item qmax +rc_max_quantizer + +@item bufsize, vb +rc_buf_sz +@code{(bufsize * 1000 / vb)} + +rc_buf_optimal_sz +@code{(bufsize * 1000 / vb * 5 / 6)} + +@item rc_init_occupancy, vb +rc_buf_initial_sz +@code{(rc_init_occupancy * 1000 / vb)} + +@item rc_buffer_aggressivity +rc_undershoot_pct + +@item skip_threshold +rc_dropframe_thresh + +@item qcomp +rc_2pass_vbr_bias_pct + +@item maxrate, vb +rc_2pass_vbr_maxsection_pct +@code{(maxrate * 100 / vb)} + +@item minrate, vb +rc_2pass_vbr_minsection_pct +@code{(minrate * 100 / vb)} + +@item minrate, maxrate, vb +@code{VPX_CBR} +@code{(minrate == maxrate == vb)} + +@item crf +@code{VPX_CQ}, @code{VP8E_SET_CQ_LEVEL} + +@item quality +@table @option +@item @var{best} +@code{VPX_DL_BEST_QUALITY} +@item @var{good} +@code{VPX_DL_GOOD_QUALITY} +@item @var{realtime} +@code{VPX_DL_REALTIME} +@end table + +@item speed +@code{VP8E_SET_CPUUSED} + +@item nr +@code{VP8E_SET_NOISE_SENSITIVITY} + +@item mb_threshold +@code{VP8E_SET_STATIC_THRESHOLD} + +@item slices 
+@code{VP8E_SET_TOKEN_PARTITIONS} + +@item Alternate reference frame related +@table @option +@item vp8flags altref +@code{VP8E_SET_ENABLEAUTOALTREF} +@item @var{arnr_max_frames} +@code{VP8E_SET_ARNR_MAXFRAMES} +@item @var{arnr_type} +@code{VP8E_SET_ARNR_TYPE} +@item @var{arnr_strength} +@code{VP8E_SET_ARNR_STRENGTH} +@item @var{rc_lookahead} +g_lag_in_frames +@end table + +@item vp8flags error_resilient +g_error_resilient + +@end table + +For more information about libvpx see: +@url{http://www.webmproject.org/} + +@section libx264 + +H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 format supported through +libx264. + +Requires the presence of the libx264 headers and library during +configuration. You need to explicitly configure the build with +@code{--enable-libx264}. + +@subsection Options + +@table @option + +@item preset @var{preset_name} +Set the encoding preset. + +@item tune @var{tune_name} +Tune the encoding params. +Deprecated in favor of @var{x264_opts} + +@item fastfirstpass @var{bool} +Use fast settings when encoding first pass, default value is 1. +Deprecated in favor of @var{x264_opts}. + +@item profile @var{profile_name} +Set profile restrictions. +Deprecated in favor of @var{x264_opts}. + +@item level @var{level} +Specify level (as defined by Annex A). +Deprecated in favor of @var{x264_opts}. + +@item passlogfile @var{filename} +Specify filename for 2 pass stats. +Deprecated in favor of @var{x264_opts}. + +@item wpredp @var{wpred_type} +Specify Weighted prediction for P-frames. +Deprecated in favor of @var{x264_opts}. + +@item x264opts @var{options} +Allow to set any x264 option, see x264 manual for a list. + +@var{options} is a list of @var{key}=@var{value} couples separated by +":". 
+@end table + +For example, to specify libx264 encoding options with @file{ffmpeg}: +@example +ffmpeg -i foo.mpg -vcodec libx264 -x264opts keyint=123:min-keyint=20 -an out.mkv +@end example + +For more information about libx264 and the supported options see: +@url{http://www.videolan.org/developers/x264.html} + +@c man end VIDEO ENCODERS diff --git a/doc/eval.texi b/doc/eval.texi index e07267bdfa..ac27750851 100644 --- a/doc/eval.texi +++ b/doc/eval.texi @@ -76,6 +76,13 @@ integer. For example, "trunc(-1.5)" is "-1.0". @item sqrt(expr) Compute the square root of @var{expr}. This is equivalent to "(@var{expr})^.5". + +@item not(expr) +Return 1.0 if @var{expr} is zero, 0.0 otherwise. + +@item pow(x, y) +Compute @var{x} raised to the power of @var{y}; it is equivalent to +"(@var{x})^(@var{y})". @end table Note that: @@ -93,11 +100,6 @@ is equivalent to A*B + not(A)*C @end example -When A evaluates to either 1 or 0, that is the same as -@example -A*B + eq(A,0)*C -@end example - In your C code, you can extend the list of unary and binary functions, and define recognized constants, so that they are available for your expressions.
diff --git a/doc/examples/Makefile b/doc/examples/Makefile new file mode 100644 index 0000000000..c32d524da4 --- /dev/null +++ b/doc/examples/Makefile @@ -0,0 +1,21 @@ +# use pkg-config for getting CFLAGS and LDFLAGS +FFMPEG_LIBS=libavdevice libavformat libavfilter libavcodec libswscale libavutil +CFLAGS+=$(shell pkg-config --cflags $(FFMPEG_LIBS)) +LDFLAGS+=$(shell pkg-config --libs $(FFMPEG_LIBS)) + +EXAMPLES=encoding-example muxing-example + +OBJS=$(addsuffix .o,$(EXAMPLES)) + +%: %.o + $(CC) $< $(LDFLAGS) -o $@ + +%.o: %.c + $(CC) $< $(CFLAGS) -c -o $@ + +.PHONY: all clean + +all: $(OBJS) $(EXAMPLES) + +clean: + rm -rf $(EXAMPLES) $(OBJS) diff --git a/libavcodec/api-example.c b/doc/examples/encoding-example.c index f34075e666..a32c09ab69 100644 --- a/libavcodec/api-example.c +++ b/doc/examples/encoding-example.c @@ -1,5 +1,5 @@ /* - * copyright (c) 2001 Fabrice Bellard + * Copyright (c) 2001 Fabrice Bellard * * This file is part of FFmpeg. * diff --git a/libavformat/output-example.c b/doc/examples/muxing-example.c index 1a429638d1..a1f19a47bf 100644 --- a/libavformat/output-example.c +++ b/doc/examples/muxing-example.c @@ -1,7 +1,4 @@ /* - * Libavformat API example: Output a media file in any supported - * libavformat format. The default codecs are used. - * * Copyright (c) 2003 Fabrice Bellard * * Permission is hereby granted, free of charge, to any person obtaining a copy @@ -22,6 +19,13 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ + +/** + * @file + * Libavformat API example: Output a media file in any supported + * libavformat format. The default codecs are used.
+ */ + #include <stdlib.h> #include <stdio.h> #include <string.h> @@ -443,10 +447,10 @@ int main(int argc, char **argv) filename = argv[1]; /* allocate the output media context */ - oc = avformat_alloc_output_context(NULL, NULL, filename); + avformat_alloc_output_context2(&oc, NULL, NULL, filename); if (!oc) { printf("Could not deduce output format from file extension: using MPEG.\n"); - oc = avformat_alloc_output_context("mpeg", NULL, filename); + avformat_alloc_output_context2(&oc, NULL, "mpeg", filename); } if (!oc) { exit(1); diff --git a/doc/fate.txt b/doc/fate.txt index f8ce68ea77..6ca302c024 100644 --- a/doc/fate.txt +++ b/doc/fate.txt @@ -7,8 +7,7 @@ that is provided separately from the actual source distribution. Use the following command to get the fate test samples -# rsync -aL rsync://rsync.mplayerhq.hu:/samples/fate-suite/ fate/fate-suite -# rsync -aL rsync://fate-suite.libav.org:/fate-suite/ fate-suite +# make fate-rsync SAMPLES=fate-suite/ To inform the build system about the testsuite location, pass `--samples=<path to the samples>` to configure or set the SAMPLES Make diff --git a/doc/ffmpeg.texi b/doc/ffmpeg.texi index 62b8996f02..fe9e9a9e92 100644 --- a/doc/ffmpeg.texi +++ b/doc/ffmpeg.texi @@ -402,7 +402,10 @@ and the following constants are available: @end table @item -rc_override @var{override} -rate control override for specific intervals +Rate control override for specific intervals, formatted as "int,int,int" +list separated with slashes. The first two values are the beginning and +end frame numbers, the last one is the quantizer to use if positive, or the quality +factor if negative. @item -me_method @var{method} Set motion estimation method to @var{method}.
Available methods are (from lowest to best quality): @@ -813,6 +816,7 @@ following rules: First ffmpeg searches for a file named @var{arg}.ffpreset in the directories @file{$FFMPEG_DATADIR} (if set), and @file{$HOME/.ffmpeg}, and in the datadir defined at configuration time (usually @file{PREFIX/share/ffmpeg}) +or in a @file{ffpresets} folder along the executable on win32, in that order. For example, if the argument is @code{libx264-max}, it will search for the file @file{libx264-max.ffpreset}. @@ -1042,6 +1046,7 @@ file to which you want to add them. @c man end EXAMPLES @include eval.texi +@include decoders.texi @include encoders.texi @include demuxers.texi @include muxers.texi diff --git a/doc/ffplay.texi b/doc/ffplay.texi index 9e842911e0..f9f5e07cda 100644 --- a/doc/ffplay.texi +++ b/doc/ffplay.texi @@ -13,7 +13,7 @@ @example @c man begin SYNOPSIS -ffplay [options] @file{input_file} +ffplay [options] [@file{input_file}] @c man end @end example @@ -82,6 +82,8 @@ the input video. Use the option "-filters" to show all the available filters (including also sources and sinks). +@item -i @var{input_file} +Read @var{input_file}. @end table @section Advanced options @@ -91,12 +93,8 @@ Set pixel format. @item -stats Show the stream duration, the codec parameters, the current position in the stream and the audio/video synchronisation drift. -@item -debug -Print specific debug info. @item -bug Work around bugs. -@item -vismv -Visualize motion vectors. @item -fast Non-spec-compliant optimizations. @item -genpts @@ -174,6 +172,7 @@ Seek to percentage in file corresponding to fraction of width. @c man end @include eval.texi +@include decoders.texi @include demuxers.texi @include muxers.texi @include indevs.texi diff --git a/doc/ffprobe.texi b/doc/ffprobe.texi index 15e7764a1e..6f7e83b267 100644 --- a/doc/ffprobe.texi +++ b/doc/ffprobe.texi @@ -108,11 +108,14 @@ multimedia stream. Each media stream information is printed within a dedicated section with name "STREAM". 
+@item -i @var{input_file} +Read @var{input_file}. + @end table @c man end +@include decoders.texi @include demuxers.texi -@include muxers.texi @include protocols.texi @include indevs.texi diff --git a/doc/filters.texi b/doc/filters.texi index a2557403b3..719d94f45a 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -761,7 +761,7 @@ The list of the currently supported filters follows: @item pullup @item qp @item rectangle -@item remove_logo +@item remove-logo @item rgbtest @item rotate @item sab @@ -1163,6 +1163,122 @@ scale="trunc(3/2*iw/hsub)*hsub:trunc(3/2*ih/vsub)*vsub" scale='min(500\, iw*3/2):-1' @end example +@section select +Select frames to pass in output. + +It accepts in input an expression, which is evaluated for each input +frame. If the expression is evaluated to a non-zero value, the frame +is selected and passed to the output, otherwise it is discarded. + +The expression can contain the following constants: + +@table @option +@item PI +Greek PI + +@item PHI +golden ratio + +@item E +Euler number + +@item n +the sequential number of the filtered frame, starting from 0 + +@item selected_n +the sequential number of the selected frame, starting from 0 + +@item prev_selected_n +the sequential number of the last selected frame, NAN if undefined + +@item TB +timebase of the input timestamps + +@item pts +the PTS (Presentation TimeStamp) of the filtered video frame, +expressed in @var{TB} units, NAN if undefined + +@item t +the PTS (Presentation TimeStamp) of the filtered video frame, +expressed in seconds, NAN if undefined + +@item prev_pts +the PTS of the previously filtered video frame, NAN if undefined + +@item prev_selected_pts +the PTS of the last previously filtered video frame, NAN if undefined + +@item prev_selected_t +the PTS of the last previously selected video frame, NAN if undefined + +@item start_pts +the PTS of the first video frame in the video, NAN if undefined + +@item start_t +the time of the first video frame in the video, NAN if 
undefined + +@item pict_type +the picture type of the filtered frame, can assume one of the following +values: +@table @option +@item PICT_TYPE_I +@item PICT_TYPE_P +@item PICT_TYPE_B +@item PICT_TYPE_S +@item PICT_TYPE_SI +@item PICT_TYPE_SP +@item PICT_TYPE_BI +@end table + +@item interlace_type +the frame interlace type, can assume one of the following values: +@table @option +@item INTERLACE_TYPE_P +the frame is progressive (not interlaced) +@item INTERLACE_TYPE_T +the frame is top-field-first +@item INTERLACE_TYPE_B +the frame is bottom-field-first +@end table + +@item key +1 if the filtered frame is a key-frame, 0 otherwise + +@item pos +the position in the file of the filtered frame, -1 if the information +is not available (e.g. for synthetic video) +@end table + +The default value of the select expression is "1". + +Some examples follow: + +@example +# select all frames in input +select + +# the above is the same as: +select=1 + +# skip all frames: +select=0 + +# select only I-frames +select='eq(pict_type\,PICT_TYPE_I)' + +# select one frame every 100 +select='not(mod(n\,100))' + +# select only frames contained in the 10-20 time interval +select='gte(t\,10)*lte(t\,20)' + +# select only I frames contained in the 10-20 time interval +select='gte(t\,10)*lte(t\,20)*eq(pict_type\,PICT_TYPE_I)' + +# select frames with a minimum distance of 10 seconds +select='isnan(prev_selected_t)+gte(t-prev_selected_t\,10)' +@end example + @anchor{setdar} @section setdar @@ -1360,12 +1476,12 @@ Check also the documentation of the @code{AVPictureType} enum and of the @code{av_get_picture_type_char} function defined in @file{libavutil/avutil.h}. 
-@item crc -Adler-32 CRC of the input frame +@item checksum +Adler-32 checksum of all the planes of the input frame -@item plane_crc -Adler-32 CRC of each plane of the input frame, expressed in the form -"[@var{crc0} @var{crc1} @var{crc2} @var{crc3}]" +@item plane_checksum +Adler-32 checksum of each plane of the input frame, expressed in the form +"[@var{c0} @var{c1} @var{c2} @var{c3}]" @end table @section slicify @@ -1383,6 +1499,21 @@ not specified it will use the default value of 16. Adding this in the beginning of filter chains should make filtering faster due to better use of the memory cache. +@section split + +Pass on the input video to two outputs. Both outputs are identical to +the input video. + +For example: +@example +[in] split [splitout1][splitout2]; +[splitout1] crop=100:100:0:0 [cropout]; +[splitout2] pad=200:200:100:100 [padout]; +@end example + +will create two separate outputs from the same input, one cropped and +one padded. + @section transpose Transpose rows with columns in the input video and optionally flip it. @@ -1536,9 +1667,10 @@ This source is mainly intended for a programmatic use, in particular through the interface defined in @file{libavfilter/vsrc_buffer.h}. It accepts the following parameters: -@var{width}:@var{height}:@var{pix_fmt_string}:@var{timebase_num}:@var{timebase_den}:@var{sample_aspect_ratio_num}:@var{sample_aspect_ratio.den} +@var{width}:@var{height}:@var{pix_fmt_string}:@var{timebase_num}:@var{timebase_den}:@var{sample_aspect_ratio_num}:@var{sample_aspect_ratio.den}:@var{scale_params} -All the parameters need to be explicitely defined. +All the parameters but @var{scale_params} need to be explicitly +defined. Follows the list of the accepted parameters. @@ -1559,6 +1691,11 @@ timestamps of the buffered frames. @item sample_aspect_ratio.num, sample_aspect_ratio.den Specify numerator and denominator of the sample aspect ratio assumed by the video frames.
+ +@item scale_params +Specify the optional parameters to be used for the scale filter which +is automatically inserted when an input change is detected in the +input size or format. @end table For example: @@ -1573,7 +1710,7 @@ Since the pixel format with name "yuv410p" corresponds to the number 6 (check the enum PixelFormat definition in @file{libavutil/pixfmt.h}), this example corresponds to: @example -buffer=320:240:6:1:24 +buffer=320:240:6:1:24:1:1 @end example @section color diff --git a/doc/general.texi b/doc/general.texi index f9787139a4..e5e76db056 100644 --- a/doc/general.texi +++ b/doc/general.texi @@ -284,8 +284,7 @@ following image formats are supported: @tab Digital Picture Exchange @item JPEG @tab X @tab X @tab Progressive JPEG is not supported. -@item JPEG 2000 @tab @tab E - @tab decoding supported through external library libopenjpeg +@item JPEG 2000 @tab X @tab X @item JPEG-LS @tab X @tab X @item LJPEG @tab X @tab @tab Lossless JPEG @@ -618,7 +617,7 @@ following image formats are supported: @tab Used in Origin's Wing Commander IV AVI files. @item DSP Group TrueSpeech @tab @tab X @item DV audio @tab @tab X -@item Enhanced AC-3 @tab @tab X +@item Enhanced AC-3 @tab X @tab X @item FLAC (Free Lossless Audio Codec) @tab X @tab IX @item GSM @tab E @tab X @tab encoding supported through external library libgsm @@ -763,12 +762,6 @@ performance on systems without hardware floating point support). Using a cross-compiler is preferred for various reasons. -@subsection DJGPP - -FFmpeg cannot be compiled because of broken system headers, add -@code{--extra-cflags=-U__STRICT_ANSI__} to the configure options as a -workaround. - @section OS/2 For information about compiling FFmpeg on OS/2 see @@ -844,18 +837,6 @@ are listed below: FFmpeg automatically passes @code{-fno-common} to the compiler to work around a GCC bug (see @url{http://gcc.gnu.org/bugzilla/show_bug.cgi?id=37216}). 
-Within the MSYS shell, configure and make with: - -@example -./configure --enable-memalign-hack -make -make install -@end example - -This will install @file{ffmpeg.exe} along with many other development files -to @file{/usr/local}. You may specify another install path using the -@code{--prefix} option in @file{configure}. - Notes: @itemize diff --git a/doc/muxers.texi b/doc/muxers.texi index 9d46803988..55b44d1018 100644 --- a/doc/muxers.texi +++ b/doc/muxers.texi @@ -210,4 +210,67 @@ Alternatively you can write the command as: ffmpeg -benchmark -i INPUT -f null - @end example +@section matroska + +Matroska container muxer. + +This muxer implements the matroska and webm container specs. + +The recognized metadata settings in this muxer are: + +@table @option + +@item title=@var{title name} +Name provided to a single track +@end table + +@table @option + +@item language=@var{language name} +Specifies the language of the track in the Matroska languages form +@end table + +@table @option + +@item stereo_mode=@var{mode} +Stereo 3D video layout of two views in a single video track +@table @option +@item mono +video is not stereo +@item left_right +Both views are arranged side by side, Left-eye view is on the left +@item bottom_top +Both views are arranged in top-bottom orientation, Left-eye view is at bottom +@item top_bottom +Both views are arranged in top-bottom orientation, Left-eye view is on top +@item checkerboard_rl +Each view is arranged in a checkerboard interleaved pattern, Left-eye view being first +@item checkerboard_lr +Each view is arranged in a checkerboard interleaved pattern, Right-eye view being first +@item row_interleaved_rl +Each view is constituted by a row based interleaving, Right-eye view is first row +@item row_interleaved_lr +Each view is constituted by a row based interleaving, Left-eye view is first row +@item col_interleaved_rl +Both views are arranged in a column based interleaving manner, Right-eye view is first column +@item 
col_interleaved_lr +Both views are arranged in a column based interleaving manner, Left-eye view is first column +@item anaglyph_cyan_red +All frames are in anaglyph format viewable through red-cyan filters +@item right_left +Both views are arranged side by side, Right-eye view is on the left +@item anaglyph_green_magenta +All frames are in anaglyph format viewable through green-magenta filters +@item block_lr +Both eyes laced in one Block, Left-eye view is first +@item block_rl +Both eyes laced in one Block, Right-eye view is first +@end table +@end table + +For example a 3D WebM clip can be created using the following command line: +@example +ffmpeg -i sample_left_right_clip.mpg -an -vcodec libvpx -metadata stereo_mode=left_right -y stereo_clip.webm +@end example + @c man end MUXERS diff --git a/doc/outdevs.texi b/doc/outdevs.texi index fbb312363c..79619f80b9 100644 --- a/doc/outdevs.texi +++ b/doc/outdevs.texi @@ -26,6 +26,46 @@ ALSA (Advanced Linux Sound Architecture) output device. OSS (Open Sound System) output device. +@section sdl + +SDL (Simple Directmedia Layer) output device. + +This output device allows one to show a video stream in an SDL +window. Only one SDL window is allowed per application, so you can +have only one instance of this output device in an application. + +To enable this output device you need libsdl installed on your system +when configuring your build. + +For more information about SDL, check: +@url{http://www.libsdl.org/} + +@subsection Options + +@table @option + +@item window_title +Set the SDL window title, if not specified it defaults to the filename +specified for the output device. + +@item icon_title +Set the name of the iconified SDL window, if not specified it is set +to the same value of @var{window_title}. + +@item window_size +Set the SDL window size, can be a string of the form +@var{width}x@var{height} or a video size abbreviation. +If not specified it defaults to the size of the input video.
+@end table + +@subsection Examples + +The following command shows the @file{ffmpeg} output in an +SDL window, forcing its size to the qcif format: +@example +ffmpeg -i INPUT -vcodec rawvideo -pix_fmt yuv420p -window_size qcif -f sdl "SDL output" +@end example + @section sndio sndio audio output device. @@ -39,6 +39,7 @@ #include "libavutil/colorspace.h" #include "libavutil/fifo.h" #include "libavutil/intreadwrite.h" +#include "libavutil/dict.h" #include "libavutil/pixdesc.h" #include "libavutil/avstring.h" #include "libavutil/libm.h" @@ -116,11 +117,9 @@ static const OptionDef options[]; #endif static const char *last_asked_format = NULL; -static AVFormatContext *input_files[MAX_FILES]; static int64_t input_files_ts_offset[MAX_FILES]; static double *input_files_ts_scale[MAX_FILES] = {NULL}; static AVCodec **input_codecs = NULL; -static int nb_input_files = 0; static int nb_input_codecs = 0; static int nb_input_files_ts_scale[MAX_FILES] = {0}; @@ -173,17 +172,15 @@ static int loop_output = AVFMT_NOOUTPUTLOOP; static int qp_hist = 0; #if CONFIG_AVFILTER static char *vfilters = NULL; -#else -static unsigned int sws_flags = SWS_BICUBIC; #endif static int intra_only = 0; -static int audio_sample_rate = 44100; +static int audio_sample_rate = 0; static int64_t channel_layout = 0; #define QSCALE_NONE -99999 static float audio_qscale = QSCALE_NONE; static int audio_disable = 0; -static int audio_channels = 1; +static int audio_channels = 0; static char *audio_codec_name = NULL; static unsigned int audio_codec_tag = 0; static char *audio_language = NULL; @@ -205,7 +202,7 @@ static int64_t start_time = 0; static int64_t recording_timestamp = 0; static int64_t input_ts_offset = 0; static int file_overwrite = 0; -static AVMetadata *metadata; +static AVDictionary *metadata; static int do_benchmark = 0; static int do_hex_dump = 0; static int do_pkt_dump = 0; @@ -222,7 +219,6 @@ static float audio_drift_threshold= 0.1; static int copy_ts= 0; static int copy_tb= 0; static int
opt_shortest = 0; -static int video_global_header = 0; static char *vstats_filename; static FILE *vstats_file; static int opt_programid = 0; @@ -289,8 +285,10 @@ typedef struct AVOutputStream { int resample_height; int resample_width; int resample_pix_fmt; + AVRational frame_rate; float frame_aspect_ratio; + /* forced key frames */ int64_t *forced_kf_pts; int forced_kf_count; @@ -323,7 +321,6 @@ static int nb_output_streams_for_file[MAX_FILES] = { 0 }; typedef struct AVInputStream { int file_index; - int index; AVStream *st; int discard; /* true if stream data should be discarded */ int decoding_needed; /* true if the packets must be decoded in 'raw_fifo' */ @@ -343,10 +340,10 @@ typedef struct AVInputStream { } AVInputStream; typedef struct AVInputFile { + AVFormatContext *ctx; int eof_reached; /* true if eof reached */ int ist_index; /* index of first stream in ist_table */ int buffer_size; /* current total buffer size */ - int nb_streams; /* nb streams we are aware of */ } AVInputFile; #if HAVE_TERMIOS_H @@ -355,6 +352,11 @@ typedef struct AVInputFile { static struct termios oldtty; #endif +static AVInputStream *input_streams = NULL; +static int nb_input_streams = 0; +static AVInputFile *input_files = NULL; +static int nb_input_files = 0; + #if CONFIG_AVFILTER static int configure_video_filters(AVInputStream *ist, AVOutputStream *ost) @@ -406,8 +408,8 @@ static int configure_video_filters(AVInputStream *ist, AVOutputStream *ost) ost->graph->scale_sws_opts = av_strdup(args); if (ost->avfilter) { - AVFilterInOut *outputs = av_malloc(sizeof(AVFilterInOut)); - AVFilterInOut *inputs = av_malloc(sizeof(AVFilterInOut)); + AVFilterInOut *outputs = avfilter_inout_alloc(); + AVFilterInOut *inputs = avfilter_inout_alloc(); outputs->name = av_strdup("in"); outputs->filter_ctx = last_filter; @@ -419,7 +421,7 @@ static int configure_video_filters(AVInputStream *ist, AVOutputStream *ost) inputs->pad_idx = 0; inputs->next = NULL; - if ((ret = avfilter_graph_parse(ost->graph, 
ost->avfilter, inputs, outputs, NULL)) < 0) + if ((ret = avfilter_graph_parse(ost->graph, ost->avfilter, &inputs, &outputs, NULL)) < 0) return ret; av_freep(&ost->avfilter); } else { @@ -541,7 +543,7 @@ static int ffmpeg_exit(int ret) av_free(output_streams_for_file[i]); } for(i=0;i<nb_input_files;i++) { - av_close_input_file(input_files[i]); + av_close_input_file(input_files[i].ctx); av_free(input_files_ts_scale[i]); } @@ -558,6 +560,9 @@ static int ffmpeg_exit(int ret) av_free(stream_maps); av_free(meta_data_maps); + av_freep(&input_streams); + av_freep(&input_files); + av_free(video_codec_name); av_free(audio_codec_name); av_free(subtitle_codec_name); @@ -715,6 +720,8 @@ static int read_ffserver_streams(AVFormatContext *s, const char *filename) // FIXME: a more elegant solution is needed st = av_mallocz(sizeof(AVStream)); memcpy(st, ic->streams[i], sizeof(AVStream)); + st->info = av_malloc(sizeof(*st->info)); + memcpy(st->info, ic->streams[i]->info, sizeof(*st->info)); st->codec = avcodec_alloc_context(); if (!st->codec) { print_error(filename, AVERROR(ENOMEM)); @@ -840,7 +847,7 @@ need_realloc: if ((ost->audio_resample && !ost->resample) || resample_changed) { if (resample_changed) { av_log(NULL, AV_LOG_INFO, "Input stream #%d.%d frame changed from rate:%d fmt:%s ch:%d to rate:%d fmt:%s ch:%d\n", - ist->file_index, ist->index, + ist->file_index, ist->st->index, ost->resample_sample_rate, av_get_sample_fmt_name(ost->resample_sample_fmt), ost->resample_channels, dec->sample_rate, av_get_sample_fmt_name(dec->sample_fmt), dec->channels); ost->resample_sample_fmt = dec->sample_fmt; @@ -1203,9 +1210,9 @@ static void do_video_out(AVFormatContext *s, if (resample_changed) { av_log(NULL, AV_LOG_INFO, "Input stream #%d.%d frame changed from size:%dx%d fmt:%s to size:%dx%d fmt:%s\n", - ist->file_index, ist->index, - ost->resample_width, ost->resample_height, avcodec_get_pix_fmt_name(ost->resample_pix_fmt), - dec->width , dec->height , 
avcodec_get_pix_fmt_name(dec->pix_fmt)); + ist->file_index, ist->st->index, + ost->resample_width, ost->resample_height, av_get_pix_fmt_name(ost->resample_pix_fmt), + dec->width , dec->height , av_get_pix_fmt_name(dec->pix_fmt)); ost->resample_width = dec->width; ost->resample_height = dec->height; ost->resample_pix_fmt = dec->pix_fmt; @@ -1238,7 +1245,7 @@ static void do_video_out(AVFormatContext *s, } } sws_scale(ost->img_resample_ctx, formatted_picture->data, formatted_picture->linesize, - 0, ost->resample_height, ost->resample_frame.data, ost->resample_frame.linesize); + 0, ost->resample_height, final_picture->data, final_picture->linesize); } #endif @@ -1375,7 +1382,8 @@ static void print_report(AVFormatContext **output_files, int64_t total_size; AVCodecContext *enc; int frame_number, vid, i; - double bitrate, ti1, pts; + double bitrate; + int64_t pts = INT64_MAX; static int64_t last_time = -1; static int qp_histogram[52]; @@ -1400,7 +1408,6 @@ static void print_report(AVFormatContext **output_files, total_size= avio_tell(oc->pb); buf[0] = '\0'; - ti1 = 1e10; vid = 0; for(i=0;i<nb_ostreams;i++) { float q = -1; @@ -1451,19 +1458,28 @@ static void print_report(AVFormatContext **output_files, vid = 1; } /* compute min output value */ - pts = (double)ost->st->pts.val * av_q2d(ost->st->time_base); - if ((pts < ti1) && (pts > 0)) - ti1 = pts; + pts = FFMIN(pts, av_rescale_q(ost->st->pts.val, + ost->st->time_base, AV_TIME_BASE_Q)); } - if (ti1 < 0.01) - ti1 = 0.01; if (verbose > 0 || is_last_report) { - bitrate = (double)(total_size * 8) / ti1 / 1000.0; + int hours, mins, secs, us; + secs = pts / AV_TIME_BASE; + us = pts % AV_TIME_BASE; + mins = secs / 60; + secs %= 60; + hours = mins / 60; + mins %= 60; + + bitrate = pts ? 
total_size * 8 / (pts / 1000.0) : 0; snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), - "size=%8.0fkB time=%0.2f bitrate=%6.1fkbits/s", - (double)total_size / 1024, ti1, bitrate); + "size=%8.0fkB time=", total_size / 1024.0); + snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), + "%02d:%02d:%02d.%02d ", hours, mins, secs, + (100 * us) / AV_TIME_BASE); + snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), + "bitrate=%6.1fkbits/s", bitrate); if (nb_frames_dup || nb_frames_drop) snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), " dup=%d drop=%d", @@ -1644,15 +1660,16 @@ static int output_packet(AVInputStream *ist, int ist_index, } #if CONFIG_AVFILTER - if(ist->st->codec->codec_type == AVMEDIA_TYPE_VIDEO){ + if(ist->st->codec->codec_type == AVMEDIA_TYPE_VIDEO) + if (start_time == 0 || ist->pts >= start_time) { for(i=0;i<nb_ostreams;i++) { ost = ost_table[i]; if (ost->input_video_filter && ost->source_index == ist_index) { if (!picture.sample_aspect_ratio.num) picture.sample_aspect_ratio = ist->st->sample_aspect_ratio; picture.pts = ist->pts; - // add it to be filtered - av_vsrc_buffer_add_frame2(ost->input_video_filter, &picture, ""); //TODO user setable params + + av_vsrc_buffer_add_frame(ost->input_video_filter, &picture, AV_VSRC_BUF_FLAG_OVERWRITE); } } } @@ -1693,7 +1710,8 @@ static int output_packet(AVInputStream *ist, int ist_index, while (frame_available) { AVRational ist_pts_tb; if (ist->st->codec->codec_type == AVMEDIA_TYPE_VIDEO && ost->output_video_filter) - get_filtered_video_frame(ost->output_video_filter, &picture, &ost->picref, &ist_pts_tb); + if (get_filtered_video_frame(ost->output_video_filter, &picture, &ost->picref, &ist_pts_tb) < 0) + goto cont; if (ost->picref) ist->pts = av_rescale_q(ost->picref->pts, ist_pts_tb, AV_TIME_BASE_Q); #endif @@ -1726,6 +1744,7 @@ static int output_packet(AVInputStream *ist, int ist_index, } } else { AVFrame avframe; //FIXME/XXX remove this + AVPicture pict; AVPacket opkt; int64_t ost_tb_start_time= 
av_rescale_q(start_time, AV_TIME_BASE_Q, ost->st->time_base); @@ -1779,6 +1798,13 @@ static int output_packet(AVInputStream *ist, int ist_index, opkt.size = data_size; } + if (os->oformat->flags & AVFMT_RAWPICTURE) { + /* store AVPicture in AVPacket, as expected by the output format */ + avpicture_fill(&pict, opkt.data, ost->st->codec->pix_fmt, ost->st->codec->width, ost->st->codec->height); + opkt.data = (uint8_t *)&pict; + opkt.size = sizeof(AVPicture); + opkt.flags |= AV_PKT_FLAG_KEY; + } write_frame(os, &opkt, ost->st->codec, ost->bitstream_filters); ost->st->codec->frame_number++; ost->frame_number++; @@ -1788,8 +1814,7 @@ static int output_packet(AVInputStream *ist, int ist_index, cont: frame_available = (ist->st->codec->codec_type == AVMEDIA_TYPE_VIDEO) && ost->output_video_filter && avfilter_poll_frame(ost->output_video_filter->inputs[0]); - if(ost->picref) - avfilter_unref_buffer(ost->picref); + avfilter_unref_buffer(ost->picref); } #endif } @@ -1904,7 +1929,7 @@ static void print_sdp(AVFormatContext **avc, int n) static int copy_chapters(int infile, int outfile) { - AVFormatContext *is = input_files[infile]; + AVFormatContext *is = input_files[infile].ctx; AVFormatContext *os = output_files[outfile]; int i; @@ -1931,7 +1956,7 @@ static int copy_chapters(int infile, int outfile) out_ch->end = FFMIN(rt, in_ch->end - ts_off); if (metadata_chapters_autocopy) - av_metadata_copy(&out_ch->metadata, in_ch->metadata, 0); + av_dict_copy(&out_ch->metadata, in_ch->metadata, 0); os->nb_chapters++; os->chapters = av_realloc(os->chapters, sizeof(AVChapter)*os->nb_chapters); @@ -1970,16 +1995,16 @@ static void parse_forced_key_frames(char *kf, AVOutputStream *ost, */ static int transcode(AVFormatContext **output_files, int nb_output_files, - AVFormatContext **input_files, + AVInputFile *input_files, int nb_input_files, AVStreamMap *stream_maps, int nb_stream_maps) { - int ret = 0, i, j, k, n, nb_istreams = 0, nb_ostreams = 0, step; + int ret = 0, i, j, k, n, nb_ostreams 
= 0, step; + AVFormatContext *is, *os; AVCodecContext *codec, *icodec; AVOutputStream *ost, **ost_table = NULL; - AVInputStream *ist, **ist_table = NULL; - AVInputFile *file_table; + AVInputStream *ist; char error[1024]; int key; int want_sdp = 1; @@ -1988,46 +2013,9 @@ static int transcode(AVFormatContext **output_files, int nb_frame_threshold[AVMEDIA_TYPE_NB]={0}; int nb_streams[AVMEDIA_TYPE_NB]={0}; - file_table= av_mallocz(nb_input_files * sizeof(AVInputFile)); - if (!file_table) - goto fail; - - /* input stream init */ - j = 0; - for(i=0;i<nb_input_files;i++) { - is = input_files[i]; - file_table[i].ist_index = j; - file_table[i].nb_streams = is->nb_streams; - j += is->nb_streams; - } - nb_istreams = j; - - ist_table = av_mallocz(nb_istreams * sizeof(AVInputStream *)); - if (!ist_table) - goto fail; - - for(i=0;i<nb_istreams;i++) { - ist = av_mallocz(sizeof(AVInputStream)); - if (!ist) - goto fail; - ist_table[i] = ist; - } - j = 0; - for(i=0;i<nb_input_files;i++) { - is = input_files[i]; - for(k=0;k<is->nb_streams;k++) { - ist = ist_table[j++]; - ist->st = is->streams[k]; - ist->file_index = i; - ist->index = k; - ist->discard = 1; /* the stream is discarded by default - (changed later) */ - - if (rate_emu) { - ist->start = av_gettime(); - } - } - } + if (rate_emu) + for (i = 0; i < nb_input_streams; i++) + input_streams[i].start = av_gettime(); /* output stream init */ nb_ostreams = 0; @@ -2053,7 +2041,7 @@ static int transcode(AVFormatContext **output_files, int si = stream_maps[i].stream_index; if (fi < 0 || fi > nb_input_files - 1 || - si < 0 || si > file_table[fi].nb_streams - 1) { + si < 0 || si > input_files[fi].ctx->nb_streams - 1) { fprintf(stderr,"Could not find input stream #%d.%d\n", fi, si); ret = AVERROR(EINVAL); goto fail; @@ -2061,7 +2049,7 @@ static int transcode(AVFormatContext **output_files, fi = stream_maps[i].sync_file_index; si = stream_maps[i].sync_stream_index; if (fi < 0 || fi > nb_input_files - 1 || - si < 0 || si > 
file_table[fi].nb_streams - 1) { + si < 0 || si > input_files[fi].ctx->nb_streams - 1) { fprintf(stderr,"Could not find sync stream #%d.%d\n", fi, si); ret = AVERROR(EINVAL); goto fail; @@ -2083,12 +2071,12 @@ static int transcode(AVFormatContext **output_files, for(j=0; j<AVMEDIA_TYPE_NB; j++) nb_frame_threshold[j] += step; - for(j=0; j<nb_istreams; j++) { + for(j=0; j<nb_input_streams; j++) { int skip=0; - ist = ist_table[j]; + ist = &input_streams[j]; if(opt_programid){ int pi,si; - AVFormatContext *f= input_files[ ist->file_index ]; + AVFormatContext *f= input_files[ ist->file_index ].ctx; skip=1; for(pi=0; pi<f->nb_programs; pi++){ AVProgram *p= f->programs[pi]; @@ -2116,11 +2104,11 @@ static int transcode(AVFormatContext **output_files, ost = ost_table[n] = output_streams_for_file[k][i]; ost->st = os->streams[i]; if (nb_stream_maps > 0) { - ost->source_index = file_table[stream_maps[n].file_index].ist_index + + ost->source_index = input_files[stream_maps[n].file_index].ist_index + stream_maps[n].stream_index; /* Sanity check that the stream types match */ - if (ist_table[ost->source_index]->st->codec->codec_type != ost->st->codec->codec_type) { + if (input_streams[ost->source_index].st->codec->codec_type != ost->st->codec->codec_type) { int i= ost->file_index; av_dump_format(output_files[i], i, output_files[i]->filename, 1); fprintf(stderr, "Codec type mismatch for mapping #%d.%d -> #%d.%d\n", @@ -2132,12 +2120,12 @@ static int transcode(AVFormatContext **output_files, } else { /* get corresponding input stream index : we select the first one with the right type */ found = 0; - for(j=0;j<nb_istreams;j++) { + for (j = 0; j < nb_input_streams; j++) { int skip=0; - ist = ist_table[j]; + ist = &input_streams[j]; if(opt_programid){ int pi,si; - AVFormatContext *f= input_files[ ist->file_index ]; + AVFormatContext *f = input_files[ist->file_index].ctx; skip=1; for(pi=0; pi<f->nb_programs; pi++){ AVProgram *p= f->programs[pi]; @@ -2160,8 +2148,8 @@ static int 
transcode(AVFormatContext **output_files, if (!found) { if(! opt_programid) { /* try again and reuse existing stream */ - for(j=0;j<nb_istreams;j++) { - ist = ist_table[j]; + for (j = 0; j < nb_input_streams; j++) { + ist = &input_streams[j]; if ( ist->st->codec->codec_type == ost->st->codec->codec_type && ist->st->discard != AVDISCARD_ALL) { ost->source_index = j; @@ -2178,10 +2166,10 @@ static int transcode(AVFormatContext **output_files, } } } - ist = ist_table[ost->source_index]; + ist = &input_streams[ost->source_index]; ist->discard = 0; ost->sync_ist = (nb_stream_maps > 0) ? - ist_table[file_table[stream_maps[n].sync_file_index].ist_index + + &input_streams[input_files[stream_maps[n].sync_file_index].ist_index + stream_maps[n].sync_stream_index] : ist; } } @@ -2190,14 +2178,14 @@ static int transcode(AVFormatContext **output_files, for(i=0;i<nb_ostreams;i++) { ost = ost_table[i]; os = output_files[ost->file_index]; - ist = ist_table[ost->source_index]; + ist = &input_streams[ost->source_index]; codec = ost->st->codec; icodec = ist->st->codec; if (metadata_streams_autocopy) - av_metadata_copy(&ost->st->metadata, ist->st->metadata, - AV_METADATA_DONT_OVERWRITE); + av_dict_copy(&ost->st->metadata, ist->st->metadata, + AV_DICT_DONT_OVERWRITE); ost->st->disposition = ist->st->disposition; codec->bits_per_raw_sample= icodec->bits_per_raw_sample; @@ -2281,6 +2269,17 @@ static int transcode(AVFormatContext **output_files, if(!ost->fifo) goto fail; ost->reformat_pair = MAKE_SFMT_PAIR(AV_SAMPLE_FMT_NONE,AV_SAMPLE_FMT_NONE); + if (!codec->sample_rate) { + codec->sample_rate = icodec->sample_rate; + if (icodec->lowres) + codec->sample_rate >>= icodec->lowres; + } + choose_sample_rate(ost->st, codec->codec); + codec->time_base = (AVRational){1, codec->sample_rate}; + if (!codec->channels) + codec->channels = icodec->channels; + if (av_get_channel_layout_nb_channels(codec->channel_layout) != codec->channels) + codec->channel_layout = 0; ost->audio_resample = 
codec->sample_rate != icodec->sample_rate || audio_sync_method > 1; icodec->request_channels = codec->channels; ist->decoding_needed = 1; @@ -2306,6 +2305,14 @@ static int transcode(AVFormatContext **output_files, ost->encoding_needed = 1; ist->decoding_needed = 1; + if (!ost->frame_rate.num) + ost->frame_rate = ist->st->r_frame_rate.num ? ist->st->r_frame_rate : (AVRational){25,1}; + if (codec->codec && codec->codec->supported_framerates && !force_fps) { + int idx = av_find_nearest_q_idx(ost->frame_rate, codec->codec->supported_framerates); + ost->frame_rate = codec->codec->supported_framerates[idx]; + } + codec->time_base = (AVRational){ost->frame_rate.den, ost->frame_rate.num}; + #if CONFIG_AVFILTER if (configure_video_filters(ist, ost)) { fprintf(stderr, "Error opening filters!\n"); @@ -2369,7 +2376,7 @@ static int transcode(AVFormatContext **output_files, ost = ost_table[i]; if (ost->encoding_needed) { AVCodec *codec = i < nb_output_codecs ? output_codecs[i] : NULL; - AVCodecContext *dec = ist_table[ost->source_index]->st->codec; + AVCodecContext *dec = input_streams[ost->source_index].st->codec; if (!codec) codec = avcodec_find_encoder(ost->st->codec->codec_id); if (!codec) { @@ -2398,21 +2405,21 @@ static int transcode(AVFormatContext **output_files, } /* open each decoder */ - for(i=0;i<nb_istreams;i++) { - ist = ist_table[i]; + for (i = 0; i < nb_input_streams; i++) { + ist = &input_streams[i]; if (ist->decoding_needed) { AVCodec *codec = i < nb_input_codecs ? 
input_codecs[i] : NULL; if (!codec) codec = avcodec_find_decoder(ist->st->codec->codec_id); if (!codec) { snprintf(error, sizeof(error), "Decoder (codec id %d) not found for input stream #%d.%d", - ist->st->codec->codec_id, ist->file_index, ist->index); + ist->st->codec->codec_id, ist->file_index, ist->st->index); ret = AVERROR(EINVAL); goto dump_format; } if (avcodec_open(ist->st->codec, codec) < 0) { snprintf(error, sizeof(error), "Error while opening decoder for input stream #%d.%d", - ist->file_index, ist->index); + ist->file_index, ist->st->index); ret = AVERROR(EINVAL); goto dump_format; } @@ -2422,9 +2429,9 @@ static int transcode(AVFormatContext **output_files, } /* init pts */ - for(i=0;i<nb_istreams;i++) { + for (i = 0; i < nb_input_streams; i++) { AVStream *st; - ist = ist_table[i]; + ist = &input_streams[i]; st= ist->st; ist->pts = st->avg_frame_rate.num ? - st->codec->has_b_frames*AV_TIME_BASE / av_q2d(st->avg_frame_rate) : 0; ist->next_pts = AV_NOPTS_VALUE; @@ -2434,7 +2441,7 @@ static int transcode(AVFormatContext **output_files, /* set meta data information from input file if required */ for (i=0;i<nb_meta_data_maps;i++) { AVFormatContext *files[2]; - AVMetadata **meta[2]; + AVDictionary **meta[2]; int j; #define METADATA_CHECK_INDEX(index, nb_elems, desc)\ @@ -2453,7 +2460,7 @@ static int transcode(AVFormatContext **output_files, METADATA_CHECK_INDEX(in_file_index, nb_input_files, "input file") files[0] = output_files[out_file_index]; - files[1] = input_files[in_file_index]; + files[1] = input_files[in_file_index].ctx; for (j = 0; j < 2; j++) { AVMetaDataMap *map = &meta_data_maps[i][j]; @@ -2477,15 +2484,15 @@ static int transcode(AVFormatContext **output_files, } } - av_metadata_copy(meta[0], *meta[1], AV_METADATA_DONT_OVERWRITE); + av_dict_copy(meta[0], *meta[1], AV_DICT_DONT_OVERWRITE); } /* copy global metadata by default */ if (metadata_global_autocopy) { for (i = 0; i < nb_output_files; i++) - av_metadata_copy(&output_files[i]->metadata, 
input_files[0]->metadata, - AV_METADATA_DONT_OVERWRITE); + av_dict_copy(&output_files[i]->metadata, input_files[0].ctx->metadata, + AV_DICT_DONT_OVERWRITE); } /* copy chapters according to chapter maps */ @@ -2511,7 +2518,7 @@ static int transcode(AVFormatContext **output_files, /* copy chapters from the first input file that has them*/ if (!nb_chapter_maps) for (i = 0; i < nb_input_files; i++) { - if (!input_files[i]->nb_chapters) + if (!input_files[i].ctx->nb_chapters) continue; for (j = 0; j < nb_output_files; j++) @@ -2546,14 +2553,14 @@ static int transcode(AVFormatContext **output_files, for(i=0;i<nb_ostreams;i++) { ost = ost_table[i]; fprintf(stderr, " Stream #%d.%d -> #%d.%d", - ist_table[ost->source_index]->file_index, - ist_table[ost->source_index]->index, + input_streams[ost->source_index].file_index, + input_streams[ost->source_index].st->index, ost->file_index, ost->index); - if (ost->sync_ist != ist_table[ost->source_index]) + if (ost->sync_ist != &input_streams[ost->source_index]) fprintf(stderr, " [sync #%d.%d]", ost->sync_ist->file_index, - ost->sync_ist->index); + ost->sync_ist->st->index); fprintf(stderr, "\n"); } } @@ -2608,16 +2615,14 @@ static int transcode(AVFormatContext **output_files, if (key == 'd' || key == 'D'){ int debug=0; if(key == 'D') { - ist = ist_table[0]; - debug = ist->st->codec->debug<<1; + debug = input_streams[0].st->codec->debug<<1; if(!debug) debug = 1; while(debug & (FF_DEBUG_DCT_COEFF|FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) //unsupported, would just crash debug += debug; }else scanf("%d", &debug); - for(i=0;i<nb_istreams;i++) { - ist = ist_table[i]; - ist->st->codec->debug = debug; + for(i=0;i<nb_input_streams;i++) { + input_streams[i].st->codec->debug = debug; } for(i=0;i<nb_ostreams;i++) { ost = ost_table[i]; @@ -2646,12 +2651,12 @@ static int transcode(AVFormatContext **output_files, double ipts, opts; ost = ost_table[i]; os = output_files[ost->file_index]; - ist = ist_table[ost->source_index]; + ist = 
&input_streams[ost->source_index]; if(ist->is_past_recording_time || no_packet[ist->file_index]) continue; opts = ost->st->pts.val * av_q2d(ost->st->time_base); ipts = (double)ist->pts; - if (!file_table[ist->file_index].eof_reached){ + if (!input_files[ist->file_index].eof_reached){ if(ipts < ipts_min) { ipts_min = ipts; if(input_sync ) file_index = ist->file_index; @@ -2682,7 +2687,7 @@ static int transcode(AVFormatContext **output_files, break; /* read a frame from it and output it in the fifo */ - is = input_files[file_index]; + is = input_files[file_index].ctx; ret= av_read_frame(is, &pkt); if(ret == AVERROR(EAGAIN)){ no_packet[file_index]=1; @@ -2690,7 +2695,7 @@ static int transcode(AVFormatContext **output_files, continue; } if (ret < 0) { - file_table[file_index].eof_reached = 1; + input_files[file_index].eof_reached = 1; if (opt_shortest) break; else @@ -2706,10 +2711,10 @@ static int transcode(AVFormatContext **output_files, } /* the following test is needed in case new streams appear dynamically in stream : we ignore them */ - if (pkt.stream_index >= file_table[file_index].nb_streams) + if (pkt.stream_index >= input_files[file_index].ctx->nb_streams) goto discard_packet; - ist_index = file_table[file_index].ist_index + pkt.stream_index; - ist = ist_table[ist_index]; + ist_index = input_files[file_index].ist_index + pkt.stream_index; + ist = &input_streams[ist_index]; if (ist->discard) goto discard_packet; @@ -2743,7 +2748,7 @@ static int transcode(AVFormatContext **output_files, /* finish if recording time exhausted */ if (recording_time != INT64_MAX && - (pkt.pts != AV_NOPTS_VALUE || pkt.dts != AV_NOPTS_VALUE ? + (pkt.pts != AV_NOPTS_VALUE ? 
av_compare_ts(pkt.pts, ist->st->time_base, recording_time + start_time, (AVRational){1, 1000000}) : av_compare_ts(ist->pts, AV_TIME_BASE_Q, recording_time + start_time, (AVRational){1, 1000000}) @@ -2752,12 +2757,12 @@ static int transcode(AVFormatContext **output_files, goto discard_packet; } - //fprintf(stderr,"read #%d.%d size=%d\n", ist->file_index, ist->index, pkt.size); + //fprintf(stderr,"read #%d.%d size=%d\n", ist->file_index, ist->st->index, pkt.size); if (output_packet(ist, ist_index, ost_table, nb_ostreams, &pkt) < 0) { if (verbose >= 0) fprintf(stderr, "Error while decoding stream #%d.%d\n", - ist->file_index, ist->index); + ist->file_index, ist->st->index); if (exit_on_error) ffmpeg_exit(1); av_free_packet(&pkt); @@ -2772,8 +2777,8 @@ static int transcode(AVFormatContext **output_files, } /* at the end of stream, we must flush the decoder buffers */ - for(i=0;i<nb_istreams;i++) { - ist = ist_table[i]; + for (i = 0; i < nb_input_streams; i++) { + ist = &input_streams[i]; if (ist->decoding_needed) { output_packet(ist, i, ost_table, nb_ostreams, NULL); } @@ -2803,8 +2808,8 @@ static int transcode(AVFormatContext **output_files, } /* close each decoder */ - for(i=0;i<nb_istreams;i++) { - ist = ist_table[i]; + for (i = 0; i < nb_input_streams; i++) { + ist = &input_streams[i]; if (ist->decoding_needed) { avcodec_close(ist->st->codec); } @@ -2815,15 +2820,7 @@ static int transcode(AVFormatContext **output_files, fail: av_freep(&bit_buffer); - av_free(file_table); - if (ist_table) { - for(i=0;i<nb_istreams;i++) { - ist = ist_table[i]; - av_free(ist); - } - av_free(ist_table); - } if (ost_table) { for(i=0;i<nb_ostreams;i++) { ost = ost_table[i]; @@ -2853,14 +2850,16 @@ static int transcode(AVFormatContext **output_files, return ret; } -static void opt_format(const char *arg) +static int opt_format(const char *opt, const char *arg) { last_asked_format = arg; + return 0; } -static void opt_video_rc_override_string(const char *arg) +static int 
opt_video_rc_override_string(const char *opt, const char *arg) { video_rc_override_string = arg; + return 0; } static int opt_me_threshold(const char *opt, const char *arg) @@ -2902,12 +2901,13 @@ static int opt_frame_crop(const char *opt, const char *arg) return AVERROR(EINVAL); } -static void opt_frame_size(const char *arg) +static int opt_frame_size(const char *opt, const char *arg) { if (av_parse_video_size(&frame_width, &frame_height, arg) < 0) { fprintf(stderr, "Incorrect frame size\n"); - ffmpeg_exit(1); + return AVERROR(EINVAL); } + return 0; } static int opt_pad(const char *opt, const char *arg) { @@ -2915,21 +2915,22 @@ static int opt_pad(const char *opt, const char *arg) { return -1; } -static void opt_frame_pix_fmt(const char *arg) +static int opt_frame_pix_fmt(const char *opt, const char *arg) { if (strcmp(arg, "list")) { frame_pix_fmt = av_get_pix_fmt(arg); if (frame_pix_fmt == PIX_FMT_NONE) { fprintf(stderr, "Unknown pixel format requested: %s\n", arg); - ffmpeg_exit(1); + return AVERROR(EINVAL); } } else { show_pix_fmts(); ffmpeg_exit(0); } + return 0; } -static void opt_frame_aspect_ratio(const char *arg) +static int opt_frame_aspect_ratio(const char *opt, const char *arg) { int x = 0, y = 0; double ar = 0; @@ -2948,9 +2949,10 @@ static void opt_frame_aspect_ratio(const char *arg) if (!ar) { fprintf(stderr, "Incorrect aspect ratio specification.\n"); - ffmpeg_exit(1); + return AVERROR(EINVAL); } frame_aspect_ratio = ar; + return 0; } static int opt_metadata(const char *opt, const char *arg) @@ -2963,7 +2965,7 @@ static int opt_metadata(const char *opt, const char *arg) } *mid++= 0; - av_metadata_set2(&metadata, arg, mid, 0); + av_dict_set(&metadata, arg, mid, 0); return 0; } @@ -2981,6 +2983,7 @@ static int opt_qscale(const char *opt, const char *arg) static int opt_top_field_first(const char *opt, const char *arg) { top_field_first = parse_number_or_die(opt, arg, OPT_INT, 0, 1); + opt_default(opt, arg); return 0; } @@ -2994,13 +2997,13 @@ static 
int opt_thread_count(const char *opt, const char *arg) return 0; } -static void opt_audio_sample_fmt(const char *arg) +static int opt_audio_sample_fmt(const char *opt, const char *arg) { if (strcmp(arg, "list")) { audio_sample_fmt = av_get_sample_fmt(arg); if (audio_sample_fmt == AV_SAMPLE_FMT_NONE) { av_log(NULL, AV_LOG_ERROR, "Invalid sample format '%s'\n", arg); - ffmpeg_exit(1); + return AVERROR(EINVAL); } } else { int i; @@ -3009,6 +3012,7 @@ static void opt_audio_sample_fmt(const char *arg) printf("%s\n", av_get_sample_fmt_string(fmt_str, sizeof(fmt_str), i)); ffmpeg_exit(0); } + return 0; } static int opt_audio_rate(const char *opt, const char *arg) @@ -3029,40 +3033,28 @@ static int opt_video_channel(const char *opt, const char *arg) return 0; } -static void opt_video_standard(const char *arg) +static int opt_video_standard(const char *opt, const char *arg) { video_standard = av_strdup(arg); + return 0; } -static void opt_codec(int *pstream_copy, char **pcodec_name, - int codec_type, const char *arg) +static int opt_codec(const char *opt, const char *arg) { + int *pstream_copy; char **pcodec_name; enum AVMediaType codec_type; + + if (!strcmp(opt, "acodec")) { pstream_copy = &audio_stream_copy; pcodec_name = &audio_codec_name; codec_type = AVMEDIA_TYPE_AUDIO; } + else if (!strcmp(opt, "vcodec")) { pstream_copy = &video_stream_copy; pcodec_name = &video_codec_name; codec_type = AVMEDIA_TYPE_VIDEO; } + else if (!strcmp(opt, "scodec")) { pstream_copy = &subtitle_stream_copy; pcodec_name = &subtitle_codec_name; codec_type = AVMEDIA_TYPE_SUBTITLE; } + else if (!strcmp(opt, "dcodec")) { pstream_copy = &data_stream_copy; pcodec_name = &data_codec_name; codec_type = AVMEDIA_TYPE_DATA; } + av_freep(pcodec_name); if (!strcmp(arg, "copy")) { *pstream_copy = 1; } else { *pcodec_name = av_strdup(arg); } -} - -static void opt_audio_codec(const char *arg) -{ - opt_codec(&audio_stream_copy, &audio_codec_name, AVMEDIA_TYPE_AUDIO, arg); -} - -static void opt_video_codec(const 
char *arg) -{ - opt_codec(&video_stream_copy, &video_codec_name, AVMEDIA_TYPE_VIDEO, arg); -} - -static void opt_subtitle_codec(const char *arg) -{ - opt_codec(&subtitle_stream_copy, &subtitle_codec_name, AVMEDIA_TYPE_SUBTITLE, arg); -} - -static void opt_data_codec(const char *arg) -{ - opt_codec(&data_stream_copy, &data_codec_name, AVMEDIA_TYPE_DATA, arg); + return 0; } static int opt_codec_tag(const char *opt, const char *arg) @@ -3083,7 +3075,7 @@ static int opt_codec_tag(const char *opt, const char *arg) return 0; } -static void opt_map(const char *arg) +static int opt_map(const char *opt, const char *arg) { AVStreamMap *m; char *p; @@ -3106,6 +3098,7 @@ static void opt_map(const char *arg) m->sync_file_index = m->file_index; m->sync_stream_index = m->stream_index; } + return 0; } static void parse_meta_type(char *arg, char *type, int *index, char **endptr) @@ -3129,7 +3122,7 @@ static void parse_meta_type(char *arg, char *type, int *index, char **endptr) *type = 'g'; } -static void opt_map_metadata(const char *arg) +static int opt_map_metadata(const char *opt, const char *arg) { AVMetaDataMap *m, *m1; char *p; @@ -3153,16 +3146,18 @@ static void opt_map_metadata(const char *arg) metadata_streams_autocopy = 0; if (m->type == 'c' || m1->type == 'c') metadata_chapters_autocopy = 0; + + return 0; } -static void opt_map_meta_data(const char *arg) +static int opt_map_meta_data(const char *opt, const char *arg) { fprintf(stderr, "-map_meta_data is deprecated and will be removed soon. 
" "Use -map_metadata instead.\n"); - opt_map_metadata(arg); + return opt_map_metadata(opt, arg); } -static void opt_map_chapters(const char *arg) +static int opt_map_chapters(const char *opt, const char *arg) { AVChapterMap *c; char *p; @@ -3175,9 +3170,10 @@ static void opt_map_chapters(const char *arg) p++; c->in_file = strtol(p, &p, 0); + return 0; } -static void opt_input_ts_scale(const char *arg) +static int opt_input_ts_scale(const char *opt, const char *arg) { unsigned int stream; double scale; @@ -3193,6 +3189,7 @@ static void opt_input_ts_scale(const char *arg) input_files_ts_scale[nb_input_files] = grow_array(input_files_ts_scale[nb_input_files], sizeof(*input_files_ts_scale[nb_input_files]), &nb_input_files_ts_scale[nb_input_files], stream + 1); input_files_ts_scale[nb_input_files][stream]= scale; + return 0; } static int opt_recording_time(const char *opt, const char *arg) @@ -3253,7 +3250,7 @@ static enum CodecID find_codec_or_die(const char *name, int type, int encoder, i return codec->id; } -static void opt_input_file(const char *filename) +static int opt_input_file(const char *opt, const char *filename) { AVFormatContext *ic; AVFormatParameters params, *ap = &params; @@ -3375,26 +3372,32 @@ static void opt_input_file(const char *filename) for(i=0;i<ic->nb_streams;i++) { AVStream *st = ic->streams[i]; AVCodecContext *dec = st->codec; + AVInputStream *ist; + dec->thread_count = thread_count; input_codecs = grow_array(input_codecs, sizeof(*input_codecs), &nb_input_codecs, nb_input_codecs + 1); + + input_streams = grow_array(input_streams, sizeof(*input_streams), &nb_input_streams, nb_input_streams + 1); + ist = &input_streams[nb_input_streams - 1]; + ist->st = st; + ist->file_index = nb_input_files; + ist->discard = 1; + switch (dec->codec_type) { case AVMEDIA_TYPE_AUDIO: input_codecs[nb_input_codecs-1] = avcodec_find_decoder_by_name(audio_codec_name); + if(!input_codecs[nb_input_codecs-1]) + input_codecs[nb_input_codecs-1] = 
avcodec_find_decoder(dec->codec_id); set_context_opts(dec, avcodec_opts[AVMEDIA_TYPE_AUDIO], AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_DECODING_PARAM, input_codecs[nb_input_codecs-1]); channel_layout = dec->channel_layout; - audio_channels = dec->channels; - audio_sample_rate = dec->sample_rate; audio_sample_fmt = dec->sample_fmt; if(audio_disable) st->discard= AVDISCARD_ALL; - /* Note that av_find_stream_info can add more streams, and we - * currently have no chance of setting up lowres decoding - * early enough for them. */ - if (dec->lowres) - audio_sample_rate >>= dec->lowres; break; case AVMEDIA_TYPE_VIDEO: input_codecs[nb_input_codecs-1] = avcodec_find_decoder_by_name(video_codec_name); + if(!input_codecs[nb_input_codecs-1]) + input_codecs[nb_input_codecs-1] = avcodec_find_decoder(dec->codec_id); set_context_opts(dec, avcodec_opts[AVMEDIA_TYPE_VIDEO], AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM, input_codecs[nb_input_codecs-1]); frame_height = dec->height; frame_width = dec->width; @@ -3419,9 +3422,6 @@ static void opt_input_file(const char *filename) (float)rfps / rfps_base, rfps, rfps_base); } - /* update the current frame rate to match the stream frame rate */ - frame_rate.num = rfps; - frame_rate.den = rfps_base; if(video_disable) st->discard= AVDISCARD_ALL; @@ -3432,6 +3432,8 @@ static void opt_input_file(const char *filename) break; case AVMEDIA_TYPE_SUBTITLE: input_codecs[nb_input_codecs-1] = avcodec_find_decoder_by_name(subtitle_codec_name); + if(!input_codecs[nb_input_codecs-1]) + input_codecs[nb_input_codecs-1] = avcodec_find_decoder(dec->codec_id); if(subtitle_disable) st->discard = AVDISCARD_ALL; break; @@ -3443,21 +3445,27 @@ static void opt_input_file(const char *filename) } } - input_files[nb_input_files] = ic; input_files_ts_offset[nb_input_files] = input_ts_offset - (copy_ts ? 
0 : timestamp); /* dump the file content */ if (verbose >= 0) av_dump_format(ic, nb_input_files, filename, 0); - nb_input_files++; + input_files = grow_array(input_files, sizeof(*input_files), &nb_input_files, nb_input_files + 1); + input_files[nb_input_files - 1].ctx = ic; + input_files[nb_input_files - 1].ist_index = nb_input_streams - ic->nb_streams; video_channel = 0; + top_field_first = -1; + frame_rate = (AVRational){0, 0}; + audio_sample_rate = 0; + audio_channels = 0; av_freep(&video_codec_name); av_freep(&audio_codec_name); av_freep(&subtitle_codec_name); uninit_opts(); init_opts(); + return 0; } static void check_inputs(int *has_video_ptr, @@ -3474,7 +3482,7 @@ static void check_inputs(int *has_video_ptr, has_data = 0; for(j=0;j<nb_input_files;j++) { - ic = input_files[j]; + ic = input_files[j].ctx; for(i=0;i<ic->nb_streams;i++) { AVCodecContext *enc = ic->streams[i]->codec; switch(enc->codec_type) { @@ -3532,8 +3540,8 @@ static void new_video_stream(AVFormatContext *oc, int file_idx) ost->frame_aspect_ratio = frame_aspect_ratio; frame_aspect_ratio = 0; #if CONFIG_AVFILTER - ost->avfilter= vfilters; - vfilters= NULL; + ost->avfilter = vfilters; + vfilters = NULL; #endif } @@ -3548,15 +3556,10 @@ static void new_video_stream(AVFormatContext *oc, int file_idx) if(video_codec_tag) video_enc->codec_tag= video_codec_tag; - if( (video_global_header&1) - || (video_global_header==0 && (oc->oformat->flags & AVFMT_GLOBALHEADER))){ + if(oc->oformat->flags & AVFMT_GLOBALHEADER) { video_enc->flags |= CODEC_FLAG_GLOBAL_HEADER; avcodec_opts[AVMEDIA_TYPE_VIDEO]->flags|= CODEC_FLAG_GLOBAL_HEADER; } - if(video_global_header&2){ - video_enc->flags2 |= CODEC_FLAG2_LOCAL_HEADER; - avcodec_opts[AVMEDIA_TYPE_VIDEO]->flags2|= CODEC_FLAG2_LOCAL_HEADER; - } if (video_stream_copy) { st->stream_copy = 1; @@ -3566,16 +3569,12 @@ static void new_video_stream(AVFormatContext *oc, int file_idx) } else { const char *p; int i; - AVRational fps= frame_rate.num ? 
frame_rate : (AVRational){25,1}; + if (frame_rate.num) + ost->frame_rate = frame_rate; video_enc->codec_id = codec_id; set_context_opts(video_enc, avcodec_opts[AVMEDIA_TYPE_VIDEO], AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM, codec); - if (codec && codec->supported_framerates && !force_fps) - fps = codec->supported_framerates[av_find_nearest_q_idx(fps, codec->supported_framerates)]; - video_enc->time_base.den = fps.num; - video_enc->time_base.num = fps.den; - video_enc->width = frame_width; video_enc->height = frame_height; video_enc->pix_fmt = frame_pix_fmt; @@ -3643,7 +3642,7 @@ static void new_video_stream(AVFormatContext *oc, int file_idx) parse_forced_key_frames(forced_key_frames, ost, video_enc); } if (video_language) { - av_metadata_set2(&st->metadata, "language", video_language, 0); + av_dict_set(&st->metadata, "language", video_language, 0); av_freep(&video_language); } @@ -3702,8 +3701,6 @@ static void new_audio_stream(AVFormatContext *oc, int file_idx) } if (audio_stream_copy) { st->stream_copy = 1; - audio_enc->channels = audio_channels; - audio_enc->sample_rate = audio_sample_rate; } else { audio_enc->codec_id = codec_id; set_context_opts(audio_enc, avcodec_opts[AVMEDIA_TYPE_AUDIO], AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM, codec); @@ -3712,18 +3709,16 @@ static void new_audio_stream(AVFormatContext *oc, int file_idx) audio_enc->flags |= CODEC_FLAG_QSCALE; audio_enc->global_quality = st->quality = FF_QP2LAMBDA * audio_qscale; } - audio_enc->channels = audio_channels; + if (audio_channels) + audio_enc->channels = audio_channels; audio_enc->sample_fmt = audio_sample_fmt; - audio_enc->sample_rate = audio_sample_rate; + if (audio_sample_rate) + audio_enc->sample_rate = audio_sample_rate; audio_enc->channel_layout = channel_layout; - if (av_get_channel_layout_nb_channels(channel_layout) != audio_channels) - audio_enc->channel_layout = 0; choose_sample_fmt(st, codec); - choose_sample_rate(st, codec); } - audio_enc->time_base= 
(AVRational){1, audio_sample_rate}; if (audio_language) { - av_metadata_set2(&st->metadata, "language", audio_language, 0); + av_dict_set(&st->metadata, "language", audio_language, 0); av_freep(&audio_language); } @@ -3736,7 +3731,6 @@ static void new_audio_stream(AVFormatContext *oc, int file_idx) static void new_data_stream(AVFormatContext *oc, int file_idx) { AVStream *st; - AVOutputStream *ost; AVCodec *codec=NULL; AVCodecContext *data_enc; @@ -3745,7 +3739,7 @@ static void new_data_stream(AVFormatContext *oc, int file_idx) fprintf(stderr, "Could not alloc stream\n"); ffmpeg_exit(1); } - ost = new_output_stream(oc, file_idx); + new_output_stream(oc, file_idx); data_enc = st->codec; output_codecs = grow_array(output_codecs, sizeof(*output_codecs), &nb_output_codecs, nb_output_codecs + 1); if (!data_stream_copy) { @@ -3820,7 +3814,7 @@ static void new_subtitle_stream(AVFormatContext *oc, int file_idx) } if (subtitle_language) { - av_metadata_set2(&st->metadata, "language", subtitle_language, 0); + av_dict_set(&st->metadata, "language", subtitle_language, 0); av_freep(&subtitle_language); } @@ -3869,7 +3863,7 @@ static int opt_streamid(const char *opt, const char *arg) return 0; } -static void opt_output_file(const char *filename) +static int opt_output_file(const char *opt, const char *filename) { AVFormatContext *oc; int err, use_video, use_audio, use_subtitle, use_data; @@ -3877,13 +3871,18 @@ static void opt_output_file(const char *filename) AVFormatParameters params, *ap = &params; AVOutputFormat *file_oformat; + if(nb_output_files >= FF_ARRAY_ELEMS(output_files)){ + fprintf(stderr, "Too many output files\n"); + ffmpeg_exit(1); + } + if (!strcmp(filename, "-")) filename = "pipe:"; - oc = avformat_alloc_output_context(last_asked_format, NULL, filename); + err = avformat_alloc_output_context2(&oc, NULL, last_asked_format, filename); last_asked_format = NULL; if (!oc) { - print_error(filename, AVERROR(ENOMEM)); + print_error(filename, err); ffmpeg_exit(1); } 
file_oformat= oc->oformat; @@ -3934,8 +3933,8 @@ static void opt_output_file(const char *filename) oc->timestamp = recording_timestamp; - av_metadata_copy(&oc->metadata, metadata, 0); - av_metadata_free(&metadata); + av_dict_copy(&oc->metadata, metadata, 0); + av_dict_free(&metadata); } output_files[nb_output_files++] = oc; @@ -3990,9 +3989,14 @@ static void opt_output_file(const char *filename) set_context_opts(oc, avformat_opts, AV_OPT_FLAG_ENCODING_PARAM, NULL); + frame_rate = (AVRational){0, 0}; + audio_sample_rate = 0; + audio_channels = 0; + av_freep(&forced_key_frames); uninit_opts(); init_opts(); + return 0; } /* same option as mencoder */ @@ -4131,7 +4135,7 @@ static void show_help(void) av_opt_show2(sws_opts, NULL, AV_OPT_FLAG_ENCODING_PARAM|AV_OPT_FLAG_DECODING_PARAM, 0); } -static void opt_target(const char *arg) +static int opt_target(const char *opt, const char *arg) { enum { PAL, NTSC, FILM, UNKNOWN } norm = UNKNOWN; static const char *const frame_rates[] = {"25", "30000/1001", "24000/1001"}; @@ -4157,9 +4161,9 @@ static void opt_target(const char *arg) /* Try to determine PAL/NTSC by peeking in the input files */ if(nb_input_files) { int i, j; - for(j = 0; j < nb_input_files; j++) { - for(i = 0; i < input_files[j]->nb_streams; i++) { - AVCodecContext *c = input_files[j]->streams[i]->codec; + for (j = 0; j < nb_input_files; j++) { + for (i = 0; i < input_files[j].ctx->nb_streams; i++) { + AVCodecContext *c = input_files[j].ctx->streams[i]->codec; if(c->codec_type != AVMEDIA_TYPE_VIDEO) continue; fr = c->time_base.den * 1000 / c->time_base.num; @@ -4188,13 +4192,12 @@ static void opt_target(const char *arg) } if(!strcmp(arg, "vcd")) { + opt_codec("vcodec", "mpeg1video"); + opt_codec("acodec", "mp2"); + opt_format("f", "vcd"); - opt_video_codec("mpeg1video"); - opt_audio_codec("mp2"); - opt_format("vcd"); - - opt_frame_size(norm == PAL ? "352x288" : "352x240"); - opt_frame_rate(NULL, frame_rates[norm]); + opt_frame_size("s", norm == PAL ? 
"352x288" : "352x240"); + opt_frame_rate("r", frame_rates[norm]); opt_default("g", norm == PAL ? "15" : "18"); opt_default("b", "1150000"); @@ -4217,12 +4220,12 @@ static void opt_target(const char *arg) mux_preload= (36000+3*1200) / 90000.0; //0.44 } else if(!strcmp(arg, "svcd")) { - opt_video_codec("mpeg2video"); - opt_audio_codec("mp2"); - opt_format("svcd"); + opt_codec("vcodec", "mpeg2video"); + opt_codec("acodec", "mp2"); + opt_format("f", "svcd"); - opt_frame_size(norm == PAL ? "480x576" : "480x480"); - opt_frame_rate(NULL, frame_rates[norm]); + opt_frame_size("s", norm == PAL ? "480x576" : "480x480"); + opt_frame_rate("r", frame_rates[norm]); opt_default("g", norm == PAL ? "15" : "18"); opt_default("b", "2040000"); @@ -4239,12 +4242,12 @@ static void opt_target(const char *arg) } else if(!strcmp(arg, "dvd")) { - opt_video_codec("mpeg2video"); - opt_audio_codec("ac3"); - opt_format("dvd"); + opt_codec("vcodec", "mpeg2video"); + opt_codec("acodec", "ac3"); + opt_format("f", "dvd"); - opt_frame_size(norm == PAL ? "720x576" : "720x480"); - opt_frame_rate(NULL, frame_rates[norm]); + opt_frame_size("vcodec", norm == PAL ? "720x576" : "720x480"); + opt_frame_rate("r", frame_rates[norm]); opt_default("g", norm == PAL ? "15" : "18"); opt_default("b", "6000000"); @@ -4260,29 +4263,31 @@ static void opt_target(const char *arg) } else if(!strncmp(arg, "dv", 2)) { - opt_format("dv"); + opt_format("f", "dv"); - opt_frame_size(norm == PAL ? "720x576" : "720x480"); - opt_frame_pix_fmt(!strncmp(arg, "dv50", 4) ? "yuv422p" : - (norm == PAL ? "yuv420p" : "yuv411p")); - opt_frame_rate(NULL, frame_rates[norm]); + opt_frame_size("s", norm == PAL ? "720x576" : "720x480"); + opt_frame_pix_fmt("pix_fmt", !strncmp(arg, "dv50", 4) ? "yuv422p" : + norm == PAL ? 
"yuv420p" : "yuv411p"); + opt_frame_rate("r", frame_rates[norm]); audio_sample_rate = 48000; audio_channels = 2; } else { fprintf(stderr, "Unknown target: %s\n", arg); - ffmpeg_exit(1); + return AVERROR(EINVAL); } + return 0; } -static void opt_vstats_file (const char *arg) +static int opt_vstats_file(const char *opt, const char *arg) { av_free (vstats_filename); vstats_filename=av_strdup (arg); + return 0; } -static void opt_vstats (void) +static int opt_vstats(const char *opt, const char *arg) { char filename[40]; time_t today2 = time(NULL); @@ -4290,7 +4295,7 @@ static void opt_vstats (void) snprintf(filename, sizeof(filename), "vstats_%02d%02d%02d.log", today->tm_hour, today->tm_min, today->tm_sec); - opt_vstats_file(filename); + return opt_vstats_file(opt, filename); } static int opt_bsf(const char *opt, const char *arg) @@ -4336,14 +4341,11 @@ static int opt_preset(const char *opt, const char *arg) fprintf(stderr, "%s: Invalid syntax: '%s'\n", filename, line); ffmpeg_exit(1); } - if(!strcmp(tmp, "acodec")){ - opt_audio_codec(tmp2); - }else if(!strcmp(tmp, "vcodec")){ - opt_video_codec(tmp2); - }else if(!strcmp(tmp, "scodec")){ - opt_subtitle_codec(tmp2); - }else if(!strcmp(tmp, "dcodec")){ - opt_data_codec(tmp2); + if (!strcmp(tmp, "acodec") || + !strcmp(tmp, "vcodec") || + !strcmp(tmp, "scodec") || + !strcmp(tmp, "dcodec")) { + opt_codec(tmp, tmp2); }else if(opt_default(tmp, tmp2) < 0){ fprintf(stderr, "%s: Invalid option or argument: '%s', parsed as '%s' = '%s'\n", filename, line, tmp, tmp2); ffmpeg_exit(1); @@ -4377,17 +4379,17 @@ static const OptionDef options[] = { { "map_metadata", HAS_ARG | OPT_EXPERT, {(void*)opt_map_metadata}, "set metadata information of outfile from infile", "outfile[,metadata]:infile[,metadata]" }, { "map_chapters", HAS_ARG | OPT_EXPERT, {(void*)opt_map_chapters}, "set chapters mapping", "outfile:infile" }, - { "t", OPT_FUNC2 | HAS_ARG, {(void*)opt_recording_time}, "record or transcode \"duration\" seconds of audio/video", 
"duration" }, + { "t", HAS_ARG, {(void*)opt_recording_time}, "record or transcode \"duration\" seconds of audio/video", "duration" }, { "fs", HAS_ARG | OPT_INT64, {(void*)&limit_filesize}, "set the limit file size in bytes", "limit_size" }, // - { "ss", OPT_FUNC2 | HAS_ARG, {(void*)opt_start_time}, "set the start time offset", "time_off" }, - { "itsoffset", OPT_FUNC2 | HAS_ARG, {(void*)opt_input_ts_offset}, "set the input ts offset", "time_off" }, + { "ss", HAS_ARG, {(void*)opt_start_time}, "set the start time offset", "time_off" }, + { "itsoffset", HAS_ARG, {(void*)opt_input_ts_offset}, "set the input ts offset", "time_off" }, { "itsscale", HAS_ARG, {(void*)opt_input_ts_scale}, "set the input ts scale", "stream:scale" }, - { "timestamp", OPT_FUNC2 | HAS_ARG, {(void*)opt_recording_timestamp}, "set the recording timestamp ('now' to set the current time)", "time" }, - { "metadata", OPT_FUNC2 | HAS_ARG, {(void*)opt_metadata}, "add metadata", "string=string" }, + { "timestamp", HAS_ARG, {(void*)opt_recording_timestamp}, "set the recording timestamp ('now' to set the current time)", "time" }, + { "metadata", HAS_ARG, {(void*)opt_metadata}, "add metadata", "string=string" }, { "dframes", OPT_INT | HAS_ARG, {(void*)&max_frames[AVMEDIA_TYPE_DATA]}, "set the number of data frames to record", "number" }, { "benchmark", OPT_BOOL | OPT_EXPERT, {(void*)&do_benchmark}, "add timings for benchmarking" }, - { "timelimit", OPT_FUNC2 | HAS_ARG, {(void*)opt_timelimit}, "set max runtime in seconds", "limit" }, + { "timelimit", HAS_ARG, {(void*)opt_timelimit}, "set max runtime in seconds", "limit" }, { "dump", OPT_BOOL | OPT_EXPERT, {(void*)&do_pkt_dump}, "dump each input packet" }, { "hex", OPT_BOOL | OPT_EXPERT, {(void*)&do_hex_dump}, @@ -4395,13 +4397,12 @@ static const OptionDef options[] = { { "re", OPT_BOOL | OPT_EXPERT, {(void*)&rate_emu}, "read input at native frame rate", "" }, { "loop_input", OPT_BOOL | OPT_EXPERT, {(void*)&loop_input}, "loop (current only works with images)" 
}, { "loop_output", HAS_ARG | OPT_INT | OPT_EXPERT, {(void*)&loop_output}, "number of times to loop output in formats that support looping (0 loops forever)", "" }, - { "v", HAS_ARG | OPT_FUNC2, {(void*)opt_verbose}, "set ffmpeg verbosity level", "number" }, + { "v", HAS_ARG, {(void*)opt_verbose}, "set ffmpeg verbosity level", "number" }, { "target", HAS_ARG, {(void*)opt_target}, "specify target file type (\"vcd\", \"svcd\", \"dvd\", \"dv\", \"dv50\", \"pal-vcd\", \"ntsc-svcd\", ...)", "type" }, - { "threads", OPT_FUNC2 | HAS_ARG | OPT_EXPERT, {(void*)opt_thread_count}, "thread count", "count" }, + { "threads", HAS_ARG | OPT_EXPERT, {(void*)opt_thread_count}, "thread count", "count" }, { "vsync", HAS_ARG | OPT_INT | OPT_EXPERT, {(void*)&video_sync_method}, "video sync method", "" }, { "async", HAS_ARG | OPT_INT | OPT_EXPERT, {(void*)&audio_sync_method}, "audio sync method", "" }, { "adrift_threshold", HAS_ARG | OPT_FLOAT | OPT_EXPERT, {(void*)&audio_drift_threshold}, "audio drift threshold", "threshold" }, - { "vglobal", HAS_ARG | OPT_INT | OPT_EXPERT, {(void*)&video_global_header}, "video global header storage type", "" }, { "copyts", OPT_BOOL | OPT_EXPERT, {(void*)&copy_ts}, "copy timestamps" }, { "copytb", OPT_BOOL | OPT_EXPERT, {(void*)&copy_tb}, "copy input stream time base when stream copying" }, { "shortest", OPT_BOOL | OPT_EXPERT, {(void*)&opt_shortest}, "finish encoding within shortest input" }, // @@ -4411,33 +4412,33 @@ static const OptionDef options[] = { { "copyinkf", OPT_BOOL | OPT_EXPERT, {(void*)&copy_initial_nonkeyframes}, "copy initial non-keyframes" }, /* video options */ - { "b", OPT_FUNC2 | HAS_ARG | OPT_VIDEO, {(void*)opt_bitrate}, "set bitrate (in bits/s)", "bitrate" }, - { "vb", OPT_FUNC2 | HAS_ARG | OPT_VIDEO, {(void*)opt_bitrate}, "set bitrate (in bits/s)", "bitrate" }, + { "b", HAS_ARG | OPT_VIDEO, {(void*)opt_bitrate}, "set bitrate (in bits/s)", "bitrate" }, + { "vb", HAS_ARG | OPT_VIDEO, {(void*)opt_bitrate}, "set bitrate (in bits/s)", "bitrate" }, 
{ "vframes", OPT_INT | HAS_ARG | OPT_VIDEO, {(void*)&max_frames[AVMEDIA_TYPE_VIDEO]}, "set the number of video frames to record", "number" }, - { "r", OPT_FUNC2 | HAS_ARG | OPT_VIDEO, {(void*)opt_frame_rate}, "set frame rate (Hz value, fraction or abbreviation)", "rate" }, + { "r", HAS_ARG | OPT_VIDEO, {(void*)opt_frame_rate}, "set frame rate (Hz value, fraction or abbreviation)", "rate" }, { "s", HAS_ARG | OPT_VIDEO, {(void*)opt_frame_size}, "set frame size (WxH or abbreviation)", "size" }, { "aspect", HAS_ARG | OPT_VIDEO, {(void*)opt_frame_aspect_ratio}, "set aspect ratio (4:3, 16:9 or 1.3333, 1.7777)", "aspect" }, { "pix_fmt", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_frame_pix_fmt}, "set pixel format, 'list' as argument shows all the pixel formats supported", "format" }, { "bits_per_raw_sample", OPT_INT | HAS_ARG | OPT_VIDEO, {(void*)&frame_bits_per_raw_sample}, "set the number of bits per raw sample", "number" }, - { "croptop", OPT_FUNC2 | HAS_ARG | OPT_VIDEO, {(void*)opt_frame_crop}, "Removed, use the crop filter instead", "size" }, - { "cropbottom", OPT_FUNC2 | HAS_ARG | OPT_VIDEO, {(void*)opt_frame_crop}, "Removed, use the crop filter instead", "size" }, - { "cropleft", OPT_FUNC2 | HAS_ARG | OPT_VIDEO, {(void*)opt_frame_crop}, "Removed, use the crop filter instead", "size" }, - { "cropright", OPT_FUNC2 | HAS_ARG | OPT_VIDEO, {(void*)opt_frame_crop}, "Removed, use the crop filter instead", "size" }, - { "padtop", OPT_FUNC2 | HAS_ARG | OPT_VIDEO, {(void*)opt_pad}, "Removed, use the pad filter instead", "size" }, - { "padbottom", OPT_FUNC2 | HAS_ARG | OPT_VIDEO, {(void*)opt_pad}, "Removed, use the pad filter instead", "size" }, - { "padleft", OPT_FUNC2 | HAS_ARG | OPT_VIDEO, {(void*)opt_pad}, "Removed, use the pad filter instead", "size" }, - { "padright", OPT_FUNC2 | HAS_ARG | OPT_VIDEO, {(void*)opt_pad}, "Removed, use the pad filter instead", "size" }, - { "padcolor", OPT_FUNC2 | HAS_ARG | OPT_VIDEO, {(void*)opt_pad}, "Removed, use the pad filter 
instead", "color" }, + { "croptop", HAS_ARG | OPT_VIDEO, {(void*)opt_frame_crop}, "Removed, use the crop filter instead", "size" }, + { "cropbottom", HAS_ARG | OPT_VIDEO, {(void*)opt_frame_crop}, "Removed, use the crop filter instead", "size" }, + { "cropleft", HAS_ARG | OPT_VIDEO, {(void*)opt_frame_crop}, "Removed, use the crop filter instead", "size" }, + { "cropright", HAS_ARG | OPT_VIDEO, {(void*)opt_frame_crop}, "Removed, use the crop filter instead", "size" }, + { "padtop", HAS_ARG | OPT_VIDEO, {(void*)opt_pad}, "Removed, use the pad filter instead", "size" }, + { "padbottom", HAS_ARG | OPT_VIDEO, {(void*)opt_pad}, "Removed, use the pad filter instead", "size" }, + { "padleft", HAS_ARG | OPT_VIDEO, {(void*)opt_pad}, "Removed, use the pad filter instead", "size" }, + { "padright", HAS_ARG | OPT_VIDEO, {(void*)opt_pad}, "Removed, use the pad filter instead", "size" }, + { "padcolor", HAS_ARG | OPT_VIDEO, {(void*)opt_pad}, "Removed, use the pad filter instead", "color" }, { "intra", OPT_BOOL | OPT_EXPERT | OPT_VIDEO, {(void*)&intra_only}, "use only intra frames"}, { "vn", OPT_BOOL | OPT_VIDEO, {(void*)&video_disable}, "disable video" }, { "vdt", OPT_INT | HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)&video_discard}, "discard threshold", "n" }, - { "qscale", HAS_ARG | OPT_FUNC2 | OPT_EXPERT | OPT_VIDEO, {(void*)opt_qscale}, "use fixed video quantizer scale (VBR)", "q" }, + { "qscale", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_qscale}, "use fixed video quantizer scale (VBR)", "q" }, { "rc_override", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_video_rc_override_string}, "rate control override for specific intervals", "override" }, - { "vcodec", HAS_ARG | OPT_VIDEO, {(void*)opt_video_codec}, "force video codec ('copy' to copy stream)", "codec" }, - { "me_threshold", HAS_ARG | OPT_FUNC2 | OPT_EXPERT | OPT_VIDEO, {(void*)opt_me_threshold}, "motion estimaton threshold", "threshold" }, + { "vcodec", HAS_ARG | OPT_VIDEO, {(void*)opt_codec}, "force video codec ('copy' 
to copy stream)", "codec" }, + { "me_threshold", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_me_threshold}, "motion estimaton threshold", "threshold" }, { "sameq", OPT_BOOL | OPT_VIDEO, {(void*)&same_quality}, "use same quantizer as source (implies VBR)" }, - { "pass", HAS_ARG | OPT_FUNC2 | OPT_VIDEO, {(void*)opt_pass}, "select the pass number (1 or 2)", "n" }, + { "pass", HAS_ARG | OPT_VIDEO, {(void*)opt_pass}, "select the pass number (1 or 2)", "n" }, { "passlogfile", HAS_ARG | OPT_VIDEO, {(void*)&opt_passlogfile}, "select two pass log file name prefix", "prefix" }, { "deinterlace", OPT_BOOL | OPT_EXPERT | OPT_VIDEO, {(void*)&do_deinterlace}, "deinterlace pictures" }, @@ -4449,39 +4450,39 @@ static const OptionDef options[] = { #endif { "intra_matrix", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_intra_matrix}, "specify intra matrix coeffs", "matrix" }, { "inter_matrix", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_inter_matrix}, "specify inter matrix coeffs", "matrix" }, - { "top", HAS_ARG | OPT_FUNC2 | OPT_EXPERT | OPT_VIDEO, {(void*)opt_top_field_first}, "top=1/bottom=0/auto=-1 field first", "" }, + { "top", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_top_field_first}, "top=1/bottom=0/auto=-1 field first", "" }, { "dc", OPT_INT | HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)&intra_dc_precision}, "intra_dc_precision", "precision" }, - { "vtag", OPT_FUNC2 | HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_codec_tag}, "force video tag/fourcc", "fourcc/tag" }, - { "newvideo", OPT_VIDEO | OPT_FUNC2, {(void*)opt_new_stream}, "add a new video stream to the current output stream" }, + { "vtag", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_codec_tag}, "force video tag/fourcc", "fourcc/tag" }, + { "newvideo", OPT_VIDEO, {(void*)opt_new_stream}, "add a new video stream to the current output stream" }, { "vlang", HAS_ARG | OPT_STRING | OPT_VIDEO, {(void *)&video_language}, "set the ISO 639 language code (3 letters) of the current video stream" , "code" }, { "qphist", 
OPT_BOOL | OPT_EXPERT | OPT_VIDEO, { (void *)&qp_hist }, "show QP histogram" }, { "force_fps", OPT_BOOL | OPT_EXPERT | OPT_VIDEO, {(void*)&force_fps}, "force the selected framerate, disable the best supported framerate selection" }, - { "streamid", OPT_FUNC2 | HAS_ARG | OPT_EXPERT, {(void*)opt_streamid}, "set the value of an outfile streamid", "streamIndex:value" }, + { "streamid", HAS_ARG | OPT_EXPERT, {(void*)opt_streamid}, "set the value of an outfile streamid", "streamIndex:value" }, { "force_key_frames", OPT_STRING | HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void *)&forced_key_frames}, "force key frames at specified timestamps", "timestamps" }, /* audio options */ - { "ab", OPT_FUNC2 | HAS_ARG | OPT_AUDIO, {(void*)opt_bitrate}, "set bitrate (in bits/s)", "bitrate" }, + { "ab", HAS_ARG | OPT_AUDIO, {(void*)opt_bitrate}, "set bitrate (in bits/s)", "bitrate" }, { "aframes", OPT_INT | HAS_ARG | OPT_AUDIO, {(void*)&max_frames[AVMEDIA_TYPE_AUDIO]}, "set the number of audio frames to record", "number" }, { "aq", OPT_FLOAT | HAS_ARG | OPT_AUDIO, {(void*)&audio_qscale}, "set audio quality (codec-specific)", "quality", }, - { "ar", HAS_ARG | OPT_FUNC2 | OPT_AUDIO, {(void*)opt_audio_rate}, "set audio sampling rate (in Hz)", "rate" }, - { "ac", HAS_ARG | OPT_FUNC2 | OPT_AUDIO, {(void*)opt_audio_channels}, "set number of audio channels", "channels" }, + { "ar", HAS_ARG | OPT_AUDIO, {(void*)opt_audio_rate}, "set audio sampling rate (in Hz)", "rate" }, + { "ac", HAS_ARG | OPT_AUDIO, {(void*)opt_audio_channels}, "set number of audio channels", "channels" }, { "an", OPT_BOOL | OPT_AUDIO, {(void*)&audio_disable}, "disable audio" }, - { "acodec", HAS_ARG | OPT_AUDIO, {(void*)opt_audio_codec}, "force audio codec ('copy' to copy stream)", "codec" }, - { "atag", OPT_FUNC2 | HAS_ARG | OPT_EXPERT | OPT_AUDIO, {(void*)opt_codec_tag}, "force audio tag/fourcc", "fourcc/tag" }, + { "acodec", HAS_ARG | OPT_AUDIO, {(void*)opt_codec}, "force audio codec ('copy' to copy stream)", "codec" }, + { 
"atag", HAS_ARG | OPT_EXPERT | OPT_AUDIO, {(void*)opt_codec_tag}, "force audio tag/fourcc", "fourcc/tag" }, { "vol", OPT_INT | HAS_ARG | OPT_AUDIO, {(void*)&audio_volume}, "change audio volume (256=normal)" , "volume" }, // - { "newaudio", OPT_AUDIO | OPT_FUNC2, {(void*)opt_new_stream}, "add a new audio stream to the current output stream" }, + { "newaudio", OPT_AUDIO, {(void*)opt_new_stream}, "add a new audio stream to the current output stream" }, { "alang", HAS_ARG | OPT_STRING | OPT_AUDIO, {(void *)&audio_language}, "set the ISO 639 language code (3 letters) of the current audio stream" , "code" }, { "sample_fmt", HAS_ARG | OPT_EXPERT | OPT_AUDIO, {(void*)opt_audio_sample_fmt}, "set sample format, 'list' as argument shows all the sample formats supported", "format" }, /* subtitle options */ { "sn", OPT_BOOL | OPT_SUBTITLE, {(void*)&subtitle_disable}, "disable subtitle" }, - { "scodec", HAS_ARG | OPT_SUBTITLE, {(void*)opt_subtitle_codec}, "force subtitle codec ('copy' to copy stream)", "codec" }, - { "newsubtitle", OPT_SUBTITLE | OPT_FUNC2, {(void*)opt_new_stream}, "add a new subtitle stream to the current output stream" }, + { "scodec", HAS_ARG | OPT_SUBTITLE, {(void*)opt_codec}, "force subtitle codec ('copy' to copy stream)", "codec" }, + { "newsubtitle", OPT_SUBTITLE, {(void*)opt_new_stream}, "add a new subtitle stream to the current output stream" }, { "slang", HAS_ARG | OPT_STRING | OPT_SUBTITLE, {(void *)&subtitle_language}, "set the ISO 639 language code (3 letters) of the current subtitle stream" , "code" }, - { "stag", OPT_FUNC2 | HAS_ARG | OPT_EXPERT | OPT_SUBTITLE, {(void*)opt_codec_tag}, "force subtitle tag/fourcc", "fourcc/tag" }, + { "stag", HAS_ARG | OPT_EXPERT | OPT_SUBTITLE, {(void*)opt_codec_tag}, "force subtitle tag/fourcc", "fourcc/tag" }, /* grab options */ - { "vc", HAS_ARG | OPT_FUNC2 | OPT_EXPERT | OPT_VIDEO | OPT_GRAB, {(void*)opt_video_channel}, "set video grab channel (DV1394 only)", "channel" }, + { "vc", HAS_ARG | OPT_EXPERT | 
OPT_VIDEO | OPT_GRAB, {(void*)opt_video_channel}, "set video grab channel (DV1394 only)", "channel" }, { "tvstd", HAS_ARG | OPT_EXPERT | OPT_VIDEO | OPT_GRAB, {(void*)opt_video_standard}, "set television standard (NTSC, PAL (SECAM))", "standard" }, { "isync", OPT_BOOL | OPT_EXPERT | OPT_GRAB, {(void*)&input_sync}, "sync read on input", "" }, @@ -4489,18 +4490,18 @@ static const OptionDef options[] = { { "muxdelay", OPT_FLOAT | HAS_ARG | OPT_EXPERT, {(void*)&mux_max_delay}, "set the maximum demux-decode delay", "seconds" }, { "muxpreload", OPT_FLOAT | HAS_ARG | OPT_EXPERT, {(void*)&mux_preload}, "set the initial demux-decode delay", "seconds" }, - { "absf", OPT_FUNC2 | HAS_ARG | OPT_AUDIO | OPT_EXPERT, {(void*)opt_bsf}, "", "bitstream_filter" }, - { "vbsf", OPT_FUNC2 | HAS_ARG | OPT_VIDEO | OPT_EXPERT, {(void*)opt_bsf}, "", "bitstream_filter" }, - { "sbsf", OPT_FUNC2 | HAS_ARG | OPT_SUBTITLE | OPT_EXPERT, {(void*)opt_bsf}, "", "bitstream_filter" }, + { "absf", HAS_ARG | OPT_AUDIO | OPT_EXPERT, {(void*)opt_bsf}, "", "bitstream_filter" }, + { "vbsf", HAS_ARG | OPT_VIDEO | OPT_EXPERT, {(void*)opt_bsf}, "", "bitstream_filter" }, + { "sbsf", HAS_ARG | OPT_SUBTITLE | OPT_EXPERT, {(void*)opt_bsf}, "", "bitstream_filter" }, - { "apre", OPT_FUNC2 | HAS_ARG | OPT_AUDIO | OPT_EXPERT, {(void*)opt_preset}, "set the audio options to the indicated preset", "preset" }, - { "vpre", OPT_FUNC2 | HAS_ARG | OPT_VIDEO | OPT_EXPERT, {(void*)opt_preset}, "set the video options to the indicated preset", "preset" }, - { "spre", OPT_FUNC2 | HAS_ARG | OPT_SUBTITLE | OPT_EXPERT, {(void*)opt_preset}, "set the subtitle options to the indicated preset", "preset" }, - { "fpre", OPT_FUNC2 | HAS_ARG | OPT_EXPERT, {(void*)opt_preset}, "set options from indicated preset file", "filename" }, + { "apre", HAS_ARG | OPT_AUDIO | OPT_EXPERT, {(void*)opt_preset}, "set the audio options to the indicated preset", "preset" }, + { "vpre", HAS_ARG | OPT_VIDEO | OPT_EXPERT, {(void*)opt_preset}, "set the video 
options to the indicated preset", "preset" }, + { "spre", HAS_ARG | OPT_SUBTITLE | OPT_EXPERT, {(void*)opt_preset}, "set the subtitle options to the indicated preset", "preset" }, + { "fpre", HAS_ARG | OPT_EXPERT, {(void*)opt_preset}, "set options from indicated preset file", "filename" }, /* data codec support */ - { "dcodec", HAS_ARG | OPT_DATA, {(void*)opt_data_codec}, "force data codec ('copy' to copy stream)", "codec" }, + { "dcodec", HAS_ARG | OPT_DATA, {(void*)opt_codec}, "force data codec ('copy' to copy stream)", "codec" }, - { "default", OPT_FUNC2 | HAS_ARG | OPT_AUDIO | OPT_VIDEO | OPT_EXPERT, {(void*)opt_default}, "generic catch all option", "" }, + { "default", HAS_ARG | OPT_AUDIO | OPT_VIDEO | OPT_EXPERT, {(void*)opt_default}, "generic catch all option", "" }, { NULL, }, }; @@ -4534,7 +4535,8 @@ int main(int argc, char **argv) init_opts(); - show_banner(); + if(verbose>=0) + show_banner(); /* parse options */ parse_options(argc, argv, options, opt_output_file); @@ -43,14 +43,10 @@ # include "libavfilter/avfiltergraph.h" #endif -#include "cmdutils.h" - #include <SDL.h> #include <SDL_thread.h> -#ifdef __MINGW32__ -#undef main /* We don't want SDL to override our main() */ -#endif +#include "cmdutils.h" #include <unistd.h> #include <assert.h> @@ -58,9 +54,6 @@ const char program_name[] = "ffplay"; const int program_birth_year = 2003; -//#define DEBUG -//#define DEBUG_SYNC - #define MAX_QUEUE_SIZE (15 * 1024 * 1024) #define MIN_AUDIOQ_SIZE (20 * 16 * 1024) #define MIN_FRAMES 5 @@ -243,8 +236,6 @@ static int show_status = 1; static int av_sync_type = AV_SYNC_AUDIO_MASTER; static int64_t start_time = AV_NOPTS_VALUE; static int64_t duration = AV_NOPTS_VALUE; -static int debug = 0; -static int debug_mv = 0; static int step = 0; static int thread_count = 1; static int workaround_bugs = 1; @@ -406,44 +397,6 @@ static inline void fill_rectangle(SDL_Surface *screen, SDL_FillRect(screen, &rect, color); } -#if 0 -/* draw only the border of a rectangle */ -void 
fill_border(VideoState *s, int x, int y, int w, int h, int color) -{ - int w1, w2, h1, h2; - - /* fill the background */ - w1 = x; - if (w1 < 0) - w1 = 0; - w2 = s->width - (x + w); - if (w2 < 0) - w2 = 0; - h1 = y; - if (h1 < 0) - h1 = 0; - h2 = s->height - (y + h); - if (h2 < 0) - h2 = 0; - fill_rectangle(screen, - s->xleft, s->ytop, - w1, s->height, - color); - fill_rectangle(screen, - s->xleft + s->width - w2, s->ytop, - w2, s->height, - color); - fill_rectangle(screen, - s->xleft + w1, s->ytop, - s->width - w1 - w2, h1, - color); - fill_rectangle(screen, - s->xleft + w1, s->ytop + s->height - h2, - s->width - w1 - w2, h2, - color); -} -#endif - #define ALPHA_BLEND(a, oldp, newp, s)\ ((((oldp << s) * (255 - (a))) + (newp * (a))) / (255 << s)) @@ -743,23 +696,12 @@ static void video_image_display(VideoState *is) } x = (is->width - width) / 2; y = (is->height - height) / 2; - if (!is->no_background) { - /* fill the background */ - // fill_border(is, x, y, width, height, QERGB(0x00, 0x00, 0x00)); - } else { - is->no_background = 0; - } + is->no_background = 0; rect.x = is->xleft + x; rect.y = is->ytop + y; rect.w = FFMAX(width, 1); rect.h = FFMAX(height, 1); SDL_DisplayYUVOverlay(vp->bmp, &rect); - } else { -#if 0 - fill_rectangle(screen, - is->xleft, is->ytop, is->width, is->height, - QERGB(0x00, 0x00, 0x00)); -#endif } } @@ -1107,10 +1049,9 @@ static double compute_target_time(double frame_current_pts, VideoState *is) } } is->frame_timer += delay; -#if defined(DEBUG_SYNC) - printf("video: delay=%0.3f actual_delay=%0.3f pts=%0.3f A-V=%f\n", - delay, actual_delay, frame_current_pts, -diff); -#endif + + av_dlog(NULL, "video: delay=%0.3f pts=%0.3f A-V=%f\n", + delay, frame_current_pts, -diff); return is->frame_timer; } @@ -1487,7 +1428,7 @@ static int queue_picture(VideoState *is, AVFrame *src_frame, double pts1, int64_ static int get_video_frame(VideoState *is, AVFrame *frame, int64_t *pts, AVPacket *pkt) { - int len1, got_picture, i; + int len1 av_unused, 
got_picture, i; if (packet_queue_get(&is->videoq, pkt, 1) < 0) return -1; @@ -1675,7 +1616,7 @@ static int input_request_frame(AVFilterLink *link) if (ret < 0) return -1; - if(priv->use_dr1) { + if(priv->use_dr1 && priv->frame->opaque) { picref = avfilter_ref_buffer(priv->frame->opaque, ~0); } else { picref = avfilter_get_video_buffer(link, AV_PERM_WRITE, link->w, link->h); @@ -1754,8 +1695,8 @@ static int configure_video_filters(AVFilterGraph *graph, VideoState *is, const c goto the_end; if(vfilters) { - AVFilterInOut *outputs = av_malloc(sizeof(AVFilterInOut)); - AVFilterInOut *inputs = av_malloc(sizeof(AVFilterInOut)); + AVFilterInOut *outputs = avfilter_inout_alloc(); + AVFilterInOut *inputs = avfilter_inout_alloc(); outputs->name = av_strdup("in"); outputs->filter_ctx = filt_src; @@ -1767,7 +1708,7 @@ static int configure_video_filters(AVFilterGraph *graph, VideoState *is, const c inputs->pad_idx = 0; inputs->next = NULL; - if ((ret = avfilter_graph_parse(graph, vfilters, inputs, outputs, NULL)) < 0) + if ((ret = avfilter_graph_parse(graph, vfilters, &inputs, &outputs, NULL)) < 0) goto the_end; av_freep(&vfilters); } else { @@ -1789,7 +1730,7 @@ static int video_thread(void *arg) { VideoState *is = arg; AVFrame *frame= avcodec_alloc_frame(); - int64_t pts_int, pos; + int64_t pts_int = AV_NOPTS_VALUE, pos = -1; double pts; int ret; @@ -1862,7 +1803,7 @@ static int subtitle_thread(void *arg) VideoState *is = arg; SubPicture *sp; AVPacket pkt1, *pkt = &pkt1; - int len1, got_subtitle; + int len1 av_unused, got_subtitle; double pts; int i, j; int r, g, b, y, u, v, a; @@ -1899,8 +1840,6 @@ static int subtitle_thread(void *arg) len1 = avcodec_decode_subtitle2(is->subtitle_st->codec, &sp->sub, &got_subtitle, pkt); -// if (len1 < 0) -// break; if (got_subtitle && sp->sub.format == 0) { sp->pts = pts; @@ -1924,9 +1863,6 @@ static int subtitle_thread(void *arg) SDL_UnlockMutex(is->subpq_mutex); } av_free_packet(pkt); -// if (step) -// if (cur_stream) -// 
stream_toggle_pause(cur_stream); } the_end: return 0; @@ -2093,7 +2029,7 @@ static int audio_decode_frame(VideoState *is, double *pts_ptr) n = 2 * dec->channels; is->audio_clock += (double)data_size / (double)(n * dec->sample_rate); -#if defined(DEBUG_SYNC) +#ifdef DEBUG { static double last_clock; printf("audio: delay=%0.3f clock=%0.3f pts=%0.3f\n", @@ -2192,8 +2128,6 @@ static int stream_component_open(VideoState *is, int stream_index) if (!codec) return -1; - avctx->debug_mv = debug_mv; - avctx->debug = debug; avctx->workaround_bugs = workaround_bugs; avctx->lowres = lowres; if(lowres) avctx->flags |= CODEC_FLAG_EMU_EDGE; @@ -2216,6 +2150,10 @@ static int stream_component_open(VideoState *is, int stream_index) /* prepare audio output */ if (avctx->codec_type == AVMEDIA_TYPE_AUDIO) { + if(avctx->sample_rate <= 0 || avctx->channels <= 0){ + fprintf(stderr, "Invalid sample rate or channel count\n"); + return -1; + } wanted_spec.freq = avctx->sample_rate; wanted_spec.format = AUDIO_S16SYS; wanted_spec.channels = avctx->channels; @@ -2254,8 +2192,6 @@ static int stream_component_open(VideoState *is, int stream_index) is->video_stream = stream_index; is->video_st = ic->streams[stream_index]; -// is->video_current_pts_time = av_gettime(); - packet_queue_init(&is->videoq); is->video_tid = SDL_CreateThread(video_thread, is); break; @@ -2694,10 +2630,6 @@ static void stream_cycle_channel(VideoState *is, int codec_type) static void toggle_full_screen(void) { is_full_screen = !is_full_screen; - if (!fs_screen_width) { - /* use default SDL method */ -// SDL_WM_ToggleFullScreen(screen); - } video_open(cur_stream); } @@ -2877,16 +2809,17 @@ static void event_loop(void) } } -static void opt_frame_size(const char *arg) +static int opt_frame_size(const char *opt, const char *arg) { if (av_parse_video_size(&frame_width, &frame_height, arg) < 0) { fprintf(stderr, "Incorrect frame size\n"); - exit(1); + return AVERROR(EINVAL); } if ((frame_width % 2) != 0 || (frame_height % 2) != 0) 
{ fprintf(stderr, "Frame size must be a multiple of 2\n"); - exit(1); + return AVERROR(EINVAL); } + return 0; } static int opt_width(const char *opt, const char *arg) @@ -2901,18 +2834,20 @@ static int opt_height(const char *opt, const char *arg) return 0; } -static void opt_format(const char *arg) +static int opt_format(const char *opt, const char *arg) { file_iformat = av_find_input_format(arg); if (!file_iformat) { fprintf(stderr, "Unknown input format: %s\n", arg); - exit(1); + return AVERROR(EINVAL); } + return 0; } -static void opt_frame_pix_fmt(const char *arg) +static int opt_frame_pix_fmt(const char *opt, const char *arg) { frame_pix_fmt = av_get_pix_fmt(arg); + return 0; } static int opt_sync(const char *opt, const char *arg) @@ -2942,19 +2877,6 @@ static int opt_duration(const char *opt, const char *arg) return 0; } -static int opt_debug(const char *opt, const char *arg) -{ - av_log_set_level(99); - debug = parse_number_or_die(opt, arg, OPT_INT64, 0, INT_MAX); - return 0; -} - -static int opt_vismv(const char *opt, const char *arg) -{ - debug_mv = parse_number_or_die(opt, arg, OPT_INT64, INT_MIN, INT_MAX); - return 0; -} - static int opt_thread_count(const char *opt, const char *arg) { thread_count= parse_number_or_die(opt, arg, OPT_INT64, 0, INT_MAX); @@ -2973,10 +2895,23 @@ static int opt_show_mode(const char *opt, const char *arg) return 0; } +static int opt_input_file(const char *opt, const char *filename) +{ + if (input_filename) { + fprintf(stderr, "Argument '%s' provided as input filename, but '%s' was already specified.\n", + filename, input_filename); + exit(1); + } + if (!strcmp(filename, "-")) + filename = "pipe:"; + input_filename = filename; + return 0; +} + static const OptionDef options[] = { #include "cmdutils_common_opts.h" - { "x", HAS_ARG | OPT_FUNC2, {(void*)opt_width}, "force displayed width", "width" }, - { "y", HAS_ARG | OPT_FUNC2, {(void*)opt_height}, "force displayed height", "height" }, + { "x", HAS_ARG, {(void*)opt_width}, 
"force displayed width", "width" }, + { "y", HAS_ARG, {(void*)opt_height}, "force displayed height", "height" }, { "s", HAS_ARG | OPT_VIDEO, {(void*)opt_frame_size}, "set frame size (WxH or abbreviation)", "size" }, { "fs", OPT_BOOL, {(void*)&is_full_screen}, "force full screen" }, { "an", OPT_BOOL, {(void*)&audio_disable}, "disable audio" }, @@ -2984,16 +2919,14 @@ static const OptionDef options[] = { { "ast", OPT_INT | HAS_ARG | OPT_EXPERT, {(void*)&wanted_stream[AVMEDIA_TYPE_AUDIO]}, "select desired audio stream", "stream_number" }, { "vst", OPT_INT | HAS_ARG | OPT_EXPERT, {(void*)&wanted_stream[AVMEDIA_TYPE_VIDEO]}, "select desired video stream", "stream_number" }, { "sst", OPT_INT | HAS_ARG | OPT_EXPERT, {(void*)&wanted_stream[AVMEDIA_TYPE_SUBTITLE]}, "select desired subtitle stream", "stream_number" }, - { "ss", HAS_ARG | OPT_FUNC2, {(void*)&opt_seek}, "seek to a given position in seconds", "pos" }, - { "t", HAS_ARG | OPT_FUNC2, {(void*)&opt_duration}, "play \"duration\" seconds of audio/video", "duration" }, + { "ss", HAS_ARG, {(void*)&opt_seek}, "seek to a given position in seconds", "pos" }, + { "t", HAS_ARG, {(void*)&opt_duration}, "play \"duration\" seconds of audio/video", "duration" }, { "bytes", OPT_INT | HAS_ARG, {(void*)&seek_by_bytes}, "seek by bytes 0=off 1=on -1=auto", "val" }, { "nodisp", OPT_BOOL, {(void*)&display_disable}, "disable graphical display" }, { "f", HAS_ARG, {(void*)opt_format}, "force format", "fmt" }, { "pix_fmt", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_frame_pix_fmt}, "set pixel format", "format" }, { "stats", OPT_BOOL | OPT_EXPERT, {(void*)&show_status}, "show status", "" }, - { "debug", HAS_ARG | OPT_FUNC2 | OPT_EXPERT, {(void*)opt_debug}, "print specific debug info", "" }, { "bug", OPT_INT | HAS_ARG | OPT_EXPERT, {(void*)&workaround_bugs}, "workaround bugs", "" }, - { "vismv", HAS_ARG | OPT_FUNC2 | OPT_EXPERT, {(void*)opt_vismv}, "visualize motion vectors", "" }, { "fast", OPT_BOOL | OPT_EXPERT, {(void*)&fast}, "non spec 
compliant optimizations", "" }, { "genpts", OPT_BOOL | OPT_EXPERT, {(void*)&genpts}, "generate pts", "" }, { "drp", OPT_INT | HAS_ARG | OPT_EXPERT, {(void*)&decoder_reorder_pts}, "let decoder reorder pts 0=off 1=on -1=auto", ""}, @@ -3004,8 +2937,8 @@ static const OptionDef options[] = { { "idct", OPT_INT | HAS_ARG | OPT_EXPERT, {(void*)&idct}, "set idct algo", "algo" }, { "er", OPT_INT | HAS_ARG | OPT_EXPERT, {(void*)&error_recognition}, "set error detection threshold (0-4)", "threshold" }, { "ec", OPT_INT | HAS_ARG | OPT_EXPERT, {(void*)&error_concealment}, "set error concealment options", "bit_mask" }, - { "sync", HAS_ARG | OPT_FUNC2 | OPT_EXPERT, {(void*)opt_sync}, "set audio-video sync. type (type=audio/video/ext)", "type" }, - { "threads", HAS_ARG | OPT_FUNC2 | OPT_EXPERT, {(void*)opt_thread_count}, "thread count", "count" }, + { "sync", HAS_ARG | OPT_EXPERT, {(void*)opt_sync}, "set audio-video sync. type (type=audio/video/ext)", "type" }, + { "threads", HAS_ARG | OPT_EXPERT, {(void*)opt_thread_count}, "thread count", "count" }, { "autoexit", OPT_BOOL | OPT_EXPERT, {(void*)&autoexit}, "exit at the end", "" }, { "exitonkeydown", OPT_BOOL | OPT_EXPERT, {(void*)&exit_on_keydown}, "exit on key down", "" }, { "exitonmousedown", OPT_BOOL | OPT_EXPERT, {(void*)&exit_on_mousedown}, "exit on mouse down", "" }, @@ -3016,9 +2949,9 @@ static const OptionDef options[] = { { "vf", OPT_STRING | HAS_ARG, {(void*)&vfilters}, "video filters", "filter list" }, #endif { "rdftspeed", OPT_INT | HAS_ARG| OPT_AUDIO | OPT_EXPERT, {(void*)&rdftspeed}, "rdft speed", "msecs" }, - { "showmode", HAS_ARG | OPT_FUNC2, {(void*)opt_show_mode}, "select show mode (0 = video, 1 = waves, 2 = RDFT)", "mode" }, - { "default", OPT_FUNC2 | HAS_ARG | OPT_AUDIO | OPT_VIDEO | OPT_EXPERT, {(void*)opt_default}, "generic catch all option", "" }, - { "i", OPT_DUMMY, {NULL}, "ffmpeg compatibility dummy option", ""}, + { "showmode", HAS_ARG, {(void*)opt_show_mode}, "select show mode (0 = video, 1 = waves, 2 = 
RDFT)", "mode" }, + { "default", HAS_ARG | OPT_AUDIO | OPT_VIDEO | OPT_EXPERT, {(void*)opt_default}, "generic catch all option", "" }, + { "i", HAS_ARG, {(void *)opt_input_file}, "read specified file", "input_file"}, { NULL, }, }; @@ -3063,18 +2996,6 @@ static void show_help(void) ); } -static void opt_input_file(const char *filename) -{ - if (input_filename) { - fprintf(stderr, "Argument '%s' provided as input filename, but '%s' was already specified.\n", - filename, input_filename); - exit(1); - } - if (!strcmp(filename, "-")) - filename = "pipe:"; - input_filename = filename; -} - /* Called from the main */ int main(int argc, char **argv) { diff --git a/ffpresets/libx264-lossless_fast.ffpreset b/ffpresets/libx264-lossless_fast.ffpreset index b7696b5bcb..49b9ed1add 100644 --- a/ffpresets/libx264-lossless_fast.ffpreset +++ b/ffpresets/libx264-lossless_fast.ffpreset @@ -1,5 +1,5 @@ coder=0 -flags=+loop +flags=+loop+cgop cmp=+chroma partitions=-parti8x8+parti4x4+partp8x8-partp4x4-partb8x8 me_method=hex diff --git a/ffpresets/libx264-lossless_max.ffpreset b/ffpresets/libx264-lossless_max.ffpreset index 75c387f162..f32d7b40c6 100644 --- a/ffpresets/libx264-lossless_max.ffpreset +++ b/ffpresets/libx264-lossless_max.ffpreset @@ -1,5 +1,5 @@ coder=1 -flags=+loop +flags=+loop+cgop cmp=+chroma partitions=+parti8x8+parti4x4+partp8x8+partp4x4-partb8x8 me_method=esa diff --git a/ffpresets/libx264-lossless_medium.ffpreset b/ffpresets/libx264-lossless_medium.ffpreset index 116e3343ce..0b84612fcb 100644 --- a/ffpresets/libx264-lossless_medium.ffpreset +++ b/ffpresets/libx264-lossless_medium.ffpreset @@ -1,5 +1,5 @@ coder=1 -flags=+loop +flags=+loop+cgop cmp=+chroma partitions=-parti8x8+parti4x4+partp8x8+partp4x4-partb8x8 me_method=hex diff --git a/ffpresets/libx264-lossless_slow.ffpreset b/ffpresets/libx264-lossless_slow.ffpreset index 0d496f6e29..857d3d1986 100644 --- a/ffpresets/libx264-lossless_slow.ffpreset +++ b/ffpresets/libx264-lossless_slow.ffpreset @@ -1,5 +1,5 @@ 
coder=1 -flags=+loop +flags=+loop+cgop cmp=+chroma partitions=+parti8x8+parti4x4+partp8x8+partp4x4-partb8x8 me_method=umh diff --git a/ffpresets/libx264-lossless_slower.ffpreset b/ffpresets/libx264-lossless_slower.ffpreset index 672e0cd637..ef0609f1b6 100644 --- a/ffpresets/libx264-lossless_slower.ffpreset +++ b/ffpresets/libx264-lossless_slower.ffpreset @@ -1,5 +1,5 @@ coder=1 -flags=+loop +flags=+loop+cgop cmp=+chroma partitions=+parti8x8+parti4x4+partp8x8+partp4x4-partb8x8 me_method=umh diff --git a/ffpresets/libx264-lossless_ultrafast.ffpreset b/ffpresets/libx264-lossless_ultrafast.ffpreset index a2eda65edf..4cc84f1b4f 100644 --- a/ffpresets/libx264-lossless_ultrafast.ffpreset +++ b/ffpresets/libx264-lossless_ultrafast.ffpreset @@ -1,5 +1,5 @@ coder=0 -flags=+loop +flags=+loop+cgop cmp=+chroma partitions=-parti8x8-parti4x4-partp8x8-partp4x4-partb8x8 me_method=dia @@ -25,6 +25,7 @@ #include "libavcodec/avcodec.h" #include "libavcodec/opt.h" #include "libavutil/pixdesc.h" +#include "libavutil/dict.h" #include "libavdevice/avdevice.h" #include "cmdutils.h" @@ -160,7 +161,7 @@ static void show_stream(AVFormatContext *fmt_ctx, int stream_idx) AVCodecContext *dec_ctx; AVCodec *dec; char val_str[128]; - AVMetadataTag *tag = NULL; + AVDictionaryEntry *tag = NULL; AVRational display_aspect_ratio; printf("[STREAM]\n"); @@ -226,7 +227,7 @@ static void show_stream(AVFormatContext *fmt_ctx, int stream_idx) if (stream->nb_frames) printf("nb_frames=%"PRId64"\n", stream->nb_frames); - while ((tag = av_metadata_get(stream->metadata, "", tag, AV_METADATA_IGNORE_SUFFIX))) + while ((tag = av_dict_get(stream->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) printf("TAG:%s=%s\n", tag->key, tag->value); printf("[/STREAM]\n"); @@ -234,7 +235,7 @@ static void show_stream(AVFormatContext *fmt_ctx, int stream_idx) static void show_format(AVFormatContext *fmt_ctx) { - AVMetadataTag *tag = NULL; + AVDictionaryEntry *tag = NULL; char val_str[128]; printf("[FORMAT]\n"); @@ -252,7 +253,7 @@ static 
void show_format(AVFormatContext *fmt_ctx) printf("bit_rate=%s\n", value_string(val_str, sizeof(val_str), fmt_ctx->bit_rate, unit_bit_per_second_str)); - while ((tag = av_metadata_get(fmt_ctx->metadata, "", tag, AV_METADATA_IGNORE_SUFFIX))) + while ((tag = av_dict_get(fmt_ctx->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) printf("TAG:%s=%s\n", tag->key, tag->value); printf("[/FORMAT]\n"); @@ -329,16 +330,17 @@ static void show_usage(void) printf("\n"); } -static void opt_format(const char *arg) +static int opt_format(const char *opt, const char *arg) { iformat = av_find_input_format(arg); if (!iformat) { fprintf(stderr, "Unknown input format: %s\n", arg); - exit(1); + return AVERROR(EINVAL); } + return 0; } -static void opt_input_file(const char *arg) +static int opt_input_file(const char *opt, const char *arg) { if (input_filename) { fprintf(stderr, "Argument '%s' provided as input filename, but '%s' was already specified.\n", @@ -348,6 +350,7 @@ static void opt_input_file(const char *arg) if (!strcmp(arg, "-")) arg = "pipe:"; input_filename = arg; + return 0; } static void show_help(void) @@ -382,7 +385,8 @@ static const OptionDef options[] = { { "show_format", OPT_BOOL, {(void*)&do_show_format} , "show format/container info" }, { "show_packets", OPT_BOOL, {(void*)&do_show_packets}, "show packets info" }, { "show_streams", OPT_BOOL, {(void*)&do_show_streams}, "show streams info" }, - { "default", OPT_FUNC2 | HAS_ARG | OPT_AUDIO | OPT_VIDEO | OPT_EXPERT, {(void*)opt_default}, "generic catch all option", "" }, + { "default", HAS_ARG | OPT_AUDIO | OPT_VIDEO | OPT_EXPERT, {(void*)opt_default}, "generic catch all option", "" }, + { "i", HAS_ARG, {(void *)opt_input_file}, "read specified file", "input_file"}, { NULL, }, }; diff --git a/ffserver.c b/ffserver.c index 0e29952071..6a2d2d02fb 100644 --- a/ffserver.c +++ b/ffserver.c @@ -36,6 +36,7 @@ #include "libavformat/avio_internal.h" #include "libavutil/avstring.h" #include "libavutil/lfg.h" +#include "libavutil/dict.h" 
#include "libavutil/random_seed.h" #include "libavutil/parseutils.h" #include "libavcodec/opt.h" @@ -856,7 +857,7 @@ static void close_connection(HTTPContext *c) ctx = c->rtp_ctx[i]; if (ctx) { av_write_trailer(ctx); - av_metadata_free(&ctx->metadata); + av_dict_free(&ctx->metadata); av_free(ctx->streams[0]); av_free(ctx); } @@ -1762,7 +1763,7 @@ static int http_parse_request(HTTPContext *c) } } -#ifdef DEBUG_WMP +#ifdef DEBUG http_log("\nGot request:\n%s\n", c->buffer); #endif @@ -1792,7 +1793,7 @@ static int http_parse_request(HTTPContext *c) return 0; } -#ifdef DEBUG_WMP +#ifdef DEBUG if (strcmp(stream->filename + strlen(stream->filename) - 4, ".asf") == 0) http_log("\nGot request:\n%s\n", c->buffer); #endif @@ -2226,10 +2227,10 @@ static int http_prepare_data(HTTPContext *c) switch(c->state) { case HTTPSTATE_SEND_DATA_HEADER: memset(&c->fmt_ctx, 0, sizeof(c->fmt_ctx)); - av_metadata_set2(&c->fmt_ctx.metadata, "author" , c->stream->author , 0); - av_metadata_set2(&c->fmt_ctx.metadata, "comment" , c->stream->comment , 0); - av_metadata_set2(&c->fmt_ctx.metadata, "copyright", c->stream->copyright, 0); - av_metadata_set2(&c->fmt_ctx.metadata, "title" , c->stream->title , 0); + av_dict_set(&c->fmt_ctx.metadata, "author" , c->stream->author , 0); + av_dict_set(&c->fmt_ctx.metadata, "comment" , c->stream->comment , 0); + av_dict_set(&c->fmt_ctx.metadata, "copyright", c->stream->copyright, 0); + av_dict_set(&c->fmt_ctx.metadata, "title" , c->stream->title , 0); for(i=0;i<c->stream->nb_streams;i++) { AVStream *st; @@ -2274,7 +2275,7 @@ static int http_prepare_data(HTTPContext *c) http_log("Error writing output header\n"); return -1; } - av_metadata_free(&c->fmt_ctx.metadata); + av_dict_free(&c->fmt_ctx.metadata); len = avio_close_dyn_buf(c->fmt_ctx.pb, &c->pb_buffer); c->buffer_ptr = c->pb_buffer; @@ -2929,8 +2930,8 @@ static int prepare_sdp_description(FFStream *stream, uint8_t **pbuffer, if (avc == NULL) { return -1; } - av_metadata_set2(&avc->metadata, "title", - 
stream->title[0] ? stream->title : "No Title", 0); + av_dict_set(&avc->metadata, "title", + stream->title[0] ? stream->title : "No Title", 0); avc->nb_streams = stream->nb_streams; if (stream->is_multicast) { snprintf(avc->filename, 1024, "rtp://%s:%d?multicast=1?ttl=%d", @@ -3280,7 +3281,6 @@ static void rtsp_cmd_pause(HTTPContext *c, const char *url, RTSPMessageHeader *h static void rtsp_cmd_teardown(HTTPContext *c, const char *url, RTSPMessageHeader *h) { HTTPContext *rtp_c; - char session_id[32]; rtp_c = find_rtp_session_with_url(url, h->session_id); if (!rtp_c) { @@ -3288,16 +3288,14 @@ static void rtsp_cmd_teardown(HTTPContext *c, const char *url, RTSPMessageHeader return; } - av_strlcpy(session_id, rtp_c->session_id, sizeof(session_id)); - - /* abort the session */ - close_connection(rtp_c); - /* now everything is OK, so we can send the connection parameters */ rtsp_reply_header(c, RTSP_STATUS_OK); /* session ID */ - avio_printf(c->pb, "Session: %s\r\n", session_id); + avio_printf(c->pb, "Session: %s\r\n", rtp_c->session_id); avio_printf(c->pb, "\r\n"); + + /* abort the session */ + close_connection(rtp_c); } diff --git a/libavcodec/8svx.c b/libavcodec/8svx.c index 4f95d9034e..5d94e005a2 100644 --- a/libavcodec/8svx.c +++ b/libavcodec/8svx.c @@ -1,5 +1,6 @@ /* * Copyright (C) 2008 Jaikrishnan Menon + * Copyright (C) 2011 Stefano Sabatini * * This file is part of FFmpeg. 
* @@ -38,62 +39,155 @@ /** decoder context */ typedef struct EightSvxContext { - int16_t fib_acc; - const int16_t *table; + const int8_t *table; + + /* buffer used to store the whole audio decoded/interleaved chunk, + * which is sent with the first packet */ + uint8_t *samples; + size_t samples_size; + int samples_idx; } EightSvxContext; -static const int16_t fibonacci[16] = { -34<<8, -21<<8, -13<<8, -8<<8, -5<<8, -3<<8, -2<<8, -1<<8, - 0, 1<<8, 2<<8, 3<<8, 5<<8, 8<<8, 13<<8, 21<<8 }; -static const int16_t exponential[16] = { -128<<8, -64<<8, -32<<8, -16<<8, -8<<8, -4<<8, -2<<8, -1<<8, - 0, 1<<8, 2<<8, 4<<8, 8<<8, 16<<8, 32<<8, 64<<8 }; +static const int8_t fibonacci[16] = { -34, -21, -13, -8, -5, -3, -2, -1, 0, 1, 2, 3, 5, 8, 13, 21 }; +static const int8_t exponential[16] = { -128, -64, -32, -16, -8, -4, -2, -1, 0, 1, 2, 4, 8, 16, 32, 64 }; + +#define MAX_FRAME_SIZE 2048 + +/** + * Interleave samples in buffer containing all left channel samples + * at the beginning, and right channel samples at the end. + * Each sample is assumed to be in signed 8-bit format. + * + * @param size the size in bytes of the dst and src buffer + */ +static void interleave_stereo(uint8_t *dst, const uint8_t *src, int size) +{ + uint8_t *dst_end = dst + size; + size = size>>1; + + while (dst < dst_end) { + *dst++ = *src; + *dst++ = *(src+size); + src++; + } +} + +/** + * Delta decode the compressed values in src, and put the resulting + * decoded n samples in dst. 
+ * + * @param val starting value assumed by the delta sequence + * @param table delta sequence table + * @return size in bytes of the decoded data, must be src_size*2 + */ +static int delta_decode(int8_t *dst, const uint8_t *src, int src_size, + int8_t val, const int8_t *table) +{ + int n = src_size; + int8_t *dst0 = dst; + + while (n--) { + uint8_t d = *src++; + val = av_clip(val + table[d & 0x0f], -127, 128); + *dst++ = val; + val = av_clip(val + table[d >> 4] , -127, 128); + *dst++ = val; + } + + return dst-dst0; +} static int eightsvx_decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPacket *avpkt) { - const uint8_t *buf = avpkt->data; - int buf_size = avpkt->size; EightSvxContext *esc = avctx->priv_data; - int16_t *out_data = data; - int consumed = buf_size; - const uint8_t *buf_end = buf + buf_size; + int out_data_size, n; + uint8_t *src, *dst; - if((*data_size >> 2) < buf_size) - return -1; + /* decode and interleave the first packet */ + if (!esc->samples && avpkt) { + uint8_t *deinterleaved_samples; - if(avctx->frame_number == 0) { - esc->fib_acc = buf[1] << 8; - buf_size -= 2; - buf += 2; - } + esc->samples_size = avctx->codec->id == CODEC_ID_8SVX_RAW ? 
+ avpkt->size : avctx->channels + (avpkt->size-avctx->channels) * 2; + if (!(esc->samples = av_malloc(esc->samples_size))) + return AVERROR(ENOMEM); - *data_size = buf_size << 2; + /* decompress */ + if (avctx->codec->id == CODEC_ID_8SVX_FIB || avctx->codec->id == CODEC_ID_8SVX_EXP) { + const uint8_t *buf = avpkt->data; + int buf_size = avpkt->size; + int n = esc->samples_size; - while(buf < buf_end) { - uint8_t d = *buf++; - esc->fib_acc += esc->table[d & 0x0f]; - *out_data++ = esc->fib_acc; - esc->fib_acc += esc->table[d >> 4]; - *out_data++ = esc->fib_acc; + if (!(deinterleaved_samples = av_mallocz(n))) + return AVERROR(ENOMEM); + + /* the uncompressed starting value is contained in the first byte */ + if (avctx->channels == 2) { + delta_decode(deinterleaved_samples , buf+1, buf_size/2-1, buf[0], esc->table); + buf += buf_size/2; + delta_decode(deinterleaved_samples+n/2-1, buf+1, buf_size/2-1, buf[0], esc->table); + } else + delta_decode(deinterleaved_samples , buf+1, buf_size-1 , buf[0], esc->table); + } else { + deinterleaved_samples = avpkt->data; + } + + if (avctx->channels == 2) + interleave_stereo(esc->samples, deinterleaved_samples, esc->samples_size); + else + memcpy(esc->samples, deinterleaved_samples, esc->samples_size); } - return consumed; + /* return single packed with fixed size */ + out_data_size = FFMIN(MAX_FRAME_SIZE, esc->samples_size - esc->samples_idx); + if (*data_size < out_data_size) { + av_log(avctx, AV_LOG_ERROR, "Provided buffer with size %d is too small.\n", *data_size); + return AVERROR(EINVAL); + } + + *data_size = out_data_size; + dst = data; + src = esc->samples + esc->samples_idx; + for (n = out_data_size; n > 0; n--) + *dst++ = *src++ + 128; + esc->samples_idx += *data_size; + + return avctx->codec->id == CODEC_ID_8SVX_FIB || avctx->codec->id == CODEC_ID_8SVX_EXP ? 
+ (avctx->frame_number == 0)*2 + out_data_size / 2 : + out_data_size; } static av_cold int eightsvx_decode_init(AVCodecContext *avctx) { EightSvxContext *esc = avctx->priv_data; - switch(avctx->codec->id) { - case CODEC_ID_8SVX_FIB: - esc->table = fibonacci; - break; - case CODEC_ID_8SVX_EXP: - esc->table = exponential; - break; - default: - return -1; + if (avctx->channels > 2) { + av_log(avctx, AV_LOG_ERROR, "8SVX does not support more than 2 channels\n"); + return AVERROR_INVALIDDATA; } - avctx->sample_fmt = AV_SAMPLE_FMT_S16; + + switch (avctx->codec->id) { + case CODEC_ID_8SVX_FIB: esc->table = fibonacci; break; + case CODEC_ID_8SVX_EXP: esc->table = exponential; break; + case CODEC_ID_8SVX_RAW: esc->table = NULL; break; + default: + av_log(avctx, AV_LOG_ERROR, "Invalid codec id %d.\n", avctx->codec->id); + return AVERROR_INVALIDDATA; + } + avctx->sample_fmt = AV_SAMPLE_FMT_U8; + + return 0; +} + +static av_cold int eightsvx_decode_close(AVCodecContext *avctx) +{ + EightSvxContext *esc = avctx->priv_data; + + av_freep(&esc->samples); + esc->samples_size = 0; + esc->samples_idx = 0; + return 0; } @@ -104,6 +198,7 @@ AVCodec ff_eightsvx_fib_decoder = { .priv_data_size = sizeof (EightSvxContext), .init = eightsvx_decode_init, .decode = eightsvx_decode_frame, + .close = eightsvx_decode_close, .long_name = NULL_IF_CONFIG_SMALL("8SVX fibonacci"), }; @@ -114,5 +209,17 @@ AVCodec ff_eightsvx_exp_decoder = { .priv_data_size = sizeof (EightSvxContext), .init = eightsvx_decode_init, .decode = eightsvx_decode_frame, + .close = eightsvx_decode_close, .long_name = NULL_IF_CONFIG_SMALL("8SVX exponential"), }; + +AVCodec ff_eightsvx_raw_decoder = { + .name = "8svx_raw", + .type = AVMEDIA_TYPE_AUDIO, + .id = CODEC_ID_8SVX_RAW, + .priv_data_size = sizeof(EightSvxContext), + .init = eightsvx_decode_init, + .decode = eightsvx_decode_frame, + .close = eightsvx_decode_close, + .long_name = NULL_IF_CONFIG_SMALL("8SVX rawaudio"), +}; diff --git a/libavcodec/Makefile 
b/libavcodec/Makefile index ab72adc9be..4e9906e40c 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -19,6 +19,7 @@ OBJS = allcodecs.o \ options.o \ parser.o \ raw.o \ + rawdec.o \ resample.o \ resample2.o \ simple_idct.o \ @@ -29,7 +30,7 @@ OBJS-$(CONFIG_AANDCT) += aandcttab.o OBJS-$(CONFIG_AC3DSP) += ac3dsp.o OBJS-$(CONFIG_CRYSTALHD) += crystalhd.o OBJS-$(CONFIG_ENCODERS) += faandct.o jfdctfst.o jfdctint.o -OBJS-$(CONFIG_DCT) += dct.o +OBJS-$(CONFIG_DCT) += dct.o dct32_fixed.o dct32_float.o OBJS-$(CONFIG_DWT) += dwt.o OBJS-$(CONFIG_DXVA2) += dxva2.o FFT-OBJS-$(CONFIG_HARDCODED_TABLES) += cos_tables.o cos_fixed_tables.o @@ -42,6 +43,9 @@ OBJS-$(CONFIG_HUFFMAN) += huffman.o OBJS-$(CONFIG_LPC) += lpc.o OBJS-$(CONFIG_LSP) += lsp.o OBJS-$(CONFIG_MDCT) += mdct_fixed.o mdct_float.o +OBJS-$(CONFIG_MPEGAUDIODSP) += mpegaudiodsp.o \ + mpegaudiodsp_fixed.o \ + mpegaudiodsp_float.o RDFT-OBJS-$(CONFIG_HARDCODED_TABLES) += sin_tables.o OBJS-$(CONFIG_RDFT) += rdft.o $(RDFT-OBJS-yes) OBJS-$(CONFIG_SINEWIN) += sinewin.o @@ -59,9 +63,9 @@ OBJS-$(CONFIG_AAC_ENCODER) += aacenc.o aaccoder.o \ mpeg4audio.o kbdwin.o OBJS-$(CONFIG_AASC_DECODER) += aasc.o msrledec.o OBJS-$(CONFIG_AC3_DECODER) += ac3dec.o ac3dec_data.o ac3.o kbdwin.o -OBJS-$(CONFIG_AC3_ENCODER) += ac3enc_combined.o ac3enc_fixed.o ac3enc_float.o ac3tab.o ac3.o kbdwin.o -OBJS-$(CONFIG_AC3_FLOAT_ENCODER) += ac3enc_float.o ac3tab.o ac3.o kbdwin.o -OBJS-$(CONFIG_AC3_FIXED_ENCODER) += ac3enc_fixed.o ac3tab.o ac3.o +OBJS-$(CONFIG_AC3_ENCODER) += ac3enc_combined.o ac3enc_fixed.o ac3enc_float.o ac3tab.o ac3.o kbdwin.o ac3enc.o +OBJS-$(CONFIG_AC3_FLOAT_ENCODER) += ac3enc_float.o ac3tab.o ac3tab.o ac3.o kbdwin.o +OBJS-$(CONFIG_AC3_FIXED_ENCODER) += ac3enc_fixed.o ac3tab.o ac3tab.o ac3.o ac3enc.o OBJS-$(CONFIG_ALAC_DECODER) += alac.o OBJS-$(CONFIG_ALAC_ENCODER) += alacenc.o OBJS-$(CONFIG_ALS_DECODER) += alsdec.o bgmc.o mpeg4audio.o @@ -125,6 +129,8 @@ OBJS-$(CONFIG_DVVIDEO_DECODER) += dv.o dvdata.o 
OBJS-$(CONFIG_DVVIDEO_ENCODER) += dv.o dvdata.o OBJS-$(CONFIG_DXA_DECODER) += dxa.o OBJS-$(CONFIG_EAC3_DECODER) += eac3dec.o eac3dec_data.o +OBJS-$(CONFIG_EAC3_ENCODER) += eac3enc.o ac3enc.o ac3enc_float.o \ + ac3tab.o ac3.o kbdwin.o OBJS-$(CONFIG_EACMV_DECODER) += eacmv.o OBJS-$(CONFIG_EAMAD_DECODER) += eamad.o eaidct.o mpeg12.o \ mpeg12data.o mpegvideo.o \ @@ -137,6 +143,7 @@ OBJS-$(CONFIG_EATQI_DECODER) += eatqi.o eaidct.o mpeg12.o \ OBJS-$(CONFIG_EIGHTBPS_DECODER) += 8bps.o OBJS-$(CONFIG_EIGHTSVX_EXP_DECODER) += 8svx.o OBJS-$(CONFIG_EIGHTSVX_FIB_DECODER) += 8svx.o +OBJS-$(CONFIG_EIGHTSVX_RAW_DECODER) += 8svx.o OBJS-$(CONFIG_ESCAPE124_DECODER) += escape124.o OBJS-$(CONFIG_FFV1_DECODER) += ffv1.o rangecoder.o OBJS-$(CONFIG_FFV1_ENCODER) += ffv1.o rangecoder.o @@ -171,7 +178,7 @@ OBJS-$(CONFIG_H263_ENCODER) += mpegvideo_enc.o mpeg4video.o \ ratecontrol.o h263.o ituh263enc.o \ flvenc.o mpeg12data.o \ mpegvideo.o error_resilience.o -OBJS-$(CONFIG_H264_DECODER) += h264.o h264_hl_motion.o \ +OBJS-$(CONFIG_H264_DECODER) += h264.o \ h264_loopfilter.o h264_direct.o \ cabac.o h264_sei.o h264_ps.o \ h264_refs.o h264_cavlc.o h264_cabac.o\ @@ -189,6 +196,8 @@ OBJS-$(CONFIG_INDEO3_DECODER) += indeo3.o OBJS-$(CONFIG_INDEO5_DECODER) += indeo5.o ivi_common.o ivi_dsp.o OBJS-$(CONFIG_INTERPLAY_DPCM_DECODER) += dpcm.o OBJS-$(CONFIG_INTERPLAY_VIDEO_DECODER) += interplayvideo.o +OBJS-$(CONFIG_JPEG2000_DECODER) += j2kdec.o mqcdec.o mqc.o j2k.o j2k_dwt.o +#OBJS-$(CONFIG_JPEG2000_ENCODER) += j2kenc.o mqcenc.o mqc.o j2k.o j2k_dwt.o OBJS-$(CONFIG_JPEGLS_DECODER) += jpeglsdec.o jpegls.o \ mjpegdec.o mjpeg.o OBJS-$(CONFIG_JPEGLS_ENCODER) += jpeglsenc.o jpegls.o @@ -264,8 +273,6 @@ OBJS-$(CONFIG_MPEG2VIDEO_ENCODER) += mpeg12enc.o mpegvideo_enc.o \ mpegvideo.o error_resilience.o OBJS-$(CONFIG_MPEG4_VAAPI_HWACCEL) += vaapi_mpeg4.o OBJS-$(CONFIG_MSMPEG4V1_DECODER) += msmpeg4.o msmpeg4data.o -OBJS-$(CONFIG_MSMPEG4V1_ENCODER) += msmpeg4.o msmpeg4data.o h263dec.o \ - h263.o ituh263dec.o 
mpeg4videodec.o OBJS-$(CONFIG_MSMPEG4V2_DECODER) += msmpeg4.o msmpeg4data.o h263dec.o \ h263.o ituh263dec.o mpeg4videodec.o OBJS-$(CONFIG_MSMPEG4V2_ENCODER) += msmpeg4.o msmpeg4data.o h263dec.o \ @@ -362,7 +369,7 @@ OBJS-$(CONFIG_SVQ1_ENCODER) += svq1enc.o svq1.o \ mpegvideo.o error_resilience.o \ ituh263enc.o mpegvideo_enc.o \ ratecontrol.o mpeg12data.o -OBJS-$(CONFIG_SVQ3_DECODER) += h264.o svq3.o h264_hl_motion.o \ +OBJS-$(CONFIG_SVQ3_DECODER) += h264.o svq3.o \ h264_loopfilter.o h264_direct.o \ h264_sei.o h264_ps.o h264_refs.o \ h264_cavlc.o h264_cabac.o cabac.o \ @@ -553,7 +560,7 @@ OBJS-$(CONFIG_OGG_DEMUXER) += flacdec.o flacdata.o flac.o \ dirac.o mpeg12data.o vorbis_data.o OBJS-$(CONFIG_OGG_MUXER) += xiph.o flacdec.o flacdata.o flac.o \ vorbis_data.o -OBJS-$(CONFIG_RTP_MUXER) += mpegvideo.o xiph.o +OBJS-$(CONFIG_RTP_MUXER) += mpeg4audio.o mpegvideo.o xiph.o OBJS-$(CONFIG_SPDIF_DEMUXER) += aacadtsdec.o mpeg4audio.o OBJS-$(CONFIG_WEBM_MUXER) += xiph.o mpeg4audio.o \ flacdec.o flacdata.o flac.o \ @@ -606,7 +613,7 @@ OBJS-$(CONFIG_FLAC_PARSER) += flac_parser.o flacdata.o flac.o \ vorbis_data.o OBJS-$(CONFIG_H261_PARSER) += h261_parser.o OBJS-$(CONFIG_H263_PARSER) += h263_parser.o -OBJS-$(CONFIG_H264_PARSER) += h264_parser.o h264.o h264_hl_motion.o \ +OBJS-$(CONFIG_H264_PARSER) += h264_parser.o h264.o \ cabac.o \ h264_refs.o h264_sei.o h264_direct.o \ h264_loopfilter.o h264_cabac.o \ @@ -662,39 +669,40 @@ OBJS-$(!CONFIG_SMALL) += inverse.o -include $(SUBDIR)$(ARCH)/Makefile -SKIPHEADERS = %_tablegen.h +SKIPHEADERS += %_tablegen.h \ + %_tables.h \ + aac_tablegen_decl.h \ + fft-internal.h \ + tableprint.h \ + $(ARCH)/vp56_arith.h SKIPHEADERS-$(CONFIG_DXVA2) += dxva2.h dxva2_internal.h SKIPHEADERS-$(CONFIG_LIBDIRAC) += libdirac.h SKIPHEADERS-$(CONFIG_LIBSCHROEDINGER) += libschroedinger.h SKIPHEADERS-$(CONFIG_VAAPI) += vaapi_internal.h SKIPHEADERS-$(CONFIG_VDPAU) += vdpau.h SKIPHEADERS-$(CONFIG_XVMC) += xvmc.h -SKIPHEADERS += mpegaudio3.h - -EXAMPLES = api 
-TESTPROGS = cabac dct eval fft fft-fixed h264 iirfilter rangecoder snow +TESTPROGS = cabac dct fft fft-fixed h264 iirfilter rangecoder snow TESTPROGS-$(HAVE_MMX) += motion TESTOBJS = dctref.o -HOSTPROGS = costablegen +HOSTPROGS = aac_tablegen aacps_tablegen cbrt_tablegen cos_tablegen \ + dv_tablegen motionpixels_tablegen mpegaudio_tablegen \ + pcm_tablegen qdm2_tablegen sinewin_tablegen DIRS = alpha arm bfin mlib ppc ps2 sh4 sparc x86 -CLEANFILES = sin_tables.c cos_tables.c *_tables.h *_tablegen$(HOSTEXESUF) +CLEANFILES = *_tables.c *_tables.h *_tablegen$(HOSTEXESUF) include $(SUBDIR)../subdir.mak $(SUBDIR)dct-test$(EXESUF): $(SUBDIR)dctref.o -$(SUBDIR)cos_tables.c: $(SUBDIR)costablegen$(HOSTEXESUF) - $(M)./$< > $@ +TRIG_TABLES = cos cos_fixed sin +TRIG_TABLES := $(TRIG_TABLES:%=$(SUBDIR)%_tables.c) -$(SUBDIR)cos_fixed_tables.c: $(SUBDIR)costablegen$(HOSTEXESUF) - $(M)./$< cos fixed > $@ - -$(SUBDIR)sin_tables.c: $(SUBDIR)costablegen$(HOSTEXESUF) - $(M)./$< sin > $@ +$(TRIG_TABLES): $(SUBDIR)%_tables.c: $(SUBDIR)cos_tablegen$(HOSTEXESUF) + $(M)./$< $* > $@ ifdef CONFIG_SMALL $(SUBDIR)%_tablegen$(HOSTEXESUF): HOSTCFLAGS += -DCONFIG_SMALL=1 @@ -702,9 +710,6 @@ else $(SUBDIR)%_tablegen$(HOSTEXESUF): HOSTCFLAGS += -DCONFIG_SMALL=0 endif -$(SUBDIR)%_tablegen$(HOSTEXESUF): $(SUBDIR)%_tablegen.c $(SUBDIR)%_tablegen.h $(SUBDIR)tableprint.c - $(HOSTCC) $(HOSTCFLAGS) $(HOSTLDFLAGS) -o $@ $(filter %.c,$^) $(HOSTLIBS) - GEN_HEADERS = cbrt_tables.h aacps_tables.h aac_tables.h dv_tables.h \ sinewin_tables.h mpegaudio_tables.h motionpixels_tables.h \ pcm_tables.h qdm2_tables.h diff --git a/libavcodec/a64multienc.c b/libavcodec/a64multienc.c index e9b3471925..5a665d0592 100644 --- a/libavcodec/a64multienc.c +++ b/libavcodec/a64multienc.c @@ -252,7 +252,6 @@ static int a64multi_encode_frame(AVCodecContext *avctx, unsigned char *buf, int b_width; int req_size; - int num_frames = c->mc_lifetime; int *charmap = c->mc_charmap; uint8_t *colram = c->mc_colram; @@ -280,7 +279,6 @@ static 
int a64multi_encode_frame(AVCodecContext *avctx, unsigned char *buf, if (!c->mc_lifetime) return 0; /* no more frames in queue, prepare to flush remaining frames */ if (!c->mc_frame_counter) { - num_frames = c->mc_lifetime; c->mc_lifetime = 0; } /* still frames in queue so limit lifetime to remaining frames */ diff --git a/libavcodec/aac.h b/libavcodec/aac.h index 5cc85dd613..f089ee9a29 100644 --- a/libavcodec/aac.h +++ b/libavcodec/aac.h @@ -227,7 +227,7 @@ typedef struct { DECLARE_ALIGNED(32, float, coeffs)[1024]; ///< coefficients for IMDCT DECLARE_ALIGNED(32, float, saved)[1024]; ///< overlap DECLARE_ALIGNED(32, float, ret)[2048]; ///< PCM output - DECLARE_ALIGNED(16, int16_t, ltp_state)[3072]; ///< time signal for LTP + DECLARE_ALIGNED(16, float, ltp_state)[3072]; ///< time signal for LTP PredictorState predictor_state[MAX_PREDICTORS]; } SingleChannelElement; diff --git a/libavcodec/aac_tablegen.h b/libavcodec/aac_tablegen.h index 27fa0e7ba3..7afa466139 100644 --- a/libavcodec/aac_tablegen.h +++ b/libavcodec/aac_tablegen.h @@ -20,8 +20,8 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#ifndef AAC_TABLEGEN_H -#define AAC_TABLEGEN_H +#ifndef AVCODEC_AAC_TABLEGEN_H +#define AVCODEC_AAC_TABLEGEN_H #include "aac_tablegen_decl.h" @@ -29,7 +29,7 @@ #include "libavcodec/aac_tables.h" #else #include "libavutil/mathematics.h" -#include "libavcodec/aac.h" +#include "aac.h" float ff_aac_pow2sf_tab[428]; void ff_aac_tableinit(void) @@ -40,4 +40,4 @@ void ff_aac_tableinit(void) } #endif /* CONFIG_HARDCODED_TABLES */ -#endif /* AAC_TABLEGEN_H */ +#endif /* AVCODEC_AAC_TABLEGEN_H */ diff --git a/libavcodec/aac_tablegen_decl.h b/libavcodec/aac_tablegen_decl.h index 9a90a09b07..0d86e80bc2 100644 --- a/libavcodec/aac_tablegen_decl.h +++ b/libavcodec/aac_tablegen_decl.h @@ -20,8 +20,8 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#ifndef AAC_TABLEGEN_DECL_H -#define AAC_TABLEGEN_DECL_H +#ifndef 
AVCODEC_AAC_TABLEGEN_DECL_H +#define AVCODEC_AAC_TABLEGEN_DECL_H #if CONFIG_HARDCODED_TABLES #define ff_aac_tableinit() @@ -31,4 +31,4 @@ void ff_aac_tableinit(void); extern float ff_aac_pow2sf_tab[428]; #endif /* CONFIG_HARDCODED_TABLES */ -#endif /* AAC_TABLEGEN_DECL_H */ +#endif /* AVCODEC_AAC_TABLEGEN_DECL_H */ diff --git a/libavcodec/aaccoder.c b/libavcodec/aaccoder.c index 187b4ad972..a68aa025de 100644 --- a/libavcodec/aaccoder.c +++ b/libavcodec/aaccoder.c @@ -312,7 +312,7 @@ static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce int win, int group_len, const float lambda) { BandCodingPath path[120][12]; - int w, swb, cb, start, start2, size; + int w, swb, cb, start, size; int i, j; const int max_sfb = sce->ics.max_sfb; const int run_bits = sce->ics.num_windows == 1 ? 5 : 3; @@ -330,7 +330,6 @@ static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce path[0][cb].run = 0; } for (swb = 0; swb < max_sfb; swb++) { - start2 = start; size = sce->ics.swb_sizes[swb]; if (sce->zeroes[win*16 + swb]) { for (cb = 0; cb < 12; cb++) { @@ -414,7 +413,7 @@ static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement *sce, int win, int group_len, const float lambda) { BandCodingPath path[120][12]; - int w, swb, cb, start, start2, size; + int w, swb, cb, start, size; int i, j; const int max_sfb = sce->ics.max_sfb; const int run_bits = sce->ics.num_windows == 1 ? 
5 : 3; @@ -432,7 +431,6 @@ static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement *sce, path[0][cb].run = 0; } for (swb = 0; swb < max_sfb; swb++) { - start2 = start; size = sce->ics.swb_sizes[swb]; if (sce->zeroes[win*16 + swb]) { for (cb = 0; cb < 12; cb++) { @@ -1007,12 +1005,11 @@ static void search_for_quantizers_fast(AVCodecContext *avctx, AACEncContext *s, SingleChannelElement *sce, const float lambda) { - int start = 0, i, w, w2, g; + int i, w, w2, g; int minq = 255; memset(sce->sf_idx, 0, sizeof(sce->sf_idx)); for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { - start = w*128; for (g = 0; g < sce->ics.num_swb; g++) { for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(w+w2)*16+g]; diff --git a/libavcodec/aacdec.c b/libavcodec/aacdec.c index 48c0367b06..19d3b3678f 100644 --- a/libavcodec/aacdec.c +++ b/libavcodec/aacdec.c @@ -186,7 +186,7 @@ static av_cold int che_configure(AACContext *ac, if (che_pos[type][id]) { if (!ac->che[type][id] && !(ac->che[type][id] = av_mallocz(sizeof(ChannelElement)))) return AVERROR(ENOMEM); - ff_aac_sbr_ctx_init(&ac->che[type][id]->sbr); + ff_aac_sbr_ctx_init(ac, &ac->che[type][id]->sbr); if (type != TYPE_CCE) { ac->output_data[(*channels)++] = ac->che[type][id]->ch[0].ret; if (type == TYPE_CPE || @@ -251,8 +251,6 @@ static av_cold int output_configure(AACContext *ac, } memcpy(ac->tag_che_map, ac->che, 4 * MAX_ELEM_ID * sizeof(ac->che[0][0])); - - avctx->channel_layout = 0; } avctx->channels = channels; @@ -550,6 +548,7 @@ static void reset_predictor_group(PredictorState *ps, int group_num) static av_cold int aac_decode_init(AVCodecContext *avctx) { AACContext *ac = avctx->priv_data; + float output_scale_factor; ac->avctx = avctx; ac->m4ac.sample_rate = avctx->sample_rate; @@ -561,8 +560,13 @@ static av_cold int aac_decode_init(AVCodecContext *avctx) return -1; } - avctx->sample_fmt = avctx->request_sample_fmt == AV_SAMPLE_FMT_FLT 
? - AV_SAMPLE_FMT_FLT : AV_SAMPLE_FMT_S16; + if (avctx->request_sample_fmt == AV_SAMPLE_FMT_FLT) { + avctx->sample_fmt = AV_SAMPLE_FMT_FLT; + output_scale_factor = 1.0 / 32768.0; + } else { + avctx->sample_fmt = AV_SAMPLE_FMT_S16; + output_scale_factor = 1.0; + } AAC_INIT_VLC_STATIC( 0, 304); AAC_INIT_VLC_STATIC( 1, 270); @@ -590,9 +594,9 @@ static av_cold int aac_decode_init(AVCodecContext *avctx) ff_aac_scalefactor_code, sizeof(ff_aac_scalefactor_code[0]), sizeof(ff_aac_scalefactor_code[0]), 352); - ff_mdct_init(&ac->mdct, 11, 1, 1.0/1024.0); - ff_mdct_init(&ac->mdct_small, 8, 1, 1.0/128.0); - ff_mdct_init(&ac->mdct_ltp, 11, 0, -2.0); + ff_mdct_init(&ac->mdct, 11, 1, output_scale_factor/1024.0); + ff_mdct_init(&ac->mdct_small, 8, 1, output_scale_factor/128.0); + ff_mdct_init(&ac->mdct_ltp, 11, 0, -2.0/output_scale_factor); // window initialization ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024); ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128); @@ -818,7 +822,7 @@ static int decode_scalefactors(AACContext *ac, float sf[120], GetBitContext *gb, else offset[1] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60; clipped_offset = av_clip(offset[1], -100, 155); - if (offset[2] != clipped_offset) { + if (offset[1] != clipped_offset) { av_log_ask_for_sample(ac->avctx, "Noise gain clipped " "(%d -> %d).\nIf you heard an audible " "artifact, there may be a bug in the decoder. 
", @@ -1825,9 +1829,9 @@ static void update_ltp(AACContext *ac, SingleChannelElement *sce) saved_ltp[i + 512] = ac->buf_mdct[1023 - i] * lwindow[511 - i]; } - memcpy(sce->ltp_state, &sce->ltp_state[1024], 1024 * sizeof(int16_t)); - ac->fmt_conv.float_to_int16(&(sce->ltp_state[1024]), sce->ret, 1024); - ac->fmt_conv.float_to_int16(&(sce->ltp_state[2048]), saved_ltp, 1024); + memcpy(sce->ltp_state, sce->ltp_state+1024, 1024 * sizeof(*sce->ltp_state)); + memcpy(sce->ltp_state+1024, sce->ret, 1024 * sizeof(*sce->ltp_state)); + memcpy(sce->ltp_state+2048, saved_ltp, 1024 * sizeof(*sce->ltp_state)); } /** @@ -2174,8 +2178,8 @@ static int aac_decode_frame_int(AVCodecContext *avctx, void *data, avctx->frame_size = samples; } - data_size_tmp = samples * avctx->channels; - data_size_tmp *= avctx->sample_fmt == AV_SAMPLE_FMT_FLT ? sizeof(float) : sizeof(int16_t); + data_size_tmp = samples * avctx->channels * + (av_get_bits_per_sample_fmt(avctx->sample_fmt) / 8); if (*data_size < data_size_tmp) { av_log(avctx, AV_LOG_ERROR, "Output buffer too small (%d) or trying to output too many samples (%d) for this frame.\n", @@ -2185,10 +2189,12 @@ static int aac_decode_frame_int(AVCodecContext *avctx, void *data, *data_size = data_size_tmp; if (samples) { - if (avctx->sample_fmt == AV_SAMPLE_FMT_FLT) { - float_interleave(data, (const float **)ac->output_data, samples, avctx->channels); - } else - ac->fmt_conv.float_to_int16_interleave(data, (const float **)ac->output_data, samples, avctx->channels); + if (avctx->sample_fmt == AV_SAMPLE_FMT_FLT) + ac->fmt_conv.float_interleave(data, (const float **)ac->output_data, + samples, avctx->channels); + else + ac->fmt_conv.float_to_int16_interleave(data, (const float **)ac->output_data, + samples, avctx->channels); } if (ac->output_configured) @@ -2460,6 +2466,7 @@ static int latm_decode_frame(AVCodecContext *avctx, void *out, int *out_size, *out_size = 0; return avpkt->size; } else { + aac_decode_close(avctx); if ((err = aac_decode_init(avctx)) 
< 0) return err; latmctx->initialized = 1; @@ -2507,7 +2514,7 @@ AVCodec ff_aac_decoder = { aac_decode_frame, .long_name = NULL_IF_CONFIG_SMALL("Advanced Audio Coding"), .sample_fmts = (const enum AVSampleFormat[]) { - AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_FLT,AV_SAMPLE_FMT_NONE + AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE }, .channel_layouts = aac_channel_layout, }; @@ -2527,7 +2534,7 @@ AVCodec ff_aac_latm_decoder = { .decode = latm_decode_frame, .long_name = NULL_IF_CONFIG_SMALL("AAC LATM (Advanced Audio Codec LATM syntax)"), .sample_fmts = (const enum AVSampleFormat[]) { - AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_FLT,AV_SAMPLE_FMT_NONE + AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE }, .channel_layouts = aac_channel_layout, }; diff --git a/libavcodec/aacenc.c b/libavcodec/aacenc.c index b51fccded3..0de6622389 100644 --- a/libavcodec/aacenc.c +++ b/libavcodec/aacenc.c @@ -30,6 +30,7 @@ * add temporal noise shaping ***********************************/ +#include "libavutil/opt.h" #include "avcodec.h" #include "put_bits.h" #include "dsputil.h" @@ -134,6 +135,15 @@ static const uint8_t aac_chan_configs[6][5] = { {4, TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_LFE}, // 6 channels - front center + stereo + back stereo + LFE }; +static const uint8_t channel_maps[][AAC_MAX_CHANNELS] = { + { 0 }, + { 0, 1 }, + { 2, 0, 1 }, + { 2, 0, 1, 3 }, + { 2, 0, 1, 3, 4 }, + { 2, 0, 1, 4, 5, 3 }, +}; + /** * Make AAC audio config object. 
* @see 1.6.2.1 "Syntax - AudioSpecificConfig" @@ -489,7 +499,7 @@ static int aac_encode_frame(AVCodecContext *avctx, AACEncContext *s = avctx->priv_data; int16_t *samples = s->samples, *samples2, *la; ChannelElement *cpe; - int i, ch, w, chans, tag, start_ch; + int i, ch, w, g, chans, tag, start_ch; const uint8_t *chan_map = aac_chan_configs[avctx->channels-1]; int chan_el_counter[4]; FFPsyWindowInfo windows[AAC_MAX_CHANNELS]; @@ -498,15 +508,24 @@ static int aac_encode_frame(AVCodecContext *avctx, return 0; if (data) { if (!s->psypp) { - memcpy(s->samples + 1024 * avctx->channels, data, - 1024 * avctx->channels * sizeof(s->samples[0])); + if (avctx->channels <= 2) { + memcpy(s->samples + 1024 * avctx->channels, data, + 1024 * avctx->channels * sizeof(s->samples[0])); + } else { + for (i = 0; i < 1024; i++) + for (ch = 0; ch < avctx->channels; ch++) + s->samples[(i + 1024) * avctx->channels + ch] = + ((int16_t*)data)[i * avctx->channels + + channel_maps[avctx->channels-1][ch]]; + } } else { start_ch = 0; samples2 = s->samples + 1024 * avctx->channels; for (i = 0; i < chan_map[0]; i++) { tag = chan_map[i+1]; chans = tag == TYPE_CPE ? 
2 : 1; - ff_psy_preprocess(s->psypp, (uint16_t*)data + start_ch, + ff_psy_preprocess(s->psypp, + (uint16_t*)data + channel_maps[avctx->channels-1][start_ch], samples2 + start_ch, start_ch, chans); start_ch += chans; } @@ -587,8 +606,16 @@ static int aac_encode_frame(AVCodecContext *avctx, } } s->cur_channel = start_ch; - if (cpe->common_window && s->coder->search_for_ms) - s->coder->search_for_ms(s, cpe, s->lambda); + if (s->options.stereo_mode && cpe->common_window) { + if (s->options.stereo_mode > 0) { + IndividualChannelStream *ics = &cpe->ch[0].ics; + for (w = 0; w < ics->num_windows; w += ics->group_len[w]) + for (g = 0; g < ics->num_swb; g++) + cpe->ms_mask[w*16+g] = 1; + } else if (s->coder->search_for_ms) { + s->coder->search_for_ms(s, cpe, s->lambda); + } + } adjust_frame_information(s, cpe, chans); if (chans == 2) { put_bits(&s->pb, 1, cpe->common_window); @@ -645,6 +672,22 @@ static av_cold int aac_encode_end(AVCodecContext *avctx) return 0; } +#define AACENC_FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM +static const AVOption aacenc_options[] = { + {"stereo_mode", "Stereo coding method", offsetof(AACEncContext, options.stereo_mode), FF_OPT_TYPE_INT, {.dbl = 0}, -1, 1, AACENC_FLAGS, "stereo_mode"}, + {"auto", "Selected by the Encoder", 0, FF_OPT_TYPE_CONST, {.dbl = -1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"}, + {"ms_off", "Disable Mid/Side coding", 0, FF_OPT_TYPE_CONST, {.dbl = 0 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"}, + {"ms_force", "Force Mid/Side for the whole frame if possible", 0, FF_OPT_TYPE_CONST, {.dbl = 1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"}, + {NULL} +}; + +static const AVClass aacenc_class = { + "AAC encoder", + av_default_item_name, + aacenc_options, + LIBAVUTIL_VERSION_INT, +}; + AVCodec ff_aac_encoder = { "aac", AVMEDIA_TYPE_AUDIO, @@ -656,4 +699,5 @@ AVCodec ff_aac_encoder = { .capabilities = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY | CODEC_CAP_EXPERIMENTAL, .sample_fmts = (const enum 
AVSampleFormat[]){AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_NONE}, .long_name = NULL_IF_CONFIG_SMALL("Advanced Audio Coding"), + .priv_class = &aacenc_class, }; diff --git a/libavcodec/aacenc.h b/libavcodec/aacenc.h index c12475a187..44ad50bf80 100644 --- a/libavcodec/aacenc.h +++ b/libavcodec/aacenc.h @@ -30,6 +30,10 @@ #include "psymodel.h" +typedef struct AACEncOptions { + int stereo_mode; +} AACEncOptions; + struct AACEncContext; typedef struct AACCoefficientsEncoder { @@ -48,6 +52,8 @@ extern AACCoefficientsEncoder ff_aac_coders[]; * AAC encoder context */ typedef struct AACEncContext { + AVClass *av_class; + AACEncOptions options; ///< encoding options PutBitContext pb; FFTContext mdct1024; ///< long (1024 samples) frame transform context FFTContext mdct128; ///< short (128 samples) frame transform context diff --git a/libavcodec/aacsbr.c b/libavcodec/aacsbr.c index afff6931fb..82092b385d 100644 --- a/libavcodec/aacsbr.c +++ b/libavcodec/aacsbr.c @@ -127,14 +127,19 @@ av_cold void ff_aac_sbr_init(void) ff_ps_init(); } -av_cold void ff_aac_sbr_ctx_init(SpectralBandReplication *sbr) +av_cold void ff_aac_sbr_ctx_init(AACContext *ac, SpectralBandReplication *sbr) { + float mdct_scale; sbr->kx[0] = sbr->kx[1] = 32; //Typo in spec, kx' inits to 32 sbr->data[0].e_a[1] = sbr->data[1].e_a[1] = -1; sbr->data[0].synthesis_filterbank_samples_offset = SBR_SYNTHESIS_BUF_SIZE - (1280 - 128); sbr->data[1].synthesis_filterbank_samples_offset = SBR_SYNTHESIS_BUF_SIZE - (1280 - 128); - ff_mdct_init(&sbr->mdct, 7, 1, 1.0/64); - ff_mdct_init(&sbr->mdct_ana, 7, 1, -2.0); + /* SBR requires samples to be scaled to +/-32768.0 to work correctly. + * mdct scale factors are adjusted to scale up from +/-1.0 at analysis + * and scale back down at synthesis. */ + mdct_scale = ac->avctx->sample_fmt == AV_SAMPLE_FMT_FLT ? 
32768.0f : 1.0f; + ff_mdct_init(&sbr->mdct, 7, 1, 1.0 / (64 * mdct_scale)); + ff_mdct_init(&sbr->mdct_ana, 7, 1, -2.0 * mdct_scale); ff_ps_ctx_init(&sbr->ps); } diff --git a/libavcodec/aacsbr.h b/libavcodec/aacsbr.h index 6b10ed43e4..d0284981c3 100644 --- a/libavcodec/aacsbr.h +++ b/libavcodec/aacsbr.h @@ -36,7 +36,7 @@ /** Initialize SBR. */ av_cold void ff_aac_sbr_init(void); /** Initialize one SBR context. */ -av_cold void ff_aac_sbr_ctx_init(SpectralBandReplication *sbr); +av_cold void ff_aac_sbr_ctx_init(AACContext *ac, SpectralBandReplication *sbr); /** Close one SBR context. */ av_cold void ff_aac_sbr_ctx_close(SpectralBandReplication *sbr); /** Decode one SBR element. */ diff --git a/libavcodec/ac3.h b/libavcodec/ac3.h index fcb401c238..a53817e1b0 100644 --- a/libavcodec/ac3.h +++ b/libavcodec/ac3.h @@ -28,7 +28,8 @@ #define AVCODEC_AC3_H #define AC3_MAX_CODED_FRAME_SIZE 3840 /* in bytes */ -#define AC3_MAX_CHANNELS 6 /* including LFE channel */ +#define AC3_MAX_CHANNELS 7 /**< maximum number of channels, including coupling channel */ +#define CPL_CH 0 /**< coupling channel index */ #define AC3_MAX_COEFS 256 #define AC3_BLOCK_SIZE 256 @@ -158,10 +159,11 @@ typedef struct AC3EncOptions { /* other encoding options */ int allow_per_frame_metadata; - int stereo_rematrixing; + int stereo_rematrixing; + int channel_coupling; + int cpl_start; } AC3EncOptions; - void ff_ac3_common_init(void); extern const int64_t ff_ac3_channel_layouts[]; diff --git a/libavcodec/ac3dec.c b/libavcodec/ac3dec.c index b4aae2263a..9b44668ae2 100644 --- a/libavcodec/ac3dec.c +++ b/libavcodec/ac3dec.c @@ -185,6 +185,15 @@ static av_cold int ac3_decode_init(AVCodecContext *avctx) ff_fmt_convert_init(&s->fmt_conv, avctx); av_lfg_init(&s->dith_state, 0); + /* set scale value for float to int16 conversion */ + if (avctx->request_sample_fmt == AV_SAMPLE_FMT_FLT) { + s->mul_bias = 1.0f; + avctx->sample_fmt = AV_SAMPLE_FMT_FLT; + } else { + s->mul_bias = 32767.0f; + avctx->sample_fmt = 
AV_SAMPLE_FMT_S16; + } + /* allow downmixing to stereo or mono */ if (avctx->channels > 0 && avctx->request_channels > 0 && avctx->request_channels < avctx->channels && @@ -193,14 +202,6 @@ static av_cold int ac3_decode_init(AVCodecContext *avctx) } s->downmixed = 1; - if (avctx->request_sample_fmt == AV_SAMPLE_FMT_FLT) { - avctx->sample_fmt = AV_SAMPLE_FMT_FLT; - s->mul_bias = 1.0f; - } else { - avctx->sample_fmt = AV_SAMPLE_FMT_S16; - /* set scale value for float to int16 conversion */ - s->mul_bias = 32767.0f; - } return 0; } @@ -1295,8 +1296,8 @@ static int ac3_decode_frame(AVCodecContext * avctx, void *data, int *data_size, const uint8_t *buf = avpkt->data; int buf_size = avpkt->size; AC3DecodeContext *s = avctx->priv_data; - float *out_samples_flt = (float *)data; - int16_t *out_samples = (int16_t *)data; + float *out_samples_flt = data; + int16_t *out_samples_s16 = data; int blk, ch, err; int data_size_orig, data_size_tmp; const uint8_t *channel_map; @@ -1400,7 +1401,7 @@ static int ac3_decode_frame(AVCodecContext * avctx, void *data, int *data_size, for (ch = 0; ch < s->out_channels; ch++) output[ch] = s->output[channel_map[ch]]; data_size_tmp = s->num_blocks * 256 * avctx->channels; - data_size_tmp *= avctx->sample_fmt == AV_SAMPLE_FMT_FLT ? sizeof(*out_samples_flt) : sizeof(*out_samples); + data_size_tmp *= avctx->sample_fmt == AV_SAMPLE_FMT_FLT ? 
sizeof(*out_samples_flt) : sizeof(*out_samples_s16); if (data_size_orig < data_size_tmp) return -1; *data_size = data_size_tmp; @@ -1409,14 +1410,19 @@ static int ac3_decode_frame(AVCodecContext * avctx, void *data, int *data_size, av_log(avctx, AV_LOG_ERROR, "error decoding the audio block\n"); err = 1; } + if (avctx->sample_fmt == AV_SAMPLE_FMT_FLT) { - float_interleave_noscale(out_samples_flt, output, 256, s->out_channels); + s->fmt_conv.float_interleave(out_samples_flt, output, 256, + s->out_channels); out_samples_flt += 256 * s->out_channels; } else { - s->fmt_conv.float_to_int16_interleave(out_samples, output, 256, s->out_channels); - out_samples += 256 * s->out_channels; + s->fmt_conv.float_to_int16_interleave(out_samples_s16, output, 256, + s->out_channels); + out_samples_s16 += 256 * s->out_channels; } } + *data_size = s->num_blocks * 256 * avctx->channels * + (av_get_bits_per_sample_fmt(avctx->sample_fmt) / 8); return FFMIN(buf_size, s->frame_size); } @@ -1441,6 +1447,9 @@ AVCodec ff_ac3_decoder = { .close = ac3_decode_end, .decode = ac3_decode_frame, .long_name = NULL_IF_CONFIG_SMALL("ATSC A/52A (AC-3)"), + .sample_fmts = (const enum AVSampleFormat[]) { + AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE + }, }; #if CONFIG_EAC3_DECODER @@ -1453,5 +1462,8 @@ AVCodec ff_eac3_decoder = { .close = ac3_decode_end, .decode = ac3_decode_frame, .long_name = NULL_IF_CONFIG_SMALL("ATSC A/52B (AC-3, E-AC-3)"), + .sample_fmts = (const enum AVSampleFormat[]) { + AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE + }, }; #endif diff --git a/libavcodec/ac3dec.h b/libavcodec/ac3dec.h index d3fe3e782b..377e5154d7 100644 --- a/libavcodec/ac3dec.h +++ b/libavcodec/ac3dec.h @@ -58,11 +58,6 @@ #include "fft.h" #include "fmtconvert.h" -/* override ac3.h to include coupling channel */ -#undef AC3_MAX_CHANNELS -#define AC3_MAX_CHANNELS 7 -#define CPL_CH 0 - #define AC3_OUTPUT_LFEON 8 #define SPX_MAX_BANDS 17 @@ -201,7 +196,7 @@ typedef struct { ///@} ///@defgroup 
arrays aligned arrays - DECLARE_ALIGNED(16, int, fixed_coeffs)[AC3_MAX_CHANNELS][AC3_MAX_COEFS]; ///> fixed-point transform coefficients + DECLARE_ALIGNED(16, int, fixed_coeffs)[AC3_MAX_CHANNELS][AC3_MAX_COEFS]; ///< fixed-point transform coefficients DECLARE_ALIGNED(32, float, transform_coeffs)[AC3_MAX_CHANNELS][AC3_MAX_COEFS]; ///< transform coefficients DECLARE_ALIGNED(32, float, delay)[AC3_MAX_CHANNELS][AC3_BLOCK_SIZE]; ///< delay - added to the next block DECLARE_ALIGNED(32, float, window)[AC3_BLOCK_SIZE]; ///< window coefficients diff --git a/libavcodec/ac3dec_data.c b/libavcodec/ac3dec_data.c index c40390245c..d0a9b1ec40 100644 --- a/libavcodec/ac3dec_data.c +++ b/libavcodec/ac3dec_data.c @@ -54,12 +54,6 @@ const uint8_t ff_eac3_hebap_tab[64] = { }; /** - * Table E2.16 Default Coupling Banding Structure - */ -const uint8_t ff_eac3_default_cpl_band_struct[18] = -{ 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1 }; - -/** * Table E2.15 Default Spectral Extension Banding Structure */ const uint8_t ff_eac3_default_spx_band_struct[17] = diff --git a/libavcodec/ac3dec_data.h b/libavcodec/ac3dec_data.h index e2ab7ddbe5..975b52ef2c 100644 --- a/libavcodec/ac3dec_data.h +++ b/libavcodec/ac3dec_data.h @@ -27,7 +27,6 @@ extern const uint8_t ff_ac3_ungroup_3_in_5_bits_tab[32][3]; extern const uint8_t ff_eac3_hebap_tab[64]; -extern const uint8_t ff_eac3_default_cpl_band_struct[18]; extern const uint8_t ff_eac3_default_spx_band_struct[17]; #endif /* AVCODEC_AC3DEC_DATA_H */ diff --git a/libavcodec/ac3dsp.c b/libavcodec/ac3dsp.c index 4ec0f2a19c..619addc3d5 100644 --- a/libavcodec/ac3dsp.c +++ b/libavcodec/ac3dsp.c @@ -128,24 +128,33 @@ static void ac3_bit_alloc_calc_bap_c(int16_t *mask, int16_t *psd, } while (end > ff_ac3_band_start_tab[band++]); } -static int ac3_compute_mantissa_size_c(int mant_cnt[5], uint8_t *bap, - int nb_coefs) +static void ac3_update_bap_counts_c(uint16_t mant_cnt[16], uint8_t *bap, + int len) { - int bits, b, i; + while (len-- > 0) + 
mant_cnt[bap[len]]++; +} - bits = 0; - for (i = 0; i < nb_coefs; i++) { - b = bap[i]; - if (b <= 4) { - // bap=1 to bap=4 will be counted in compute_mantissa_size_final - mant_cnt[b]++; - } else if (b <= 13) { - // bap=5 to bap=13 use (bap-1) bits - bits += b - 1; - } else { - // bap=14 uses 14 bits and bap=15 uses 16 bits - bits += (b == 14) ? 14 : 16; - } +DECLARE_ALIGNED(16, const uint16_t, ff_ac3_bap_bits)[16] = { + 0, 0, 0, 3, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16 +}; + +static int ac3_compute_mantissa_size_c(uint16_t mant_cnt[6][16]) +{ + int blk, bap; + int bits = 0; + + for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) { + // bap=1 : 3 mantissas in 5 bits + bits += (mant_cnt[blk][1] / 3) * 5; + // bap=2 : 3 mantissas in 7 bits + // bap=4 : 2 mantissas in 7 bits + bits += ((mant_cnt[blk][2] / 3) + (mant_cnt[blk][4] >> 1)) * 7; + // bap=3 : 1 mantissa in 3 bits + bits += mant_cnt[blk][3] * 3; + // bap=5 to 15 : get bits per mantissa from table + for (bap = 5; bap < 16; bap++) + bits += mant_cnt[blk][bap] * ff_ac3_bap_bits[bap]; } return bits; } @@ -181,6 +190,7 @@ av_cold void ff_ac3dsp_init(AC3DSPContext *c, int bit_exact) c->ac3_rshift_int32 = ac3_rshift_int32_c; c->float_to_fixed24 = float_to_fixed24_c; c->bit_alloc_calc_bap = ac3_bit_alloc_calc_bap_c; + c->update_bap_counts = ac3_update_bap_counts_c; c->compute_mantissa_size = ac3_compute_mantissa_size_c; c->extract_exponents = ac3_extract_exponents_c; diff --git a/libavcodec/ac3dsp.h b/libavcodec/ac3dsp.h index 878c802187..d7131d2c03 100644 --- a/libavcodec/ac3dsp.h +++ b/libavcodec/ac3dsp.h @@ -24,6 +24,12 @@ #include <stdint.h> +/** + * Number of mantissa bits written for each bap value. + * bap values with fractional bits are set to 0 and are calculated separately. 
+ */ +extern const uint16_t ff_ac3_bap_bits[16]; + typedef struct AC3DSPContext { /** * Set each encoded exponent in a block to the minimum of itself and the @@ -102,9 +108,21 @@ typedef struct AC3DSPContext { const uint8_t *bap_tab, uint8_t *bap); /** + * Update bap counts using the supplied array of bap. + * + * @param[out] mant_cnt bap counts for 1 block + * @param[in] bap array of bap, pointing to start coef bin + * @param[in] len number of elements to process + */ + void (*update_bap_counts)(uint16_t mant_cnt[16], uint8_t *bap, int len); + + /** * Calculate the number of bits needed to encode a set of mantissas. + * + * @param[in] mant_cnt bap counts for all blocks + * @return mantissa bit count */ - int (*compute_mantissa_size)(int mant_cnt[5], uint8_t *bap, int nb_coefs); + int (*compute_mantissa_size)(uint16_t mant_cnt[6][16]); void (*extract_exponents)(uint8_t *exp, int32_t *coef, int nb_coefs); } AC3DSPContext; diff --git a/libavcodec/ac3enc.c b/libavcodec/ac3enc.c index eb5a0f94e3..6ee8a7ac15 100644 --- a/libavcodec/ac3enc.c +++ b/libavcodec/ac3enc.c @@ -26,7 +26,6 @@ * The simplest AC-3 encoder. */ -//#define DEBUG //#define ASSERT_LEVEL 2 #include <stdint.h> @@ -43,131 +42,8 @@ #include "ac3.h" #include "audioconvert.h" #include "fft.h" - - -#ifndef CONFIG_AC3ENC_FLOAT -#define CONFIG_AC3ENC_FLOAT 0 -#endif - - -/** Maximum number of exponent groups. +1 for separate DC exponent. */ -#define AC3_MAX_EXP_GROUPS 85 - -#if CONFIG_AC3ENC_FLOAT -#define MAC_COEF(d,a,b) ((d)+=(a)*(b)) -typedef float SampleType; -typedef float CoefType; -typedef float CoefSumType; -#else -#define MAC_COEF(d,a,b) MAC64(d,a,b) -typedef int16_t SampleType; -typedef int32_t CoefType; -typedef int64_t CoefSumType; -#endif - -typedef struct AC3MDCTContext { - const SampleType *window; ///< MDCT window function - FFTContext fft; ///< FFT context for MDCT calculation -} AC3MDCTContext; - -/** - * Data for a single audio block. 
- */ -typedef struct AC3Block { - uint8_t **bap; ///< bit allocation pointers (bap) - CoefType **mdct_coef; ///< MDCT coefficients - int32_t **fixed_coef; ///< fixed-point MDCT coefficients - uint8_t **exp; ///< original exponents - uint8_t **grouped_exp; ///< grouped exponents - int16_t **psd; ///< psd per frequency bin - int16_t **band_psd; ///< psd per critical band - int16_t **mask; ///< masking curve - uint16_t **qmant; ///< quantized mantissas - uint8_t coeff_shift[AC3_MAX_CHANNELS]; ///< fixed-point coefficient shift values - uint8_t new_rematrixing_strategy; ///< send new rematrixing flags in this block - uint8_t rematrixing_flags[4]; ///< rematrixing flags - struct AC3Block *exp_ref_block[AC3_MAX_CHANNELS]; ///< reference blocks for EXP_REUSE -} AC3Block; - -/** - * AC-3 encoder private context. - */ -typedef struct AC3EncodeContext { - AVClass *av_class; ///< AVClass used for AVOption - AC3EncOptions options; ///< encoding options - PutBitContext pb; ///< bitstream writer context - DSPContext dsp; - AC3DSPContext ac3dsp; ///< AC-3 optimized functions - AC3MDCTContext mdct; ///< MDCT context - - AC3Block blocks[AC3_MAX_BLOCKS]; ///< per-block info - - int bitstream_id; ///< bitstream id (bsid) - int bitstream_mode; ///< bitstream mode (bsmod) - - int bit_rate; ///< target bit rate, in bits-per-second - int sample_rate; ///< sampling frequency, in Hz - - int frame_size_min; ///< minimum frame size in case rounding is necessary - int frame_size; ///< current frame size in bytes - int frame_size_code; ///< frame size code (frmsizecod) - uint16_t crc_inv[2]; - int bits_written; ///< bit count (used to avg. bitrate) - int samples_written; ///< sample count (used to avg. 
bitrate) - - int fbw_channels; ///< number of full-bandwidth channels (nfchans) - int channels; ///< total number of channels (nchans) - int lfe_on; ///< indicates if there is an LFE channel (lfeon) - int lfe_channel; ///< channel index of the LFE channel - int has_center; ///< indicates if there is a center channel - int has_surround; ///< indicates if there are one or more surround channels - int channel_mode; ///< channel mode (acmod) - const uint8_t *channel_map; ///< channel map used to reorder channels - - int center_mix_level; ///< center mix level code - int surround_mix_level; ///< surround mix level code - int ltrt_center_mix_level; ///< Lt/Rt center mix level code - int ltrt_surround_mix_level; ///< Lt/Rt surround mix level code - int loro_center_mix_level; ///< Lo/Ro center mix level code - int loro_surround_mix_level; ///< Lo/Ro surround mix level code - - int cutoff; ///< user-specified cutoff frequency, in Hz - int bandwidth_code; ///< bandwidth code (0 to 60) (chbwcod) - int nb_coefs[AC3_MAX_CHANNELS]; - - int rematrixing_enabled; ///< stereo rematrixing enabled - int num_rematrixing_bands; ///< number of rematrixing bands - - /* bitrate allocation control */ - int slow_gain_code; ///< slow gain code (sgaincod) - int slow_decay_code; ///< slow decay code (sdcycod) - int fast_decay_code; ///< fast decay code (fdcycod) - int db_per_bit_code; ///< dB/bit code (dbpbcod) - int floor_code; ///< floor code (floorcod) - AC3BitAllocParameters bit_alloc; ///< bit allocation parameters - int coarse_snr_offset; ///< coarse SNR offsets (csnroffst) - int fast_gain_code[AC3_MAX_CHANNELS]; ///< fast gain codes (signal-to-mask ratio) (fgaincod) - int fine_snr_offset[AC3_MAX_CHANNELS]; ///< fine SNR offsets (fsnroffst) - int frame_bits_fixed; ///< number of non-coefficient bits for fixed parameters - int frame_bits; ///< all frame bits except exponents and mantissas - int exponent_bits; ///< number of bits used for exponents - - SampleType **planar_samples; - uint8_t 
*bap_buffer; - uint8_t *bap1_buffer; - CoefType *mdct_coef_buffer; - int32_t *fixed_coef_buffer; - uint8_t *exp_buffer; - uint8_t *grouped_exp_buffer; - int16_t *psd_buffer; - int16_t *band_psd_buffer; - int16_t *mask_buffer; - uint16_t *qmant_buffer; - - uint8_t exp_strategy[AC3_MAX_CHANNELS][AC3_MAX_BLOCKS]; ///< exponent strategies - - DECLARE_ALIGNED(32, SampleType, windowed_samples)[AC3_WINDOW_SIZE]; -} AC3EncodeContext; +#include "ac3enc.h" +#include "eac3enc.h" typedef struct AC3Mant { uint16_t *qmant1_ptr, *qmant2_ptr, *qmant4_ptr; ///< mantissa pointers for bap=1,2,4 @@ -191,92 +67,17 @@ static const float extmixlev_options[EXTMIXLEV_NUM_OPTIONS] = { }; -#define OFFSET(param) offsetof(AC3EncodeContext, options.param) -#define AC3ENC_PARAM (AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM) - -#if CONFIG_AC3ENC_FLOAT || !CONFIG_AC3_FLOAT_ENCODER //we need this exactly once compiled in -const AVOption ff_ac3_options[] = { -/* Metadata Options */ -{"per_frame_metadata", "Allow Changing Metadata Per-Frame", OFFSET(allow_per_frame_metadata), FF_OPT_TYPE_INT, {.dbl = 0 }, 0, 1, AC3ENC_PARAM}, -/* downmix levels */ -{"center_mixlev", "Center Mix Level", OFFSET(center_mix_level), FF_OPT_TYPE_FLOAT, {.dbl = LEVEL_MINUS_4POINT5DB }, 0.0, 1.0, AC3ENC_PARAM}, -{"surround_mixlev", "Surround Mix Level", OFFSET(surround_mix_level), FF_OPT_TYPE_FLOAT, {.dbl = LEVEL_MINUS_6DB }, 0.0, 1.0, AC3ENC_PARAM}, -/* audio production information */ -{"mixing_level", "Mixing Level", OFFSET(mixing_level), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, 111, AC3ENC_PARAM}, -{"room_type", "Room Type", OFFSET(room_type), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, 2, AC3ENC_PARAM, "room_type"}, - {"notindicated", "Not Indicated (default)", 0, FF_OPT_TYPE_CONST, {.dbl = 0 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "room_type"}, - {"large", "Large Room", 0, FF_OPT_TYPE_CONST, {.dbl = 1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "room_type"}, - {"small", "Small Room", 0, FF_OPT_TYPE_CONST, {.dbl = 2 }, INT_MIN, INT_MAX, 
AC3ENC_PARAM, "room_type"}, -/* other metadata options */ -{"copyright", "Copyright Bit", OFFSET(copyright), FF_OPT_TYPE_INT, {.dbl = 0 }, 0, 1, AC3ENC_PARAM}, -{"dialnorm", "Dialogue Level (dB)", OFFSET(dialogue_level), FF_OPT_TYPE_INT, {.dbl = -31 }, -31, -1, AC3ENC_PARAM}, -{"dsur_mode", "Dolby Surround Mode", OFFSET(dolby_surround_mode), FF_OPT_TYPE_INT, {.dbl = 0 }, 0, 2, AC3ENC_PARAM, "dsur_mode"}, - {"notindicated", "Not Indicated (default)", 0, FF_OPT_TYPE_CONST, {.dbl = 0 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dsur_mode"}, - {"on", "Dolby Surround Encoded", 0, FF_OPT_TYPE_CONST, {.dbl = 1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dsur_mode"}, - {"off", "Not Dolby Surround Encoded", 0, FF_OPT_TYPE_CONST, {.dbl = 2 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dsur_mode"}, -{"original", "Original Bit Stream", OFFSET(original), FF_OPT_TYPE_INT, {.dbl = 1 }, 0, 1, AC3ENC_PARAM}, -/* extended bitstream information */ -{"dmix_mode", "Preferred Stereo Downmix Mode", OFFSET(preferred_stereo_downmix), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, 2, AC3ENC_PARAM, "dmix_mode"}, - {"notindicated", "Not Indicated (default)", 0, FF_OPT_TYPE_CONST, {.dbl = 0 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dmix_mode"}, - {"ltrt", "Lt/Rt Downmix Preferred", 0, FF_OPT_TYPE_CONST, {.dbl = 1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dmix_mode"}, - {"loro", "Lo/Ro Downmix Preferred", 0, FF_OPT_TYPE_CONST, {.dbl = 2 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dmix_mode"}, -{"ltrt_cmixlev", "Lt/Rt Center Mix Level", OFFSET(ltrt_center_mix_level), FF_OPT_TYPE_FLOAT, {.dbl = -1.0 }, -1.0, 2.0, AC3ENC_PARAM}, -{"ltrt_surmixlev", "Lt/Rt Surround Mix Level", OFFSET(ltrt_surround_mix_level), FF_OPT_TYPE_FLOAT, {.dbl = -1.0 }, -1.0, 2.0, AC3ENC_PARAM}, -{"loro_cmixlev", "Lo/Ro Center Mix Level", OFFSET(loro_center_mix_level), FF_OPT_TYPE_FLOAT, {.dbl = -1.0 }, -1.0, 2.0, AC3ENC_PARAM}, -{"loro_surmixlev", "Lo/Ro Surround Mix Level", OFFSET(loro_surround_mix_level), FF_OPT_TYPE_FLOAT, {.dbl = -1.0 }, -1.0, 2.0, AC3ENC_PARAM}, 
-{"dsurex_mode", "Dolby Surround EX Mode", OFFSET(dolby_surround_ex_mode), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, 2, AC3ENC_PARAM, "dsurex_mode"}, - {"notindicated", "Not Indicated (default)", 0, FF_OPT_TYPE_CONST, {.dbl = 0 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dsurex_mode"}, - {"on", "Dolby Surround EX Encoded", 0, FF_OPT_TYPE_CONST, {.dbl = 1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dsurex_mode"}, - {"off", "Not Dolby Surround EX Encoded", 0, FF_OPT_TYPE_CONST, {.dbl = 2 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dsurex_mode"}, -{"dheadphone_mode", "Dolby Headphone Mode", OFFSET(dolby_headphone_mode), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, 2, AC3ENC_PARAM, "dheadphone_mode"}, - {"notindicated", "Not Indicated (default)", 0, FF_OPT_TYPE_CONST, {.dbl = 0 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dheadphone_mode"}, - {"on", "Dolby Headphone Encoded", 0, FF_OPT_TYPE_CONST, {.dbl = 1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dheadphone_mode"}, - {"off", "Not Dolby Headphone Encoded", 0, FF_OPT_TYPE_CONST, {.dbl = 2 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dheadphone_mode"}, -{"ad_conv_type", "A/D Converter Type", OFFSET(ad_converter_type), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, 1, AC3ENC_PARAM, "ad_conv_type"}, - {"standard", "Standard (default)", 0, FF_OPT_TYPE_CONST, {.dbl = 0 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "ad_conv_type"}, - {"hdcd", "HDCD", 0, FF_OPT_TYPE_CONST, {.dbl = 1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "ad_conv_type"}, -/* Other Encoding Options */ -{"stereo_rematrixing", "Stereo Rematrixing", OFFSET(stereo_rematrixing), FF_OPT_TYPE_INT, {.dbl = 1 }, 0, 1, AC3ENC_PARAM}, -{NULL} -}; -#endif - -#if CONFIG_AC3ENC_FLOAT -static AVClass ac3enc_class = { "AC-3 Encoder", av_default_item_name, - ff_ac3_options, LIBAVUTIL_VERSION_INT }; -#else -static AVClass ac3enc_class = { "Fixed-Point AC-3 Encoder", av_default_item_name, - ff_ac3_options, LIBAVUTIL_VERSION_INT }; -#endif - - -/* prototypes for functions in ac3enc_fixed.c and ac3enc_float.c */ - -static av_cold void mdct_end(AC3MDCTContext *mdct); - 
-static av_cold int mdct_init(AVCodecContext *avctx, AC3MDCTContext *mdct, - int nbits); - -static void apply_window(DSPContext *dsp, SampleType *output, const SampleType *input, - const SampleType *window, unsigned int len); - -static int normalize_samples(AC3EncodeContext *s); - -static void scale_coefficients(AC3EncodeContext *s); - - /** * LUT for number of exponent groups. - * exponent_group_tab[exponent strategy-1][number of coefficients] + * exponent_group_tab[coupling][exponent strategy-1][number of coefficients] */ -static uint8_t exponent_group_tab[3][256]; +static uint8_t exponent_group_tab[2][3][256]; /** * List of supported channel layouts. */ -#if CONFIG_AC3ENC_FLOAT || !CONFIG_AC3_FLOAT_ENCODER //we need this exactly once compiled in -const int64_t ff_ac3_channel_layouts[] = { +const int64_t ff_ac3_channel_layouts[19] = { AV_CH_LAYOUT_MONO, AV_CH_LAYOUT_STEREO, AV_CH_LAYOUT_2_1, @@ -297,7 +98,6 @@ const int64_t ff_ac3_channel_layouts[] = { AV_CH_LAYOUT_5POINT1_BACK, 0 }; -#endif /** @@ -331,8 +131,51 @@ static const uint8_t ac3_bandwidth_tab[5][3][19] = { /** + * LUT to select the coupling start band based on the bit rate, sample rate, and + * number of full-bandwidth channels. -1 = coupling off + * ac3_coupling_start_tab[channel_mode-2][sample rate code][bit rate code] + * + * TODO: more testing for optimal parameters. + * multi-channel tests at 44.1kHz and 32kHz. 
+ */ +static const int8_t ac3_coupling_start_tab[6][3][19] = { +// 32 40 48 56 64 80 96 112 128 160 192 224 256 320 384 448 512 576 640 + + // 2/0 + { { 0, 0, 0, 0, 0, 0, 0, 1, 1, 7, 8, 11, 12, -1, -1, -1, -1, -1, -1 }, + { 0, 0, 0, 0, 0, 0, 1, 3, 5, 7, 10, 12, 13, -1, -1, -1, -1, -1, -1 }, + { 0, 0, 0, 0, 1, 2, 2, 9, 13, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1 } }, + + // 3/0 + { { 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 6, 9, 11, 12, 13, -1, -1, -1, -1 }, + { 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 6, 9, 11, 12, 13, -1, -1, -1, -1 }, + { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 } }, + + // 2/1 - untested + { { 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 6, 9, 11, 12, 13, -1, -1, -1, -1 }, + { 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 6, 9, 11, 12, 13, -1, -1, -1, -1 }, + { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 } }, + + // 3/1 + { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 2, 10, 11, 11, 12, 12, 14, -1 }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 2, 10, 11, 11, 12, 12, 14, -1 }, + { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 } }, + + // 2/2 - untested + { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 2, 10, 11, 11, 12, 12, 14, -1 }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 2, 10, 11, 11, 12, 12, 14, -1 }, + { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 } }, + + // 3/2 + { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 6, 8, 11, 12, 12, -1, -1 }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 6, 8, 11, 12, 12, -1, -1 }, + { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 } }, +}; + + +/** * Adjust the frame size to make the average bit rate match the target bit rate. - * This is only needed for 11025, 22050, and 44100 sample rates. + * This is only needed for 11025, 22050, and 44100 sample rates or any E-AC-3. */ static void adjust_frame_size(AC3EncodeContext *s) { @@ -347,110 +190,63 @@ static void adjust_frame_size(AC3EncodeContext *s) } -/** - * Deinterleave input samples. 
- * Channels are reordered from FFmpeg's default order to AC-3 order. - */ -static void deinterleave_input_samples(AC3EncodeContext *s, - const SampleType *samples) -{ - int ch, i; - - /* deinterleave and remap input samples */ - for (ch = 0; ch < s->channels; ch++) { - const SampleType *sptr; - int sinc; - - /* copy last 256 samples of previous frame to the start of the current frame */ - memcpy(&s->planar_samples[ch][0], &s->planar_samples[ch][AC3_FRAME_SIZE], - AC3_BLOCK_SIZE * sizeof(s->planar_samples[0][0])); - - /* deinterleave */ - sinc = s->channels; - sptr = samples + s->channel_map[ch]; - for (i = AC3_BLOCK_SIZE; i < AC3_FRAME_SIZE+AC3_BLOCK_SIZE; i++) { - s->planar_samples[ch][i] = *sptr; - sptr += sinc; - } - } -} - - -/** - * Apply the MDCT to input samples to generate frequency coefficients. - * This applies the KBD window and normalizes the input to reduce precision - * loss due to fixed-point calculations. - */ -static void apply_mdct(AC3EncodeContext *s) +static void compute_coupling_strategy(AC3EncodeContext *s) { int blk, ch; + int got_cpl_snr; - for (ch = 0; ch < s->channels; ch++) { - for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) { - AC3Block *block = &s->blocks[blk]; - const SampleType *input_samples = &s->planar_samples[ch][blk * AC3_BLOCK_SIZE]; + /* set coupling use flags for each block/channel */ + /* TODO: turn coupling on/off and adjust start band based on bit usage */ + for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) { + AC3Block *block = &s->blocks[blk]; + for (ch = 1; ch <= s->fbw_channels; ch++) + block->channel_in_cpl[ch] = s->cpl_on; + } - apply_window(&s->dsp, s->windowed_samples, input_samples, s->mdct.window, AC3_WINDOW_SIZE); + /* enable coupling for each block if at least 2 channels have coupling + enabled for that block */ + got_cpl_snr = 0; + for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) { + AC3Block *block = &s->blocks[blk]; + block->num_cpl_channels = 0; + for (ch = 1; ch <= s->fbw_channels; ch++) + block->num_cpl_channels += 
block->channel_in_cpl[ch]; + block->cpl_in_use = block->num_cpl_channels > 1; + if (!block->cpl_in_use) { + block->num_cpl_channels = 0; + for (ch = 1; ch <= s->fbw_channels; ch++) + block->channel_in_cpl[ch] = 0; + } - block->coeff_shift[ch] = normalize_samples(s); + block->new_cpl_strategy = !blk; + if (blk) { + for (ch = 1; ch <= s->fbw_channels; ch++) { + if (block->channel_in_cpl[ch] != s->blocks[blk-1].channel_in_cpl[ch]) { + block->new_cpl_strategy = 1; + break; + } + } + } + block->new_cpl_leak = block->new_cpl_strategy; - s->mdct.fft.mdct_calcw(&s->mdct.fft, block->mdct_coef[ch], - s->windowed_samples); + if (!blk || (block->cpl_in_use && !got_cpl_snr)) { + block->new_snr_offsets = 1; + if (block->cpl_in_use) + got_cpl_snr = 1; + } else { + block->new_snr_offsets = 0; } } -} - - -/** - * Determine rematrixing flags for each block and band. - */ -static void compute_rematrixing_strategy(AC3EncodeContext *s) -{ - int nb_coefs; - int blk, bnd, i; - AC3Block *block, *block0; - - if (s->channel_mode != AC3_CHMODE_STEREO) - return; - - s->num_rematrixing_bands = 4; - - nb_coefs = FFMIN(s->nb_coefs[0], s->nb_coefs[1]); + /* set bandwidth for each channel */ for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) { - block = &s->blocks[blk]; - block->new_rematrixing_strategy = !blk; - if (!s->rematrixing_enabled) - continue; - for (bnd = 0; bnd < s->num_rematrixing_bands; bnd++) { - /* calculate calculate sum of squared coeffs for one band in one block */ - int start = ff_ac3_rematrix_band_tab[bnd]; - int end = FFMIN(nb_coefs, ff_ac3_rematrix_band_tab[bnd+1]); - CoefSumType sum[4] = {0,}; - for (i = start; i < end; i++) { - CoefType lt = block->mdct_coef[0][i]; - CoefType rt = block->mdct_coef[1][i]; - CoefType md = lt + rt; - CoefType sd = lt - rt; - MAC_COEF(sum[0], lt, lt); - MAC_COEF(sum[1], rt, rt); - MAC_COEF(sum[2], md, md); - MAC_COEF(sum[3], sd, sd); - } - - /* compare sums to determine if rematrixing will be used for this band */ - if (FFMIN(sum[2], sum[3]) < 
FFMIN(sum[0], sum[1])) - block->rematrixing_flags[bnd] = 1; + AC3Block *block = &s->blocks[blk]; + for (ch = 1; ch <= s->fbw_channels; ch++) { + if (block->channel_in_cpl[ch]) + block->end_freq[ch] = s->start_freq[CPL_CH]; else - block->rematrixing_flags[bnd] = 0; - - /* determine if new rematrixing flags will be sent */ - if (blk && - block->rematrixing_flags[bnd] != block0->rematrixing_flags[bnd]) { - block->new_rematrixing_strategy = 1; - } + block->end_freq[ch] = s->bandwidth_code * 3 + 73; } - block0 = block; } } @@ -468,21 +264,20 @@ static void apply_rematrixing(AC3EncodeContext *s) if (!s->rematrixing_enabled) return; - nb_coefs = FFMIN(s->nb_coefs[0], s->nb_coefs[1]); - for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) { AC3Block *block = &s->blocks[blk]; if (block->new_rematrixing_strategy) flags = block->rematrixing_flags; - for (bnd = 0; bnd < s->num_rematrixing_bands; bnd++) { + nb_coefs = FFMIN(block->end_freq[1], block->end_freq[2]); + for (bnd = 0; bnd < block->num_rematrixing_bands; bnd++) { if (flags[bnd]) { start = ff_ac3_rematrix_band_tab[bnd]; end = FFMIN(nb_coefs, ff_ac3_rematrix_band_tab[bnd+1]); for (i = start; i < end; i++) { - int32_t lt = block->fixed_coef[0][i]; - int32_t rt = block->fixed_coef[1][i]; - block->fixed_coef[0][i] = (lt + rt) >> 1; - block->fixed_coef[1][i] = (lt - rt) >> 1; + int32_t lt = block->fixed_coef[1][i]; + int32_t rt = block->fixed_coef[2][i]; + block->fixed_coef[1][i] = (lt + rt) >> 1; + block->fixed_coef[2][i] = (lt - rt) >> 1; } } } @@ -499,12 +294,13 @@ static av_cold void exponent_init(AC3EncodeContext *s) for (expstr = EXP_D15-1; expstr <= EXP_D45-1; expstr++) { grpsize = 3 << expstr; - for (i = 73; i < 256; i++) { - exponent_group_tab[expstr][i] = (i + grpsize - 4) / grpsize; + for (i = 12; i < 256; i++) { + exponent_group_tab[0][expstr][i] = (i + grpsize - 4) / grpsize; + exponent_group_tab[1][expstr][i] = (i ) / grpsize; } } /* LFE */ - exponent_group_tab[0][7] = 2; + exponent_group_tab[0][0][7] = 2; } @@ -515,15 
+311,11 @@ static av_cold void exponent_init(AC3EncodeContext *s) */ static void extract_exponents(AC3EncodeContext *s) { - int blk, ch; + int ch = !s->cpl_on; + int chan_size = AC3_MAX_COEFS * AC3_MAX_BLOCKS * (s->channels - ch + 1); + AC3Block *block = &s->blocks[0]; - for (ch = 0; ch < s->channels; ch++) { - for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) { - AC3Block *block = &s->blocks[blk]; - s->ac3dsp.extract_exponents(block->exp[ch], block->fixed_coef[ch], - AC3_MAX_COEFS); - } - } + s->ac3dsp.extract_exponents(block->exp[ch], block->fixed_coef[ch], chan_size); } @@ -542,7 +334,7 @@ static void compute_exp_strategy(AC3EncodeContext *s) { int ch, blk, blk1; - for (ch = 0; ch < s->fbw_channels; ch++) { + for (ch = !s->cpl_on; ch <= s->fbw_channels; ch++) { uint8_t *exp_strategy = s->exp_strategy[ch]; uint8_t *exp = s->blocks[0].exp[ch]; int exp_diff; @@ -551,13 +343,18 @@ static void compute_exp_strategy(AC3EncodeContext *s) reused in the next frame */ exp_strategy[0] = EXP_NEW; exp += AC3_MAX_COEFS; - for (blk = 1; blk < AC3_MAX_BLOCKS; blk++) { + for (blk = 1; blk < AC3_MAX_BLOCKS; blk++, exp += AC3_MAX_COEFS) { + if ((ch == CPL_CH && (!s->blocks[blk].cpl_in_use || !s->blocks[blk-1].cpl_in_use)) || + (ch > CPL_CH && (s->blocks[blk].channel_in_cpl[ch] != s->blocks[blk-1].channel_in_cpl[ch]))) { + exp_strategy[blk] = EXP_NEW; + continue; + } exp_diff = s->dsp.sad[0](NULL, exp, exp - AC3_MAX_COEFS, 16, 16); - if (exp_diff > EXP_DIFF_THRESHOLD) + exp_strategy[blk] = EXP_REUSE; + if (ch == CPL_CH && exp_diff > (EXP_DIFF_THRESHOLD * (s->blocks[blk].end_freq[ch] - s->start_freq[ch]) / AC3_MAX_COEFS)) + exp_strategy[blk] = EXP_NEW; + else if (ch > CPL_CH && exp_diff > EXP_DIFF_THRESHOLD) exp_strategy[blk] = EXP_NEW; - else - exp_strategy[blk] = EXP_REUSE; - exp += AC3_MAX_COEFS; } /* now select the encoding strategy type : if exponents are often @@ -588,25 +385,26 @@ static void compute_exp_strategy(AC3EncodeContext *s) /** * Update the exponents so that they are the 
ones the decoder will decode. */ -static void encode_exponents_blk_ch(uint8_t *exp, int nb_exps, int exp_strategy) +static void encode_exponents_blk_ch(uint8_t *exp, int nb_exps, int exp_strategy, + int cpl) { int nb_groups, i, k; - nb_groups = exponent_group_tab[exp_strategy-1][nb_exps] * 3; + nb_groups = exponent_group_tab[cpl][exp_strategy-1][nb_exps] * 3; /* for each group, compute the minimum exponent */ switch(exp_strategy) { case EXP_D25: - for (i = 1, k = 1; i <= nb_groups; i++) { + for (i = 1, k = 1-cpl; i <= nb_groups; i++) { uint8_t exp_min = exp[k]; if (exp[k+1] < exp_min) exp_min = exp[k+1]; - exp[i] = exp_min; + exp[i-cpl] = exp_min; k += 2; } break; case EXP_D45: - for (i = 1, k = 1; i <= nb_groups; i++) { + for (i = 1, k = 1-cpl; i <= nb_groups; i++) { uint8_t exp_min = exp[k]; if (exp[k+1] < exp_min) exp_min = exp[k+1]; @@ -614,14 +412,14 @@ static void encode_exponents_blk_ch(uint8_t *exp, int nb_exps, int exp_strategy) exp_min = exp[k+2]; if (exp[k+3] < exp_min) exp_min = exp[k+3]; - exp[i] = exp_min; + exp[i-cpl] = exp_min; k += 4; } break; } /* constraint for DC exponent */ - if (exp[0] > 15) + if (!cpl && exp[0] > 15) exp[0] = 15; /* decrease the delta between each groups to within 2 so that they can be @@ -632,18 +430,21 @@ static void encode_exponents_blk_ch(uint8_t *exp, int nb_exps, int exp_strategy) while (--i >= 0) exp[i] = FFMIN(exp[i], exp[i+1] + 2); + if (cpl) + exp[-1] = exp[0] & ~1; + /* now we have the exponent values the decoder will see */ switch (exp_strategy) { case EXP_D25: - for (i = nb_groups, k = nb_groups * 2; i > 0; i--) { - uint8_t exp1 = exp[i]; + for (i = nb_groups, k = (nb_groups * 2)-cpl; i > 0; i--) { + uint8_t exp1 = exp[i-cpl]; exp[k--] = exp1; exp[k--] = exp1; } break; case EXP_D45: - for (i = nb_groups, k = nb_groups * 4; i > 0; i--) { - exp[k] = exp[k-1] = exp[k-2] = exp[k-3] = exp[i]; + for (i = nb_groups, k = (nb_groups * 4)-cpl; i > 0; i--) { + exp[k] = exp[k-1] = exp[k-2] = exp[k-3] = exp[i-cpl]; k -= 4; } 
break; @@ -659,37 +460,48 @@ static void encode_exponents_blk_ch(uint8_t *exp, int nb_exps, int exp_strategy) */ static void encode_exponents(AC3EncodeContext *s) { - int blk, blk1, ch; + int blk, blk1, ch, cpl; uint8_t *exp, *exp_strategy; int nb_coefs, num_reuse_blocks; - for (ch = 0; ch < s->channels; ch++) { - exp = s->blocks[0].exp[ch]; + for (ch = !s->cpl_on; ch <= s->channels; ch++) { + exp = s->blocks[0].exp[ch] + s->start_freq[ch]; exp_strategy = s->exp_strategy[ch]; - nb_coefs = s->nb_coefs[ch]; + cpl = (ch == CPL_CH); blk = 0; while (blk < AC3_MAX_BLOCKS) { + AC3Block *block = &s->blocks[blk]; + if (cpl && !block->cpl_in_use) { + exp += AC3_MAX_COEFS; + blk++; + continue; + } + nb_coefs = block->end_freq[ch] - s->start_freq[ch]; blk1 = blk + 1; /* count the number of EXP_REUSE blocks after the current block - and set exponent reference block pointers */ - s->blocks[blk].exp_ref_block[ch] = &s->blocks[blk]; + and set exponent reference block numbers */ + s->exp_ref_block[ch][blk] = blk; while (blk1 < AC3_MAX_BLOCKS && exp_strategy[blk1] == EXP_REUSE) { - s->blocks[blk1].exp_ref_block[ch] = &s->blocks[blk]; + s->exp_ref_block[ch][blk1] = blk; blk1++; } num_reuse_blocks = blk1 - blk - 1; /* for the EXP_REUSE case we select the min of the exponents */ - s->ac3dsp.ac3_exponent_min(exp, num_reuse_blocks, nb_coefs); + s->ac3dsp.ac3_exponent_min(exp-s->start_freq[ch], num_reuse_blocks, + AC3_MAX_COEFS); - encode_exponents_blk_ch(exp, nb_coefs, exp_strategy[blk]); + encode_exponents_blk_ch(exp, nb_coefs, exp_strategy[blk], cpl); exp += AC3_MAX_COEFS * (num_reuse_blocks + 1); blk = blk1; } } + + /* reference block numbers have been changed, so reset ref_bap_set */ + s->ref_bap_set = 0; } @@ -700,7 +512,7 @@ static void encode_exponents(AC3EncodeContext *s) */ static void group_exponents(AC3EncodeContext *s) { - int blk, ch, i; + int blk, ch, i, cpl; int group_size, nb_groups, bit_count; uint8_t *p; int delta0, delta1, delta2; @@ -709,14 +521,15 @@ static void 
group_exponents(AC3EncodeContext *s) bit_count = 0; for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) { AC3Block *block = &s->blocks[blk]; - for (ch = 0; ch < s->channels; ch++) { + for (ch = !block->cpl_in_use; ch <= s->channels; ch++) { int exp_strategy = s->exp_strategy[ch][blk]; if (exp_strategy == EXP_REUSE) continue; + cpl = (ch == CPL_CH); group_size = exp_strategy + (exp_strategy == EXP_D45); - nb_groups = exponent_group_tab[exp_strategy-1][s->nb_coefs[ch]]; + nb_groups = exponent_group_tab[cpl][exp_strategy-1][block->end_freq[ch]-s->start_freq[ch]]; bit_count += 4 + (nb_groups * 7); - p = block->exp[ch]; + p = block->exp[ch] + s->start_freq[ch] - cpl; /* DC exponent */ exp1 = *p++; @@ -783,54 +596,76 @@ static void count_frame_bits_fixed(AC3EncodeContext *s) /* assumptions: * no dynamic range codes - * no channel coupling * bit allocation parameters do not change between blocks - * SNR offsets do not change between blocks * no delta bit allocation * no skipped data * no auxilliary data + * no E-AC-3 metadata */ /* header */ - frame_bits = 65; - frame_bits += frame_bits_inc[s->channel_mode]; + frame_bits = 16; /* sync info */ + if (s->eac3) { + /* bitstream info header */ + frame_bits += 35; + frame_bits += 1 + 1 + 1; + /* audio frame header */ + frame_bits += 2; + frame_bits += 10; + /* exponent strategy */ + for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) + frame_bits += 2 * s->fbw_channels + s->lfe_on; + /* converter exponent strategy */ + frame_bits += s->fbw_channels * 5; + /* snr offsets */ + frame_bits += 10; + /* block start info */ + frame_bits++; + } else { + frame_bits += 49; + frame_bits += frame_bits_inc[s->channel_mode]; + } /* audio blocks */ for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) { - /* block switch flags */ - frame_bits += s->fbw_channels; + if (!s->eac3) { + /* block switch flags */ + frame_bits += s->fbw_channels; - /* dither flags */ - frame_bits += s->fbw_channels; + /* dither flags */ + frame_bits += s->fbw_channels; + } /* dynamic range */ 
frame_bits++; - /* coupling strategy */ - frame_bits++; - if (!blk) + /* spectral extension */ + if (s->eac3) frame_bits++; - /* exponent strategy */ - frame_bits += 2 * s->fbw_channels; - if (s->lfe_on) - frame_bits++; + if (!s->eac3) { + /* exponent strategy */ + frame_bits += 2 * s->fbw_channels; + if (s->lfe_on) + frame_bits++; - /* bit allocation params */ - frame_bits++; - if (!blk) - frame_bits += 2 + 2 + 2 + 2 + 3; + /* bit allocation params */ + frame_bits++; + if (!blk) + frame_bits += 2 + 2 + 2 + 2 + 3; + } - /* snr offsets and fast gain codes */ - frame_bits++; - if (!blk) - frame_bits += 6 + s->channels * (4 + 3); + /* converter snr offset */ + if (s->eac3) + frame_bits++; - /* delta bit allocation */ - frame_bits++; + if (!s->eac3) { + /* delta bit allocation */ + frame_bits++; - /* skipped data */ - frame_bits++; + /* skipped data */ + frame_bits++; + } } /* auxiliary data */ @@ -855,9 +690,9 @@ static void bit_alloc_init(AC3EncodeContext *s) s->slow_decay_code = 2; s->fast_decay_code = 1; s->slow_gain_code = 1; - s->db_per_bit_code = 3; + s->db_per_bit_code = s->eac3 ? 
2 : 3; s->floor_code = 7; - for (ch = 0; ch < s->channels; ch++) + for (ch = 0; ch <= s->channels; ch++) s->fast_gain_code[ch] = 4; /* initial snr offset */ @@ -871,6 +706,8 @@ static void bit_alloc_init(AC3EncodeContext *s) s->bit_alloc.slow_gain = ff_ac3_slow_gain_tab[s->slow_gain_code]; s->bit_alloc.db_per_bit = ff_ac3_db_per_bit_tab[s->db_per_bit_code]; s->bit_alloc.floor = ff_ac3_floor_tab[s->floor_code]; + s->bit_alloc.cpl_fast_leak = 0; + s->bit_alloc.cpl_slow_leak = 0; count_frame_bits_fixed(s); } @@ -888,48 +725,108 @@ static void count_frame_bits(AC3EncodeContext *s) int frame_bits = 0; /* header */ - if (opt->audio_production_info) - frame_bits += 7; - if (s->bitstream_id == 6) { - if (opt->extended_bsi_1) - frame_bits += 14; - if (opt->extended_bsi_2) - frame_bits += 14; + if (s->eac3) { + /* coupling */ + if (s->channel_mode > AC3_CHMODE_MONO) { + frame_bits++; + for (blk = 1; blk < AC3_MAX_BLOCKS; blk++) { + AC3Block *block = &s->blocks[blk]; + frame_bits++; + if (block->new_cpl_strategy) + frame_bits++; + } + } + /* coupling exponent strategy */ + for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) + frame_bits += 2 * s->blocks[blk].cpl_in_use; + } else { + if (opt->audio_production_info) + frame_bits += 7; + if (s->bitstream_id == 6) { + if (opt->extended_bsi_1) + frame_bits += 14; + if (opt->extended_bsi_2) + frame_bits += 14; + } } /* audio blocks */ for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) { + AC3Block *block = &s->blocks[blk]; + + /* coupling strategy */ + if (!s->eac3) + frame_bits++; + if (block->new_cpl_strategy) { + if (!s->eac3) + frame_bits++; + if (block->cpl_in_use) { + if (s->eac3) + frame_bits++; + if (!s->eac3 || s->channel_mode != AC3_CHMODE_STEREO) + frame_bits += s->fbw_channels; + if (s->channel_mode == AC3_CHMODE_STEREO) + frame_bits++; + frame_bits += 4 + 4; + if (s->eac3) + frame_bits++; + else + frame_bits += s->num_cpl_subbands - 1; + } + } + + /* coupling coordinates */ + if (block->cpl_in_use) { + for (ch = 1; ch <= 
s->fbw_channels; ch++) { + if (block->channel_in_cpl[ch]) { + if (!s->eac3 || block->new_cpl_coords != 2) + frame_bits++; + if (block->new_cpl_coords) { + frame_bits += 2; + frame_bits += (4 + 4) * s->num_cpl_bands; + } + } + } + } + /* stereo rematrixing */ if (s->channel_mode == AC3_CHMODE_STEREO) { - frame_bits++; + if (!s->eac3 || blk > 0) + frame_bits++; if (s->blocks[blk].new_rematrixing_strategy) - frame_bits += s->num_rematrixing_bands; + frame_bits += block->num_rematrixing_bands; } /* bandwidth codes & gain range */ - for (ch = 0; ch < s->fbw_channels; ch++) { - if (s->exp_strategy[ch][blk] != EXP_REUSE) - frame_bits += 6 + 2; + for (ch = 1; ch <= s->fbw_channels; ch++) { + if (s->exp_strategy[ch][blk] != EXP_REUSE) { + if (!block->channel_in_cpl[ch]) + frame_bits += 6; + frame_bits += 2; + } } - } - s->frame_bits = s->frame_bits_fixed + frame_bits; -} + /* coupling exponent strategy */ + if (!s->eac3 && block->cpl_in_use) + frame_bits += 2; + /* snr offsets and fast gain codes */ + if (!s->eac3) { + frame_bits++; + if (block->new_snr_offsets) + frame_bits += 6 + (s->channels + block->cpl_in_use) * (4 + 3); + } -/** - * Finalize the mantissa bit count by adding in the grouped mantissas. 
- */ -static int compute_mantissa_size_final(int mant_cnt[5]) -{ - // bap=1 : 3 mantissas in 5 bits - int bits = (mant_cnt[1] / 3) * 5; - // bap=2 : 3 mantissas in 7 bits - // bap=4 : 2 mantissas in 7 bits - bits += ((mant_cnt[2] / 3) + (mant_cnt[4] >> 1)) * 7; - // bap=3 : each mantissa is 3 bits - bits += mant_cnt[3] * 3; - return bits; + /* coupling leak info */ + if (block->cpl_in_use) { + if (!s->eac3 || block->new_cpl_leak != 2) + frame_bits++; + if (block->new_cpl_leak) + frame_bits += 3 + 3; + } + } + + s->frame_bits = s->frame_bits_fixed + frame_bits; } @@ -943,16 +840,16 @@ static void bit_alloc_masking(AC3EncodeContext *s) for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) { AC3Block *block = &s->blocks[blk]; - for (ch = 0; ch < s->channels; ch++) { + for (ch = !block->cpl_in_use; ch <= s->channels; ch++) { /* We only need psd and mask for calculating bap. Since we currently do not calculate bap when exponent strategy is EXP_REUSE we do not need to calculate psd or mask. */ if (s->exp_strategy[ch][blk] != EXP_REUSE) { - ff_ac3_bit_alloc_calc_psd(block->exp[ch], 0, - s->nb_coefs[ch], - block->psd[ch], block->band_psd[ch]); + ff_ac3_bit_alloc_calc_psd(block->exp[ch], s->start_freq[ch], + block->end_freq[ch], block->psd[ch], + block->band_psd[ch]); ff_ac3_bit_alloc_calc_mask(&s->bit_alloc, block->band_psd[ch], - 0, s->nb_coefs[ch], + s->start_freq[ch], block->end_freq[ch], ff_ac3_fast_gain_tab[s->fast_gain_code[ch]], ch == s->lfe_channel, DBA_NONE, 0, NULL, NULL, NULL, @@ -970,17 +867,79 @@ static void bit_alloc_masking(AC3EncodeContext *s) static void reset_block_bap(AC3EncodeContext *s) { int blk, ch; - if (s->blocks[0].bap[0] == s->bap_buffer) + uint8_t *ref_bap; + + if (s->ref_bap[0][0] == s->bap_buffer && s->ref_bap_set) return; + + ref_bap = s->bap_buffer; + for (ch = 0; ch <= s->channels; ch++) { + for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) + s->ref_bap[ch][blk] = ref_bap + AC3_MAX_COEFS * s->exp_ref_block[ch][blk]; + ref_bap += AC3_MAX_COEFS * 
AC3_MAX_BLOCKS; + } + s->ref_bap_set = 1; +} + + +/** + * Initialize mantissa counts. + * These are set so that they are padded to the next whole group size when bits + * are counted in compute_mantissa_size. + */ +static void count_mantissa_bits_init(uint16_t mant_cnt[AC3_MAX_BLOCKS][16]) +{ + int blk; + for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) { - for (ch = 0; ch < s->channels; ch++) { - s->blocks[blk].bap[ch] = &s->bap_buffer[AC3_MAX_COEFS * (blk * s->channels + ch)]; - } + memset(mant_cnt[blk], 0, sizeof(mant_cnt[blk])); + mant_cnt[blk][1] = mant_cnt[blk][2] = 2; + mant_cnt[blk][4] = 1; } } /** + * Update mantissa bit counts for all blocks in 1 channel in a given bandwidth + * range. + */ +static void count_mantissa_bits_update_ch(AC3EncodeContext *s, int ch, + uint16_t mant_cnt[AC3_MAX_BLOCKS][16], + int start, int end) +{ + int blk; + + for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) { + AC3Block *block = &s->blocks[blk]; + if (ch == CPL_CH && !block->cpl_in_use) + continue; + s->ac3dsp.update_bap_counts(mant_cnt[blk], + s->ref_bap[ch][blk] + start, + FFMIN(end, block->end_freq[ch]) - start); + } +} + + +/** + * Count the number of mantissa bits in the frame based on the bap values. + */ +static int count_mantissa_bits(AC3EncodeContext *s) +{ + int ch, max_end_freq; + LOCAL_ALIGNED_16(uint16_t, mant_cnt, [AC3_MAX_BLOCKS], [16]); + + count_mantissa_bits_init(mant_cnt); + + max_end_freq = s->bandwidth_code * 3 + 73; + for (ch = !s->cpl_enabled; ch <= s->channels; ch++) + count_mantissa_bits_update_ch(s, ch, mant_cnt, s->start_freq[ch], + max_end_freq); + + return s->ac3dsp.compute_mantissa_size(mant_cnt); +} + + +/** * Run the bit allocation with a given SNR offset. * This calculates the bit allocation pointers that will be used to determine * the quantization of each mantissa. 
@@ -990,42 +949,27 @@ static void reset_block_bap(AC3EncodeContext *s) static int bit_alloc(AC3EncodeContext *s, int snr_offset) { int blk, ch; - int mantissa_bits; - int mant_cnt[5]; snr_offset = (snr_offset - 240) << 2; reset_block_bap(s); - mantissa_bits = 0; for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) { AC3Block *block = &s->blocks[blk]; - AC3Block *ref_block; - // initialize grouped mantissa counts. these are set so that they are - // padded to the next whole group size when bits are counted in - // compute_mantissa_size_final - mant_cnt[0] = mant_cnt[3] = 0; - mant_cnt[1] = mant_cnt[2] = 2; - mant_cnt[4] = 1; - for (ch = 0; ch < s->channels; ch++) { + + for (ch = !block->cpl_in_use; ch <= s->channels; ch++) { /* Currently the only bit allocation parameters which vary across blocks within a frame are the exponent values. We can take advantage of that by reusing the bit allocation pointers whenever we reuse exponents. */ - ref_block = block->exp_ref_block[ch]; if (s->exp_strategy[ch][blk] != EXP_REUSE) { - s->ac3dsp.bit_alloc_calc_bap(ref_block->mask[ch], - ref_block->psd[ch], 0, - s->nb_coefs[ch], snr_offset, - s->bit_alloc.floor, ff_ac3_bap_tab, - ref_block->bap[ch]); + s->ac3dsp.bit_alloc_calc_bap(block->mask[ch], block->psd[ch], + s->start_freq[ch], block->end_freq[ch], + snr_offset, s->bit_alloc.floor, + ff_ac3_bap_tab, s->ref_bap[ch][blk]); } - mantissa_bits += s->ac3dsp.compute_mantissa_size(mant_cnt, - ref_block->bap[ch], - s->nb_coefs[ch]); } - mantissa_bits += compute_mantissa_size_final(mant_cnt); } - return mantissa_bits; + return count_mantissa_bits(s); } @@ -1047,7 +991,7 @@ static int cbr_bit_allocation(AC3EncodeContext *s) /* if previous frame SNR offset was 1023, check if current frame can also use SNR offset of 1023. if so, skip the search. 
*/ - if ((snr_offset | s->fine_snr_offset[0]) == 1023) { + if ((snr_offset | s->fine_snr_offset[1]) == 1023) { if (bit_alloc(s, 1023) <= bits_left) return 0; } @@ -1071,7 +1015,7 @@ static int cbr_bit_allocation(AC3EncodeContext *s) reset_block_bap(s); s->coarse_snr_offset = snr_offset >> 4; - for (ch = 0; ch < s->channels; ch++) + for (ch = !s->cpl_on; ch <= s->channels; ch++) s->fine_snr_offset[ch] = snr_offset & 0xF; return 0; @@ -1089,26 +1033,26 @@ static int downgrade_exponents(AC3EncodeContext *s) { int ch, blk; - for (ch = 0; ch < s->fbw_channels; ch++) { - for (blk = AC3_MAX_BLOCKS-1; blk >= 0; blk--) { + for (blk = AC3_MAX_BLOCKS-1; blk >= 0; blk--) { + for (ch = !s->blocks[blk].cpl_in_use; ch <= s->fbw_channels; ch++) { if (s->exp_strategy[ch][blk] == EXP_D15) { s->exp_strategy[ch][blk] = EXP_D25; return 0; } } } - for (ch = 0; ch < s->fbw_channels; ch++) { - for (blk = AC3_MAX_BLOCKS-1; blk >= 0; blk--) { + for (blk = AC3_MAX_BLOCKS-1; blk >= 0; blk--) { + for (ch = !s->blocks[blk].cpl_in_use; ch <= s->fbw_channels; ch++) { if (s->exp_strategy[ch][blk] == EXP_D25) { s->exp_strategy[ch][blk] = EXP_D45; return 0; } } } - for (ch = 0; ch < s->fbw_channels; ch++) { - /* block 0 cannot reuse exponents, so only downgrade D45 to REUSE if - the block number > 0 */ - for (blk = AC3_MAX_BLOCKS-1; blk > 0; blk--) { + /* block 0 cannot reuse exponents, so only downgrade D45 to REUSE if + the block number > 0 */ + for (blk = AC3_MAX_BLOCKS-1; blk > 0; blk--) { + for (ch = !s->blocks[blk].cpl_in_use; ch <= s->fbw_channels; ch++) { if (s->exp_strategy[ch][blk] > EXP_REUSE) { s->exp_strategy[ch][blk] = EXP_REUSE; return 0; @@ -1135,7 +1079,18 @@ static int compute_bit_allocation(AC3EncodeContext *s) ret = cbr_bit_allocation(s); while (ret) { - /* fallback 1: downgrade exponents */ + /* fallback 1: disable channel coupling */ + if (s->cpl_on) { + s->cpl_on = 0; + compute_coupling_strategy(s); + s->compute_rematrixing_strategy(s); + apply_rematrixing(s); + 
process_exponents(s); + ret = compute_bit_allocation(s); + continue; + } + + /* fallback 2: downgrade exponents */ if (!downgrade_exponents(s)) { extract_exponents(s); encode_exponents(s); @@ -1189,12 +1144,13 @@ static inline int asym_quant(int c, int e, int qbits) * Quantize a set of mantissas for a single channel in a single block. */ static void quantize_mantissas_blk_ch(AC3Mant *s, int32_t *fixed_coef, - uint8_t *exp, - uint8_t *bap, uint16_t *qmant, int n) + uint8_t *exp, uint8_t *bap, + uint16_t *qmant, int start_freq, + int end_freq) { int i; - for (i = 0; i < n; i++) { + for (i = start_freq; i < end_freq; i++) { int v; int c = fixed_coef[i]; int e = exp[i]; @@ -1284,19 +1240,25 @@ static void quantize_mantissas_blk_ch(AC3Mant *s, int32_t *fixed_coef, */ static void quantize_mantissas(AC3EncodeContext *s) { - int blk, ch; - + int blk, ch, ch0=0, got_cpl; for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) { AC3Block *block = &s->blocks[blk]; - AC3Block *ref_block; AC3Mant m = { 0 }; - for (ch = 0; ch < s->channels; ch++) { - ref_block = block->exp_ref_block[ch]; + got_cpl = !block->cpl_in_use; + for (ch = 1; ch <= s->channels; ch++) { + if (!got_cpl && ch > 1 && block->channel_in_cpl[ch-1]) { + ch0 = ch - 1; + ch = CPL_CH; + got_cpl = 1; + } quantize_mantissas_blk_ch(&m, block->fixed_coef[ch], - ref_block->exp[ch], ref_block->bap[ch], - block->qmant[ch], s->nb_coefs[ch]); + s->blocks[s->exp_ref_block[ch][blk]].exp[ch], + s->ref_bap[ch][blk], block->qmant[ch], + s->start_freq[ch], block->end_freq[ch]); + if (ch == CPL_CH) + ch = ch0; } } } @@ -1305,7 +1267,7 @@ static void quantize_mantissas(AC3EncodeContext *s) /** * Write the AC-3 frame header to the output bitstream. 
*/ -static void output_frame_header(AC3EncodeContext *s) +static void ac3_output_frame_header(AC3EncodeContext *s) { AC3EncOptions *opt = &s->options; @@ -1363,101 +1325,177 @@ static void output_frame_header(AC3EncodeContext *s) */ static void output_audio_block(AC3EncodeContext *s, int blk) { - int ch, i, baie, rbnd; + int ch, i, baie, bnd, got_cpl; + int av_uninit(ch0); AC3Block *block = &s->blocks[blk]; /* block switching */ - for (ch = 0; ch < s->fbw_channels; ch++) - put_bits(&s->pb, 1, 0); + if (!s->eac3) { + for (ch = 0; ch < s->fbw_channels; ch++) + put_bits(&s->pb, 1, 0); + } /* dither flags */ - for (ch = 0; ch < s->fbw_channels; ch++) - put_bits(&s->pb, 1, 1); + if (!s->eac3) { + for (ch = 0; ch < s->fbw_channels; ch++) + put_bits(&s->pb, 1, 1); + } /* dynamic range codes */ put_bits(&s->pb, 1, 0); + /* spectral extension */ + if (s->eac3) + put_bits(&s->pb, 1, 0); + /* channel coupling */ - if (!blk) { - put_bits(&s->pb, 1, 1); /* coupling strategy present */ - put_bits(&s->pb, 1, 0); /* no coupling strategy */ - } else { - put_bits(&s->pb, 1, 0); /* no new coupling strategy */ + if (!s->eac3) + put_bits(&s->pb, 1, block->new_cpl_strategy); + if (block->new_cpl_strategy) { + if (!s->eac3) + put_bits(&s->pb, 1, block->cpl_in_use); + if (block->cpl_in_use) { + int start_sub, end_sub; + if (s->eac3) + put_bits(&s->pb, 1, 0); /* enhanced coupling */ + if (!s->eac3 || s->channel_mode != AC3_CHMODE_STEREO) { + for (ch = 1; ch <= s->fbw_channels; ch++) + put_bits(&s->pb, 1, block->channel_in_cpl[ch]); + } + if (s->channel_mode == AC3_CHMODE_STEREO) + put_bits(&s->pb, 1, 0); /* phase flags in use */ + start_sub = (s->start_freq[CPL_CH] - 37) / 12; + end_sub = (s->cpl_end_freq - 37) / 12; + put_bits(&s->pb, 4, start_sub); + put_bits(&s->pb, 4, end_sub - 3); + /* coupling band structure */ + if (s->eac3) { + put_bits(&s->pb, 1, 0); /* use default */ + } else { + for (bnd = start_sub+1; bnd < end_sub; bnd++) + put_bits(&s->pb, 1, 
ff_eac3_default_cpl_band_struct[bnd]); + } + } + } + + /* coupling coordinates */ + if (block->cpl_in_use) { + for (ch = 1; ch <= s->fbw_channels; ch++) { + if (block->channel_in_cpl[ch]) { + if (!s->eac3 || block->new_cpl_coords != 2) + put_bits(&s->pb, 1, block->new_cpl_coords); + if (block->new_cpl_coords) { + put_bits(&s->pb, 2, block->cpl_master_exp[ch]); + for (bnd = 0; bnd < s->num_cpl_bands; bnd++) { + put_bits(&s->pb, 4, block->cpl_coord_exp [ch][bnd]); + put_bits(&s->pb, 4, block->cpl_coord_mant[ch][bnd]); + } + } + } + } } /* stereo rematrixing */ if (s->channel_mode == AC3_CHMODE_STEREO) { - put_bits(&s->pb, 1, block->new_rematrixing_strategy); + if (!s->eac3 || blk > 0) + put_bits(&s->pb, 1, block->new_rematrixing_strategy); if (block->new_rematrixing_strategy) { /* rematrixing flags */ - for (rbnd = 0; rbnd < s->num_rematrixing_bands; rbnd++) - put_bits(&s->pb, 1, block->rematrixing_flags[rbnd]); + for (bnd = 0; bnd < block->num_rematrixing_bands; bnd++) + put_bits(&s->pb, 1, block->rematrixing_flags[bnd]); } } /* exponent strategy */ - for (ch = 0; ch < s->fbw_channels; ch++) - put_bits(&s->pb, 2, s->exp_strategy[ch][blk]); - if (s->lfe_on) - put_bits(&s->pb, 1, s->exp_strategy[s->lfe_channel][blk]); + if (!s->eac3) { + for (ch = !block->cpl_in_use; ch <= s->fbw_channels; ch++) + put_bits(&s->pb, 2, s->exp_strategy[ch][blk]); + if (s->lfe_on) + put_bits(&s->pb, 1, s->exp_strategy[s->lfe_channel][blk]); + } /* bandwidth */ - for (ch = 0; ch < s->fbw_channels; ch++) { - if (s->exp_strategy[ch][blk] != EXP_REUSE) + for (ch = 1; ch <= s->fbw_channels; ch++) { + if (s->exp_strategy[ch][blk] != EXP_REUSE && !block->channel_in_cpl[ch]) put_bits(&s->pb, 6, s->bandwidth_code); } /* exponents */ - for (ch = 0; ch < s->channels; ch++) { + for (ch = !block->cpl_in_use; ch <= s->channels; ch++) { int nb_groups; + int cpl = (ch == CPL_CH); if (s->exp_strategy[ch][blk] == EXP_REUSE) continue; /* DC exponent */ - put_bits(&s->pb, 4, block->grouped_exp[ch][0]); + 
put_bits(&s->pb, 4, block->grouped_exp[ch][0] >> cpl); /* exponent groups */ - nb_groups = exponent_group_tab[s->exp_strategy[ch][blk]-1][s->nb_coefs[ch]]; + nb_groups = exponent_group_tab[cpl][s->exp_strategy[ch][blk]-1][block->end_freq[ch]-s->start_freq[ch]]; for (i = 1; i <= nb_groups; i++) put_bits(&s->pb, 7, block->grouped_exp[ch][i]); /* gain range info */ - if (ch != s->lfe_channel) + if (ch != s->lfe_channel && !cpl) put_bits(&s->pb, 2, 0); } /* bit allocation info */ - baie = (blk == 0); - put_bits(&s->pb, 1, baie); - if (baie) { - put_bits(&s->pb, 2, s->slow_decay_code); - put_bits(&s->pb, 2, s->fast_decay_code); - put_bits(&s->pb, 2, s->slow_gain_code); - put_bits(&s->pb, 2, s->db_per_bit_code); - put_bits(&s->pb, 3, s->floor_code); + if (!s->eac3) { + baie = (blk == 0); + put_bits(&s->pb, 1, baie); + if (baie) { + put_bits(&s->pb, 2, s->slow_decay_code); + put_bits(&s->pb, 2, s->fast_decay_code); + put_bits(&s->pb, 2, s->slow_gain_code); + put_bits(&s->pb, 2, s->db_per_bit_code); + put_bits(&s->pb, 3, s->floor_code); + } } /* snr offset */ - put_bits(&s->pb, 1, baie); - if (baie) { - put_bits(&s->pb, 6, s->coarse_snr_offset); - for (ch = 0; ch < s->channels; ch++) { - put_bits(&s->pb, 4, s->fine_snr_offset[ch]); - put_bits(&s->pb, 3, s->fast_gain_code[ch]); + if (!s->eac3) { + put_bits(&s->pb, 1, block->new_snr_offsets); + if (block->new_snr_offsets) { + put_bits(&s->pb, 6, s->coarse_snr_offset); + for (ch = !block->cpl_in_use; ch <= s->channels; ch++) { + put_bits(&s->pb, 4, s->fine_snr_offset[ch]); + put_bits(&s->pb, 3, s->fast_gain_code[ch]); + } } + } else { + put_bits(&s->pb, 1, 0); /* no converter snr offset */ } - put_bits(&s->pb, 1, 0); /* no delta bit allocation */ - put_bits(&s->pb, 1, 0); /* no data to skip */ + /* coupling leak */ + if (block->cpl_in_use) { + if (!s->eac3 || block->new_cpl_leak != 2) + put_bits(&s->pb, 1, block->new_cpl_leak); + if (block->new_cpl_leak) { + put_bits(&s->pb, 3, s->bit_alloc.cpl_fast_leak); + put_bits(&s->pb, 
3, s->bit_alloc.cpl_slow_leak); + } + } + + if (!s->eac3) { + put_bits(&s->pb, 1, 0); /* no delta bit allocation */ + put_bits(&s->pb, 1, 0); /* no data to skip */ + } /* mantissas */ - for (ch = 0; ch < s->channels; ch++) { + got_cpl = !block->cpl_in_use; + for (ch = 1; ch <= s->channels; ch++) { int b, q; - AC3Block *ref_block = block->exp_ref_block[ch]; - for (i = 0; i < s->nb_coefs[ch]; i++) { + + if (!got_cpl && ch > 1 && block->channel_in_cpl[ch-1]) { + ch0 = ch - 1; + ch = CPL_CH; + got_cpl = 1; + } + for (i = s->start_freq[ch]; i < block->end_freq[ch]; i++) { q = block->qmant[ch][i]; - b = ref_block->bap[ch][i]; + b = s->ref_bap[ch][blk][i]; switch (b) { case 0: break; case 1: if (q != 128) put_bits(&s->pb, 5, q); break; @@ -1469,6 +1507,8 @@ static void output_audio_block(AC3EncodeContext *s, int blk) default: put_bits(&s->pb, b-1, q); break; } } + if (ch == CPL_CH) + ch = ch0; } } @@ -1528,6 +1568,10 @@ static void output_frame_end(AC3EncodeContext *s) if (pad_bytes > 0) memset(put_bits_ptr(&s->pb), 0, pad_bytes); + if (s->eac3) { + /* compute crc2 */ + crc2_partial = av_crc(crc_ctx, 0, frame + 2, s->frame_size - 5); + } else { /* compute crc1 */ /* this is not so easy because it is at the beginning of the data... 
*/ crc1 = av_bswap16(av_crc(crc_ctx, 0, frame + 4, frame_size_58 - 4)); @@ -1538,6 +1582,7 @@ static void output_frame_end(AC3EncodeContext *s) /* compute crc2 */ crc2_partial = av_crc(crc_ctx, 0, frame + frame_size_58, s->frame_size - frame_size_58 - 3); + } crc2 = av_crc(crc_ctx, crc2_partial, frame + s->frame_size - 3, 1); /* ensure crc2 does not match sync word by flipping crcrsv bit if needed */ if (crc2 == 0x770B) { @@ -1558,7 +1603,7 @@ static void output_frame(AC3EncodeContext *s, unsigned char *frame) init_put_bits(&s->pb, frame, AC3_MAX_CODED_FRAME_SIZE); - output_frame_header(s); + s->output_frame_header(s); for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) output_audio_block(s, blk); @@ -1575,10 +1620,11 @@ static void dprint_options(AVCodecContext *avctx) char strbuf[32]; switch (s->bitstream_id) { - case 6: av_strlcpy(strbuf, "AC-3 (alt syntax)", 32); break; - case 8: av_strlcpy(strbuf, "AC-3 (standard)", 32); break; - case 9: av_strlcpy(strbuf, "AC-3 (dnet half-rate)", 32); break; - case 10: av_strlcpy(strbuf, "AC-3 (dnet quater-rate", 32); break; + case 6: av_strlcpy(strbuf, "AC-3 (alt syntax)", 32); break; + case 8: av_strlcpy(strbuf, "AC-3 (standard)", 32); break; + case 9: av_strlcpy(strbuf, "AC-3 (dnet half-rate)", 32); break; + case 10: av_strlcpy(strbuf, "AC-3 (dnet quater-rate)", 32); break; + case 16: av_strlcpy(strbuf, "E-AC-3 (enhanced)", 32); break; default: snprintf(strbuf, 32, "ERROR"); } av_dlog(avctx, "bitstream_id: %s (%d)\n", strbuf, s->bitstream_id); @@ -1832,29 +1878,35 @@ static int validate_metadata(AVCodecContext *avctx) /** * Encode a single AC-3 frame. 
*/ -static int ac3_encode_frame(AVCodecContext *avctx, unsigned char *frame, - int buf_size, void *data) +int ff_ac3_encode_frame(AVCodecContext *avctx, unsigned char *frame, + int buf_size, void *data) { AC3EncodeContext *s = avctx->priv_data; const SampleType *samples = data; int ret; - if (s->options.allow_per_frame_metadata) { + if (!s->eac3 && s->options.allow_per_frame_metadata) { ret = validate_metadata(avctx); if (ret) return ret; } - if (s->bit_alloc.sr_code == 1) + if (s->bit_alloc.sr_code == 1 || s->eac3) adjust_frame_size(s); - deinterleave_input_samples(s, samples); + s->deinterleave_input_samples(s, samples); - apply_mdct(s); + s->apply_mdct(s); - scale_coefficients(s); + s->scale_coefficients(s); - compute_rematrixing_strategy(s); + s->cpl_on = s->cpl_enabled; + compute_coupling_strategy(s); + + if (s->cpl_on) + s->apply_channel_coupling(s); + + s->compute_rematrixing_strategy(s); apply_rematrixing(s); @@ -1877,11 +1929,12 @@ static int ac3_encode_frame(AVCodecContext *avctx, unsigned char *frame, /** * Finalize encoding and free any memory allocated by the encoder. 
*/ -static av_cold int ac3_encode_close(AVCodecContext *avctx) +av_cold int ff_ac3_encode_close(AVCodecContext *avctx) { int blk, ch; AC3EncodeContext *s = avctx->priv_data; + av_freep(&s->windowed_samples); for (ch = 0; ch < s->channels; ch++) av_freep(&s->planar_samples[ch]); av_freep(&s->planar_samples); @@ -1897,7 +1950,6 @@ static av_cold int ac3_encode_close(AVCodecContext *avctx) av_freep(&s->qmant_buffer); for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) { AC3Block *block = &s->blocks[blk]; - av_freep(&block->bap); av_freep(&block->mdct_coef); av_freep(&block->fixed_coef); av_freep(&block->exp); @@ -1908,7 +1960,8 @@ static av_cold int ac3_encode_close(AVCodecContext *avctx) av_freep(&block->qmant); } - mdct_end(&s->mdct); + s->mdct_end(s->mdct); + av_freep(&s->mdct); av_freep(&avctx->coded_frame); return 0; @@ -1934,7 +1987,7 @@ static av_cold int set_channel_info(AC3EncodeContext *s, int channels, s->lfe_on = !!(ch_layout & AV_CH_LOW_FREQUENCY); s->channels = channels; s->fbw_channels = channels - s->lfe_on; - s->lfe_channel = s->lfe_on ? s->fbw_channels : -1; + s->lfe_channel = s->lfe_on ? s->fbw_channels + 1 : -1; if (s->lfe_on) ch_layout -= AV_CH_LOW_FREQUENCY; @@ -1965,7 +2018,7 @@ static av_cold int set_channel_info(AC3EncodeContext *s, int channels, static av_cold int validate_options(AVCodecContext *avctx, AC3EncodeContext *s) { - int i, ret; + int i, ret, max_sr; /* validate channel layout */ if (!avctx->channel_layout) { @@ -1980,30 +2033,72 @@ static av_cold int validate_options(AVCodecContext *avctx, AC3EncodeContext *s) } /* validate sample rate */ - for (i = 0; i < 9; i++) { - if ((ff_ac3_sample_rate_tab[i / 3] >> (i % 3)) == avctx->sample_rate) + /* note: max_sr could be changed from 2 to 5 for E-AC-3 once we find a + decoder that supports half sample rate so we can validate that + the generated files are correct. */ + max_sr = s->eac3 ? 
2 : 8; + for (i = 0; i <= max_sr; i++) { + if ((ff_ac3_sample_rate_tab[i % 3] >> (i / 3)) == avctx->sample_rate) break; } - if (i == 9) { + if (i > max_sr) { av_log(avctx, AV_LOG_ERROR, "invalid sample rate\n"); return AVERROR(EINVAL); } s->sample_rate = avctx->sample_rate; - s->bit_alloc.sr_shift = i % 3; - s->bit_alloc.sr_code = i / 3; - s->bitstream_id = 8 + s->bit_alloc.sr_shift; + s->bit_alloc.sr_shift = i / 3; + s->bit_alloc.sr_code = i % 3; + s->bitstream_id = s->eac3 ? 16 : 8 + s->bit_alloc.sr_shift; /* validate bit rate */ - for (i = 0; i < 19; i++) { - if ((ff_ac3_bitrate_tab[i] >> s->bit_alloc.sr_shift)*1000 == avctx->bit_rate) - break; - } - if (i == 19) { - av_log(avctx, AV_LOG_ERROR, "invalid bit rate\n"); - return AVERROR(EINVAL); + if (s->eac3) { + int max_br, min_br, wpf, min_br_dist, min_br_code; + + /* calculate min/max bitrate */ + max_br = 2048 * s->sample_rate / AC3_FRAME_SIZE * 16; + min_br = ((s->sample_rate + (AC3_FRAME_SIZE-1)) / AC3_FRAME_SIZE) * 16; + if (avctx->bit_rate < min_br || avctx->bit_rate > max_br) { + av_log(avctx, AV_LOG_ERROR, "invalid bit rate. must be %d to %d " + "for this sample rate\n", min_br, max_br); + return AVERROR(EINVAL); + } + + /* calculate words-per-frame for the selected bitrate */ + wpf = (avctx->bit_rate / 16) * AC3_FRAME_SIZE / s->sample_rate; + av_assert1(wpf > 0 && wpf <= 2048); + + /* find the closest AC-3 bitrate code to the selected bitrate. 
+ this is needed for lookup tables for bandwidth and coupling + parameter selection */ + min_br_code = -1; + min_br_dist = INT_MAX; + for (i = 0; i < 19; i++) { + int br_dist = abs(ff_ac3_bitrate_tab[i] * 1000 - avctx->bit_rate); + if (br_dist < min_br_dist) { + min_br_dist = br_dist; + min_br_code = i; + } + } + + /* make sure the minimum frame size is below the average frame size */ + s->frame_size_code = min_br_code << 1; + while (wpf > 1 && wpf * s->sample_rate / AC3_FRAME_SIZE * 16 > avctx->bit_rate) + wpf--; + s->frame_size_min = 2 * wpf; + } else { + for (i = 0; i < 19; i++) { + if ((ff_ac3_bitrate_tab[i] >> s->bit_alloc.sr_shift)*1000 == avctx->bit_rate) + break; + } + if (i == 19) { + av_log(avctx, AV_LOG_ERROR, "invalid bit rate\n"); + return AVERROR(EINVAL); + } + s->frame_size_code = i << 1; + s->frame_size_min = 2 * ff_ac3_frame_size_tab[s->frame_size_code][s->bit_alloc.sr_code]; } - s->bit_rate = avctx->bit_rate; - s->frame_size_code = i << 1; + s->bit_rate = avctx->bit_rate; + s->frame_size = s->frame_size_min; /* validate cutoff */ if (avctx->cutoff < 0) { @@ -2026,13 +2121,18 @@ static av_cold int validate_options(AVCodecContext *avctx, AC3EncodeContext *s) return AVERROR(EINVAL); } - ret = validate_metadata(avctx); - if (ret) - return ret; + if (!s->eac3) { + ret = validate_metadata(avctx); + if (ret) + return ret; + } s->rematrixing_enabled = s->options.stereo_rematrixing && (s->channel_mode == AC3_CHMODE_STEREO); + s->cpl_enabled = s->options.channel_coupling && + s->channel_mode >= AC3_CHMODE_STEREO && !s->fixed_point; + return 0; } @@ -2044,7 +2144,8 @@ static av_cold int validate_options(AVCodecContext *avctx, AC3EncodeContext *s) */ static av_cold void set_bandwidth(AC3EncodeContext *s) { - int ch; + int blk, ch; + int av_uninit(cpl_start); if (s->cutoff) { /* calculate bandwidth based on user-specified cutoff frequency */ @@ -2057,11 +2158,54 @@ static av_cold void set_bandwidth(AC3EncodeContext *s) } /* set number of coefficients for each 
channel */ - for (ch = 0; ch < s->fbw_channels; ch++) { - s->nb_coefs[ch] = s->bandwidth_code * 3 + 73; + for (ch = 1; ch <= s->fbw_channels; ch++) { + s->start_freq[ch] = 0; + for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) + s->blocks[blk].end_freq[ch] = s->bandwidth_code * 3 + 73; + } + /* LFE channel always has 7 coefs */ + if (s->lfe_on) { + s->start_freq[s->lfe_channel] = 0; + for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) + s->blocks[blk].end_freq[ch] = 7; + } + + /* initialize coupling strategy */ + if (s->cpl_enabled) { + if (s->options.cpl_start >= 0) { + cpl_start = s->options.cpl_start; + } else { + cpl_start = ac3_coupling_start_tab[s->channel_mode-2][s->bit_alloc.sr_code][s->frame_size_code/2]; + if (cpl_start < 0) + s->cpl_enabled = 0; + } + } + if (s->cpl_enabled) { + int i, cpl_start_band, cpl_end_band; + uint8_t *cpl_band_sizes = s->cpl_band_sizes; + + cpl_end_band = s->bandwidth_code / 4 + 3; + cpl_start_band = av_clip(cpl_start, 0, FFMIN(cpl_end_band-1, 15)); + + s->num_cpl_subbands = cpl_end_band - cpl_start_band; + + s->num_cpl_bands = 1; + *cpl_band_sizes = 12; + for (i = cpl_start_band + 1; i < cpl_end_band; i++) { + if (ff_eac3_default_cpl_band_struct[i]) { + *cpl_band_sizes += 12; + } else { + s->num_cpl_bands++; + cpl_band_sizes++; + *cpl_band_sizes = 12; + } + } + + s->start_freq[CPL_CH] = cpl_start_band * 12 + 37; + s->cpl_end_freq = cpl_end_band * 12 + 37; + for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) + s->blocks[blk].end_freq[CPL_CH] = s->cpl_end_freq; } - if (s->lfe_on) - s->nb_coefs[s->lfe_channel] = 7; /* LFE channel always has 7 coefs */ } @@ -2069,82 +2213,92 @@ static av_cold int allocate_buffers(AVCodecContext *avctx) { int blk, ch; AC3EncodeContext *s = avctx->priv_data; + int channels = s->channels + 1; /* includes coupling channel */ - FF_ALLOC_OR_GOTO(avctx, s->planar_samples, s->channels * sizeof(*s->planar_samples), - alloc_fail); - for (ch = 0; ch < s->channels; ch++) { - FF_ALLOCZ_OR_GOTO(avctx, s->planar_samples[ch], - 
(AC3_FRAME_SIZE+AC3_BLOCK_SIZE) * sizeof(**s->planar_samples), - alloc_fail); - } - FF_ALLOC_OR_GOTO(avctx, s->bap_buffer, AC3_MAX_BLOCKS * s->channels * + if (s->allocate_sample_buffers(s)) + goto alloc_fail; + + FF_ALLOC_OR_GOTO(avctx, s->bap_buffer, AC3_MAX_BLOCKS * channels * AC3_MAX_COEFS * sizeof(*s->bap_buffer), alloc_fail); - FF_ALLOC_OR_GOTO(avctx, s->bap1_buffer, AC3_MAX_BLOCKS * s->channels * + FF_ALLOC_OR_GOTO(avctx, s->bap1_buffer, AC3_MAX_BLOCKS * channels * AC3_MAX_COEFS * sizeof(*s->bap1_buffer), alloc_fail); - FF_ALLOC_OR_GOTO(avctx, s->mdct_coef_buffer, AC3_MAX_BLOCKS * s->channels * - AC3_MAX_COEFS * sizeof(*s->mdct_coef_buffer), alloc_fail); - FF_ALLOC_OR_GOTO(avctx, s->exp_buffer, AC3_MAX_BLOCKS * s->channels * + FF_ALLOCZ_OR_GOTO(avctx, s->mdct_coef_buffer, AC3_MAX_BLOCKS * channels * + AC3_MAX_COEFS * sizeof(*s->mdct_coef_buffer), alloc_fail); + FF_ALLOC_OR_GOTO(avctx, s->exp_buffer, AC3_MAX_BLOCKS * channels * AC3_MAX_COEFS * sizeof(*s->exp_buffer), alloc_fail); - FF_ALLOC_OR_GOTO(avctx, s->grouped_exp_buffer, AC3_MAX_BLOCKS * s->channels * + FF_ALLOC_OR_GOTO(avctx, s->grouped_exp_buffer, AC3_MAX_BLOCKS * channels * 128 * sizeof(*s->grouped_exp_buffer), alloc_fail); - FF_ALLOC_OR_GOTO(avctx, s->psd_buffer, AC3_MAX_BLOCKS * s->channels * + FF_ALLOC_OR_GOTO(avctx, s->psd_buffer, AC3_MAX_BLOCKS * channels * AC3_MAX_COEFS * sizeof(*s->psd_buffer), alloc_fail); - FF_ALLOC_OR_GOTO(avctx, s->band_psd_buffer, AC3_MAX_BLOCKS * s->channels * + FF_ALLOC_OR_GOTO(avctx, s->band_psd_buffer, AC3_MAX_BLOCKS * channels * 64 * sizeof(*s->band_psd_buffer), alloc_fail); - FF_ALLOC_OR_GOTO(avctx, s->mask_buffer, AC3_MAX_BLOCKS * s->channels * + FF_ALLOC_OR_GOTO(avctx, s->mask_buffer, AC3_MAX_BLOCKS * channels * 64 * sizeof(*s->mask_buffer), alloc_fail); - FF_ALLOC_OR_GOTO(avctx, s->qmant_buffer, AC3_MAX_BLOCKS * s->channels * + FF_ALLOC_OR_GOTO(avctx, s->qmant_buffer, AC3_MAX_BLOCKS * channels * AC3_MAX_COEFS * sizeof(*s->qmant_buffer), alloc_fail); + if 
(s->cpl_enabled) { + FF_ALLOC_OR_GOTO(avctx, s->cpl_coord_exp_buffer, AC3_MAX_BLOCKS * channels * + 16 * sizeof(*s->cpl_coord_exp_buffer), alloc_fail); + FF_ALLOC_OR_GOTO(avctx, s->cpl_coord_mant_buffer, AC3_MAX_BLOCKS * channels * + 16 * sizeof(*s->cpl_coord_mant_buffer), alloc_fail); + } for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) { AC3Block *block = &s->blocks[blk]; - FF_ALLOC_OR_GOTO(avctx, block->bap, s->channels * sizeof(*block->bap), - alloc_fail); - FF_ALLOCZ_OR_GOTO(avctx, block->mdct_coef, s->channels * sizeof(*block->mdct_coef), + FF_ALLOCZ_OR_GOTO(avctx, block->mdct_coef, channels * sizeof(*block->mdct_coef), alloc_fail); - FF_ALLOCZ_OR_GOTO(avctx, block->exp, s->channels * sizeof(*block->exp), + FF_ALLOCZ_OR_GOTO(avctx, block->exp, channels * sizeof(*block->exp), alloc_fail); - FF_ALLOCZ_OR_GOTO(avctx, block->grouped_exp, s->channels * sizeof(*block->grouped_exp), + FF_ALLOCZ_OR_GOTO(avctx, block->grouped_exp, channels * sizeof(*block->grouped_exp), alloc_fail); - FF_ALLOCZ_OR_GOTO(avctx, block->psd, s->channels * sizeof(*block->psd), + FF_ALLOCZ_OR_GOTO(avctx, block->psd, channels * sizeof(*block->psd), alloc_fail); - FF_ALLOCZ_OR_GOTO(avctx, block->band_psd, s->channels * sizeof(*block->band_psd), + FF_ALLOCZ_OR_GOTO(avctx, block->band_psd, channels * sizeof(*block->band_psd), alloc_fail); - FF_ALLOCZ_OR_GOTO(avctx, block->mask, s->channels * sizeof(*block->mask), + FF_ALLOCZ_OR_GOTO(avctx, block->mask, channels * sizeof(*block->mask), alloc_fail); - FF_ALLOCZ_OR_GOTO(avctx, block->qmant, s->channels * sizeof(*block->qmant), + FF_ALLOCZ_OR_GOTO(avctx, block->qmant, channels * sizeof(*block->qmant), alloc_fail); + if (s->cpl_enabled) { + FF_ALLOCZ_OR_GOTO(avctx, block->cpl_coord_exp, channels * sizeof(*block->cpl_coord_exp), + alloc_fail); + FF_ALLOCZ_OR_GOTO(avctx, block->cpl_coord_mant, channels * sizeof(*block->cpl_coord_mant), + alloc_fail); + } - for (ch = 0; ch < s->channels; ch++) { + for (ch = 0; ch < channels; ch++) { /* arrangement: block, 
channel, coeff */ - block->bap[ch] = &s->bap_buffer [AC3_MAX_COEFS * (blk * s->channels + ch)]; - block->mdct_coef[ch] = &s->mdct_coef_buffer [AC3_MAX_COEFS * (blk * s->channels + ch)]; - block->grouped_exp[ch] = &s->grouped_exp_buffer[128 * (blk * s->channels + ch)]; - block->psd[ch] = &s->psd_buffer [AC3_MAX_COEFS * (blk * s->channels + ch)]; - block->band_psd[ch] = &s->band_psd_buffer [64 * (blk * s->channels + ch)]; - block->mask[ch] = &s->mask_buffer [64 * (blk * s->channels + ch)]; - block->qmant[ch] = &s->qmant_buffer [AC3_MAX_COEFS * (blk * s->channels + ch)]; + block->grouped_exp[ch] = &s->grouped_exp_buffer[128 * (blk * channels + ch)]; + block->psd[ch] = &s->psd_buffer [AC3_MAX_COEFS * (blk * channels + ch)]; + block->band_psd[ch] = &s->band_psd_buffer [64 * (blk * channels + ch)]; + block->mask[ch] = &s->mask_buffer [64 * (blk * channels + ch)]; + block->qmant[ch] = &s->qmant_buffer [AC3_MAX_COEFS * (blk * channels + ch)]; + if (s->cpl_enabled) { + block->cpl_coord_exp[ch] = &s->cpl_coord_exp_buffer [16 * (blk * channels + ch)]; + block->cpl_coord_mant[ch] = &s->cpl_coord_mant_buffer[16 * (blk * channels + ch)]; + } /* arrangement: channel, block, coeff */ block->exp[ch] = &s->exp_buffer [AC3_MAX_COEFS * (AC3_MAX_BLOCKS * ch + blk)]; + block->mdct_coef[ch] = &s->mdct_coef_buffer [AC3_MAX_COEFS * (AC3_MAX_BLOCKS * ch + blk)]; } } - if (CONFIG_AC3ENC_FLOAT) { - FF_ALLOC_OR_GOTO(avctx, s->fixed_coef_buffer, AC3_MAX_BLOCKS * s->channels * - AC3_MAX_COEFS * sizeof(*s->fixed_coef_buffer), alloc_fail); + if (!s->fixed_point) { + FF_ALLOCZ_OR_GOTO(avctx, s->fixed_coef_buffer, AC3_MAX_BLOCKS * channels * + AC3_MAX_COEFS * sizeof(*s->fixed_coef_buffer), alloc_fail); for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) { AC3Block *block = &s->blocks[blk]; - FF_ALLOCZ_OR_GOTO(avctx, block->fixed_coef, s->channels * + FF_ALLOCZ_OR_GOTO(avctx, block->fixed_coef, channels * sizeof(*block->fixed_coef), alloc_fail); - for (ch = 0; ch < s->channels; ch++) - block->fixed_coef[ch] = 
&s->fixed_coef_buffer[AC3_MAX_COEFS * (blk * s->channels + ch)]; + for (ch = 0; ch < channels; ch++) + block->fixed_coef[ch] = &s->fixed_coef_buffer[AC3_MAX_COEFS * (AC3_MAX_BLOCKS * ch + blk)]; } } else { for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) { AC3Block *block = &s->blocks[blk]; - FF_ALLOCZ_OR_GOTO(avctx, block->fixed_coef, s->channels * + FF_ALLOCZ_OR_GOTO(avctx, block->fixed_coef, channels * sizeof(*block->fixed_coef), alloc_fail); - for (ch = 0; ch < s->channels; ch++) + for (ch = 0; ch < channels; ch++) block->fixed_coef[ch] = (int32_t *)block->mdct_coef[ch]; } } @@ -2158,11 +2312,15 @@ alloc_fail: /** * Initialize the encoder. */ -static av_cold int ac3_encode_init(AVCodecContext *avctx) +av_cold int ff_ac3_encode_init(AVCodecContext *avctx) { AC3EncodeContext *s = avctx->priv_data; int ret, frame_size_58; + s->avctx = avctx; + + s->eac3 = avctx->codec_id == CODEC_ID_EAC3; + avctx->frame_size = AC3_FRAME_SIZE; ff_ac3_common_init(); @@ -2175,10 +2333,8 @@ static av_cold int ac3_encode_init(AVCodecContext *avctx) if (s->bitstream_mode == AV_AUDIO_SERVICE_TYPE_KARAOKE) s->bitstream_mode = 0x7; - s->frame_size_min = 2 * ff_ac3_frame_size_tab[s->frame_size_code][s->bit_alloc.sr_code]; s->bits_written = 0; s->samples_written = 0; - s->frame_size = s->frame_size_min; /* calculate crc_inv for both possible frame sizes */ frame_size_58 = (( s->frame_size >> 2) + ( s->frame_size >> 4)) << 1; @@ -2188,13 +2344,42 @@ static av_cold int ac3_encode_init(AVCodecContext *avctx) s->crc_inv[1] = pow_poly((CRC16_POLY >> 1), (8 * frame_size_58) - 16, CRC16_POLY); } + /* set function pointers */ + if (CONFIG_AC3_FIXED_ENCODER && s->fixed_point) { + s->mdct_end = ff_ac3_fixed_mdct_end; + s->mdct_init = ff_ac3_fixed_mdct_init; + s->apply_window = ff_ac3_fixed_apply_window; + s->normalize_samples = ff_ac3_fixed_normalize_samples; + s->scale_coefficients = ff_ac3_fixed_scale_coefficients; + s->allocate_sample_buffers = ff_ac3_fixed_allocate_sample_buffers; + 
s->deinterleave_input_samples = ff_ac3_fixed_deinterleave_input_samples; + s->apply_mdct = ff_ac3_fixed_apply_mdct; + s->apply_channel_coupling = ff_ac3_fixed_apply_channel_coupling; + s->compute_rematrixing_strategy = ff_ac3_fixed_compute_rematrixing_strategy; + } else if (CONFIG_AC3_ENCODER || CONFIG_EAC3_ENCODER) { + s->mdct_end = ff_ac3_float_mdct_end; + s->mdct_init = ff_ac3_float_mdct_init; + s->apply_window = ff_ac3_float_apply_window; + s->scale_coefficients = ff_ac3_float_scale_coefficients; + s->allocate_sample_buffers = ff_ac3_float_allocate_sample_buffers; + s->deinterleave_input_samples = ff_ac3_float_deinterleave_input_samples; + s->apply_mdct = ff_ac3_float_apply_mdct; + s->apply_channel_coupling = ff_ac3_float_apply_channel_coupling; + s->compute_rematrixing_strategy = ff_ac3_float_compute_rematrixing_strategy; + } + if (CONFIG_EAC3_ENCODER && s->eac3) + s->output_frame_header = ff_eac3_output_frame_header; + else + s->output_frame_header = ac3_output_frame_header; + set_bandwidth(s); exponent_init(s); bit_alloc_init(s); - ret = mdct_init(avctx, &s->mdct, 9); + FF_ALLOCZ_OR_GOTO(avctx, s->mdct, sizeof(AC3MDCTContext), init_fail); + ret = s->mdct_init(avctx, s->mdct, 9); if (ret) goto init_fail; @@ -2211,6 +2396,6 @@ static av_cold int ac3_encode_init(AVCodecContext *avctx) return 0; init_fail: - ac3_encode_close(avctx); + ff_ac3_encode_close(avctx); return ret; } diff --git a/libavcodec/ac3enc.h b/libavcodec/ac3enc.h new file mode 100644 index 0000000000..2d04f26303 --- /dev/null +++ b/libavcodec/ac3enc.h @@ -0,0 +1,296 @@ +/* + * AC-3 encoder & E-AC-3 encoder common header + * Copyright (c) 2000 Fabrice Bellard + * Copyright (c) 2006-2010 Justin Ruggles <justin.ruggles@gmail.com> + * + * This file is part of Libav. 
+ * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * AC-3 encoder & E-AC-3 encoder common header + */ + +#ifndef AVCODEC_AC3ENC_H +#define AVCODEC_AC3ENC_H + +#include <stdint.h> +#include "ac3.h" +#include "ac3dsp.h" +#include "avcodec.h" +#include "dsputil.h" +#include "put_bits.h" +#include "fft.h" + +#ifndef CONFIG_AC3ENC_FLOAT +#define CONFIG_AC3ENC_FLOAT 0 +#endif + +#define OFFSET(param) offsetof(AC3EncodeContext, options.param) +#define AC3ENC_PARAM (AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM) + +#define AC3ENC_TYPE_AC3_FIXED 0 +#define AC3ENC_TYPE_AC3 1 +#define AC3ENC_TYPE_EAC3 2 + +#if CONFIG_AC3ENC_FLOAT +#define AC3_NAME(x) ff_ac3_float_ ## x +#define MAC_COEF(d,a,b) ((d)+=(a)*(b)) +typedef float SampleType; +typedef float CoefType; +typedef float CoefSumType; +#else +#define AC3_NAME(x) ff_ac3_fixed_ ## x +#define MAC_COEF(d,a,b) MAC64(d,a,b) +typedef int16_t SampleType; +typedef int32_t CoefType; +typedef int64_t CoefSumType; +#endif + + +typedef struct AC3MDCTContext { + const SampleType *window; ///< MDCT window function + FFTContext fft; ///< FFT context for MDCT calculation +} AC3MDCTContext; +#if 0 +/** + * Encoding Options used by AVOption. 
+ */ +typedef struct AC3EncOptions { + /* AC-3 metadata options*/ + int dialogue_level; + int bitstream_mode; + float center_mix_level; + float surround_mix_level; + int dolby_surround_mode; + int audio_production_info; + int mixing_level; + int room_type; + int copyright; + int original; + int extended_bsi_1; + int preferred_stereo_downmix; + float ltrt_center_mix_level; + float ltrt_surround_mix_level; + float loro_center_mix_level; + float loro_surround_mix_level; + int extended_bsi_2; + int dolby_surround_ex_mode; + int dolby_headphone_mode; + int ad_converter_type; + + /* other encoding options */ + int allow_per_frame_metadata; + int stereo_rematrixing; + int channel_coupling; + int cpl_start; +} AC3EncOptions; +#endif +/** + * Data for a single audio block. + */ +typedef struct AC3Block { + CoefType **mdct_coef; ///< MDCT coefficients + int32_t **fixed_coef; ///< fixed-point MDCT coefficients + uint8_t **exp; ///< original exponents + uint8_t **grouped_exp; ///< grouped exponents + int16_t **psd; ///< psd per frequency bin + int16_t **band_psd; ///< psd per critical band + int16_t **mask; ///< masking curve + uint16_t **qmant; ///< quantized mantissas + uint8_t **cpl_coord_exp; ///< coupling coord exponents (cplcoexp) + uint8_t **cpl_coord_mant; ///< coupling coord mantissas (cplcomant) + uint8_t coeff_shift[AC3_MAX_CHANNELS]; ///< fixed-point coefficient shift values + uint8_t new_rematrixing_strategy; ///< send new rematrixing flags in this block + int num_rematrixing_bands; ///< number of rematrixing bands + uint8_t rematrixing_flags[4]; ///< rematrixing flags + int new_cpl_strategy; ///< send new coupling strategy + int cpl_in_use; ///< coupling in use for this block (cplinu) + uint8_t channel_in_cpl[AC3_MAX_CHANNELS]; ///< channel in coupling (chincpl) + int num_cpl_channels; ///< number of channels in coupling + uint8_t new_cpl_coords; ///< send new coupling coordinates (cplcoe) + uint8_t cpl_master_exp[AC3_MAX_CHANNELS]; ///< coupling coord master 
exponents (mstrcplco) + int new_snr_offsets; ///< send new SNR offsets + int new_cpl_leak; ///< send new coupling leak info + int end_freq[AC3_MAX_CHANNELS]; ///< end frequency bin (endmant) +} AC3Block; + +/** + * AC-3 encoder private context. + */ +typedef struct AC3EncodeContext { + AVClass *av_class; ///< AVClass used for AVOption + AC3EncOptions options; ///< encoding options + AVCodecContext *avctx; ///< parent AVCodecContext + PutBitContext pb; ///< bitstream writer context + DSPContext dsp; + AC3DSPContext ac3dsp; ///< AC-3 optimized functions + AC3MDCTContext *mdct; ///< MDCT context + + AC3Block blocks[AC3_MAX_BLOCKS]; ///< per-block info + + int fixed_point; ///< indicates if fixed-point encoder is being used + int eac3; ///< indicates if this is E-AC-3 vs. AC-3 + int bitstream_id; ///< bitstream id (bsid) + int bitstream_mode; ///< bitstream mode (bsmod) + + int bit_rate; ///< target bit rate, in bits-per-second + int sample_rate; ///< sampling frequency, in Hz + + int frame_size_min; ///< minimum frame size in case rounding is necessary + int frame_size; ///< current frame size in bytes + int frame_size_code; ///< frame size code (frmsizecod) + uint16_t crc_inv[2]; + int64_t bits_written; ///< bit count (used to avg. bitrate) + int64_t samples_written; ///< sample count (used to avg. 
bitrate) + + int fbw_channels; ///< number of full-bandwidth channels (nfchans) + int channels; ///< total number of channels (nchans) + int lfe_on; ///< indicates if there is an LFE channel (lfeon) + int lfe_channel; ///< channel index of the LFE channel + int has_center; ///< indicates if there is a center channel + int has_surround; ///< indicates if there are one or more surround channels + int channel_mode; ///< channel mode (acmod) + const uint8_t *channel_map; ///< channel map used to reorder channels + + int center_mix_level; ///< center mix level code + int surround_mix_level; ///< surround mix level code + int ltrt_center_mix_level; ///< Lt/Rt center mix level code + int ltrt_surround_mix_level; ///< Lt/Rt surround mix level code + int loro_center_mix_level; ///< Lo/Ro center mix level code + int loro_surround_mix_level; ///< Lo/Ro surround mix level code + + int cutoff; ///< user-specified cutoff frequency, in Hz + int bandwidth_code; ///< bandwidth code (0 to 60) (chbwcod) + int start_freq[AC3_MAX_CHANNELS]; ///< start frequency bin (strtmant) + int cpl_end_freq; ///< coupling channel end frequency bin + + int cpl_on; ///< coupling turned on for this frame + int cpl_enabled; ///< coupling enabled for all frames + int num_cpl_subbands; ///< number of coupling subbands (ncplsubnd) + int num_cpl_bands; ///< number of coupling bands (ncplbnd) + uint8_t cpl_band_sizes[AC3_MAX_CPL_BANDS]; ///< number of coeffs in each coupling band + + int rematrixing_enabled; ///< stereo rematrixing enabled + + /* bitrate allocation control */ + int slow_gain_code; ///< slow gain code (sgaincod) + int slow_decay_code; ///< slow decay code (sdcycod) + int fast_decay_code; ///< fast decay code (fdcycod) + int db_per_bit_code; ///< dB/bit code (dbpbcod) + int floor_code; ///< floor code (floorcod) + AC3BitAllocParameters bit_alloc; ///< bit allocation parameters + int coarse_snr_offset; ///< coarse SNR offsets (csnroffst) + int fast_gain_code[AC3_MAX_CHANNELS]; ///< fast gain 
codes (signal-to-mask ratio) (fgaincod) + int fine_snr_offset[AC3_MAX_CHANNELS]; ///< fine SNR offsets (fsnroffst) + int frame_bits_fixed; ///< number of non-coefficient bits for fixed parameters + int frame_bits; ///< all frame bits except exponents and mantissas + int exponent_bits; ///< number of bits used for exponents + + SampleType *windowed_samples; + SampleType **planar_samples; + uint8_t *bap_buffer; + uint8_t *bap1_buffer; + CoefType *mdct_coef_buffer; + int32_t *fixed_coef_buffer; + uint8_t *exp_buffer; + uint8_t *grouped_exp_buffer; + int16_t *psd_buffer; + int16_t *band_psd_buffer; + int16_t *mask_buffer; + uint16_t *qmant_buffer; + uint8_t *cpl_coord_exp_buffer; + uint8_t *cpl_coord_mant_buffer; + + uint8_t exp_strategy[AC3_MAX_CHANNELS][AC3_MAX_BLOCKS]; ///< exponent strategies + uint8_t exp_ref_block[AC3_MAX_CHANNELS][AC3_MAX_BLOCKS]; ///< reference blocks for EXP_REUSE + uint8_t *ref_bap [AC3_MAX_CHANNELS][AC3_MAX_BLOCKS]; ///< bit allocation pointers (bap) + int ref_bap_set; ///< indicates if ref_bap pointers have been set + + /* fixed vs. float function pointers */ + void (*mdct_end)(AC3MDCTContext *mdct); + int (*mdct_init)(AVCodecContext *avctx, AC3MDCTContext *mdct, int nbits); + void (*apply_window)(DSPContext *dsp, SampleType *output, + const SampleType *input, const SampleType *window, + unsigned int len); + int (*normalize_samples)(struct AC3EncodeContext *s); + void (*scale_coefficients)(struct AC3EncodeContext *s); + + /* fixed vs. float templated function pointers */ + int (*allocate_sample_buffers)(struct AC3EncodeContext *s); + void (*deinterleave_input_samples)(struct AC3EncodeContext *s, + const SampleType *samples); + void (*apply_mdct)(struct AC3EncodeContext *s); + void (*apply_channel_coupling)(struct AC3EncodeContext *s); + void (*compute_rematrixing_strategy)(struct AC3EncodeContext *s); + + /* AC-3 vs. 
E-AC-3 function pointers */ + void (*output_frame_header)(struct AC3EncodeContext *s); +} AC3EncodeContext; + + +int ff_ac3_encode_init(AVCodecContext *avctx); + +int ff_ac3_encode_frame(AVCodecContext *avctx, unsigned char *frame, + int buf_size, void *data); + +int ff_ac3_encode_close(AVCodecContext *avctx); + + +/* prototypes for functions in ac3enc_fixed.c and ac3enc_float.c */ + +void ff_ac3_fixed_mdct_end(AC3MDCTContext *mdct); +void ff_ac3_float_mdct_end(AC3MDCTContext *mdct); + +int ff_ac3_fixed_mdct_init(AVCodecContext *avctx, AC3MDCTContext *mdct, + int nbits); +int ff_ac3_float_mdct_init(AVCodecContext *avctx, AC3MDCTContext *mdct, + int nbits); + +void ff_ac3_fixed_apply_window(DSPContext *dsp, SampleType *output, + const SampleType *input, + const SampleType *window, unsigned int len); +void ff_ac3_float_apply_window(DSPContext *dsp, SampleType *output, + const SampleType *input, + const SampleType *window, unsigned int len); + +int ff_ac3_fixed_normalize_samples(AC3EncodeContext *s); + +void ff_ac3_fixed_scale_coefficients(AC3EncodeContext *s); +void ff_ac3_float_scale_coefficients(AC3EncodeContext *s); + + +/* prototypes for functions in ac3enc_template.c */ + +int ff_ac3_fixed_allocate_sample_buffers(AC3EncodeContext *s); +int ff_ac3_float_allocate_sample_buffers(AC3EncodeContext *s); + +void ff_ac3_fixed_deinterleave_input_samples(AC3EncodeContext *s, + const SampleType *samples); +void ff_ac3_float_deinterleave_input_samples(AC3EncodeContext *s, + const SampleType *samples); + +void ff_ac3_fixed_apply_mdct(AC3EncodeContext *s); +void ff_ac3_float_apply_mdct(AC3EncodeContext *s); + +void ff_ac3_fixed_apply_channel_coupling(AC3EncodeContext *s); +void ff_ac3_float_apply_channel_coupling(AC3EncodeContext *s); + +void ff_ac3_fixed_compute_rematrixing_strategy(AC3EncodeContext *s); +void ff_ac3_float_compute_rematrixing_strategy(AC3EncodeContext *s); + +#endif /* AVCODEC_AC3ENC_H */ diff --git a/libavcodec/ac3enc_combined.c 
b/libavcodec/ac3enc_combined.c index 3d6b3d9335..db832f1f03 100644 --- a/libavcodec/ac3enc_combined.c +++ b/libavcodec/ac3enc_combined.c @@ -11,8 +11,18 @@ typedef struct CombineContext{ AVCodec *codec; }CombineContext; +#define OFFSET(param) offsetof(CombineContext, options.param) +#define AC3ENC_PARAM (AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM) + +#define AC3ENC_TYPE_AC3_FIXED 0 +#define AC3ENC_TYPE_AC3 1 +#define AC3ENC_TYPE_EAC3 2 + +#define AC3ENC_TYPE 12354 +#include "ac3enc_opts_template.c" + static AVClass ac3enc_class = { "AC-3 Encoder", av_default_item_name, - ff_ac3_options, LIBAVUTIL_VERSION_INT }; + eac3_options, LIBAVUTIL_VERSION_INT }; static av_cold AVCodec *get_codec(enum AVSampleFormat s){ #if CONFIG_AC3_FIXED_ENCODER diff --git a/libavcodec/ac3enc_fixed.c b/libavcodec/ac3enc_fixed.c index 24c11680a6..f4d447e3b2 100644 --- a/libavcodec/ac3enc_fixed.c +++ b/libavcodec/ac3enc_fixed.c @@ -28,13 +28,20 @@ #define CONFIG_FFT_FLOAT 0 #undef CONFIG_AC3ENC_FLOAT -#include "ac3enc.c" +#include "ac3enc.h" + +#define AC3ENC_TYPE AC3ENC_TYPE_AC3_FIXED +#include "ac3enc_opts_template.c" +static AVClass ac3enc_class = { "Fixed-Point AC-3 Encoder", av_default_item_name, + ac3fixed_options, LIBAVUTIL_VERSION_INT }; + +#include "ac3enc_template.c" /** * Finalize MDCT and free allocated memory. */ -static av_cold void mdct_end(AC3MDCTContext *mdct) +av_cold void AC3_NAME(mdct_end)(AC3MDCTContext *mdct) { ff_mdct_end(&mdct->fft); } @@ -44,8 +51,8 @@ static av_cold void mdct_end(AC3MDCTContext *mdct) * Initialize MDCT tables. 
* @param nbits log2(MDCT size) */ -static av_cold int mdct_init(AVCodecContext *avctx, AC3MDCTContext *mdct, - int nbits) +av_cold int AC3_NAME(mdct_init)(AVCodecContext *avctx, AC3MDCTContext *mdct, + int nbits) { int ret = ff_mdct_init(&mdct->fft, nbits, 0, -1.0); mdct->window = ff_ac3_window; @@ -56,8 +63,9 @@ static av_cold int mdct_init(AVCodecContext *avctx, AC3MDCTContext *mdct, /** * Apply KBD window to input samples prior to MDCT. */ -static void apply_window(DSPContext *dsp, int16_t *output, const int16_t *input, - const int16_t *window, unsigned int len) +void AC3_NAME(apply_window)(DSPContext *dsp, int16_t *output, + const int16_t *input, const int16_t *window, + unsigned int len) { dsp->apply_window_int16(output, input, window, len); } @@ -82,7 +90,7 @@ static int log2_tab(AC3EncodeContext *s, int16_t *src, int len) * * @return exponent shift */ -static int normalize_samples(AC3EncodeContext *s) +int AC3_NAME(normalize_samples)(AC3EncodeContext *s) { int v = 14 - log2_tab(s, s->windowed_samples, AC3_WINDOW_SIZE); if (v > 0) @@ -95,13 +103,13 @@ static int normalize_samples(AC3EncodeContext *s) /** * Scale MDCT coefficients to 25-bit signed fixed-point. 
*/ -static void scale_coefficients(AC3EncodeContext *s) +void AC3_NAME(scale_coefficients)(AC3EncodeContext *s) { int blk, ch; for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) { AC3Block *block = &s->blocks[blk]; - for (ch = 0; ch < s->channels; ch++) { + for (ch = 1; ch <= s->channels; ch++) { s->ac3dsp.ac3_rshift_int32(block->mdct_coef[ch], AC3_MAX_COEFS, block->coeff_shift[ch]); } @@ -109,14 +117,22 @@ static void scale_coefficients(AC3EncodeContext *s) } +static av_cold int ac3_fixed_encode_init(AVCodecContext *avctx) +{ + AC3EncodeContext *s = avctx->priv_data; + s->fixed_point = 1; + return ff_ac3_encode_init(avctx); +} + + AVCodec ff_ac3_fixed_encoder = { "ac3_fixed", AVMEDIA_TYPE_AUDIO, CODEC_ID_AC3, sizeof(AC3EncodeContext), - ac3_encode_init, - ac3_encode_frame, - ac3_encode_close, + ac3_fixed_encode_init, + ff_ac3_encode_frame, + ff_ac3_encode_close, NULL, .sample_fmts = (const enum AVSampleFormat[]){AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_NONE}, .long_name = NULL_IF_CONFIG_SMALL("ATSC A/52A (AC-3)"), diff --git a/libavcodec/ac3enc_float.c b/libavcodec/ac3enc_float.c index 1a145e5ab2..9e798106f3 100644 --- a/libavcodec/ac3enc_float.c +++ b/libavcodec/ac3enc_float.c @@ -27,14 +27,25 @@ */ #define CONFIG_AC3ENC_FLOAT 1 -#include "ac3enc.c" +#include "ac3enc.h" +#include "eac3enc.h" #include "kbdwin.h" +#if CONFIG_AC3_ENCODER +#define AC3ENC_TYPE AC3ENC_TYPE_AC3 +#include "ac3enc_opts_template.c" +static AVClass ac3enc_class = { "AC-3 Encoder", av_default_item_name, + ac3_options, LIBAVUTIL_VERSION_INT }; +#endif + +#include "ac3enc_template.c" + + /** * Finalize MDCT and free allocated memory. */ -static av_cold void mdct_end(AC3MDCTContext *mdct) +av_cold void ff_ac3_float_mdct_end(AC3MDCTContext *mdct) { ff_mdct_end(&mdct->fft); av_freep(&mdct->window); @@ -45,8 +56,8 @@ static av_cold void mdct_end(AC3MDCTContext *mdct) * Initialize MDCT tables. 
* @param nbits log2(MDCT size) */ -static av_cold int mdct_init(AVCodecContext *avctx, AC3MDCTContext *mdct, - int nbits) +av_cold int ff_ac3_float_mdct_init(AVCodecContext *avctx, AC3MDCTContext *mdct, + int nbits) { float *window; int i, n, n2; @@ -71,44 +82,39 @@ static av_cold int mdct_init(AVCodecContext *avctx, AC3MDCTContext *mdct, /** * Apply KBD window to input samples prior to MDCT. */ -static void apply_window(DSPContext *dsp, float *output, const float *input, - const float *window, unsigned int len) +void ff_ac3_float_apply_window(DSPContext *dsp, float *output, + const float *input, const float *window, + unsigned int len) { dsp->vector_fmul(output, input, window, len); } /** - * Normalize the input samples to use the maximum available precision. - */ -static int normalize_samples(AC3EncodeContext *s) -{ - /* Normalization is not needed for floating-point samples, so just return 0 */ - return 0; -} - - -/** * Scale MDCT coefficients from float to 24-bit fixed-point. */ -static void scale_coefficients(AC3EncodeContext *s) +void ff_ac3_float_scale_coefficients(AC3EncodeContext *s) { - s->ac3dsp.float_to_fixed24(s->fixed_coef_buffer, s->mdct_coef_buffer, - AC3_MAX_COEFS * AC3_MAX_BLOCKS * s->channels); + int chan_size = AC3_MAX_COEFS * AC3_MAX_BLOCKS; + s->ac3dsp.float_to_fixed24(s->fixed_coef_buffer + chan_size, + s->mdct_coef_buffer + chan_size, + chan_size * s->channels); } +#if CONFIG_AC3_ENCODER AVCodec ff_ac3_float_encoder = { "ac3_float", AVMEDIA_TYPE_AUDIO, CODEC_ID_AC3, sizeof(AC3EncodeContext), - ac3_encode_init, - ac3_encode_frame, - ac3_encode_close, + ff_ac3_encode_init, + ff_ac3_encode_frame, + ff_ac3_encode_close, NULL, .sample_fmts = (const enum AVSampleFormat[]){AV_SAMPLE_FMT_FLT,AV_SAMPLE_FMT_NONE}, .long_name = NULL_IF_CONFIG_SMALL("ATSC A/52A (AC-3)"), .priv_class = &ac3enc_class, .channel_layouts = ff_ac3_channel_layouts, }; +#endif diff --git a/libavcodec/ac3enc_opts_template.c b/libavcodec/ac3enc_opts_template.c new file mode 
100644 index 0000000000..39138a1083 --- /dev/null +++ b/libavcodec/ac3enc_opts_template.c @@ -0,0 +1,84 @@ +/* + * AC-3 encoder options + * Copyright (c) 2011 Justin Ruggles <justin.ruggles@gmail.com> + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/opt.h" +#include "ac3.h" + +#if AC3ENC_TYPE == AC3ENC_TYPE_AC3_FIXED +static const AVOption ac3fixed_options[] = { +#elif AC3ENC_TYPE == AC3ENC_TYPE_AC3 +static const AVOption ac3_options[] = { +#else /* AC3ENC_TYPE_EAC3 */ +static const AVOption eac3_options[] = { +#endif +#if AC3ENC_TYPE != AC3ENC_TYPE_EAC3 +/* Metadata Options */ +{"per_frame_metadata", "Allow Changing Metadata Per-Frame", OFFSET(allow_per_frame_metadata), FF_OPT_TYPE_INT, {.dbl = 0 }, 0, 1, AC3ENC_PARAM}, +/* downmix levels */ +{"center_mixlev", "Center Mix Level", OFFSET(center_mix_level), FF_OPT_TYPE_FLOAT, {.dbl = LEVEL_MINUS_4POINT5DB }, 0.0, 1.0, AC3ENC_PARAM}, +{"surround_mixlev", "Surround Mix Level", OFFSET(surround_mix_level), FF_OPT_TYPE_FLOAT, {.dbl = LEVEL_MINUS_6DB }, 0.0, 1.0, AC3ENC_PARAM}, +/* audio production information */ +{"mixing_level", "Mixing Level", OFFSET(mixing_level), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, 111, AC3ENC_PARAM}, +{"room_type", "Room Type", OFFSET(room_type), FF_OPT_TYPE_INT, 
{.dbl = -1 }, -1, 2, AC3ENC_PARAM, "room_type"}, + {"notindicated", "Not Indicated (default)", 0, FF_OPT_TYPE_CONST, {.dbl = 0 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "room_type"}, + {"large", "Large Room", 0, FF_OPT_TYPE_CONST, {.dbl = 1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "room_type"}, + {"small", "Small Room", 0, FF_OPT_TYPE_CONST, {.dbl = 2 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "room_type"}, +/* other metadata options */ +{"copyright", "Copyright Bit", OFFSET(copyright), FF_OPT_TYPE_INT, {.dbl = 0 }, 0, 1, AC3ENC_PARAM}, +#endif +{"dialnorm", "Dialogue Level (dB)", OFFSET(dialogue_level), FF_OPT_TYPE_INT, {.dbl = -31 }, -31, -1, AC3ENC_PARAM}, +#if AC3ENC_TYPE != AC3ENC_TYPE_EAC3 +{"dsur_mode", "Dolby Surround Mode", OFFSET(dolby_surround_mode), FF_OPT_TYPE_INT, {.dbl = 0 }, 0, 2, AC3ENC_PARAM, "dsur_mode"}, + {"notindicated", "Not Indicated (default)", 0, FF_OPT_TYPE_CONST, {.dbl = 0 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dsur_mode"}, + {"on", "Dolby Surround Encoded", 0, FF_OPT_TYPE_CONST, {.dbl = 1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dsur_mode"}, + {"off", "Not Dolby Surround Encoded", 0, FF_OPT_TYPE_CONST, {.dbl = 2 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dsur_mode"}, +{"original", "Original Bit Stream", OFFSET(original), FF_OPT_TYPE_INT, {.dbl = 1 }, 0, 1, AC3ENC_PARAM}, +/* extended bitstream information */ +{"dmix_mode", "Preferred Stereo Downmix Mode", OFFSET(preferred_stereo_downmix), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, 2, AC3ENC_PARAM, "dmix_mode"}, + {"notindicated", "Not Indicated (default)", 0, FF_OPT_TYPE_CONST, {.dbl = 0 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dmix_mode"}, + {"ltrt", "Lt/Rt Downmix Preferred", 0, FF_OPT_TYPE_CONST, {.dbl = 1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dmix_mode"}, + {"loro", "Lo/Ro Downmix Preferred", 0, FF_OPT_TYPE_CONST, {.dbl = 2 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dmix_mode"}, +{"ltrt_cmixlev", "Lt/Rt Center Mix Level", OFFSET(ltrt_center_mix_level), FF_OPT_TYPE_FLOAT, {.dbl = -1.0 }, -1.0, 2.0, AC3ENC_PARAM}, +{"ltrt_surmixlev", "Lt/Rt 
Surround Mix Level", OFFSET(ltrt_surround_mix_level), FF_OPT_TYPE_FLOAT, {.dbl = -1.0 }, -1.0, 2.0, AC3ENC_PARAM}, +{"loro_cmixlev", "Lo/Ro Center Mix Level", OFFSET(loro_center_mix_level), FF_OPT_TYPE_FLOAT, {.dbl = -1.0 }, -1.0, 2.0, AC3ENC_PARAM}, +{"loro_surmixlev", "Lo/Ro Surround Mix Level", OFFSET(loro_surround_mix_level), FF_OPT_TYPE_FLOAT, {.dbl = -1.0 }, -1.0, 2.0, AC3ENC_PARAM}, +{"dsurex_mode", "Dolby Surround EX Mode", OFFSET(dolby_surround_ex_mode), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, 2, AC3ENC_PARAM, "dsurex_mode"}, + {"notindicated", "Not Indicated (default)", 0, FF_OPT_TYPE_CONST, {.dbl = 0 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dsurex_mode"}, + {"on", "Dolby Surround EX Encoded", 0, FF_OPT_TYPE_CONST, {.dbl = 1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dsurex_mode"}, + {"off", "Not Dolby Surround EX Encoded", 0, FF_OPT_TYPE_CONST, {.dbl = 2 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dsurex_mode"}, +{"dheadphone_mode", "Dolby Headphone Mode", OFFSET(dolby_headphone_mode), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, 2, AC3ENC_PARAM, "dheadphone_mode"}, + {"notindicated", "Not Indicated (default)", 0, FF_OPT_TYPE_CONST, {.dbl = 0 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dheadphone_mode"}, + {"on", "Dolby Headphone Encoded", 0, FF_OPT_TYPE_CONST, {.dbl = 1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dheadphone_mode"}, + {"off", "Not Dolby Headphone Encoded", 0, FF_OPT_TYPE_CONST, {.dbl = 2 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dheadphone_mode"}, +{"ad_conv_type", "A/D Converter Type", OFFSET(ad_converter_type), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, 1, AC3ENC_PARAM, "ad_conv_type"}, + {"standard", "Standard (default)", 0, FF_OPT_TYPE_CONST, {.dbl = 0 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "ad_conv_type"}, + {"hdcd", "HDCD", 0, FF_OPT_TYPE_CONST, {.dbl = 1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "ad_conv_type"}, +#endif +/* Other Encoding Options */ +{"stereo_rematrixing", "Stereo Rematrixing", OFFSET(stereo_rematrixing), FF_OPT_TYPE_INT, {.dbl = 1 }, 0, 1, AC3ENC_PARAM}, +#if AC3ENC_TYPE != 
AC3ENC_TYPE_AC3_FIXED +{"channel_coupling", "Channel Coupling", OFFSET(channel_coupling), FF_OPT_TYPE_INT, {.dbl = 1 }, 0, 1, AC3ENC_PARAM, "channel_coupling"}, + {"auto", "Selected by the Encoder", 0, FF_OPT_TYPE_CONST, {.dbl = -1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "channel_coupling"}, +{"cpl_start_band", "Coupling Start Band", OFFSET(cpl_start), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, 15, AC3ENC_PARAM, "cpl_start_band"}, + {"auto", "Selected by the Encoder", 0, FF_OPT_TYPE_CONST, {.dbl = -1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "cpl_start_band"}, +#endif +{NULL} +}; diff --git a/libavcodec/ac3enc_template.c b/libavcodec/ac3enc_template.c new file mode 100644 index 0000000000..f6248a82c9 --- /dev/null +++ b/libavcodec/ac3enc_template.c @@ -0,0 +1,399 @@ +/* + * AC-3 encoder float/fixed template + * Copyright (c) 2000 Fabrice Bellard + * Copyright (c) 2006-2011 Justin Ruggles <justin.ruggles@gmail.com> + * Copyright (c) 2006-2010 Prakash Punnoor <prakash@punnoor.de> + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * AC-3 encoder float/fixed template + */ + +#include <stdint.h> + +#include "ac3enc.h" + + +/** + * Allocate the windowed-sample scratch buffer and the per-channel planar + * sample buffers. + * The planar_samples pointer table is zero-initialized so that every entry + * is NULL if a later per-channel allocation fails; presumably the cleanup + * path frees each entry, which must not see indeterminate pointers. + * @return 0 on success, AVERROR(ENOMEM) if any allocation fails + */ +int AC3_NAME(allocate_sample_buffers)(AC3EncodeContext *s) +{ + int ch; + + FF_ALLOC_OR_GOTO(s->avctx, s->windowed_samples, AC3_WINDOW_SIZE * + sizeof(*s->windowed_samples), alloc_fail); + FF_ALLOCZ_OR_GOTO(s->avctx, s->planar_samples, s->channels * sizeof(*s->planar_samples), + alloc_fail); + for (ch = 0; ch < s->channels; ch++) { + FF_ALLOCZ_OR_GOTO(s->avctx, s->planar_samples[ch], + (AC3_FRAME_SIZE+AC3_BLOCK_SIZE) * sizeof(**s->planar_samples), + alloc_fail); + } + + return 0; +alloc_fail: + return AVERROR(ENOMEM); +} + + +/** + * Deinterleave input samples. + * Channels are reordered from Libav's default order to AC-3 order. + */ +void AC3_NAME(deinterleave_input_samples)(AC3EncodeContext *s, + const SampleType *samples) +{ + int ch, i; + + /* deinterleave and remap input samples */ + for (ch = 0; ch < s->channels; ch++) { + const SampleType *sptr; + int sinc; + + /* copy last 256 samples of previous frame to the start of the current frame */ + memcpy(&s->planar_samples[ch][0], &s->planar_samples[ch][AC3_FRAME_SIZE], + AC3_BLOCK_SIZE * sizeof(s->planar_samples[0][0])); + + /* deinterleave */ + sinc = s->channels; + sptr = samples + s->channel_map[ch]; + for (i = AC3_BLOCK_SIZE; i < AC3_FRAME_SIZE+AC3_BLOCK_SIZE; i++) { + s->planar_samples[ch][i] = *sptr; + sptr += sinc; + } + } +} + + +/** + * Apply the MDCT to input samples to generate frequency coefficients. + * This applies the KBD window and normalizes the input to reduce precision + * loss due to fixed-point calculations.
+ */ +void AC3_NAME(apply_mdct)(AC3EncodeContext *s) +{ + int blk, ch; + + for (ch = 0; ch < s->channels; ch++) { + for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) { + AC3Block *block = &s->blocks[blk]; + const SampleType *input_samples = &s->planar_samples[ch][blk * AC3_BLOCK_SIZE]; + + s->apply_window(&s->dsp, s->windowed_samples, input_samples, + s->mdct->window, AC3_WINDOW_SIZE); + + if (s->fixed_point) + block->coeff_shift[ch+1] = s->normalize_samples(s); + + s->mdct->fft.mdct_calcw(&s->mdct->fft, block->mdct_coef[ch+1], + s->windowed_samples); + } + } +} + + +/** + * Calculate a single coupling coordinate. + */ +static inline float calc_cpl_coord(float energy_ch, float energy_cpl) +{ + float coord = 0.125; + if (energy_cpl > 0) + coord *= sqrtf(energy_ch / energy_cpl); + return coord; +} + + +/** + * Calculate coupling channel and coupling coordinates. + * TODO: Currently this is only used for the floating-point encoder. I was + * able to make it work for the fixed-point encoder, but quality was + * generally lower in most cases than not using coupling. If a more + * adaptive coupling strategy were to be implemented it might be useful + * at that time to use coupling for the fixed-point encoder as well. + */ +void AC3_NAME(apply_channel_coupling)(AC3EncodeContext *s) +{ +#if CONFIG_AC3ENC_FLOAT + LOCAL_ALIGNED_16(float, cpl_coords, [AC3_MAX_BLOCKS], [AC3_MAX_CHANNELS][16]); + LOCAL_ALIGNED_16(int32_t, fixed_cpl_coords, [AC3_MAX_BLOCKS], [AC3_MAX_CHANNELS][16]); + int blk, ch, bnd, i, j; + CoefSumType energy[AC3_MAX_BLOCKS][AC3_MAX_CHANNELS][16] = {{{0}}}; + int cpl_start, num_cpl_coefs; + + memset(cpl_coords, 0, AC3_MAX_BLOCKS * sizeof(*cpl_coords)); + memset(fixed_cpl_coords, 0, AC3_MAX_BLOCKS * sizeof(*fixed_cpl_coords)); + + /* align start to 16-byte boundary. align length to multiple of 32. 
+ note: coupling start bin % 4 will always be 1 */ + cpl_start = s->start_freq[CPL_CH] - 1; + num_cpl_coefs = FFALIGN(s->num_cpl_subbands * 12 + 1, 32); + cpl_start = FFMIN(256, cpl_start + num_cpl_coefs) - num_cpl_coefs; + + /* calculate coupling channel from fbw channels */ + for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) { + AC3Block *block = &s->blocks[blk]; + CoefType *cpl_coef = &block->mdct_coef[CPL_CH][cpl_start]; + if (!block->cpl_in_use) + continue; + memset(cpl_coef, 0, num_cpl_coefs * sizeof(*cpl_coef)); + for (ch = 1; ch <= s->fbw_channels; ch++) { + CoefType *ch_coef = &block->mdct_coef[ch][cpl_start]; + if (!block->channel_in_cpl[ch]) + continue; + for (i = 0; i < num_cpl_coefs; i++) + cpl_coef[i] += ch_coef[i]; + } + + /* coefficients must be clipped to +/- 1.0 in order to be encoded */ + s->dsp.vector_clipf(cpl_coef, cpl_coef, -1.0f, 1.0f, num_cpl_coefs); + + /* scale coupling coefficients from float to 24-bit fixed-point */ + s->ac3dsp.float_to_fixed24(&block->fixed_coef[CPL_CH][cpl_start], + cpl_coef, num_cpl_coefs); + } + + /* calculate energy in each band in coupling channel and each fbw channel */ + /* TODO: possibly use SIMD to speed up energy calculation */ + bnd = 0; + i = s->start_freq[CPL_CH]; + while (i < s->cpl_end_freq) { + int band_size = s->cpl_band_sizes[bnd]; + for (ch = CPL_CH; ch <= s->fbw_channels; ch++) { + for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) { + AC3Block *block = &s->blocks[blk]; + if (!block->cpl_in_use || (ch > CPL_CH && !block->channel_in_cpl[ch])) + continue; + for (j = 0; j < band_size; j++) { + CoefType v = block->mdct_coef[ch][i+j]; + MAC_COEF(energy[blk][ch][bnd], v, v); + } + } + } + i += band_size; + bnd++; + } + + /* determine which blocks to send new coupling coordinates for */ + for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) { + AC3Block *block = &s->blocks[blk]; + AC3Block *block0 = blk ? 
&s->blocks[blk-1] : NULL; + int new_coords = 0; + CoefSumType coord_diff[AC3_MAX_CHANNELS] = {0,}; + + if (block->cpl_in_use) { + /* calculate coupling coordinates for all blocks and calculate the + average difference between coordinates in successive blocks */ + for (ch = 1; ch <= s->fbw_channels; ch++) { + if (!block->channel_in_cpl[ch]) + continue; + + for (bnd = 0; bnd < s->num_cpl_bands; bnd++) { + cpl_coords[blk][ch][bnd] = calc_cpl_coord(energy[blk][ch][bnd], + energy[blk][CPL_CH][bnd]); + if (blk > 0 && block0->cpl_in_use && + block0->channel_in_cpl[ch]) { + coord_diff[ch] += fabs(cpl_coords[blk-1][ch][bnd] - + cpl_coords[blk ][ch][bnd]); + } + } + coord_diff[ch] /= s->num_cpl_bands; + } + + /* send new coordinates if this is the first block, if previous + * block did not use coupling but this block does, the channels + * using coupling has changed from the previous block, or the + * coordinate difference from the last block for any channel is + * greater than a threshold value. 
*/ + if (blk == 0) { + new_coords = 1; + } else if (!block0->cpl_in_use) { + new_coords = 1; + } else { + for (ch = 1; ch <= s->fbw_channels; ch++) { + if (block->channel_in_cpl[ch] && !block0->channel_in_cpl[ch]) { + new_coords = 1; + break; + } + } + if (!new_coords) { + for (ch = 1; ch <= s->fbw_channels; ch++) { + if (block->channel_in_cpl[ch] && coord_diff[ch] > 0.04) { + new_coords = 1; + break; + } + } + } + } + } + block->new_cpl_coords = new_coords; + } + + /* calculate final coupling coordinates, taking into account reusing of + coordinates in successive blocks */ + for (bnd = 0; bnd < s->num_cpl_bands; bnd++) { + blk = 0; + while (blk < AC3_MAX_BLOCKS) { + int blk1; + CoefSumType energy_cpl; + AC3Block *block = &s->blocks[blk]; + + if (!block->cpl_in_use) { + blk++; + continue; + } + + energy_cpl = energy[blk][CPL_CH][bnd]; + blk1 = blk+1; + /* test the index before dereferencing: s->blocks[] has only + AC3_MAX_BLOCKS entries, so blk1 == AC3_MAX_BLOCKS must not be read */ + while (blk1 < AC3_MAX_BLOCKS && !s->blocks[blk1].new_cpl_coords) { + if (s->blocks[blk1].cpl_in_use) + energy_cpl += energy[blk1][CPL_CH][bnd]; + blk1++; + } + + for (ch = 1; ch <= s->fbw_channels; ch++) { + CoefType energy_ch; + if (!block->channel_in_cpl[ch]) + continue; + energy_ch = energy[blk][ch][bnd]; + blk1 = blk+1; + /* bound check first, for the same reason as the loop above */ + while (blk1 < AC3_MAX_BLOCKS && !s->blocks[blk1].new_cpl_coords) { + if (s->blocks[blk1].cpl_in_use) + energy_ch += energy[blk1][ch][bnd]; + blk1++; + } + cpl_coords[blk][ch][bnd] = calc_cpl_coord(energy_ch, energy_cpl); + } + blk = blk1; + } + } + + /* calculate exponents/mantissas for coupling coordinates */ + for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) { + AC3Block *block = &s->blocks[blk]; + if (!block->cpl_in_use || !block->new_cpl_coords) + continue; + + s->ac3dsp.float_to_fixed24(fixed_cpl_coords[blk][1], + cpl_coords[blk][1], + s->fbw_channels * 16); + s->ac3dsp.extract_exponents(block->cpl_coord_exp[1], + fixed_cpl_coords[blk][1], + s->fbw_channels * 16); + + for (ch = 1; ch <= s->fbw_channels; ch++) { + int bnd, min_exp, max_exp, master_exp; + + /* determine master exponent */ + min_exp
= max_exp = block->cpl_coord_exp[ch][0]; + for (bnd = 1; bnd < s->num_cpl_bands; bnd++) { + int exp = block->cpl_coord_exp[ch][bnd]; + min_exp = FFMIN(exp, min_exp); + max_exp = FFMAX(exp, max_exp); + } + master_exp = ((max_exp - 15) + 2) / 3; + master_exp = FFMAX(master_exp, 0); + while (min_exp < master_exp * 3) + master_exp--; + for (bnd = 0; bnd < s->num_cpl_bands; bnd++) { + block->cpl_coord_exp[ch][bnd] = av_clip(block->cpl_coord_exp[ch][bnd] - + master_exp * 3, 0, 15); + } + block->cpl_master_exp[ch] = master_exp; + + /* quantize mantissas */ + for (bnd = 0; bnd < s->num_cpl_bands; bnd++) { + int cpl_exp = block->cpl_coord_exp[ch][bnd]; + int cpl_mant = (fixed_cpl_coords[blk][ch][bnd] << (5 + cpl_exp + master_exp * 3)) >> 24; + if (cpl_exp == 15) + cpl_mant >>= 1; + else + cpl_mant -= 16; + + block->cpl_coord_mant[ch][bnd] = cpl_mant; + } + } + } + + if (CONFIG_EAC3_ENCODER && s->eac3) + ff_eac3_set_cpl_states(s); +#endif /* CONFIG_AC3ENC_FLOAT */ +} + + +/** + * Determine rematrixing flags for each block and band. 
+ */ +void AC3_NAME(compute_rematrixing_strategy)(AC3EncodeContext *s) +{ + int nb_coefs; + int blk, bnd, i; + AC3Block *block, *av_uninit(block0); + + if (s->channel_mode != AC3_CHMODE_STEREO) + return; + + for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) { + block = &s->blocks[blk]; + block->new_rematrixing_strategy = !blk; + + if (!s->rematrixing_enabled) { + block0 = block; + continue; + } + + block->num_rematrixing_bands = 4; + if (block->cpl_in_use) { + block->num_rematrixing_bands -= (s->start_freq[CPL_CH] <= 61); + block->num_rematrixing_bands -= (s->start_freq[CPL_CH] == 37); + if (blk && block->num_rematrixing_bands != block0->num_rematrixing_bands) + block->new_rematrixing_strategy = 1; + } + nb_coefs = FFMIN(block->end_freq[1], block->end_freq[2]); + + for (bnd = 0; bnd < block->num_rematrixing_bands; bnd++) { + /* calculate sum of squared coeffs for one band in one block */ + int start = ff_ac3_rematrix_band_tab[bnd]; + int end = FFMIN(nb_coefs, ff_ac3_rematrix_band_tab[bnd+1]); + CoefSumType sum[4] = {0,}; + for (i = start; i < end; i++) { + CoefType lt = block->mdct_coef[1][i]; + CoefType rt = block->mdct_coef[2][i]; + CoefType md = lt + rt; + CoefType sd = lt - rt; + MAC_COEF(sum[0], lt, lt); + MAC_COEF(sum[1], rt, rt); + MAC_COEF(sum[2], md, md); + MAC_COEF(sum[3], sd, sd); + } + + /* compare sums to determine if rematrixing will be used for this band */ + if (FFMIN(sum[2], sum[3]) < FFMIN(sum[0], sum[1])) + block->rematrixing_flags[bnd] = 1; + else + block->rematrixing_flags[bnd] = 0; + + /* determine if new rematrixing flags will be sent */ + if (blk && + block->rematrixing_flags[bnd] != block0->rematrixing_flags[bnd]) { + block->new_rematrixing_strategy = 1; + } + } + block0 = block; + } +} diff --git a/libavcodec/ac3tab.c b/libavcodec/ac3tab.c index fb8e9c7b52..3b3e715655 100644 --- a/libavcodec/ac3tab.c +++ b/libavcodec/ac3tab.c @@ -138,6 +138,13 @@ const uint16_t ff_ac3_bitrate_tab[19] = { */ const uint8_t ff_ac3_rematrix_band_tab[5] = { 
13, 25, 37, 61, 253 }; +/** + * Table E2.16 Default Coupling Banding Structure + */ +const uint8_t ff_eac3_default_cpl_band_struct[18] = { + 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1 +}; + /* AC-3 MDCT window */ /* MDCT window */ diff --git a/libavcodec/ac3tab.h b/libavcodec/ac3tab.h index 4bcbcab72b..aa13c8f6e5 100644 --- a/libavcodec/ac3tab.h +++ b/libavcodec/ac3tab.h @@ -39,6 +39,7 @@ extern const uint8_t ff_ac3_dec_channel_map[8][2][6]; extern const uint16_t ff_ac3_sample_rate_tab[3]; extern const uint16_t ff_ac3_bitrate_tab[19]; extern const uint8_t ff_ac3_rematrix_band_tab[5]; +extern const uint8_t ff_eac3_default_cpl_band_struct[18]; extern const int16_t ff_ac3_window[AC3_WINDOW_SIZE/2]; extern const uint8_t ff_ac3_log_add_tab[260]; extern const uint16_t ff_ac3_hearing_threshold_tab[AC3_CRITICAL_BANDS][3]; diff --git a/libavcodec/acelp_vectors.c b/libavcodec/acelp_vectors.c index e41e5facb6..a44ab8cfe6 100644 --- a/libavcodec/acelp_vectors.c +++ b/libavcodec/acelp_vectors.c @@ -93,16 +93,6 @@ const uint8_t ff_fc_4pulses_8bits_track_4[32] = 78, 79, }; -#if 0 -static uint8_t gray_decode[32] = -{ - 0, 1, 3, 2, 7, 6, 4, 5, - 15, 14, 12, 13, 8, 9, 11, 10, - 31, 30, 28, 29, 24, 25, 27, 26, - 16, 17, 19, 18, 23, 22, 20, 21 -}; -#endif - const float ff_pow_0_7[10] = { 0.700000, 0.490000, 0.343000, 0.240100, 0.168070, 0.117649, 0.082354, 0.057648, 0.040354, 0.028248 diff --git a/libavcodec/alacenc.c b/libavcodec/alacenc.c index c3a1fdfa03..bb618e16f2 100644 --- a/libavcodec/alacenc.c +++ b/libavcodec/alacenc.c @@ -389,6 +389,11 @@ static av_cold int alac_encode_init(AVCodecContext *avctx) return -1; } + if(avctx->channels > 2) { + av_log(avctx, AV_LOG_ERROR, "channels > 2 not supported\n"); + return AVERROR_PATCHWELCOME; + } + // Set default compression level if(avctx->compression_level == FF_COMPRESSION_DEFAULT) s->compression_level = 2; diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c index fc74eeaf8c..e6305cf7d2 100644 --- 
a/libavcodec/allcodecs.c +++ b/libavcodec/allcodecs.c @@ -104,6 +104,7 @@ void avcodec_register_all(void) REGISTER_DECODER (EIGHTBPS, eightbps); REGISTER_DECODER (EIGHTSVX_EXP, eightsvx_exp); REGISTER_DECODER (EIGHTSVX_FIB, eightsvx_fib); + REGISTER_DECODER (EIGHTSVX_RAW, eightsvx_raw); REGISTER_DECODER (ESCAPE124, escape124); REGISTER_ENCDEC (FFV1, ffv1); REGISTER_ENCDEC (FFVHUFF, ffvhuff); @@ -130,6 +131,7 @@ void avcodec_register_all(void) REGISTER_DECODER (INDEO3, indeo3); REGISTER_DECODER (INDEO5, indeo5); REGISTER_DECODER (INTERPLAY_VIDEO, interplay_video); + REGISTER_DECODER (JPEG2000, jpeg2000); REGISTER_ENCDEC (JPEGLS, jpegls); REGISTER_DECODER (JV, jv); REGISTER_DECODER (KGV1, kgv1); @@ -154,7 +156,7 @@ void avcodec_register_all(void) REGISTER_DECODER (MPEG1_VDPAU, mpeg1_vdpau); REGISTER_DECODER (MPEG2_CRYSTALHD, mpeg2_crystalhd); REGISTER_DECODER (MSMPEG4_CRYSTALHD, msmpeg4_crystalhd); - REGISTER_ENCDEC (MSMPEG4V1, msmpeg4v1); + REGISTER_DECODER (MSMPEG4V1, msmpeg4v1); REGISTER_ENCDEC (MSMPEG4V2, msmpeg4v2); REGISTER_ENCDEC (MSMPEG4V3, msmpeg4v3); REGISTER_DECODER (MSRLE, msrle); @@ -251,7 +253,7 @@ void avcodec_register_all(void) REGISTER_DECODER (COOK, cook); REGISTER_ENCDEC (DCA, dca); REGISTER_DECODER (DSICINAUDIO, dsicinaudio); - REGISTER_DECODER (EAC3, eac3); + REGISTER_ENCDEC (EAC3, eac3); REGISTER_ENCDEC (FLAC, flac); REGISTER_DECODER (GSM, gsm); REGISTER_DECODER (GSM_MS, gsm_ms); diff --git a/libavcodec/amrwbdata.h b/libavcodec/amrwbdata.h index 36553d5a0f..1327b0237a 100644 --- a/libavcodec/amrwbdata.h +++ b/libavcodec/amrwbdata.h @@ -1887,4 +1887,4 @@ static const uint16_t cf_sizes_wb[] = { 40 /// SID/comfort noise frame }; -#endif +#endif /* AVCODEC_AMRWBDATA_H */ diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile index a5abfdd128..3374f0e2bd 100644 --- a/libavcodec/arm/Makefile +++ b/libavcodec/arm/Makefile @@ -5,6 +5,9 @@ OBJS-$(CONFIG_DCA_DECODER) += arm/dcadsp_init_arm.o \ ARMV6-OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_armv6.o 
+OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_init_arm.o +ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_fixed_armv6.o + OBJS-$(CONFIG_VP5_DECODER) += arm/vp56dsp_init_arm.o OBJS-$(CONFIG_VP6_DECODER) += arm/vp56dsp_init_arm.o OBJS-$(CONFIG_VP8_DECODER) += arm/vp8dsp_init_arm.o diff --git a/libavcodec/arm/aac.h b/libavcodec/arm/aac.h index 390cdbfcf9..3b14c094c6 100644 --- a/libavcodec/arm/aac.h +++ b/libavcodec/arm/aac.h @@ -30,17 +30,17 @@ static inline float *VMUL2(float *dst, const float *v, unsigned idx, const float *scale) { unsigned v0, v1; - __asm__ volatile ("ubfx %0, %4, #0, #4 \n\t" - "ubfx %1, %4, #4, #4 \n\t" - "ldr %0, [%3, %0, lsl #2] \n\t" - "ldr %1, [%3, %1, lsl #2] \n\t" - "vld1.32 {d1[]}, [%5,:32] \n\t" - "vmov d0, %0, %1 \n\t" - "vmul.f32 d0, d0, d1 \n\t" - "vst1.32 {d0}, [%2,:64]! \n\t" - : "=&r"(v0), "=&r"(v1), "+r"(dst) - : "r"(v), "r"(idx), "r"(scale) - : "d0", "d1"); + __asm__ ("ubfx %0, %6, #0, #4 \n\t" + "ubfx %1, %6, #4, #4 \n\t" + "ldr %0, [%5, %0, lsl #2] \n\t" + "ldr %1, [%5, %1, lsl #2] \n\t" + "vld1.32 {d1[]}, [%7,:32] \n\t" + "vmov d0, %0, %1 \n\t" + "vmul.f32 d0, d0, d1 \n\t" + "vst1.32 {d0}, [%2,:64]! \n\t" + : "=&r"(v0), "=&r"(v1), "+r"(dst), "=m"(dst[0]), "=m"(dst[1]) + : "r"(v), "r"(idx), "r"(scale) + : "d0", "d1"); return dst; } @@ -49,22 +49,23 @@ static inline float *VMUL4(float *dst, const float *v, unsigned idx, const float *scale) { unsigned v0, v1, v2, v3; - __asm__ volatile ("ubfx %0, %6, #0, #2 \n\t" - "ubfx %1, %6, #2, #2 \n\t" - "ldr %0, [%5, %0, lsl #2] \n\t" - "ubfx %2, %6, #4, #2 \n\t" - "ldr %1, [%5, %1, lsl #2] \n\t" - "ubfx %3, %6, #6, #2 \n\t" - "ldr %2, [%5, %2, lsl #2] \n\t" - "vmov d0, %0, %1 \n\t" - "ldr %3, [%5, %3, lsl #2] \n\t" - "vld1.32 {d2[],d3[]},[%7,:32] \n\t" - "vmov d1, %2, %3 \n\t" - "vmul.f32 q0, q0, q1 \n\t" - "vst1.32 {q0}, [%4,:128]! 
\n\t" - : "=&r"(v0), "=&r"(v1), "=&r"(v2), "=&r"(v3), "+r"(dst) - : "r"(v), "r"(idx), "r"(scale) - : "d0", "d1", "d2", "d3"); + __asm__ ("ubfx %0, %10, #0, #2 \n\t" + "ubfx %1, %10, #2, #2 \n\t" + "ldr %0, [%9, %0, lsl #2] \n\t" + "ubfx %2, %10, #4, #2 \n\t" + "ldr %1, [%9, %1, lsl #2] \n\t" + "ubfx %3, %10, #6, #2 \n\t" + "ldr %2, [%9, %2, lsl #2] \n\t" + "vmov d0, %0, %1 \n\t" + "ldr %3, [%9, %3, lsl #2] \n\t" + "vld1.32 {d2[],d3[]},[%11,:32] \n\t" + "vmov d1, %2, %3 \n\t" + "vmul.f32 q0, q0, q1 \n\t" + "vst1.32 {q0}, [%4,:128]! \n\t" + : "=&r"(v0), "=&r"(v1), "=&r"(v2), "=&r"(v3), "+r"(dst), + "=m"(dst[0]), "=m"(dst[1]), "=m"(dst[2]), "=m"(dst[3]) + : "r"(v), "r"(idx), "r"(scale) + : "d0", "d1", "d2", "d3"); return dst; } @@ -73,22 +74,23 @@ static inline float *VMUL2S(float *dst, const float *v, unsigned idx, unsigned sign, const float *scale) { unsigned v0, v1, v2, v3; - __asm__ volatile ("ubfx %0, %6, #0, #4 \n\t" - "ubfx %1, %6, #4, #4 \n\t" - "ldr %0, [%5, %0, lsl #2] \n\t" - "lsl %2, %8, #30 \n\t" - "ldr %1, [%5, %1, lsl #2] \n\t" - "lsl %3, %8, #31 \n\t" - "vmov d0, %0, %1 \n\t" - "bic %2, %2, #1<<30 \n\t" - "vld1.32 {d1[]}, [%7,:32] \n\t" - "vmov d2, %2, %3 \n\t" - "veor d0, d0, d2 \n\t" - "vmul.f32 d0, d0, d1 \n\t" - "vst1.32 {d0}, [%4,:64]! \n\t" - : "=&r"(v0), "=&r"(v1), "=&r"(v2), "=&r"(v3), "+r"(dst) - : "r"(v), "r"(idx), "r"(scale), "r"(sign) - : "d0", "d1", "d2"); + __asm__ ("ubfx %0, %8, #0, #4 \n\t" + "ubfx %1, %8, #4, #4 \n\t" + "ldr %0, [%7, %0, lsl #2] \n\t" + "lsl %2, %10, #30 \n\t" + "ldr %1, [%7, %1, lsl #2] \n\t" + "lsl %3, %10, #31 \n\t" + "vmov d0, %0, %1 \n\t" + "bic %2, %2, #1<<30 \n\t" + "vld1.32 {d1[]}, [%9,:32] \n\t" + "vmov d2, %2, %3 \n\t" + "veor d0, d0, d2 \n\t" + "vmul.f32 d0, d0, d1 \n\t" + "vst1.32 {d0}, [%4,:64]! 
\n\t" + : "=&r"(v0), "=&r"(v1), "=&r"(v2), "=&r"(v3), "+r"(dst), + "=m"(dst[0]), "=m"(dst[1]) + : "r"(v), "r"(idx), "r"(scale), "r"(sign) + : "d0", "d1", "d2"); return dst; } @@ -97,38 +99,39 @@ static inline float *VMUL4S(float *dst, const float *v, unsigned idx, unsigned sign, const float *scale) { unsigned v0, v1, v2, v3, nz; - __asm__ volatile ("vld1.32 {d2[],d3[]},[%9,:32] \n\t" - "ubfx %0, %8, #0, #2 \n\t" - "ubfx %1, %8, #2, #2 \n\t" - "ldr %0, [%7, %0, lsl #2] \n\t" - "ubfx %2, %8, #4, #2 \n\t" - "ldr %1, [%7, %1, lsl #2] \n\t" - "ubfx %3, %8, #6, #2 \n\t" - "ldr %2, [%7, %2, lsl #2] \n\t" - "vmov d0, %0, %1 \n\t" - "ldr %3, [%7, %3, lsl #2] \n\t" - "lsr %6, %8, #12 \n\t" - "rbit %6, %6 \n\t" - "vmov d1, %2, %3 \n\t" - "lsls %6, %6, #1 \n\t" - "and %0, %5, #1<<31 \n\t" - "lslcs %5, %5, #1 \n\t" - "lsls %6, %6, #1 \n\t" - "and %1, %5, #1<<31 \n\t" - "lslcs %5, %5, #1 \n\t" - "lsls %6, %6, #1 \n\t" - "and %2, %5, #1<<31 \n\t" - "lslcs %5, %5, #1 \n\t" - "vmov d4, %0, %1 \n\t" - "and %3, %5, #1<<31 \n\t" - "vmov d5, %2, %3 \n\t" - "veor q0, q0, q2 \n\t" - "vmul.f32 q0, q0, q1 \n\t" - "vst1.32 {q0}, [%4,:128]! 
\n\t" - : "=&r"(v0), "=&r"(v1), "=&r"(v2), "=&r"(v3), "+r"(dst), - "+r"(sign), "=r"(nz) - : "r"(v), "r"(idx), "r"(scale) - : "d0", "d1", "d2", "d3", "d4", "d5"); + __asm__ ("vld1.32 {d2[],d3[]},[%13,:32] \n\t" + "ubfx %0, %12, #0, #2 \n\t" + "ubfx %1, %12, #2, #2 \n\t" + "ldr %0, [%11,%0, lsl #2] \n\t" + "ubfx %2, %12, #4, #2 \n\t" + "ldr %1, [%11,%1, lsl #2] \n\t" + "ubfx %3, %12, #6, #2 \n\t" + "ldr %2, [%11,%2, lsl #2] \n\t" + "vmov d0, %0, %1 \n\t" + "ldr %3, [%11,%3, lsl #2] \n\t" + "lsr %6, %12, #12 \n\t" + "rbit %6, %6 \n\t" + "vmov d1, %2, %3 \n\t" + "lsls %6, %6, #1 \n\t" + "and %0, %5, #1<<31 \n\t" + "lslcs %5, %5, #1 \n\t" + "lsls %6, %6, #1 \n\t" + "and %1, %5, #1<<31 \n\t" + "lslcs %5, %5, #1 \n\t" + "lsls %6, %6, #1 \n\t" + "and %2, %5, #1<<31 \n\t" + "lslcs %5, %5, #1 \n\t" + "vmov d4, %0, %1 \n\t" + "and %3, %5, #1<<31 \n\t" + "vmov d5, %2, %3 \n\t" + "veor q0, q0, q2 \n\t" + "vmul.f32 q0, q0, q1 \n\t" + "vst1.32 {q0}, [%4,:128]! \n\t" + : "=&r"(v0), "=&r"(v1), "=&r"(v2), "=&r"(v3), "+r"(dst), + "+r"(sign), "=r"(nz), + "=m"(dst[0]), "=m"(dst[1]), "=m"(dst[2]), "=m"(dst[3]) + : "r"(v), "r"(idx), "r"(scale) + : "cc", "d0", "d1", "d2", "d3", "d4", "d5"); return dst; } diff --git a/libavcodec/arm/ac3dsp_arm.S b/libavcodec/arm/ac3dsp_arm.S index d7d498e41f..545714cff1 100644 --- a/libavcodec/arm/ac3dsp_arm.S +++ b/libavcodec/arm/ac3dsp_arm.S @@ -20,33 +20,16 @@ #include "asm.S" -function ff_ac3_compute_mantissa_size_arm, export=1 - push {r4-r8,lr} - ldm r0, {r4-r8} - mov r3, r0 - mov r0, #0 -1: +function ff_ac3_update_bap_counts_arm, export=1 + push {lr} ldrb lr, [r1], #1 +1: + lsl r3, lr, #1 + ldrh r12, [r0, r3] subs r2, r2, #1 - blt 2f - cmp lr, #4 - bgt 3f - subs lr, lr, #1 - addlt r4, r4, #1 - addeq r5, r5, #1 - ble 1b - subs lr, lr, #2 - addlt r6, r6, #1 - addeq r7, r7, #1 - addgt r8, r8, #1 - b 1b -3: - cmp lr, #14 - sublt lr, lr, #1 - addgt r0, r0, #16 - addle r0, r0, lr - b 1b -2: - stm r3, {r4-r8} - pop {r4-r8,pc} + ldrbgt lr, [r1], #1 + add 
r12, r12, #1 + strh r12, [r0, r3] + bgt 1b + pop {pc} endfunc diff --git a/libavcodec/arm/ac3dsp_init_arm.c b/libavcodec/arm/ac3dsp_init_arm.c index 9f01507853..5a9fdf0071 100644 --- a/libavcodec/arm/ac3dsp_init_arm.c +++ b/libavcodec/arm/ac3dsp_init_arm.c @@ -35,11 +35,11 @@ void ff_ac3_bit_alloc_calc_bap_armv6(int16_t *mask, int16_t *psd, int snr_offset, int floor, const uint8_t *bap_tab, uint8_t *bap); -int ff_ac3_compute_mantissa_size_arm(int cnt[5], uint8_t *bap, int nb_coefs); +void ff_ac3_update_bap_counts_arm(uint16_t mant_cnt[16], uint8_t *bap, int len); av_cold void ff_ac3dsp_init_arm(AC3DSPContext *c, int bit_exact) { - c->compute_mantissa_size = ff_ac3_compute_mantissa_size_arm; + c->update_bap_counts = ff_ac3_update_bap_counts_arm; if (HAVE_ARMV6) { c->bit_alloc_calc_bap = ff_ac3_bit_alloc_calc_bap_armv6; diff --git a/libavcodec/arm/asm-offsets.h b/libavcodec/arm/asm-offsets.h index 7f73534834..fe124ba702 100644 --- a/libavcodec/arm/asm-offsets.h +++ b/libavcodec/arm/asm-offsets.h @@ -36,4 +36,4 @@ #define H263_AIC 0xf0 #define INTER_SCANTAB_RASTER_END 0x138 -#endif +#endif /* AVCODEC_ARM_ASM_OFFSETS_H */ diff --git a/libavcodec/arm/asm.S b/libavcodec/arm/asm.S index cf73a77609..bb999fd61a 100644 --- a/libavcodec/arm/asm.S +++ b/libavcodec/arm/asm.S @@ -26,6 +26,8 @@ # define ELF @ #endif + .syntax unified + .macro require8 val=1 ELF .eabi_attribute 24, \val .endm diff --git a/libavcodec/arm/dsputil_arm.h b/libavcodec/arm/dsputil_arm.h index 7f8579d507..b333c70226 100644 --- a/libavcodec/arm/dsputil_arm.h +++ b/libavcodec/arm/dsputil_arm.h @@ -30,4 +30,4 @@ void ff_dsputil_init_vfp(DSPContext* c, AVCodecContext *avctx); void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx); void ff_dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx); -#endif +#endif /* AVCODEC_ARM_DSPUTIL_H */ diff --git a/libavcodec/arm/dsputil_init_vfp.c b/libavcodec/arm/dsputil_init_vfp.c index bd52315934..ee092dca10 100644 --- a/libavcodec/arm/dsputil_init_vfp.c +++ 
b/libavcodec/arm/dsputil_init_vfp.c @@ -28,6 +28,7 @@ void ff_vector_fmul_reverse_vfp(float *dst, const float *src0, void ff_dsputil_init_vfp(DSPContext* c, AVCodecContext *avctx) { - c->vector_fmul = ff_vector_fmul_vfp; + if (!HAVE_VFPV3) + c->vector_fmul = ff_vector_fmul_vfp; c->vector_fmul_reverse = ff_vector_fmul_reverse_vfp; } diff --git a/libavcodec/arm/dsputil_vfp.S b/libavcodec/arm/dsputil_vfp.S index 197d500819..497c02be92 100644 --- a/libavcodec/arm/dsputil_vfp.S +++ b/libavcodec/arm/dsputil_vfp.S @@ -21,7 +21,6 @@ #include "config.h" #include "asm.S" - .syntax unified /* * VFP is a floating point coprocessor used in some ARM cores. VFP11 has 1 cycle * throughput for almost all the instructions (except for double precision diff --git a/libavcodec/arm/fmtconvert_vfp.S b/libavcodec/arm/fmtconvert_vfp.S index 1d19e7758b..da2ef8c158 100644 --- a/libavcodec/arm/fmtconvert_vfp.S +++ b/libavcodec/arm/fmtconvert_vfp.S @@ -21,8 +21,6 @@ #include "config.h" #include "asm.S" - .syntax unified - /** * ARM VFP optimized float to int16 conversion. 
* Assume that len is a positive number and is multiple of 8, destination diff --git a/libavcodec/arm/h264idct_neon.S b/libavcodec/arm/h264idct_neon.S index 6b6a669f35..afd3718518 100644 --- a/libavcodec/arm/h264idct_neon.S +++ b/libavcodec/arm/h264idct_neon.S @@ -148,24 +148,27 @@ function ff_h264_idct_add8_neon, export=1 add r5, r1, #16*4 add r1, r2, #16*32 mov r2, r3 + mov r3, r1 ldr r6, [sp, #32] movrel r7, scan8+16 - mov ip, #7 -1: ldrb r8, [r7], #1 - ldr r0, [r5], #4 + mov r12, #0 +1: ldrb r8, [r7, r12] + ldr r0, [r5, r12, lsl #2] ldrb r8, [r6, r8] - tst ip, #4 - addne r0, r0, r4 - addeq r0, r0, r9 + add r0, r0, r4 + add r1, r3, r12, lsl #5 cmp r8, #0 ldrsh r8, [r1] adrne lr, ff_h264_idct_add_neon adreq lr, ff_h264_idct_dc_add_neon cmpeq r8, #0 blxne lr - subs ip, ip, #1 - add r1, r1, #32 - bge 1b + add r12, r12, #1 + cmp r12, #4 + moveq r12, #16 + moveq r4, r9 + cmp r12, #20 + blt 1b pop {r4-r10,pc} endfunc @@ -374,11 +377,15 @@ function ff_h264_idct8_add4_neon, export=1 endfunc .section .rodata -scan8: .byte 4+1*8, 5+1*8, 4+2*8, 5+2*8 - .byte 6+1*8, 7+1*8, 6+2*8, 7+2*8 - .byte 4+3*8, 5+3*8, 4+4*8, 5+4*8 - .byte 6+3*8, 7+3*8, 6+4*8, 7+4*8 - .byte 1+1*8, 2+1*8 - .byte 1+2*8, 2+2*8 - .byte 1+4*8, 2+4*8 - .byte 1+5*8, 2+5*8 +scan8: .byte 4+ 1*8, 5+ 1*8, 4+ 2*8, 5+ 2*8 + .byte 6+ 1*8, 7+ 1*8, 6+ 2*8, 7+ 2*8 + .byte 4+ 3*8, 5+ 3*8, 4+ 4*8, 5+ 4*8 + .byte 6+ 3*8, 7+ 3*8, 6+ 4*8, 7+ 4*8 + .byte 4+ 6*8, 5+ 6*8, 4+ 7*8, 5+ 7*8 + .byte 6+ 6*8, 7+ 6*8, 6+ 7*8, 7+ 7*8 + .byte 4+ 8*8, 5+ 8*8, 4+ 9*8, 5+ 9*8 + .byte 6+ 8*8, 7+ 8*8, 6+ 9*8, 7+ 9*8 + .byte 4+11*8, 5+11*8, 4+12*8, 5+12*8 + .byte 6+11*8, 7+11*8, 6+12*8, 7+12*8 + .byte 4+13*8, 5+13*8, 4+14*8, 5+14*8 + .byte 6+13*8, 7+13*8, 6+14*8, 7+14*8 diff --git a/libavcodec/arm/jrevdct_arm.S b/libavcodec/arm/jrevdct_arm.S index 4fcf35101d..93cbbbe8eb 100644 --- a/libavcodec/arm/jrevdct_arm.S +++ b/libavcodec/arm/jrevdct_arm.S @@ -54,18 +54,13 @@ #define FIX_M_1_961570560_ID 40 #define FIX_M_2_562915447_ID 44 #define 
FIX_0xFFFF_ID 48 - .text - .align function ff_j_rev_dct_arm, export=1 - stmdb sp!, { r4 - r12, lr } @ all callee saved regs - - sub sp, sp, #4 @ reserve some space on the stack - str r0, [ sp ] @ save the DCT pointer to the stack + push {r0, r4 - r11, lr} mov lr, r0 @ lr = pointer to the current row mov r12, #8 @ r12 = row-counter - adr r11, const_array @ r11 = base pointer to the constants array + movrel r11, const_array @ r11 = base pointer to the constants array row_loop: ldrsh r0, [lr, # 0] @ r0 = 'd0' ldrsh r2, [lr, # 2] @ r2 = 'd2' @@ -102,7 +97,7 @@ row_loop: add r4, r6, r3, lsl #13 @ r4 = tmp11 rsb r3, r6, r3, lsl #13 @ r3 = tmp12 - stmdb sp!, { r0, r2, r3, r4 } @ save on the stack tmp10, tmp13, tmp12, tmp11 + push {r0, r2, r3, r4} @ save on the stack tmp10, tmp13, tmp12, tmp11 ldrsh r3, [lr, #10] @ r3 = 'd3' ldrsh r5, [lr, #12] @ r5 = 'd5' @@ -136,8 +131,8 @@ row_loop: add r3, r3, r4 @ r3 = tmp2 add r1, r1, r6 @ r1 = tmp3 - ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp12 / r6 = tmp11 - @ r1 = tmp3 / r3 = tmp2 / r5 = tmp1 / r7 = tmp0 + pop {r0, r2, r4, r6} @ r0 = tmp10 / r2 = tmp13 / r4 = tmp12 / r6 = tmp11 + @ r1 = tmp3 / r3 = tmp2 / r5 = tmp1 / r7 = tmp0 @ Compute DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS) add r8, r0, r1 @@ -211,7 +206,7 @@ end_of_row_loop: start_column_loop: @ Start of column loop - ldr lr, [ sp ] + pop {lr} mov r12, #8 column_loop: ldrsh r0, [lr, #( 0*8)] @ r0 = 'd0' @@ -245,7 +240,7 @@ column_loop: orrs r10, r9, r10 beq empty_odd_column - stmdb sp!, { r0, r2, r4, r6 } @ save on the stack tmp10, tmp13, tmp12, tmp11 + push {r0, r2, r4, r6} @ save on the stack tmp10, tmp13, tmp12, tmp11 add r0, r3, r5 @ r0 = 'z2' add r2, r1, r7 @ r2 = 'z1' @@ -275,8 +270,8 @@ column_loop: add r3, r3, r4 @ r3 = tmp2 add r1, r1, r6 @ r1 = tmp3 - ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp11 / r6 = tmp12 - @ r1 = tmp3 / r3 = tmp2 / r5 = tmp1 / r7 = tmp0 + pop {r0, r2, r4, r6} @ r0 = tmp10 / r2 = tmp13 / r4 = 
tmp11 / r6 = tmp12 + @ r1 = tmp3 / r3 = tmp2 / r5 = tmp1 / r7 = tmp0 @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3) add r8, r0, r1 @@ -368,11 +363,10 @@ empty_odd_column: the_end: @ The end.... - add sp, sp, #4 - ldmia sp!, { r4 - r12, pc } @ restore callee saved regs and return + pop {r4 - r11, pc} +endfunc -const_array: - .align +const const_array .word FIX_0_298631336 .word FIX_0_541196100 .word FIX_0_765366865 @@ -386,3 +380,4 @@ const_array: .word FIX_M_1_961570560 .word FIX_M_2_562915447 .word FIX_0xFFFF +endconst diff --git a/libavcodec/arm/mathops.h b/libavcodec/arm/mathops.h index 2244fa19ae..299a973cb6 100644 --- a/libavcodec/arm/mathops.h +++ b/libavcodec/arm/mathops.h @@ -28,52 +28,16 @@ #if HAVE_INLINE_ASM -# define MULL MULL -static inline av_const int MULL(int a, int b, unsigned shift) -{ - int lo, hi; - __asm__("smull %0, %1, %2, %3 \n\t" - "mov %0, %0, lsr %4 \n\t" - "add %1, %0, %1, lsl %5 \n\t" - : "=&r"(lo), "=&r"(hi) - : "r"(b), "r"(a), "ir"(shift), "ir"(32-shift)); - return hi; -} - -#define MULH MULH #if HAVE_ARMV6 +#define MULH MULH static inline av_const int MULH(int a, int b) { int r; __asm__ ("smmul %0, %1, %2" : "=r"(r) : "r"(a), "r"(b)); return r; } -#else -static inline av_const int MULH(int a, int b) -{ - int lo, hi; - __asm__ ("smull %0, %1, %2, %3" : "=&r"(lo), "=&r"(hi) : "r"(b), "r"(a)); - return hi; -} #endif -static inline av_const int64_t MUL64(int a, int b) -{ - union { uint64_t x; unsigned hl[2]; } x; - __asm__ ("smull %0, %1, %2, %3" - : "=r"(x.hl[0]), "=r"(x.hl[1]) : "r"(a), "r"(b)); - return x.x; -} -#define MUL64 MUL64 - -static inline av_const int64_t MAC64(int64_t d, int a, int b) -{ - union { uint64_t x; unsigned hl[2]; } x = { d }; - __asm__ ("smlal %0, %1, %2, %3" - : "+r"(x.hl[0]), "+r"(x.hl[1]) : "r"(a), "r"(b)); - return x.x; -} -#define MAC64(d, a, b) ((d) = MAC64(d, a, b)) #define MLS64(d, a, b) MAC64(d, -(a), b) #if HAVE_ARMV5TE @@ -97,7 +61,7 @@ static inline av_const int MUL16(int ra, int rb) static 
inline av_const int mid_pred(int a, int b, int c) { int m; - __asm__ volatile ( + __asm__ ( "mov %0, %2 \n\t" "cmp %1, %2 \n\t" "movgt %0, %1 \n\t" @@ -107,7 +71,8 @@ static inline av_const int mid_pred(int a, int b, int c) "cmp %0, %1 \n\t" "movgt %0, %1 \n\t" : "=&r"(m), "+r"(a) - : "r"(b), "r"(c)); + : "r"(b), "r"(c) + : "cc"); return m; } diff --git a/libavcodec/arm/mpegaudiodsp_fixed_armv6.S b/libavcodec/arm/mpegaudiodsp_fixed_armv6.S new file mode 100644 index 0000000000..9ec731480b --- /dev/null +++ b/libavcodec/arm/mpegaudiodsp_fixed_armv6.S @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2011 Mans Rullgard <mans@mansr.com> + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "asm.S" + +.macro skip args:vararg +.endm + +.macro sum8 lo, hi, w, p, t1, t2, t3, t4, rsb=skip, offs=0 + ldr \t1, [\w, #4*\offs] + ldr \t2, [\p, #4]! 
+ \rsb \t1, \t1, #0 + .irpc i, 135 + ldr \t3, [\w, #4*64*\i+4*\offs] + ldr \t4, [\p, #4*64*\i] + smlal \lo, \hi, \t1, \t2 + \rsb \t3, \t3, #0 + ldr \t1, [\w, #4*64*(\i+1)+4*\offs] + ldr \t2, [\p, #4*64*(\i+1)] + smlal \lo, \hi, \t3, \t4 + \rsb \t1, \t1, #0 + .endr + ldr \t3, [\w, #4*64*7+4*\offs] + ldr \t4, [\p, #4*64*7] + smlal \lo, \hi, \t1, \t2 + \rsb \t3, \t3, #0 + smlal \lo, \hi, \t3, \t4 +.endm + +.macro round rd, lo, hi + lsr \rd, \lo, #24 + bic \lo, \lo, #0xff000000 + orr \rd, \rd, \hi, lsl #8 + mov \hi, #0 + ssat \rd, #16, \rd +.endm + +function ff_mpadsp_apply_window_fixed_armv6, export=1 + push {r2,r4-r11,lr} + + add r4, r0, #4*512 @ synth_buf + 512 + .rept 4 + ldm r0!, {r5-r12} + stm r4!, {r5-r12} + .endr + + ldr r4, [sp, #40] @ incr + sub r0, r0, #4*17 @ synth_buf + 16 + ldr r8, [r2] @ sum:low + add r2, r0, #4*32 @ synth_buf + 48 + rsb r5, r4, r4, lsl #5 @ 31 * incr + lsl r4, r4, #1 + asr r9, r8, #31 @ sum:high + add r5, r3, r5, lsl #1 @ samples2 + add r6, r1, #4*32 @ w2 + str r4, [sp, #40] + + sum8 r8, r9, r1, r0, r10, r11, r12, lr + sum8 r8, r9, r1, r2, r10, r11, r12, lr, rsb, 32 + round r10, r8, r9 + strh r10, [r3], r4 + + mov lr, #15 +1: + ldr r12, [r0, #4]! + ldr r11, [r6, #-4]! + ldr r10, [r1, #4]! + .irpc i, 0246 + .if \i + ldr r11, [r6, #4*64*\i] + ldr r10, [r1, #4*64*\i] + .endif + rsb r11, r11, #0 + smlal r8, r9, r10, r12 + ldr r10, [r0, #4*64*(\i+1)] + .ifeq \i + smull r4, r7, r11, r12 + .else + smlal r4, r7, r11, r12 + .endif + ldr r11, [r6, #4*64*(\i+1)] + ldr r12, [r1, #4*64*(\i+1)] + rsb r11, r11, #0 + smlal r8, r9, r12, r10 + .iflt \i-6 + ldr r12, [r0, #4*64*(\i+2)] + .else + ldr r12, [r2, #-4]! 
+ .endif + smlal r4, r7, r11, r10 + .endr + .irpc i, 0246 + ldr r10, [r1, #4*64*\i+4*32] + rsb r12, r12, #0 + ldr r11, [r6, #4*64*\i+4*32] + smlal r8, r9, r10, r12 + ldr r10, [r2, #4*64*(\i+1)] + smlal r4, r7, r11, r12 + ldr r12, [r1, #4*64*(\i+1)+4*32] + rsb r10, r10, #0 + ldr r11, [r6, #4*64*(\i+1)+4*32] + smlal r8, r9, r12, r10 + .iflt \i-6 + ldr r12, [r2, #4*64*(\i+2)] + .else + ldr r12, [sp, #40] + .endif + smlal r4, r7, r11, r10 + .endr + round r10, r8, r9 + adds r8, r8, r4 + adc r9, r9, r7 + strh r10, [r3], r12 + round r11, r8, r9 + subs lr, lr, #1 + strh r11, [r5], -r12 + bgt 1b + + sum8 r8, r9, r1, r0, r10, r11, r12, lr, rsb, 33 + pop {r4} + round r10, r8, r9 + str r8, [r4] + strh r10, [r3] + + pop {r4-r11,pc} +endfunc diff --git a/libavcodec/arm/mpegaudiodsp_init_arm.c b/libavcodec/arm/mpegaudiodsp_init_arm.c new file mode 100644 index 0000000000..94a55787ad --- /dev/null +++ b/libavcodec/arm/mpegaudiodsp_init_arm.c @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2011 Mans Rullgard + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <stdint.h> +#include "libavcodec/mpegaudiodsp.h" +#include "config.h" + +void ff_mpadsp_apply_window_fixed_armv6(int32_t *synth_buf, int32_t *window, + int *dither, int16_t *out, int incr); + +void ff_mpadsp_init_arm(MPADSPContext *s) +{ + if (HAVE_ARMV6) { + s->apply_window_fixed = ff_mpadsp_apply_window_fixed_armv6; + } +} diff --git a/libavcodec/arm/mpegvideo_arm.h b/libavcodec/arm/mpegvideo_arm.h index 4cc25fdd36..3549bb244b 100644 --- a/libavcodec/arm/mpegvideo_arm.h +++ b/libavcodec/arm/mpegvideo_arm.h @@ -24,4 +24,4 @@ void MPV_common_init_iwmmxt(MpegEncContext *s); void MPV_common_init_armv5te(MpegEncContext *s); -#endif +#endif /* AVCODEC_ARM_MPEGVIDEO_H */ diff --git a/libavcodec/arm/mpegvideo_armv5te_s.S b/libavcodec/arm/mpegvideo_armv5te_s.S index c8cb2c68a3..82095ab15d 100644 --- a/libavcodec/arm/mpegvideo_armv5te_s.S +++ b/libavcodec/arm/mpegvideo_armv5te_s.S @@ -35,6 +35,21 @@ * * Inner loop should take 6 cycles per element on arm926ej-s (Nokia 770) */ + +.macro dequant_t dst, src, mul, add, tmp + rsbs \tmp, ip, \src, asr #16 + addgt \tmp, \add, #0 + rsblt \tmp, \add, #0 + smlatbne \dst, \src, \mul, \tmp +.endm + +.macro dequant_b dst, src, mul, add, tmp + rsbs \tmp, ip, \src, lsl #16 + addgt \tmp, \add, #0 + rsblt \tmp, \add, #0 + smlabbne \dst, \src, \mul, \tmp +.endm + function ff_dct_unquantize_h263_armv5te, export=1 push {r4-r9,lr} mov ip, #0 @@ -44,50 +59,20 @@ function ff_dct_unquantize_h263_armv5te, export=1 1: ldrd r6, [r0, #8] - rsbs r9, ip, r4, asr #16 - addgt r9, r2, #0 - rsblt r9, r2, #0 - smlatbne r9, r4, r1, r9 - - rsbs lr, ip, r5, asr #16 - addgt lr, r2, #0 - rsblt lr, r2, #0 - smlatbne lr, r5, r1, lr - - rsbs r8, ip, r4, asl #16 - addgt r8, r2, #0 - rsblt r8, r2, #0 - smlabbne r4, r4, r1, r8 - - rsbs 
r8, ip, r5, asl #16 - addgt r8, r2, #0 - rsblt r8, r2, #0 - smlabbne r5, r5, r1, r8 + dequant_t r9, r4, r1, r2, r9 + dequant_t lr, r5, r1, r2, lr + dequant_b r4, r4, r1, r2, r8 + dequant_b r5, r5, r1, r2, r8 strh r4, [r0], #2 strh r9, [r0], #2 strh r5, [r0], #2 strh lr, [r0], #2 - rsbs r9, ip, r6, asr #16 - addgt r9, r2, #0 - rsblt r9, r2, #0 - smlatbne r9, r6, r1, r9 - - rsbs lr, ip, r7, asr #16 - addgt lr, r2, #0 - rsblt lr, r2, #0 - smlatbne lr, r7, r1, lr - - rsbs r8, ip, r6, asl #16 - addgt r8, r2, #0 - rsblt r8, r2, #0 - smlabbne r6, r6, r1, r8 - - rsbs r8, ip, r7, asl #16 - addgt r8, r2, #0 - rsblt r8, r2, #0 - smlabbne r7, r7, r1, r8 + dequant_t r9, r6, r1, r2, r9 + dequant_t lr, r7, r1, r2, lr + dequant_b r6, r6, r1, r2, r8 + dequant_b r7, r7, r1, r2, r8 strh r6, [r0], #2 strh r9, [r0], #2 @@ -95,7 +80,7 @@ function ff_dct_unquantize_h263_armv5te, export=1 strh lr, [r0], #2 subs r3, r3, #8 - ldrgtd r4, [r0, #0] /* load data early to avoid load/use pipeline stall */ + ldrdgt r4, [r0, #0] /* load data early to avoid load/use pipeline stall */ bgt 1b adds r3, r3, #2 diff --git a/libavcodec/arm/simple_idct_armv5te.S b/libavcodec/arm/simple_idct_armv5te.S index 27aeca4448..3c4b5c06d1 100644 --- a/libavcodec/arm/simple_idct_armv5te.S +++ b/libavcodec/arm/simple_idct_armv5te.S @@ -333,6 +333,20 @@ function idct_col_armv5te ldr pc, [sp], #4 endfunc +.macro clip dst, src:vararg + movs \dst, \src + movmi \dst, #0 + cmp \dst, #255 + movgt \dst, #255 +.endm + +.macro aclip dst, src:vararg + adds \dst, \src + movmi \dst, #0 + cmp \dst, #255 + movgt \dst, #255 +.endm + function idct_col_put_armv5te str lr, [sp, #-4]! 
@@ -341,27 +355,15 @@ function idct_col_put_armv5te ldmfd sp!, {a3, a4} ldr lr, [sp, #32] add a2, a3, v1 - movs a2, a2, asr #20 - movmi a2, #0 - cmp a2, #255 - movgt a2, #255 + clip a2, a2, asr #20 add ip, a4, v2 - movs ip, ip, asr #20 - movmi ip, #0 - cmp ip, #255 - movgt ip, #255 + clip ip, ip, asr #20 orr a2, a2, ip, lsl #8 sub a3, a3, v1 - movs a3, a3, asr #20 - movmi a3, #0 - cmp a3, #255 - movgt a3, #255 + clip a3, a3, asr #20 sub a4, a4, v2 - movs a4, a4, asr #20 - movmi a4, #0 - cmp a4, #255 + clip a4, a4, asr #20 ldr v1, [sp, #28] - movgt a4, #255 strh a2, [v1] add a2, v1, #2 str a2, [sp, #28] @@ -371,79 +373,43 @@ function idct_col_put_armv5te strh a2, [v2, v1]! sub a2, a3, v3 - movs a2, a2, asr #20 - movmi a2, #0 - cmp a2, #255 - movgt a2, #255 + clip a2, a2, asr #20 sub ip, a4, v4 - movs ip, ip, asr #20 - movmi ip, #0 - cmp ip, #255 - movgt ip, #255 + clip ip, ip, asr #20 orr a2, a2, ip, lsl #8 strh a2, [v1, lr]! add a3, a3, v3 - movs a2, a3, asr #20 - movmi a2, #0 - cmp a2, #255 - movgt a2, #255 + clip a2, a3, asr #20 add a4, a4, v4 - movs a4, a4, asr #20 - movmi a4, #0 - cmp a4, #255 - movgt a4, #255 + clip a4, a4, asr #20 orr a2, a2, a4, lsl #8 ldmfd sp!, {a3, a4} strh a2, [v2, -lr]! add a2, a3, v5 - movs a2, a2, asr #20 - movmi a2, #0 - cmp a2, #255 - movgt a2, #255 + clip a2, a2, asr #20 add ip, a4, v6 - movs ip, ip, asr #20 - movmi ip, #0 - cmp ip, #255 - movgt ip, #255 + clip ip, ip, asr #20 orr a2, a2, ip, lsl #8 strh a2, [v1, lr]! sub a3, a3, v5 - movs a2, a3, asr #20 - movmi a2, #0 - cmp a2, #255 - movgt a2, #255 + clip a2, a3, asr #20 sub a4, a4, v6 - movs a4, a4, asr #20 - movmi a4, #0 - cmp a4, #255 - movgt a4, #255 + clip a4, a4, asr #20 orr a2, a2, a4, lsl #8 ldmfd sp!, {a3, a4} strh a2, [v2, -lr]! 
add a2, a3, v7 - movs a2, a2, asr #20 - movmi a2, #0 - cmp a2, #255 - movgt a2, #255 + clip a2, a2, asr #20 add ip, a4, fp - movs ip, ip, asr #20 - movmi ip, #0 - cmp ip, #255 - movgt ip, #255 + clip ip, ip, asr #20 orr a2, a2, ip, lsl #8 strh a2, [v1, lr] sub a3, a3, v7 - movs a2, a3, asr #20 - movmi a2, #0 - cmp a2, #255 - movgt a2, #255 + clip a2, a3, asr #20 sub a4, a4, fp - movs a4, a4, asr #20 - movmi a4, #0 - cmp a4, #255 - movgt a4, #255 + clip a4, a4, asr #20 orr a2, a2, a4, lsl #8 strh a2, [v2, -lr] @@ -460,36 +426,22 @@ function idct_col_add_armv5te ldmfd sp!, {a3, a4} ldrh ip, [lr] add a2, a3, v1 - mov a2, a2, asr #20 sub a3, a3, v1 and v1, ip, #255 - adds a2, a2, v1 - movmi a2, #0 - cmp a2, #255 - movgt a2, #255 + aclip a2, v1, a2, asr #20 add v1, a4, v2 mov v1, v1, asr #20 - adds v1, v1, ip, lsr #8 - movmi v1, #0 - cmp v1, #255 - movgt v1, #255 + aclip v1, v1, ip, lsr #8 orr a2, a2, v1, lsl #8 ldr v1, [sp, #32] sub a4, a4, v2 rsb v2, v1, v1, lsl #3 ldrh ip, [v2, lr]! strh a2, [lr] - mov a3, a3, asr #20 and a2, ip, #255 - adds a3, a3, a2 - movmi a3, #0 - cmp a3, #255 - movgt a3, #255 + aclip a3, a2, a3, asr #20 mov a4, a4, asr #20 - adds a4, a4, ip, lsr #8 - movmi a4, #0 - cmp a4, #255 - movgt a4, #255 + aclip a4, a4, ip, lsr #8 add a2, lr, #2 str a2, [sp, #28] orr a2, a3, a4, lsl #8 @@ -498,102 +450,60 @@ function idct_col_add_armv5te ldmfd sp!, {a3, a4} ldrh ip, [lr, v1]! sub a2, a3, v3 - mov a2, a2, asr #20 add a3, a3, v3 and v3, ip, #255 - adds a2, a2, v3 - movmi a2, #0 - cmp a2, #255 - movgt a2, #255 + aclip a2, v3, a2, asr #20 sub v3, a4, v4 mov v3, v3, asr #20 - adds v3, v3, ip, lsr #8 - movmi v3, #0 - cmp v3, #255 - movgt v3, #255 + aclip v3, v3, ip, lsr #8 orr a2, a2, v3, lsl #8 add a4, a4, v4 ldrh ip, [v2, -v1]! 
strh a2, [lr] - mov a3, a3, asr #20 and a2, ip, #255 - adds a3, a3, a2 - movmi a3, #0 - cmp a3, #255 - movgt a3, #255 + aclip a3, a2, a3, asr #20 mov a4, a4, asr #20 - adds a4, a4, ip, lsr #8 - movmi a4, #0 - cmp a4, #255 - movgt a4, #255 + aclip a4, a4, ip, lsr #8 orr a2, a3, a4, lsl #8 strh a2, [v2] ldmfd sp!, {a3, a4} ldrh ip, [lr, v1]! add a2, a3, v5 - mov a2, a2, asr #20 sub a3, a3, v5 and v3, ip, #255 - adds a2, a2, v3 - movmi a2, #0 - cmp a2, #255 - movgt a2, #255 + aclip a2, v3, a2, asr #20 add v3, a4, v6 mov v3, v3, asr #20 - adds v3, v3, ip, lsr #8 - movmi v3, #0 - cmp v3, #255 - movgt v3, #255 + aclip v3, v3, ip, lsr #8 orr a2, a2, v3, lsl #8 sub a4, a4, v6 ldrh ip, [v2, -v1]! strh a2, [lr] - mov a3, a3, asr #20 and a2, ip, #255 - adds a3, a3, a2 - movmi a3, #0 - cmp a3, #255 - movgt a3, #255 + aclip a3, a2, a3, asr #20 mov a4, a4, asr #20 - adds a4, a4, ip, lsr #8 - movmi a4, #0 - cmp a4, #255 - movgt a4, #255 + aclip a4, a4, ip, lsr #8 orr a2, a3, a4, lsl #8 strh a2, [v2] ldmfd sp!, {a3, a4} ldrh ip, [lr, v1]! add a2, a3, v7 - mov a2, a2, asr #20 sub a3, a3, v7 and v3, ip, #255 - adds a2, a2, v3 - movmi a2, #0 - cmp a2, #255 - movgt a2, #255 + aclip a2, v3, a2, asr #20 add v3, a4, fp mov v3, v3, asr #20 - adds v3, v3, ip, lsr #8 - movmi v3, #0 - cmp v3, #255 - movgt v3, #255 + aclip v3, v3, ip, lsr #8 orr a2, a2, v3, lsl #8 sub a4, a4, fp ldrh ip, [v2, -v1]! 
strh a2, [lr] - mov a3, a3, asr #20 and a2, ip, #255 - adds a3, a3, a2 - movmi a3, #0 - cmp a3, #255 - movgt a3, #255 + aclip a3, a2, a3, asr #20 mov a4, a4, asr #20 - adds a4, a4, ip, lsr #8 - movmi a4, #0 - cmp a4, #255 - movgt a4, #255 + aclip a4, a4, ip, lsr #8 orr a2, a3, a4, lsl #8 strh a2, [v2] diff --git a/libavcodec/arm/vp56_arith.h b/libavcodec/arm/vp56_arith.h index 9bcb466bdc..cd02579e5b 100644 --- a/libavcodec/arm/vp56_arith.h +++ b/libavcodec/arm/vp56_arith.h @@ -31,24 +31,25 @@ static inline int vp56_rac_get_prob_armv6(VP56RangeCoder *c, int pr) unsigned high = c->high << shift; unsigned bit; - __asm__ volatile ("adds %3, %3, %0 \n" - "cmpcs %7, %4 \n" - "ldrcsh %2, [%4], #2 \n" - "rsb %0, %6, #256 \n" - "smlabb %0, %5, %6, %0 \n" - "rev16cs %2, %2 \n" - "orrcs %1, %1, %2, lsl %3 \n" - "subcs %3, %3, #16 \n" - "lsr %0, %0, #8 \n" - "cmp %1, %0, lsl #16 \n" - "subge %1, %1, %0, lsl #16 \n" - "subge %0, %5, %0 \n" - "movge %2, #1 \n" - "movlt %2, #0 \n" - : "=&r"(c->high), "=&r"(c->code_word), "=&r"(bit), - "+&r"(c->bits), "+&r"(c->buffer) - : "r"(high), "r"(pr), "r"(c->end - 1), - "0"(shift), "1"(code_word)); + __asm__ ("adds %3, %3, %0 \n" + "cmpcs %7, %4 \n" + "ldrcsh %2, [%4], #2 \n" + "rsb %0, %6, #256 \n" + "smlabb %0, %5, %6, %0 \n" + "rev16cs %2, %2 \n" + "orrcs %1, %1, %2, lsl %3 \n" + "subcs %3, %3, #16 \n" + "lsr %0, %0, #8 \n" + "cmp %1, %0, lsl #16 \n" + "subge %1, %1, %0, lsl #16 \n" + "subge %0, %5, %0 \n" + "movge %2, #1 \n" + "movlt %2, #0 \n" + : "=&r"(c->high), "=&r"(c->code_word), "=&r"(bit), + "+&r"(c->bits), "+&r"(c->buffer) + : "r"(high), "r"(pr), "r"(c->end - 1), + "0"(shift), "1"(code_word) + : "cc"); return bit; } @@ -62,19 +63,20 @@ static inline int vp56_rac_get_prob_branchy_armv6(VP56RangeCoder *c, int pr) unsigned low; unsigned tmp; - __asm__ volatile ("adds %3, %3, %0 \n" - "cmpcs %7, %4 \n" - "ldrcsh %2, [%4], #2 \n" - "rsb %0, %6, #256 \n" - "smlabb %0, %5, %6, %0 \n" - "rev16cs %2, %2 \n" - "orrcs %1, %1, %2, lsl %3 
\n" - "subcs %3, %3, #16 \n" - "lsr %0, %0, #8 \n" - "lsl %2, %0, #16 \n" - : "=&r"(low), "+&r"(code_word), "=&r"(tmp), - "+&r"(c->bits), "+&r"(c->buffer) - : "r"(high), "r"(pr), "r"(c->end - 1), "0"(shift)); + __asm__ ("adds %3, %3, %0 \n" + "cmpcs %7, %4 \n" + "ldrcsh %2, [%4], #2 \n" + "rsb %0, %6, #256 \n" + "smlabb %0, %5, %6, %0 \n" + "rev16cs %2, %2 \n" + "orrcs %1, %1, %2, lsl %3 \n" + "subcs %3, %3, #16 \n" + "lsr %0, %0, #8 \n" + "lsl %2, %0, #16 \n" + : "=&r"(low), "+&r"(code_word), "=&r"(tmp), + "+&r"(c->bits), "+&r"(c->buffer) + : "r"(high), "r"(pr), "r"(c->end - 1), "0"(shift) + : "cc"); if (code_word >= tmp) { c->high = high - low; @@ -89,4 +91,4 @@ static inline int vp56_rac_get_prob_branchy_armv6(VP56RangeCoder *c, int pr) #endif -#endif +#endif /* AVCODEC_ARM_VP56_ARITH_H */ diff --git a/libavcodec/arm/vp8.h b/libavcodec/arm/vp8.h index 90e7344b62..55193394c5 100644 --- a/libavcodec/arm/vp8.h +++ b/libavcodec/arm/vp8.h @@ -26,4 +26,4 @@ int ff_decode_block_coeffs_armv6(VP56RangeCoder *rc, DCTELEM block[16], int i, uint8_t *token_prob, int16_t qmul[2]); #endif -#endif +#endif /* AVCODEC_ARM_VP8_H */ diff --git a/libavcodec/arm/vp8_armv6.S b/libavcodec/arm/vp8_armv6.S index aee9c52608..93f4dd664b 100644 --- a/libavcodec/arm/vp8_armv6.S +++ b/libavcodec/arm/vp8_armv6.S @@ -20,8 +20,6 @@ #include "asm.S" - .syntax unified - .macro rac_get_prob h, bs, buf, cw, pr, t0, t1 adds \bs, \bs, \t0 lsl \cw, \cw, \t0 diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h index e4e6a9b23e..8f05fa67f6 100644 --- a/libavcodec/avcodec.h +++ b/libavcodec/avcodec.h @@ -213,6 +213,7 @@ enum CodecID { CODEC_ID_PRORES, CODEC_ID_JV, CODEC_ID_DFA, + CODEC_ID_8SVX_RAW, /* various PCM "codecs" */ CODEC_ID_PCM_S16LE= 0x10000, @@ -2690,7 +2691,7 @@ typedef struct AVCodecContext { /** * Audio channel layout. * - encoding: set by user. - * - decoding: set by libavcodec. + * - decoding: set by user, may be overwritten by libavcodec. 
*/ int64_t channel_layout; @@ -2960,6 +2961,14 @@ typedef struct AVCodecContext { enum AVAudioServiceType audio_service_type; /** + * desired sample format + * - encoding: Not used. + * - decoding: Set by user. + * Decoder will decode to this format if it can. + */ + enum AVSampleFormat request_sample_fmt; + + /** * Current statistics for PTS correction. * - decoding: maintained and used by libavcodec, not intended to be used by user apps * - encoding: unused @@ -2969,13 +2978,6 @@ typedef struct AVCodecContext { int64_t pts_correction_last_pts; /// PTS of the last frame int64_t pts_correction_last_dts; /// DTS of the last frame - /** - * desired sample format - * - encoding: Not used. - * - decoding: Set by user. - * Decoder will decode to this format if it can. - */ - enum AVSampleFormat request_sample_fmt; } AVCodecContext; @@ -3450,12 +3452,16 @@ int avpicture_layout(const AVPicture* src, enum PixelFormat pix_fmt, int width, int avpicture_get_size(enum PixelFormat pix_fmt, int width, int height); void avcodec_get_chroma_sub_sample(enum PixelFormat pix_fmt, int *h_shift, int *v_shift); +#if FF_API_GET_PIX_FMT_NAME /** * Return the short name for a pixel format. * * \see av_get_pix_fmt(), av_get_pix_fmt_string(). + * @deprecated Deprecated in favor of av_get_pix_fmt_name(). 
*/ +attribute_deprecated const char *avcodec_get_pix_fmt_name(enum PixelFormat pix_fmt); +#endif void avcodec_set_dimensions(AVCodecContext *s, int width, int height); diff --git a/libavcodec/avpacket.c b/libavcodec/avpacket.c index 82890c3676..5825854987 100644 --- a/libavcodec/avpacket.c +++ b/libavcodec/avpacket.c @@ -21,7 +21,7 @@ #include "avcodec.h" #include "libavutil/avassert.h" - +#include "bytestream.h" void av_destruct_packet_nofree(AVPacket *pkt) { @@ -115,3 +115,4 @@ void av_free_packet(AVPacket *pkt) pkt->data = NULL; pkt->size = 0; } } + diff --git a/libavcodec/bink.c b/libavcodec/bink.c index d1985eddb2..ef07747dbc 100644 --- a/libavcodec/bink.c +++ b/libavcodec/bink.c @@ -1208,7 +1208,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPac /** * Caclulate quantization tables for version b */ -static av_cold void binkb_calc_quant() +static av_cold void binkb_calc_quant(void) { uint8_t inv_bink_scan[64]; double s[64]; diff --git a/libavcodec/bitstream.c b/libavcodec/bitstream.c index 8fd44e3d78..f0fa9652c6 100644 --- a/libavcodec/bitstream.c +++ b/libavcodec/bitstream.c @@ -83,8 +83,6 @@ void ff_copy_bits(PutBitContext *pb, const uint8_t *src, int length) /* VLC decoding */ -//#define DEBUG_VLC - #define GET_DATA(v, table, i, wrap, size) \ {\ const uint8_t *ptr = (const uint8_t *)table + i * wrap;\ @@ -120,10 +118,10 @@ static int alloc_table(VLC *vlc, int size, int use_static) } static av_always_inline uint32_t bitswap_32(uint32_t x) { - return av_reverse[x&0xFF]<<24 - | av_reverse[(x>>8)&0xFF]<<16 - | av_reverse[(x>>16)&0xFF]<<8 - | av_reverse[x>>24]; + return (uint32_t)av_reverse[x&0xFF]<<24 + | (uint32_t)av_reverse[(x>>8)&0xFF]<<16 + | (uint32_t)av_reverse[(x>>16)&0xFF]<<8 + | (uint32_t)av_reverse[x>>24]; } typedef struct { @@ -164,10 +162,7 @@ static int build_table(VLC *vlc, int table_nb_bits, int nb_codes, table_size = 1 << table_nb_bits; table_index = alloc_table(vlc, table_size, flags & INIT_VLC_USE_NEW_STATIC); 
-#ifdef DEBUG_VLC - av_log(NULL,AV_LOG_DEBUG,"new table index=%d size=%d\n", - table_index, table_size); -#endif + av_dlog(NULL, "new table index=%d size=%d\n", table_index, table_size); if (table_index < 0) return -1; table = &vlc->table[table_index]; @@ -182,9 +177,7 @@ static int build_table(VLC *vlc, int table_nb_bits, int nb_codes, n = codes[i].bits; code = codes[i].code; symbol = codes[i].symbol; -#if defined(DEBUG_VLC) && 0 - av_log(NULL,AV_LOG_DEBUG,"i=%d n=%d code=0x%x\n", i, n, code); -#endif + av_dlog(NULL, "i=%d n=%d code=0x%x\n", i, n, code); if (n <= table_nb_bits) { /* no need to add another table */ j = code >> (32 - table_nb_bits); @@ -195,10 +188,7 @@ static int build_table(VLC *vlc, int table_nb_bits, int nb_codes, inc = 1 << n; } for (k = 0; k < nb; k++) { -#ifdef DEBUG_VLC - av_log(NULL, AV_LOG_DEBUG, "%4x: code=%d n=%d\n", - j, i, n); -#endif + av_dlog(NULL, "%4x: code=%d n=%d\n", j, i, n); if (table[j][1] /*bits*/ != 0) { av_log(NULL, AV_LOG_ERROR, "incorrect codes\n"); return -1; @@ -228,10 +218,8 @@ static int build_table(VLC *vlc, int table_nb_bits, int nb_codes, subtable_bits = FFMIN(subtable_bits, table_nb_bits); j = (flags & INIT_VLC_LE) ? 
bitswap_32(code_prefix) >> (32 - table_nb_bits) : code_prefix; table[j][1] = -subtable_bits; -#ifdef DEBUG_VLC - av_log(NULL,AV_LOG_DEBUG,"%4x: n=%d (subtable)\n", - j, codes[i].bits + table_nb_bits); -#endif + av_dlog(NULL, "%4x: n=%d (subtable)\n", + j, codes[i].bits + table_nb_bits); index = build_table(vlc, subtable_bits, k-i, codes+i, flags); if (index < 0) return -1; @@ -293,9 +281,7 @@ int init_vlc_sparse(VLC *vlc, int nb_bits, int nb_codes, vlc->table_size = 0; } -#ifdef DEBUG_VLC - av_log(NULL,AV_LOG_DEBUG,"build table nb_codes=%d\n", nb_codes); -#endif + av_dlog(NULL, "build table nb_codes=%d\n", nb_codes); buf = av_malloc((nb_codes+1)*sizeof(VLCcode)); diff --git a/libavcodec/cavsdsp.h b/libavcodec/cavsdsp.h index d3fae69e84..b41ad21bca 100644 --- a/libavcodec/cavsdsp.h +++ b/libavcodec/cavsdsp.h @@ -38,4 +38,4 @@ typedef struct CAVSDSPContext { void ff_cavsdsp_init(CAVSDSPContext* c, AVCodecContext *avctx); void ff_cavsdsp_init_mmx(CAVSDSPContext* c, AVCodecContext *avctx); -#endif +#endif /* AVCODEC_CAVSDSP_H */ diff --git a/libavcodec/cbrt_tablegen.h b/libavcodec/cbrt_tablegen.h index 930e513705..a9d34dc75d 100644 --- a/libavcodec/cbrt_tablegen.h +++ b/libavcodec/cbrt_tablegen.h @@ -20,8 +20,8 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#ifndef CBRT_TABLEGEN_H -#define CBRT_TABLEGEN_H +#ifndef AVCODEC_CBRT_TABLEGEN_H +#define AVCODEC_CBRT_TABLEGEN_H #include <stdint.h> #include <math.h> @@ -48,4 +48,4 @@ static void cbrt_tableinit(void) } #endif /* CONFIG_HARDCODED_TABLES */ -#endif /* CBRT_TABLEGEN_H */ +#endif /* AVCODEC_CBRT_TABLEGEN_H */ diff --git a/libavcodec/celp_filters.c b/libavcodec/celp_filters.c index 26a62eed14..8b68c2ffef 100644 --- a/libavcodec/celp_filters.c +++ b/libavcodec/celp_filters.c @@ -109,7 +109,7 @@ void ff_celp_lp_synthesis_filterf(float *out, const float *filter_coeffs, old_out2 = out[-2]; old_out3 = out[-1]; for (n = 0; n <= buffer_length - 4; n+=4) { - float tmp0,tmp1,tmp2,tmp3; + 
float tmp0,tmp1,tmp2; float val; out0 = in[0]; @@ -160,7 +160,6 @@ void ff_celp_lp_synthesis_filterf(float *out, const float *filter_coeffs, tmp0 = out0; tmp1 = out1; tmp2 = out2; - tmp3 = out3; out3 -= a * tmp2; out2 -= a * tmp1; diff --git a/libavcodec/cga_data.h b/libavcodec/cga_data.h index 788b2b3911..998dccaefb 100644 --- a/libavcodec/cga_data.h +++ b/libavcodec/cga_data.h @@ -45,4 +45,4 @@ extern const uint32_t ff_ega_palette[64]; */ void ff_draw_pc_font(uint8_t *dst, int linesize, const uint8_t *font, int font_height, int ch, int fg, int bg); -#endif +#endif /* AVCODEC_CGA_DATA_H */ diff --git a/libavcodec/cook.c b/libavcodec/cook.c index 249b966eaa..f0dee7905a 100644 --- a/libavcodec/cook.c +++ b/libavcodec/cook.c @@ -66,7 +66,6 @@ #define SUBBAND_SIZE 20 #define MAX_SUBPACKETS 5 -//#define COOKDEBUG typedef struct { int *now; @@ -166,38 +165,6 @@ typedef struct cook { static float pow2tab[127]; static float rootpow2tab[127]; -/* debug functions */ - -#ifdef COOKDEBUG -static void dump_float_table(float* table, int size, int delimiter) { - int i=0; - av_log(NULL,AV_LOG_ERROR,"\n[%d]: ",i); - for (i=0 ; i<size ; i++) { - av_log(NULL, AV_LOG_ERROR, "%5.1f, ", table[i]); - if ((i+1)%delimiter == 0) av_log(NULL,AV_LOG_ERROR,"\n[%d]: ",i+1); - } -} - -static void dump_int_table(int* table, int size, int delimiter) { - int i=0; - av_log(NULL,AV_LOG_ERROR,"\n[%d]: ",i); - for (i=0 ; i<size ; i++) { - av_log(NULL, AV_LOG_ERROR, "%d, ", table[i]); - if ((i+1)%delimiter == 0) av_log(NULL,AV_LOG_ERROR,"\n[%d]: ",i+1); - } -} - -static void dump_short_table(short* table, int size, int delimiter) { - int i=0; - av_log(NULL,AV_LOG_ERROR,"\n[%d]: ",i); - for (i=0 ; i<size ; i++) { - av_log(NULL, AV_LOG_ERROR, "%d, ", table[i]); - if ((i+1)%delimiter == 0) av_log(NULL,AV_LOG_ERROR,"\n[%d]: ",i+1); - } -} - -#endif - /*************** init functions ***************/ /* table generator */ @@ -1037,7 +1004,7 @@ static int cook_decode_frame(AVCodecContext *avctx, return 
avctx->block_align; } -#ifdef COOKDEBUG +#ifdef DEBUG static void dump_cook_context(COOKContext *q) { //int i=0; @@ -1055,7 +1022,6 @@ static void dump_cook_context(COOKContext *q) PRINT("samples_per_channel",q->subpacket[0].samples_per_channel); PRINT("samples_per_frame",q->subpacket[0].samples_per_frame); PRINT("subbands",q->subpacket[0].subbands); - PRINT("random_state",q->random_state); PRINT("js_subband_start",q->subpacket[0].js_subband_start); PRINT("log2_numvector_size",q->subpacket[0].log2_numvector_size); PRINT("numvector_size",q->subpacket[0].numvector_size); @@ -1280,7 +1246,7 @@ static av_cold int cook_decode_init(AVCodecContext *avctx) else avctx->channel_layout = (avctx->channels==2) ? AV_CH_LAYOUT_STEREO : AV_CH_LAYOUT_MONO; -#ifdef COOKDEBUG +#ifdef DEBUG dump_cook_context(q); #endif return 0; diff --git a/libavcodec/costablegen.c b/libavcodec/cos_tablegen.c index 6e818252f7..1577166a46 100644 --- a/libavcodec/costablegen.c +++ b/libavcodec/cos_tablegen.c @@ -51,10 +51,10 @@ int main(int argc, char *argv[]) { int i, j; int do_sin = argc > 1 && !strcmp(argv[1], "sin"); - int fixed = argc > 2 && !strcmp(argv[2], "fixed"); + int fixed = argc > 1 && strstr(argv[1], "fixed"); double (*func)(double) = do_sin ? sin : cos; - printf("/* This file was generated by libavcodec/costablegen */\n"); + printf("/* This file was automatically generated. */\n"); printf("#define CONFIG_FFT_FLOAT %d\n", !fixed); printf("#include \"libavcodec/%s\"\n", do_sin ? 
"rdft.h" : "fft.h"); for (i = 4; i <= BITS; i++) { diff --git a/libavcodec/crystalhd.c b/libavcodec/crystalhd.c index 3c8021748f..ce1cd55228 100644 --- a/libavcodec/crystalhd.c +++ b/libavcodec/crystalhd.c @@ -124,6 +124,7 @@ typedef struct { AVFrame pic; HANDLE dev; + AVBitStreamFilterContext *bsfc; AVCodecParserContext *parser; uint8_t is_70012; @@ -338,6 +339,9 @@ static av_cold int uninit(AVCodecContext *avctx) DtsDeviceClose(device); av_parser_close(priv->parser); + if (priv->bsfc) { + av_bitstream_filter_close(priv->bsfc); + } av_free(priv->sps_pps_buf); @@ -397,7 +401,6 @@ static av_cold int init(AVCodecContext *avctx) { uint8_t *dummy_p; int dummy_int; - AVBitStreamFilterContext *bsfc; uint32_t orig_data_size = avctx->extradata_size; uint8_t *orig_data = av_malloc(orig_data_size); @@ -409,16 +412,15 @@ static av_cold int init(AVCodecContext *avctx) memcpy(orig_data, avctx->extradata, orig_data_size); - bsfc = av_bitstream_filter_init("h264_mp4toannexb"); - if (!bsfc) { + priv->bsfc = av_bitstream_filter_init("h264_mp4toannexb"); + if (!priv->bsfc) { av_log(avctx, AV_LOG_ERROR, "Cannot open the h264_mp4toannexb BSF!\n"); av_free(orig_data); return AVERROR_BSF_NOT_FOUND; } - av_bitstream_filter_filter(bsfc, avctx, NULL, &dummy_p, + av_bitstream_filter_filter(priv->bsfc, avctx, NULL, &dummy_p, &dummy_int, NULL, 0, 0); - av_bitstream_filter_close(bsfc); priv->sps_pps_buf = avctx->extradata; priv->sps_pps_size = avctx->extradata_size; @@ -512,6 +514,7 @@ static av_cold int init(AVCodecContext *avctx) av_log(avctx, AV_LOG_WARNING, "Cannot open the h.264 parser! 
Interlaced h.264 content " "will not be detected reliably.\n"); + priv->parser->flags = PARSER_FLAG_COMPLETE_FRAMES; } av_log(avctx, AV_LOG_VERBOSE, "CrystalHD: Init complete.\n"); @@ -831,24 +834,49 @@ static int decode(AVCodecContext *avctx, void *data, int *data_size, AVPacket *a int32_t tx_free = (int32_t)DtsTxFreeSize(dev); if (priv->parser) { - uint8_t *pout; - int psize; - const uint8_t *in_data = avpkt->data; + uint8_t *in_data = avpkt->data; int in_len = len; - H264Context *h = priv->parser->priv_data; + int ret = 0; + + if (priv->bsfc) { + ret = av_bitstream_filter_filter(priv->bsfc, avctx, NULL, + &in_data, &in_len, + avpkt->data, len, 0); + } - while (in_len) { + if (ret >= 0) { + uint8_t *pout; + int psize; int index; + H264Context *h = priv->parser->priv_data; + index = av_parser_parse2(priv->parser, avctx, &pout, &psize, in_data, in_len, avctx->pkt->pts, avctx->pkt->dts, 0); - in_data += index; - in_len -= index; + if (index < 0) { + av_log(avctx, AV_LOG_WARNING, + "CrystalHD: Failed to parse h.264 packet to " + "detect interlacing.\n"); + } else if (index != in_len) { + av_log(avctx, AV_LOG_WARNING, + "CrystalHD: Failed to parse h.264 packet " + "completely. Interlaced frames may be " + "incorrectly detected\n."); + } else { + av_log(avctx, AV_LOG_VERBOSE, + "CrystalHD: parser picture type %d\n", + h->s.picture_structure); + pic_type = h->s.picture_structure; + } + } else { + av_log(avctx, AV_LOG_WARNING, + "CrystalHD: mp4toannexb filter failed to filter " + "packet. 
Interlaced frames may be incorrectly " + "detected.\n"); + } + if (ret > 0) { + av_freep(&in_data); } - av_log(avctx, AV_LOG_VERBOSE, - "CrystalHD: parser picture type %d\n", - h->s.picture_structure); - pic_type = h->s.picture_structure; } if (len < tx_free - 1024) { diff --git a/libavcodec/dca.c b/libavcodec/dca.c index 7a35631eea..0235a57a59 100644 --- a/libavcodec/dca.c +++ b/libavcodec/dca.c @@ -1535,8 +1535,6 @@ static void dca_exss_parse_header(DCAContext *s) { int ss_index; int blownup; - int header_size; - int hd_size; int num_audiop = 1; int num_assets = 1; int active_ss_mask[8]; @@ -1549,8 +1547,8 @@ static void dca_exss_parse_header(DCAContext *s) ss_index = get_bits(&s->gb, 2); blownup = get_bits1(&s->gb); - header_size = get_bits(&s->gb, 8 + 4 * blownup) + 1; - hd_size = get_bits_long(&s->gb, 16 + 4 * blownup) + 1; + skip_bits(&s->gb, 8 + 4 * blownup); // header_size + skip_bits(&s->gb, 16 + 4 * blownup); // hd_size s->static_fields = get_bits1(&s->gb); if (s->static_fields) { @@ -1622,13 +1620,13 @@ static int dca_decode_frame(AVCodecContext * avctx, { const uint8_t *buf = avpkt->data; int buf_size = avpkt->size; - int data_size_tmp; int lfe_samples; int num_core_channels = 0; int i; - float *samples_flt = data; - int16_t *samples = data; + float *samples_flt = data; + int16_t *samples_s16 = data; + int out_size; DCAContext *s = avctx->priv_data; int channels; int core_ss_end; @@ -1818,11 +1816,11 @@ static int dca_decode_frame(AVCodecContext * avctx, return -1; } - data_size_tmp = (s->sample_blocks / 8) * 256 * channels; - data_size_tmp *= avctx->sample_fmt == AV_SAMPLE_FMT_FLT ? 
sizeof(*samples_flt) : sizeof(*samples); - if (*data_size < data_size_tmp) + out_size = 256 / 8 * s->sample_blocks * channels * + (av_get_bits_per_sample_fmt(avctx->sample_fmt) / 8); + if (*data_size < out_size) return -1; - *data_size = data_size_tmp; + *data_size = out_size; /* filter to get final output */ for (i = 0; i < (s->sample_blocks / 8); i++) { @@ -1841,13 +1839,15 @@ static int dca_decode_frame(AVCodecContext * avctx, } } - /* interleave samples */ if (avctx->sample_fmt == AV_SAMPLE_FMT_FLT) { - float_interleave(samples_flt, s->samples_chanptr, 256, channels); + s->fmt_conv.float_interleave(samples_flt, s->samples_chanptr, 256, + channels); samples_flt += 256 * channels; } else { - s->fmt_conv.float_to_int16_interleave(samples, s->samples_chanptr, 256, channels); - samples += 256 * channels; + s->fmt_conv.float_to_int16_interleave(samples_s16, + s->samples_chanptr, 256, + channels); + samples_s16 += 256 * channels; } } @@ -1884,10 +1884,14 @@ static av_cold int dca_decode_init(AVCodecContext * avctx) for (i = 0; i < DCA_PRIM_CHANNELS_MAX+1; i++) s->samples_chanptr[i] = s->samples + i * 256; - avctx->sample_fmt = avctx->request_sample_fmt == AV_SAMPLE_FMT_FLT ? 
- AV_SAMPLE_FMT_FLT : AV_SAMPLE_FMT_S16; - s->scale_bias = 1.0; + if (avctx->request_sample_fmt == AV_SAMPLE_FMT_FLT) { + avctx->sample_fmt = AV_SAMPLE_FMT_FLT; + s->scale_bias = 1.0 / 32768.0; + } else { + avctx->sample_fmt = AV_SAMPLE_FMT_S16; + s->scale_bias = 1.0; + } /* allow downmixing to stereo */ if (avctx->channels > 0 && avctx->request_channels < avctx->channels && @@ -1924,5 +1928,8 @@ AVCodec ff_dca_decoder = { .close = dca_decode_end, .long_name = NULL_IF_CONFIG_SMALL("DCA (DTS Coherent Acoustics)"), .capabilities = CODEC_CAP_CHANNEL_CONF, + .sample_fmts = (const enum AVSampleFormat[]) { + AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE + }, .profiles = NULL_IF_CONFIG_SMALL(profiles), }; diff --git a/libavcodec/dct.c b/libavcodec/dct.c index 83cf1b4896..c30cff664e 100644 --- a/libavcodec/dct.c +++ b/libavcodec/dct.c @@ -30,9 +30,7 @@ #include <math.h> #include "libavutil/mathematics.h" #include "dct.h" - -#define DCT32_FLOAT -#include "dct32.c" +#include "dct32.h" /* sin((M_PI * x / (2*n)) */ #define SIN(s,n,x) (s->costab[(n) - (x)]) @@ -180,34 +178,37 @@ av_cold int ff_dct_init(DCTContext *s, int nbits, enum DCTTransformType inverse) int n = 1 << nbits; int i; + memset(s, 0, sizeof(*s)); + s->nbits = nbits; s->inverse = inverse; - ff_init_ff_cos_tabs(nbits+2); + if (inverse == DCT_II && nbits == 5) { + s->dct_calc = dct32_func; + } else { + ff_init_ff_cos_tabs(nbits+2); - s->costab = ff_cos_tabs[nbits+2]; + s->costab = ff_cos_tabs[nbits+2]; - s->csc2 = av_malloc(n/2 * sizeof(FFTSample)); + s->csc2 = av_malloc(n/2 * sizeof(FFTSample)); - if (ff_rdft_init(&s->rdft, nbits, inverse == DCT_III) < 0) { - av_free(s->csc2); - return -1; - } + if (ff_rdft_init(&s->rdft, nbits, inverse == DCT_III) < 0) { + av_free(s->csc2); + return -1; + } - for (i = 0; i < n/2; i++) - s->csc2[i] = 0.5 / sin((M_PI / (2*n) * (2*i + 1))); + for (i = 0; i < n/2; i++) + s->csc2[i] = 0.5 / sin((M_PI / (2*n) * (2*i + 1))); - switch(inverse) { - case DCT_I : s->dct_calc = 
ff_dct_calc_I_c; break; - case DCT_II : s->dct_calc = ff_dct_calc_II_c ; break; - case DCT_III: s->dct_calc = ff_dct_calc_III_c; break; - case DST_I : s->dct_calc = ff_dst_calc_I_c; break; + switch(inverse) { + case DCT_I : s->dct_calc = ff_dct_calc_I_c; break; + case DCT_II : s->dct_calc = ff_dct_calc_II_c ; break; + case DCT_III: s->dct_calc = ff_dct_calc_III_c; break; + case DST_I : s->dct_calc = ff_dst_calc_I_c; break; + } } - if (inverse == DCT_II && nbits == 5) - s->dct_calc = dct32_func; - - s->dct32 = dct32; + s->dct32 = ff_dct32_float; if (HAVE_MMX) ff_dct_init_mmx(s); return 0; diff --git a/libavcodec/dct.h b/libavcodec/dct.h index ffc1d9aed2..bb17d75d0c 100644 --- a/libavcodec/dct.h +++ b/libavcodec/dct.h @@ -49,4 +49,4 @@ void ff_dct_end (DCTContext *s); void ff_dct_init_mmx(DCTContext *s); -#endif +#endif /* AVCODEC_DCT_H */ diff --git a/libavcodec/dct32.c b/libavcodec/dct32.c index 4e843ee832..fb53d53ab1 100644 --- a/libavcodec/dct32.c +++ b/libavcodec/dct32.c @@ -19,10 +19,19 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#ifdef DCT32_FLOAT +#include "dct32.h" +#include "mathops.h" + +#if DCT32_FLOAT +# define dct32 ff_dct32_float # define FIXHR(x) ((float)(x)) # define MULH3(x, y, s) ((s)*(y)*(x)) # define INTFLOAT float +#else +# define dct32 ff_dct32_fixed +# define FIXHR(a) ((int)((a) * (1LL<<32) + 0.5)) +# define MULH3(x, y, s) MULH((s)*(x), y) +# define INTFLOAT int #endif @@ -103,7 +112,7 @@ #define ADD(a, b) val##a += val##b /* DCT32 without 1/sqrt(2) coef zero scaling. */ -static void dct32(INTFLOAT *out, const INTFLOAT *tab) +void dct32(INTFLOAT *out, const INTFLOAT *tab) { INTFLOAT tmp0, tmp1; diff --git a/libavcodec/dct32.h b/libavcodec/dct32.h new file mode 100644 index 0000000000..110338d25c --- /dev/null +++ b/libavcodec/dct32.h @@ -0,0 +1,25 @@ +/* + * This file is part of Libav. 
+ * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_DCT32_H +#define AVCODEC_DCT32_H + +void ff_dct32_float(float *dst, const float *src); +void ff_dct32_fixed(int *dst, const int *src); + +#endif diff --git a/libavcodec/dct32_fixed.c b/libavcodec/dct32_fixed.c new file mode 100644 index 0000000000..7eb9dc1a53 --- /dev/null +++ b/libavcodec/dct32_fixed.c @@ -0,0 +1,20 @@ +/* + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#define DCT32_FLOAT 0 +#include "dct32.c" diff --git a/libavcodec/dct32_float.c b/libavcodec/dct32_float.c new file mode 100644 index 0000000000..727ec3caca --- /dev/null +++ b/libavcodec/dct32_float.c @@ -0,0 +1,20 @@ +/* + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#define DCT32_FLOAT 1 +#include "dct32.c" diff --git a/libavcodec/dctref.h b/libavcodec/dctref.h index adbaf689e2..be481f07b0 100644 --- a/libavcodec/dctref.h +++ b/libavcodec/dctref.h @@ -28,4 +28,4 @@ void ff_ref_fdct(DCTELEM *block); void ff_ref_idct(DCTELEM *block); void ff_ref_dct_init(void); -#endif +#endif /* AVCODEC_DCTREF_H */ diff --git a/libavcodec/dirac.c b/libavcodec/dirac.c index c8dc7a0f62..af0d799acc 100644 --- a/libavcodec/dirac.c +++ b/libavcodec/dirac.c @@ -245,11 +245,11 @@ static int parse_source_parameters(AVCodecContext *avctx, GetBitContext *gb, int ff_dirac_parse_sequence_header(AVCodecContext *avctx, GetBitContext *gb, dirac_source_params *source) { - unsigned version_major, version_minor; + unsigned version_major; unsigned video_format, 
picture_coding_mode; version_major = svq3_get_ue_golomb(gb); - version_minor = svq3_get_ue_golomb(gb); + svq3_get_ue_golomb(gb); /* version_minor */ avctx->profile = svq3_get_ue_golomb(gb); avctx->level = svq3_get_ue_golomb(gb); video_format = svq3_get_ue_golomb(gb); diff --git a/libavcodec/dnxhddata.c b/libavcodec/dnxhddata.c index 2625a9be0c..f628a642a5 100644 --- a/libavcodec/dnxhddata.c +++ b/libavcodec/dnxhddata.c @@ -243,11 +243,11 @@ static const uint8_t dnxhd_1238_ac_index_flag[257] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }; -static const uint16_t dnxhd_1238_run_codes[62] = { +static const uint16_t dnxhd_1235_1238_1241_run_codes[62] = { 0, 4, 10, 11, 24, 25, 26, 27, 56, 57, 58, 59, 120, 242, 486, 487, 488, 489, 980, 981, 982, 983, 984, 985, 986, 987, 988, 989, 990, 991, 992, 993, 994, 995, 996, 997, 998, 999, 1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009, 1010, 1011, 1012, 1013, 1014, 1015, 1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023, }; -static const uint8_t dnxhd_1238_run_bits[62] = { +static const uint8_t dnxhd_1235_1238_1241_run_bits[62] = { 1, 3, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 8, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, }; @@ -255,34 +255,34 @@ static const uint8_t dnxhd_1238_run[62] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 20, 21, 17, 18, 19, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, }; -static const uint8_t dnxhd_1241_dc_codes[14] = { +static const uint8_t dnxhd_1235_1241_dc_codes[14] = { 10, 62, 11, 12, 13, 0, 1, 2, 3, 4, 14, 30, 126, 127, }; -static const uint8_t dnxhd_1241_dc_bits[14] = { +static const uint8_t dnxhd_1235_1241_dc_bits[14] = { 4, 6, 4, 4, 4, 3, 3, 3, 3, 3, 4, 5, 7, 7, }; -static const uint16_t dnxhd_1241_ac_codes[257] = { +static const uint16_t dnxhd_1235_1241_ac_codes[257] = { 0, 1, 4, 10, 11, 24, 25, 26, 54, 55, 56, 57, 116, 117, 118, 119, 240, 241, 242, 243, 244, 245, 492, 493, 494, 495, 496, 497, 498, 998, 999, 1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024, 2025, 2026, 4054, 4055, 4056, 4057, 4058, 4059, 4060, 4061, 4062, 4063, 4064, 4065, 4066, 4067, 4068, 4069, 8140, 8141, 8142, 8143, 8144, 8145, 8146, 8147, 8148, 8149, 8150, 8151, 8152, 8153, 8154, 8155, 8156, 8157, 16316, 16317, 16318, 16319, 16320, 16321, 16322, 16323, 16324, 16325, 16326, 16327, 16328, 16329, 16330, 16331, 16332, 16333, 16334, 16335, 16336, 16337, 32676, 32677, 32678, 32679, 32680, 32681, 32682, 32683, 32684, 32685, 32686, 32687, 32688, 32689, 32690, 32691, 32692, 32693, 32694, 32695, 32696, 32697, 32698, 32699, 32700, 32701, 32702, 32703, 32704, 32705, 32706, 32707, 32708, 65418, 65419, 65420, 65421, 65422, 65423, 65424, 65425, 65426, 65427, 65428, 65429, 65430, 65431, 65432, 65433, 65434, 65435, 65436, 65437, 65438, 65439, 65440, 65441, 65442, 65443, 65444, 65445, 65446, 65447, 65448, 65449, 65450, 65451, 65452, 65453, 65454, 65455, 65456, 65457, 65458, 65459, 65460, 65461, 65462, 65463, 65464, 65465, 65466, 65467, 65468, 65469, 65470, 
65471, 65472, 65473, 65474, 65475, 65476, 65477, 65478, 65479, 65480, 65481, 65482, 65483, 65484, 65485, 65486, 65487, 65488, 65489, 65490, 65491, 65492, 65493, 65494, 65495, 65496, 65497, 65498, 65499, 65500, 65501, 65502, 65503, 65504, 65505, 65506, 65507, 65508, 65509, 65510, 65511, 65512, 65513, 65514, 65515, 65516, 65517, 65518, 65519, 65520, 65521, 65522, 65523, 65524, 65525, 65526, 65527, 65528, 65529, 65530, 65531, 65532, 65533, 65534, 65535, }; -static const uint8_t dnxhd_1241_ac_bits[257] = { +static const uint8_t dnxhd_1235_1241_ac_bits[257] = { 2, 2, 3, 4, 4, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, }; -static const uint8_t dnxhd_1241_ac_level[257] = { +static const uint8_t dnxhd_1235_1241_ac_level[257] = { 1, 1, 2, 3, 0, 4, 5, 2, 6, 7, 8, 3, 9, 10, 11, 4, 12, 13, 14, 15, 16, 5, 17, 18, 19, 20, 21, 6, 7, 22, 23, 24, 25, 26, 27, 28, 29, 8, 9, 30, 31, 32, 33, 34, 35, 36, 37, 38, 10, 11, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 12, 13, 14, 15, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 1, 16, 17, 18, 19, 64, 2, 3, 
4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 20, 21, 22, 23, 24, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 25, 26, 27, 28, 29, 30, 31, 32, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, }; -static const uint8_t dnxhd_1241_ac_run_flag[257] = { +static const uint8_t dnxhd_1235_1241_ac_run_flag[257] = { 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }; -static const uint8_t dnxhd_1241_ac_index_flag[257] = { +static const uint8_t dnxhd_1235_1241_ac_index_flag[257] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }; -static const uint8_t dnxhd_1241_run[62] = { +static const uint8_t dnxhd_1235_1241_run[62] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 20, 17, 19, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, }; @@ -352,14 +352,14 @@ const CIDEntry ff_dnxhd_cid_table[] = { dnxhd_1238_dc_codes, dnxhd_1238_dc_bits, dnxhd_1238_ac_codes, dnxhd_1238_ac_bits, dnxhd_1238_ac_level, dnxhd_1238_ac_run_flag, dnxhd_1238_ac_index_flag, - dnxhd_1238_run_codes, dnxhd_1238_run_bits, dnxhd_1238_run, + dnxhd_1235_1238_1241_run_codes, dnxhd_1235_1238_1241_run_bits, dnxhd_1238_run, { 175, 185, 220, 365, 440 } }, { 1241, 1920, 1080, 1, 917504, 458752, 6, 10, dnxhd_1241_luma_weight, dnxhd_1241_chroma_weight, - dnxhd_1241_dc_codes, dnxhd_1241_dc_bits, - dnxhd_1241_ac_codes, dnxhd_1241_ac_bits, dnxhd_1241_ac_level, - dnxhd_1241_ac_run_flag, dnxhd_1241_ac_index_flag, - dnxhd_1238_run_codes, dnxhd_1238_run_bits, dnxhd_1241_run, + dnxhd_1235_1241_dc_codes, dnxhd_1235_1241_dc_bits, + dnxhd_1235_1241_ac_codes, dnxhd_1235_1241_ac_bits, dnxhd_1235_1241_ac_level, + dnxhd_1235_1241_ac_run_flag, dnxhd_1235_1241_ac_index_flag, + dnxhd_1235_1238_1241_run_codes, dnxhd_1235_1238_1241_run_bits, dnxhd_1235_1241_run, { 185, 220 } }, { 1242, 1920, 1080, 1, 606208, 303104, 4, 8, dnxhd_1242_luma_weight, dnxhd_1242_chroma_weight, @@ -373,7 +373,7 @@ const CIDEntry ff_dnxhd_cid_table[] = { dnxhd_1238_dc_codes, dnxhd_1238_dc_bits, 
dnxhd_1238_ac_codes, dnxhd_1238_ac_bits, dnxhd_1238_ac_level, dnxhd_1238_ac_run_flag, dnxhd_1238_ac_index_flag, - dnxhd_1238_run_codes, dnxhd_1238_run_bits, dnxhd_1238_run, + dnxhd_1235_1238_1241_run_codes, dnxhd_1235_1238_1241_run_bits, dnxhd_1238_run, { 185, 220 } }, { 1251, 1280, 720, 0, 458752, 458752, 4, 8, dnxhd_1251_luma_weight, dnxhd_1251_chroma_weight, diff --git a/libavcodec/dnxhdenc.h b/libavcodec/dnxhdenc.h index d2251d5b36..a968ae0223 100644 --- a/libavcodec/dnxhdenc.h +++ b/libavcodec/dnxhdenc.h @@ -25,8 +25,9 @@ #define AVCODEC_DNXHDENC_H #include <stdint.h> -#include "libavcodec/mpegvideo.h" -#include "libavcodec/dnxhddata.h" + +#include "mpegvideo.h" +#include "dnxhddata.h" typedef struct { uint16_t mb; diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h index 02a8a1a3a5..8dd26849e9 100644 --- a/libavcodec/dsputil.h +++ b/libavcodec/dsputil.h @@ -513,7 +513,7 @@ typedef struct DSPContext { #define BASIS_SHIFT 16 #define RECON_SHIFT 6 - void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w, int sides); + void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w, int h, int sides); #define EDGE_WIDTH 16 #define EDGE_TOP 1 #define EDGE_BOTTOM 2 @@ -636,13 +636,6 @@ static inline int get_penalty_factor(int lambda, int lambda2, int type){ } } -/** - * Empty mmx state. - * this must be called between any dsp function and float/double code. 
- * for example sin(); dsp->idct_put(); emms_c(); cos() - */ -#define emms_c() - void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx); void dsputil_init_arm(DSPContext* c, AVCodecContext *avctx); void dsputil_init_bfin(DSPContext* c, AVCodecContext *avctx); @@ -660,22 +653,9 @@ void ff_intrax8dsp_init(DSPContext* c, AVCodecContext *avctx); void ff_mlp_init(DSPContext* c, AVCodecContext *avctx); void ff_mlp_init_x86(DSPContext* c, AVCodecContext *avctx); -#if HAVE_MMX -#undef emms_c +#if ARCH_ARM -static inline void emms(void) -{ - __asm__ volatile ("emms;":::"memory"); -} - -#define emms_c() \ -{\ - if(av_get_cpu_flags() & AV_CPU_FLAG_MMX)\ - emms();\ -} - -#elif ARCH_ARM #if HAVE_NEON # define STRIDE_ALIGN 16 @@ -715,11 +695,6 @@ static inline void emms(void) # define LOCAL_ALIGNED_16(t, v, ...) LOCAL_ALIGNED(16, t, v, __VA_ARGS__) #endif -/* PSNR */ -void get_psnr(uint8_t *orig_image[3], uint8_t *coded_image[3], - int orig_linesize[3], int coded_linesize, - AVCodecContext *avctx); - #define WRAPPER8_16(name8, name16)\ static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\ return name8(s, dst , src , stride, h)\ diff --git a/libavcodec/dsputil_template.c b/libavcodec/dsputil_template.c index 1d73a7d858..58533d2ce9 100644 --- a/libavcodec/dsputil_template.c +++ b/libavcodec/dsputil_template.c @@ -79,7 +79,7 @@ static inline void FUNC(copy_block16)(uint8_t *dst, const uint8_t *src, int dstS /* draw the edges of width 'w' of an image of size width, height */ //FIXME check that this is ok for mpeg4 interlaced -static void FUNCC(draw_edges)(uint8_t *p_buf, int p_wrap, int width, int height, int w, int sides) +static void FUNCC(draw_edges)(uint8_t *p_buf, int p_wrap, int width, int height, int w, int h, int sides) { pixel *buf = (pixel*)p_buf; int wrap = p_wrap / sizeof(pixel); @@ -106,10 +106,10 @@ static void FUNCC(draw_edges)(uint8_t *p_buf, int p_wrap, int width, int height, buf -= w; last_line = buf + (height - 1) * 
wrap; if (sides & EDGE_TOP) - for(i = 0; i < w; i++) + for(i = 0; i < h; i++) memcpy(buf - (i + 1) * wrap, buf, (width + w + w) * sizeof(pixel)); // top if (sides & EDGE_BOTTOM) - for (i = 0; i < w; i++) + for (i = 0; i < h; i++) memcpy(last_line + (i + 1) * wrap, last_line, (width + w + w) * sizeof(pixel)); // bottom } diff --git a/libavcodec/dv.c b/libavcodec/dv.c index 9b24d13967..d6c49c86cb 100644 --- a/libavcodec/dv.c +++ b/libavcodec/dv.c @@ -38,6 +38,7 @@ * DV codec. */ #define ALT_BITSTREAM_READER +#include "libavutil/pixdesc.h" #include "avcodec.h" #include "dsputil.h" #include "get_bits.h" @@ -350,16 +351,13 @@ static av_cold int dvvideo_init_encoder(AVCodecContext *avctx) { if (!ff_dv_codec_profile(avctx)) { av_log(avctx, AV_LOG_ERROR, "Found no DV profile for %ix%i %s video\n", - avctx->width, avctx->height, avcodec_get_pix_fmt_name(avctx->pix_fmt)); + avctx->width, avctx->height, av_get_pix_fmt_name(avctx->pix_fmt)); return -1; } return dvvideo_init(avctx); } -// #define VLC_DEBUG -// #define printf(...) 
av_log(NULL, AV_LOG_ERROR, __VA_ARGS__) - typedef struct BlockInfo { const uint32_t *factor_table; const uint8_t *scan_table; @@ -403,9 +401,8 @@ static void dv_decode_ac(GetBitContext *gb, BlockInfo *mb, DCTELEM *block) /* get the AC coefficients until last_index is reached */ for (;;) { -#ifdef VLC_DEBUG - printf("%2d: bits=%04x index=%d\n", pos, SHOW_UBITS(re, gb, 16), re_index); -#endif + av_dlog(NULL, "%2d: bits=%04x index=%d\n", pos, SHOW_UBITS(re, gb, 16), + re_index); /* our own optimized GET_RL_VLC */ index = NEG_USR32(re_cache, TEX_VLC_BITS); vlc_len = dv_rl_vlc[index].len; @@ -426,9 +423,7 @@ static void dv_decode_ac(GetBitContext *gb, BlockInfo *mb, DCTELEM *block) } re_index += vlc_len; -#ifdef VLC_DEBUG - printf("run=%d level=%d\n", run, level); -#endif + av_dlog(NULL, "run=%d level=%d\n", run, level); pos += run; if (pos >= 64) break; @@ -532,9 +527,7 @@ static int dv_decode_video_segment(AVCodecContext *avctx, void *arg) mb->pos = 0; mb->partial_bit_count = 0; -#ifdef VLC_DEBUG - printf("MB block: %d, %d ", mb_index, j); -#endif + av_dlog(avctx, "MB block: %d, %d ", mb_index, j); dv_decode_ac(&gb, mb, block); /* write the remaining bits in a new buffer only if the @@ -547,9 +540,7 @@ static int dv_decode_video_segment(AVCodecContext *avctx, void *arg) } /* pass 2 : we can do it just after */ -#ifdef VLC_DEBUG - printf("***pass 2 size=%d MB#=%d\n", put_bits_count(&pb), mb_index); -#endif + av_dlog(avctx, "***pass 2 size=%d MB#=%d\n", put_bits_count(&pb), mb_index); block = block1; mb = mb1; init_get_bits(&gb, mb_bit_buffer, put_bits_count(&pb)); @@ -569,9 +560,7 @@ static int dv_decode_video_segment(AVCodecContext *avctx, void *arg) } /* we need a pass other the whole video segment */ -#ifdef VLC_DEBUG - printf("***pass 3 size=%d\n", put_bits_count(&vs_pb)); -#endif + av_dlog(avctx, "***pass 3 size=%d\n", put_bits_count(&vs_pb)); block = &sblock[0][0]; mb = mb_data; init_get_bits(&gb, vs_bit_buffer, put_bits_count(&vs_pb)); @@ -579,9 +568,7 @@ static 
int dv_decode_video_segment(AVCodecContext *avctx, void *arg) for (mb_index = 0; mb_index < 5; mb_index++) { for (j = 0; j < s->sys->bpm; j++) { if (mb->pos < 64) { -#ifdef VLC_DEBUG - printf("start %d:%d\n", mb_index, j); -#endif + av_dlog(avctx, "start %d:%d\n", mb_index, j); dv_decode_ac(&gb, mb, block); } if (mb->pos >= 64 && mb->pos < 127) diff --git a/libavcodec/dv_tablegen.h b/libavcodec/dv_tablegen.h index 47e8c362c1..678be73940 100644 --- a/libavcodec/dv_tablegen.h +++ b/libavcodec/dv_tablegen.h @@ -20,8 +20,8 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#ifndef DV_TABLEGEN_H -#define DV_TABLEGEN_H +#ifndef AVCODEC_DV_TABLEGEN_H +#define AVCODEC_DV_TABLEGEN_H #include <stdint.h> #include "dv_vlc_data.h" @@ -93,4 +93,4 @@ static void dv_vlc_map_tableinit(void) } #endif /* CONFIG_HARDCODED_TABLES */ -#endif /* DV_TABLEGEN_H */ +#endif /* AVCODEC_DV_TABLEGEN_H */ diff --git a/libavcodec/dvbsub_parser.c b/libavcodec/dvbsub_parser.c index 6ecd44af13..59d03d1d46 100644 --- a/libavcodec/dvbsub_parser.c +++ b/libavcodec/dvbsub_parser.c @@ -22,9 +22,6 @@ #include "dsputil.h" #include "get_bits.h" -//#define DEBUG -//#define DEBUG_PACKET_CONTENTS - /* Parser (mostly) copied from dvdsub.c */ #define PARSE_BUF_SIZE (65536) @@ -53,25 +50,20 @@ static int dvbsub_parse(AVCodecParserContext *s, { DVBSubParseContext *pc = s->priv_data; uint8_t *p, *p_end; - int len, buf_pos = 0; + int i, len, buf_pos = 0; av_dlog(avctx, "DVB parse packet pts=%"PRIx64", lpts=%"PRIx64", cpts=%"PRIx64":\n", s->pts, s->last_pts, s->cur_frame_pts[s->cur_frame_start_index]); -#ifdef DEBUG_PACKET_CONTENTS - int i; - for (i=0; i < buf_size; i++) { - av_log(avctx, AV_LOG_INFO, "%02x ", buf[i]); + av_dlog(avctx, "%02x ", buf[i]); if (i % 16 == 15) - av_log(avctx, AV_LOG_INFO, "\n"); + av_dlog(avctx, "\n"); } if (i % 16 != 0) - av_log(avctx, AV_LOG_INFO, "\n"); - -#endif + av_dlog(avctx, "\n"); *poutbuf = NULL; *poutbuf_size = 0; diff --git 
a/libavcodec/dvbsubdec.c b/libavcodec/dvbsubdec.c index cd2b53decf..5b09eb14d6 100644 --- a/libavcodec/dvbsubdec.c +++ b/libavcodec/dvbsubdec.c @@ -24,10 +24,6 @@ #include "bytestream.h" #include "libavutil/colorspace.h" -//#define DEBUG -//#define DEBUG_PACKET_CONTENTS -//#define DEBUG_SAVE_IMAGES - #define DVBSUB_PAGE_SEGMENT 0x10 #define DVBSUB_REGION_SEGMENT 0x11 #define DVBSUB_CLUT_SEGMENT 0x12 @@ -37,8 +33,9 @@ #define cm (ff_cropTbl + MAX_NEG_CROP) -#ifdef DEBUG_SAVE_IMAGES +#ifdef DEBUG #undef fprintf +#undef perror #if 0 static void png_save(const char *filename, uint8_t *bitmap, int w, int h, uint32_t *rgba_palette) @@ -53,7 +50,7 @@ static void png_save(const char *filename, uint8_t *bitmap, int w, int h, f = fopen(fname, "w"); if (!f) { perror(fname); - exit(1); + return; } fprintf(f, "P6\n" "%d %d\n" @@ -75,7 +72,7 @@ static void png_save(const char *filename, uint8_t *bitmap, int w, int h, f = fopen(fname2, "w"); if (!f) { perror(fname2); - exit(1); + return; } fprintf(f, "P5\n" "%d %d\n" @@ -109,7 +106,7 @@ static void png_save2(const char *filename, uint32_t *bitmap, int w, int h) f = fopen(fname, "w"); if (!f) { perror(fname); - exit(1); + return; } fprintf(f, "P6\n" "%d %d\n" @@ -131,7 +128,7 @@ static void png_save2(const char *filename, uint32_t *bitmap, int w, int h) f = fopen(fname2, "w"); if (!f) { perror(fname2); - exit(1); + return; } fprintf(f, "P5\n" "%d %d\n" @@ -768,20 +765,17 @@ static void dvbsub_parse_pixel_data_block(AVCodecContext *avctx, DVBSubObjectDis av_dlog(avctx, "DVB pixel block size %d, %s field:\n", buf_size, top_bottom ? 
"bottom" : "top"); -#ifdef DEBUG_PACKET_CONTENTS for (i = 0; i < buf_size; i++) { if (i % 16 == 0) - av_log(avctx, AV_LOG_INFO, "0x%08p: ", buf+i); + av_dlog(avctx, "0x%8p: ", buf+i); - av_log(avctx, AV_LOG_INFO, "%02x ", buf[i]); + av_dlog(avctx, "%02x ", buf[i]); if (i % 16 == 15) - av_log(avctx, AV_LOG_INFO, "\n"); + av_dlog(avctx, "\n"); } if (i % 16) - av_log(avctx, AV_LOG_INFO, "\n"); - -#endif + av_dlog(avctx, "\n"); if (region == 0) return; @@ -930,27 +924,22 @@ static void dvbsub_parse_clut_segment(AVCodecContext *avctx, DVBSubContext *ctx = avctx->priv_data; const uint8_t *buf_end = buf + buf_size; - int clut_id; + int i, clut_id; DVBSubCLUT *clut; int entry_id, depth , full_range; int y, cr, cb, alpha; int r, g, b, r_add, g_add, b_add; -#ifdef DEBUG_PACKET_CONTENTS - int i; - - av_log(avctx, AV_LOG_INFO, "DVB clut packet:\n"); + av_dlog(avctx, "DVB clut packet:\n"); for (i=0; i < buf_size; i++) { - av_log(avctx, AV_LOG_INFO, "%02x ", buf[i]); + av_dlog(avctx, "%02x ", buf[i]); if (i % 16 == 15) - av_log(avctx, AV_LOG_INFO, "\n"); + av_dlog(avctx, "\n"); } if (i % 16) - av_log(avctx, AV_LOG_INFO, "\n"); - -#endif + av_dlog(avctx, "\n"); clut_id = *buf++; buf += 1; @@ -1194,7 +1183,7 @@ static void dvbsub_parse_page_segment(AVCodecContext *avctx, } -#ifdef DEBUG_SAVE_IMAGES +#ifdef DEBUG static void save_display_set(DVBSubContext *ctx) { DVBSubRegion *region; @@ -1404,7 +1393,7 @@ static int dvbsub_display_end_segment(AVCodecContext *avctx, const uint8_t *buf, sub->num_rects = i; -#ifdef DEBUG_SAVE_IMAGES +#ifdef DEBUG save_display_set(ctx); #endif @@ -1423,22 +1412,18 @@ static int dvbsub_decode(AVCodecContext *avctx, int segment_type; int page_id; int segment_length; - -#ifdef DEBUG_PACKET_CONTENTS int i; - av_log(avctx, AV_LOG_INFO, "DVB sub packet:\n"); + av_dlog(avctx, "DVB sub packet:\n"); for (i=0; i < buf_size; i++) { - av_log(avctx, AV_LOG_INFO, "%02x ", buf[i]); + av_dlog(avctx, "%02x ", buf[i]); if (i % 16 == 15) - av_log(avctx, AV_LOG_INFO, 
"\n"); + av_dlog(avctx, "\n"); } if (i % 16) - av_log(avctx, AV_LOG_INFO, "\n"); - -#endif + av_dlog(avctx, "\n"); if (buf_size <= 6 || *buf != 0x0f) { av_dlog(avctx, "incomplete or broken packet"); diff --git a/libavcodec/eac3enc.c b/libavcodec/eac3enc.c new file mode 100644 index 0000000000..d37acaf20b --- /dev/null +++ b/libavcodec/eac3enc.c @@ -0,0 +1,155 @@ +/* + * E-AC-3 encoder + * Copyright (c) 2011 Justin Ruggles <justin.ruggles@gmail.com> + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * E-AC-3 encoder + */ + +#define CONFIG_AC3ENC_FLOAT 1 +#include "ac3enc.h" +#include "eac3enc.h" + + +#define AC3ENC_TYPE AC3ENC_TYPE_EAC3 +#include "ac3enc_opts_template.c" +static AVClass eac3enc_class = { "E-AC-3 Encoder", av_default_item_name, + eac3_options, LIBAVUTIL_VERSION_INT }; + + +void ff_eac3_set_cpl_states(AC3EncodeContext *s) +{ + int ch, blk; + int first_cpl_coords[AC3_MAX_CHANNELS]; + + /* set first cpl coords */ + for (ch = 1; ch <= s->fbw_channels; ch++) + first_cpl_coords[ch] = 1; + for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) { + AC3Block *block = &s->blocks[blk]; + for (ch = 1; ch <= s->fbw_channels; ch++) { + if (block->channel_in_cpl[ch]) { + if (first_cpl_coords[ch]) { + block->new_cpl_coords = 2; + first_cpl_coords[ch] = 0; + } + } else { + first_cpl_coords[ch] = 1; + } + } + } + + /* set first cpl leak */ + for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) { + AC3Block *block = &s->blocks[blk]; + if (block->cpl_in_use) { + block->new_cpl_leak = 2; + break; + } + } +} + + +void ff_eac3_output_frame_header(AC3EncodeContext *s) +{ + int blk, ch; + AC3EncOptions *opt = &s->options; + + put_bits(&s->pb, 16, 0x0b77); /* sync word */ + + /* BSI header */ + put_bits(&s->pb, 2, 0); /* stream type = independent */ + put_bits(&s->pb, 3, 0); /* substream id = 0 */ + put_bits(&s->pb, 11, (s->frame_size / 2) - 1); /* frame size */ + if (s->bit_alloc.sr_shift) { + put_bits(&s->pb, 2, 0x3); /* fscod2 */ + put_bits(&s->pb, 2, s->bit_alloc.sr_code); /* sample rate code */ + } else { + put_bits(&s->pb, 2, s->bit_alloc.sr_code); /* sample rate code */ + put_bits(&s->pb, 2, 0x3); /* number of blocks = 6 */ + } + put_bits(&s->pb, 3, s->channel_mode); /* audio coding mode */ + put_bits(&s->pb, 1, s->lfe_on); /* LFE channel 
indicator */ + put_bits(&s->pb, 5, s->bitstream_id); /* bitstream id (EAC3=16) */ + put_bits(&s->pb, 5, -opt->dialogue_level); /* dialogue normalization level */ + put_bits(&s->pb, 1, 0); /* no compression gain */ + put_bits(&s->pb, 1, 0); /* no mixing metadata */ + /* TODO: mixing metadata */ + put_bits(&s->pb, 1, 0); /* no info metadata */ + /* TODO: info metadata */ + put_bits(&s->pb, 1, 0); /* no additional bit stream info */ + + /* frame header */ + put_bits(&s->pb, 1, 1); /* exponent strategy syntax = each block */ + put_bits(&s->pb, 1, 0); /* aht enabled = no */ + put_bits(&s->pb, 2, 0); /* snr offset strategy = 1 */ + put_bits(&s->pb, 1, 0); /* transient pre-noise processing enabled = no */ + put_bits(&s->pb, 1, 0); /* block switch syntax enabled = no */ + put_bits(&s->pb, 1, 0); /* dither flag syntax enabled = no */ + put_bits(&s->pb, 1, 0); /* bit allocation model syntax enabled = no */ + put_bits(&s->pb, 1, 0); /* fast gain codes enabled = no */ + put_bits(&s->pb, 1, 0); /* dba syntax enabled = no */ + put_bits(&s->pb, 1, 0); /* skip field syntax enabled = no */ + put_bits(&s->pb, 1, 0); /* spx enabled = no */ + /* coupling strategy use flags */ + if (s->channel_mode > AC3_CHMODE_MONO) { + put_bits(&s->pb, 1, s->blocks[0].cpl_in_use); + for (blk = 1; blk < AC3_MAX_BLOCKS; blk++) { + AC3Block *block = &s->blocks[blk]; + put_bits(&s->pb, 1, block->new_cpl_strategy); + if (block->new_cpl_strategy) + put_bits(&s->pb, 1, block->cpl_in_use); + } + } + /* exponent strategy */ + for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) + for (ch = !s->blocks[blk].cpl_in_use; ch <= s->fbw_channels; ch++) + put_bits(&s->pb, 2, s->exp_strategy[ch][blk]); + if (s->lfe_on) { + for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) + put_bits(&s->pb, 1, s->exp_strategy[s->lfe_channel][blk]); + } + /* E-AC-3 to AC-3 converter exponent strategy (unfortunately not optional...) 
*/ + for (ch = 1; ch <= s->fbw_channels; ch++) + put_bits(&s->pb, 5, 0); + /* snr offsets */ + put_bits(&s->pb, 6, s->coarse_snr_offset); + put_bits(&s->pb, 4, s->fine_snr_offset[1]); + /* block start info */ + put_bits(&s->pb, 1, 0); +} + + +#if CONFIG_EAC3_ENCODER +AVCodec ff_eac3_encoder = { + .name = "eac3", + .type = AVMEDIA_TYPE_AUDIO, + .id = CODEC_ID_EAC3, + .priv_data_size = sizeof(AC3EncodeContext), + .init = ff_ac3_encode_init, + .encode = ff_ac3_encode_frame, + .close = ff_ac3_encode_close, + .sample_fmts = (const enum AVSampleFormat[]){AV_SAMPLE_FMT_FLT,AV_SAMPLE_FMT_NONE}, + .long_name = NULL_IF_CONFIG_SMALL("ATSC A/52 E-AC-3"), + .priv_class = &eac3enc_class, + .channel_layouts = ff_ac3_channel_layouts, +}; +#endif diff --git a/libavcodec/eac3enc.h b/libavcodec/eac3enc.h new file mode 100644 index 0000000000..eacb8cf164 --- /dev/null +++ b/libavcodec/eac3enc.h @@ -0,0 +1,44 @@ +/* + * E-AC-3 encoder + * Copyright (c) 2011 Justin Ruggles <justin.ruggles@gmail.com> + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * E-AC-3 encoder + */ + +#ifndef AVCODEC_EAC3ENC_H +#define AVCODEC_EAC3ENC_H + +#include "ac3enc.h" + +/** + * Set coupling states. 
+ * This determines whether certain flags must be written to the bitstream or + * whether they will be implicitly already known by the decoder. + */ +void ff_eac3_set_cpl_states(AC3EncodeContext *s); + +/** + * Write the E-AC-3 frame header to the output bitstream. + */ +void ff_eac3_output_frame_header(AC3EncodeContext *s); + +#endif /* AVCODEC_EAC3ENC_H */ diff --git a/libavcodec/error_resilience.c b/libavcodec/error_resilience.c index c9231e206f..68e465796a 100644 --- a/libavcodec/error_resilience.c +++ b/libavcodec/error_resilience.c @@ -32,6 +32,7 @@ #include "mpegvideo.h" #include "h264.h" #include "rectangle.h" +#include "thread.h" /* * H264 redefines mb_intra so it is not mistakely used (its uninitialized in h264) @@ -436,8 +437,7 @@ int score_sum=0; int best_score=256*256*256*64; int best_pred=0; const int mot_index= (mb_x + mb_y*mot_stride) * mot_step; - int prev_x= s->current_picture.motion_val[0][mot_index][0]; - int prev_y= s->current_picture.motion_val[0][mot_index][1]; + int prev_x, prev_y, prev_ref; if((mb_x^mb_y^pass)&1) continue; @@ -535,10 +535,26 @@ skip_mean_and_median: /* zero MV */ pred_count++; + if (!fixed[mb_xy] && 0) { + if (s->avctx->codec_id == CODEC_ID_H264) { + // FIXME + } else { + ff_thread_await_progress((AVFrame *) s->last_picture_ptr, + mb_y, 0); + } + prev_x = s->last_picture.motion_val[0][mot_index][0]; + prev_y = s->last_picture.motion_val[0][mot_index][1]; + prev_ref = s->last_picture.ref_index[0][4*mb_xy]; + } else { + prev_x = s->current_picture.motion_val[0][mot_index][0]; + prev_y = s->current_picture.motion_val[0][mot_index][1]; + prev_ref = s->current_picture.ref_index[0][4*mb_xy]; + } + /* last MV */ - mv_predictor[pred_count][0]= s->current_picture.motion_val[0][mot_index][0]; - mv_predictor[pred_count][1]= s->current_picture.motion_val[0][mot_index][1]; - ref [pred_count] = s->current_picture.ref_index[0][4*mb_xy]; + mv_predictor[pred_count][0]= prev_x; + mv_predictor[pred_count][1]= prev_y; + ref [pred_count] = 
prev_ref; pred_count++; s->mv_dir = MV_DIR_FORWARD; @@ -670,6 +686,12 @@ static int is_intra_more_likely(MpegEncContext *s){ uint8_t *mb_ptr = s->current_picture.data[0] + mb_x*16 + mb_y*16*s->linesize; uint8_t *last_mb_ptr= s->last_picture.data [0] + mb_x*16 + mb_y*16*s->linesize; + if (s->avctx->codec_id == CODEC_ID_H264) { + // FIXME + } else { + ff_thread_await_progress((AVFrame *) s->last_picture_ptr, + mb_y, 0); + } is_intra_likely += s->dsp.sad[0](NULL, last_mb_ptr, mb_ptr , s->linesize, 16); // FIXME need await_progress() here is_intra_likely -= s->dsp.sad[0](NULL, last_mb_ptr, last_mb_ptr+s->linesize*16, s->linesize, 16); @@ -690,6 +712,7 @@ void ff_er_frame_start(MpegEncContext *s){ memset(s->error_status_table, MV_ERROR|AC_ERROR|DC_ERROR|VP_START|AC_END|DC_END|MV_END, s->mb_stride*s->mb_height*sizeof(uint8_t)); s->error_count= 3*s->mb_num; + s->error_occurred = 0; } /** @@ -729,7 +752,10 @@ void ff_er_add_slice(MpegEncContext *s, int startx, int starty, int endx, int en s->error_count -= end_i - start_i + 1; } - if(status & (AC_ERROR|DC_ERROR|MV_ERROR)) s->error_count= INT_MAX; + if(status & (AC_ERROR|DC_ERROR|MV_ERROR)) { + s->error_occurred = 1; + s->error_count= INT_MAX; + } if(mask == ~0x7F){ memset(&s->error_status_table[start_xy], 0, (end_xy - start_xy) * sizeof(uint8_t)); @@ -1009,7 +1035,12 @@ void ff_er_frame_end(MpegEncContext *s){ int time_pp= s->pp_time; int time_pb= s->pb_time; - // FIXME await_progress here + if (s->avctx->codec_id == CODEC_ID_H264) { + //FIXME + } else { + ff_thread_await_progress((AVFrame *) s->next_picture_ptr, + mb_y, 0); + } s->mv[0][0][0] = s->next_picture.motion_val[0][xy][0]*time_pb/time_pp; s->mv[0][0][1] = s->next_picture.motion_val[0][xy][1]*time_pb/time_pp; s->mv[1][0][0] = s->next_picture.motion_val[0][xy][0]*(time_pb - time_pp)/time_pp; diff --git a/libavcodec/ffv1.c b/libavcodec/ffv1.c index 4e0e5f7b62..a0a4a1d009 100644 --- a/libavcodec/ffv1.c +++ b/libavcodec/ffv1.c @@ -1146,6 +1146,7 @@ static int 
encode_slice(AVCodecContext *c, void *arg){ int x= fs->slice_x; int y= fs->slice_y; AVFrame * const p= &f->picture; + const int ps= (c->bits_per_raw_sample>8)+1; if(f->colorspace==0){ const int chroma_width = -((-width )>>f->chroma_h_shift); @@ -1153,12 +1154,12 @@ static int encode_slice(AVCodecContext *c, void *arg){ const int cx= x>>f->chroma_h_shift; const int cy= y>>f->chroma_v_shift; - encode_plane(fs, p->data[0] + x + y*p->linesize[0], width, height, p->linesize[0], 0); + encode_plane(fs, p->data[0] + ps*x + y*p->linesize[0], width, height, p->linesize[0], 0); - encode_plane(fs, p->data[1] + cx+cy*p->linesize[1], chroma_width, chroma_height, p->linesize[1], 1); - encode_plane(fs, p->data[2] + cx+cy*p->linesize[2], chroma_width, chroma_height, p->linesize[2], 1); + encode_plane(fs, p->data[1] + ps*cx+cy*p->linesize[1], chroma_width, chroma_height, p->linesize[1], 1); + encode_plane(fs, p->data[2] + ps*cx+cy*p->linesize[2], chroma_width, chroma_height, p->linesize[2], 1); }else{ - encode_rgb_frame(fs, (uint32_t*)(p->data[0]) + x + y*(p->linesize[0]/4), width, height, p->linesize[0]/4); + encode_rgb_frame(fs, (uint32_t*)(p->data[0]) + ps*x + y*(p->linesize[0]/4), width, height, p->linesize[0]/4); } emms_c(); @@ -1475,6 +1476,7 @@ static int decode_slice(AVCodecContext *c, void *arg){ int height= fs->slice_height; int x= fs->slice_x; int y= fs->slice_y; + const int ps= (c->bits_per_raw_sample>8)+1; AVFrame * const p= &f->picture; av_assert1(width && height); @@ -1483,12 +1485,12 @@ static int decode_slice(AVCodecContext *c, void *arg){ const int chroma_height= -((-height)>>f->chroma_v_shift); const int cx= x>>f->chroma_h_shift; const int cy= y>>f->chroma_v_shift; - decode_plane(fs, p->data[0] + x + y*p->linesize[0], width, height, p->linesize[0], 0); + decode_plane(fs, p->data[0] + ps*x + y*p->linesize[0], width, height, p->linesize[0], 0); - decode_plane(fs, p->data[1] + cx+cy*p->linesize[1], chroma_width, chroma_height, p->linesize[1], 1); - decode_plane(fs, 
p->data[2] + cx+cy*p->linesize[1], chroma_width, chroma_height, p->linesize[2], 1); + decode_plane(fs, p->data[1] + ps*cx+cy*p->linesize[1], chroma_width, chroma_height, p->linesize[1], 1); + decode_plane(fs, p->data[2] + ps*cx+cy*p->linesize[1], chroma_width, chroma_height, p->linesize[2], 1); }else{ - decode_rgb_frame(fs, (uint32_t*)p->data[0] + x + y*(p->linesize[0]/4), width, height, p->linesize[0]/4); + decode_rgb_frame(fs, (uint32_t*)p->data[0] + ps*x + y*(p->linesize[0]/4), width, height, p->linesize[0]/4); } emms_c(); diff --git a/libavcodec/flacenc.c b/libavcodec/flacenc.c index 012adb3f83..159bbf2af8 100644 --- a/libavcodec/flacenc.c +++ b/libavcodec/flacenc.c @@ -1013,7 +1013,7 @@ static int encode_residual_ch(FlacEncodeContext *s, int ch) static int count_frame_header(FlacEncodeContext *s) { - uint8_t tmp; + uint8_t av_unused tmp; int count; /* diff --git a/libavcodec/flv.h b/libavcodec/flv.h index eb10f22608..16bc88b663 100644 --- a/libavcodec/flv.h +++ b/libavcodec/flv.h @@ -30,5 +30,4 @@ void ff_flv2_encode_ac_esc(PutBitContext *pb, int slevel, int level, int run, in int ff_flv_decode_picture_header(MpegEncContext *s); void ff_flv2_decode_ac_esc(GetBitContext *gb, int *level, int *run, int *last); -#endif - +#endif /* AVCODEC_FLV_H */ diff --git a/libavcodec/fmtconvert.c b/libavcodec/fmtconvert.c index 0e8aa5e909..c03117c2cd 100644 --- a/libavcodec/fmtconvert.c +++ b/libavcodec/fmtconvert.c @@ -56,11 +56,31 @@ static void float_to_int16_interleave_c(int16_t *dst, const float **src, } } +void ff_float_interleave_c(float *dst, const float **src, unsigned int len, + int channels) +{ + int j, c; + unsigned int i; + if (channels == 2) { + for (i = 0; i < len; i++) { + dst[2*i] = src[0][i]; + dst[2*i+1] = src[1][i]; + } + } else if (channels == 1 && len < INT_MAX / sizeof(float)) { + memcpy(dst, src[0], len * sizeof(float)); + } else { + for (c = 0; c < channels; c++) + for (i = 0, j = c; i < len; i++, j += channels) + dst[j] = src[c][i]; + } +} + av_cold 
void ff_fmt_convert_init(FmtConvertContext *c, AVCodecContext *avctx) { c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_c; c->float_to_int16 = float_to_int16_c; c->float_to_int16_interleave = float_to_int16_interleave_c; + c->float_interleave = ff_float_interleave_c; if (ARCH_ARM) ff_fmt_convert_init_arm(c, avctx); if (HAVE_ALTIVEC) ff_fmt_convert_init_altivec(c, avctx); diff --git a/libavcodec/fmtconvert.h b/libavcodec/fmtconvert.h index 82811d108c..825422bed6 100644 --- a/libavcodec/fmtconvert.h +++ b/libavcodec/fmtconvert.h @@ -68,8 +68,17 @@ typedef struct FmtConvertContext { */ void (*float_to_int16_interleave)(int16_t *dst, const float **src, long len, int channels); + + /** + * Convert multiple arrays of float to an array of interleaved float. + */ + void (*float_interleave)(float *dst, const float **src, unsigned int len, + int channels); } FmtConvertContext; +void ff_float_interleave_c(float *dst, const float **src, unsigned int len, + int channels); + void ff_fmt_convert_init(FmtConvertContext *c, AVCodecContext *avctx); void ff_fmt_convert_init_arm(FmtConvertContext *c, AVCodecContext *avctx); diff --git a/libavcodec/g729dec.c b/libavcodec/g729dec.c index 8a35bdc83c..32db0597e3 100644 --- a/libavcodec/g729dec.c +++ b/libavcodec/g729dec.c @@ -30,7 +30,6 @@ #include "libavutil/avutil.h" #include "get_bits.h" -#include "g729.h" #include "lsp.h" #include "celp_math.h" #include "acelp_filters.h" @@ -71,6 +70,12 @@ */ #define SHARP_MAX 13017 +/** + * subframe size + */ +#define SUBFRAME_SIZE 40 + + typedef struct { uint8_t ac_index_bits[2]; ///< adaptive codebook index for second subframe (size in bits) uint8_t parity_bit; ///< parity bit for pitch delay diff --git a/libavcodec/gsmdec_data.h b/libavcodec/gsmdec_data.h index 6b05bf6c97..bb4f159c60 100644 --- a/libavcodec/gsmdec_data.h +++ b/libavcodec/gsmdec_data.h @@ -19,8 +19,8 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#ifndef GSMDEC_DATA -#define
GSMDEC_DATA +#ifndef AVCODEC_GSMDEC_DATA +#define AVCODEC_GSMDEC_DATA #include <stdint.h> @@ -44,4 +44,4 @@ typedef struct { extern const uint16_t ff_gsm_long_term_gain_tab[4]; extern const int16_t ff_gsm_dequant_tab[64][8]; -#endif +#endif /* AVCODEC_GSMDEC_DATA */ diff --git a/libavcodec/h263.h b/libavcodec/h263.h index 6c05565c70..b2b6613536 100644 --- a/libavcodec/h263.h +++ b/libavcodec/h263.h @@ -248,4 +248,5 @@ static inline void memsetw(short *tab, int val, int n) for(i=0;i<n;i++) tab[i] = val; } -#endif + +#endif /* AVCODEC_H263_H */ diff --git a/libavcodec/h263dec.c b/libavcodec/h263dec.c index ec5cb8164c..ff312b0935 100644 --- a/libavcodec/h263dec.c +++ b/libavcodec/h263dec.c @@ -70,33 +70,27 @@ av_cold int ff_h263_decode_init(AVCodecContext *avctx) case CODEC_ID_MPEG4: break; case CODEC_ID_MSMPEG4V1: - s->h263_msmpeg4 = 1; s->h263_pred = 1; s->msmpeg4_version=1; break; case CODEC_ID_MSMPEG4V2: - s->h263_msmpeg4 = 1; s->h263_pred = 1; s->msmpeg4_version=2; break; case CODEC_ID_MSMPEG4V3: - s->h263_msmpeg4 = 1; s->h263_pred = 1; s->msmpeg4_version=3; break; case CODEC_ID_WMV1: - s->h263_msmpeg4 = 1; s->h263_pred = 1; s->msmpeg4_version=4; break; case CODEC_ID_WMV2: - s->h263_msmpeg4 = 1; s->h263_pred = 1; s->msmpeg4_version=5; break; case CODEC_ID_VC1: case CODEC_ID_WMV3: - s->h263_msmpeg4 = 1; s->h263_pred = 1; s->msmpeg4_version=6; avctx->chroma_sample_location = AVCHROMA_LOC_LEFT; @@ -670,8 +664,11 @@ retry: if(s->slice_height==0 || s->mb_x!=0 || (s->mb_y%s->slice_height)!=0 || get_bits_count(&s->gb) > s->gb.size_in_bits) break; }else{ + int prev_x=s->mb_x, prev_y=s->mb_y; if(ff_h263_resync(s)<0) break; + if (prev_y * s->mb_width + prev_x < s->mb_y * s->mb_width + s->mb_x) + s->error_occurred = 1; } if(s->msmpeg4_version<4 && s->h263_pred) @@ -680,7 +677,7 @@ retry: decode_slice(s); } - if (s->h263_msmpeg4 && s->msmpeg4_version<4 && s->pict_type==AV_PICTURE_TYPE_I) + if (s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type==AV_PICTURE_TYPE_I) 
if(!CONFIG_MSMPEG4_DECODER || msmpeg4_decode_ext_header(s, buf_size) < 0){ s->error_status_table[s->mb_num-1]= AC_ERROR|DC_ERROR|MV_ERROR; } @@ -689,22 +686,18 @@ retry: frame_end: /* divx 5.01+ bistream reorder stuff */ if(s->codec_id==CODEC_ID_MPEG4 && s->divx_packed){ - int current_pos= get_bits_count(&s->gb)>>3; + int current_pos= s->gb.buffer == s->bitstream_buffer ? 0 : (get_bits_count(&s->gb)>>3); int startcode_found=0; if(buf_size - current_pos > 5){ int i; - for(i=current_pos; i<buf_size-3; i++){ + for(i=current_pos; i<buf_size-4; i++){ if(buf[i]==0 && buf[i+1]==0 && buf[i+2]==1 && buf[i+3]==0xB6){ - startcode_found=1; + startcode_found=!(buf[i+4]&0x40); break; } } } - if(s->gb.buffer == s->bitstream_buffer && buf_size>7 && s->xvid_build>=0){ //xvid style - startcode_found=1; - current_pos=0; - } if(startcode_found){ av_fast_malloc( diff --git a/libavcodec/h264.c b/libavcodec/h264.c index e00e5e17d4..5be216a71b 100644 --- a/libavcodec/h264.c +++ b/libavcodec/h264.c @@ -246,6 +246,141 @@ static int ff_h264_decode_rbsp_trailing(H264Context *h, const uint8_t *src){ return 0; } +static inline int get_lowest_part_list_y(H264Context *h, Picture *pic, int n, int height, + int y_offset, int list){ + int raw_my= h->mv_cache[list][ scan8[n] ][1]; + int filter_height= (raw_my&3) ? 2 : 0; + int full_my= (raw_my>>2) + y_offset; + int top = full_my - filter_height, bottom = full_my + height + filter_height; + + return FFMAX(abs(top), bottom); +} + +static inline void get_lowest_part_y(H264Context *h, int refs[2][48], int n, int height, + int y_offset, int list0, int list1, int *nrefs){ + MpegEncContext * const s = &h->s; + int my; + + y_offset += 16*(s->mb_y >> MB_FIELD); + + if(list0){ + int ref_n = h->ref_cache[0][ scan8[n] ]; + Picture *ref= &h->ref_list[0][ref_n]; + + // Error resilience puts the current picture in the ref list. + // Don't try to wait on these as it will cause a deadlock. + // Fields can wait on each other, though. 
+ if(ref->thread_opaque != s->current_picture.thread_opaque || + (ref->reference&3) != s->picture_structure) { + my = get_lowest_part_list_y(h, ref, n, height, y_offset, 0); + if (refs[0][ref_n] < 0) nrefs[0] += 1; + refs[0][ref_n] = FFMAX(refs[0][ref_n], my); + } + } + + if(list1){ + int ref_n = h->ref_cache[1][ scan8[n] ]; + Picture *ref= &h->ref_list[1][ref_n]; + + if(ref->thread_opaque != s->current_picture.thread_opaque || + (ref->reference&3) != s->picture_structure) { + my = get_lowest_part_list_y(h, ref, n, height, y_offset, 1); + if (refs[1][ref_n] < 0) nrefs[1] += 1; + refs[1][ref_n] = FFMAX(refs[1][ref_n], my); + } + } +} + +/** + * Wait until all reference frames are available for MC operations. + * + * @param h the H264 context + */ +static void await_references(H264Context *h){ + MpegEncContext * const s = &h->s; + const int mb_xy= h->mb_xy; + const int mb_type= s->current_picture.mb_type[mb_xy]; + int refs[2][48]; + int nrefs[2] = {0}; + int ref, list; + + memset(refs, -1, sizeof(refs)); + + if(IS_16X16(mb_type)){ + get_lowest_part_y(h, refs, 0, 16, 0, + IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs); + }else if(IS_16X8(mb_type)){ + get_lowest_part_y(h, refs, 0, 8, 0, + IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs); + get_lowest_part_y(h, refs, 8, 8, 8, + IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs); + }else if(IS_8X16(mb_type)){ + get_lowest_part_y(h, refs, 0, 16, 0, + IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs); + get_lowest_part_y(h, refs, 4, 16, 0, + IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs); + }else{ + int i; + + assert(IS_8X8(mb_type)); + + for(i=0; i<4; i++){ + const int sub_mb_type= h->sub_mb_type[i]; + const int n= 4*i; + int y_offset= (i&2)<<2; + + if(IS_SUB_8X8(sub_mb_type)){ + get_lowest_part_y(h, refs, n , 8, y_offset, + IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs); + }else if(IS_SUB_8X4(sub_mb_type)){ + get_lowest_part_y(h, refs, n , 4, y_offset, + IS_DIR(sub_mb_type, 0, 0), 
IS_DIR(sub_mb_type, 0, 1), nrefs); + get_lowest_part_y(h, refs, n+2, 4, y_offset+4, + IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs); + }else if(IS_SUB_4X8(sub_mb_type)){ + get_lowest_part_y(h, refs, n , 8, y_offset, + IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs); + get_lowest_part_y(h, refs, n+1, 8, y_offset, + IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs); + }else{ + int j; + assert(IS_SUB_4X4(sub_mb_type)); + for(j=0; j<4; j++){ + int sub_y_offset= y_offset + 2*(j&2); + get_lowest_part_y(h, refs, n+j, 4, sub_y_offset, + IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs); + } + } + } + } + + for(list=h->list_count-1; list>=0; list--){ + for(ref=0; ref<48 && nrefs[list]; ref++){ + int row = refs[list][ref]; + if(row >= 0){ + Picture *ref_pic = &h->ref_list[list][ref]; + int ref_field = ref_pic->reference - 1; + int ref_field_picture = ref_pic->field_picture; + int pic_height = 16*s->mb_height >> ref_field_picture; + + row <<= MB_MBAFF; + nrefs[list]--; + + if(!FIELD_PICTURE && ref_field_picture){ // frame referencing two fields + ff_thread_await_progress((AVFrame*)ref_pic, FFMIN((row >> 1) - !(row&1), pic_height-1), 1); + ff_thread_await_progress((AVFrame*)ref_pic, FFMIN((row >> 1) , pic_height-1), 0); + }else if(FIELD_PICTURE && !ref_field_picture){ // field referencing one field of a frame + ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row*2 + ref_field , pic_height-1), 0); + }else if(FIELD_PICTURE){ + ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row, pic_height-1), ref_field); + }else{ + ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row, pic_height-1), 0); + } + } + } + } +} + #if 0 /** * DCT transforms the 16 dc values. 
@@ -312,10 +447,368 @@ static void chroma_dc_dct_c(DCTELEM *block){ } #endif +static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list, + uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, + int src_x_offset, int src_y_offset, + qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op, + int pixel_shift, int chroma444){ + MpegEncContext * const s = &h->s; + const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8; + int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8; + const int luma_xy= (mx&3) + ((my&3)<<2); + int offset = ((mx>>2) << pixel_shift) + (my>>2)*h->mb_linesize; + uint8_t * src_y = pic->data[0] + offset; + uint8_t * src_cb, * src_cr; + int extra_width= h->emu_edge_width; + int extra_height= h->emu_edge_height; + int emu=0; + const int full_mx= mx>>2; + const int full_my= my>>2; + const int pic_width = 16*s->mb_width; + const int pic_height = 16*s->mb_height >> MB_FIELD; + + if(mx&7) extra_width -= 3; + if(my&7) extra_height -= 3; + + if( full_mx < 0-extra_width + || full_my < 0-extra_height + || full_mx + 16/*FIXME*/ > pic_width + extra_width + || full_my + 16/*FIXME*/ > pic_height + extra_height){ + s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_y - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height); + src_y= s->edge_emu_buffer + (2 << pixel_shift) + 2*h->mb_linesize; + emu=1; + } + + qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps? 
+ if(!square){ + qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize); + } + + if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return; + + if(chroma444){ + src_cb = pic->data[1] + offset; + if(emu){ + s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize, + 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height); + src_cb= s->edge_emu_buffer + (2 << pixel_shift) + 2*h->mb_linesize; + } + qpix_op[luma_xy](dest_cb, src_cb, h->mb_linesize); //FIXME try variable height perhaps? + if(!square){ + qpix_op[luma_xy](dest_cb + delta, src_cb + delta, h->mb_linesize); + } + + src_cr = pic->data[2] + offset; + if(emu){ + s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cr - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize, + 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height); + src_cr= s->edge_emu_buffer + (2 << pixel_shift) + 2*h->mb_linesize; + } + qpix_op[luma_xy](dest_cr, src_cr, h->mb_linesize); //FIXME try variable height perhaps? 
+ if(!square){ + qpix_op[luma_xy](dest_cr + delta, src_cr + delta, h->mb_linesize); + } + return; + } + + if(MB_FIELD){ + // chroma offset when predicting from a field of opposite parity + my += 2 * ((s->mb_y & 1) - (pic->reference - 1)); + emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1); + } + src_cb= pic->data[1] + ((mx>>3) << pixel_shift) + (my>>3)*h->mb_uvlinesize; + src_cr= pic->data[2] + ((mx>>3) << pixel_shift) + (my>>3)*h->mb_uvlinesize; + + if(emu){ + s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1); + src_cb= s->edge_emu_buffer; + } + chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7); + + if(emu){ + s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1); + src_cr= s->edge_emu_buffer; + } + chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7); +} + +static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta, + uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, + int x_offset, int y_offset, + qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put, + qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg, + int list0, int list1, int pixel_shift, int chroma444){ + MpegEncContext * const s = &h->s; + qpel_mc_func *qpix_op= qpix_put; + h264_chroma_mc_func chroma_op= chroma_put; + + dest_y += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize; + if(chroma444){ + dest_cb += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize; + dest_cr += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize; + }else{ + dest_cb += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize; + dest_cr += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize; + } + x_offset += 8*s->mb_x; + y_offset += 8*(s->mb_y >> MB_FIELD); + + if(list0){ + Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ]; + mc_dir_part(h, ref, n, square, 
chroma_height, delta, 0, + dest_y, dest_cb, dest_cr, x_offset, y_offset, + qpix_op, chroma_op, pixel_shift, chroma444); + + qpix_op= qpix_avg; + chroma_op= chroma_avg; + } + + if(list1){ + Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ]; + mc_dir_part(h, ref, n, square, chroma_height, delta, 1, + dest_y, dest_cb, dest_cr, x_offset, y_offset, + qpix_op, chroma_op, pixel_shift, chroma444); + } +} + +static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta, + uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, + int x_offset, int y_offset, + qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put, + h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op, + h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg, + int list0, int list1, int pixel_shift, int chroma444){ + MpegEncContext * const s = &h->s; + + dest_y += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize; + if(chroma444){ + chroma_weight_avg = luma_weight_avg; + chroma_weight_op = luma_weight_op; + dest_cb += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize; + dest_cr += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize; + }else{ + dest_cb += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize; + dest_cr += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize; + } + x_offset += 8*s->mb_x; + y_offset += 8*(s->mb_y >> MB_FIELD); + + if(list0 && list1){ + /* don't optimize for luma-only case, since B-frames usually + * use implicit weights => chroma too. 
*/ + uint8_t *tmp_cb = s->obmc_scratchpad; + uint8_t *tmp_cr = s->obmc_scratchpad + (16 << pixel_shift); + uint8_t *tmp_y = s->obmc_scratchpad + 16*h->mb_uvlinesize; + int refn0 = h->ref_cache[0][ scan8[n] ]; + int refn1 = h->ref_cache[1][ scan8[n] ]; + + mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0, + dest_y, dest_cb, dest_cr, + x_offset, y_offset, qpix_put, chroma_put, pixel_shift, chroma444); + mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1, + tmp_y, tmp_cb, tmp_cr, + x_offset, y_offset, qpix_put, chroma_put, pixel_shift, chroma444); + + if(h->use_weight == 2){ + int weight0 = h->implicit_weight[refn0][refn1][s->mb_y&1]; + int weight1 = 64 - weight0; + luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0); + chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0); + chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0); + }else{ + luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom, + h->luma_weight[refn0][0][0] , h->luma_weight[refn1][1][0], + h->luma_weight[refn0][0][1] + h->luma_weight[refn1][1][1]); + chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom, + h->chroma_weight[refn0][0][0][0] , h->chroma_weight[refn1][1][0][0], + h->chroma_weight[refn0][0][0][1] + h->chroma_weight[refn1][1][0][1]); + chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom, + h->chroma_weight[refn0][0][1][0] , h->chroma_weight[refn1][1][1][0], + h->chroma_weight[refn0][0][1][1] + h->chroma_weight[refn1][1][1][1]); + } + }else{ + int list = list1 ? 
1 : 0; + int refn = h->ref_cache[list][ scan8[n] ]; + Picture *ref= &h->ref_list[list][refn]; + mc_dir_part(h, ref, n, square, chroma_height, delta, list, + dest_y, dest_cb, dest_cr, x_offset, y_offset, + qpix_put, chroma_put, pixel_shift, chroma444); + + luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom, + h->luma_weight[refn][list][0], h->luma_weight[refn][list][1]); + if(h->use_weight_chroma){ + chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom, + h->chroma_weight[refn][list][0][0], h->chroma_weight[refn][list][0][1]); + chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom, + h->chroma_weight[refn][list][1][0], h->chroma_weight[refn][list][1][1]); + } + } +} + +static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta, + uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, + int x_offset, int y_offset, + qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put, + qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg, + h264_weight_func *weight_op, h264_biweight_func *weight_avg, + int list0, int list1, int pixel_shift, int chroma444){ + if((h->use_weight==2 && list0 && list1 + && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ][h->s.mb_y&1] != 32)) + || h->use_weight==1) + mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr, + x_offset, y_offset, qpix_put, chroma_put, + weight_op[0], weight_op[3], weight_avg[0], + weight_avg[3], list0, list1, pixel_shift, chroma444); + else + mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr, + x_offset, y_offset, qpix_put, chroma_put, qpix_avg, + chroma_avg, list0, list1, pixel_shift, chroma444); +} + +static inline void prefetch_motion(H264Context *h, int list, int pixel_shift, int chroma444){ + /* fetch pixels for estimated mv 4 macroblocks ahead + * optimized for 64byte cache lines */ + MpegEncContext * const s = &h->s; + const int refn = 
h->ref_cache[list][scan8[0]]; + if(refn >= 0){ + const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8; + const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y; + uint8_t **src= h->ref_list[list][refn].data; + int off= ((mx+64)<<h->pixel_shift) + (my + (s->mb_x&3)*4)*h->mb_linesize; + s->dsp.prefetch(src[0]+off, s->linesize, 4); + if(chroma444){ + s->dsp.prefetch(src[1]+off, s->linesize, 4); + s->dsp.prefetch(src[2]+off, s->linesize, 4); + }else{ + off= (((mx>>1)+64)<<pixel_shift) + ((my>>1) + (s->mb_x&7))*s->uvlinesize; + s->dsp.prefetch(src[1]+off, src[2]-src[1], 2); + } + } +} + +static av_always_inline void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, + qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put), + qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg), + h264_weight_func *weight_op, h264_biweight_func *weight_avg, + int pixel_shift, int chroma444){ + MpegEncContext * const s = &h->s; + const int mb_xy= h->mb_xy; + const int mb_type= s->current_picture.mb_type[mb_xy]; + + assert(IS_INTER(mb_type)); + + if(HAVE_PTHREADS && (s->avctx->active_thread_type & FF_THREAD_FRAME)) + await_references(h); + prefetch_motion(h, 0, pixel_shift, chroma444); + + if(IS_16X16(mb_type)){ + mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0, + qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0], + weight_op, weight_avg, + IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), + pixel_shift, chroma444); + }else if(IS_16X8(mb_type)){ + mc_part(h, 0, 0, 4, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 0, + qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0], + &weight_op[1], &weight_avg[1], + IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), + pixel_shift, chroma444); + mc_part(h, 8, 0, 4, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 4, + qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0], + &weight_op[1], &weight_avg[1], + IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), + pixel_shift, chroma444); + 
}else if(IS_8X16(mb_type)){ + mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0, + qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], + &weight_op[2], &weight_avg[2], + IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), + pixel_shift, chroma444); + mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0, + qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], + &weight_op[2], &weight_avg[2], + IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), + pixel_shift, chroma444); + }else{ + int i; + + assert(IS_8X8(mb_type)); + + for(i=0; i<4; i++){ + const int sub_mb_type= h->sub_mb_type[i]; + const int n= 4*i; + int x_offset= (i&1)<<2; + int y_offset= (i&2)<<1; + + if(IS_SUB_8X8(sub_mb_type)){ + mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset, + qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], + &weight_op[3], &weight_avg[3], + IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), + pixel_shift, chroma444); + }else if(IS_SUB_8X4(sub_mb_type)){ + mc_part(h, n , 0, 2, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset, + qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1], + &weight_op[4], &weight_avg[4], + IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), + pixel_shift, chroma444); + mc_part(h, n+2, 0, 2, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset+2, + qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1], + &weight_op[4], &weight_avg[4], + IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), + pixel_shift, chroma444); + }else if(IS_SUB_4X8(sub_mb_type)){ + mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset, + qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], + &weight_op[5], &weight_avg[5], + IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), + pixel_shift, chroma444); + mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset, + qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], + &weight_op[5], 
&weight_avg[5], + IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), + pixel_shift, chroma444); + }else{ + int j; + assert(IS_SUB_4X4(sub_mb_type)); + for(j=0; j<4; j++){ + int sub_x_offset= x_offset + 2*(j&1); + int sub_y_offset= y_offset + (j&2); + mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset, + qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], + &weight_op[6], &weight_avg[6], + IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), + pixel_shift, chroma444); + } + } + } + } + + prefetch_motion(h, 1, pixel_shift, chroma444); +} + +#define hl_motion_fn(sh, bits) \ +static av_always_inline void hl_motion_ ## bits(H264Context *h, \ + uint8_t *dest_y, \ + uint8_t *dest_cb, uint8_t *dest_cr, \ + qpel_mc_func (*qpix_put)[16], \ + h264_chroma_mc_func (*chroma_put), \ + qpel_mc_func (*qpix_avg)[16], \ + h264_chroma_mc_func (*chroma_avg), \ + h264_weight_func *weight_op, \ + h264_biweight_func *weight_avg, \ + int chroma444) \ +{ \ + hl_motion(h, dest_y, dest_cb, dest_cr, qpix_put, chroma_put, \ + qpix_avg, chroma_avg, weight_op, weight_avg, sh, chroma444); \ +} +hl_motion_fn(0, 8); +hl_motion_fn(1, 16); static void free_tables(H264Context *h, int free_rbsp){ int i; H264Context *hx; + av_freep(&h->intra4x4_pred_mode); av_freep(&h->chroma_pred_mode_table); av_freep(&h->cbp_table); @@ -347,16 +840,19 @@ static void free_tables(H264Context *h, int free_rbsp){ } static void init_dequant8_coeff_table(H264Context *h){ - int i,q,x; + int i,j,q,x; const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8); - h->dequant8_coeff[0] = h->dequant8_buffer[0]; - h->dequant8_coeff[1] = h->dequant8_buffer[1]; - for(i=0; i<2; i++ ){ - if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){ - h->dequant8_coeff[1] = h->dequant8_buffer[0]; - break; + for(i=0; i<6; i++ ){ + h->dequant8_coeff[i] = h->dequant8_buffer[i]; + for(j=0; j<i; j++){ + if(!memcmp(h->pps.scaling_matrix8[j], h->pps.scaling_matrix8[i], 
64*sizeof(uint8_t))){ + h->dequant8_coeff[i] = h->dequant8_buffer[j]; + break; + } } + if(j<i) + continue; for(q=0; q<max_qp+1; q++){ int shift = div6[q]; @@ -404,7 +900,7 @@ static void init_dequant_tables(H264Context *h){ for(x=0; x<16; x++) h->dequant4_coeff[i][0][x] = 1<<6; if(h->pps.transform_8x8_mode) - for(i=0; i<2; i++) + for(i=0; i<6; i++) for(x=0; x<64; x++) h->dequant8_coeff[i][0][x] = 1<<6; } @@ -419,7 +915,7 @@ int ff_h264_alloc_tables(H264Context *h){ FF_ALLOCZ_OR_GOTO(h->s.avctx, h->intra4x4_pred_mode, row_mb_num * 8 * sizeof(uint8_t), fail) - FF_ALLOCZ_OR_GOTO(h->s.avctx, h->non_zero_count , big_mb_num * 32 * sizeof(uint8_t), fail) + FF_ALLOCZ_OR_GOTO(h->s.avctx, h->non_zero_count , big_mb_num * 48 * sizeof(uint8_t), fail) FF_ALLOCZ_OR_GOTO(h->s.avctx, h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base), fail) FF_ALLOCZ_OR_GOTO(h->s.avctx, h->cbp_table, big_mb_num * sizeof(uint16_t), fail) @@ -481,8 +977,8 @@ static void clone_tables(H264Context *dst, H264Context *src, int i){ * Allocate buffers which are not shared amongst multiple threads. 
*/ static int context_init(H264Context *h){ - FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t)*2, fail) - FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t)*2, fail) + FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[0], h->s.mb_width * 16*3 * sizeof(uint8_t)*2, fail) + FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[1], h->s.mb_width * 16*3 * sizeof(uint8_t)*2, fail) h->ref_cache[0][scan8[5 ]+1] = h->ref_cache[0][scan8[7 ]+1] = h->ref_cache[0][scan8[13]+1] = h->ref_cache[1][scan8[5 ]+1] = h->ref_cache[1][scan8[7 ]+1] = h->ref_cache[1][scan8[13]+1] = PART_NOT_AVAILABLE; @@ -546,7 +1042,7 @@ int ff_h264_decode_extradata(H264Context *h) cnt = *(p++); // Number of pps for (i = 0; i < cnt; i++) { nalsize = AV_RB16(p) + 2; - if(decode_nal_units(h, p, nalsize) < 0) { + if (decode_nal_units(h, p, nalsize) < 0) { av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i); return -1; } @@ -584,7 +1080,6 @@ av_cold int ff_h264_decode_init(AVCodecContext *avctx){ ff_h264_decode_init_vlc(); - h->sps.bit_depth_luma = avctx->bits_per_raw_sample = 8; h->pixel_shift = 0; h->sps.bit_depth_luma = avctx->bits_per_raw_sample = 8; @@ -612,11 +1107,15 @@ av_cold int ff_h264_decode_init(AVCodecContext *avctx){ return 0; } +#define IN_RANGE(a, b, size) (((a) >= (b)) && ((a) < ((b)+(size)))) static void copy_picture_range(Picture **to, Picture **from, int count, MpegEncContext *new_base, MpegEncContext *old_base) { int i; for (i=0; i<count; i++){ + assert((IN_RANGE(from[i], old_base, sizeof(*old_base)) || + IN_RANGE(from[i], old_base->picture, sizeof(Picture) * old_base->picture_count) || + !from[i])); to[i] = REBASE_PICTURE(from[i], new_base, old_base); } } @@ -678,9 +1177,10 @@ static int decode_update_thread_context(AVCodecContext *dst, const AVCodecContex // frame_start may not be called for the next thread (if it's decoding a bottom field) // so this has to be allocated here - h->s.obmc_scratchpad = 
av_malloc(16*2*s->linesize + 8*2*s->uvlinesize); + h->s.obmc_scratchpad = av_malloc(16*6*s->linesize); s->dsp.clear_blocks(h->mb); + s->dsp.clear_blocks(h->mb+(24*16<<h->pixel_shift)); } //extradata/NAL handling @@ -699,7 +1199,7 @@ static int decode_update_thread_context(AVCodecContext *dst, const AVCodecContex for(i=0; i<6; i++) h->dequant4_coeff[i] = h->dequant4_buffer[0] + (h1->dequant4_coeff[i] - h1->dequant4_buffer[0]); - for(i=0; i<2; i++) + for(i=0; i<6; i++) h->dequant8_coeff[i] = h->dequant8_buffer[0] + (h1->dequant8_coeff[i] - h1->dequant8_buffer[0]); h->dequant_coeff_pps = h1->dequant_coeff_pps; @@ -708,7 +1208,8 @@ static int decode_update_thread_context(AVCodecContext *dst, const AVCodecContex copy_fields(h, h1, poc_lsb, redundant_pic_count); //reference lists - copy_fields(h, h1, ref_count, intra_gb); + copy_fields(h, h1, ref_count, list_count); + copy_fields(h, h1, ref_list, intra_gb); copy_fields(h, h1, short_ref, cabac_init_idc); copy_picture_range(h->short_ref, h1->short_ref, 32, s, s1); @@ -735,6 +1236,7 @@ int ff_h264_frame_start(H264Context *h){ MpegEncContext * const s = &h->s; int i; const int pixel_shift = h->pixel_shift; + int thread_count = (s->avctx->active_thread_type & FF_THREAD_SLICE) ? 
s->avctx->thread_count : 1; if(MPV_frame_start(s, s->avctx) < 0) return -1; @@ -752,20 +1254,20 @@ int ff_h264_frame_start(H264Context *h){ for(i=0; i<16; i++){ h->block_offset[i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 4*s->linesize*((scan8[i] - scan8[0])>>3); - h->block_offset[24+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 8*s->linesize*((scan8[i] - scan8[0])>>3); + h->block_offset[48+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 8*s->linesize*((scan8[i] - scan8[0])>>3); } - for(i=0; i<4; i++){ + for(i=0; i<16; i++){ h->block_offset[16+i]= - h->block_offset[20+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3); - h->block_offset[24+16+i]= - h->block_offset[24+20+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3); + h->block_offset[32+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3); + h->block_offset[48+16+i]= + h->block_offset[48+32+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3); } /* can't be in alloc_tables because linesize isn't known there. * FIXME: redo bipred weight to not require extra buffer? */ - for(i = 0; i < s->avctx->thread_count; i++) + for(i = 0; i < thread_count; i++) if(h->thread_context[i] && !h->thread_context[i]->s.obmc_scratchpad) - h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize); + h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*6*s->linesize); /* some macroblocks can be accessed before they're available in case of lost slices, mbaff or threading*/ memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table)); @@ -796,8 +1298,10 @@ int ff_h264_frame_start(H264Context *h){ * This includes finding the next displayed frame. 
* * @param h h264 master context + * @param setup_finished enough NALs have been read that we can call + * ff_thread_finish_setup() */ -static void decode_postinit(H264Context *h){ +static void decode_postinit(H264Context *h, int setup_finished){ MpegEncContext * const s = &h->s; Picture *out = s->current_picture_ptr; Picture *cur = s->current_picture_ptr; @@ -809,10 +1313,11 @@ static void decode_postinit(H264Context *h){ if (h->next_output_pic) return; if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) { - //FIXME this allows the next thread to start once we encounter the first field of a PAFF packet - //This works if the next packet contains the second field. It does not work if both fields are - //in the same packet. - //ff_thread_finish_setup(s->avctx); + //FIXME: if we have two PAFF fields in one packet, we can't start the next thread here. + //If we have one field per packet, we can. The check in decode_nal_units() is not good enough + //to find this yet, so we assume the worst for now. 
+ //if (setup_finished) + // ff_thread_finish_setup(s->avctx); return; } @@ -943,10 +1448,11 @@ static void decode_postinit(H264Context *h){ av_log(s->avctx, AV_LOG_DEBUG, "no picture\n"); } - ff_thread_finish_setup(s->avctx); + if (setup_finished) + ff_thread_finish_setup(s->avctx); } -static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){ +static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int chroma444, int simple){ MpegEncContext * const s = &h->s; uint8_t *top_border; int top_idx = 1; @@ -964,12 +1470,24 @@ static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src if (pixel_shift) AV_COPY128(top_border+16, src_y+15*linesize+16); if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ - if (pixel_shift) { - AV_COPY128(top_border+32, src_cb+7*uvlinesize); - AV_COPY128(top_border+48, src_cr+7*uvlinesize); + if(chroma444){ + if (pixel_shift){ + AV_COPY128(top_border+32, src_cb + 15*uvlinesize); + AV_COPY128(top_border+48, src_cb + 15*uvlinesize+16); + AV_COPY128(top_border+64, src_cr + 15*uvlinesize); + AV_COPY128(top_border+80, src_cr + 15*uvlinesize+16); + } else { + AV_COPY128(top_border+16, src_cb + 15*uvlinesize); + AV_COPY128(top_border+32, src_cr + 15*uvlinesize); + } } else { - AV_COPY64(top_border+16, src_cb+7*uvlinesize); - AV_COPY64(top_border+24, src_cr+7*uvlinesize); + if (pixel_shift) { + AV_COPY128(top_border+32, src_cb+7*uvlinesize); + AV_COPY128(top_border+48, src_cr+7*uvlinesize); + } else { + AV_COPY64(top_border+16, src_cb+7*uvlinesize); + AV_COPY64(top_border+24, src_cr+7*uvlinesize); + } } } } @@ -987,12 +1505,24 @@ static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src AV_COPY128(top_border+16, src_y+16*linesize+16); if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ - if (pixel_shift) { - AV_COPY128(top_border+32, 
src_cb+8*uvlinesize); - AV_COPY128(top_border+48, src_cr+8*uvlinesize); + if(chroma444){ + if (pixel_shift){ + AV_COPY128(top_border+32, src_cb + 16*linesize); + AV_COPY128(top_border+48, src_cb + 16*linesize+16); + AV_COPY128(top_border+64, src_cr + 16*linesize); + AV_COPY128(top_border+80, src_cr + 16*linesize+16); + } else { + AV_COPY128(top_border+16, src_cb + 16*linesize); + AV_COPY128(top_border+32, src_cr + 16*linesize); + } } else { - AV_COPY64(top_border+16, src_cb+8*uvlinesize); - AV_COPY64(top_border+24, src_cr+8*uvlinesize); + if (pixel_shift) { + AV_COPY128(top_border+32, src_cb+8*uvlinesize); + AV_COPY128(top_border+48, src_cr+8*uvlinesize); + } else { + AV_COPY64(top_border+16, src_cb+8*uvlinesize); + AV_COPY64(top_border+24, src_cr+8*uvlinesize); + } } } } @@ -1000,9 +1530,10 @@ static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, - int xchg, int simple, int pixel_shift){ + int xchg, int chroma444, + int simple, int pixel_shift){ MpegEncContext * const s = &h->s; - int deblock_left; + int deblock_topleft; int deblock_top; int top_idx = 1; uint8_t *top_border_m1; @@ -1018,11 +1549,11 @@ static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, } if(h->deblocking_filter == 2) { - deblock_left = h->left_type[0]; - deblock_top = h->top_type; + deblock_topleft = h->slice_table[h->mb_xy - 1 - s->mb_stride] == h->slice_num; + deblock_top = h->top_type; } else { - deblock_left = (s->mb_x > 0); - deblock_top = (s->mb_y > !!MB_FIELD); + deblock_topleft = (s->mb_x > 0); + deblock_top = (s->mb_y > !!MB_FIELD); } src_y -= linesize + 1 + pixel_shift; @@ -1045,7 +1576,7 @@ if (xchg) AV_SWAP64(b,a);\ else AV_COPY64(b,a); if(deblock_top){ - if(deblock_left){ + if(deblock_topleft){ XCHG(top_border_m1 + (8 << pixel_shift), src_y - (7 << pixel_shift), 1); } XCHG(top_border + (0 << pixel_shift), src_y + (1 
<< pixel_shift), xchg); @@ -1055,13 +1586,28 @@ else AV_COPY64(b,a); } } if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ - if(deblock_top){ - if(deblock_left){ - XCHG(top_border_m1 + (16 << pixel_shift), src_cb - (7 << pixel_shift), 1); - XCHG(top_border_m1 + (24 << pixel_shift), src_cr - (7 << pixel_shift), 1); + if(chroma444){ + if(deblock_topleft){ + XCHG(top_border_m1 + (24 << pixel_shift), src_cb - (7 << pixel_shift), 1); + XCHG(top_border_m1 + (40 << pixel_shift), src_cr - (7 << pixel_shift), 1); + } + XCHG(top_border + (16 << pixel_shift), src_cb + (1 << pixel_shift), xchg); + XCHG(top_border + (24 << pixel_shift), src_cb + (9 << pixel_shift), 1); + XCHG(top_border + (32 << pixel_shift), src_cr + (1 << pixel_shift), xchg); + XCHG(top_border + (40 << pixel_shift), src_cr + (9 << pixel_shift), 1); + if(s->mb_x+1 < s->mb_width){ + XCHG(h->top_borders[top_idx][s->mb_x+1] + (16 << pixel_shift), src_cb + (17 << pixel_shift), 1); + XCHG(h->top_borders[top_idx][s->mb_x+1] + (32 << pixel_shift), src_cr + (17 << pixel_shift), 1); + } + } else { + if(deblock_top){ + if(deblock_topleft){ + XCHG(top_border_m1 + (16 << pixel_shift), src_cb - (7 << pixel_shift), 1); + XCHG(top_border_m1 + (24 << pixel_shift), src_cr - (7 << pixel_shift), 1); + } + XCHG(top_border + (16 << pixel_shift), src_cb+1+pixel_shift, 1); + XCHG(top_border + (24 << pixel_shift), src_cr+1+pixel_shift, 1); } - XCHG(top_border + (16 << pixel_shift), src_cb+1+pixel_shift, 1); - XCHG(top_border + (24 << pixel_shift), src_cr+1+pixel_shift, 1); } } } @@ -1080,6 +1626,159 @@ static av_always_inline void dctcoef_set(DCTELEM *mb, int high_bit_depth, int in AV_WN16A(mb + index, value); } +static av_always_inline void hl_decode_mb_predict_luma(H264Context *h, int mb_type, int is_h264, int simple, int transform_bypass, + int pixel_shift, int *block_offset, int linesize, uint8_t *dest_y, int p) +{ + MpegEncContext * const s = &h->s; + void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride); + void 
(*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride); + int i; + int qscale = p == 0 ? s->qscale : h->chroma_qp[p-1]; + block_offset += 16*p; + if(IS_INTRA4x4(mb_type)){ + if(simple || !s->encoding){ + if(IS_8x8DCT(mb_type)){ + if(transform_bypass){ + idct_dc_add = + idct_add = s->dsp.add_pixels8; + }else{ + idct_dc_add = h->h264dsp.h264_idct8_dc_add; + idct_add = h->h264dsp.h264_idct8_add; + } + for(i=0; i<16; i+=4){ + uint8_t * const ptr= dest_y + block_offset[i]; + const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ]; + if(transform_bypass && h->sps.profile_idc==244 && dir<=1){ + h->hpc.pred8x8l_add[dir](ptr, h->mb + (i*16+p*256 << pixel_shift), linesize); + }else{ + const int nnz = h->non_zero_count_cache[ scan8[i+p*16] ]; + h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000, + (h->topright_samples_available<<i)&0x4000, linesize); + if(nnz){ + if(nnz == 1 && dctcoef_get(h->mb, pixel_shift, i*16+p*256)) + idct_dc_add(ptr, h->mb + (i*16+p*256 << pixel_shift), linesize); + else + idct_add (ptr, h->mb + (i*16+p*256 << pixel_shift), linesize); + } + } + } + }else{ + if(transform_bypass){ + idct_dc_add = + idct_add = s->dsp.add_pixels4; + }else{ + idct_dc_add = h->h264dsp.h264_idct_dc_add; + idct_add = h->h264dsp.h264_idct_add; + } + for(i=0; i<16; i++){ + uint8_t * const ptr= dest_y + block_offset[i]; + const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ]; + + if(transform_bypass && h->sps.profile_idc==244 && dir<=1){ + h->hpc.pred4x4_add[dir](ptr, h->mb + (i*16+p*256 << pixel_shift), linesize); + }else{ + uint8_t *topright; + int nnz, tr; + uint64_t tr_high; + if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){ + const int topright_avail= (h->topright_samples_available<<i)&0x8000; + assert(mb_y || linesize <= block_offset[i]); + if(!topright_avail){ + if (pixel_shift) { + tr_high= ((uint16_t*)ptr)[3 - linesize/2]*0x0001000100010001ULL; + topright= (uint8_t*) &tr_high; + } else { + tr= ptr[3 - linesize]*0x01010101; + topright= 
(uint8_t*) &tr; + } + }else + topright= ptr + (4 << pixel_shift) - linesize; + }else + topright= NULL; + + h->hpc.pred4x4[ dir ](ptr, topright, linesize); + nnz = h->non_zero_count_cache[ scan8[i+p*16] ]; + if(nnz){ + if(is_h264){ + if(nnz == 1 && dctcoef_get(h->mb, pixel_shift, i*16+p*256)) + idct_dc_add(ptr, h->mb + (i*16+p*256 << pixel_shift), linesize); + else + idct_add (ptr, h->mb + (i*16+p*256 << pixel_shift), linesize); + }else + ff_svq3_add_idct_c(ptr, h->mb + i*16+p*256, linesize, qscale, 0); + } + } + } + } + } + }else{ + h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize); + if(is_h264){ + if(h->non_zero_count_cache[ scan8[LUMA_DC_BLOCK_INDEX+p] ]){ + if(!transform_bypass) + h->h264dsp.h264_luma_dc_dequant_idct(h->mb+(p*256 << pixel_shift), h->mb_luma_dc[p], h->dequant4_coeff[p][qscale][0]); + else{ + static const uint8_t dc_mapping[16] = { 0*16, 1*16, 4*16, 5*16, 2*16, 3*16, 6*16, 7*16, + 8*16, 9*16,12*16,13*16,10*16,11*16,14*16,15*16}; + for(i = 0; i < 16; i++) + dctcoef_set(h->mb+p*256, pixel_shift, dc_mapping[i], dctcoef_get(h->mb_luma_dc[p], pixel_shift, i)); + } + } + }else + ff_svq3_luma_dc_dequant_idct_c(h->mb+p*256, h->mb_luma_dc[p], qscale); + } +} + +static av_always_inline void hl_decode_mb_idct_luma(H264Context *h, int mb_type, int is_h264, int simple, int transform_bypass, + int pixel_shift, int *block_offset, int linesize, uint8_t *dest_y, int p) +{ + MpegEncContext * const s = &h->s; + void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride); + int i; + block_offset += 16*p; + if(!IS_INTRA4x4(mb_type)){ + if(is_h264){ + if(IS_INTRA16x16(mb_type)){ + if(transform_bypass){ + if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){ + h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize); + }else{ + for(i=0; i<16; i++){ + if(h->non_zero_count_cache[ scan8[i+p*16] ] || dctcoef_get(h->mb, pixel_shift, i*16)) + 
s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + (i*16+p*256 << pixel_shift), linesize); + } + } + }else{ + h->h264dsp.h264_idct_add16intra(dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize, h->non_zero_count_cache+p*5*8); + } + }else if(h->cbp&15){ + if(transform_bypass){ + const int di = IS_8x8DCT(mb_type) ? 4 : 1; + idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4; + for(i=0; i<16; i+=di){ + if(h->non_zero_count_cache[ scan8[i+p*16] ]){ + idct_add(dest_y + block_offset[i], h->mb + (i*16+p*256 << pixel_shift), linesize); + } + } + }else{ + if(IS_8x8DCT(mb_type)){ + h->h264dsp.h264_idct8_add4(dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize, h->non_zero_count_cache+p*5*8); + }else{ + h->h264dsp.h264_idct_add16(dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize, h->non_zero_count_cache+p*5*8); + } + } + } + }else{ + for(i=0; i<16; i++){ + if(h->non_zero_count_cache[ scan8[i+p*16] ] || h->mb[i*16+p*256]){ //FIXME benchmark weird rule, & below + uint8_t * const ptr= dest_y + block_offset[i]; + ff_svq3_add_idct_c(ptr, h->mb + i*16 + p*256, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0); + } + } + } + } +} + static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, int pixel_shift){ MpegEncContext * const s = &h->s; const int mb_x= s->mb_x; @@ -1088,13 +1787,12 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i const int mb_type= s->current_picture.mb_type[mb_xy]; uint8_t *dest_y, *dest_cb, *dest_cr; int linesize, uvlinesize /*dct_offset*/; - int i; + int i, j; int *block_offset = &h->block_offset[0]; const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass); /* is_h264 should always be true if SVQ3 is disabled. 
*/ const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264; void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride); - void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride); dest_y = s->current_picture.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize ) * 16; dest_cb = s->current_picture.data[1] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8; @@ -1108,7 +1806,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i if (!simple && MB_FIELD) { linesize = h->mb_linesize = s->linesize * 2; uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2; - block_offset = &h->block_offset[24]; + block_offset = &h->block_offset[48]; if(mb_y&1){ //FIXME move out of this function? dest_y -= s->linesize*15; dest_cb-= s->uvlinesize*7; @@ -1149,219 +1847,95 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i for (j = 0; j < 16; j++) tmp_y[j] = get_bits(&gb, bit_depth); } - for (i = 0; i < 8; i++) { - uint16_t *tmp_cb = (uint16_t*)(dest_cb + i*uvlinesize); - for (j = 0; j < 8; j++) - tmp_cb[j] = get_bits(&gb, bit_depth); - } - for (i = 0; i < 8; i++) { - uint16_t *tmp_cr = (uint16_t*)(dest_cr + i*uvlinesize); - for (j = 0; j < 8; j++) - tmp_cr[j] = get_bits(&gb, bit_depth); + if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ + for (i = 0; i < 8; i++) { + uint16_t *tmp_cb = (uint16_t*)(dest_cb + i*uvlinesize); + for (j = 0; j < 8; j++) + tmp_cb[j] = get_bits(&gb, bit_depth); + } + for (i = 0; i < 8; i++) { + uint16_t *tmp_cr = (uint16_t*)(dest_cr + i*uvlinesize); + for (j = 0; j < 8; j++) + tmp_cr[j] = get_bits(&gb, bit_depth); + } } } else { - for (i=0; i<16; i++) { - memcpy(dest_y + i* linesize, h->mb + i*8, 16); - } - for (i=0; i<8; i++) { - memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8); - memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8); - } + for (i=0; i<16; i++) { + memcpy(dest_y + i* linesize, h->mb + i*8, 16); + } + if(simple || !CONFIG_GRAY || 
!(s->flags&CODEC_FLAG_GRAY)){ + for (i=0; i<8; i++) { + memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8); + memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8); + } + } } } else { if(IS_INTRA(mb_type)){ if(h->deblocking_filter) - xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple, pixel_shift); + xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, 0, simple, pixel_shift); if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize); h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize); } - if(IS_INTRA4x4(mb_type)){ - if(simple || !s->encoding){ - if(IS_8x8DCT(mb_type)){ - if(transform_bypass){ - idct_dc_add = - idct_add = s->dsp.add_pixels8; - }else{ - idct_dc_add = h->h264dsp.h264_idct8_dc_add; - idct_add = h->h264dsp.h264_idct8_add; - } - for(i=0; i<16; i+=4){ - uint8_t * const ptr= dest_y + block_offset[i]; - const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ]; - if(transform_bypass && h->sps.profile_idc==244 && dir<=1){ - h->hpc.pred8x8l_add[dir](ptr, h->mb + (i*16 << pixel_shift), linesize); - }else{ - const int nnz = h->non_zero_count_cache[ scan8[i] ]; - h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000, - (h->topright_samples_available<<i)&0x4000, linesize); - if(nnz){ - if(nnz == 1 && dctcoef_get(h->mb, pixel_shift, i*16)) - idct_dc_add(ptr, h->mb + (i*16 << pixel_shift), linesize); - else - idct_add (ptr, h->mb + (i*16 << pixel_shift), linesize); - } - } - } - }else{ - if(transform_bypass){ - idct_dc_add = - idct_add = s->dsp.add_pixels4; - }else{ - idct_dc_add = h->h264dsp.h264_idct_dc_add; - idct_add = h->h264dsp.h264_idct_add; - } - for(i=0; i<16; i++){ - uint8_t * const ptr= dest_y + block_offset[i]; - const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ]; - - if(transform_bypass && h->sps.profile_idc==244 && dir<=1){ - h->hpc.pred4x4_add[dir](ptr, h->mb + (i*16 << pixel_shift), linesize); - }else{ - uint8_t *topright; - 
int nnz, tr; - uint64_t tr_high; - if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){ - const int topright_avail= (h->topright_samples_available<<i)&0x8000; - assert(mb_y || linesize <= block_offset[i]); - if(!topright_avail){ - if (pixel_shift) { - tr_high= ((uint16_t*)ptr)[3 - linesize/2]*0x0001000100010001ULL; - topright= (uint8_t*) &tr_high; - } else { - tr= ptr[3 - linesize]*0x01010101; - topright= (uint8_t*) &tr; - } - }else - topright= ptr + (4 << pixel_shift) - linesize; - }else - topright= NULL; - - h->hpc.pred4x4[ dir ](ptr, topright, linesize); - nnz = h->non_zero_count_cache[ scan8[i] ]; - if(nnz){ - if(is_h264){ - if(nnz == 1 && dctcoef_get(h->mb, pixel_shift, i*16)) - idct_dc_add(ptr, h->mb + (i*16 << pixel_shift), linesize); - else - idct_add (ptr, h->mb + (i*16<<pixel_shift), linesize); - } -#if CONFIG_SVQ3_DECODER - else - ff_svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0); -#endif - } - } - } - } - } - }else{ - h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize); - if(is_h264){ - if(h->non_zero_count_cache[ scan8[LUMA_DC_BLOCK_INDEX] ]){ - if(!transform_bypass) - h->h264dsp.h264_luma_dc_dequant_idct(h->mb, h->mb_luma_dc, h->dequant4_coeff[0][s->qscale][0]); - else{ - static const uint8_t dc_mapping[16] = { 0*16, 1*16, 4*16, 5*16, 2*16, 3*16, 6*16, 7*16, - 8*16, 9*16,12*16,13*16,10*16,11*16,14*16,15*16}; - for(i = 0; i < 16; i++) - dctcoef_set(h->mb, pixel_shift, dc_mapping[i], dctcoef_get(h->mb_luma_dc, pixel_shift, i)); - } - } - } -#if CONFIG_SVQ3_DECODER - else - ff_svq3_luma_dc_dequant_idct_c(h->mb, h->mb_luma_dc, s->qscale); -#endif - } + hl_decode_mb_predict_luma(h, mb_type, is_h264, simple, transform_bypass, pixel_shift, block_offset, linesize, dest_y, 0); + if(h->deblocking_filter) - xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple, pixel_shift); + xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, 0, simple, pixel_shift); }else if(is_h264){ - ff_hl_motion(h, dest_y, 
dest_cb, dest_cr, - s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab, - s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab, - h->h264dsp.weight_h264_pixels_tab, h->h264dsp.biweight_h264_pixels_tab); - } - - - if(!IS_INTRA4x4(mb_type)){ - if(is_h264){ - if(IS_INTRA16x16(mb_type)){ - if(transform_bypass){ - if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){ - h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize); - }else{ - for(i=0; i<16; i++){ - if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h->mb, pixel_shift, i*16)) - s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + (i*16 << pixel_shift), linesize); - } - } - }else{ - h->h264dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache); - } - }else if(h->cbp&15){ - if(transform_bypass){ - const int di = IS_8x8DCT(mb_type) ? 4 : 1; - idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4; - for(i=0; i<16; i+=di){ - if(h->non_zero_count_cache[ scan8[i] ]){ - idct_add(dest_y + block_offset[i], h->mb + (i*16 << pixel_shift), linesize); - } - } - }else{ - if(IS_8x8DCT(mb_type)){ - h->h264dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache); - }else{ - h->h264dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache); - } - } - } - } -#if CONFIG_SVQ3_DECODER - else{ - for(i=0; i<16; i++){ - if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below - uint8_t * const ptr= dest_y + block_offset[i]; - ff_svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 
1 : 0); - } - } - } -#endif + if (pixel_shift) { + hl_motion_16(h, dest_y, dest_cb, dest_cr, + s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab, + s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab, + h->h264dsp.weight_h264_pixels_tab, + h->h264dsp.biweight_h264_pixels_tab, 0); + } else + hl_motion_8(h, dest_y, dest_cb, dest_cr, + s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab, + s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab, + h->h264dsp.weight_h264_pixels_tab, + h->h264dsp.biweight_h264_pixels_tab, 0); } + hl_decode_mb_idct_luma(h, mb_type, is_h264, simple, transform_bypass, pixel_shift, block_offset, linesize, dest_y, 0); + if((simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){ uint8_t *dest[2] = {dest_cb, dest_cr}; if(transform_bypass){ if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){ - h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + (16*16 << pixel_shift), uvlinesize); - h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + (20*16 << pixel_shift), uvlinesize); + h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + (16*16*1 << pixel_shift), uvlinesize); + h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 32, h->mb + (16*16*2 << pixel_shift), uvlinesize); }else{ idct_add = s->dsp.add_pixels4; - for(i=16; i<16+8; i++){ - if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h->mb, pixel_shift, i*16)) - idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + (i*16 << pixel_shift), uvlinesize); + for(j=1; j<3; j++){ + for(i=j*16; i<j*16+4; i++){ + if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h->mb, pixel_shift, i*16)) + idct_add (dest[j-1] + block_offset[i], h->mb + (i*16 << pixel_shift), uvlinesize); + } } } }else{ if(is_h264){ if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+0] ]) - h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16 << pixel_shift) , 
h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]); + h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16*1 << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]); if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+1] ]) - h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + ((16*16+4*16) << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]); + h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16*2 << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]); h->h264dsp.h264_idct_add8(dest, block_offset, h->mb, uvlinesize, h->non_zero_count_cache); } #if CONFIG_SVQ3_DECODER else{ - h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16 , h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]); - h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16+4*16, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]); - for(i=16; i<16+8; i++){ - if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ - uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i]; - ff_svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, ff_h264_chroma_qp[0][s->qscale + 12] - 12, 2); + h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16*1, h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]); + h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16*2, h->dequant4_coeff[IS_INTRA(mb_type) ? 
2:5][h->chroma_qp[1]][0]); + for(j=1; j<3; j++){ + for(i=j*16; i<j*16+4; i++){ + if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ + uint8_t * const ptr= dest[j-1] + block_offset[i]; + ff_svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, ff_h264_chroma_qp[0][s->qscale + 12] - 12, 2); + } } } } @@ -1370,7 +1944,113 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i } } if(h->cbp || IS_INTRA(mb_type)) + { + s->dsp.clear_blocks(h->mb); + s->dsp.clear_blocks(h->mb+(24*16<<pixel_shift)); + } +} + +static av_always_inline void hl_decode_mb_444_internal(H264Context *h, int simple, int pixel_shift){ + MpegEncContext * const s = &h->s; + const int mb_x= s->mb_x; + const int mb_y= s->mb_y; + const int mb_xy= h->mb_xy; + const int mb_type= s->current_picture.mb_type[mb_xy]; + uint8_t *dest[3]; + int linesize; + int i, j, p; + int *block_offset = &h->block_offset[0]; + const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass); + const int plane_count = (simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) ? 3 : 1; + + for (p = 0; p < plane_count; p++) + { + dest[p] = s->current_picture.data[p] + ((mb_x << pixel_shift) + mb_y * s->linesize) * 16; + s->dsp.prefetch(dest[p] + (s->mb_x&3)*4*s->linesize + (64 << pixel_shift), s->linesize, 4); + } + + h->list_counts[mb_xy]= h->list_count; + + if (!simple && MB_FIELD) { + linesize = h->mb_linesize = h->mb_uvlinesize = s->linesize * 2; + block_offset = &h->block_offset[48]; + if(mb_y&1) //FIXME move out of this function? 
+ for (p = 0; p < 3; p++) + dest[p] -= s->linesize*15; + if(FRAME_MBAFF) { + int list; + for(list=0; list<h->list_count; list++){ + if(!USES_LIST(mb_type, list)) + continue; + if(IS_16X16(mb_type)){ + int8_t *ref = &h->ref_cache[list][scan8[0]]; + fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1); + }else{ + for(i=0; i<16; i+=4){ + int ref = h->ref_cache[list][scan8[i]]; + if(ref >= 0) + fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1); + } + } + } + } + } else { + linesize = h->mb_linesize = h->mb_uvlinesize = s->linesize; + } + + if (!simple && IS_INTRA_PCM(mb_type)) { + if (pixel_shift) { + const int bit_depth = h->sps.bit_depth_luma; + GetBitContext gb; + init_get_bits(&gb, (uint8_t*)h->mb, 768*bit_depth); + + for (p = 0; p < plane_count; p++) { + for (i = 0; i < 16; i++) { + uint16_t *tmp = (uint16_t*)(dest[p] + i*linesize); + for (j = 0; j < 16; j++) + tmp[j] = get_bits(&gb, bit_depth); + } + } + } else { + for (p = 0; p < plane_count; p++) { + for (i = 0; i < 16; i++) { + memcpy(dest[p] + i*linesize, h->mb + p*128 + i*8, 16); + } + } + } + } else { + if(IS_INTRA(mb_type)){ + if(h->deblocking_filter) + xchg_mb_border(h, dest[0], dest[1], dest[2], linesize, linesize, 1, 1, simple, pixel_shift); + + for (p = 0; p < plane_count; p++) + hl_decode_mb_predict_luma(h, mb_type, 1, simple, transform_bypass, pixel_shift, block_offset, linesize, dest[p], p); + + if(h->deblocking_filter) + xchg_mb_border(h, dest[0], dest[1], dest[2], linesize, linesize, 0, 1, simple, pixel_shift); + }else{ + if (pixel_shift) { + hl_motion_16(h, dest[0], dest[1], dest[2], + s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab, + s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab, + h->h264dsp.weight_h264_pixels_tab, + h->h264dsp.biweight_h264_pixels_tab, 1); + } else + hl_motion_8(h, dest[0], dest[1], dest[2], + s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab, + s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab, + h->h264dsp.weight_h264_pixels_tab, + 
h->h264dsp.biweight_h264_pixels_tab, 1); + } + + for (p = 0; p < plane_count; p++) + hl_decode_mb_idct_luma(h, mb_type, 1, simple, transform_bypass, pixel_shift, block_offset, linesize, dest[p], p); + } + if(h->cbp || IS_INTRA(mb_type)) + { s->dsp.clear_blocks(h->mb); + s->dsp.clear_blocks(h->mb+(24*16<<pixel_shift)); + } } /** @@ -1390,13 +2070,26 @@ static void av_noinline hl_decode_mb_complex(H264Context *h){ hl_decode_mb_internal(h, 0, h->pixel_shift); } +static void av_noinline hl_decode_mb_444_complex(H264Context *h){ + hl_decode_mb_444_internal(h, 0, h->pixel_shift); +} + +static void av_noinline hl_decode_mb_444_simple(H264Context *h){ + hl_decode_mb_444_internal(h, 1, 0); +} + void ff_h264_hl_decode_mb(H264Context *h){ MpegEncContext * const s = &h->s; const int mb_xy= h->mb_xy; const int mb_type= s->current_picture.mb_type[mb_xy]; int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0; - if (is_complex) { + if (CHROMA444) { + if(is_complex || h->pixel_shift) + hl_decode_mb_444_complex(h); + else + hl_decode_mb_444_simple(h); + } else if (is_complex) { hl_decode_mb_complex(h); } else if (h->pixel_shift) { hl_decode_mb_simple_16(h); @@ -1412,7 +2105,7 @@ static int pred_weight_table(H264Context *h){ h->use_weight= 0; h->use_weight_chroma= 0; h->luma_log2_weight_denom= get_ue_golomb(&s->gb); - if(CHROMA) + if(h->sps.chroma_format_idc) h->chroma_log2_weight_denom= get_ue_golomb(&s->gb); luma_def = 1<<h->luma_log2_weight_denom; chroma_def = 1<<h->chroma_log2_weight_denom; @@ -1437,7 +2130,7 @@ static int pred_weight_table(H264Context *h){ h->luma_weight[i][list][1]= 0; } - if(CHROMA){ + if(h->sps.chroma_format_idc){ chroma_weight_flag= get_bits1(&s->gb); if(chroma_weight_flag){ int j; @@ -1867,11 +2560,11 @@ static int decode_slice_header(H264Context *h, H264Context *h0){ h->b_stride= s->mb_width*4; - s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7); + s->width = 16*s->mb_width - 
(2>>CHROMA444)*FFMIN(h->sps.crop_right, (8<<CHROMA444)-1); if(h->sps.frame_mbs_only_flag) - s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7); + s->height= 16*s->mb_height - (2>>CHROMA444)*FFMIN(h->sps.crop_bottom, (8<<CHROMA444)-1); else - s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 7); + s->height= 16*s->mb_height - (4>>CHROMA444)*FFMIN(h->sps.crop_bottom, (8<<CHROMA444)-1); if (s->context_initialized && ( s->width != s->avctx->width || s->height != s->avctx->height @@ -1885,8 +2578,8 @@ static int decode_slice_header(H264Context *h, H264Context *h0){ MPV_common_end(s); } if (!s->context_initialized) { - if(h != h0){ - av_log(h->s.avctx, AV_LOG_ERROR, "we cant (re-)initialize context during parallel decoding\n"); + if (h != h0) { + av_log(h->s.avctx, AV_LOG_ERROR, "Cannot (re-)initialize context during parallel decoding.\n"); return -1; } @@ -1894,6 +2587,9 @@ static int decode_slice_header(H264Context *h, H264Context *h0){ s->avctx->sample_aspect_ratio= h->sps.sar; av_assert0(s->avctx->sample_aspect_ratio.den); + h->s.avctx->coded_width = 16*s->mb_width; + h->s.avctx->coded_height = 16*s->mb_height; + if(h->sps.video_signal_type_present_flag){ s->avctx->color_range = h->sps.full_range ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG; if(h->sps.colour_description_present_flag){ @@ -1913,24 +2609,28 @@ static int decode_slice_header(H264Context *h, H264Context *h0){ switch (h->sps.bit_depth_luma) { case 9 : - s->avctx->pix_fmt = PIX_FMT_YUV420P9; + s->avctx->pix_fmt = CHROMA444 ? PIX_FMT_YUV444P9 : PIX_FMT_YUV420P9; break; case 10 : - s->avctx->pix_fmt = PIX_FMT_YUV420P10; + s->avctx->pix_fmt = CHROMA444 ? PIX_FMT_YUV444P10 : PIX_FMT_YUV420P10; break; default: - s->avctx->pix_fmt = s->avctx->get_format(s->avctx, - s->avctx->codec->pix_fmts ? - s->avctx->codec->pix_fmts : - s->avctx->color_range == AVCOL_RANGE_JPEG ? 
- hwaccel_pixfmt_list_h264_jpeg_420 : - ff_hwaccel_pixfmt_list_420); + if (CHROMA444){ + s->avctx->pix_fmt = s->avctx->color_range == AVCOL_RANGE_JPEG ? PIX_FMT_YUVJ444P : PIX_FMT_YUV444P; + }else{ + s->avctx->pix_fmt = s->avctx->get_format(s->avctx, + s->avctx->codec->pix_fmts ? + s->avctx->codec->pix_fmts : + s->avctx->color_range == AVCOL_RANGE_JPEG ? + hwaccel_pixfmt_list_h264_jpeg_420 : + ff_hwaccel_pixfmt_list_420); + } } s->avctx->hwaccel = ff_find_hwaccel(s->avctx->codec->id, s->avctx->pix_fmt); - if (MPV_common_init(s) < 0){ - av_log(h->s.avctx, AV_LOG_ERROR, "MPV_common_init() failed\n"); + if (MPV_common_init(s) < 0) { + av_log(h->s.avctx, AV_LOG_ERROR, "MPV_common_init() failed.\n"); return -1; } s->first_field = 0; @@ -1940,8 +2640,8 @@ static int decode_slice_header(H264Context *h, H264Context *h0){ ff_h264_alloc_tables(h); if (!HAVE_THREADS || !(s->avctx->active_thread_type&FF_THREAD_SLICE)) { - if (context_init(h) < 0){ - av_log(h->s.avctx, AV_LOG_ERROR, "context_init() failed\n"); + if (context_init(h) < 0) { + av_log(h->s.avctx, AV_LOG_ERROR, "context_init() failed.\n"); return -1; } } else { @@ -1953,13 +2653,14 @@ static int decode_slice_header(H264Context *h, H264Context *h0){ c->h264dsp = h->h264dsp; c->sps = h->sps; c->pps = h->pps; + c->pixel_shift = h->pixel_shift; init_scan_tables(c); clone_tables(c, h, i); } for(i = 0; i < s->avctx->thread_count; i++) - if(context_init(h->thread_context[i]) < 0){ - av_log(h->s.avctx, AV_LOG_ERROR, "context_init() failed\n"); + if (context_init(h->thread_context[i]) < 0) { + av_log(h->s.avctx, AV_LOG_ERROR, "context_init() failed.\n"); return -1; } } @@ -1983,9 +2684,20 @@ static int decode_slice_header(H264Context *h, H264Context *h0){ h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME; if(h0->current_slice == 0){ - if(h->frame_num != h->prev_frame_num && - (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num) < (h->frame_num - h->sps.ref_frame_count)) - h->prev_frame_num = h->frame_num - 
h->sps.ref_frame_count - 1; + // Shorten frame num gaps so we don't have to allocate reference frames just to throw them away + if(h->frame_num != h->prev_frame_num) { + int unwrap_prev_frame_num = h->prev_frame_num, max_frame_num = 1<<h->sps.log2_max_frame_num; + + if (unwrap_prev_frame_num > h->frame_num) unwrap_prev_frame_num -= max_frame_num; + + if ((h->frame_num - unwrap_prev_frame_num) > h->sps.ref_frame_count) { + unwrap_prev_frame_num = (h->frame_num - h->sps.ref_frame_count) - 1; + if (unwrap_prev_frame_num < 0) + unwrap_prev_frame_num += max_frame_num; + + h->prev_frame_num = unwrap_prev_frame_num; + } + } while(h->frame_num != h->prev_frame_num && h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){ @@ -2256,8 +2968,8 @@ static int decode_slice_header(H264Context *h, H264Context *h0){ av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n"); h0->single_decode_warning = 1; } - if(h != h0){ - av_log(h->s.avctx, AV_LOG_ERROR, "deblocking switched inside frame\n"); + if (h != h0) { + av_log(h->s.avctx, AV_LOG_ERROR, "Deblocking switched inside frame.\n"); return 1; } } @@ -2309,7 +3021,7 @@ static int decode_slice_header(H264Context *h, H264Context *h0){ } //FIXME: fix draw_edges+PAFF+frame threads - h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE || (!h->sps.frame_mbs_only_flag && s->avctx->active_thread_type&FF_THREAD_FRAME)) ? 0 : 16; + h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE || (!h->sps.frame_mbs_only_flag && s->avctx->active_thread_type)) ? 0 : 16; h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 
0 : h->emu_edge_width; if(s->avctx->debug&FF_DEBUG_PICT_INFO){ @@ -2415,11 +3127,10 @@ static int fill_filter_caches(H264Context *h, int mb_type){ if(IS_INTRA(mb_type)) return 0; - AV_COPY64(&h->non_zero_count_cache[0+8*1], &h->non_zero_count[mb_xy][ 0]); - AV_COPY64(&h->non_zero_count_cache[0+8*2], &h->non_zero_count[mb_xy][ 8]); - AV_COPY32(&h->non_zero_count_cache[0+8*5], &h->non_zero_count[mb_xy][16]); - AV_COPY32(&h->non_zero_count_cache[4+8*3], &h->non_zero_count[mb_xy][20]); - AV_COPY64(&h->non_zero_count_cache[0+8*4], &h->non_zero_count[mb_xy][24]); + AV_COPY32(&h->non_zero_count_cache[4+8* 1], &h->non_zero_count[mb_xy][ 0]); + AV_COPY32(&h->non_zero_count_cache[4+8* 2], &h->non_zero_count[mb_xy][ 4]); + AV_COPY32(&h->non_zero_count_cache[4+8* 3], &h->non_zero_count[mb_xy][ 8]); + AV_COPY32(&h->non_zero_count_cache[4+8* 4], &h->non_zero_count[mb_xy][12]); h->cbp= h->cbp_table[mb_xy]; @@ -2471,45 +3182,45 @@ static int fill_filter_caches(H264Context *h, int mb_type){ */ //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec) if(top_type){ - AV_COPY32(&h->non_zero_count_cache[4+8*0], &h->non_zero_count[top_xy][4+3*8]); + AV_COPY32(&h->non_zero_count_cache[4+8*0], &h->non_zero_count[top_xy][3*4]); } if(left_type[0]){ - h->non_zero_count_cache[3+8*1]= h->non_zero_count[left_xy[0]][7+0*8]; - h->non_zero_count_cache[3+8*2]= h->non_zero_count[left_xy[0]][7+1*8]; - h->non_zero_count_cache[3+8*3]= h->non_zero_count[left_xy[0]][7+2*8]; - h->non_zero_count_cache[3+8*4]= h->non_zero_count[left_xy[0]][7+3*8]; + h->non_zero_count_cache[3+8*1]= h->non_zero_count[left_xy[0]][3+0*4]; + h->non_zero_count_cache[3+8*2]= h->non_zero_count[left_xy[0]][3+1*4]; + h->non_zero_count_cache[3+8*3]= h->non_zero_count[left_xy[0]][3+2*4]; + h->non_zero_count_cache[3+8*4]= h->non_zero_count[left_xy[0]][3+3*4]; } // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs if(!CABAC && 
h->pps.transform_8x8_mode){ if(IS_8x8DCT(top_type)){ h->non_zero_count_cache[4+8*0]= - h->non_zero_count_cache[5+8*0]= h->cbp_table[top_xy] & 4; + h->non_zero_count_cache[5+8*0]= (h->cbp_table[top_xy] & 0x4000) >> 12; h->non_zero_count_cache[6+8*0]= - h->non_zero_count_cache[7+8*0]= h->cbp_table[top_xy] & 8; + h->non_zero_count_cache[7+8*0]= (h->cbp_table[top_xy] & 0x8000) >> 12; } if(IS_8x8DCT(left_type[0])){ h->non_zero_count_cache[3+8*1]= - h->non_zero_count_cache[3+8*2]= h->cbp_table[left_xy[0]]&2; //FIXME check MBAFF + h->non_zero_count_cache[3+8*2]= (h->cbp_table[left_xy[0]]&0x2000) >> 12; //FIXME check MBAFF } if(IS_8x8DCT(left_type[1])){ h->non_zero_count_cache[3+8*3]= - h->non_zero_count_cache[3+8*4]= h->cbp_table[left_xy[1]]&8; //FIXME check MBAFF + h->non_zero_count_cache[3+8*4]= (h->cbp_table[left_xy[1]]&0x8000) >> 12; //FIXME check MBAFF } if(IS_8x8DCT(mb_type)){ h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]= - h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp & 1; + h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= (h->cbp & 0x1000) >> 12; h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]= - h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp & 2; + h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= (h->cbp & 0x2000) >> 12; h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]= - h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp & 4; + h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= (h->cbp & 0x4000) >> 12; h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]= - h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp & 8; + h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= (h->cbp & 0x8000) >> 12; } } @@ -2560,18 +3271,16 @@ static int 
fill_filter_caches(H264Context *h, int mb_type){ return 0; } -static void loop_filter(H264Context *h){ +static void loop_filter(H264Context *h, int start_x, int end_x){ MpegEncContext * const s = &h->s; uint8_t *dest_y, *dest_cb, *dest_cr; int linesize, uvlinesize, mb_x, mb_y; const int end_mb_y= s->mb_y + FRAME_MBAFF; const int old_slice_type= h->slice_type; - const int end_mb_x = s->mb_x; const int pixel_shift = h->pixel_shift; if(h->deblocking_filter) { - int start_x= s->resync_mb_y == s->mb_y ? s->resync_mb_x : 0; - for(mb_x= start_x; mb_x<end_mb_x; mb_x++){ + for(mb_x= start_x; mb_x<end_x; mb_x++){ for(mb_y=end_mb_y - FRAME_MBAFF; mb_y<= end_mb_y; mb_y++){ int mb_xy, mb_type; mb_xy = h->mb_xy = mb_x + mb_y*s->mb_stride; @@ -2585,8 +3294,8 @@ static void loop_filter(H264Context *h){ s->mb_x= mb_x; s->mb_y= mb_y; dest_y = s->current_picture.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize ) * 16; - dest_cb = s->current_picture.data[1] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8; - dest_cr = s->current_picture.data[2] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8; + dest_cb = s->current_picture.data[1] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * (8 << CHROMA444); + dest_cr = s->current_picture.data[2] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * (8 << CHROMA444); //FIXME simplify above if (MB_FIELD) { @@ -2601,7 +3310,7 @@ static void loop_filter(H264Context *h){ linesize = h->mb_linesize = s->linesize; uvlinesize = h->mb_uvlinesize = s->uvlinesize; } - backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0); + backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, CHROMA444, 0); if(fill_filter_caches(h, mb_type)) continue; h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]); @@ -2616,7 +3325,7 @@ static void loop_filter(H264Context *h){ } } h->slice_type= old_slice_type; - s->mb_x= end_mb_x; + s->mb_x= end_x; s->mb_y= end_mb_y - FRAME_MBAFF; h->chroma_qp[0] = get_chroma_qp(h, 
0, s->qscale); h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale); @@ -2671,6 +3380,7 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg){ H264Context *h = *(void**)arg; MpegEncContext * const s = &h->s; const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F; + int lf_x_start = s->mb_x; s->mb_skip_run= -1; @@ -2709,6 +3419,7 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg){ if((s->workaround_bugs & FF_BUG_TRUNCATED) && h->cabac.bytestream > h->cabac.bytestream_end + 2){ ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); + if (s->mb_x >= lf_x_start) loop_filter(h, lf_x_start, s->mb_x + 1); return 0; } if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) { @@ -2718,8 +3429,8 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg){ } if( ++s->mb_x >= s->mb_width ) { - loop_filter(h); - s->mb_x = 0; + loop_filter(h, lf_x_start, s->mb_x); + s->mb_x = lf_x_start = 0; decode_finish_row(h); ++s->mb_y; if(FIELD_OR_MBAFF_PICTURE) { @@ -2730,10 +3441,9 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg){ } if( eos || s->mb_y >= s->mb_height ) { - if(s->mb_x) - loop_filter(h); tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits); ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); + if (s->mb_x > lf_x_start) loop_filter(h, lf_x_start, s->mb_x); return 0; } } @@ -2755,13 +3465,12 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg){ if(ret<0){ av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y); ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask); - return -1; } if(++s->mb_x >= s->mb_width){ - loop_filter(h); - s->mb_x=0; + loop_filter(h, lf_x_start, s->mb_x); + s->mb_x = lf_x_start = 0; decode_finish_row(h); ++s->mb_y; if(FIELD_OR_MBAFF_PICTURE) { @@ 
-2788,9 +3497,8 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg){ if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){ tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits); if(get_bits_count(&s->gb) == s->gb.size_in_bits ){ - if(s->mb_x) - loop_filter(h); ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); + if (s->mb_x > lf_x_start) loop_filter(h, lf_x_start, s->mb_x); return 0; }else{ @@ -2894,18 +3602,15 @@ static void execute_decode_slices(H264Context *h, int context_count){ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){ MpegEncContext * const s = &h->s; AVCodecContext * const avctx= s->avctx; - int buf_index=0; H264Context *hx; ///< thread context - int context_count = 0; - int next_avc= h->is_avc ? 0 : buf_size; + int buf_index; + int context_count; + int next_avc; + int pass = !(avctx->active_thread_type & FF_THREAD_FRAME); + int nals_needed=0; ///< number of NALs that need decoding before the next frame thread starts + int nal_index; h->max_contexts = (HAVE_THREADS && (s->avctx->active_thread_type&FF_THREAD_SLICE)) ? avctx->thread_count : 1; -#if 0 - int i; - for(i=0; i<50; i++){ - av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]); - } -#endif if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){ h->current_slice = 0; if (!s->first_field) @@ -2913,6 +3618,11 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){ ff_h264_reset_sei(h); } + for(;pass <= 1;pass++){ + buf_index = 0; + context_count = 0; + next_avc = h->is_avc ? 0 : buf_size; + nal_index = 0; for(;;){ int consumed; int dst_length; @@ -2971,6 +3681,19 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){ } buf_index += consumed; + nal_index++; + + if(pass == 0) { + // packets can sometimes contain multiple PPS/SPS + // e.g. 
two PAFF field pictures in one packet, or a demuxer which splits NALs strangely + // if so, when frame threading we can't start the next thread until we've read all of them + switch (hx->nal_unit_type) { + case NAL_SPS: + case NAL_PPS: + nals_needed = nal_index; + } + continue; + } //FIXME do not discard SEI id if( @@ -3004,7 +3727,7 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){ if (h->current_slice == 1) { if(!(s->flags2 & CODEC_FLAG2_CHUNKS)) { - decode_postinit(h); + decode_postinit(h, nal_index >= nals_needed); } if (s->avctx->hwaccel && s->avctx->hwaccel->start_frame(s->avctx, NULL, 0) < 0) @@ -3127,6 +3850,7 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){ goto again; } } + } if(context_count) execute_decode_slices(h, context_count); return buf_index; @@ -3206,7 +3930,7 @@ static int decode_frame(AVCodecContext *avctx, if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){ - if(s->flags2 & CODEC_FLAG2_CHUNKS) decode_postinit(h); + if(s->flags2 & CODEC_FLAG2_CHUNKS) decode_postinit(h, 1); field_end(h, 0); @@ -3476,8 +4200,7 @@ AVCodec ff_h264_decoder = { ff_h264_decode_end, decode_frame, /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY | - CODEC_CAP_FRAME_THREADS | - CODEC_CAP_SLICE_THREADS, + CODEC_CAP_SLICE_THREADS | CODEC_CAP_FRAME_THREADS, .flush= flush_dpb, .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"), .init_thread_copy = ONLY_IF_THREADS_ENABLED(decode_init_thread_copy), diff --git a/libavcodec/h264.h b/libavcodec/h264.h index 04da701750..7bad91b215 100644 --- a/libavcodec/h264.h +++ b/libavcodec/h264.h @@ -39,9 +39,6 @@ #define interlaced_dct interlaced_dct_is_a_bad_name #define mb_intra mb_intra_is_not_initialized_see_mb_type -#define LUMA_DC_BLOCK_INDEX 24 -#define CHROMA_DC_BLOCK_INDEX 25 - #define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8 #define COEFF_TOKEN_VLC_BITS 8 #define TOTAL_ZEROS_VLC_BITS 9 @@ -60,8 +57,6 
@@ * of progressive decoding by about 2%. */ #define ALLOW_INTERLACE -#define ALLOW_NOCHROMA - #define FMO 0 /** @@ -85,16 +80,12 @@ #endif #define FIELD_OR_MBAFF_PICTURE (FRAME_MBAFF || FIELD_PICTURE) -#ifdef ALLOW_NOCHROMA -#define CHROMA h->sps.chroma_format_idc -#else -#define CHROMA 1 -#endif - #ifndef CABAC #define CABAC h->pps.cabac #endif +#define CHROMA444 (h->sps.chroma_format_idc == 3) + #define EXTENDED_SAR 255 #define MB_TYPE_REF0 MB_TYPE_ACPRED //dirty but it fits in 16 bit @@ -198,7 +189,7 @@ typedef struct SPS{ int num_reorder_frames; int scaling_matrix_present; uint8_t scaling_matrix4[6][16]; - uint8_t scaling_matrix8[2][64]; + uint8_t scaling_matrix8[6][64]; int nal_hrd_parameters_present_flag; int vcl_hrd_parameters_present_flag; int pic_struct_present_flag; @@ -233,7 +224,7 @@ typedef struct PPS{ int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag int transform_8x8_mode; ///< transform_8x8_mode_flag uint8_t scaling_matrix4[6][16]; - uint8_t scaling_matrix8[2][64]; + uint8_t scaling_matrix8[6][64]; uint8_t chroma_qp_table[2][64]; ///< pre-scaled (with chroma_qp_index_offset) version of qp_table int chroma_qp_diff; }PPS; @@ -298,21 +289,15 @@ typedef struct H264Context{ unsigned int top_samples_available; unsigned int topright_samples_available; unsigned int left_samples_available; - uint8_t (*top_borders[2])[(16+2*8)*2]; + uint8_t (*top_borders[2])[(16*3)*2]; /** * non zero coeff count cache. * is 64 if not available. */ - DECLARE_ALIGNED(8, uint8_t, non_zero_count_cache)[6*8]; + DECLARE_ALIGNED(8, uint8_t, non_zero_count_cache)[15*8]; - /* - .UU.YYYY - .UU.YYYY - .vv.YYYY - .VV.YYYY - */ - uint8_t (*non_zero_count)[32]; + uint8_t (*non_zero_count)[48]; /** * Motion vector cache. @@ -336,7 +321,7 @@ typedef struct H264Context{ * block_offset[ 0..23] for frame macroblocks * block_offset[24..47] for field macroblocks */ - int block_offset[2*(16+8)]; + int block_offset[2*(16*3)]; uint32_t *mb2b_xy; //FIXME are these 4 a good idea? 
uint32_t *mb2br_xy; @@ -356,9 +341,9 @@ typedef struct H264Context{ PPS pps; //FIXME move to Picture perhaps? (->no) do we need that? uint32_t dequant4_buffer[6][QP_MAX_NUM+1][16]; //FIXME should these be moved down? - uint32_t dequant8_buffer[2][QP_MAX_NUM+1][64]; + uint32_t dequant8_buffer[6][QP_MAX_NUM+1][64]; uint32_t (*dequant4_coeff[6])[16]; - uint32_t (*dequant8_coeff[2])[64]; + uint32_t (*dequant8_coeff[6])[64]; int slice_num; uint16_t *slice_table; ///< slice_table_base + 2*mb_stride + 1 @@ -408,15 +393,15 @@ typedef struct H264Context{ GetBitContext *intra_gb_ptr; GetBitContext *inter_gb_ptr; - DECLARE_ALIGNED(16, DCTELEM, mb)[16*24*2]; ///< as a dct coeffecient is int32_t in high depth, we need to reserve twice the space. - DECLARE_ALIGNED(16, DCTELEM, mb_luma_dc)[16*2]; + DECLARE_ALIGNED(16, DCTELEM, mb)[16*48*2]; ///< as a dct coeffecient is int32_t in high depth, we need to reserve twice the space. + DECLARE_ALIGNED(16, DCTELEM, mb_luma_dc)[3][16*2]; DCTELEM mb_padding[256*2]; ///< as mb is addressed by scantable[i] and scantable is uint8_t we can either check that i is not too large or ensure that there is some unused stuff after mb /** * Cabac */ CABACContext cabac; - uint8_t cabac_state[460]; + uint8_t cabac_state[1024]; /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? 
luma_cbp */ uint16_t *cbp_table; @@ -710,12 +695,6 @@ void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint void ff_h264_reset_sei(H264Context *h); -void ff_hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, - qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put), - qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg), - h264_weight_func *weight_op, h264_biweight_func *weight_avg); - - /* o-o o-o / / / @@ -727,27 +706,43 @@ o-o o-o */ /* Scan8 organization: - * 0 1 2 3 4 5 6 7 - * 0 u u y y y y y - * 1 u U U y Y Y Y Y - * 2 u U U y Y Y Y Y - * 3 v v y Y Y Y Y - * 4 v V V y Y Y Y Y - * 5 v V V DYDUDV + * 0 1 2 3 4 5 6 7 + * 0 DY y y y y y + * 1 y Y Y Y Y + * 2 y Y Y Y Y + * 3 y Y Y Y Y + * 4 y Y Y Y Y + * 5 DU u u u u u + * 6 u U U U U + * 7 u U U U U + * 8 u U U U U + * 9 u U U U U + * 10 DV v v v v v + * 11 v V V V V + * 12 v V V V V + * 13 v V V V V + * 14 v V V V V * DY/DU/DV are for luma/chroma DC. */ +#define LUMA_DC_BLOCK_INDEX 48 +#define CHROMA_DC_BLOCK_INDEX 49 + //This table must be here because scan8[constant] must be known at compiletime -static const uint8_t scan8[16 + 2*4 + 3]={ - 4+1*8, 5+1*8, 4+2*8, 5+2*8, - 6+1*8, 7+1*8, 6+2*8, 7+2*8, - 4+3*8, 5+3*8, 4+4*8, 5+4*8, - 6+3*8, 7+3*8, 6+4*8, 7+4*8, - 1+1*8, 2+1*8, - 1+2*8, 2+2*8, - 1+4*8, 2+4*8, - 1+5*8, 2+5*8, - 4+5*8, 5+5*8, 6+5*8 +static const uint8_t scan8[16*3 + 3]={ + 4+ 1*8, 5+ 1*8, 4+ 2*8, 5+ 2*8, + 6+ 1*8, 7+ 1*8, 6+ 2*8, 7+ 2*8, + 4+ 3*8, 5+ 3*8, 4+ 4*8, 5+ 4*8, + 6+ 3*8, 7+ 3*8, 6+ 4*8, 7+ 4*8, + 4+ 6*8, 5+ 6*8, 4+ 7*8, 5+ 7*8, + 6+ 6*8, 7+ 6*8, 6+ 7*8, 7+ 7*8, + 4+ 8*8, 5+ 8*8, 4+ 9*8, 5+ 9*8, + 6+ 8*8, 7+ 8*8, 6+ 9*8, 7+ 9*8, + 4+11*8, 5+11*8, 4+12*8, 5+12*8, + 6+11*8, 7+11*8, 6+12*8, 7+12*8, + 4+13*8, 5+13*8, 4+14*8, 5+14*8, + 6+13*8, 7+13*8, 6+14*8, 7+14*8, + 0+ 0*8, 0+ 5*8, 0+10*8 }; static av_always_inline uint32_t pack16to32(int a, int b){ @@ -779,11 +774,11 @@ static void fill_decode_neighbors(H264Context *h, 
int mb_type){ MpegEncContext * const s = &h->s; const int mb_xy= h->mb_xy; int topleft_xy, top_xy, topright_xy, left_xy[2]; - static const uint8_t left_block_options[4][16]={ - {0,1,2,3,7,10,8,11,7+0*8, 7+1*8, 7+2*8, 7+3*8, 2+0*8, 2+3*8, 2+1*8, 2+2*8}, - {2,2,3,3,8,11,8,11,7+2*8, 7+2*8, 7+3*8, 7+3*8, 2+1*8, 2+2*8, 2+1*8, 2+2*8}, - {0,0,1,1,7,10,7,10,7+0*8, 7+0*8, 7+1*8, 7+1*8, 2+0*8, 2+3*8, 2+0*8, 2+3*8}, - {0,2,0,2,7,10,7,10,7+0*8, 7+2*8, 7+0*8, 7+2*8, 2+0*8, 2+3*8, 2+0*8, 2+3*8} + static const uint8_t left_block_options[4][32]={ + {0,1,2,3,7,10,8,11,3+0*4, 3+1*4, 3+2*4, 3+3*4, 1+4*4, 1+8*4, 1+5*4, 1+9*4}, + {2,2,3,3,8,11,8,11,3+2*4, 3+2*4, 3+3*4, 3+3*4, 1+5*4, 1+9*4, 1+5*4, 1+9*4}, + {0,0,1,1,7,10,7,10,3+0*4, 3+0*4, 3+1*4, 3+1*4, 1+4*4, 1+8*4, 1+4*4, 1+8*4}, + {0,2,0,2,7,10,7,10,3+0*4, 3+2*4, 3+0*4, 3+2*4, 1+4*4, 1+8*4, 1+4*4, 1+8*4} }; h->topleft_partition= -1; @@ -953,32 +948,41 @@ static void fill_decode_caches(H264Context *h, int mb_type){ */ //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec) if(top_type){ - AV_COPY32(&h->non_zero_count_cache[4+8*0], &h->non_zero_count[top_xy][4+3*8]); - h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][1+1*8]; - h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][2+1*8]; - - h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][1+2*8]; - h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][2+2*8]; - }else { - h->non_zero_count_cache[1+8*0]= - h->non_zero_count_cache[2+8*0]= - - h->non_zero_count_cache[1+8*3]= - h->non_zero_count_cache[2+8*3]= - AV_WN32A(&h->non_zero_count_cache[4+8*0], CABAC && !IS_INTRA(mb_type) ? 
0 : 0x40404040); + AV_COPY32(&h->non_zero_count_cache[4+8* 0], &h->non_zero_count[top_xy][4*3]); + if(CHROMA444){ + AV_COPY32(&h->non_zero_count_cache[4+8* 5], &h->non_zero_count[top_xy][4* 7]); + AV_COPY32(&h->non_zero_count_cache[4+8*10], &h->non_zero_count[top_xy][4*11]); + }else{ + AV_COPY32(&h->non_zero_count_cache[4+8* 5], &h->non_zero_count[top_xy][4* 5]); + AV_COPY32(&h->non_zero_count_cache[4+8*10], &h->non_zero_count[top_xy][4* 9]); + } + }else{ + uint32_t top_empty = CABAC && !IS_INTRA(mb_type) ? 0 : 0x40404040; + AV_WN32A(&h->non_zero_count_cache[4+8* 0], top_empty); + AV_WN32A(&h->non_zero_count_cache[4+8* 5], top_empty); + AV_WN32A(&h->non_zero_count_cache[4+8*10], top_empty); } for (i=0; i<2; i++) { if(left_type[i]){ - h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+0+2*i]]; - h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+1+2*i]]; - h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[8+4+2*i]]; - h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[8+5+2*i]]; + h->non_zero_count_cache[3+8* 1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+0+2*i]]; + h->non_zero_count_cache[3+8* 2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+1+2*i]]; + if(CHROMA444){ + h->non_zero_count_cache[3+8* 6 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+0+2*i]+4*4]; + h->non_zero_count_cache[3+8* 7 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+1+2*i]+4*4]; + h->non_zero_count_cache[3+8*11 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+0+2*i]+8*4]; + h->non_zero_count_cache[3+8*12 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+1+2*i]+8*4]; + }else{ + h->non_zero_count_cache[3+8* 6 + 8*i]= h->non_zero_count[left_xy[i]][left_block[8+4+2*i]]; + h->non_zero_count_cache[3+8*11 + 8*i]= h->non_zero_count[left_xy[i]][left_block[8+5+2*i]]; + } }else{ - h->non_zero_count_cache[3+8*1 + 2*8*i]= - h->non_zero_count_cache[3+8*2 + 
2*8*i]= - h->non_zero_count_cache[0+8*1 + 8*i]= - h->non_zero_count_cache[0+8*4 + 8*i]= CABAC && !IS_INTRA(mb_type) ? 0 : 64; + h->non_zero_count_cache[3+8* 1 + 2*8*i]= + h->non_zero_count_cache[3+8* 2 + 2*8*i]= + h->non_zero_count_cache[3+8* 6 + 2*8*i]= + h->non_zero_count_cache[3+8* 7 + 2*8*i]= + h->non_zero_count_cache[3+8*11 + 2*8*i]= + h->non_zero_count_cache[3+8*12 + 2*8*i]= CABAC && !IS_INTRA(mb_type) ? 0 : 64; } } @@ -987,15 +991,15 @@ static void fill_decode_caches(H264Context *h, int mb_type){ if(top_type) { h->top_cbp = h->cbp_table[top_xy]; } else { - h->top_cbp = IS_INTRA(mb_type) ? 0x1CF : 0x00F; + h->top_cbp = IS_INTRA(mb_type) ? 0x7CF : 0x00F; } // left_cbp if (left_type[0]) { - h->left_cbp = (h->cbp_table[left_xy[0]] & 0x1f0) + h->left_cbp = (h->cbp_table[left_xy[0]] & 0x7F0) | ((h->cbp_table[left_xy[0]]>>(left_block[0]&(~1)))&2) | (((h->cbp_table[left_xy[1]]>>(left_block[2]&(~1)))&2) << 2); } else { - h->left_cbp = IS_INTRA(mb_type) ? 0x1CF : 0x00F; + h->left_cbp = IS_INTRA(mb_type) ? 
0x7CF : 0x00F; } } } @@ -1196,11 +1200,21 @@ static inline int pred_intra_mode(H264Context *h, int n){ static inline void write_back_non_zero_count(H264Context *h){ const int mb_xy= h->mb_xy; - AV_COPY64(&h->non_zero_count[mb_xy][ 0], &h->non_zero_count_cache[0+8*1]); - AV_COPY64(&h->non_zero_count[mb_xy][ 8], &h->non_zero_count_cache[0+8*2]); - AV_COPY32(&h->non_zero_count[mb_xy][16], &h->non_zero_count_cache[0+8*5]); - AV_COPY32(&h->non_zero_count[mb_xy][20], &h->non_zero_count_cache[4+8*3]); - AV_COPY64(&h->non_zero_count[mb_xy][24], &h->non_zero_count_cache[0+8*4]); + AV_COPY32(&h->non_zero_count[mb_xy][ 0], &h->non_zero_count_cache[4+8* 1]); + AV_COPY32(&h->non_zero_count[mb_xy][ 4], &h->non_zero_count_cache[4+8* 2]); + AV_COPY32(&h->non_zero_count[mb_xy][ 8], &h->non_zero_count_cache[4+8* 3]); + AV_COPY32(&h->non_zero_count[mb_xy][12], &h->non_zero_count_cache[4+8* 4]); + AV_COPY32(&h->non_zero_count[mb_xy][16], &h->non_zero_count_cache[4+8* 6]); + AV_COPY32(&h->non_zero_count[mb_xy][20], &h->non_zero_count_cache[4+8* 7]); + AV_COPY32(&h->non_zero_count[mb_xy][32], &h->non_zero_count_cache[4+8*11]); + AV_COPY32(&h->non_zero_count[mb_xy][36], &h->non_zero_count_cache[4+8*12]); + + if(CHROMA444){ + AV_COPY32(&h->non_zero_count[mb_xy][24], &h->non_zero_count_cache[4+8* 8]); + AV_COPY32(&h->non_zero_count[mb_xy][28], &h->non_zero_count_cache[4+8* 9]); + AV_COPY32(&h->non_zero_count[mb_xy][40], &h->non_zero_count_cache[4+8*13]); + AV_COPY32(&h->non_zero_count[mb_xy][44], &h->non_zero_count_cache[4+8*14]); + } } static inline void write_back_motion(H264Context *h, int mb_type){ @@ -1273,8 +1287,7 @@ static void av_unused decode_mb_skip(H264Context *h){ const int mb_xy= h->mb_xy; int mb_type=0; - memset(h->non_zero_count[mb_xy], 0, 32); - memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui + memset(h->non_zero_count[mb_xy], 0, 48); if(MB_FIELD) mb_type|= MB_TYPE_INTERLACED; diff --git a/libavcodec/h264_cabac.c b/libavcodec/h264_cabac.c index 
925ac44498..3975a61699 100644 --- a/libavcodec/h264_cabac.c +++ b/libavcodec/h264_cabac.c @@ -45,7 +45,7 @@ /* Cabac pre state table */ -static const int8_t cabac_context_init_I[460][2] = +static const int8_t cabac_context_init_I[1024][2] = { /* 0 - 10 */ { 20, -15 }, { 2, 54 }, { 3, 74 }, { 20, -15 }, @@ -211,10 +211,153 @@ static const int8_t cabac_context_init_I[460][2] = { -10, 73 }, { -10, 70 }, { -10, 69 }, { -5, 66 }, { -9, 64 }, { -5, 58 }, { 2, 59 }, { 21, -10 }, { 24, -11 }, { 28, -8 }, { 28, -1 }, { 29, 3 }, - { 29, 9 }, { 35, 20 }, { 29, 36 }, { 14, 67 } + { 29, 9 }, { 35, 20 }, { 29, 36 }, { 14, 67 }, + + /* 460 -> 1024 */ + { -17, 123 }, { -12, 115 }, { -16, 122 }, { -11, 115 }, + { -12, 63 }, { -2, 68 }, { -15, 84 }, { -13, 104 }, + { -3, 70 }, { -8, 93 }, { -10, 90 }, { -30, 127 }, + { -17, 123 }, { -12, 115 }, { -16, 122 }, { -11, 115 }, + { -12, 63 }, { -2, 68 }, { -15, 84 }, { -13, 104 }, + { -3, 70 }, { -8, 93 }, { -10, 90 }, { -30, 127 }, + { -7, 93 }, { -11, 87 }, { -3, 77 }, { -5, 71 }, + { -4, 63 }, { -4, 68 }, { -12, 84 }, { -7, 62 }, + { -7, 65 }, { 8, 61 }, { 5, 56 }, { -2, 66 }, + { 1, 64 }, { 0, 61 }, { -2, 78 }, { 1, 50 }, + { 7, 52 }, { 10, 35 }, { 0, 44 }, { 11, 38 }, + { 1, 45 }, { 0, 46 }, { 5, 44 }, { 31, 17 }, + { 1, 51 }, { 7, 50 }, { 28, 19 }, { 16, 33 }, + { 14, 62 }, { -13, 108 }, { -15, 100 }, { -13, 101 }, + { -13, 91 }, { -12, 94 }, { -10, 88 }, { -16, 84 }, + { -10, 86 }, { -7, 83 }, { -13, 87 }, { -19, 94 }, + { 1, 70 }, { 0, 72 }, { -5, 74 }, { 18, 59 }, + { -7, 93 }, { -11, 87 }, { -3, 77 }, { -5, 71 }, + { -4, 63 }, { -4, 68 }, { -12, 84 }, { -7, 62 }, + { -7, 65 }, { 8, 61 }, { 5, 56 }, { -2, 66 }, + { 1, 64 }, { 0, 61 }, { -2, 78 }, { 1, 50 }, + { 7, 52 }, { 10, 35 }, { 0, 44 }, { 11, 38 }, + { 1, 45 }, { 0, 46 }, { 5, 44 }, { 31, 17 }, + { 1, 51 }, { 7, 50 }, { 28, 19 }, { 16, 33 }, + { 14, 62 }, { -13, 108 }, { -15, 100 }, { -13, 101 }, + { -13, 91 }, { -12, 94 }, { -10, 88 }, { -16, 84 }, + { -10, 86 }, { -7, 83 
}, { -13, 87 }, { -19, 94 }, + { 1, 70 }, { 0, 72 }, { -5, 74 }, { 18, 59 }, + { 24, 0 }, { 15, 9 }, { 8, 25 }, { 13, 18 }, + { 15, 9 }, { 13, 19 }, { 10, 37 }, { 12, 18 }, + { 6, 29 }, { 20, 33 }, { 15, 30 }, { 4, 45 }, + { 1, 58 }, { 0, 62 }, { 7, 61 }, { 12, 38 }, + { 11, 45 }, { 15, 39 }, { 11, 42 }, { 13, 44 }, + { 16, 45 }, { 12, 41 }, { 10, 49 }, { 30, 34 }, + { 18, 42 }, { 10, 55 }, { 17, 51 }, { 17, 46 }, + { 0, 89 }, { 26, -19 }, { 22, -17 }, { 26, -17 }, + { 30, -25 }, { 28, -20 }, { 33, -23 }, { 37, -27 }, + { 33, -23 }, { 40, -28 }, { 38, -17 }, { 33, -11 }, + { 40, -15 }, { 41, -6 }, { 38, 1 }, { 41, 17 }, + { 24, 0 }, { 15, 9 }, { 8, 25 }, { 13, 18 }, + { 15, 9 }, { 13, 19 }, { 10, 37 }, { 12, 18 }, + { 6, 29 }, { 20, 33 }, { 15, 30 }, { 4, 45 }, + { 1, 58 }, { 0, 62 }, { 7, 61 }, { 12, 38 }, + { 11, 45 }, { 15, 39 }, { 11, 42 }, { 13, 44 }, + { 16, 45 }, { 12, 41 }, { 10, 49 }, { 30, 34 }, + { 18, 42 }, { 10, 55 }, { 17, 51 }, { 17, 46 }, + { 0, 89 }, { 26, -19 }, { 22, -17 }, { 26, -17 }, + { 30, -25 }, { 28, -20 }, { 33, -23 }, { 37, -27 }, + { 33, -23 }, { 40, -28 }, { 38, -17 }, { 33, -11 }, + { 40, -15 }, { 41, -6 }, { 38, 1 }, { 41, 17 }, + { -17, 120 }, { -20, 112 }, { -18, 114 }, { -11, 85 }, + { -15, 92 }, { -14, 89 }, { -26, 71 }, { -15, 81 }, + { -14, 80 }, { 0, 68 }, { -14, 70 }, { -24, 56 }, + { -23, 68 }, { -24, 50 }, { -11, 74 }, { -14, 106 }, + { -13, 97 }, { -15, 90 }, { -12, 90 }, { -18, 88 }, + { -10, 73 }, { -9, 79 }, { -14, 86 }, { -10, 73 }, + { -10, 70 }, { -10, 69 }, { -5, 66 }, { -9, 64 }, + { -5, 58 }, { 2, 59 }, { 23, -13 }, { 26, -13 }, + { 40, -15 }, { 49, -14 }, { 44, 3 }, { 45, 6 }, + { 44, 34 }, { 33, 54 }, { 19, 82 }, { 21, -10 }, + { 24, -11 }, { 28, -8 }, { 28, -1 }, { 29, 3 }, + { 29, 9 }, { 35, 20 }, { 29, 36 }, { 14, 67 }, + { -3, 75 }, { -1, 23 }, { 1, 34 }, { 1, 43 }, + { 0, 54 }, { -2, 55 }, { 0, 61 }, { 1, 64 }, + { 0, 68 }, { -9, 92 }, { -17, 120 }, { -20, 112 }, + { -18, 114 }, { -11, 85 }, { -15, 92 }, { 
-14, 89 }, + { -26, 71 }, { -15, 81 }, { -14, 80 }, { 0, 68 }, + { -14, 70 }, { -24, 56 }, { -23, 68 }, { -24, 50 }, + { -11, 74 }, { -14, 106 }, { -13, 97 }, { -15, 90 }, + { -12, 90 }, { -18, 88 }, { -10, 73 }, { -9, 79 }, + { -14, 86 }, { -10, 73 }, { -10, 70 }, { -10, 69 }, + { -5, 66 }, { -9, 64 }, { -5, 58 }, { 2, 59 }, + { 23, -13 }, { 26, -13 }, { 40, -15 }, { 49, -14 }, + { 44, 3 }, { 45, 6 }, { 44, 34 }, { 33, 54 }, + { 19, 82 }, { 21, -10 }, { 24, -11 }, { 28, -8 }, + { 28, -1 }, { 29, 3 }, { 29, 9 }, { 35, 20 }, + { 29, 36 }, { 14, 67 }, { -3, 75 }, { -1, 23 }, + { 1, 34 }, { 1, 43 }, { 0, 54 }, { -2, 55 }, + { 0, 61 }, { 1, 64 }, { 0, 68 }, { -9, 92 }, + { -6, 93 }, { -6, 84 }, { -8, 79 }, { 0, 66 }, + { -1, 71 }, { 0, 62 }, { -2, 60 }, { -2, 59 }, + { -5, 75 }, { -3, 62 }, { -4, 58 }, { -9, 66 }, + { -1, 79 }, { 0, 71 }, { 3, 68 }, { 10, 44 }, + { -7, 62 }, { 15, 36 }, { 14, 40 }, { 16, 27 }, + { 12, 29 }, { 1, 44 }, { 20, 36 }, { 18, 32 }, + { 5, 42 }, { 1, 48 }, { 10, 62 }, { 17, 46 }, + { 9, 64 }, { -12, 104 }, { -11, 97 }, { -16, 96 }, + { -7, 88 }, { -8, 85 }, { -7, 85 }, { -9, 85 }, + { -13, 88 }, { 4, 66 }, { -3, 77 }, { -3, 76 }, + { -6, 76 }, { 10, 58 }, { -1, 76 }, { -1, 83 }, + { -6, 93 }, { -6, 84 }, { -8, 79 }, { 0, 66 }, + { -1, 71 }, { 0, 62 }, { -2, 60 }, { -2, 59 }, + { -5, 75 }, { -3, 62 }, { -4, 58 }, { -9, 66 }, + { -1, 79 }, { 0, 71 }, { 3, 68 }, { 10, 44 }, + { -7, 62 }, { 15, 36 }, { 14, 40 }, { 16, 27 }, + { 12, 29 }, { 1, 44 }, { 20, 36 }, { 18, 32 }, + { 5, 42 }, { 1, 48 }, { 10, 62 }, { 17, 46 }, + { 9, 64 }, { -12, 104 }, { -11, 97 }, { -16, 96 }, + { -7, 88 }, { -8, 85 }, { -7, 85 }, { -9, 85 }, + { -13, 88 }, { 4, 66 }, { -3, 77 }, { -3, 76 }, + { -6, 76 }, { 10, 58 }, { -1, 76 }, { -1, 83 }, + { 15, 6 }, { 6, 19 }, { 7, 16 }, { 12, 14 }, + { 18, 13 }, { 13, 11 }, { 13, 15 }, { 15, 16 }, + { 12, 23 }, { 13, 23 }, { 15, 20 }, { 14, 26 }, + { 14, 44 }, { 17, 40 }, { 17, 47 }, { 24, 17 }, + { 21, 21 }, { 25, 22 }, { 31, 27 
}, { 22, 29 }, + { 19, 35 }, { 14, 50 }, { 10, 57 }, { 7, 63 }, + { -2, 77 }, { -4, 82 }, { -3, 94 }, { 9, 69 }, + { -12, 109 }, { 36, -35 }, { 36, -34 }, { 32, -26 }, + { 37, -30 }, { 44, -32 }, { 34, -18 }, { 34, -15 }, + { 40, -15 }, { 33, -7 }, { 35, -5 }, { 33, 0 }, + { 38, 2 }, { 33, 13 }, { 23, 35 }, { 13, 58 }, + { 15, 6 }, { 6, 19 }, { 7, 16 }, { 12, 14 }, + { 18, 13 }, { 13, 11 }, { 13, 15 }, { 15, 16 }, + { 12, 23 }, { 13, 23 }, { 15, 20 }, { 14, 26 }, + { 14, 44 }, { 17, 40 }, { 17, 47 }, { 24, 17 }, + { 21, 21 }, { 25, 22 }, { 31, 27 }, { 22, 29 }, + { 19, 35 }, { 14, 50 }, { 10, 57 }, { 7, 63 }, + { -2, 77 }, { -4, 82 }, { -3, 94 }, { 9, 69 }, + { -12, 109 }, { 36, -35 }, { 36, -34 }, { 32, -26 }, + { 37, -30 }, { 44, -32 }, { 34, -18 }, { 34, -15 }, + { 40, -15 }, { 33, -7 }, { 35, -5 }, { 33, 0 }, + { 38, 2 }, { 33, 13 }, { 23, 35 }, { 13, 58 }, + { -3, 71 }, { -6, 42 }, { -5, 50 }, { -3, 54 }, + { -2, 62 }, { 0, 58 }, { 1, 63 }, { -2, 72 }, + { -1, 74 }, { -9, 91 }, { -5, 67 }, { -5, 27 }, + { -3, 39 }, { -2, 44 }, { 0, 46 }, { -16, 64 }, + { -8, 68 }, { -10, 78 }, { -6, 77 }, { -10, 86 }, + { -12, 92 }, { -15, 55 }, { -10, 60 }, { -6, 62 }, + { -4, 65 }, { -12, 73 }, { -8, 76 }, { -7, 80 }, + { -9, 88 }, { -17, 110 }, { -3, 71 }, { -6, 42 }, + { -5, 50 }, { -3, 54 }, { -2, 62 }, { 0, 58 }, + { 1, 63 }, { -2, 72 }, { -1, 74 }, { -9, 91 }, + { -5, 67 }, { -5, 27 }, { -3, 39 }, { -2, 44 }, + { 0, 46 }, { -16, 64 }, { -8, 68 }, { -10, 78 }, + { -6, 77 }, { -10, 86 }, { -12, 92 }, { -15, 55 }, + { -10, 60 }, { -6, 62 }, { -4, 65 }, { -12, 73 }, + { -8, 76 }, { -7, 80 }, { -9, 88 }, { -17, 110 }, + { -3, 70 }, { -8, 93 }, { -10, 90 }, { -30, 127 }, + { -3, 70 }, { -8, 93 }, { -10, 90 }, { -30, 127 }, + { -3, 70 }, { -8, 93 }, { -10, 90 }, { -30, 127 } }; -static const int8_t cabac_context_init_PB[3][460][2] = +static const int8_t cabac_context_init_PB[3][1024][2] = { /* i_cabac_init_idc == 0 */ { @@ -370,6 +513,149 @@ static const int8_t 
cabac_context_init_PB[3][460][2] = { -14, 66 }, { 0, 59 }, { 2, 59 }, { 21, -13 }, { 33, -14 }, { 39, -7 }, { 46, -2 }, { 51, 2 }, { 60, 6 }, { 61, 17 }, { 55, 34 }, { 42, 62 }, + + /* 460 - 1024 */ + { -7, 92 }, { -5, 89 }, { -7, 96 }, { -13, 108 }, + { -3, 46 }, { -1, 65 }, { -1, 57 }, { -9, 93 }, + { -3, 74 }, { -9, 92 }, { -8, 87 }, { -23, 126 }, + { -7, 92 }, { -5, 89 }, { -7, 96 }, { -13, 108 }, + { -3, 46 }, { -1, 65 }, { -1, 57 }, { -9, 93 }, + { -3, 74 }, { -9, 92 }, { -8, 87 }, { -23, 126 }, + { -2, 85 }, { -6, 78 }, { -1, 75 }, { -7, 77 }, + { 2, 54 }, { 5, 50 }, { -3, 68 }, { 1, 50 }, + { 6, 42 }, { -4, 81 }, { 1, 63 }, { -4, 70 }, + { 0, 67 }, { 2, 57 }, { -2, 76 }, { 11, 35 }, + { 4, 64 }, { 1, 61 }, { 11, 35 }, { 18, 25 }, + { 12, 24 }, { 13, 29 }, { 13, 36 }, { -10, 93 }, + { -7, 73 }, { -2, 73 }, { 13, 46 }, { 9, 49 }, + { -7, 100 }, { 9, 53 }, { 2, 53 }, { 5, 53 }, + { -2, 61 }, { 0, 56 }, { 0, 56 }, { -13, 63 }, + { -5, 60 }, { -1, 62 }, { 4, 57 }, { -6, 69 }, + { 4, 57 }, { 14, 39 }, { 4, 51 }, { 13, 68 }, + { -2, 85 }, { -6, 78 }, { -1, 75 }, { -7, 77 }, + { 2, 54 }, { 5, 50 }, { -3, 68 }, { 1, 50 }, + { 6, 42 }, { -4, 81 }, { 1, 63 }, { -4, 70 }, + { 0, 67 }, { 2, 57 }, { -2, 76 }, { 11, 35 }, + { 4, 64 }, { 1, 61 }, { 11, 35 }, { 18, 25 }, + { 12, 24 }, { 13, 29 }, { 13, 36 }, { -10, 93 }, + { -7, 73 }, { -2, 73 }, { 13, 46 }, { 9, 49 }, + { -7, 100 }, { 9, 53 }, { 2, 53 }, { 5, 53 }, + { -2, 61 }, { 0, 56 }, { 0, 56 }, { -13, 63 }, + { -5, 60 }, { -1, 62 }, { 4, 57 }, { -6, 69 }, + { 4, 57 }, { 14, 39 }, { 4, 51 }, { 13, 68 }, + { 11, 28 }, { 2, 40 }, { 3, 44 }, { 0, 49 }, + { 0, 46 }, { 2, 44 }, { 2, 51 }, { 0, 47 }, + { 4, 39 }, { 2, 62 }, { 6, 46 }, { 0, 54 }, + { 3, 54 }, { 2, 58 }, { 4, 63 }, { 6, 51 }, + { 6, 57 }, { 7, 53 }, { 6, 52 }, { 6, 55 }, + { 11, 45 }, { 14, 36 }, { 8, 53 }, { -1, 82 }, + { 7, 55 }, { -3, 78 }, { 15, 46 }, { 22, 31 }, + { -1, 84 }, { 25, 7 }, { 30, -7 }, { 28, 3 }, + { 28, 4 }, { 32, 0 }, { 34, -1 }, { 30, 6 
}, + { 30, 6 }, { 32, 9 }, { 31, 19 }, { 26, 27 }, + { 26, 30 }, { 37, 20 }, { 28, 34 }, { 17, 70 }, + { 11, 28 }, { 2, 40 }, { 3, 44 }, { 0, 49 }, + { 0, 46 }, { 2, 44 }, { 2, 51 }, { 0, 47 }, + { 4, 39 }, { 2, 62 }, { 6, 46 }, { 0, 54 }, + { 3, 54 }, { 2, 58 }, { 4, 63 }, { 6, 51 }, + { 6, 57 }, { 7, 53 }, { 6, 52 }, { 6, 55 }, + { 11, 45 }, { 14, 36 }, { 8, 53 }, { -1, 82 }, + { 7, 55 }, { -3, 78 }, { 15, 46 }, { 22, 31 }, + { -1, 84 }, { 25, 7 }, { 30, -7 }, { 28, 3 }, + { 28, 4 }, { 32, 0 }, { 34, -1 }, { 30, 6 }, + { 30, 6 }, { 32, 9 }, { 31, 19 }, { 26, 27 }, + { 26, 30 }, { 37, 20 }, { 28, 34 }, { 17, 70 }, + { -4, 79 }, { -7, 71 }, { -5, 69 }, { -9, 70 }, + { -8, 66 }, { -10, 68 }, { -19, 73 }, { -12, 69 }, + { -16, 70 }, { -15, 67 }, { -20, 62 }, { -19, 70 }, + { -16, 66 }, { -22, 65 }, { -20, 63 }, { -5, 85 }, + { -6, 81 }, { -10, 77 }, { -7, 81 }, { -17, 80 }, + { -18, 73 }, { -4, 74 }, { -10, 83 }, { -9, 71 }, + { -9, 67 }, { -1, 61 }, { -8, 66 }, { -14, 66 }, + { 0, 59 }, { 2, 59 }, { 9, -2 }, { 26, -9 }, + { 33, -9 }, { 39, -7 }, { 41, -2 }, { 45, 3 }, + { 49, 9 }, { 45, 27 }, { 36, 59 }, { 21, -13 }, + { 33, -14 }, { 39, -7 }, { 46, -2 }, { 51, 2 }, + { 60, 6 }, { 61, 17 }, { 55, 34 }, { 42, 62 }, + { -6, 66 }, { -7, 35 }, { -7, 42 }, { -8, 45 }, + { -5, 48 }, { -12, 56 }, { -6, 60 }, { -5, 62 }, + { -8, 66 }, { -8, 76 }, { -4, 79 }, { -7, 71 }, + { -5, 69 }, { -9, 70 }, { -8, 66 }, { -10, 68 }, + { -19, 73 }, { -12, 69 }, { -16, 70 }, { -15, 67 }, + { -20, 62 }, { -19, 70 }, { -16, 66 }, { -22, 65 }, + { -20, 63 }, { -5, 85 }, { -6, 81 }, { -10, 77 }, + { -7, 81 }, { -17, 80 }, { -18, 73 }, { -4, 74 }, + { -10, 83 }, { -9, 71 }, { -9, 67 }, { -1, 61 }, + { -8, 66 }, { -14, 66 }, { 0, 59 }, { 2, 59 }, + { 9, -2 }, { 26, -9 }, { 33, -9 }, { 39, -7 }, + { 41, -2 }, { 45, 3 }, { 49, 9 }, { 45, 27 }, + { 36, 59 }, { 21, -13 }, { 33, -14 }, { 39, -7 }, + { 46, -2 }, { 51, 2 }, { 60, 6 }, { 61, 17 }, + { 55, 34 }, { 42, 62 }, { -6, 66 }, { -7, 35 }, + { 
-7, 42 }, { -8, 45 }, { -5, 48 }, { -12, 56 }, + { -6, 60 }, { -5, 62 }, { -8, 66 }, { -8, 76 }, + { -13, 106 }, { -16, 106 }, { -10, 87 }, { -21, 114 }, + { -18, 110 }, { -14, 98 }, { -22, 110 }, { -21, 106 }, + { -18, 103 }, { -21, 107 }, { -23, 108 }, { -26, 112 }, + { -10, 96 }, { -12, 95 }, { -5, 91 }, { -9, 93 }, + { -22, 94 }, { -5, 86 }, { 9, 67 }, { -4, 80 }, + { -10, 85 }, { -1, 70 }, { 7, 60 }, { 9, 58 }, + { 5, 61 }, { 12, 50 }, { 15, 50 }, { 18, 49 }, + { 17, 54 }, { 10, 41 }, { 7, 46 }, { -1, 51 }, + { 7, 49 }, { 8, 52 }, { 9, 41 }, { 6, 47 }, + { 2, 55 }, { 13, 41 }, { 10, 44 }, { 6, 50 }, + { 5, 53 }, { 13, 49 }, { 4, 63 }, { 6, 64 }, + { -13, 106 }, { -16, 106 }, { -10, 87 }, { -21, 114 }, + { -18, 110 }, { -14, 98 }, { -22, 110 }, { -21, 106 }, + { -18, 103 }, { -21, 107 }, { -23, 108 }, { -26, 112 }, + { -10, 96 }, { -12, 95 }, { -5, 91 }, { -9, 93 }, + { -22, 94 }, { -5, 86 }, { 9, 67 }, { -4, 80 }, + { -10, 85 }, { -1, 70 }, { 7, 60 }, { 9, 58 }, + { 5, 61 }, { 12, 50 }, { 15, 50 }, { 18, 49 }, + { 17, 54 }, { 10, 41 }, { 7, 46 }, { -1, 51 }, + { 7, 49 }, { 8, 52 }, { 9, 41 }, { 6, 47 }, + { 2, 55 }, { 13, 41 }, { 10, 44 }, { 6, 50 }, + { 5, 53 }, { 13, 49 }, { 4, 63 }, { 6, 64 }, + { 14, 11 }, { 11, 14 }, { 9, 11 }, { 18, 11 }, + { 21, 9 }, { 23, -2 }, { 32, -15 }, { 32, -15 }, + { 34, -21 }, { 39, -23 }, { 42, -33 }, { 41, -31 }, + { 46, -28 }, { 38, -12 }, { 21, 29 }, { 45, -24 }, + { 53, -45 }, { 48, -26 }, { 65, -43 }, { 43, -19 }, + { 39, -10 }, { 30, 9 }, { 18, 26 }, { 20, 27 }, + { 0, 57 }, { -14, 82 }, { -5, 75 }, { -19, 97 }, + { -35, 125 }, { 27, 0 }, { 28, 0 }, { 31, -4 }, + { 27, 6 }, { 34, 8 }, { 30, 10 }, { 24, 22 }, + { 33, 19 }, { 22, 32 }, { 26, 31 }, { 21, 41 }, + { 26, 44 }, { 23, 47 }, { 16, 65 }, { 14, 71 }, + { 14, 11 }, { 11, 14 }, { 9, 11 }, { 18, 11 }, + { 21, 9 }, { 23, -2 }, { 32, -15 }, { 32, -15 }, + { 34, -21 }, { 39, -23 }, { 42, -33 }, { 41, -31 }, + { 46, -28 }, { 38, -12 }, { 21, 29 }, { 45, -24 }, + { 53, -45 
}, { 48, -26 }, { 65, -43 }, { 43, -19 }, + { 39, -10 }, { 30, 9 }, { 18, 26 }, { 20, 27 }, + { 0, 57 }, { -14, 82 }, { -5, 75 }, { -19, 97 }, + { -35, 125 }, { 27, 0 }, { 28, 0 }, { 31, -4 }, + { 27, 6 }, { 34, 8 }, { 30, 10 }, { 24, 22 }, + { 33, 19 }, { 22, 32 }, { 26, 31 }, { 21, 41 }, + { 26, 44 }, { 23, 47 }, { 16, 65 }, { 14, 71 }, + { -6, 76 }, { -2, 44 }, { 0, 45 }, { 0, 52 }, + { -3, 64 }, { -2, 59 }, { -4, 70 }, { -4, 75 }, + { -8, 82 }, { -17, 102 }, { -9, 77 }, { 3, 24 }, + { 0, 42 }, { 0, 48 }, { 0, 55 }, { -6, 59 }, + { -7, 71 }, { -12, 83 }, { -11, 87 }, { -30, 119 }, + { 1, 58 }, { -3, 29 }, { -1, 36 }, { 1, 38 }, + { 2, 43 }, { -6, 55 }, { 0, 58 }, { 0, 64 }, + { -3, 74 }, { -10, 90 }, { -6, 76 }, { -2, 44 }, + { 0, 45 }, { 0, 52 }, { -3, 64 }, { -2, 59 }, + { -4, 70 }, { -4, 75 }, { -8, 82 }, { -17, 102 }, + { -9, 77 }, { 3, 24 }, { 0, 42 }, { 0, 48 }, + { 0, 55 }, { -6, 59 }, { -7, 71 }, { -12, 83 }, + { -11, 87 }, { -30, 119 }, { 1, 58 }, { -3, 29 }, + { -1, 36 }, { 1, 38 }, { 2, 43 }, { -6, 55 }, + { 0, 58 }, { 0, 64 }, { -3, 74 }, { -10, 90 }, + { -3, 74 }, { -9, 92 }, { -8, 87 }, { -23, 126 }, + { -3, 74 }, { -9, 92 }, { -8, 87 }, { -23, 126 }, + { -3, 74 }, { -9, 92 }, { -8, 87 }, { -23, 126 } }, /* i_cabac_init_idc == 1 */ @@ -526,6 +812,149 @@ static const int8_t cabac_context_init_PB[3][460][2] = { -9, 60 }, { 1, 54 }, { 2, 58 }, { 17, -10 }, { 32, -13 }, { 42, -9 }, { 49, -5 }, { 53, 0 }, { 64, 3 }, { 68, 10 }, { 66, 27 }, { 47, 57 }, + + /* 460 - 1024 */ + { 0, 80 }, { -5, 89 }, { -7, 94 }, { -4, 92 }, + { 0, 39 }, { 0, 65 }, { -15, 84 }, { -35, 127 }, + { -2, 73 }, { -12, 104 }, { -9, 91 }, { -31, 127 }, + { 0, 80 }, { -5, 89 }, { -7, 94 }, { -4, 92 }, + { 0, 39 }, { 0, 65 }, { -15, 84 }, { -35, 127 }, + { -2, 73 }, { -12, 104 }, { -9, 91 }, { -31, 127 }, + { -13, 103 }, { -13, 91 }, { -9, 89 }, { -14, 92 }, + { -8, 76 }, { -12, 87 }, { -23, 110 }, { -24, 105 }, + { -10, 78 }, { -20, 112 }, { -17, 99 }, { -78, 127 }, + { -70, 127 }, { 
-50, 127 }, { -46, 127 }, { -4, 66 }, + { -5, 78 }, { -4, 71 }, { -8, 72 }, { 2, 59 }, + { -1, 55 }, { -7, 70 }, { -6, 75 }, { -8, 89 }, + { -34, 119 }, { -3, 75 }, { 32, 20 }, { 30, 22 }, + { -44, 127 }, { 0, 54 }, { -5, 61 }, { 0, 58 }, + { -1, 60 }, { -3, 61 }, { -8, 67 }, { -25, 84 }, + { -14, 74 }, { -5, 65 }, { 5, 52 }, { 2, 57 }, + { 0, 61 }, { -9, 69 }, { -11, 70 }, { 18, 55 }, + { -13, 103 }, { -13, 91 }, { -9, 89 }, { -14, 92 }, + { -8, 76 }, { -12, 87 }, { -23, 110 }, { -24, 105 }, + { -10, 78 }, { -20, 112 }, { -17, 99 }, { -78, 127 }, + { -70, 127 }, { -50, 127 }, { -46, 127 }, { -4, 66 }, + { -5, 78 }, { -4, 71 }, { -8, 72 }, { 2, 59 }, + { -1, 55 }, { -7, 70 }, { -6, 75 }, { -8, 89 }, + { -34, 119 }, { -3, 75 }, { 32, 20 }, { 30, 22 }, + { -44, 127 }, { 0, 54 }, { -5, 61 }, { 0, 58 }, + { -1, 60 }, { -3, 61 }, { -8, 67 }, { -25, 84 }, + { -14, 74 }, { -5, 65 }, { 5, 52 }, { 2, 57 }, + { 0, 61 }, { -9, 69 }, { -11, 70 }, { 18, 55 }, + { 4, 45 }, { 10, 28 }, { 10, 31 }, { 33, -11 }, + { 52, -43 }, { 18, 15 }, { 28, 0 }, { 35, -22 }, + { 38, -25 }, { 34, 0 }, { 39, -18 }, { 32, -12 }, + { 102, -94 }, { 0, 0 }, { 56, -15 }, { 33, -4 }, + { 29, 10 }, { 37, -5 }, { 51, -29 }, { 39, -9 }, + { 52, -34 }, { 69, -58 }, { 67, -63 }, { 44, -5 }, + { 32, 7 }, { 55, -29 }, { 32, 1 }, { 0, 0 }, + { 27, 36 }, { 33, -25 }, { 34, -30 }, { 36, -28 }, + { 38, -28 }, { 38, -27 }, { 34, -18 }, { 35, -16 }, + { 34, -14 }, { 32, -8 }, { 37, -6 }, { 35, 0 }, + { 30, 10 }, { 28, 18 }, { 26, 25 }, { 29, 41 }, + { 4, 45 }, { 10, 28 }, { 10, 31 }, { 33, -11 }, + { 52, -43 }, { 18, 15 }, { 28, 0 }, { 35, -22 }, + { 38, -25 }, { 34, 0 }, { 39, -18 }, { 32, -12 }, + { 102, -94 }, { 0, 0 }, { 56, -15 }, { 33, -4 }, + { 29, 10 }, { 37, -5 }, { 51, -29 }, { 39, -9 }, + { 52, -34 }, { 69, -58 }, { 67, -63 }, { 44, -5 }, + { 32, 7 }, { 55, -29 }, { 32, 1 }, { 0, 0 }, + { 27, 36 }, { 33, -25 }, { 34, -30 }, { 36, -28 }, + { 38, -28 }, { 38, -27 }, { 34, -18 }, { 35, -16 }, + { 34, -14 }, 
{ 32, -8 }, { 37, -6 }, { 35, 0 }, + { 30, 10 }, { 28, 18 }, { 26, 25 }, { 29, 41 }, + { -5, 85 }, { -6, 81 }, { -10, 77 }, { -7, 81 }, + { -17, 80 }, { -18, 73 }, { -4, 74 }, { -10, 83 }, + { -9, 71 }, { -9, 67 }, { -1, 61 }, { -8, 66 }, + { -14, 66 }, { 0, 59 }, { 2, 59 }, { -3, 81 }, + { -3, 76 }, { -7, 72 }, { -6, 78 }, { -12, 72 }, + { -14, 68 }, { -3, 70 }, { -6, 76 }, { -5, 66 }, + { -5, 62 }, { 0, 57 }, { -4, 61 }, { -9, 60 }, + { 1, 54 }, { 2, 58 }, { 17, -10 }, { 32, -13 }, + { 42, -9 }, { 49, -5 }, { 53, 0 }, { 64, 3 }, + { 68, 10 }, { 66, 27 }, { 47, 57 }, { 17, -10 }, + { 32, -13 }, { 42, -9 }, { 49, -5 }, { 53, 0 }, + { 64, 3 }, { 68, 10 }, { 66, 27 }, { 47, 57 }, + { -5, 71 }, { 0, 24 }, { -1, 36 }, { -2, 42 }, + { -2, 52 }, { -9, 57 }, { -6, 63 }, { -4, 65 }, + { -4, 67 }, { -7, 82 }, { -5, 85 }, { -6, 81 }, + { -10, 77 }, { -7, 81 }, { -17, 80 }, { -18, 73 }, + { -4, 74 }, { -10, 83 }, { -9, 71 }, { -9, 67 }, + { -1, 61 }, { -8, 66 }, { -14, 66 }, { 0, 59 }, + { 2, 59 }, { -3, 81 }, { -3, 76 }, { -7, 72 }, + { -6, 78 }, { -12, 72 }, { -14, 68 }, { -3, 70 }, + { -6, 76 }, { -5, 66 }, { -5, 62 }, { 0, 57 }, + { -4, 61 }, { -9, 60 }, { 1, 54 }, { 2, 58 }, + { 17, -10 }, { 32, -13 }, { 42, -9 }, { 49, -5 }, + { 53, 0 }, { 64, 3 }, { 68, 10 }, { 66, 27 }, + { 47, 57 }, { 17, -10 }, { 32, -13 }, { 42, -9 }, + { 49, -5 }, { 53, 0 }, { 64, 3 }, { 68, 10 }, + { 66, 27 }, { 47, 57 }, { -5, 71 }, { 0, 24 }, + { -1, 36 }, { -2, 42 }, { -2, 52 }, { -9, 57 }, + { -6, 63 }, { -4, 65 }, { -4, 67 }, { -7, 82 }, + { -21, 126 }, { -23, 124 }, { -20, 110 }, { -26, 126 }, + { -25, 124 }, { -17, 105 }, { -27, 121 }, { -27, 117 }, + { -17, 102 }, { -26, 117 }, { -27, 116 }, { -33, 122 }, + { -10, 95 }, { -14, 100 }, { -8, 95 }, { -17, 111 }, + { -28, 114 }, { -6, 89 }, { -2, 80 }, { -4, 82 }, + { -9, 85 }, { -8, 81 }, { -1, 72 }, { 5, 64 }, + { 1, 67 }, { 9, 56 }, { 0, 69 }, { 1, 69 }, + { 7, 69 }, { -7, 69 }, { -6, 67 }, { -16, 77 }, + { -2, 64 }, { 2, 61 }, { -6, 67 }, 
{ -3, 64 }, + { 2, 57 }, { -3, 65 }, { -3, 66 }, { 0, 62 }, + { 9, 51 }, { -1, 66 }, { -2, 71 }, { -2, 75 }, + { -21, 126 }, { -23, 124 }, { -20, 110 }, { -26, 126 }, + { -25, 124 }, { -17, 105 }, { -27, 121 }, { -27, 117 }, + { -17, 102 }, { -26, 117 }, { -27, 116 }, { -33, 122 }, + { -10, 95 }, { -14, 100 }, { -8, 95 }, { -17, 111 }, + { -28, 114 }, { -6, 89 }, { -2, 80 }, { -4, 82 }, + { -9, 85 }, { -8, 81 }, { -1, 72 }, { 5, 64 }, + { 1, 67 }, { 9, 56 }, { 0, 69 }, { 1, 69 }, + { 7, 69 }, { -7, 69 }, { -6, 67 }, { -16, 77 }, + { -2, 64 }, { 2, 61 }, { -6, 67 }, { -3, 64 }, + { 2, 57 }, { -3, 65 }, { -3, 66 }, { 0, 62 }, + { 9, 51 }, { -1, 66 }, { -2, 71 }, { -2, 75 }, + { 19, -6 }, { 18, -6 }, { 14, 0 }, { 26, -12 }, + { 31, -16 }, { 33, -25 }, { 33, -22 }, { 37, -28 }, + { 39, -30 }, { 42, -30 }, { 47, -42 }, { 45, -36 }, + { 49, -34 }, { 41, -17 }, { 32, 9 }, { 69, -71 }, + { 63, -63 }, { 66, -64 }, { 77, -74 }, { 54, -39 }, + { 52, -35 }, { 41, -10 }, { 36, 0 }, { 40, -1 }, + { 30, 14 }, { 28, 26 }, { 23, 37 }, { 12, 55 }, + { 11, 65 }, { 37, -33 }, { 39, -36 }, { 40, -37 }, + { 38, -30 }, { 46, -33 }, { 42, -30 }, { 40, -24 }, + { 49, -29 }, { 38, -12 }, { 40, -10 }, { 38, -3 }, + { 46, -5 }, { 31, 20 }, { 29, 30 }, { 25, 44 }, + { 19, -6 }, { 18, -6 }, { 14, 0 }, { 26, -12 }, + { 31, -16 }, { 33, -25 }, { 33, -22 }, { 37, -28 }, + { 39, -30 }, { 42, -30 }, { 47, -42 }, { 45, -36 }, + { 49, -34 }, { 41, -17 }, { 32, 9 }, { 69, -71 }, + { 63, -63 }, { 66, -64 }, { 77, -74 }, { 54, -39 }, + { 52, -35 }, { 41, -10 }, { 36, 0 }, { 40, -1 }, + { 30, 14 }, { 28, 26 }, { 23, 37 }, { 12, 55 }, + { 11, 65 }, { 37, -33 }, { 39, -36 }, { 40, -37 }, + { 38, -30 }, { 46, -33 }, { 42, -30 }, { 40, -24 }, + { 49, -29 }, { 38, -12 }, { 40, -10 }, { 38, -3 }, + { 46, -5 }, { 31, 20 }, { 29, 30 }, { 25, 44 }, + { -23, 112 }, { -15, 71 }, { -7, 61 }, { 0, 53 }, + { -5, 66 }, { -11, 77 }, { -9, 80 }, { -9, 84 }, + { -10, 87 }, { -34, 127 }, { -21, 101 }, { -3, 39 }, + { -5, 53 
}, { -7, 61 }, { -11, 75 }, { -15, 77 }, + { -17, 91 }, { -25, 107 }, { -25, 111 }, { -28, 122 }, + { -11, 76 }, { -10, 44 }, { -10, 52 }, { -10, 57 }, + { -9, 58 }, { -16, 72 }, { -7, 69 }, { -4, 69 }, + { -5, 74 }, { -9, 86 }, { -23, 112 }, { -15, 71 }, + { -7, 61 }, { 0, 53 }, { -5, 66 }, { -11, 77 }, + { -9, 80 }, { -9, 84 }, { -10, 87 }, { -34, 127 }, + { -21, 101 }, { -3, 39 }, { -5, 53 }, { -7, 61 }, + { -11, 75 }, { -15, 77 }, { -17, 91 }, { -25, 107 }, + { -25, 111 }, { -28, 122 }, { -11, 76 }, { -10, 44 }, + { -10, 52 }, { -10, 57 }, { -9, 58 }, { -16, 72 }, + { -7, 69 }, { -4, 69 }, { -5, 74 }, { -9, 86 }, + { -2, 73 }, { -12, 104 }, { -9, 91 }, { -31, 127 }, + { -2, 73 }, { -12, 104 }, { -9, 91 }, { -31, 127 }, + { -2, 73 }, { -12, 104 }, { -9, 91 }, { -31, 127 } }, /* i_cabac_init_idc == 2 */ @@ -682,6 +1111,149 @@ static const int8_t cabac_context_init_PB[3][460][2] = { -14, 59 }, { -9, 52 }, { -11, 68 }, { 9, -2 }, { 30, -10 }, { 31, -4 }, { 33, -1 }, { 33, 7 }, { 31, 12 }, { 37, 23 }, { 31, 38 }, { 20, 64 }, + + /* 460 - 1024 */ + { 11, 80 }, { 5, 76 }, { 2, 84 }, { 5, 78 }, + { -6, 55 }, { 4, 61 }, { -14, 83 }, { -37, 127 }, + { -5, 79 }, { -11, 104 }, { -11, 91 }, { -30, 127 }, + { 11, 80 }, { 5, 76 }, { 2, 84 }, { 5, 78 }, + { -6, 55 }, { 4, 61 }, { -14, 83 }, { -37, 127 }, + { -5, 79 }, { -11, 104 }, { -11, 91 }, { -30, 127 }, + { -4, 86 }, { -12, 88 }, { -5, 82 }, { -3, 72 }, + { -4, 67 }, { -8, 72 }, { -16, 89 }, { -9, 69 }, + { -1, 59 }, { 5, 66 }, { 4, 57 }, { -4, 71 }, + { -2, 71 }, { 2, 58 }, { -1, 74 }, { -4, 44 }, + { -1, 69 }, { 0, 62 }, { -7, 51 }, { -4, 47 }, + { -6, 42 }, { -3, 41 }, { -6, 53 }, { 8, 76 }, + { -9, 78 }, { -11, 83 }, { 9, 52 }, { 0, 67 }, + { -5, 90 }, { 1, 67 }, { -15, 72 }, { -5, 75 }, + { -8, 80 }, { -21, 83 }, { -21, 64 }, { -13, 31 }, + { -25, 64 }, { -29, 94 }, { 9, 75 }, { 17, 63 }, + { -8, 74 }, { -5, 35 }, { -2, 27 }, { 13, 91 }, + { -4, 86 }, { -12, 88 }, { -5, 82 }, { -3, 72 }, + { -4, 67 }, { -8, 72 }, { 
-16, 89 }, { -9, 69 }, + { -1, 59 }, { 5, 66 }, { 4, 57 }, { -4, 71 }, + { -2, 71 }, { 2, 58 }, { -1, 74 }, { -4, 44 }, + { -1, 69 }, { 0, 62 }, { -7, 51 }, { -4, 47 }, + { -6, 42 }, { -3, 41 }, { -6, 53 }, { 8, 76 }, + { -9, 78 }, { -11, 83 }, { 9, 52 }, { 0, 67 }, + { -5, 90 }, { 1, 67 }, { -15, 72 }, { -5, 75 }, + { -8, 80 }, { -21, 83 }, { -21, 64 }, { -13, 31 }, + { -25, 64 }, { -29, 94 }, { 9, 75 }, { 17, 63 }, + { -8, 74 }, { -5, 35 }, { -2, 27 }, { 13, 91 }, + { 4, 39 }, { 0, 42 }, { 7, 34 }, { 11, 29 }, + { 8, 31 }, { 6, 37 }, { 7, 42 }, { 3, 40 }, + { 8, 33 }, { 13, 43 }, { 13, 36 }, { 4, 47 }, + { 3, 55 }, { 2, 58 }, { 6, 60 }, { 8, 44 }, + { 11, 44 }, { 14, 42 }, { 7, 48 }, { 4, 56 }, + { 4, 52 }, { 13, 37 }, { 9, 49 }, { 19, 58 }, + { 10, 48 }, { 12, 45 }, { 0, 69 }, { 20, 33 }, + { 8, 63 }, { 35, -18 }, { 33, -25 }, { 28, -3 }, + { 24, 10 }, { 27, 0 }, { 34, -14 }, { 52, -44 }, + { 39, -24 }, { 19, 17 }, { 31, 25 }, { 36, 29 }, + { 24, 33 }, { 34, 15 }, { 30, 20 }, { 22, 73 }, + { 4, 39 }, { 0, 42 }, { 7, 34 }, { 11, 29 }, + { 8, 31 }, { 6, 37 }, { 7, 42 }, { 3, 40 }, + { 8, 33 }, { 13, 43 }, { 13, 36 }, { 4, 47 }, + { 3, 55 }, { 2, 58 }, { 6, 60 }, { 8, 44 }, + { 11, 44 }, { 14, 42 }, { 7, 48 }, { 4, 56 }, + { 4, 52 }, { 13, 37 }, { 9, 49 }, { 19, 58 }, + { 10, 48 }, { 12, 45 }, { 0, 69 }, { 20, 33 }, + { 8, 63 }, { 35, -18 }, { 33, -25 }, { 28, -3 }, + { 24, 10 }, { 27, 0 }, { 34, -14 }, { 52, -44 }, + { 39, -24 }, { 19, 17 }, { 31, 25 }, { 36, 29 }, + { 24, 33 }, { 34, 15 }, { 30, 20 }, { 22, 73 }, + { -3, 78 }, { -8, 74 }, { -9, 72 }, { -10, 72 }, + { -18, 75 }, { -12, 71 }, { -11, 63 }, { -5, 70 }, + { -17, 75 }, { -14, 72 }, { -16, 67 }, { -8, 53 }, + { -14, 59 }, { -9, 52 }, { -11, 68 }, { -3, 78 }, + { -8, 74 }, { -9, 72 }, { -10, 72 }, { -18, 75 }, + { -12, 71 }, { -11, 63 }, { -5, 70 }, { -17, 75 }, + { -14, 72 }, { -16, 67 }, { -8, 53 }, { -14, 59 }, + { -9, 52 }, { -11, 68 }, { 9, -2 }, { 30, -10 }, + { 31, -4 }, { 33, -1 }, { 33, 7 }, { 
31, 12 }, + { 37, 23 }, { 31, 38 }, { 20, 64 }, { 9, -2 }, + { 30, -10 }, { 31, -4 }, { 33, -1 }, { 33, 7 }, + { 31, 12 }, { 37, 23 }, { 31, 38 }, { 20, 64 }, + { -9, 71 }, { -7, 37 }, { -8, 44 }, { -11, 49 }, + { -10, 56 }, { -12, 59 }, { -8, 63 }, { -9, 67 }, + { -6, 68 }, { -10, 79 }, { -3, 78 }, { -8, 74 }, + { -9, 72 }, { -10, 72 }, { -18, 75 }, { -12, 71 }, + { -11, 63 }, { -5, 70 }, { -17, 75 }, { -14, 72 }, + { -16, 67 }, { -8, 53 }, { -14, 59 }, { -9, 52 }, + { -11, 68 }, { -3, 78 }, { -8, 74 }, { -9, 72 }, + { -10, 72 }, { -18, 75 }, { -12, 71 }, { -11, 63 }, + { -5, 70 }, { -17, 75 }, { -14, 72 }, { -16, 67 }, + { -8, 53 }, { -14, 59 }, { -9, 52 }, { -11, 68 }, + { 9, -2 }, { 30, -10 }, { 31, -4 }, { 33, -1 }, + { 33, 7 }, { 31, 12 }, { 37, 23 }, { 31, 38 }, + { 20, 64 }, { 9, -2 }, { 30, -10 }, { 31, -4 }, + { 33, -1 }, { 33, 7 }, { 31, 12 }, { 37, 23 }, + { 31, 38 }, { 20, 64 }, { -9, 71 }, { -7, 37 }, + { -8, 44 }, { -11, 49 }, { -10, 56 }, { -12, 59 }, + { -8, 63 }, { -9, 67 }, { -6, 68 }, { -10, 79 }, + { -22, 127 }, { -25, 127 }, { -25, 120 }, { -27, 127 }, + { -19, 114 }, { -23, 117 }, { -25, 118 }, { -26, 117 }, + { -24, 113 }, { -28, 118 }, { -31, 120 }, { -37, 124 }, + { -10, 94 }, { -15, 102 }, { -10, 99 }, { -13, 106 }, + { -50, 127 }, { -5, 92 }, { 17, 57 }, { -5, 86 }, + { -13, 94 }, { -12, 91 }, { -2, 77 }, { 0, 71 }, + { -1, 73 }, { 4, 64 }, { -7, 81 }, { 5, 64 }, + { 15, 57 }, { 1, 67 }, { 0, 68 }, { -10, 67 }, + { 1, 68 }, { 0, 77 }, { 2, 64 }, { 0, 68 }, + { -5, 78 }, { 7, 55 }, { 5, 59 }, { 2, 65 }, + { 14, 54 }, { 15, 44 }, { 5, 60 }, { 2, 70 }, + { -22, 127 }, { -25, 127 }, { -25, 120 }, { -27, 127 }, + { -19, 114 }, { -23, 117 }, { -25, 118 }, { -26, 117 }, + { -24, 113 }, { -28, 118 }, { -31, 120 }, { -37, 124 }, + { -10, 94 }, { -15, 102 }, { -10, 99 }, { -13, 106 }, + { -50, 127 }, { -5, 92 }, { 17, 57 }, { -5, 86 }, + { -13, 94 }, { -12, 91 }, { -2, 77 }, { 0, 71 }, + { -1, 73 }, { 4, 64 }, { -7, 81 }, { 5, 64 }, + { 15, 57 }, 
{ 1, 67 }, { 0, 68 }, { -10, 67 }, + { 1, 68 }, { 0, 77 }, { 2, 64 }, { 0, 68 }, + { -5, 78 }, { 7, 55 }, { 5, 59 }, { 2, 65 }, + { 14, 54 }, { 15, 44 }, { 5, 60 }, { 2, 70 }, + { 17, -13 }, { 16, -9 }, { 17, -12 }, { 27, -21 }, + { 37, -30 }, { 41, -40 }, { 42, -41 }, { 48, -47 }, + { 39, -32 }, { 46, -40 }, { 52, -51 }, { 46, -41 }, + { 52, -39 }, { 43, -19 }, { 32, 11 }, { 61, -55 }, + { 56, -46 }, { 62, -50 }, { 81, -67 }, { 45, -20 }, + { 35, -2 }, { 28, 15 }, { 34, 1 }, { 39, 1 }, + { 30, 17 }, { 20, 38 }, { 18, 45 }, { 15, 54 }, + { 0, 79 }, { 36, -16 }, { 37, -14 }, { 37, -17 }, + { 32, 1 }, { 34, 15 }, { 29, 15 }, { 24, 25 }, + { 34, 22 }, { 31, 16 }, { 35, 18 }, { 31, 28 }, + { 33, 41 }, { 36, 28 }, { 27, 47 }, { 21, 62 }, + { 17, -13 }, { 16, -9 }, { 17, -12 }, { 27, -21 }, + { 37, -30 }, { 41, -40 }, { 42, -41 }, { 48, -47 }, + { 39, -32 }, { 46, -40 }, { 52, -51 }, { 46, -41 }, + { 52, -39 }, { 43, -19 }, { 32, 11 }, { 61, -55 }, + { 56, -46 }, { 62, -50 }, { 81, -67 }, { 45, -20 }, + { 35, -2 }, { 28, 15 }, { 34, 1 }, { 39, 1 }, + { 30, 17 }, { 20, 38 }, { 18, 45 }, { 15, 54 }, + { 0, 79 }, { 36, -16 }, { 37, -14 }, { 37, -17 }, + { 32, 1 }, { 34, 15 }, { 29, 15 }, { 24, 25 }, + { 34, 22 }, { 31, 16 }, { 35, 18 }, { 31, 28 }, + { 33, 41 }, { 36, 28 }, { 27, 47 }, { 21, 62 }, + { -24, 115 }, { -22, 82 }, { -9, 62 }, { 0, 53 }, + { 0, 59 }, { -14, 85 }, { -13, 89 }, { -13, 94 }, + { -11, 92 }, { -29, 127 }, { -21, 100 }, { -14, 57 }, + { -12, 67 }, { -11, 71 }, { -10, 77 }, { -21, 85 }, + { -16, 88 }, { -23, 104 }, { -15, 98 }, { -37, 127 }, + { -10, 82 }, { -8, 48 }, { -8, 61 }, { -8, 66 }, + { -7, 70 }, { -14, 75 }, { -10, 79 }, { -9, 83 }, + { -12, 92 }, { -18, 108 }, { -24, 115 }, { -22, 82 }, + { -9, 62 }, { 0, 53 }, { 0, 59 }, { -14, 85 }, + { -13, 89 }, { -13, 94 }, { -11, 92 }, { -29, 127 }, + { -21, 100 }, { -14, 57 }, { -12, 67 }, { -11, 71 }, + { -10, 77 }, { -21, 85 }, { -16, 88 }, { -23, 104 }, + { -15, 98 }, { -37, 127 }, { -10, 82 }, { 
-8, 48 }, + { -8, 61 }, { -8, 66 }, { -7, 70 }, { -14, 75 }, + { -10, 79 }, { -9, 83 }, { -12, 92 }, { -18, 108 }, + { -5, 79 }, { -11, 104 }, { -11, 91 }, { -30, 127 }, + { -5, 79 }, { -11, 104 }, { -11, 91 }, { -30, 127 }, + { -5, 79 }, { -11, 104 }, { -11, 91 }, { -30, 127 } } }; @@ -695,7 +1267,7 @@ void ff_h264_init_cabac_states(H264Context *h) { else tab = cabac_context_init_PB[h->cabac_init_idc]; /* calculate pre-state */ - for( i= 0; i < 460; i++ ) { + for( i= 0; i < 1024; i++ ) { int pre = 2*(((tab[i][0] * slice_qp) >>4 ) + tab[i][1]) - 127; pre^= pre>>31; @@ -957,21 +1529,22 @@ static int decode_cabac_mb_mvd( H264Context *h, int ctxbase, int amvd, int *mvda my += decode_cabac_mb_mvd( h, 47, amvd1, &mpy );\ } -static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) { +static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int max_coeff, int is_dc ) { int nza, nzb; int ctx = 0; + static const uint16_t base_ctx[14] = {85,89,93,97,101,1012,460,464,468,1016,472,476,480,1020}; if( is_dc ) { - if( cat == 0 ) { - nza = h->left_cbp&0x100; - nzb = h-> top_cbp&0x100; - } else { + if( cat == 3 ) { idx -= CHROMA_DC_BLOCK_INDEX; nza = (h->left_cbp>>(6+idx))&0x01; nzb = (h-> top_cbp>>(6+idx))&0x01; + } else { + idx -= LUMA_DC_BLOCK_INDEX; + nza = h->left_cbp&(0x100<<idx); + nzb = h-> top_cbp&(0x100<<idx); } } else { - assert(cat == 1 || cat == 2 || cat == 4); nza = h->non_zero_count_cache[scan8[idx] - 1]; nzb = h->non_zero_count_cache[scan8[idx] - 8]; } @@ -982,7 +1555,7 @@ static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, if( nzb > 0 ) ctx += 2; - return ctx + 4 * cat; + return base_ctx[cat] + ctx; } DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8)[63] = { @@ -993,16 +1566,16 @@ DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8)[63] = { }; static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t 
*scantable, const uint32_t *qmul, int max_coeff, int is_dc ) { - static const int significant_coeff_flag_offset[2][6] = { - { 105+0, 105+15, 105+29, 105+44, 105+47, 402 }, - { 277+0, 277+15, 277+29, 277+44, 277+47, 436 } + static const int significant_coeff_flag_offset[2][14] = { + { 105+0, 105+15, 105+29, 105+44, 105+47, 402, 484+0, 484+15, 484+29, 660, 528+0, 528+15, 528+29, 718 }, + { 277+0, 277+15, 277+29, 277+44, 277+47, 436, 776+0, 776+15, 776+29, 675, 820+0, 820+15, 820+29, 733 } }; - static const int last_coeff_flag_offset[2][6] = { - { 166+0, 166+15, 166+29, 166+44, 166+47, 417 }, - { 338+0, 338+15, 338+29, 338+44, 338+47, 451 } + static const int last_coeff_flag_offset[2][14] = { + { 166+0, 166+15, 166+29, 166+44, 166+47, 417, 572+0, 572+15, 572+29, 690, 616+0, 616+15, 616+29, 748 }, + { 338+0, 338+15, 338+29, 338+44, 338+47, 451, 864+0, 864+15, 864+29, 699, 908+0, 908+15, 908+29, 757 } }; - static const int coeff_abs_level_m1_offset[6] = { - 227+0, 227+10, 227+20, 227+30, 227+39, 426 + static const int coeff_abs_level_m1_offset[14] = { + 227+0, 227+10, 227+20, 227+30, 227+39, 426, 952+0, 952+10, 952+20, 708, 982+0, 982+10, 982+20, 766 }; static const uint8_t significant_coeff_flag_offset_8x8[2][63] = { { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5, @@ -1057,7 +1630,7 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT abs_level_m1_ctx_base = h->cabac_state + coeff_abs_level_m1_offset[cat]; - if( !is_dc && cat == 5 ) { + if( !is_dc && max_coeff == 64 ) { #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \ for(last= 0; last < coefs; last++) { \ uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \ @@ -1075,9 +1648,11 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT } const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD]; #if ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS) - coeff_count= decode_significance_8x8_x86(CC, 
significant_coeff_ctx_base, index, sig_off); + coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, + last_coeff_ctx_base-significant_coeff_ctx_base, sig_off); } else { - coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index); + coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index, + last_coeff_ctx_base-significant_coeff_ctx_base); #else DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] ); } else { @@ -1087,16 +1662,16 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT assert(coeff_count > 0); if( is_dc ) { - if( cat == 0 ) - h->cbp_table[h->mb_xy] |= 0x100; - else + if( cat == 3 ) h->cbp_table[h->mb_xy] |= 0x40 << (n - CHROMA_DC_BLOCK_INDEX); + else + h->cbp_table[h->mb_xy] |= 0x100 << (n - LUMA_DC_BLOCK_INDEX); h->non_zero_count_cache[scan8[n]] = coeff_count; } else { - if( cat == 5 ) + if( max_coeff == 64 ) fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1); else { - assert( cat == 1 || cat == 2 || cat == 4 ); + assert( cat == 1 || cat == 2 || cat == 4 || cat == 7 || cat == 8 || cat == 11 || cat == 12 ); h->non_zero_count_cache[scan8[n]] = coeff_count; } } @@ -1180,7 +1755,7 @@ static void decode_cabac_residual_nondc_internal( H264Context *h, DCTELEM *block static av_always_inline void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, int max_coeff ) { /* read coded block flag */ - if( get_cabac( &h->cabac, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, 1 ) ] ) == 0 ) { + if( get_cabac( &h->cabac, &h->cabac_state[get_cabac_cbf_ctx( h, cat, n, max_coeff, 1 ) ] ) == 0 ) { h->non_zero_count_cache[scan8[n]] = 0; return; } @@ -1189,13 +1764,68 @@ static av_always_inline void decode_cabac_residual_dc( H264Context *h, DCTELEM * static av_always_inline void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t 
*scantable, const uint32_t *qmul, int max_coeff ) { /* read coded block flag */ - if( cat != 5 && get_cabac( &h->cabac, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, 0 ) ] ) == 0 ) { - h->non_zero_count_cache[scan8[n]] = 0; + if( (cat != 5 || CHROMA444) && get_cabac( &h->cabac, &h->cabac_state[get_cabac_cbf_ctx( h, cat, n, max_coeff, 0 ) ] ) == 0 ) { + if( max_coeff == 64 ) { + fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, 0, 1); + } else { + h->non_zero_count_cache[scan8[n]] = 0; + } return; } decode_cabac_residual_nondc_internal( h, block, cat, n, scantable, qmul, max_coeff ); } +static av_always_inline void decode_cabac_luma_residual( H264Context *h, const uint8_t *scan, const uint8_t *scan8x8, int pixel_shift, int mb_type, int cbp, int p ) +{ + static const uint8_t ctx_cat[4][3] = {{0,6,10},{1,7,11},{2,8,12},{5,9,13}}; + const uint32_t *qmul; + int i8x8, i4x4; + MpegEncContext * const s = &h->s; + int qscale = p == 0 ? s->qscale : h->chroma_qp[p-1]; + if( IS_INTRA16x16( mb_type ) ) { + //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" ); + AV_ZERO128(h->mb_luma_dc[p]+0); + AV_ZERO128(h->mb_luma_dc[p]+8); + AV_ZERO128(h->mb_luma_dc[p]+16); + AV_ZERO128(h->mb_luma_dc[p]+24); + decode_cabac_residual_dc(h, h->mb_luma_dc[p], ctx_cat[0][p], LUMA_DC_BLOCK_INDEX+p, scan, 16); + + if( cbp&15 ) { + qmul = h->dequant4_coeff[p][qscale]; + for( i4x4 = 0; i4x4 < 16; i4x4++ ) { + const int index = 16*p + i4x4; + //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", index ); + decode_cabac_residual_nondc(h, h->mb + (16*index << pixel_shift), ctx_cat[1][p], index, scan + 1, qmul, 15); + } + } else { + fill_rectangle(&h->non_zero_count_cache[scan8[16*p]], 4, 4, 8, 0, 1); + } + } else { + int cqm = (IS_INTRA( mb_type ) ? 
0:3) + p; + for( i8x8 = 0; i8x8 < 4; i8x8++ ) { + if( cbp & (1<<i8x8) ) { + if( IS_8x8DCT(mb_type) ) { + const int index = 16*p + 4*i8x8; + decode_cabac_residual_nondc(h, h->mb + (16*index << pixel_shift), ctx_cat[3][p], index, + scan8x8, h->dequant8_coeff[cqm][qscale], 64); + } else { + qmul = h->dequant4_coeff[cqm][qscale]; + for( i4x4 = 0; i4x4 < 4; i4x4++ ) { + const int index = 16*p + 4*i8x8 + i4x4; + //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index ); +//START_TIMER + decode_cabac_residual_nondc(h, h->mb + (16*index << pixel_shift), ctx_cat[2][p], index, scan, qmul, 16); +//STOP_TIMER("decode_residual") + } + } + } else { + uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8+16*p] ]; + nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0; + } + } + } +} + /** * decodes a macroblock * @return 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed @@ -1205,6 +1835,7 @@ int ff_h264_decode_mb_cabac(H264Context *h) { int mb_xy; int mb_type, partition_count, cbp = 0; int dct8x8_allowed= h->pps.transform_8x8_mode; + int decode_chroma = h->sps.chroma_format_idc == 1 || h->sps.chroma_format_idc == 2; const int pixel_shift = h->pixel_shift; mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride; @@ -1314,7 +1945,8 @@ decode_intra_mb: h->slice_table[ mb_xy ]= h->slice_num; if(IS_INTRA_PCM(mb_type)) { - const int mb_size = (384*h->sps.bit_depth_luma) >> 3; + static const uint16_t mb_sizes[4] = {256,384,512,768}; + const int mb_size = mb_sizes[h->sps.chroma_format_idc]*h->sps.bit_depth_luma >> 3; const uint8_t *ptr; // We assume these blocks are very rare so we do not optimize it. @@ -1327,20 +1959,17 @@ decode_intra_mb: } // The pixels are stored in the same order as levels in h->mb array. 
- memcpy(h->mb, ptr, 2*mb_size/3); ptr+=2*mb_size/3; - if(CHROMA){ - memcpy(h->mb+mb_size/3, ptr, mb_size/3); ptr+=mb_size/3; - } + memcpy(h->mb, ptr, mb_size); ptr+=mb_size; ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr); // All blocks are present - h->cbp_table[mb_xy] = 0x1ef; + h->cbp_table[mb_xy] = 0xf7ef; h->chroma_pred_mode_table[mb_xy] = 0; // In deblocking, the quantizer is 0 s->current_picture.qscale_table[mb_xy]= 0; // All coeffs are present - memset(h->non_zero_count[mb_xy], 16, 32); + memset(h->non_zero_count[mb_xy], 16, 48); s->current_picture.mb_type[mb_xy]= mb_type; h->last_qscale_diff = 0; return 0; @@ -1377,7 +2006,7 @@ decode_intra_mb: h->intra16x16_pred_mode= ff_h264_check_intra_pred_mode( h, h->intra16x16_pred_mode ); if( h->intra16x16_pred_mode < 0 ) return -1; } - if(CHROMA){ + if(decode_chroma){ h->chroma_pred_mode_table[mb_xy] = pred_mode = decode_cabac_mb_chroma_pre_mode( h ); @@ -1606,7 +2235,7 @@ decode_intra_mb: if( !IS_INTRA16x16( mb_type ) ) { cbp = decode_cabac_mb_cbp_luma( h ); - if(CHROMA) + if(decode_chroma) cbp |= decode_cabac_mb_cbp_chroma( h ) << 4; } @@ -1615,6 +2244,28 @@ decode_intra_mb: if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) { mb_type |= MB_TYPE_8x8DCT * get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] ); } + + /* It would be better to do this in fill_decode_caches, but we don't know + * the transform mode of the current macroblock there. */ + if (CHROMA444 && IS_8x8DCT(mb_type)){ + int i; + for (i = 0; i < 2; i++){ + if (h->left_type[i] && !IS_8x8DCT(h->left_type[i])){ + h->non_zero_count_cache[3+8* 1 + 2*8*i]= + h->non_zero_count_cache[3+8* 2 + 2*8*i]= + h->non_zero_count_cache[3+8* 6 + 2*8*i]= + h->non_zero_count_cache[3+8* 7 + 2*8*i]= + h->non_zero_count_cache[3+8*11 + 2*8*i]= + h->non_zero_count_cache[3+8*12 + 2*8*i]= IS_INTRA(mb_type) ? 
64 : 0; + } + } + if (h->top_type && !IS_8x8DCT(h->top_type)){ + uint32_t top_empty = CABAC && !IS_INTRA(mb_type) ? 0 : 0x40404040; + AV_WN32A(&h->non_zero_count_cache[4+8* 0], top_empty); + AV_WN32A(&h->non_zero_count_cache[4+8* 5], top_empty); + AV_WN32A(&h->non_zero_count_cache[4+8*10], top_empty); + } + } s->current_picture.mb_type[mb_xy]= mb_type; if( cbp || IS_INTRA16x16( mb_type ) ) { @@ -1659,76 +2310,38 @@ decode_intra_mb: }else h->last_qscale_diff=0; - if( IS_INTRA16x16( mb_type ) ) { - int i; - //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" ); - AV_ZERO128(h->mb_luma_dc+0); - AV_ZERO128(h->mb_luma_dc+8); - AV_ZERO128(h->mb_luma_dc+16); - AV_ZERO128(h->mb_luma_dc+24); - decode_cabac_residual_dc( h, h->mb_luma_dc, 0, LUMA_DC_BLOCK_INDEX, scan, 16); - - if( cbp&15 ) { - qmul = h->dequant4_coeff[0][s->qscale]; - for( i = 0; i < 16; i++ ) { - //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i ); - decode_cabac_residual_nondc(h, h->mb + (16*i << pixel_shift), 1, i, scan + 1, qmul, 15); - } - } else { - fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1); - } + decode_cabac_luma_residual(h, scan, scan8x8, pixel_shift, mb_type, cbp, 0); + if(CHROMA444){ + decode_cabac_luma_residual(h, scan, scan8x8, pixel_shift, mb_type, cbp, 1); + decode_cabac_luma_residual(h, scan, scan8x8, pixel_shift, mb_type, cbp, 2); } else { - int i8x8, i4x4; - for( i8x8 = 0; i8x8 < 4; i8x8++ ) { - if( cbp & (1<<i8x8) ) { - if( IS_8x8DCT(mb_type) ) { - decode_cabac_residual_nondc(h, h->mb + (64*i8x8 << pixel_shift), 5, 4*i8x8, - scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64); - } else { - qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 
0:3][s->qscale]; - for( i4x4 = 0; i4x4 < 4; i4x4++ ) { - const int index = 4*i8x8 + i4x4; - //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index ); -//START_TIMER - decode_cabac_residual_nondc(h, h->mb + (16*index << pixel_shift), 2, index, scan, qmul, 16); -//STOP_TIMER("decode_residual") - } - } - } else { - uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ]; - nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0; + if( cbp&0x30 ){ + int c; + for( c = 0; c < 2; c++ ) { + //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c ); + decode_cabac_residual_dc(h, h->mb + ((256 + 16*16*c) << pixel_shift), 3, CHROMA_DC_BLOCK_INDEX+c, chroma_dc_scan, 4); } } - } - if( cbp&0x30 ){ - int c; - for( c = 0; c < 2; c++ ) { - //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c ); - decode_cabac_residual_dc(h, h->mb + ((256 + 16*4*c) << pixel_shift), 3, CHROMA_DC_BLOCK_INDEX+c, chroma_dc_scan, 4); - } - } - - if( cbp&0x20 ) { - int c, i; - for( c = 0; c < 2; c++ ) { - qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]]; - for( i = 0; i < 4; i++ ) { - const int index = 16 + 4 * c + i; - //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 ); - decode_cabac_residual_nondc(h, h->mb + (16*index << pixel_shift), 4, index, scan + 1, qmul, 15); + if( cbp&0x20 ) { + int c, i; + for( c = 0; c < 2; c++ ) { + qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 
0:3)][h->chroma_qp[c]]; + for( i = 0; i < 4; i++ ) { + const int index = 16 + 16 * c + i; + //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 ); + decode_cabac_residual_nondc(h, h->mb + (16*index << pixel_shift), 4, index, scan + 1, qmul, 15); + } } + } else { + fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1); + fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1); } - } else { - uint8_t * const nnz= &h->non_zero_count_cache[0]; - nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] = - nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0; } } else { - uint8_t * const nnz= &h->non_zero_count_cache[0]; - fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1); - nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] = - nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0; + fill_rectangle(&h->non_zero_count_cache[scan8[ 0]], 4, 4, 8, 0, 1); + fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1); + fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1); h->last_qscale_diff = 0; } diff --git a/libavcodec/h264_cavlc.c b/libavcodec/h264_cavlc.c index 62e30f1311..0ddc430661 100644 --- a/libavcodec/h264_cavlc.c +++ b/libavcodec/h264_cavlc.c @@ -371,12 +371,12 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in //FIXME put trailing_onex into the context - if(n >= CHROMA_DC_BLOCK_INDEX){ + if(max_coeff <= 8){ coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1); total_coeff= coeff_token>>2; }else{ - if(n == LUMA_DC_BLOCK_INDEX){ - total_coeff= pred_non_zero_count(h, 0); + if(n >= LUMA_DC_BLOCK_INDEX){ + total_coeff= pred_non_zero_count(h, (n - LUMA_DC_BLOCK_INDEX)*16); coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2); total_coeff= coeff_token>>2; }else{ @@ 
-482,7 +482,8 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in if(total_coeff == max_coeff) zeros_left=0; else{ - if(n >= CHROMA_DC_BLOCK_INDEX) + /* FIXME: we don't actually support 4:2:2 yet. */ + if(max_coeff <= 8) zeros_left= get_vlc2(gb, (chroma_dc_total_zeros_vlc-1)[ total_coeff ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1); else zeros_left= get_vlc2(gb, (total_zeros_vlc-1)[ total_coeff ].table, TOTAL_ZEROS_VLC_BITS, 1); @@ -536,12 +537,80 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in return 0; } +static av_always_inline int decode_luma_residual(H264Context *h, GetBitContext *gb, const uint8_t *scan, const uint8_t *scan8x8, int pixel_shift, int mb_type, int cbp, int p){ + int i4x4, i8x8; + MpegEncContext * const s = &h->s; + int qscale = p == 0 ? s->qscale : h->chroma_qp[p-1]; + if(IS_INTRA16x16(mb_type)){ + AV_ZERO128(h->mb_luma_dc[p]+0); + AV_ZERO128(h->mb_luma_dc[p]+8); + AV_ZERO128(h->mb_luma_dc[p]+16); + AV_ZERO128(h->mb_luma_dc[p]+24); + if( decode_residual(h, h->intra_gb_ptr, h->mb_luma_dc[p], LUMA_DC_BLOCK_INDEX+p, scan, NULL, 16) < 0){ + return -1; //FIXME continue if partitioned and other return -1 too + } + + assert((cbp&15) == 0 || (cbp&15) == 15); + + if(cbp&15){ + for(i8x8=0; i8x8<4; i8x8++){ + for(i4x4=0; i4x4<4; i4x4++){ + const int index= i4x4 + 4*i8x8 + p*16; + if( decode_residual(h, h->intra_gb_ptr, h->mb + (16*index << pixel_shift), + index, scan + 1, h->dequant4_coeff[p][qscale], 15) < 0 ){ + return -1; + } + } + } + return 0xf; + }else{ + fill_rectangle(&h->non_zero_count_cache[scan8[p*16]], 4, 4, 8, 0, 1); + return 0; + } + }else{ + int cqm = (IS_INTRA( mb_type ) ? 0:3)+p; + /* For CAVLC 4:4:4, we need to keep track of the luma 8x8 CBP for deblocking nnz purposes. 
*/ + int new_cbp = 0; + for(i8x8=0; i8x8<4; i8x8++){ + if(cbp & (1<<i8x8)){ + if(IS_8x8DCT(mb_type)){ + DCTELEM *buf = &h->mb[64*i8x8+256*p << pixel_shift]; + uint8_t *nnz; + for(i4x4=0; i4x4<4; i4x4++){ + const int index= i4x4 + 4*i8x8 + p*16; + if( decode_residual(h, gb, buf, index, scan8x8+16*i4x4, + h->dequant8_coeff[cqm][qscale], 16) < 0 ) + return -1; + } + nnz= &h->non_zero_count_cache[ scan8[4*i8x8+p*16] ]; + nnz[0] += nnz[1] + nnz[8] + nnz[9]; + new_cbp |= !!nnz[0] << i8x8; + }else{ + for(i4x4=0; i4x4<4; i4x4++){ + const int index= i4x4 + 4*i8x8 + p*16; + if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index, + scan, h->dequant4_coeff[cqm][qscale], 16) < 0 ){ + return -1; + } + new_cbp |= h->non_zero_count_cache[ scan8[index] ] << i8x8; + } + } + }else{ + uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8+p*16] ]; + nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0; + } + } + return new_cbp; + } +} + int ff_h264_decode_mb_cavlc(H264Context *h){ MpegEncContext * const s = &h->s; int mb_xy; int partition_count; unsigned int mb_type, cbp; int dct8x8_allowed= h->pps.transform_8x8_mode; + int decode_chroma = h->sps.chroma_format_idc == 1 || h->sps.chroma_format_idc == 2; const int pixel_shift = h->pixel_shift; mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride; @@ -608,19 +677,21 @@ decode_intra_mb: if(IS_INTRA_PCM(mb_type)){ unsigned int x; + static const uint16_t mb_sizes[4] = {256,384,512,768}; + const int mb_size = mb_sizes[h->sps.chroma_format_idc]*h->sps.bit_depth_luma >> 3; // We assume these blocks are very rare so we do not optimize it. align_get_bits(&s->gb); // The pixels are stored in the same order as levels in h->mb array. - for(x=0; x < (CHROMA ? 
384 : 256)*h->sps.bit_depth_luma/8; x++){ + for(x=0; x < mb_size; x++){ ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8); } // In deblocking, the quantizer is 0 s->current_picture.qscale_table[mb_xy]= 0; // All coeffs are present - memset(h->non_zero_count[mb_xy], 16, 32); + memset(h->non_zero_count[mb_xy], 16, 48); s->current_picture.mb_type[mb_xy]= mb_type; return 0; @@ -668,7 +739,7 @@ decode_intra_mb: if(h->intra16x16_pred_mode < 0) return -1; } - if(CHROMA){ + if(decode_chroma){ pred_mode= ff_h264_check_intra_pred_mode(h, get_ue_golomb_31(&s->gb)); if(pred_mode < 0) return -1; @@ -896,15 +967,19 @@ decode_intra_mb: if(!IS_INTRA16x16(mb_type)){ cbp= get_ue_golomb(&s->gb); - if(cbp > 47){ - av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y); - return -1; - } - if(CHROMA){ + if(decode_chroma){ + if(cbp > 47){ + av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y); + return -1; + } if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp]; else cbp= golomb_to_inter_cbp [cbp]; }else{ + if(cbp > 15){ + av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y); + return -1; + } if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp]; else cbp= golomb_to_inter_cbp_gray[cbp]; } @@ -918,8 +993,9 @@ decode_intra_mb: s->current_picture.mb_type[mb_xy]= mb_type; if(cbp || IS_INTRA16x16(mb_type)){ - int i8x8, i4x4, chroma_idx; + int i4x4, chroma_idx; int dquant; + int ret; GetBitContext *gb= IS_INTRA(mb_type) ? 
h->intra_gb_ptr : h->inter_gb_ptr; const uint8_t *scan, *scan8x8; const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8); @@ -947,85 +1023,45 @@ decode_intra_mb: h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale); h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale); - if(IS_INTRA16x16(mb_type)){ - AV_ZERO128(h->mb_luma_dc+0); - AV_ZERO128(h->mb_luma_dc+8); - AV_ZERO128(h->mb_luma_dc+16); - AV_ZERO128(h->mb_luma_dc+24); - if( decode_residual(h, h->intra_gb_ptr, h->mb_luma_dc, LUMA_DC_BLOCK_INDEX, scan, h->dequant4_coeff[0][s->qscale], 16) < 0){ - return -1; //FIXME continue if partitioned and other return -1 too - } - assert((cbp&15) == 0 || (cbp&15) == 15); + if( (ret = decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 0)) < 0 ){ + return -1; + } + h->cbp_table[mb_xy] |= ret << 12; + if(CHROMA444){ + if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 1) < 0 ){ + return -1; + } + if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 2) < 0 ){ + return -1; + } + } else { + if(cbp&0x30){ + for(chroma_idx=0; chroma_idx<2; chroma_idx++) + if( decode_residual(h, gb, h->mb + ((256 + 16*16*chroma_idx) << pixel_shift), CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma_dc_scan, NULL, 4) < 0){ + return -1; + } + } - if(cbp&15){ - for(i8x8=0; i8x8<4; i8x8++){ + if(cbp&0x20){ + for(chroma_idx=0; chroma_idx<2; chroma_idx++){ + const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 
0:3)][h->chroma_qp[chroma_idx]]; for(i4x4=0; i4x4<4; i4x4++){ - const int index= i4x4 + 4*i8x8; - if( decode_residual(h, h->intra_gb_ptr, h->mb + (16*index << pixel_shift), index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){ + const int index= 16 + 16*chroma_idx + i4x4; + if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index, scan + 1, qmul, 15) < 0){ return -1; } } } }else{ - fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1); + fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1); + fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1); } - }else{ - for(i8x8=0; i8x8<4; i8x8++){ - if(cbp & (1<<i8x8)){ - if(IS_8x8DCT(mb_type)){ - DCTELEM *buf = &h->mb[64*i8x8 << pixel_shift]; - uint8_t *nnz; - for(i4x4=0; i4x4<4; i4x4++){ - if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4, - h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 ) - return -1; - } - nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ]; - nnz[0] += nnz[1] + nnz[8] + nnz[9]; - }else{ - for(i4x4=0; i4x4<4; i4x4++){ - const int index= i4x4 + 4*i8x8; - - if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){ - return -1; - } - } - } - }else{ - uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ]; - nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0; - } - } - } - - if(cbp&0x30){ - for(chroma_idx=0; chroma_idx<2; chroma_idx++) - if( decode_residual(h, gb, h->mb + ((256 + 16*4*chroma_idx) << pixel_shift), CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma_dc_scan, NULL, 4) < 0){ - return -1; - } - } - - if(cbp&0x20){ - for(chroma_idx=0; chroma_idx<2; chroma_idx++){ - const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 
0:3)][h->chroma_qp[chroma_idx]]; - for(i4x4=0; i4x4<4; i4x4++){ - const int index= 16 + 4*chroma_idx + i4x4; - if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index, scan + 1, qmul, 15) < 0){ - return -1; - } - } - } - }else{ - uint8_t * const nnz= &h->non_zero_count_cache[0]; - nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] = - nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0; } }else{ - uint8_t * const nnz= &h->non_zero_count_cache[0]; - fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1); - nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] = - nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0; + fill_rectangle(&h->non_zero_count_cache[scan8[ 0]], 4, 4, 8, 0, 1); + fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1); + fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1); } s->current_picture.qscale_table[mb_xy]= s->qscale; write_back_non_zero_count(h); diff --git a/libavcodec/h264_hl_motion.c b/libavcodec/h264_hl_motion.c deleted file mode 100644 index 654b8b8444..0000000000 --- a/libavcodec/h264_hl_motion.c +++ /dev/null @@ -1,164 +0,0 @@ - -#include "h264.h" -#include "thread.h" - -static inline int get_lowest_part_list_y(H264Context *h, Picture *pic, int n, int height, - int y_offset, int list){ - int raw_my= h->mv_cache[list][ scan8[n] ][1]; - int filter_height= (raw_my&3) ? 
2 : 0; - int full_my= (raw_my>>2) + y_offset; - int top = full_my - filter_height, bottom = full_my + height + filter_height; - - return FFMAX(abs(top), bottom); -} - -static inline void get_lowest_part_y(H264Context *h, int refs[2][48], int n, int height, - int y_offset, int list0, int list1, int *nrefs){ - MpegEncContext * const s = &h->s; - int my; - - y_offset += 16*(s->mb_y >> MB_FIELD); - - if(list0){ - int ref_n = h->ref_cache[0][ scan8[n] ]; - Picture *ref= &h->ref_list[0][ref_n]; - - // Error resilience puts the current picture in the ref list. - // Don't try to wait on these as it will cause a deadlock. - // Fields can wait on each other, though. - if(ref->thread_opaque != s->current_picture.thread_opaque || - (ref->reference&3) != s->picture_structure) { - my = get_lowest_part_list_y(h, ref, n, height, y_offset, 0); - if (refs[0][ref_n] < 0) nrefs[0] += 1; - refs[0][ref_n] = FFMAX(refs[0][ref_n], my); - } - } - - if(list1){ - int ref_n = h->ref_cache[1][ scan8[n] ]; - Picture *ref= &h->ref_list[1][ref_n]; - - if(ref->thread_opaque != s->current_picture.thread_opaque || - (ref->reference&3) != s->picture_structure) { - my = get_lowest_part_list_y(h, ref, n, height, y_offset, 1); - if (refs[1][ref_n] < 0) nrefs[1] += 1; - refs[1][ref_n] = FFMAX(refs[1][ref_n], my); - } - } -} - -/** - * Wait until all reference frames are available for MC operations. 
- * - * @param h the H264 context - */ -static void await_references(H264Context *h){ - MpegEncContext * const s = &h->s; - const int mb_xy= h->mb_xy; - const int mb_type= s->current_picture.mb_type[mb_xy]; - int refs[2][48]; - int nrefs[2] = {0}; - int ref, list; - - memset(refs, -1, sizeof(refs)); - - if(IS_16X16(mb_type)){ - get_lowest_part_y(h, refs, 0, 16, 0, - IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs); - }else if(IS_16X8(mb_type)){ - get_lowest_part_y(h, refs, 0, 8, 0, - IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs); - get_lowest_part_y(h, refs, 8, 8, 8, - IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs); - }else if(IS_8X16(mb_type)){ - get_lowest_part_y(h, refs, 0, 16, 0, - IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs); - get_lowest_part_y(h, refs, 4, 16, 0, - IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs); - }else{ - int i; - - assert(IS_8X8(mb_type)); - - for(i=0; i<4; i++){ - const int sub_mb_type= h->sub_mb_type[i]; - const int n= 4*i; - int y_offset= (i&2)<<2; - - if(IS_SUB_8X8(sub_mb_type)){ - get_lowest_part_y(h, refs, n , 8, y_offset, - IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs); - }else if(IS_SUB_8X4(sub_mb_type)){ - get_lowest_part_y(h, refs, n , 4, y_offset, - IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs); - get_lowest_part_y(h, refs, n+2, 4, y_offset+4, - IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs); - }else if(IS_SUB_4X8(sub_mb_type)){ - get_lowest_part_y(h, refs, n , 8, y_offset, - IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs); - get_lowest_part_y(h, refs, n+1, 8, y_offset, - IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs); - }else{ - int j; - assert(IS_SUB_4X4(sub_mb_type)); - for(j=0; j<4; j++){ - int sub_y_offset= y_offset + 2*(j&2); - get_lowest_part_y(h, refs, n+j, 4, sub_y_offset, - IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs); - } - } - } - } - - for(list=h->list_count-1; list>=0; list--){ - for(ref=0; ref<48 && 
nrefs[list]; ref++){ - int row = refs[list][ref]; - if(row >= 0){ - Picture *ref_pic = &h->ref_list[list][ref]; - int ref_field = ref_pic->reference - 1; - int ref_field_picture = ref_pic->field_picture; - int pic_height = 16*s->mb_height >> ref_field_picture; - - row <<= MB_MBAFF; - nrefs[list]--; - - if(!FIELD_PICTURE && ref_field_picture){ // frame referencing two fields - ff_thread_await_progress((AVFrame*)ref_pic, FFMIN((row >> 1) - !(row&1), pic_height-1), 1); - ff_thread_await_progress((AVFrame*)ref_pic, FFMIN((row >> 1) , pic_height-1), 0); - }else if(FIELD_PICTURE && !ref_field_picture){ // field referencing one field of a frame - ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row*2 + ref_field , pic_height-1), 0); - }else if(FIELD_PICTURE){ - ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row, pic_height-1), ref_field); - }else{ - ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row, pic_height-1), 0); - } - } - } - } -} - -#define FUNC(a) a ## _8 -#define PIXEL_SHIFT 0 -#include "h264_hl_motion.h" - -#undef PIXEL_SHIFT -#undef FUNC -#define FUNC(a) a ## _16 -#define PIXEL_SHIFT 1 -#include "h264_hl_motion.h" - -void ff_hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, - qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put), - qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg), - h264_weight_func *weight_op, h264_biweight_func *weight_avg){ - if(h->pixel_shift){ - hl_motion_16(h, dest_y, dest_cb, dest_cr, - qpix_put, chroma_put, - qpix_avg, chroma_avg, - weight_op, weight_avg); - }else - hl_motion_8(h, dest_y, dest_cb, dest_cr, - qpix_put, chroma_put, - qpix_avg, chroma_avg, - weight_op, weight_avg); -} diff --git a/libavcodec/h264_hl_motion.h b/libavcodec/h264_hl_motion.h deleted file mode 100644 index f354251fc9..0000000000 --- a/libavcodec/h264_hl_motion.h +++ /dev/null @@ -1,282 +0,0 @@ - -static inline void FUNC(mc_dir_part)(H264Context *h, Picture *pic, int n, int square, int chroma_height, 
int delta, int list, - uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, - int src_x_offset, int src_y_offset, - qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){ - MpegEncContext * const s = &h->s; - const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8; - int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8; - const int luma_xy= (mx&3) + ((my&3)<<2); - uint8_t * src_y = pic->data[0] + ((mx>>2)<<PIXEL_SHIFT) + (my>>2)*h->mb_linesize; - uint8_t * src_cb, * src_cr; - int extra_width= h->emu_edge_width; - int extra_height= h->emu_edge_height; - int emu=0; - const int full_mx= mx>>2; - const int full_my= my>>2; - const int pic_width = 16*s->mb_width; - const int pic_height = 16*s->mb_height >> MB_FIELD; - - if(mx&7) extra_width -= 3; - if(my&7) extra_height -= 3; - - if( full_mx < 0-extra_width - || full_my < 0-extra_height - || full_mx + 16/*FIXME*/ > pic_width + extra_width - || full_my + 16/*FIXME*/ > pic_height + extra_height){ - s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_y - (2<<PIXEL_SHIFT) - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height); - src_y= s->edge_emu_buffer + (2<<PIXEL_SHIFT) + 2*h->mb_linesize; - emu=1; - } - - qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps? 
- if(!square){ - qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize); - } - - if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return; - - if(MB_FIELD){ - // chroma offset when predicting from a field of opposite parity - my += 2 * ((s->mb_y & 1) - (pic->reference - 1)); - emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1); - } - src_cb= pic->data[1] + ((mx>>3)<<PIXEL_SHIFT) + (my>>3)*h->mb_uvlinesize; - src_cr= pic->data[2] + ((mx>>3)<<PIXEL_SHIFT) + (my>>3)*h->mb_uvlinesize; - - if(emu){ - s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1); - src_cb= s->edge_emu_buffer; - } - chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7); - - if(emu){ - s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1); - src_cr= s->edge_emu_buffer; - } - chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7); -} - -static inline void FUNC(mc_part_std)(H264Context *h, int n, int square, int chroma_height, int delta, - uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, - int x_offset, int y_offset, - qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put, - qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg, - int list0, int list1){ - MpegEncContext * const s = &h->s; - qpel_mc_func *qpix_op= qpix_put; - h264_chroma_mc_func chroma_op= chroma_put; - - dest_y += (2*x_offset<<PIXEL_SHIFT) + 2*y_offset*h-> mb_linesize; - dest_cb += ( x_offset<<PIXEL_SHIFT) + y_offset*h->mb_uvlinesize; - dest_cr += ( x_offset<<PIXEL_SHIFT) + y_offset*h->mb_uvlinesize; - x_offset += 8*s->mb_x; - y_offset += 8*(s->mb_y >> MB_FIELD); - - if(list0){ - Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ]; - FUNC(mc_dir_part)(h, ref, n, square, chroma_height, delta, 0, - dest_y, dest_cb, dest_cr, x_offset, y_offset, - qpix_op, chroma_op); - - qpix_op= qpix_avg; - chroma_op= chroma_avg; - } - - 
if(list1){ - Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ]; - FUNC(mc_dir_part)(h, ref, n, square, chroma_height, delta, 1, - dest_y, dest_cb, dest_cr, x_offset, y_offset, - qpix_op, chroma_op); - } -} - -static inline void FUNC(mc_part_weighted)(H264Context *h, int n, int square, int chroma_height, int delta, - uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, - int x_offset, int y_offset, - qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put, - h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op, - h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg, - int list0, int list1){ - MpegEncContext * const s = &h->s; - - dest_y += (2*x_offset<<PIXEL_SHIFT) + 2*y_offset*h-> mb_linesize; - dest_cb += ( x_offset<<PIXEL_SHIFT) + y_offset*h->mb_uvlinesize; - dest_cr += ( x_offset<<PIXEL_SHIFT) + y_offset*h->mb_uvlinesize; - x_offset += 8*s->mb_x; - y_offset += 8*(s->mb_y >> MB_FIELD); - - if(list0 && list1){ - /* don't optimize for luma-only case, since B-frames usually - * use implicit weights => chroma too. 
*/ - uint8_t *tmp_cb = s->obmc_scratchpad; - uint8_t *tmp_cr = s->obmc_scratchpad + (8<<PIXEL_SHIFT); - uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize; - int refn0 = h->ref_cache[0][ scan8[n] ]; - int refn1 = h->ref_cache[1][ scan8[n] ]; - - FUNC(mc_dir_part)(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0, - dest_y, dest_cb, dest_cr, - x_offset, y_offset, qpix_put, chroma_put); - FUNC(mc_dir_part)(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1, - tmp_y, tmp_cb, tmp_cr, - x_offset, y_offset, qpix_put, chroma_put); - - if(h->use_weight == 2){ - int weight0 = h->implicit_weight[refn0][refn1][s->mb_y&1]; - int weight1 = 64 - weight0; - luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0); - chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0); - chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0); - }else{ - luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom, - h->luma_weight[refn0][0][0] , h->luma_weight[refn1][1][0], - h->luma_weight[refn0][0][1] + h->luma_weight[refn1][1][1]); - chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom, - h->chroma_weight[refn0][0][0][0] , h->chroma_weight[refn1][1][0][0], - h->chroma_weight[refn0][0][0][1] + h->chroma_weight[refn1][1][0][1]); - chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom, - h->chroma_weight[refn0][0][1][0] , h->chroma_weight[refn1][1][1][0], - h->chroma_weight[refn0][0][1][1] + h->chroma_weight[refn1][1][1][1]); - } - }else{ - int list = list1 ? 
1 : 0; - int refn = h->ref_cache[list][ scan8[n] ]; - Picture *ref= &h->ref_list[list][refn]; - FUNC(mc_dir_part)(h, ref, n, square, chroma_height, delta, list, - dest_y, dest_cb, dest_cr, x_offset, y_offset, - qpix_put, chroma_put); - - luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom, - h->luma_weight[refn][list][0], h->luma_weight[refn][list][1]); - if(h->use_weight_chroma){ - chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom, - h->chroma_weight[refn][list][0][0], h->chroma_weight[refn][list][0][1]); - chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom, - h->chroma_weight[refn][list][1][0], h->chroma_weight[refn][list][1][1]); - } - } -} - -static inline void FUNC(mc_part)(H264Context *h, int n, int square, int chroma_height, int delta, - uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, - int x_offset, int y_offset, - qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put, - qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg, - h264_weight_func *weight_op, h264_biweight_func *weight_avg, - int list0, int list1){ - if((h->use_weight==2 && list0 && list1 - && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ][h->s.mb_y&1] != 32)) - || h->use_weight==1) - FUNC(mc_part_weighted)(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr, - x_offset, y_offset, qpix_put, chroma_put, - weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1); - else - FUNC(mc_part_std)(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr, - x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1); -} - -static inline void FUNC(prefetch_motion)(H264Context *h, int list){ - /* fetch pixels for estimated mv 4 macroblocks ahead - * optimized for 64byte cache lines */ - MpegEncContext * const s = &h->s; - const int refn = h->ref_cache[list][scan8[0]]; - if(refn >= 0){ - const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8; - const int my= 
(h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y; - uint8_t **src= h->ref_list[list][refn].data; - int off= ((mx+64)<<PIXEL_SHIFT) + (my + (s->mb_x&3)*4)*h->mb_linesize; - s->dsp.prefetch(src[0]+off, s->linesize, 4); - off= (((mx>>1)+64)<<PIXEL_SHIFT) + ((my>>1) + (s->mb_x&7))*s->uvlinesize; - s->dsp.prefetch(src[1]+off, src[2]-src[1], 2); - } -} - -static void FUNC(hl_motion)(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, - qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put), - qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg), - h264_weight_func *weight_op, h264_biweight_func *weight_avg){ - MpegEncContext * const s = &h->s; - const int mb_xy= h->mb_xy; - const int mb_type= s->current_picture.mb_type[mb_xy]; - - assert(IS_INTER(mb_type)); - - if(HAVE_PTHREADS && s->avctx->active_thread_type&FF_THREAD_FRAME) - await_references(h); - FUNC(prefetch_motion)(h, 0); - - if(IS_16X16(mb_type)){ - FUNC(mc_part)(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0, - qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0], - weight_op, weight_avg, - IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1)); - }else if(IS_16X8(mb_type)){ - FUNC(mc_part)(h, 0, 0, 4, (8<<PIXEL_SHIFT), dest_y, dest_cb, dest_cr, 0, 0, - qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0], - &weight_op[1], &weight_avg[1], - IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1)); - FUNC(mc_part)(h, 8, 0, 4, (8<<PIXEL_SHIFT), dest_y, dest_cb, dest_cr, 0, 4, - qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0], - &weight_op[1], &weight_avg[1], - IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1)); - }else if(IS_8X16(mb_type)){ - FUNC(mc_part)(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0, - qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], - &weight_op[2], &weight_avg[2], - IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1)); - FUNC(mc_part)(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0, - qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], - 
&weight_op[2], &weight_avg[2], - IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1)); - }else{ - int i; - - assert(IS_8X8(mb_type)); - - for(i=0; i<4; i++){ - const int sub_mb_type= h->sub_mb_type[i]; - const int n= 4*i; - int x_offset= (i&1)<<2; - int y_offset= (i&2)<<1; - - if(IS_SUB_8X8(sub_mb_type)){ - FUNC(mc_part)(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset, - qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], - &weight_op[3], &weight_avg[3], - IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); - }else if(IS_SUB_8X4(sub_mb_type)){ - FUNC(mc_part)(h, n , 0, 2, (4<<PIXEL_SHIFT), dest_y, dest_cb, dest_cr, x_offset, y_offset, - qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1], - &weight_op[4], &weight_avg[4], - IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); - FUNC(mc_part)(h, n+2, 0, 2, (4<<PIXEL_SHIFT), dest_y, dest_cb, dest_cr, x_offset, y_offset+2, - qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1], - &weight_op[4], &weight_avg[4], - IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); - }else if(IS_SUB_4X8(sub_mb_type)){ - FUNC(mc_part)(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset, - qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], - &weight_op[5], &weight_avg[5], - IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); - FUNC(mc_part)(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset, - qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], - &weight_op[5], &weight_avg[5], - IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); - }else{ - int j; - assert(IS_SUB_4X4(sub_mb_type)); - for(j=0; j<4; j++){ - int sub_x_offset= x_offset + 2*(j&1); - int sub_y_offset= y_offset + (j&2); - FUNC(mc_part)(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset, - qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], - &weight_op[6], &weight_avg[6], - IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); - } - } - } - } - - 
FUNC(prefetch_motion)(h, 1); -} diff --git a/libavcodec/h264_loopfilter.c b/libavcodec/h264_loopfilter.c index 2e61a3110a..fd1fb5f662 100644 --- a/libavcodec/h264_loopfilter.c +++ b/libavcodec/h264_loopfilter.c @@ -217,10 +217,11 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, int mb_xy; int mb_type, left_type; int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh; + int chroma = !(CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY)); mb_xy = h->mb_xy; - if(!h->top_type || !h->h264dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff) { + if(!h->top_type || !h->h264dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff || CHROMA444) { ff_h264_filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize); return; } @@ -262,16 +263,18 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, filter_mb_edgeh( &img_y[4*2*linesize], linesize, bS3, qp, h); filter_mb_edgeh( &img_y[4*3*linesize], linesize, bS3, qp, h); } - if(left_type){ - filter_mb_edgecv( &img_cb[2*0], uvlinesize, bS4, qpc0, h); - filter_mb_edgecv( &img_cr[2*0], uvlinesize, bS4, qpc0, h); + if(chroma){ + if(left_type){ + filter_mb_edgecv( &img_cb[2*0], uvlinesize, bS4, qpc0, h); + filter_mb_edgecv( &img_cr[2*0], uvlinesize, bS4, qpc0, h); + } + filter_mb_edgecv( &img_cb[2*2], uvlinesize, bS3, qpc, h); + filter_mb_edgecv( &img_cr[2*2], uvlinesize, bS3, qpc, h); + filter_mb_edgech( &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1, h); + filter_mb_edgech( &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc, h); + filter_mb_edgech( &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1, h); + filter_mb_edgech( &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc, h); } - filter_mb_edgecv( &img_cb[2*2], uvlinesize, bS3, qpc, h); - filter_mb_edgecv( &img_cr[2*2], uvlinesize, bS3, qpc, h); - filter_mb_edgech( &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1, h); - filter_mb_edgech( &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc, h); - filter_mb_edgech( &img_cr[2*0*uvlinesize], 
uvlinesize, bSH, qpc1, h); - filter_mb_edgech( &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc, h); return; } else { LOCAL_ALIGNED_8(int16_t, bS, [2], [4][4]); @@ -298,7 +301,7 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, #define FILTER(hv,dir,edge)\ if(AV_RN64A(bS[dir][edge])) { \ filter_mb_edge##hv( &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir, h );\ - if(!(edge&1)) {\ + if(chroma && !(edge&1)) {\ filter_mb_edgec##hv( &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir, h );\ filter_mb_edgec##hv( &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir, h );\ }\ @@ -353,9 +356,10 @@ static int check_mv(H264Context *h, long b_idx, long bn_idx, int mvy_limit){ return v; } -static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int dir) { +static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int chroma, int chroma444, int dir) { MpegEncContext * const s = &h->s; int edge; + int chroma_qp_avg[2]; const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy; const int mbm_type = dir == 0 ? 
h->left_type[0] : h->top_type; @@ -394,7 +398,7 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u bS[2]= 1+((h->cbp_table[mbn_xy] & 8)||h->non_zero_count_cache[scan8[0]+2]); bS[3]= 1+((h->cbp_table[mbn_xy] & 8)||h->non_zero_count_cache[scan8[0]+3]); }else{ - const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy] + 4+3*8; + const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy] + 3*4; int i; for( i = 0; i < 4; i++ ) { bS[i] = 1 + !!(h->non_zero_count_cache[scan8[0]+i] | mbn_nnz[i]); @@ -407,10 +411,17 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize); { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); } filter_mb_edgeh( &img_y[j*linesize], tmp_linesize, bS, qp, h ); - filter_mb_edgech( &img_cb[j*uvlinesize], tmp_uvlinesize, bS, - ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1, h); - filter_mb_edgech( &img_cr[j*uvlinesize], tmp_uvlinesize, bS, - ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1, h); + chroma_qp_avg[0] = (h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1; + chroma_qp_avg[1] = (h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1; + if (chroma) { + if (chroma444) { + filter_mb_edgeh (&img_cb[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp_avg[0], h); + filter_mb_edgeh (&img_cr[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp_avg[1], h); + } else { + filter_mb_edgech(&img_cb[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp_avg[0], h); + filter_mb_edgech(&img_cr[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp_avg[1], h); + } + } } }else{ DECLARE_ALIGNED(8, int16_t, bS)[4]; @@ -465,23 +476,29 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, 
int mb_y, u //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp[0], s->current_picture.qscale_table[mbn_xy]); tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize); //{ int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); } + chroma_qp_avg[0] = (h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbm_xy] ) + 1 ) >> 1; + chroma_qp_avg[1] = (h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbm_xy] ) + 1 ) >> 1; if( dir == 0 ) { filter_mb_edgev( &img_y[0], linesize, bS, qp, h ); - { - int qp= ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbm_xy] ) + 1 ) >> 1; - filter_mb_edgecv( &img_cb[0], uvlinesize, bS, qp, h); - if(h->pps.chroma_qp_diff) - qp= ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbm_xy] ) + 1 ) >> 1; - filter_mb_edgecv( &img_cr[0], uvlinesize, bS, qp, h); + if (chroma) { + if (chroma444) { + filter_mb_edgev ( &img_cb[0], uvlinesize, bS, chroma_qp_avg[0], h); + filter_mb_edgev ( &img_cr[0], uvlinesize, bS, chroma_qp_avg[1], h); + } else { + filter_mb_edgecv( &img_cb[0], uvlinesize, bS, chroma_qp_avg[0], h); + filter_mb_edgecv( &img_cr[0], uvlinesize, bS, chroma_qp_avg[1], h); + } } } else { filter_mb_edgeh( &img_y[0], linesize, bS, qp, h ); - { - int qp= ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbm_xy] ) + 1 ) >> 1; - filter_mb_edgech( &img_cb[0], uvlinesize, bS, qp, h); - if(h->pps.chroma_qp_diff) - qp= ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbm_xy] ) + 1 ) >> 1; - filter_mb_edgech( &img_cr[0], uvlinesize, bS, qp, h); + if (chroma) { + if (chroma444) { + filter_mb_edgeh ( &img_cb[0], uvlinesize, bS, chroma_qp_avg[0], h); + filter_mb_edgeh ( &img_cr[0], uvlinesize, bS, chroma_qp_avg[1], h); + } else { + 
filter_mb_edgech( &img_cb[0], uvlinesize, bS, chroma_qp_avg[0], h); + filter_mb_edgech( &img_cr[0], uvlinesize, bS, chroma_qp_avg[1], h); + } } } } @@ -545,15 +562,25 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u //{ int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); } if( dir == 0 ) { filter_mb_edgev( &img_y[4*edge << h->pixel_shift], linesize, bS, qp, h ); - if( (edge&1) == 0 ) { - filter_mb_edgecv( &img_cb[2*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[0], h); - filter_mb_edgecv( &img_cr[2*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[1], h); + if (chroma) { + if (chroma444) { + filter_mb_edgev ( &img_cb[4*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[0], h); + filter_mb_edgev ( &img_cr[4*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[1], h); + } else if( (edge&1) == 0 ) { + filter_mb_edgecv( &img_cb[2*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[0], h); + filter_mb_edgecv( &img_cr[2*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[1], h); + } } } else { filter_mb_edgeh( &img_y[4*edge*linesize], linesize, bS, qp, h ); - if( (edge&1) == 0 ) { - filter_mb_edgech( &img_cb[2*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[0], h); - filter_mb_edgech( &img_cr[2*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[1], h); + if (chroma) { + if (chroma444) { + filter_mb_edgeh ( &img_cb[4*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[0], h); + filter_mb_edgeh ( &img_cr[4*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[1], h); + } else if( (edge&1) == 0 ) { + filter_mb_edgech( &img_cb[2*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[0], h); + filter_mb_edgech( &img_cr[2*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[1], h); + } } } } @@ -566,6 +593,7 @@ void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint const int mvy_limit = IS_INTERLACED(mb_type) ? 
2 : 4; int first_vertical_edge_done = 0; av_unused int dir; + int chroma = !(CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY)); if (FRAME_MBAFF // and current and left pair do not have the same interlaced type @@ -589,11 +617,11 @@ void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint } else { static const uint8_t offset[2][2][8]={ { - {7+8*0, 7+8*0, 7+8*0, 7+8*0, 7+8*1, 7+8*1, 7+8*1, 7+8*1}, - {7+8*2, 7+8*2, 7+8*2, 7+8*2, 7+8*3, 7+8*3, 7+8*3, 7+8*3}, + {3+4*0, 3+4*0, 3+4*0, 3+4*0, 3+4*1, 3+4*1, 3+4*1, 3+4*1}, + {3+4*2, 3+4*2, 3+4*2, 3+4*2, 3+4*3, 3+4*3, 3+4*3, 3+4*3}, },{ - {7+8*0, 7+8*1, 7+8*2, 7+8*3, 7+8*0, 7+8*1, 7+8*2, 7+8*3}, - {7+8*0, 7+8*1, 7+8*2, 7+8*3, 7+8*0, 7+8*1, 7+8*2, 7+8*3}, + {3+4*0, 3+4*1, 3+4*2, 3+4*3, 3+4*0, 3+4*1, 3+4*2, 3+4*3}, + {3+4*0, 3+4*1, 3+4*2, 3+4*3, 3+4*0, 3+4*1, 3+4*2, 3+4*3}, } }; const uint8_t *off= offset[MB_FIELD][mb_y&1]; @@ -634,25 +662,29 @@ void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint if(MB_FIELD){ filter_mb_mbaff_edgev ( h, img_y , linesize, bS , 1, qp [0] ); filter_mb_mbaff_edgev ( h, img_y + 8* linesize, linesize, bS+4, 1, qp [1] ); - filter_mb_mbaff_edgecv( h, img_cb, uvlinesize, bS , 1, bqp[0] ); - filter_mb_mbaff_edgecv( h, img_cb + 4*uvlinesize, uvlinesize, bS+4, 1, bqp[1] ); - filter_mb_mbaff_edgecv( h, img_cr, uvlinesize, bS , 1, rqp[0] ); - filter_mb_mbaff_edgecv( h, img_cr + 4*uvlinesize, uvlinesize, bS+4, 1, rqp[1] ); + if (chroma){ + filter_mb_mbaff_edgecv( h, img_cb, uvlinesize, bS , 1, bqp[0] ); + filter_mb_mbaff_edgecv( h, img_cb + 4*uvlinesize, uvlinesize, bS+4, 1, bqp[1] ); + filter_mb_mbaff_edgecv( h, img_cr, uvlinesize, bS , 1, rqp[0] ); + filter_mb_mbaff_edgecv( h, img_cr + 4*uvlinesize, uvlinesize, bS+4, 1, rqp[1] ); + } }else{ filter_mb_mbaff_edgev ( h, img_y , 2* linesize, bS , 2, qp [0] ); filter_mb_mbaff_edgev ( h, img_y + linesize, 2* linesize, bS+1, 2, qp [1] ); - filter_mb_mbaff_edgecv( h, img_cb, 2*uvlinesize, bS , 2, bqp[0] ); - 
filter_mb_mbaff_edgecv( h, img_cb + uvlinesize, 2*uvlinesize, bS+1, 2, bqp[1] ); - filter_mb_mbaff_edgecv( h, img_cr, 2*uvlinesize, bS , 2, rqp[0] ); - filter_mb_mbaff_edgecv( h, img_cr + uvlinesize, 2*uvlinesize, bS+1, 2, rqp[1] ); + if (chroma){ + filter_mb_mbaff_edgecv( h, img_cb, 2*uvlinesize, bS , 2, bqp[0] ); + filter_mb_mbaff_edgecv( h, img_cb + uvlinesize, 2*uvlinesize, bS+1, 2, bqp[1] ); + filter_mb_mbaff_edgecv( h, img_cr, 2*uvlinesize, bS , 2, rqp[0] ); + filter_mb_mbaff_edgecv( h, img_cr + uvlinesize, 2*uvlinesize, bS+1, 2, rqp[1] ); + } } } #if CONFIG_SMALL for( dir = 0; dir < 2; dir++ ) - filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, dir); + filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, chroma, CHROMA444, dir); #else - filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, 0); - filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, 1); + filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, chroma, CHROMA444, 0); + filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, chroma, CHROMA444, 1); #endif } diff --git a/libavcodec/h264_ps.c b/libavcodec/h264_ps.c index f77a013112..ddfe1d2c64 100644 --- a/libavcodec/h264_ps.c +++ b/libavcodec/h264_ps.c @@ -269,7 +269,7 @@ static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_s fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0], fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1], fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0], - fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1] + fallback_sps ? 
sps->scaling_matrix8[3] : default_scaling8[1] }; if(get_bits1(&s->gb)){ sps->scaling_matrix_present |= is_sps; @@ -281,7 +281,15 @@ static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_s decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb if(is_sps || pps->transform_8x8_mode){ decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y - decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y + if(h->sps.chroma_format_idc == 3){ + decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[0],scaling_matrix8[0]); // Intra, Cr + decode_scaling_list(h,scaling_matrix8[2],64,default_scaling8[0],scaling_matrix8[1]); // Intra, Cb + } + decode_scaling_list(h,scaling_matrix8[3],64,default_scaling8[1],fallback[3]); // Inter, Y + if(h->sps.chroma_format_idc == 3){ + decode_scaling_list(h,scaling_matrix8[4],64,default_scaling8[1],scaling_matrix8[3]); // Inter, Cr + decode_scaling_list(h,scaling_matrix8[5],64,default_scaling8[1],scaling_matrix8[4]); // Inter, Cb + } } } } @@ -395,7 +403,7 @@ int ff_h264_decode_seq_parameter_set(H264Context *h){ if(sps->crop_left || sps->crop_top){ av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n"); } - if(sps->crop_right >= 8 || sps->crop_bottom >= 8){ + if(sps->crop_right >= (8<<CHROMA444) || sps->crop_bottom >= (8<<CHROMA444)){ av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n"); } }else{ diff --git a/libavcodec/h264dsp.h b/libavcodec/h264dsp.h index 4b606efa17..b61967adfc 100644 --- a/libavcodec/h264dsp.h +++ b/libavcodec/h264dsp.h @@ -66,11 +66,10 @@ typedef struct H264DSPContext{ void (*h264_idct_dc_add)(uint8_t *dst/*align 4*/, DCTELEM *block/*align 16*/, int stride); void (*h264_idct8_dc_add)(uint8_t *dst/*align 8*/, DCTELEM *block/*align 16*/, int stride); - void (*h264_dct)(DCTELEM 
block[4][4]); - void (*h264_idct_add16)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]); - void (*h264_idct8_add4)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]); - void (*h264_idct_add8)(uint8_t **dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]); - void (*h264_idct_add16intra)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]); + void (*h264_idct_add16)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[15*8]); + void (*h264_idct8_add4)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[15*8]); + void (*h264_idct_add8)(uint8_t **dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[15*8]); + void (*h264_idct_add16intra)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[15*8]); void (*h264_luma_dc_dequant_idct)(DCTELEM *output, DCTELEM *input/*align 16*/, int qmul); void (*h264_chroma_dc_dequant_idct)(DCTELEM *block, int qmul); }H264DSPContext; diff --git a/libavcodec/h264dsp_template.c b/libavcodec/h264dsp_template.c index 6816e81ff2..eb336f7e62 100644 --- a/libavcodec/h264dsp_template.c +++ b/libavcodec/h264dsp_template.c @@ -58,11 +58,12 @@ static void FUNCC(weight_h264_pixels ## W ## x ## H)(uint8_t *p_block, int strid op_scale1(15); \ } \ } \ -static void FUNCC(biweight_h264_pixels ## W ## x ## H)(uint8_t *p_dst, uint8_t *p_src, int stride, int log2_denom, int weightd, int weights, int offset){ \ +static void FUNCC(biweight_h264_pixels ## W ## x ## H)(uint8_t *_dst, uint8_t *_src, int stride, int log2_denom, int weightd, int weights, int offset){ \ int y; \ - pixel *dst = (pixel*)p_dst; \ - 
pixel *src = (pixel*)p_src; \ + pixel *dst = (pixel*)_dst; \ + pixel *src = (pixel*)_src; \ stride >>= sizeof(pixel)-1; \ + offset <<= (BIT_DEPTH-8); \ offset = ((offset + 1) | 1) << log2_denom; \ for(y=0; y<H; y++, dst += stride, src += stride){ \ op_scale2(0); \ diff --git a/libavcodec/h264idct_template.c b/libavcodec/h264idct_template.c index 1b3c635319..83f6f38691 100644 --- a/libavcodec/h264idct_template.c +++ b/libavcodec/h264idct_template.c @@ -30,15 +30,19 @@ #ifndef AVCODEC_H264IDCT_INTERNAL_H #define AVCODEC_H264IDCT_INTERNAL_H //FIXME this table is a duplicate from h264data.h, and will be removed once the tables from, h264 have been split -static const uint8_t scan8[16 + 2*4]={ - 4+1*8, 5+1*8, 4+2*8, 5+2*8, - 6+1*8, 7+1*8, 6+2*8, 7+2*8, - 4+3*8, 5+3*8, 4+4*8, 5+4*8, - 6+3*8, 7+3*8, 6+4*8, 7+4*8, - 1+1*8, 2+1*8, - 1+2*8, 2+2*8, - 1+4*8, 2+4*8, - 1+5*8, 2+5*8, +static const uint8_t scan8[16*3]={ + 4+ 1*8, 5+ 1*8, 4+ 2*8, 5+ 2*8, + 6+ 1*8, 7+ 1*8, 6+ 2*8, 7+ 2*8, + 4+ 3*8, 5+ 3*8, 4+ 4*8, 5+ 4*8, + 6+ 3*8, 7+ 3*8, 6+ 4*8, 7+ 4*8, + 4+ 6*8, 5+ 6*8, 4+ 7*8, 5+ 7*8, + 6+ 6*8, 7+ 6*8, 6+ 7*8, 7+ 7*8, + 4+ 8*8, 5+ 8*8, 4+ 9*8, 5+ 9*8, + 6+ 8*8, 7+ 8*8, 6+ 9*8, 7+ 9*8, + 4+11*8, 5+11*8, 4+12*8, 5+12*8, + 6+11*8, 7+11*8, 6+12*8, 7+12*8, + 4+13*8, 5+13*8, 4+14*8, 5+14*8, + 6+13*8, 7+13*8, 6+14*8, 7+14*8 }; #endif @@ -190,7 +194,7 @@ void FUNCC(ff_h264_idct8_dc_add)(uint8_t *p_dst, DCTELEM *block, int stride){ } } -void FUNCC(ff_h264_idct_add16)(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){ +void FUNCC(ff_h264_idct_add16)(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[15*8]){ int i; for(i=0; i<16; i++){ int nnz = nnzc[ scan8[i] ]; @@ -201,7 +205,7 @@ void FUNCC(ff_h264_idct_add16)(uint8_t *dst, const int *block_offset, DCTELEM *b } } -void FUNCC(ff_h264_idct_add16intra)(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){ +void 
FUNCC(ff_h264_idct_add16intra)(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[15*8]){ int i; for(i=0; i<16; i++){ if(nnzc[ scan8[i] ]) FUNCC(idct_internal )(dst + block_offset[i], block + i*16*sizeof(pixel), stride, 4, 6, 1); @@ -209,7 +213,7 @@ void FUNCC(ff_h264_idct_add16intra)(uint8_t *dst, const int *block_offset, DCTEL } } -void FUNCC(ff_h264_idct8_add4)(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){ +void FUNCC(ff_h264_idct8_add4)(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[15*8]){ int i; for(i=0; i<16; i+=4){ int nnz = nnzc[ scan8[i] ]; @@ -220,13 +224,15 @@ void FUNCC(ff_h264_idct8_add4)(uint8_t *dst, const int *block_offset, DCTELEM *b } } -void FUNCC(ff_h264_idct_add8)(uint8_t **dest, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){ - int i; - for(i=16; i<16+8; i++){ - if(nnzc[ scan8[i] ]) - FUNCC(ff_h264_idct_add )(dest[(i&4)>>2] + block_offset[i], block + i*16*sizeof(pixel), stride); - else if(((dctcoef*)block)[i*16]) - FUNCC(ff_h264_idct_dc_add)(dest[(i&4)>>2] + block_offset[i], block + i*16*sizeof(pixel), stride); +void FUNCC(ff_h264_idct_add8)(uint8_t **dest, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[15*8]){ + int i, j; + for(j=1; j<3; j++){ + for(i=j*16; i<j*16+4; i++){ + if(nnzc[ scan8[i] ]) + FUNCC(ff_h264_idct_add )(dest[j-1] + block_offset[i], block + i*16*sizeof(pixel), stride); + else if(((dctcoef*)block)[i*16]) + FUNCC(ff_h264_idct_dc_add)(dest[j-1] + block_offset[i], block + i*16*sizeof(pixel), stride); + } } } /** diff --git a/libavcodec/iirfilter.c b/libavcodec/iirfilter.c index bc63c3991a..4b9aae2a5d 100644 --- a/libavcodec/iirfilter.c +++ b/libavcodec/iirfilter.c @@ -151,8 +151,6 @@ static int biquad_init_coeffs(void *avc, struct FFIIRFilterCoeffs *c, // during filtering, the delay state will include the gain multiplication c->cx[0] = lrintf(x0 / 
c->gain); c->cx[1] = lrintf(x1 / c->gain); - c->cy[0] /= c->gain; - c->cy[1] /= c->gain; return 0; } @@ -324,7 +322,7 @@ int main(void) int i; FILE* fd; - fcoeffs = ff_iir_filter_init_coeffs(FF_FILTER_TYPE_BUTTERWORTH, + fcoeffs = ff_iir_filter_init_coeffs(NULL, FF_FILTER_TYPE_BUTTERWORTH, FF_FILTER_MODE_LOWPASS, FILT_ORDER, cutoff_coeff, 0.0, 0.0); fstate = ff_iir_filter_init_state(FILT_ORDER); diff --git a/libavcodec/imgconvert.c b/libavcodec/imgconvert.c index b37c343b93..78ac2d83bb 100644 --- a/libavcodec/imgconvert.c +++ b/libavcodec/imgconvert.c @@ -417,13 +417,12 @@ void avcodec_get_chroma_sub_sample(enum PixelFormat pix_fmt, int *h_shift, int * *v_shift = av_pix_fmt_descriptors[pix_fmt].log2_chroma_h; } +#if FF_API_GET_PIX_FMT_NAME const char *avcodec_get_pix_fmt_name(enum PixelFormat pix_fmt) { - if ((unsigned)pix_fmt >= PIX_FMT_NB) - return NULL; - else - return av_pix_fmt_descriptors[pix_fmt].name; + return av_get_pix_fmt_name(pix_fmt); } +#endif #if LIBAVCODEC_VERSION_MAJOR < 53 enum PixelFormat avcodec_get_pix_fmt(const char *name) @@ -841,15 +840,23 @@ int av_picture_crop(AVPicture *dst, const AVPicture *src, int y_shift; int x_shift; - if (pix_fmt < 0 || pix_fmt >= PIX_FMT_NB || !is_yuv_planar(&pix_fmt_info[pix_fmt])) + if (pix_fmt < 0 || pix_fmt >= PIX_FMT_NB) return -1; y_shift = av_pix_fmt_descriptors[pix_fmt].log2_chroma_h; x_shift = av_pix_fmt_descriptors[pix_fmt].log2_chroma_w; + if (is_yuv_planar(&pix_fmt_info[pix_fmt])) { dst->data[0] = src->data[0] + (top_band * src->linesize[0]) + left_band; dst->data[1] = src->data[1] + ((top_band >> y_shift) * src->linesize[1]) + (left_band >> x_shift); dst->data[2] = src->data[2] + ((top_band >> y_shift) * src->linesize[2]) + (left_band >> x_shift); + } else{ + if(top_band % (1<<y_shift) || left_band % (1<<x_shift)) + return -1; + if(left_band) //FIXME add support for this too + return -1; + dst->data[0] = src->data[0] + (top_band * src->linesize[0]) + left_band; + } dst->linesize[0] = src->linesize[0]; 
dst->linesize[1] = src->linesize[1]; diff --git a/libavcodec/indeo3.c b/libavcodec/indeo3.c index 588a5b4cac..8e55fbe443 100644 --- a/libavcodec/indeo3.c +++ b/libavcodec/indeo3.c @@ -1,7 +1,4 @@ /* - * Intel Indeo 3 (IV31, IV32, etc.) video decoder for ffmpeg - * written, produced, and directed by Alan Smithee - * * This file is part of FFmpeg. * * FFmpeg is free software; you can redistribute it and/or @@ -19,9 +16,14 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#include <stdio.h> -#include <stdlib.h> -#include <string.h> +/** + * @file + * Intel Indeo 3 (IV31, IV32, etc.) video decoder for FFmpeg + * written, produced, and directed by Alan Smithee + * + * For some documentation see: + * http://wiki.multimedia.cx/index.php?title=Indeo_3 + */ #include "libavutil/imgutils.h" #include "avcodec.h" @@ -211,6 +213,7 @@ static void iv_Decode_Chunk(Indeo3DecodeContext *s, int *width_tbl, width_tbl_arr[10]; const signed char *ref_vectors; uint8_t *cur_frm_pos, *ref_frm_pos, *cp, *cp2; + uint8_t *cur_end = cur + width*height + width; uint32_t *cur_lp, *ref_lp; const uint32_t *correction_lp[2], *correctionloworder_lp[2], *correctionhighorder_lp[2]; uint8_t *correction_type_sp[2]; @@ -357,6 +360,8 @@ static void iv_Decode_Chunk(Indeo3DecodeContext *s, k = *buf1++; cur_lp = ((uint32_t *)cur_frm_pos) + width_tbl[lp2]; ref_lp = ((uint32_t *)ref_frm_pos) + width_tbl[lp2]; + if ((uint8_t *)cur_lp >= cur_end-3) + break; switch(correction_type_sp[0][k]) { case 0: @@ -1135,6 +1140,9 @@ static av_cold int indeo3_decode_end(AVCodecContext *avctx) iv_free_func(s); + if (s->frame.data[0]) + avctx->release_buffer(avctx, &s->frame); + return 0; } diff --git a/libavcodec/indeo5.c b/libavcodec/indeo5.c index 5135c46bc7..4c6bfd66d1 100644 --- a/libavcodec/indeo5.c +++ b/libavcodec/indeo5.c @@ -629,7 +629,7 @@ static int decode_band(IVI5DecContext *ctx, int plane_num, FFSWAP(int16_t, band->rv_map->valtab[idx1], band->rv_map->valtab[idx2]); } -#if 
IVI_DEBUG +#ifdef DEBUG if (band->checksum_present) { uint16_t chksum = ivi_calc_band_checksum(band); if (chksum != band->checksum) { diff --git a/libavcodec/intelh263dec.c b/libavcodec/intelh263dec.c index d5a644ee7c..a011a9f597 100644 --- a/libavcodec/intelh263dec.c +++ b/libavcodec/intelh263dec.c @@ -67,9 +67,9 @@ int ff_intel_h263_decode_picture_header(MpegEncContext *s) if (format < 6) { s->width = h263_format[format][0]; s->height = h263_format[format][1]; - s->avctx->sample_aspect_ratio.num=12;s->avctx->sample_aspect_ratio.den=11; - } - else { + s->avctx->sample_aspect_ratio.num = 12; + s->avctx->sample_aspect_ratio.den = 11; + } else { format = get_bits(&s->gb, 3); if(format == 0 || format == 7){ av_log(s->avctx, AV_LOG_ERROR, "Wrong Intel H263 format\n"); @@ -95,12 +95,11 @@ int ff_intel_h263_decode_picture_header(MpegEncContext *s) if(ar == 15){ s->avctx->sample_aspect_ratio.num = get_bits(&s->gb, 8); // aspect ratio - width s->avctx->sample_aspect_ratio.den = get_bits(&s->gb, 8); // aspect ratio - height - } - else { + } else { s->avctx->sample_aspect_ratio = ff_h263_pixel_aspect[ar]; } - if(s->avctx->sample_aspect_ratio.num == 0) - av_log(s->avctx, AV_LOG_ERROR, "Invalid aspect ratio\n"); + if (s->avctx->sample_aspect_ratio.num == 0) + av_log(s->avctx, AV_LOG_ERROR, "Invalid aspect ratio.\n"); } s->chroma_qscale= s->qscale = get_bits(&s->gb, 5); diff --git a/libavcodec/interplayvideo.c b/libavcodec/interplayvideo.c index 246408df80..5c61af39e4 100644 --- a/libavcodec/interplayvideo.c +++ b/libavcodec/interplayvideo.c @@ -46,14 +46,6 @@ #define PALETTE_COUNT 256 -/* debugging support */ -#define DEBUG_INTERPLAY 0 -#if DEBUG_INTERPLAY -#define debug_interplay(x,...) av_log(NULL, AV_LOG_DEBUG, x, __VA_ARGS__) -#else -static inline void debug_interplay(const char *format, ...) 
{ } -#endif - typedef struct IpvideoContext { AVCodecContext *avctx; @@ -140,7 +132,7 @@ static int ipvideo_decode_block_opcode_0x2(IpvideoContext *s) y = 8 + ((B - 56) / 29); } - debug_interplay (" motion byte = %d, (x, y) = (%d, %d)\n", B, x, y); + av_dlog(NULL, " motion byte = %d, (x, y) = (%d, %d)\n", B, x, y); return copy_from(s, &s->second_last_frame, x, y); } @@ -168,7 +160,7 @@ static int ipvideo_decode_block_opcode_0x3(IpvideoContext *s) y = -( 8 + ((B - 56) / 29)); } - debug_interplay (" motion byte = %d, (x, y) = (%d, %d)\n", B, x, y); + av_dlog(NULL, " motion byte = %d, (x, y) = (%d, %d)\n", B, x, y); return copy_from(s, &s->current_frame, x, y); } @@ -191,7 +183,7 @@ static int ipvideo_decode_block_opcode_0x4(IpvideoContext *s) x = -8 + BL; y = -8 + BH; - debug_interplay (" motion byte = %d, (x, y) = (%d, %d)\n", B, x, y); + av_dlog(NULL, " motion byte = %d, (x, y) = (%d, %d)\n", B, x, y); return copy_from(s, &s->last_frame, x, y); } @@ -206,7 +198,7 @@ static int ipvideo_decode_block_opcode_0x5(IpvideoContext *s) x = *s->stream_ptr++; y = *s->stream_ptr++; - debug_interplay (" motion bytes = %d, %d\n", x, y); + av_dlog(NULL, " motion bytes = %d, %d\n", x, y); return copy_from(s, &s->last_frame, x, y); } @@ -587,7 +579,7 @@ static int ipvideo_decode_block_opcode_0x6_16(IpvideoContext *s) x = *s->stream_ptr++; y = *s->stream_ptr++; - debug_interplay (" motion bytes = %d, %d\n", x, y); + av_dlog(NULL, " motion bytes = %d, %d\n", x, y); return copy_from(s, &s->second_last_frame, x, y); } @@ -964,7 +956,7 @@ static void ipvideo_decode_opcodes(IpvideoContext *s) static int frame = 0; GetBitContext gb; - debug_interplay("------------------ frame %d\n", frame); + av_dlog(NULL, "------------------ frame %d\n", frame); frame++; if (!s->is_16bpp) { @@ -990,8 +982,8 @@ static void ipvideo_decode_opcodes(IpvideoContext *s) for (x = 0; x < s->avctx->width; x += 8) { opcode = get_bits(&gb, 4); - debug_interplay(" block @ (%3d, %3d): encoding 0x%X, data ptr @ %p\n", 
- x, y, opcode, s->stream_ptr); + av_dlog(NULL, " block @ (%3d, %3d): encoding 0x%X, data ptr @ %p\n", + x, y, opcode, s->stream_ptr); if (!s->is_16bpp) { s->pixel_ptr = s->current_frame.data[0] + x diff --git a/libavcodec/ituh263dec.c b/libavcodec/ituh263dec.c index e0d57a763d..b1e67231fd 100644 --- a/libavcodec/ituh263dec.c +++ b/libavcodec/ituh263dec.c @@ -152,7 +152,7 @@ int ff_h263_decode_mba(MpegEncContext *s) */ static int h263_decode_gob_header(MpegEncContext *s) { - unsigned int val, gfid, gob_number; + unsigned int val, gob_number; int left; /* Check for GOB Start Code */ @@ -183,12 +183,12 @@ static int h263_decode_gob_header(MpegEncContext *s) s->qscale = get_bits(&s->gb, 5); /* SQUANT */ if(get_bits1(&s->gb)==0) return -1; - gfid = get_bits(&s->gb, 2); /* GFID */ + skip_bits(&s->gb, 2); /* GFID */ }else{ gob_number = get_bits(&s->gb, 5); /* GN */ s->mb_x= 0; s->mb_y= s->gob_index* gob_number; - gfid = get_bits(&s->gb, 2); /* GFID */ + skip_bits(&s->gb, 2); /* GFID */ s->qscale = get_bits(&s->gb, 5); /* GQUANT */ } diff --git a/libavcodec/ivi_common.c b/libavcodec/ivi_common.c index 0e552bccfe..7f14a89f33 100644 --- a/libavcodec/ivi_common.c +++ b/libavcodec/ivi_common.c @@ -418,8 +418,8 @@ int ff_ivi_decode_blocks(GetBitContext *gb, IVIBandDesc *band, IVITile *tile) break; pos = band->scan[scan_pos]; - if (IVI_DEBUG && !val) - av_log(NULL, AV_LOG_ERROR, "Val = 0 encountered!\n"); + if (!val) + av_dlog(NULL, "Val = 0 encountered!\n"); q = (base_tab[pos] * quant) >> 9; if (q > 1) @@ -563,7 +563,7 @@ void ff_ivi_process_empty_tile(AVCodecContext *avctx, IVIBandDesc *band, } -#if IVI_DEBUG +#ifdef DEBUG uint16_t ivi_calc_band_checksum (IVIBandDesc *band) { int x, y; diff --git a/libavcodec/ivi_common.h b/libavcodec/ivi_common.h index 803c0580be..10cca26045 100644 --- a/libavcodec/ivi_common.h +++ b/libavcodec/ivi_common.h @@ -33,8 +33,6 @@ #include "get_bits.h" #include <stdint.h> -#define IVI_DEBUG 0 - #define IVI_VLC_BITS 13 ///< max number of bits of 
the ivi's huffman codes /** @@ -340,7 +338,6 @@ void ff_ivi_process_empty_tile(AVCodecContext *avctx, IVIBandDesc *band, */ void ff_ivi_output_plane(IVIPlaneDesc *plane, uint8_t *dst, int dst_pitch); -#if IVI_DEBUG /** * Calculate band checksum from band data. */ @@ -350,6 +347,5 @@ uint16_t ivi_calc_band_checksum (IVIBandDesc *band); * Verify that band data lies in range. */ int ivi_check_band (IVIBandDesc *band, const uint8_t *ref, int pitch); -#endif #endif /* AVCODEC_IVI_COMMON_H */ diff --git a/libavcodec/j2k.c b/libavcodec/j2k.c new file mode 100644 index 0000000000..33a7e3100b --- /dev/null +++ b/libavcodec/j2k.c @@ -0,0 +1,392 @@ +/* + * JPEG2000 encoder and decoder common functions + * Copyright (c) 2007 Kamil Nowosad + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * JPEG2000 image encoder and decoder common functions + * @file + * @author Kamil Nowosad + */ + + +#include "avcodec.h" +#include "j2k.h" + +#define SHL(a, n) ((n)>=0 ? 
(a) << (n) : (a) >> -(n)) + +#if 0 +void ff_j2k_printv(int *tab, int l) +{ + int i; + for (i = 0; i < l; i++) + printf("%.3d ", tab[i]); + printf("\n"); +} + +void ff_j2k_printu(uint8_t *tab, int l) +{ + int i; + for (i = 0; i < l; i++) + printf("%.3hd ", tab[i]); + printf("\n"); +} +#endif + +/* tag tree routines */ + +/** allocate the memory for tag tree */ + +static int tag_tree_size(int w, int h) +{ + int res = 0; + while (w > 1 || h > 1){ + res += w * h; + w = (w+1) >> 1; + h = (h+1) >> 1; + } + return res + 1; +} + +J2kTgtNode *ff_j2k_tag_tree_init(int w, int h) +{ + int pw = w, ph = h; + J2kTgtNode *res, *t, *t2; + + t = res = av_mallocz(tag_tree_size(w, h)*sizeof(J2kTgtNode)); + + if (res == NULL) + return NULL; + + while (w > 1 || h > 1){ + int i, j; + pw = w; + ph = h; + + w = (w+1) >> 1; + h = (h+1) >> 1; + t2 = t + pw*ph; + + for (i = 0; i < ph; i++) + for (j = 0; j < pw; j++){ + t[i*pw + j].parent = &t2[(i>>1)*w + (j>>1)]; + } + t = t2; + } + t[0].parent = NULL; + return res; +} + +static void tag_tree_zero(J2kTgtNode *t, int w, int h) +{ + int i, siz = tag_tree_size(w, h); + + for (i = 0; i < siz; i++){ + t[i].val = 0; + t[i].vis = 0; + } +} + +uint8_t ff_j2k_nbctxno_lut[256][4]; + +static int getnbctxno(int flag, int bandno, int vert_causal_ctx_csty_symbol) +{ + int h, v, d; + + h = ((flag & J2K_T1_SIG_E) ? 1:0)+ + ((flag & J2K_T1_SIG_W) ? 1:0); + v = ((flag & J2K_T1_SIG_N) ? 1:0); + if (!vert_causal_ctx_csty_symbol) + v = v + ((flag & J2K_T1_SIG_S) ? 1:0); + d = ((flag & J2K_T1_SIG_NE) ? 1:0)+ + ((flag & J2K_T1_SIG_NW) ? 1:0); + if (!vert_causal_ctx_csty_symbol) + d = d + ((flag & J2K_T1_SIG_SE) ? 1:0)+ + ((flag & J2K_T1_SIG_SW) ? 
1:0); + if (bandno < 3){ + if (bandno == 1) + FFSWAP(int, h, v); + if (h == 2) return 8; + if (h == 1){ + if (v >= 1) return 7; + if (d >= 1) return 6; + return 5; + } + if (v == 2) return 4; + if (v == 1) return 3; + if (d >= 2) return 2; + if (d == 1) return 1; + return 0; + } else{ + if (d >= 3) return 8; + if (d == 2){ + if (h+v >= 1) return 7; + return 6; + } + if (d == 1){ + if (h+v >= 2) return 5; + if (h+v == 1) return 4; + return 3; + } + if (h+v >= 2) return 2; + if (h+v == 1) return 1; + return 0; + } + assert(0); +} + +uint8_t ff_j2k_sgnctxno_lut[16][16], ff_j2k_xorbit_lut[16][16]; + +static int getsgnctxno(int flag, uint8_t *xorbit) +{ + int vcontrib, hcontrib; + static const int contribtab[3][3] = {{0, -1, 1}, {-1, -1, 0}, {1, 0, 1}}; + static const int ctxlbltab[3][3] = {{13, 12, 11}, {10, 9, 10}, {11, 12, 13}}; + static const int xorbittab[3][3] = {{1, 1, 1,}, {1, 0, 0}, {0, 0, 0}}; + + hcontrib = contribtab[flag & J2K_T1_SIG_E ? flag & J2K_T1_SGN_E ? 1:2:0] + [flag & J2K_T1_SIG_W ? flag & J2K_T1_SGN_W ? 1:2:0]+1; + vcontrib = contribtab[flag & J2K_T1_SIG_S ? flag & J2K_T1_SGN_S ? 1:2:0] + [flag & J2K_T1_SIG_N ? flag & J2K_T1_SGN_N ? 
1:2:0]+1; + *xorbit = xorbittab[hcontrib][vcontrib]; + return ctxlbltab[hcontrib][vcontrib]; +} + +void ff_j2k_init_tier1_luts(void) +{ + int i, j; + for (i = 0; i < 256; i++) + for (j = 0; j < 4; j++) + ff_j2k_nbctxno_lut[i][j] = getnbctxno(i, j, 0); + for (i = 0; i < 16; i++) + for (j = 0; j < 16; j++) + ff_j2k_sgnctxno_lut[i][j] = getsgnctxno(i + (j << 8), &ff_j2k_xorbit_lut[i][j]); +} + +void ff_j2k_set_significant(J2kT1Context *t1, int x, int y, int negative) +{ + x++; y++; + t1->flags[y][x] |= J2K_T1_SIG; + if (negative){ + t1->flags[y][x+1] |= J2K_T1_SIG_W | J2K_T1_SGN_W; + t1->flags[y][x-1] |= J2K_T1_SIG_E | J2K_T1_SGN_E; + t1->flags[y+1][x] |= J2K_T1_SIG_N | J2K_T1_SGN_N; + t1->flags[y-1][x] |= J2K_T1_SIG_S | J2K_T1_SGN_S; + } else{ + t1->flags[y][x+1] |= J2K_T1_SIG_W; + t1->flags[y][x-1] |= J2K_T1_SIG_E; + t1->flags[y+1][x] |= J2K_T1_SIG_N; + t1->flags[y-1][x] |= J2K_T1_SIG_S; + } + t1->flags[y+1][x+1] |= J2K_T1_SIG_NW; + t1->flags[y+1][x-1] |= J2K_T1_SIG_NE; + t1->flags[y-1][x+1] |= J2K_T1_SIG_SW; + t1->flags[y-1][x-1] |= J2K_T1_SIG_SE; +} +/* Set up one component: initialise its DWT, allocate the sample buffer, then for every resolution level and band compute the geometry and allocate code blocks, precincts and their tag trees. Returns 0, the ff_j2k_dwt_init() error, or AVERROR(ENOMEM); earlier allocations are not released on failure (presumably the caller runs ff_j2k_cleanup() -- confirm). */ +int ff_j2k_init_component(J2kComponent *comp, J2kCodingStyle *codsty, J2kQuantStyle *qntsty, int cbps, int dx, int dy) +{ + int reslevelno, bandno, gbandno = 0, ret, i, j, csize = 1; + + if (ret=ff_j2k_dwt_init(&comp->dwt, comp->coord, codsty->nreslevels-1, codsty->transform)) + return ret; + for (i = 0; i < 2; i++) + csize *= comp->coord[i][1] - comp->coord[i][0]; + + comp->data = av_malloc(csize * sizeof(int)); + if (!comp->data) + return AVERROR(ENOMEM); + comp->reslevel = av_malloc(codsty->nreslevels * sizeof(J2kResLevel)); + + if (!comp->reslevel) + return AVERROR(ENOMEM); + for (reslevelno = 0; reslevelno < codsty->nreslevels; reslevelno++){ + int declvl = codsty->nreslevels - reslevelno; + J2kResLevel *reslevel = comp->reslevel + reslevelno; + + for (i = 0; i < 2; i++) + for (j = 0; j < 2; j++) + reslevel->coord[i][j] = + ff_j2k_ceildivpow2(comp->coord[i][j], declvl - 1); + + if (reslevelno == 0) +
reslevel->nbands = 1; + else + reslevel->nbands = 3; + + if (reslevel->coord[0][1] == reslevel->coord[0][0]) + reslevel->num_precincts_x = 0; + else + reslevel->num_precincts_x = ff_j2k_ceildivpow2(reslevel->coord[0][1], codsty->log2_prec_width) + - (reslevel->coord[0][0] >> codsty->log2_prec_width); + + if (reslevel->coord[1][1] == reslevel->coord[1][0]) + reslevel->num_precincts_y = 0; + else + reslevel->num_precincts_y = ff_j2k_ceildivpow2(reslevel->coord[1][1], codsty->log2_prec_height) + - (reslevel->coord[1][0] >> codsty->log2_prec_height); + + reslevel->band = av_malloc(reslevel->nbands * sizeof(J2kBand)); + if (!reslevel->band) + return AVERROR(ENOMEM); + for (bandno = 0; bandno < reslevel->nbands; bandno++, gbandno++){ + J2kBand *band = reslevel->band + bandno; + int cblkno, precx, precy, precno; + int x0, y0, x1, y1; + int xi0, yi0, xi1, yi1; + int cblkperprecw, cblkperprech; + + if (qntsty->quantsty != J2K_QSTY_NONE){ + static const uint8_t lut_gain[2][4] = {{0, 0, 0, 0}, {0, 1, 1, 2}}; + int numbps; + + numbps = cbps + lut_gain[codsty->transform][bandno + (reslevelno>0)]; /* level 0 has one band (entry 0); the three bands of later levels use entries 1..3 */ + band->stepsize = SHL(2048 + qntsty->mant[gbandno], 2 + numbps - qntsty->expn[gbandno]); + } else + band->stepsize = 1 << 13; + + if (reslevelno == 0){ // the same everywhere + band->codeblock_width = 1 << FFMIN(codsty->log2_cblk_width, codsty->log2_prec_width-1); + band->codeblock_height = 1 << FFMIN(codsty->log2_cblk_height, codsty->log2_prec_height-1); + for (i = 0; i < 2; i++) + for (j = 0; j < 2; j++) + band->coord[i][j] = ff_j2k_ceildivpow2(comp->coord[i][j], declvl-1); + } else{ + band->codeblock_width = 1 << FFMIN(codsty->log2_cblk_width, codsty->log2_prec_width); + band->codeblock_height = 1 << FFMIN(codsty->log2_cblk_height, codsty->log2_prec_height); + + for (i = 0; i < 2; i++) + for (j = 0; j < 2; j++) + band->coord[i][j] = ff_j2k_ceildivpow2(comp->coord[i][j] - (((bandno+1>>i)&1) << declvl-1), declvl); + } + band->cblknx = ff_j2k_ceildiv(band->coord[0][1],
band->codeblock_width) - band->coord[0][0] / band->codeblock_width; + band->cblkny = ff_j2k_ceildiv(band->coord[1][1], band->codeblock_height) - band->coord[1][0] / band->codeblock_height; + + for (j = 0; j < 2; j++) + band->coord[0][j] = ff_j2k_ceildiv(band->coord[0][j], dx); + for (j = 0; j < 2; j++) + band->coord[1][j] = ff_j2k_ceildiv(band->coord[1][j], dy); + + band->cblknx = ff_j2k_ceildiv(band->cblknx, dx); + band->cblkny = ff_j2k_ceildiv(band->cblkny, dy); + + band->cblk = av_malloc(band->cblknx * band->cblkny * sizeof(J2kCblk)); + if (!band->cblk) + return AVERROR(ENOMEM); + band->prec = av_mallocz(reslevel->num_precincts_x * reslevel->num_precincts_y * sizeof(J2kPrec)); /* zeroed so the tag-tree pointers are NULL until assigned */ + if (!band->prec) + return AVERROR(ENOMEM); + + for (cblkno = 0; cblkno < band->cblknx * band->cblkny; cblkno++){ + J2kCblk *cblk = band->cblk + cblkno; + cblk->zero = 0; + cblk->lblock = 3; + cblk->length = 0; + cblk->lengthinc = 0; + cblk->npasses = 0; + } + + y0 = band->coord[1][0]; + y1 = ((band->coord[1][0] + (1<<codsty->log2_prec_height)) & ~((1<<codsty->log2_prec_height)-1)) - y0; + yi0 = 0; + yi1 = ff_j2k_ceildivpow2(y1 - y0, codsty->log2_cblk_height) << codsty->log2_cblk_height; + yi1 = FFMIN(yi1, band->cblkny); + cblkperprech = 1<<(codsty->log2_prec_height - codsty->log2_cblk_height); + for (precy = 0, precno = 0; precy < reslevel->num_precincts_y; precy++){ + for (precx = 0; precx < reslevel->num_precincts_x; precx++, precno++){ + band->prec[precno].yi0 = yi0; + band->prec[precno].yi1 = yi1; + } + yi1 += cblkperprech; + yi0 = yi1 - cblkperprech; + yi1 = FFMIN(yi1, band->cblkny); + } + x0 = band->coord[0][0]; + x1 = ((band->coord[0][0] + (1<<codsty->log2_prec_width)) & ~((1<<codsty->log2_prec_width)-1)) - x0; + xi0 = 0; + xi1 = ff_j2k_ceildivpow2(x1 - x0, codsty->log2_cblk_width) << codsty->log2_cblk_width; + xi1 = FFMIN(xi1, band->cblknx); + + cblkperprecw = 1<<(codsty->log2_prec_width - codsty->log2_cblk_width); + for (precx = 0; precx < reslevel->num_precincts_x;
precx++){ + for (precy = 0; precy < reslevel->num_precincts_y; precy++){ + J2kPrec *prec = band->prec + precy * reslevel->num_precincts_x + precx; /* row-major index, matching the yi0/yi1 loop above, so every precinct gets its own tag trees */ + prec->xi0 = xi0; + prec->xi1 = xi1; + prec->cblkincl = ff_j2k_tag_tree_init(prec->xi1 - prec->xi0, + prec->yi1 - prec->yi0); + prec->zerobits = ff_j2k_tag_tree_init(prec->xi1 - prec->xi0, + prec->yi1 - prec->yi0); + if (!prec->cblkincl || !prec->zerobits) + return AVERROR(ENOMEM); + + } + xi1 += cblkperprecw; + xi0 = xi1 - cblkperprecw; + xi1 = FFMIN(xi1, band->cblknx); + } + } + } + return 0; +} + +void ff_j2k_reinit(J2kComponent *comp, J2kCodingStyle *codsty) +{ + int reslevelno, bandno, cblkno, precno; + for (reslevelno = 0; reslevelno < codsty->nreslevels; reslevelno++){ + J2kResLevel *rlevel = comp->reslevel + reslevelno; + for (bandno = 0; bandno < rlevel->nbands; bandno++){ + J2kBand *band = rlevel->band + bandno; + for(precno = 0; precno < rlevel->num_precincts_x * rlevel->num_precincts_y; precno++){ + J2kPrec *prec = band->prec + precno; + tag_tree_zero(prec->zerobits, prec->xi1 - prec->xi0, prec->yi1 - prec->yi0); + tag_tree_zero(prec->cblkincl, prec->xi1 - prec->xi0, prec->yi1 - prec->yi0); + } + for (cblkno = 0; cblkno < band->cblknx * band->cblkny; cblkno++){ + J2kCblk *cblk = band->cblk + cblkno; + cblk->length = 0; + cblk->lblock = 3; + } + } + } +} + +void ff_j2k_cleanup(J2kComponent *comp, J2kCodingStyle *codsty) +{ + int reslevelno, bandno, precno; + for (reslevelno = 0; reslevelno < codsty->nreslevels; reslevelno++){ + J2kResLevel *reslevel = comp->reslevel + reslevelno; + + for (bandno = 0; bandno < reslevel->nbands ; bandno++){ + J2kBand *band = reslevel->band + bandno; + for (precno = 0; precno < reslevel->num_precincts_x * reslevel->num_precincts_y; precno++){ + J2kPrec *prec = band->prec + precno; + av_freep(&prec->zerobits); + av_freep(&prec->cblkincl); + } + av_freep(&band->cblk); + av_freep(&band->prec); + } + av_freep(&reslevel->band); + } + + ff_j2k_dwt_destroy(&comp->dwt); + av_freep(&comp->reslevel); +
av_freep(&comp->data); +} diff --git a/libavcodec/j2k.h b/libavcodec/j2k.h new file mode 100644 index 0000000000..85d5cd079c --- /dev/null +++ b/libavcodec/j2k.h @@ -0,0 +1,234 @@ +/* + * JPEG2000 tables + * Copyright (c) 2007 Kamil Nowosad + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_J2K_H +#define AVCODEC_J2K_H + +/** + * JPEG2000 tables + * @file + * @author Kamil Nowosad + */ + +#include "mqc.h" +#include "j2k_dwt.h" + +enum J2kMarkers{ + J2K_SOC = 0xff4f, ///< start of codestream + J2K_SIZ = 0xff51, ///< image and tile size + J2K_COD, ///< coding style default + J2K_COC, ///< coding style component + J2K_TLM = 0xff55, ///< tile-part lengths + J2K_PLM = 0xff57, ///< packet length, main header + J2K_PLT, ///< packet length, tile-part header + J2K_QCD = 0xff5c, ///< quantization default + J2K_QCC, ///< quantization component + J2K_RGN, ///< region of interest + J2K_POC, ///< progression order change + J2K_PPM, ///< packed packet headers, main header + J2K_PPT, ///< packed packet headers, tile-part header + J2K_CRG = 0xff63, ///< component registration + J2K_COM, ///< comment + J2K_SOT = 0xff90, ///< start of tile-part + J2K_SOP, ///< start of packet + J2K_EPH, ///< end of packet header + J2K_SOD, ///< start of data + J2K_EOC
= 0xffd9, ///< end of codestream +}; + +enum J2kQuantsty{ ///< quantization style + J2K_QSTY_NONE, ///< no quantization + J2K_QSTY_SI, ///< scalar derived + J2K_QSTY_SE ///< scalar expounded +}; + +#define J2K_MAX_CBLKW 64 +#define J2K_MAX_CBLKH 64 + +// T1 flags +// flags determining significance of neighbour coefficients +#define J2K_T1_SIG_N 0x0001 +#define J2K_T1_SIG_E 0x0002 +#define J2K_T1_SIG_W 0x0004 +#define J2K_T1_SIG_S 0x0008 +#define J2K_T1_SIG_NE 0x0010 +#define J2K_T1_SIG_NW 0x0020 +#define J2K_T1_SIG_SE 0x0040 +#define J2K_T1_SIG_SW 0x0080 +#define J2K_T1_SIG_NB (J2K_T1_SIG_N | J2K_T1_SIG_E | J2K_T1_SIG_S | J2K_T1_SIG_W \ + |J2K_T1_SIG_NE | J2K_T1_SIG_NW | J2K_T1_SIG_SE | J2K_T1_SIG_SW) +// flags determining sign bit of neighbour coefficients +#define J2K_T1_SGN_N 0x0100 +#define J2K_T1_SGN_S 0x0200 +#define J2K_T1_SGN_W 0x0400 +#define J2K_T1_SGN_E 0x0800 + +#define J2K_T1_VIS 0x1000 +#define J2K_T1_SIG 0x2000 +#define J2K_T1_REF 0x4000 + +#define J2K_T1_SGN 0x8000 + +// Codeblock coding styles +#define J2K_CBLK_BYPASS 0x01 // Selective arithmetic coding bypass +#define J2K_CBLK_RESET 0x02 // Reset context probabilities +#define J2K_CBLK_TERMALL 0x04 // Terminate after each coding pass +#define J2K_CBLK_VSC 0x08 // Vertical stripe causal context formation +#define J2K_CBLK_PREDTERM 0x10 // Predictable termination +#define J2K_CBLK_SEGSYM 0x20 // Segmentation symbols present + +// Coding styles +#define J2K_CSTY_PREC 0x01 // Precincts defined in coding style +#define J2K_CSTY_SOP 0x02 // SOP marker present +#define J2K_CSTY_EPH 0x04 // EPH marker present + +typedef struct { + int data[J2K_MAX_CBLKW][J2K_MAX_CBLKH]; + int flags[J2K_MAX_CBLKW+2][J2K_MAX_CBLKH+2]; + MqcState mqc; +} J2kT1Context; + +typedef struct J2kTgtNode { + uint8_t val; + uint8_t vis; + struct J2kTgtNode *parent; +} J2kTgtNode; + +typedef struct { + uint8_t nreslevels; ///< number of resolution levels + uint8_t log2_cblk_width, + log2_cblk_height; ///< exponent of codeblock size +
uint8_t transform; ///< DWT type + uint8_t csty; ///< coding style + uint8_t log2_prec_width, + log2_prec_height; ///< precinct size + uint8_t nlayers; ///< number of layers + uint8_t mct; ///< multiple component transformation + uint8_t cblk_style; ///< codeblock coding style +} J2kCodingStyle; + +typedef struct { + uint8_t expn[32 * 3]; ///< quantization exponent + uint16_t mant[32 * 3]; ///< quantization mantissa + uint8_t quantsty; ///< quantization style + uint8_t nguardbits; ///< number of guard bits +} J2kQuantStyle; + +typedef struct { + uint16_t rate; + int64_t disto; +} J2kPass; + +typedef struct { + uint8_t npasses; + uint8_t ninclpasses; ///< number coding of passes included in codestream + uint8_t nonzerobits; + uint16_t length; + uint16_t lengthinc; + uint8_t lblock; + uint8_t zero; + uint8_t data[8192]; + J2kPass passes[100]; +} J2kCblk; ///< code block + +typedef struct { + uint16_t xi0, xi1, yi0, yi1; ///< codeblock indexes ([xi0, xi1)) + J2kTgtNode *zerobits; + J2kTgtNode *cblkincl; +} J2kPrec; ///< precinct + +typedef struct { + uint16_t coord[2][2]; ///< border coordinates {{x0, x1}, {y0, y1}} + uint16_t codeblock_width, codeblock_height; + uint16_t cblknx, cblkny; + uint32_t stepsize; ///< quantization stepsize (* 2^13) + J2kPrec *prec; + J2kCblk *cblk; +} J2kBand; ///< subband + +typedef struct { + uint8_t nbands; + uint16_t coord[2][2]; ///< border coordinates {{x0, x1}, {y0, y1}} + uint16_t num_precincts_x, num_precincts_y; ///< number of precincts in x/y direction + uint8_t log2_prec_width, log2_prec_height; ///< exponent of precinct size + J2kBand *band; +} J2kResLevel; ///< resolution level + +typedef struct { + J2kResLevel *reslevel; + DWTContext dwt; + int *data; + uint16_t coord[2][2]; ///< border coordinates {{x0, x1}, {y0, y1}} +} J2kComponent; + +/* debug routines */ +#if 0 +#undef fprintf +#undef printf +void ff_j2k_printv(int *tab, int l); +void ff_j2k_printu(uint8_t *tab, int l); +#endif + +/* misc tools */ +static inline int 
ff_j2k_ceildivpow2(int a, int b) +{ + return (a + (1 << b) - 1)>> b; +} + +static inline int ff_j2k_ceildiv(int a, int b) +{ + return (a + b - 1) / b; +} + +/* tag tree routines */ +J2kTgtNode *ff_j2k_tag_tree_init(int w, int h); + +/* TIER-1 routines */ +void ff_j2k_init_tier1_luts(void); + +void ff_j2k_set_significant(J2kT1Context *t1, int x, int y, int negative); + +extern uint8_t ff_j2k_nbctxno_lut[256][4]; + +static inline int ff_j2k_getnbctxno(int flag, int bandno, int vert_causal_ctx_csty_symbol) +{ + return ff_j2k_nbctxno_lut[flag&255][bandno]; +} + +static inline int ff_j2k_getrefctxno(int flag) +{ + static const uint8_t refctxno_lut[2][2] = {{14, 15}, {16, 16}}; + return refctxno_lut[(flag>>14)&1][(flag & 255) != 0]; +} + +extern uint8_t ff_j2k_sgnctxno_lut[16][16], ff_j2k_xorbit_lut[16][16]; + +static inline int ff_j2k_getsgnctxno(int flag, int *xorbit) +{ + *xorbit = ff_j2k_xorbit_lut[flag&15][(flag>>8)&15]; + return ff_j2k_sgnctxno_lut[flag&15][(flag>>8)&15]; +} + +int ff_j2k_init_component(J2kComponent *comp, J2kCodingStyle *codsty, J2kQuantStyle *qntsty, int cbps, int dx, int dy); +void ff_j2k_reinit(J2kComponent *comp, J2kCodingStyle *codsty); +void ff_j2k_cleanup(J2kComponent *comp, J2kCodingStyle *codsty); + +#endif /* AVCODEC_J2K_H */ diff --git a/libavcodec/j2k_dwt.c b/libavcodec/j2k_dwt.c new file mode 100644 index 0000000000..9ba770ad83 --- /dev/null +++ b/libavcodec/j2k_dwt.c @@ -0,0 +1,384 @@ +/* + * Discrete wavelet transform + * Copyright (c) 2007 Kamil Nowosad + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * Discrete wavelet transform + * @file + * @author Kamil Nowosad + */ + +#include "j2k_dwt.h" + +const static float scale97[] = {1.625786, 1.230174}; + +static inline void extend53(int *p, int i0, int i1) +{ + p[i0 - 1] = p[i0 + 1]; + p[i1 ] = p[i1 - 2]; + p[i0 - 2] = p[i0 + 2]; + p[i1 + 1] = p[i1 - 3]; +} + +static inline void extend97(float *p, int i0, int i1) +{ + int i; + + for (i = 1; i <= 4; i++){ + p[i0 - i] = p[i0 + i]; + p[i1 + i - 1] = p[i1 - i - 1]; + } +} + +static void sd_1d53(int *p, int i0, int i1) +{ + int i; + + if (i1 == i0 + 1) + return; + + extend53(p, i0, i1); + + for (i = (i0+1)/2 - 1; i < (i1+1)/2; i++) + p[2*i+1] -= (p[2*i] + p[2*i+2]) >> 1; + for (i = (i0+1)/2; i < (i1+1)/2; i++) + p[2*i] += (p[2*i-1] + p[2*i+1] + 2) >> 2; +} + +static void dwt_encode53(DWTContext *s, int *t) +{ + int lev, + w = s->linelen[s->ndeclevels-1][0]; + int *line = s->linebuf; + line += 3; + + for (lev = s->ndeclevels-1; lev >= 0; lev--){ + int lh = s->linelen[lev][0], + lv = s->linelen[lev][1], + mh = s->mod[lev][0], + mv = s->mod[lev][1], + lp; + int *l; + + // HOR_SD + l = line + mh; + for (lp = 0; lp < lv; lp++){ + int i, j = 0; + + for (i = 0; i < lh; i++) + l[i] = t[w*lp + i]; + + sd_1d53(line, mh, mh + lh); + + // copy back and deinterleave + for (i = mh; i < lh; i+=2, j++) + t[w*lp + j] = l[i]; + for (i = 1-mh; i < lh; i+=2, j++) + t[w*lp + j] = l[i]; + } + + // VER_SD + l = line + mv; + for (lp = 0; lp < lh; lp++) { + int i, j = 0; + + for (i = 0; i < lv; i++) + l[i] = t[w*i + 
lp]; + + sd_1d53(line, mv, mv + lv); + + // copy back and deinterleave + for (i = mv; i < lv; i+=2, j++) + t[w*j + lp] = l[i]; + for (i = 1-mv; i < lv; i+=2, j++) + t[w*j + lp] = l[i]; + } + } +} + +static void sd_1d97(float *p, int i0, int i1) +{ + int i; + + if (i1 == i0 + 1) + return; + + extend97(p, i0, i1); + i0++; i1++; + + for (i = i0/2 - 2; i < i1/2 + 1; i++) + p[2*i+1] -= 1.586134 * (p[2*i] + p[2*i+2]); + for (i = i0/2 - 1; i < i1/2 + 1; i++) + p[2*i] -= 0.052980 * (p[2*i-1] + p[2*i+1]); + for (i = i0/2 - 1; i < i1/2; i++) + p[2*i+1] += 0.882911 * (p[2*i] + p[2*i+2]); + for (i = i0/2; i < i1/2; i++) + p[2*i] += 0.443506 * (p[2*i-1] + p[2*i+1]); +} + +static void dwt_encode97(DWTContext *s, int *t) +{ + int lev, + w = s->linelen[s->ndeclevels-1][0]; + float *line = s->linebuf; + line += 5; + + for (lev = s->ndeclevels-1; lev >= 0; lev--){ + int lh = s->linelen[lev][0], + lv = s->linelen[lev][1], + mh = s->mod[lev][0], + mv = s->mod[lev][1], + lp; + float *l; + + // HOR_SD + l = line + mh; + for (lp = 0; lp < lv; lp++){ + int i, j = 0; + + for (i = 0; i < lh; i++) + l[i] = t[w*lp + i]; + + sd_1d97(line, mh, mh + lh); + + // copy back and deinterleave + for (i = mh; i < lh; i+=2, j++) + t[w*lp + j] = scale97[mh] * l[i] / 2; + for (i = 1-mh; i < lh; i+=2, j++) + t[w*lp + j] = scale97[mh] * l[i] / 2; + } + + // VER_SD + l = line + mv; + for (lp = 0; lp < lh; lp++) { + int i, j = 0; + + for (i = 0; i < lv; i++) + l[i] = t[w*i + lp]; + + sd_1d97(line, mv, mv + lv); + + // copy back and deinterleave + for (i = mv; i < lv; i+=2, j++) + t[w*j + lp] = scale97[mv] * l[i] / 2; + for (i = 1-mv; i < lv; i+=2, j++) + t[w*j + lp] = scale97[mv] * l[i] / 2; + } + } +} + +static void sr_1d53(int *p, int i0, int i1) +{ + int i; + + if (i1 == i0 + 1) + return; + + extend53(p, i0, i1); + + for (i = i0/2; i < i1/2 + 1; i++) + p[2*i] -= (p[2*i-1] + p[2*i+1] + 2) >> 2; + for (i = i0/2; i < i1/2; i++) + p[2*i+1] += (p[2*i] + p[2*i+2]) >> 1; +} + +static void dwt_decode53(DWTContext 
*s, int *t) +{ + int lev, + w = s->linelen[s->ndeclevels-1][0]; + int *line = s->linebuf; + line += 3; + + for (lev = 0; lev < s->ndeclevels; lev++){ + int lh = s->linelen[lev][0], + lv = s->linelen[lev][1], + mh = s->mod[lev][0], + mv = s->mod[lev][1], + lp; + int *l; + + // HOR_SD + l = line + mh; + for (lp = 0; lp < lv; lp++){ + int i, j = 0; + // copy with interleaving + for (i = mh; i < lh; i+=2, j++) + l[i] = t[w*lp + j]; + for (i = 1-mh; i < lh; i+=2, j++) + l[i] = t[w*lp + j]; + + sr_1d53(line, mh, mh + lh); + + for (i = 0; i < lh; i++) + t[w*lp + i] = l[i]; + } + + // VER_SD + l = line + mv; + for (lp = 0; lp < lh; lp++){ + int i, j = 0; + // copy with interleaving + for (i = mv; i < lv; i+=2, j++) + l[i] = t[w*j + lp]; + for (i = 1-mv; i < lv; i+=2, j++) + l[i] = t[w*j + lp]; + + sr_1d53(line, mv, mv + lv); + + for (i = 0; i < lv; i++) + t[w*i + lp] = l[i]; + } + } +} + +static void sr_1d97(float *p, int i0, int i1) +{ + int i; + + if (i1 == i0 + 1) + return; + + extend97(p, i0, i1); + + for (i = i0/2 - 1; i < i1/2 + 2; i++) + p[2*i] -= 0.443506 * (p[2*i-1] + p[2*i+1]); + for (i = i0/2 - 1; i < i1/2 + 1; i++) + p[2*i+1] -= 0.882911 * (p[2*i] + p[2*i+2]); + for (i = i0/2; i < i1/2 + 1; i++) + p[2*i] += 0.052980 * (p[2*i-1] + p[2*i+1]); + for (i = i0/2; i < i1/2; i++) + p[2*i+1] += 1.586134 * (p[2*i] + p[2*i+2]); +} + +static void dwt_decode97(DWTContext *s, int *t) +{ + int lev, + w = s->linelen[s->ndeclevels-1][0]; + float *line = s->linebuf; + line += 5; + + for (lev = 0; lev < s->ndeclevels; lev++){ + int lh = s->linelen[lev][0], + lv = s->linelen[lev][1], + mh = s->mod[lev][0], + mv = s->mod[lev][1], + lp; + float *l; + + // HOR_SD + l = line + mh; + for (lp = 0; lp < lv; lp++){ + int i, j = 0; + // copy with interleaving + for (i = mh; i < lh; i+=2, j++) + l[i] = scale97[1-mh] * t[w*lp + j]; + for (i = 1-mh; i < lh; i+=2, j++) + l[i] = scale97[1-mh] * t[w*lp + j]; + + sr_1d97(line, mh, mh + lh); + + for (i = 0; i < lh; i++) + t[w*lp + i] = l[i]; + } + 
+ + // VER_SD + l = line + mv; + for (lp = 0; lp < lh; lp++){ + int i, j = 0; + // copy with interleaving + for (i = mv; i < lv; i+=2, j++) + l[i] = scale97[1-mv] * t[w*j + lp]; + for (i = 1-mv; i < lv; i+=2, j++) + l[i] = scale97[1-mv] * t[w*j + lp]; + + sr_1d97(line, mv, mv + lv); + + for (i = 0; i < lv; i++) + t[w*i + lp] = l[i]; + } + } +} +/* Fill in the per-level line lengths and origin parities for the given border and allocate the line buffer. Returns 0 on success, AVERROR(EINVAL) for an out-of-range level count, -1 for an unknown type, AVERROR(ENOMEM) if the buffer cannot be allocated. */ +int ff_j2k_dwt_init(DWTContext *s, uint16_t border[2][2], int decomp_levels, int type) +{ + int i, j, lev = decomp_levels, maxlen, + b[2][2]; + if (decomp_levels < 0 || decomp_levels > FF_DWT_MAX_DECLVLS) return AVERROR(EINVAL); /* linelen[] and mod[] hold FF_DWT_MAX_DECLVLS entries; ndeclevels is a uint8_t */ + s->ndeclevels = decomp_levels; + s->type = type; + + for (i = 0; i < 2; i++) + for(j = 0; j < 2; j++) + b[i][j] = border[i][j]; + + maxlen = FFMAX(b[0][1] - b[0][0], + b[1][1] - b[1][0]); + + while(--lev >= 0){ + for (i = 0; i < 2; i++){ + s->linelen[lev][i] = b[i][1] - b[i][0]; + s->mod[lev][i] = b[i][0] & 1; + for (j = 0; j < 2; j++) + b[i][j] = (b[i][j] + 1) >> 1; + } + } + if (type == FF_DWT97) + s->linebuf = av_malloc((maxlen + 12) * sizeof(float)); + else if (type == FF_DWT53) + s->linebuf = av_malloc((maxlen + 6) * sizeof(int)); + else + return -1; + + if (!s->linebuf) + return AVERROR(ENOMEM); + + return 0; +} + +int ff_j2k_dwt_encode(DWTContext *s, int *t) +{ + switch(s->type){ + case FF_DWT97: + dwt_encode97(s, t); break; + case FF_DWT53: + dwt_encode53(s, t); break; + default: + return -1; + } + return 0; +} + +int ff_j2k_dwt_decode(DWTContext *s, int *t) +{ + switch(s->type){ + case FF_DWT97: + dwt_decode97(s, t); break; + case FF_DWT53: + dwt_decode53(s, t); break; + default: + return -1; + } + return 0; +} + +void ff_j2k_dwt_destroy(DWTContext *s) +{ + av_freep(&s->linebuf); +} diff --git a/libavcodec/j2k_dwt.h b/libavcodec/j2k_dwt.h new file mode 100644 index 0000000000..a2a25a6891 --- /dev/null +++ b/libavcodec/j2k_dwt.h @@ -0,0 +1,63 @@ +/* + * Discrete wavelet transform + * Copyright (c) 2007 Kamil Nowosad + * + * This file is part of FFmpeg.
+ * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_J2K_DWT_H /* guard named after this file so it cannot clash with a generic DWT header guard */ +#define AVCODEC_J2K_DWT_H + +/** + * Discrete wavelet transform + * @file + * @author Kamil Nowosad + */ + +#include "avcodec.h" + +#define FF_DWT_MAX_DECLVLS 32 ///< max number of decomposition levels + +enum DWTType{ + FF_DWT97, + FF_DWT53 +}; + +typedef struct { + ///line lengths {horizontal, vertical} in consecutive decomposition levels + uint16_t linelen[FF_DWT_MAX_DECLVLS][2]; + uint8_t mod[FF_DWT_MAX_DECLVLS][2]; ///< coordinates (x0, y0) of decomp.
levels mod 2 + uint8_t ndeclevels; ///< number of decomposition levels + uint8_t type; ///< 0 for 9/7; 1 for 5/3 + void *linebuf; ///< buffer used by transform (int or float) +} DWTContext; + +/** + * initialize DWT + * @param s DWT context + * @param border coordinates of transformed region {{x0, x1}, {y0, y1}} + * @param decomp_levels number of decomposition levels + * @param type 0 for DWT 9/7; 1 for DWT 5/3 + */ +int ff_j2k_dwt_init(DWTContext *s, uint16_t border[2][2], int decomp_levels, int type); + +int ff_j2k_dwt_encode(DWTContext *s, int *t); +int ff_j2k_dwt_decode(DWTContext *s, int *t); + +void ff_j2k_dwt_destroy(DWTContext *s); + +#endif /* AVCODEC_J2K_DWT_H */ diff --git a/libavcodec/j2kdec.c b/libavcodec/j2kdec.c new file mode 100644 index 0000000000..73af6a73df --- /dev/null +++ b/libavcodec/j2kdec.c @@ -0,0 +1,1067 @@ +/* + * JPEG2000 image decoder + * Copyright (c) 2007 Kamil Nowosad + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * JPEG2000 image decoder + * @file + * @author Kamil Nowosad + */ + +#include "avcodec.h" +#include "bytestream.h" +#include "j2k.h" +#include "libavutil/common.h" + +#define JP2_SIG_TYPE 0x6A502020 +#define JP2_SIG_VALUE 0x0D0A870A +#define JP2_CODESTREAM 0x6A703263 + +#define HAD_COC 0x01 +#define HAD_QCC 0x02 + +typedef struct { + J2kComponent *comp; + uint8_t properties[4]; + J2kCodingStyle codsty[4]; + J2kQuantStyle qntsty[4]; +} J2kTile; + +typedef struct { + AVCodecContext *avctx; + AVFrame picture; + + int width, height; ///< image width and height + int image_offset_x, image_offset_y; + int tile_offset_x, tile_offset_y; + uint8_t cbps[4]; ///< bits per sample in particular components + uint8_t sgnd[4]; ///< if a component is signed + uint8_t properties[4]; + int cdx[4], cdy[4]; + int precision; + int ncomponents; + int tile_width, tile_height; ///< tile size + int numXtiles, numYtiles; + int maxtilelen; + + J2kCodingStyle codsty[4]; + J2kQuantStyle qntsty[4]; + + uint8_t *buf_start; + uint8_t *buf; + uint8_t *buf_end; + int bit_index; + + int16_t curtileno; + + J2kTile *tile; +} J2kDecoderContext; +/* Read n bits, most significant first, from s->buf; the byte that follows a 0xff byte supplies only 7 bits. Returns the value read, or AVERROR(EINVAL) when the buffer cannot hold the bytes the read needs. */ +static int get_bits(J2kDecoderContext *s, int n) +{ + int res = 0; + if (s->buf_end - s->buf < ((n - s->bit_index + 7) >> 3) + 1) /* current byte plus the whole bytes still needed; a lower bound, since a byte after 0xff carries 7 bits, so reads that stay in bounds are never rejected */ + return AVERROR(EINVAL); + while (--n >= 0){ + res <<= 1; + if (s->bit_index == 0){ + s->bit_index = 7 + (*s->buf != 0xff); + s->buf++; + } + s->bit_index--; + res |= (*s->buf >> s->bit_index) & 1; + } + return res; +} + +static void j2k_flush(J2kDecoderContext *s) +{ + if (*s->buf == 0xff) + s->buf++; + s->bit_index = 8; + s->buf++; +} +#if 0 +void printcomp(J2kComponent *comp) +{ + int i; + for (i = 0; i < comp->y1 - comp->y0; i++) + ff_j2k_printv(comp->data + i * (comp->x1 - comp->x0), comp->x1 - comp->x0);
+} + +static void nspaces(FILE *fd, int n) +{ + while(n--) putc(' ', fd); +} + +static void dump(J2kDecoderContext *s, FILE *fd) +{ + int tileno, compno, reslevelno, bandno, precno; + fprintf(fd, "XSiz = %d, YSiz = %d, tile_width = %d, tile_height = %d\n" + "numXtiles = %d, numYtiles = %d, ncomponents = %d\n" + "tiles:\n", + s->width, s->height, s->tile_width, s->tile_height, + s->numXtiles, s->numYtiles, s->ncomponents); + for (tileno = 0; tileno < s->numXtiles * s->numYtiles; tileno++){ + J2kTile *tile = s->tile + tileno; + nspaces(fd, 2); + fprintf(fd, "tile %d:\n", tileno); + for(compno = 0; compno < s->ncomponents; compno++){ + J2kComponent *comp = tile->comp + compno; + nspaces(fd, 4); + fprintf(fd, "component %d:\n", compno); + nspaces(fd, 4); + fprintf(fd, "x0 = %d, x1 = %d, y0 = %d, y1 = %d\n", + comp->x0, comp->x1, comp->y0, comp->y1); + for(reslevelno = 0; reslevelno < codsty->nreslevels; reslevelno++){ + J2kResLevel *reslevel = comp->reslevel + reslevelno; + nspaces(fd, 6); + fprintf(fd, "reslevel %d:\n", reslevelno); + nspaces(fd, 6); + fprintf(fd, "x0 = %d, x1 = %d, y0 = %d, y1 = %d, nbands = %d\n", + reslevel->x0, reslevel->x1, reslevel->y0, + reslevel->y1, reslevel->nbands); + for(bandno = 0; bandno < reslevel->nbands; bandno++){ + J2kBand *band = reslevel->band + bandno; + nspaces(fd, 8); + fprintf(fd, "band %d:\n", bandno); + nspaces(fd, 8); + fprintf(fd, "x0 = %d, x1 = %d, y0 = %d, y1 = %d," + "codeblock_width = %d, codeblock_height = %d cblknx = %d cblkny = %d\n", + band->x0, band->x1, + band->y0, band->y1, + band->codeblock_width, band->codeblock_height, + band->cblknx, band->cblkny); + for (precno = 0; precno < reslevel->num_precincts_x * reslevel->num_precincts_y; precno++){ + J2kPrec *prec = band->prec + precno; + nspaces(fd, 10); + fprintf(fd, "prec %d:\n", precno); + nspaces(fd, 10); + fprintf(fd, "xi0 = %d, xi1 = %d, yi0 = %d, yi1 = %d\n", + prec->xi0, prec->xi1, prec->yi0, prec->yi1); + } + } + } + } + } +} +#endif + +/** decode the 
value stored in node */ +static int tag_tree_decode(J2kDecoderContext *s, J2kTgtNode *node, int threshold) +{ + J2kTgtNode *stack[30]; + int sp = -1, curval = 0; + + while(node && !node->vis){ + stack[++sp] = node; + node = node->parent; + } + + if (node) + curval = node->val; + else + curval = stack[sp]->val; + + while(curval < threshold && sp >= 0){ + if (curval < stack[sp]->val) + curval = stack[sp]->val; + while (curval < threshold){ + int ret; + if ((ret = get_bits(s, 1)) > 0){ + stack[sp]->vis++; + break; + } else if (!ret) + curval++; + else + return ret; + } + stack[sp]->val = curval; + sp--; + } + return curval; +} + +/* marker segments */ +/** get sizes and offsets of image, tiles; number of components */ +static int get_siz(J2kDecoderContext *s) +{ + int i, ret; + + if (s->buf_end - s->buf < 36) + return AVERROR(EINVAL); + + bytestream_get_be16(&s->buf); // Rsiz (skipped) + s->width = bytestream_get_be32(&s->buf); // width + s->height = bytestream_get_be32(&s->buf); // height + s->image_offset_x = bytestream_get_be32(&s->buf); // X0Siz + s->image_offset_y = bytestream_get_be32(&s->buf); // Y0Siz + + s->tile_width = bytestream_get_be32(&s->buf); // XTSiz + s->tile_height = bytestream_get_be32(&s->buf); // YTSiz + s->tile_offset_x = bytestream_get_be32(&s->buf); // XT0Siz + s->tile_offset_y = bytestream_get_be32(&s->buf); // YT0Siz + s->ncomponents = bytestream_get_be16(&s->buf); // CSiz + + if (s->buf_end - s->buf < 2 * s->ncomponents) + return AVERROR(EINVAL); + + for (i = 0; i < s->ncomponents; i++){ // Ssiz_i XRsiz_i, YRsiz_i + uint8_t x = bytestream_get_byte(&s->buf); + s->cbps[i] = (x & 0x7f) + 1; + s->precision = FFMAX(s->cbps[i], s->precision); + s->sgnd[i] = (x & 0x80) == 1; + s->cdx[i] = bytestream_get_byte(&s->buf); + s->cdy[i] = bytestream_get_byte(&s->buf); + } + + s->numXtiles = ff_j2k_ceildiv(s->width - s->tile_offset_x, s->tile_width); + s->numYtiles = ff_j2k_ceildiv(s->height - s->tile_offset_y, s->tile_height); + + s->tile = 
av_mallocz(s->numXtiles * s->numYtiles * sizeof(J2kTile)); + if (!s->tile) + return AVERROR(ENOMEM); + + for (i = 0; i < s->numXtiles * s->numYtiles; i++){ + J2kTile *tile = s->tile + i; + + tile->comp = av_mallocz(s->ncomponents * sizeof(J2kComponent)); + if (!tile->comp) + return AVERROR(ENOMEM); + } + + s->avctx->width = s->width - s->image_offset_x; + s->avctx->height = s->height - s->image_offset_y; + + switch(s->ncomponents){ + case 1: if (s->precision > 8) { + s->avctx->pix_fmt = PIX_FMT_GRAY16; + } else s->avctx->pix_fmt = PIX_FMT_GRAY8; + break; + case 3: if (s->precision > 8) { + s->avctx->pix_fmt = PIX_FMT_RGB48; + } else s->avctx->pix_fmt = PIX_FMT_RGB24; + break; + case 4: s->avctx->pix_fmt = PIX_FMT_BGRA; break; + } + + if (s->picture.data[0]) + s->avctx->release_buffer(s->avctx, &s->picture); + + if ((ret = s->avctx->get_buffer(s->avctx, &s->picture)) < 0) + return ret; + + s->picture.pict_type = FF_I_TYPE; + s->picture.key_frame = 1; + + return 0; +} + +/** get common part for COD and COC segments */ +static int get_cox(J2kDecoderContext *s, J2kCodingStyle *c) +{ + if (s->buf_end - s->buf < 5) + return AVERROR(EINVAL); + c->nreslevels = bytestream_get_byte(&s->buf) + 1; // num of resolution levels - 1 + c->log2_cblk_width = bytestream_get_byte(&s->buf) + 2; // cblk width + c->log2_cblk_height = bytestream_get_byte(&s->buf) + 2; // cblk height + + c->cblk_style = bytestream_get_byte(&s->buf); + if (c->cblk_style != 0){ // cblk style + av_log(s->avctx, AV_LOG_WARNING, "extra cblk styles %X\n", c->cblk_style); + } + c->transform = bytestream_get_byte(&s->buf); // transformation + if (c->csty & J2K_CSTY_PREC) { + int i; + for (i = 0; i < c->nreslevels; i++) + bytestream_get_byte(&s->buf); + } + return 0; +} + +/** get coding parameters for a particular tile or whole image*/ +static int get_cod(J2kDecoderContext *s, J2kCodingStyle *c, uint8_t *properties) +{ + J2kCodingStyle tmp; + int compno; + + if (s->buf_end - s->buf < 5) + return AVERROR(EINVAL); + 
+ tmp.log2_prec_width = + tmp.log2_prec_height = 15; + + tmp.csty = bytestream_get_byte(&s->buf); + + if (bytestream_get_byte(&s->buf)){ // progression level + av_log(s->avctx, AV_LOG_ERROR, "only LRCP progression supported\n"); + return -1; + } + + tmp.nlayers = bytestream_get_be16(&s->buf); + tmp.mct = bytestream_get_byte(&s->buf); // multiple component transformation + + get_cox(s, &tmp); + for (compno = 0; compno < s->ncomponents; compno++){ + if (!(properties[compno] & HAD_COC)) + memcpy(c + compno, &tmp, sizeof(J2kCodingStyle)); + } + return 0; +} + +/** get coding parameters for a component in the whole image on a particular tile */ +static int get_coc(J2kDecoderContext *s, J2kCodingStyle *c, uint8_t *properties) +{ + int compno; + + if (s->buf_end - s->buf < 2) + return AVERROR(EINVAL); + + compno = bytestream_get_byte(&s->buf); + + c += compno; + c->csty = bytestream_get_byte(&s->buf); + get_cox(s, c); + + properties[compno] |= HAD_COC; + return 0; +} + +/** get common part for QCD and QCC segments */ +static int get_qcx(J2kDecoderContext *s, int n, J2kQuantStyle *q) +{ + int i, x; + + if (s->buf_end - s->buf < 1) + return AVERROR(EINVAL); + + x = bytestream_get_byte(&s->buf); // Sqcd + + q->nguardbits = x >> 5; + q->quantsty = x & 0x1f; + + if (q->quantsty == J2K_QSTY_NONE){ + n -= 3; + if (s->buf_end - s->buf < n) + return AVERROR(EINVAL); + for (i = 0; i < n; i++) + q->expn[i] = bytestream_get_byte(&s->buf) >> 3; + } else if (q->quantsty == J2K_QSTY_SI){ + if (s->buf_end - s->buf < 2) + return AVERROR(EINVAL); + x = bytestream_get_be16(&s->buf); + q->expn[0] = x >> 11; + q->mant[0] = x & 0x7ff; + for (i = 1; i < 32 * 3; i++){ + int curexpn = FFMAX(0, q->expn[0] - (i-1)/3); + q->expn[i] = curexpn; + q->mant[i] = q->mant[0]; + } + } else{ + n = (n - 3) >> 1; + if (s->buf_end - s->buf < n) + return AVERROR(EINVAL); + for (i = 0; i < n; i++){ + x = bytestream_get_be16(&s->buf); + q->expn[i] = x >> 11; + q->mant[i] = x & 0x7ff; + } + } + return 0; +} + +/** 
get quantization parameters for a particular tile or a whole image */ +static int get_qcd(J2kDecoderContext *s, int n, J2kQuantStyle *q, uint8_t *properties) +{ + J2kQuantStyle tmp; + int compno; + + if (get_qcx(s, n, &tmp)) + return -1; + for (compno = 0; compno < s->ncomponents; compno++) + if (!(properties[compno] & HAD_QCC)) + memcpy(q + compno, &tmp, sizeof(J2kQuantStyle)); + return 0; +} + +/** get quantization parameters for a component in the whole image on in a particular tile */ +static int get_qcc(J2kDecoderContext *s, int n, J2kQuantStyle *q, uint8_t *properties) +{ + int compno; + + if (s->buf_end - s->buf < 1) + return AVERROR(EINVAL); + + compno = bytestream_get_byte(&s->buf); + properties[compno] |= HAD_QCC; + return get_qcx(s, n-1, q+compno); +} + +/** get start of tile segment */ +static uint8_t get_sot(J2kDecoderContext *s) +{ + if (s->buf_end - s->buf < 4) + return AVERROR(EINVAL); + + s->curtileno = bytestream_get_be16(&s->buf); ///< Isot + + s->buf += 4; ///< Psot (ignored) + + if (!bytestream_get_byte(&s->buf)){ ///< TPsot + J2kTile *tile = s->tile + s->curtileno; + + /* copy defaults */ + memcpy(tile->codsty, s->codsty, s->ncomponents * sizeof(J2kCodingStyle)); + memcpy(tile->qntsty, s->qntsty, s->ncomponents * sizeof(J2kQuantStyle)); + } + bytestream_get_byte(&s->buf); ///< TNsot + + return 0; +} + +static int init_tile(J2kDecoderContext *s, int tileno) +{ + int compno, + tilex = tileno % s->numXtiles, + tiley = tileno / s->numXtiles; + J2kTile *tile = s->tile + tileno; + + if (!tile->comp) + return AVERROR(ENOMEM); + for (compno = 0; compno < s->ncomponents; compno++){ + J2kComponent *comp = tile->comp + compno; + J2kCodingStyle *codsty = tile->codsty + compno; + J2kQuantStyle *qntsty = tile->qntsty + compno; + int ret; // global bandno + + comp->coord[0][0] = FFMAX(tilex * s->tile_width + s->tile_offset_x, s->image_offset_x); + comp->coord[0][1] = FFMIN((tilex+1)*s->tile_width + s->tile_offset_x, s->width); + comp->coord[1][0] = 
FFMAX(tiley * s->tile_height + s->tile_offset_y, s->image_offset_y); + comp->coord[1][1] = FFMIN((tiley+1)*s->tile_height + s->tile_offset_y, s->height); + + if (ret = ff_j2k_init_component(comp, codsty, qntsty, s->cbps[compno], s->cdx[compno], s->cdy[compno])) + return ret; + } + return 0; +} + +/** read the number of coding passes */ +static int getnpasses(J2kDecoderContext *s) +{ + int num; + if (!get_bits(s, 1)) + return 1; + if (!get_bits(s, 1)) + return 2; + if ((num = get_bits(s, 2)) != 3) + return num < 0 ? num : 3 + num; + if ((num = get_bits(s, 5)) != 31) + return num < 0 ? num : 6 + num; + num = get_bits(s, 7); + return num < 0 ? num : 37 + num; +} + +static int getlblockinc(J2kDecoderContext *s) +{ + int res = 0, ret; + while (ret = get_bits(s, 1)){ + if (ret < 0) + return ret; + res++; + } + return res; +} + +static int decode_packet(J2kDecoderContext *s, J2kCodingStyle *codsty, J2kResLevel *rlevel, int precno, + int layno, uint8_t *expn, int numgbits) +{ + int bandno, cblkny, cblknx, cblkno, ret; + + if (!(ret = get_bits(s, 1))){ + j2k_flush(s); + return 0; + } else if (ret < 0) + return ret; + + for (bandno = 0; bandno < rlevel->nbands; bandno++){ + J2kBand *band = rlevel->band + bandno; + J2kPrec *prec = band->prec + precno; + int pos = 0; + + if (band->coord[0][0] == band->coord[0][1] + || band->coord[1][0] == band->coord[1][1]) + continue; + + for (cblkny = prec->yi0; cblkny < prec->yi1; cblkny++) + for(cblknx = prec->xi0, cblkno = cblkny * band->cblknx + cblknx; cblknx < prec->xi1; cblknx++, cblkno++, pos++){ + J2kCblk *cblk = band->cblk + cblkno; + int incl, newpasses, llen; + + if (cblk->npasses) + incl = get_bits(s, 1); + else + incl = tag_tree_decode(s, prec->cblkincl + pos, layno+1) == layno; + if (!incl) + continue; + else if (incl < 0) + return incl; + + if (!cblk->npasses) + cblk->nonzerobits = expn[bandno] + numgbits - 1 - tag_tree_decode(s, prec->zerobits + pos, 100); + if ((newpasses = getnpasses(s)) < 0) + return newpasses; + if 
((llen = getlblockinc(s)) < 0) + return llen; + cblk->lblock += llen; + if ((ret = get_bits(s, av_log2(newpasses) + cblk->lblock)) < 0) + return ret; + cblk->lengthinc = ret; + cblk->npasses += newpasses; + } + } + j2k_flush(s); + + if (codsty->csty & J2K_CSTY_EPH) { + if (AV_RB16(s->buf) == J2K_EPH) { + s->buf += 2; + } else { + av_log(s->avctx, AV_LOG_ERROR, "EPH marker not found.\n"); + } + } + + for (bandno = 0; bandno < rlevel->nbands; bandno++){ + J2kBand *band = rlevel->band + bandno; + int yi, cblknw = band->prec[precno].xi1 - band->prec[precno].xi0; + for (yi = band->prec[precno].yi0; yi < band->prec[precno].yi1; yi++){ + int xi; + for (xi = band->prec[precno].xi0; xi < band->prec[precno].xi1; xi++){ + J2kCblk *cblk = band->cblk + yi * cblknw + xi; + if (s->buf_end - s->buf < cblk->lengthinc) + return AVERROR(EINVAL); + bytestream_get_buffer(&s->buf, cblk->data, cblk->lengthinc); + cblk->length += cblk->lengthinc; + cblk->lengthinc = 0; + } + } + } + return 0; +} + +static int decode_packets(J2kDecoderContext *s, J2kTile *tile) +{ + int layno, reslevelno, compno, precno, ok_reslevel; + s->bit_index = 8; + for (layno = 0; layno < tile->codsty[0].nlayers; layno++){ + ok_reslevel = 1; + for (reslevelno = 0; ok_reslevel; reslevelno++){ + ok_reslevel = 0; + for (compno = 0; compno < s->ncomponents; compno++){ + J2kCodingStyle *codsty = tile->codsty + compno; + J2kQuantStyle *qntsty = tile->qntsty + compno; + if (reslevelno < codsty->nreslevels){ + J2kResLevel *rlevel = tile->comp[compno].reslevel + reslevelno; + ok_reslevel = 1; + for (precno = 0; precno < rlevel->num_precincts_x * rlevel->num_precincts_y; precno++){ + if (decode_packet(s, codsty, rlevel, precno, layno, qntsty->expn + + (reslevelno ? 
3*(reslevelno-1)+1 : 0), qntsty->nguardbits)) + return -1; + } + } + } + } + } + return 0; +} + +/* TIER-1 routines */ +static void decode_sigpass(J2kT1Context *t1, int width, int height, int bpno, int bandno, int bpass_csty_symbol, + int vert_causal_ctx_csty_symbol) +{ + int mask = 3 << (bpno - 1), y0, x, y; + + for (y0 = 0; y0 < height; y0 += 4) + for (x = 0; x < width; x++) + for (y = y0; y < height && y < y0+4; y++){ + if ((t1->flags[y+1][x+1] & J2K_T1_SIG_NB) + && !(t1->flags[y+1][x+1] & (J2K_T1_SIG | J2K_T1_VIS))){ + int vert_causal_ctx_csty_loc_symbol = vert_causal_ctx_csty_symbol && (x == 3 && y == 3); + if (ff_mqc_decode(&t1->mqc, t1->mqc.cx_states + ff_j2k_getnbctxno(t1->flags[y+1][x+1], bandno, + vert_causal_ctx_csty_loc_symbol))){ + int xorbit, ctxno = ff_j2k_getsgnctxno(t1->flags[y+1][x+1], &xorbit); + if (bpass_csty_symbol) + t1->data[y][x] = ff_mqc_decode(&t1->mqc, t1->mqc.cx_states + ctxno) ? -mask : mask; + else + t1->data[y][x] = (ff_mqc_decode(&t1->mqc, t1->mqc.cx_states + ctxno) ^ xorbit) ? + -mask : mask; + + ff_j2k_set_significant(t1, x, y, t1->data[y][x] < 0); + } + t1->flags[y+1][x+1] |= J2K_T1_VIS; + } + } +} + +static void decode_refpass(J2kT1Context *t1, int width, int height, int bpno) +{ + int phalf, nhalf; + int y0, x, y; + + phalf = 1 << (bpno - 1); + nhalf = -phalf; + + for (y0 = 0; y0 < height; y0 += 4) + for (x = 0; x < width; x++) + for (y = y0; y < height && y < y0+4; y++){ + if ((t1->flags[y+1][x+1] & (J2K_T1_SIG | J2K_T1_VIS)) == J2K_T1_SIG){ + int ctxno = ff_j2k_getrefctxno(t1->flags[y+1][x+1]); + int r = ff_mqc_decode(&t1->mqc, t1->mqc.cx_states + ctxno) ? phalf : nhalf; + t1->data[y][x] += t1->data[y][x] < 0 ? 
-r : r; + t1->flags[y+1][x+1] |= J2K_T1_REF; + } + } +} + +static void decode_clnpass(J2kDecoderContext *s, J2kT1Context *t1, int width, int height, + int bpno, int bandno, int seg_symbols) +{ + int mask = 3 << (bpno - 1), y0, x, y, runlen, dec; + + for (y0 = 0; y0 < height; y0 += 4) { + for (x = 0; x < width; x++){ + if (y0 + 3 < height && !( + (t1->flags[y0+1][x+1] & (J2K_T1_SIG_NB | J2K_T1_VIS | J2K_T1_SIG)) || + (t1->flags[y0+2][x+1] & (J2K_T1_SIG_NB | J2K_T1_VIS | J2K_T1_SIG)) || + (t1->flags[y0+3][x+1] & (J2K_T1_SIG_NB | J2K_T1_VIS | J2K_T1_SIG)) || + (t1->flags[y0+4][x+1] & (J2K_T1_SIG_NB | J2K_T1_VIS | J2K_T1_SIG)))){ + if (!ff_mqc_decode(&t1->mqc, t1->mqc.cx_states + MQC_CX_RL)) + continue; + runlen = ff_mqc_decode(&t1->mqc, t1->mqc.cx_states + MQC_CX_UNI); + runlen = (runlen << 1) | ff_mqc_decode(&t1->mqc, t1->mqc.cx_states + MQC_CX_UNI); + dec = 1; + } else{ + runlen = 0; + dec = 0; + } + + for (y = y0 + runlen; y < y0 + 4 && y < height; y++){ + if (!dec){ + if (!(t1->flags[y+1][x+1] & (J2K_T1_SIG | J2K_T1_VIS))) + dec = ff_mqc_decode(&t1->mqc, t1->mqc.cx_states + ff_j2k_getnbctxno(t1->flags[y+1][x+1], + bandno, 0)); + } + if (dec){ + int xorbit, ctxno = ff_j2k_getsgnctxno(t1->flags[y+1][x+1], &xorbit); + t1->data[y][x] = (ff_mqc_decode(&t1->mqc, t1->mqc.cx_states + ctxno) ^ xorbit) ? 
-mask : mask; + ff_j2k_set_significant(t1, x, y, t1->data[y][x] < 0); + } + dec = 0; + t1->flags[y+1][x+1] &= ~J2K_T1_VIS; + } + } + } + if (seg_symbols) { + int val; + val = ff_mqc_decode(&t1->mqc, t1->mqc.cx_states + MQC_CX_UNI); + val = (val << 1) + ff_mqc_decode(&t1->mqc, t1->mqc.cx_states + MQC_CX_UNI); + val = (val << 1) + ff_mqc_decode(&t1->mqc, t1->mqc.cx_states + MQC_CX_UNI); + val = (val << 1) + ff_mqc_decode(&t1->mqc, t1->mqc.cx_states + MQC_CX_UNI); + if (val != 0xa) { + av_log(s->avctx, AV_LOG_ERROR,"Segmentation symbol value incorrect\n"); + } + } +} + +static int decode_cblk(J2kDecoderContext *s, J2kCodingStyle *codsty, J2kT1Context *t1, J2kCblk *cblk, + int width, int height, int bandpos) +{ + int passno = cblk->npasses, pass_t = 2, bpno = cblk->nonzerobits - 1, y, clnpass_cnt = 0; + int bpass_csty_symbol = J2K_CBLK_BYPASS & codsty->cblk_style; + int vert_causal_ctx_csty_symbol = J2K_CBLK_VSC & codsty->cblk_style; + + for (y = 0; y < height+2; y++) + memset(t1->flags[y], 0, (width+2)*sizeof(int)); + + for (y = 0; y < height; y++) + memset(t1->data[y], 0, width*sizeof(int)); + + ff_mqc_initdec(&t1->mqc, cblk->data); + cblk->data[cblk->length] = 0xff; + cblk->data[cblk->length+1] = 0xff; + + while(passno--){ + switch(pass_t){ + case 0: decode_sigpass(t1, width, height, bpno+1, bandpos, + bpass_csty_symbol && (clnpass_cnt >= 4), vert_causal_ctx_csty_symbol); + break; + case 1: decode_refpass(t1, width, height, bpno+1); + if (bpass_csty_symbol && clnpass_cnt >= 4) + ff_mqc_initdec(&t1->mqc, cblk->data); + break; + case 2: decode_clnpass(s, t1, width, height, bpno+1, bandpos, + codsty->cblk_style & J2K_CBLK_SEGSYM); + clnpass_cnt = clnpass_cnt + 1; + if (bpass_csty_symbol && clnpass_cnt >= 4) + ff_mqc_initdec(&t1->mqc, cblk->data); + break; + } + + pass_t++; + if (pass_t == 3){ + bpno--; + pass_t = 0; + } + } + return 0; +} + +static void mct_decode(J2kDecoderContext *s, J2kTile *tile) +{ + int i, *src[3], i0, i1, i2, csize = 1; + + for (i = 0; i < 3; 
i++) + src[i] = tile->comp[i].data; + + for (i = 0; i < 2; i++) + csize *= tile->comp[0].coord[i][1] - tile->comp[0].coord[i][0]; + + if (tile->codsty[0].transform == FF_DWT97){ + for (i = 0; i < csize; i++){ + i0 = *src[0] + (*src[2] * 46802 >> 16); + i1 = *src[0] - (*src[1] * 22553 + *src[2] * 46802 >> 16); + i2 = *src[0] + (116130 * *src[1] >> 16); + *src[0]++ = i0; + *src[1]++ = i1; + *src[2]++ = i2; + } + } else{ + for (i = 0; i < csize; i++){ + i1 = *src[0] - (*src[2] + *src[1] >> 2); + i0 = i1 + *src[2]; + i2 = i1 + *src[1]; + *src[0]++ = i0; + *src[1]++ = i1; + *src[2]++ = i2; + } + } +} + +static int decode_tile(J2kDecoderContext *s, J2kTile *tile) +{ + int compno, reslevelno, bandno; + int x, y, *src[4]; + uint8_t *line; + J2kT1Context t1; + + for (compno = 0; compno < s->ncomponents; compno++){ + J2kComponent *comp = tile->comp + compno; + J2kCodingStyle *codsty = tile->codsty + compno; + + for (reslevelno = 0; reslevelno < codsty->nreslevels; reslevelno++){ + J2kResLevel *rlevel = comp->reslevel + reslevelno; + for (bandno = 0; bandno < rlevel->nbands; bandno++){ + J2kBand *band = rlevel->band + bandno; + int cblkx, cblky, cblkno=0, xx0, x0, xx1, y0, yy0, yy1, bandpos; + + bandpos = bandno + (reslevelno > 0); + + yy0 = bandno == 0 ? 
0 : comp->reslevel[reslevelno-1].coord[1][1] - comp->reslevel[reslevelno-1].coord[1][0]; + y0 = yy0; + yy1 = FFMIN(ff_j2k_ceildiv(band->coord[1][0] + 1, band->codeblock_height) * band->codeblock_height, + band->coord[1][1]) - band->coord[1][0] + yy0; + + if (band->coord[0][0] == band->coord[0][1] || band->coord[1][0] == band->coord[1][1]) + continue; + + for (cblky = 0; cblky < band->cblkny; cblky++){ + if (reslevelno == 0 || bandno == 1) + xx0 = 0; + else + xx0 = comp->reslevel[reslevelno-1].coord[0][1] - comp->reslevel[reslevelno-1].coord[0][0]; + x0 = xx0; + xx1 = FFMIN(ff_j2k_ceildiv(band->coord[0][0] + 1, band->codeblock_width) * band->codeblock_width, + band->coord[0][1]) - band->coord[0][0] + xx0; + + for (cblkx = 0; cblkx < band->cblknx; cblkx++, cblkno++){ + int y, x; + decode_cblk(s, codsty, &t1, band->cblk + cblkno, xx1 - xx0, yy1 - yy0, bandpos); + if (codsty->transform == FF_DWT53){ + for (y = yy0; y < yy1; y+=s->cdy[compno]){ + int *ptr = t1.data[y-yy0]; + for (x = xx0; x < xx1; x+=s->cdx[compno]){ + comp->data[(comp->coord[0][1] - comp->coord[0][0]) * y + x] = *ptr++ >> 1; + } + } + } else{ + for (y = yy0; y < yy1; y+=s->cdy[compno]){ + int *ptr = t1.data[y-yy0]; + for (x = xx0; x < xx1; x+=s->cdx[compno]){ + int tmp = ((int64_t)*ptr++) * ((int64_t)band->stepsize) >> 13, tmp2; + tmp2 = FFABS(tmp>>1) + FFABS(tmp&1); + comp->data[(comp->coord[0][1] - comp->coord[0][0]) * y + x] = tmp < 0 ? 
-tmp2 : tmp2; + } + } + } + xx0 = xx1; + xx1 = FFMIN(xx1 + band->codeblock_width, band->coord[0][1] - band->coord[0][0] + x0); + } + yy0 = yy1; + yy1 = FFMIN(yy1 + band->codeblock_height, band->coord[1][1] - band->coord[1][0] + y0); + } + } + } + ff_j2k_dwt_decode(&comp->dwt, comp->data); + src[compno] = comp->data; + } + if (tile->codsty[0].mct) + mct_decode(s, tile); + + if (s->avctx->pix_fmt == PIX_FMT_BGRA) // RGBA -> BGRA + FFSWAP(int *, src[0], src[2]); + + if (s->precision <= 8) { + for (compno = 0; compno < s->ncomponents; compno++){ + y = tile->comp[compno].coord[1][0] - s->image_offset_y; + line = s->picture.data[0] + y * s->picture.linesize[0]; + for (; y < tile->comp[compno].coord[1][1] - s->image_offset_y; y += s->cdy[compno]){ + uint8_t *dst; + + x = tile->comp[compno].coord[0][0] - s->image_offset_x; + dst = line + x * s->ncomponents + compno; + + for (; x < tile->comp[compno].coord[0][1] - s->image_offset_x; x += s->cdx[compno]) { + *src[compno] += 1 << (s->cbps[compno]-1); + if (*src[compno] < 0) + *src[compno] = 0; + else if (*src[compno] >= (1 << s->cbps[compno])) + *src[compno] = (1 << s->cbps[compno]) - 1; + *dst = *src[compno]++; + dst += s->ncomponents; + } + line += s->picture.linesize[0]; + } + } + } else { + for (compno = 0; compno < s->ncomponents; compno++) { + y = tile->comp[compno].coord[1][0] - s->image_offset_y; + line = s->picture.data[0] + y * s->picture.linesize[0]; + for (; y < tile->comp[compno].coord[1][1] - s->image_offset_y; y += s->cdy[compno]) { + uint16_t *dst; + x = tile->comp[compno].coord[0][0] - s->image_offset_x; + dst = line + (x * s->ncomponents + compno) * 2; + for (; x < tile->comp[compno].coord[0][1] - s->image_offset_x; x += s-> cdx[compno]) { + int32_t val; + val = *src[compno]++ << (16 - s->cbps[compno]); + val += 1 << 15; + val = av_clip(val, 0, (1 << 16) - 1); + *dst = val; + dst += s->ncomponents; + } + line += s->picture.linesize[0]; + } + } + } + return 0; +} + +static void cleanup(J2kDecoderContext *s) 
+{ + int tileno, compno; + for (tileno = 0; tileno < s->numXtiles * s->numYtiles; tileno++){ + for (compno = 0; compno < s->ncomponents; compno++){ + J2kComponent *comp = s->tile[tileno].comp + compno; + J2kCodingStyle *codsty = s->tile[tileno].codsty + compno; + + ff_j2k_cleanup(comp, codsty); + } + av_freep(&s->tile[tileno].comp); + } + av_freep(&s->tile); +} + +static int decode_codestream(J2kDecoderContext *s) +{ + J2kCodingStyle *codsty = s->codsty; + J2kQuantStyle *qntsty = s->qntsty; + uint8_t *properties = s->properties; + + for (;;){ + int marker, len, ret = 0; + uint8_t *oldbuf; + if (s->buf_end - s->buf < 2){ + av_log(s->avctx, AV_LOG_ERROR, "Missing EOC\n"); + break; + } + + marker = bytestream_get_be16(&s->buf); + oldbuf = s->buf; + + if (marker == J2K_SOD){ + J2kTile *tile = s->tile + s->curtileno; + if (ret = init_tile(s, s->curtileno)) + return ret; + if (ret = decode_packets(s, tile)) + return ret; + continue; + } + if (marker == J2K_EOC) + break; + + if (s->buf_end - s->buf < 2) + return AVERROR(EINVAL); + len = bytestream_get_be16(&s->buf); + switch(marker){ + case J2K_SIZ: + ret = get_siz(s); break; + case J2K_COC: + ret = get_coc(s, codsty, properties); break; + case J2K_COD: + ret = get_cod(s, codsty, properties); break; + case J2K_QCC: + ret = get_qcc(s, len, qntsty, properties); break; + case J2K_QCD: + ret = get_qcd(s, len, qntsty, properties); break; + case J2K_SOT: + if (!(ret = get_sot(s))){ + codsty = s->tile[s->curtileno].codsty; + qntsty = s->tile[s->curtileno].qntsty; + properties = s->tile[s->curtileno].properties; + } + break; + case J2K_COM: + // the comment is ignored + s->buf += len - 2; break; + default: + av_log(s->avctx, AV_LOG_ERROR, "unsupported marker 0x%.4X at pos 0x%x\n", marker, s->buf - s->buf_start - 4); + s->buf += len - 2; break; + } + if (s->buf - oldbuf != len || ret){ + av_log(s->avctx, AV_LOG_ERROR, "error during processing marker segment %.4x\n", marker); + return ret ? 
ret : -1; + } + } + return 0; +} + +static int jp2_find_codestream(J2kDecoderContext *s) +{ + int32_t atom_size; + int found_codestream = 0, search_range = 10; + + // skip jpeg2k signature atom + s->buf += 12; + + while(!found_codestream && search_range) { + atom_size = AV_RB32(s->buf); + if(AV_RB32(s->buf + 4) == JP2_CODESTREAM) { + found_codestream = 1; + s->buf += 8; + } else { + s->buf += atom_size; + search_range--; + } + } + + if(found_codestream) + return 1; + return 0; +} + +static int decode_frame(AVCodecContext *avctx, + void *data, int *data_size, + AVPacket *avpkt) +{ + J2kDecoderContext *s = avctx->priv_data; + AVFrame *picture = data; + int tileno, ret; + + s->avctx = avctx; + av_log(s->avctx, AV_LOG_DEBUG, "start\n"); + + // init + s->buf = s->buf_start = avpkt->data; + s->buf_end = s->buf_start + avpkt->size; + s->curtileno = -1; + + ff_j2k_init_tier1_luts(); + + if (s->buf_end - s->buf < 2) + return AVERROR(EINVAL); + + // check if the image is in jp2 format + if((AV_RB32(s->buf) == 12) && (AV_RB32(s->buf + 4) == JP2_SIG_TYPE) && + (AV_RB32(s->buf + 8) == JP2_SIG_VALUE)) { + if(!jp2_find_codestream(s)) { + av_log(avctx, AV_LOG_ERROR, "couldn't find jpeg2k codestream atom\n"); + return -1; + } + } + + if (bytestream_get_be16(&s->buf) != J2K_SOC){ + av_log(avctx, AV_LOG_ERROR, "SOC marker not present\n"); + return -1; + } + if (ret = decode_codestream(s)) + return ret; + + for (tileno = 0; tileno < s->numXtiles * s->numYtiles; tileno++) + if (ret = decode_tile(s, s->tile + tileno)) + return ret; + + cleanup(s); + av_log(s->avctx, AV_LOG_DEBUG, "end\n"); + + *data_size = sizeof(AVPicture); + *picture = s->picture; + + return s->buf - s->buf_start; +} + +static av_cold int j2kdec_init(AVCodecContext *avctx) +{ + J2kDecoderContext *s = avctx->priv_data; + + avcodec_get_frame_defaults((AVFrame*)&s->picture); + avctx->coded_frame = (AVFrame*)&s->picture; + return 0; +} + +static av_cold int decode_end(AVCodecContext *avctx) +{ + J2kDecoderContext *s = 
avctx->priv_data; + + if (s->picture.data[0]) + avctx->release_buffer(avctx, &s->picture); + + return 0; +} + +AVCodec ff_jpeg2000_decoder = { + "j2k", + AVMEDIA_TYPE_VIDEO, + CODEC_ID_JPEG2000, + sizeof(J2kDecoderContext), + j2kdec_init, + NULL, + decode_end, + decode_frame, + .capabilities = CODEC_CAP_EXPERIMENTAL, + .pix_fmts = + (enum PixelFormat[]) {PIX_FMT_GRAY8, PIX_FMT_RGB24, -1} +}; diff --git a/libavcodec/j2kenc.c b/libavcodec/j2kenc.c new file mode 100644 index 0000000000..123b796a76 --- /dev/null +++ b/libavcodec/j2kenc.c @@ -0,0 +1,1045 @@ +/* + * JPEG2000 image encoder + * Copyright (c) 2007 Kamil Nowosad + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * JPEG2000 image encoder + * @file + * @author Kamil Nowosad + */ + +#include <float.h> +#include "avcodec.h" +#include "bytestream.h" +#include "j2k.h" +#include "libavutil/common.h" + +#define NMSEDEC_BITS 7 +#define NMSEDEC_FRACBITS (NMSEDEC_BITS-1) +#define WMSEDEC_SHIFT 13 ///< must be >= 13 +#define LAMBDA_SCALE (100000000LL << (WMSEDEC_SHIFT - 13)) + +static int lut_nmsedec_ref [1<<NMSEDEC_BITS], + lut_nmsedec_ref0[1<<NMSEDEC_BITS], + lut_nmsedec_sig [1<<NMSEDEC_BITS], + lut_nmsedec_sig0[1<<NMSEDEC_BITS]; + +static const int dwt_norms[2][4][10] = { // [dwt_type][band][rlevel] (multiplied by 10000) + {{10000, 19650, 41770, 84030, 169000, 338400, 676900, 1353000, 2706000, 5409000}, + {20220, 39890, 83550, 170400, 342700, 686300, 1373000, 2746000, 5490000}, + {20220, 39890, 83550, 170400, 342700, 686300, 1373000, 2746000, 5490000}, + {20800, 38650, 83070, 171800, 347100, 695900, 1393000, 2786000, 5572000}}, + + {{10000, 15000, 27500, 53750, 106800, 213400, 426700, 853300, 1707000, 3413000}, + {10380, 15920, 29190, 57030, 113300, 226400, 452500, 904800, 1809000}, + {10380, 15920, 29190, 57030, 113300, 226400, 452500, 904800, 1809000}, + { 7186, 9218, 15860, 30430, 60190, 120100, 240000, 479700, 959300}} +}; + +typedef struct { + J2kComponent *comp; +} J2kTile; + +typedef struct { + AVCodecContext *avctx; + AVFrame *picture; + + int width, height; ///< image width and height + uint8_t cbps[4]; ///< bits per sample in particular components + int chroma_shift[2]; + uint8_t planar; + int ncomponents; + int tile_width, tile_height; ///< tile size + int numXtiles, numYtiles; + + uint8_t *buf_start; + uint8_t *buf; + uint8_t *buf_end; + int bit_index; + + int64_t lambda; + + J2kCodingStyle codsty; + J2kQuantStyle qntsty; + + J2kTile *tile; 
+} J2kEncoderContext; + + +/* debug */ +#if 0 +#undef ifprintf +#undef printf + +static void nspaces(FILE *fd, int n) +{ + while(n--) putc(' ', fd); +} + +static void printv(int *tab, int l) +{ + int i; + for (i = 0; i < l; i++) + printf("%.3d ", tab[i]); + printf("\n"); +} + +static void printu(uint8_t *tab, int l) +{ + int i; + for (i = 0; i < l; i++) + printf("%.3hd ", tab[i]); + printf("\n"); +} + +static void printcomp(J2kComponent *comp) +{ + int i; + for (i = 0; i < comp->y1 - comp->y0; i++) + printv(comp->data + i * (comp->x1 - comp->x0), comp->x1 - comp->x0); +} + +static void dump(J2kEncoderContext *s, FILE *fd) +{ + int tileno, compno, reslevelno, bandno, precno; + fprintf(fd, "XSiz = %d, YSiz = %d, tile_width = %d, tile_height = %d\n" + "numXtiles = %d, numYtiles = %d, ncomponents = %d\n" + "tiles:\n", + s->width, s->height, s->tile_width, s->tile_height, + s->numXtiles, s->numYtiles, s->ncomponents); + for (tileno = 0; tileno < s->numXtiles * s->numYtiles; tileno++){ + J2kTile *tile = s->tile + tileno; + nspaces(fd, 2); + fprintf(fd, "tile %d:\n", tileno); + for(compno = 0; compno < s->ncomponents; compno++){ + J2kComponent *comp = tile->comp + compno; + nspaces(fd, 4); + fprintf(fd, "component %d:\n", compno); + nspaces(fd, 4); + fprintf(fd, "x0 = %d, x1 = %d, y0 = %d, y1 = %d\n", + comp->x0, comp->x1, comp->y0, comp->y1); + for(reslevelno = 0; reslevelno < s->nreslevels; reslevelno++){ + J2kResLevel *reslevel = comp->reslevel + reslevelno; + nspaces(fd, 6); + fprintf(fd, "reslevel %d:\n", reslevelno); + nspaces(fd, 6); + fprintf(fd, "x0 = %d, x1 = %d, y0 = %d, y1 = %d, nbands = %d\n", + reslevel->x0, reslevel->x1, reslevel->y0, + reslevel->y1, reslevel->nbands); + for(bandno = 0; bandno < reslevel->nbands; bandno++){ + J2kBand *band = reslevel->band + bandno; + nspaces(fd, 8); + fprintf(fd, "band %d:\n", bandno); + nspaces(fd, 8); + fprintf(fd, "x0 = %d, x1 = %d, y0 = %d, y1 = %d," + "codeblock_width = %d, codeblock_height = %d cblknx = %d cblkny = 
%d\n", + band->x0, band->x1, + band->y0, band->y1, + band->codeblock_width, band->codeblock_height, + band->cblknx, band->cblkny); + for (precno = 0; precno < reslevel->num_precincts_x * reslevel->num_precincts_y; precno++){ + J2kPrec *prec = band->prec + precno; + nspaces(fd, 10); + fprintf(fd, "prec %d:\n", precno); + nspaces(fd, 10); + fprintf(fd, "xi0 = %d, xi1 = %d, yi0 = %d, yi1 = %d\n", + prec->xi0, prec->xi1, prec->yi0, prec->yi1); + } + } + } + } + } +} +#endif + +/* bitstream routines */ + +/** put n times val bit */ +static void put_bits(J2kEncoderContext *s, int val, int n) // TODO: optimize +{ + while (n-- > 0){ + if (s->bit_index == 8) + { + s->bit_index = *s->buf == 0xff; + *(++s->buf) = 0; + } + *s->buf |= val << (7 - s->bit_index++); + } +} + +/** put n least significant bits of a number num */ +static void put_num(J2kEncoderContext *s, int num, int n) +{ + while(--n >= 0) + put_bits(s, (num >> n) & 1, 1); +} + +/** flush the bitstream */ +static void j2k_flush(J2kEncoderContext *s) +{ + if (s->bit_index){ + s->bit_index = 0; + s->buf++; + } +} + +/* tag tree routines */ + +/** code the value stored in node */ +static void tag_tree_code(J2kEncoderContext *s, J2kTgtNode *node, int threshold) +{ + J2kTgtNode *stack[30]; + int sp = 1, curval = 0; + stack[0] = node; + + node = node->parent; + while(node){ + if (node->vis){ + curval = node->val; + break; + } + node->vis++; + stack[sp++] = node; + node = node->parent; + } + while(--sp >= 0){ + if (stack[sp]->val >= threshold){ + put_bits(s, 0, threshold - curval); + break; + } + put_bits(s, 0, stack[sp]->val - curval); + put_bits(s, 1, 1); + curval = stack[sp]->val; + } +} + +/** update the value in node */ +static void tag_tree_update(J2kTgtNode *node) +{ + int lev = 0; + while (node->parent){ + if (node->parent->val <= node->val) + break; + node->parent->val = node->val; + node = node->parent; + lev++; + } +} + +static int put_siz(J2kEncoderContext *s) +{ + int i; + + if (s->buf_end - s->buf < 40 + 3 * 
s->ncomponents) + return -1; + + bytestream_put_be16(&s->buf, J2K_SIZ); + bytestream_put_be16(&s->buf, 38 + 3 * s->ncomponents); // Lsiz + bytestream_put_be16(&s->buf, 0); // Rsiz + bytestream_put_be32(&s->buf, s->width); // width + bytestream_put_be32(&s->buf, s->height); // height + bytestream_put_be32(&s->buf, 0); // X0Siz + bytestream_put_be32(&s->buf, 0); // Y0Siz + + bytestream_put_be32(&s->buf, s->tile_width); // XTSiz + bytestream_put_be32(&s->buf, s->tile_height); // YTSiz + bytestream_put_be32(&s->buf, 0); // XT0Siz + bytestream_put_be32(&s->buf, 0); // YT0Siz + bytestream_put_be16(&s->buf, s->ncomponents); // CSiz + + for (i = 0; i < s->ncomponents; i++){ // Ssiz_i XRsiz_i, YRsiz_i + bytestream_put_byte(&s->buf, 7); + bytestream_put_byte(&s->buf, i?1<<s->chroma_shift[0]:1); + bytestream_put_byte(&s->buf, i?1<<s->chroma_shift[1]:1); + } + return 0; +} + +static int put_cod(J2kEncoderContext *s) +{ + J2kCodingStyle *codsty = &s->codsty; + + if (s->buf_end - s->buf < 14) + return -1; + + bytestream_put_be16(&s->buf, J2K_COD); + bytestream_put_be16(&s->buf, 12); // Lcod + bytestream_put_byte(&s->buf, 0); // Scod + // SGcod + bytestream_put_byte(&s->buf, 0); // progression level + bytestream_put_be16(&s->buf, 1); // num of layers + bytestream_put_byte(&s->buf, 0); // multiple component transformation + // SPcod + bytestream_put_byte(&s->buf, codsty->nreslevels - 1); // num of decomp. 
levels + bytestream_put_byte(&s->buf, codsty->log2_cblk_width-2); // cblk width + bytestream_put_byte(&s->buf, codsty->log2_cblk_height-2); // cblk height + bytestream_put_byte(&s->buf, 0); // cblk style + bytestream_put_byte(&s->buf, codsty->transform); // transformation + return 0; +} + +static int put_qcd(J2kEncoderContext *s, int compno) +{ + int i, size; + J2kCodingStyle *codsty = &s->codsty; + J2kQuantStyle *qntsty = &s->qntsty; + + if (qntsty->quantsty == J2K_QSTY_NONE) + size = 4 + 3 * (codsty->nreslevels-1); + else // QSTY_SE + size = 5 + 6 * (codsty->nreslevels-1); + + if (s->buf_end - s->buf < size + 2) + return -1; + + bytestream_put_be16(&s->buf, J2K_QCD); + bytestream_put_be16(&s->buf, size); // LQcd + bytestream_put_byte(&s->buf, (qntsty->nguardbits << 5) | qntsty->quantsty); // Sqcd + if (qntsty->quantsty == J2K_QSTY_NONE) + for (i = 0; i < codsty->nreslevels * 3 - 2; i++) + bytestream_put_byte(&s->buf, qntsty->expn[i] << 3); + else // QSTY_SE + for (i = 0; i < codsty->nreslevels * 3 - 2; i++) + bytestream_put_be16(&s->buf, (qntsty->expn[i] << 11) | qntsty->mant[i]); + return 0; +} + +/** + * write the SOT marker segment (12 bytes) for tile number tileno + * @return pointer to the 4-byte Psot field, which is written as 0 here and filled in later, + * or NULL if fewer than 12 bytes are left in the output buffer + */ +static uint8_t *put_sot(J2kEncoderContext *s, int tileno) +{ + uint8_t *psotptr; + + /* this function returns a pointer, so failure is reported as NULL rather than the integer -1 */ + if (s->buf_end - s->buf < 12) + return NULL; + + bytestream_put_be16(&s->buf, J2K_SOT); + bytestream_put_be16(&s->buf, 10); // Lsot + bytestream_put_be16(&s->buf, tileno); // Isot + + psotptr = s->buf; + bytestream_put_be32(&s->buf, 0); // Psot (filled in later) + + bytestream_put_byte(&s->buf, 0); // TPsot + bytestream_put_byte(&s->buf, 1); // TNsot + return psotptr; +} + +/** + * compute the sizes of tiles, resolution levels, bands, etc. 
+ * allocate memory for them + * divide the input image into tile-components + */ +static int init_tiles(J2kEncoderContext *s) +{ + int tileno, tilex, tiley, compno; + J2kCodingStyle *codsty = &s->codsty; + J2kQuantStyle *qntsty = &s->qntsty; + + s->numXtiles = ff_j2k_ceildiv(s->width, s->tile_width); + s->numYtiles = ff_j2k_ceildiv(s->height, s->tile_height); + + s->tile = av_malloc(s->numXtiles * s->numYtiles * sizeof(J2kTile)); + if (!s->tile) + return AVERROR(ENOMEM); + for (tileno = 0, tiley = 0; tiley < s->numYtiles; tiley++) + for (tilex = 0; tilex < s->numXtiles; tilex++, tileno++){ + J2kTile *tile = s->tile + tileno; + + tile->comp = av_malloc(s->ncomponents * sizeof(J2kComponent)); + if (!tile->comp) + return AVERROR(ENOMEM); + for (compno = 0; compno < s->ncomponents; compno++){ + J2kComponent *comp = tile->comp + compno; + int ret, i, j; + + comp->coord[0][0] = tilex * s->tile_width; + comp->coord[0][1] = FFMIN((tilex+1)*s->tile_width, s->width); + comp->coord[1][0] = tiley * s->tile_height; + comp->coord[1][1] = FFMIN((tiley+1)*s->tile_height, s->height); + if (compno > 0) + for (i = 0; i < 2; i++) + for (j = 0; j < 2; j++) + comp->coord[i][j] = ff_j2k_ceildivpow2(comp->coord[i][j], s->chroma_shift[i]); + + if (ret = ff_j2k_init_component(comp, codsty, qntsty, s->cbps[compno])) + return ret; + } + } + return 0; +} + +static void copy_frame(J2kEncoderContext *s) +{ + int tileno, compno, i, y, x; + uint8_t *line; + for (tileno = 0; tileno < s->numXtiles * s->numYtiles; tileno++){ + J2kTile *tile = s->tile + tileno; + if (s->planar){ + for (compno = 0; compno < s->ncomponents; compno++){ + J2kComponent *comp = tile->comp + compno; + int *dst = comp->data; + line = s->picture->data[compno] + + comp->coord[1][0] * s->picture->linesize[compno] + + comp->coord[0][0]; + for (y = comp->coord[1][0]; y < comp->coord[1][1]; y++){ + uint8_t *ptr = line; + for (x = comp->coord[0][0]; x < comp->coord[0][1]; x++) + *dst++ = *ptr++ - (1 << 7); + line += 
s->picture->linesize[compno]; + } + } + } else{ + line = s->picture->data[0] + tile->comp[0].coord[1][0] * s->picture->linesize[0] + + tile->comp[0].coord[0][0] * s->ncomponents; + + i = 0; + for (y = tile->comp[0].coord[1][0]; y < tile->comp[0].coord[1][1]; y++){ + uint8_t *ptr = line; + for (x = tile->comp[0].coord[0][0]; x < tile->comp[0].coord[0][1]; x++, i++){ + for (compno = 0; compno < s->ncomponents; compno++){ + tile->comp[compno].data[i] = *ptr++ - (1 << 7); + } + } + line += s->picture->linesize[0]; + } + } + } +} + +/** + * fill qntsty->expn[] and qntsty->mant[] for every band of every resolution level + * the running band index gbandno restarts at 0 for each component, so all components write the same entries + */ +static void init_quantization(J2kEncoderContext *s) +{ + int compno, reslevelno, bandno; + J2kQuantStyle *qntsty = &s->qntsty; + J2kCodingStyle *codsty = &s->codsty; + + for (compno = 0; compno < s->ncomponents; compno++){ + int gbandno = 0; + for (reslevelno = 0; reslevelno < codsty->nreslevels; reslevelno++){ + int nbands, lev = codsty->nreslevels - reslevelno - 1; + nbands = reslevelno ? 3 : 1; + for (bandno = 0; bandno < nbands; bandno++, gbandno++){ + /* mant is only computed in the FF_DWT97 branch but is stored unconditionally below; start from 0 so no indeterminate value is read */ + int expn, mant = 0; + + if (codsty->transform == FF_DWT97){ + int bandpos = bandno + (reslevelno>0), + ss = 81920000 / dwt_norms[0][bandpos][lev], + log = av_log2(ss); + mant = (11 - log < 0 ? 
ss >> log - 11 : ss << 11 - log) & 0x7ff; + expn = s->cbps[compno] - log + 13; + } else + expn = ((bandno&2)>>1) + (reslevelno>0) + s->cbps[compno]; + + qntsty->expn[gbandno] = expn; + qntsty->mant[gbandno] = mant; + } + } + } +} + +static void init_luts() +{ + int i, a, + mask = ~((1<<NMSEDEC_FRACBITS)-1); + + for (i = 0; i < (1 << NMSEDEC_BITS); i++){ + lut_nmsedec_sig[i] = FFMAX(6*i - (9<<NMSEDEC_FRACBITS-1) << 12-NMSEDEC_FRACBITS, 0); + lut_nmsedec_sig0[i] = FFMAX((i*i + (1<<NMSEDEC_FRACBITS-1) & mask) << 1, 0); + + a = (i >> (NMSEDEC_BITS-2)&2) + 1; + lut_nmsedec_ref[i] = FFMAX((-2*i + (1<<NMSEDEC_FRACBITS) + a*i - (a*a<<NMSEDEC_FRACBITS-2)) + << 13-NMSEDEC_FRACBITS, 0); + lut_nmsedec_ref0[i] = FFMAX(((i*i + (1-4*i << NMSEDEC_FRACBITS-1) + (1<<2*NMSEDEC_FRACBITS)) & mask) + << 1, 0); + } +} + +/* tier-1 routines */ +static int getnmsedec_sig(int x, int bpno) +{ + if (bpno > NMSEDEC_FRACBITS) + return lut_nmsedec_sig[(x >> (bpno - NMSEDEC_FRACBITS)) & ((1 << NMSEDEC_BITS) - 1)]; + return lut_nmsedec_sig0[x & ((1 << NMSEDEC_BITS) - 1)]; +} + +static int getnmsedec_ref(int x, int bpno) +{ + if (bpno > NMSEDEC_FRACBITS) + return lut_nmsedec_ref[(x >> (bpno - NMSEDEC_FRACBITS)) & ((1 << NMSEDEC_BITS) - 1)]; + return lut_nmsedec_ref0[x & ((1 << NMSEDEC_BITS) - 1)]; +} + +static void encode_sigpass(J2kT1Context *t1, int width, int height, int bandno, int *nmsedec, int bpno) +{ + int y0, x, y, mask = 1 << (bpno + NMSEDEC_FRACBITS); + for (y0 = 0; y0 < height; y0 += 4) + for (x = 0; x < width; x++) + for (y = y0; y < height && y < y0+4; y++){ + if (!(t1->flags[y+1][x+1] & J2K_T1_SIG) && (t1->flags[y+1][x+1] & J2K_T1_SIG_NB)){ + int ctxno = ff_j2k_getnbctxno(t1->flags[y+1][x+1], bandno), + bit = t1->data[y][x] & mask ? 
1 : 0; + ff_mqc_encode(&t1->mqc, t1->mqc.cx_states + ctxno, bit); + if (bit){ + int xorbit; + int ctxno = ff_j2k_getsgnctxno(t1->flags[y+1][x+1], &xorbit); + ff_mqc_encode(&t1->mqc, t1->mqc.cx_states + ctxno, (t1->flags[y+1][x+1] >> 15) ^ xorbit); + *nmsedec += getnmsedec_sig(t1->data[y][x], bpno + NMSEDEC_FRACBITS); + ff_j2k_set_significant(t1, x, y, t1->flags[y+1][x+1] >> 15); + } + t1->flags[y+1][x+1] |= J2K_T1_VIS; + } + } +} + +static void encode_refpass(J2kT1Context *t1, int width, int height, int *nmsedec, int bpno) +{ + int y0, x, y, mask = 1 << (bpno + NMSEDEC_FRACBITS); + for (y0 = 0; y0 < height; y0 += 4) + for (x = 0; x < width; x++) + for (y = y0; y < height && y < y0+4; y++) + if ((t1->flags[y+1][x+1] & (J2K_T1_SIG | J2K_T1_VIS)) == J2K_T1_SIG){ + int ctxno = ff_j2k_getrefctxno(t1->flags[y+1][x+1]); + *nmsedec += getnmsedec_ref(t1->data[y][x], bpno + NMSEDEC_FRACBITS); + ff_mqc_encode(&t1->mqc, t1->mqc.cx_states + ctxno, t1->data[y][x] & mask ? 1:0); + t1->flags[y+1][x+1] |= J2K_T1_REF; + } +} + +static void encode_clnpass(J2kT1Context *t1, int width, int height, int bandno, int *nmsedec, int bpno) +{ + int y0, x, y, mask = 1 << (bpno + NMSEDEC_FRACBITS); + for (y0 = 0; y0 < height; y0 += 4) + for (x = 0; x < width; x++){ + if (y0 + 3 < height && !( + (t1->flags[y0+1][x+1] & (J2K_T1_SIG_NB | J2K_T1_VIS | J2K_T1_SIG)) || + (t1->flags[y0+2][x+1] & (J2K_T1_SIG_NB | J2K_T1_VIS | J2K_T1_SIG)) || + (t1->flags[y0+3][x+1] & (J2K_T1_SIG_NB | J2K_T1_VIS | J2K_T1_SIG)) || + (t1->flags[y0+4][x+1] & (J2K_T1_SIG_NB | J2K_T1_VIS | J2K_T1_SIG)))) + { + // aggregation mode + int rlen; + for (rlen = 0; rlen < 4; rlen++) + if (t1->data[y0+rlen][x] & mask) + break; + ff_mqc_encode(&t1->mqc, t1->mqc.cx_states + MQC_CX_RL, rlen != 4); + if (rlen == 4) + continue; + ff_mqc_encode(&t1->mqc, t1->mqc.cx_states + MQC_CX_UNI, rlen >> 1); + ff_mqc_encode(&t1->mqc, t1->mqc.cx_states + MQC_CX_UNI, rlen & 1); + for (y = y0 + rlen; y < y0 + 4; y++){ + if (!(t1->flags[y+1][x+1] & 
(J2K_T1_SIG | J2K_T1_VIS))){ + int ctxno = ff_j2k_getnbctxno(t1->flags[y+1][x+1], bandno); + if (y > y0 + rlen) + ff_mqc_encode(&t1->mqc, t1->mqc.cx_states + ctxno, t1->data[y][x] & mask ? 1:0); + if (t1->data[y][x] & mask){ // newly significant + int xorbit; + int ctxno = ff_j2k_getsgnctxno(t1->flags[y+1][x+1], &xorbit); + *nmsedec += getnmsedec_sig(t1->data[y][x], bpno + NMSEDEC_FRACBITS); + ff_mqc_encode(&t1->mqc, t1->mqc.cx_states + ctxno, (t1->flags[y+1][x+1] >> 15) ^ xorbit); + ff_j2k_set_significant(t1, x, y, t1->flags[y+1][x+1] >> 15); + } + } + t1->flags[y+1][x+1] &= ~J2K_T1_VIS; + } + } else{ + for (y = y0; y < y0 + 4 && y < height; y++){ + if (!(t1->flags[y+1][x+1] & (J2K_T1_SIG | J2K_T1_VIS))){ + int ctxno = ff_j2k_getnbctxno(t1->flags[y+1][x+1], bandno); + ff_mqc_encode(&t1->mqc, t1->mqc.cx_states + ctxno, t1->data[y][x] & mask ? 1:0); + if (t1->data[y][x] & mask){ // newly significant + int xorbit; + int ctxno = ff_j2k_getsgnctxno(t1->flags[y+1][x+1], &xorbit); + *nmsedec += getnmsedec_sig(t1->data[y][x], bpno + NMSEDEC_FRACBITS); + ff_mqc_encode(&t1->mqc, t1->mqc.cx_states + ctxno, (t1->flags[y+1][x+1] >> 15) ^ xorbit); + ff_j2k_set_significant(t1, x, y, t1->flags[y+1][x+1] >> 15); + } + } + t1->flags[y+1][x+1] &= ~J2K_T1_VIS; + } + } + } +} + +static void encode_cblk(J2kEncoderContext *s, J2kT1Context *t1, J2kCblk *cblk, J2kTile *tile, + int width, int height, int bandpos, int lev) +{ + int pass_t = 2, passno, x, y, max=0, nmsedec, bpno; + int64_t wmsedec = 0; + + for (y = 0; y < height+2; y++) + memset(t1->flags[y], 0, (width+2)*sizeof(int)); + + for (y = 0; y < height; y++){ + for (x = 0; x < width; x++){ + if (t1->data[y][x] < 0){ + t1->flags[y+1][x+1] |= J2K_T1_SGN; + t1->data[y][x] = -t1->data[y][x]; + } + max = FFMAX(max, t1->data[y][x]); + } + } + + if (max == 0){ + cblk->nonzerobits = 0; + bpno = 0; + } else{ + cblk->nonzerobits = av_log2(max) + 1 - NMSEDEC_FRACBITS; + bpno = cblk->nonzerobits - 1; + } + + ff_mqc_initenc(&t1->mqc, 
cblk->data); + + for (passno = 0; bpno >= 0; passno++){ + nmsedec=0; + + switch(pass_t){ + case 0: encode_sigpass(t1, width, height, bandpos, &nmsedec, bpno); + break; + case 1: encode_refpass(t1, width, height, &nmsedec, bpno); + break; + case 2: encode_clnpass(t1, width, height, bandpos, &nmsedec, bpno); + break; + } + + cblk->passes[passno].rate = 3 + ff_mqc_length(&t1->mqc); + wmsedec += (int64_t)nmsedec << (2*bpno); + cblk->passes[passno].disto = wmsedec; + + if (++pass_t == 3){ + pass_t = 0; + bpno--; + } + } + cblk->npasses = passno; + cblk->ninclpasses = passno; + + // TODO: optional flush on each pass + /* when bpno starts below 0 (nonzero maximum smaller than 1 << NMSEDEC_FRACBITS) the loop above codes no pass and passno is 0; passes[-1] must not be written in that case */ + if (passno) + cblk->passes[passno-1].rate = ff_mqc_flush(&t1->mqc); +} + +/* tier-2 routines: */ + +static void putnumpasses(J2kEncoderContext *s, int n) +{ + if (n == 1) + put_num(s, 0, 1); + else if (n == 2) + put_num(s, 2, 2); + else if (n <= 5) + put_num(s, 0xc | (n-3), 4); + else if (n <= 36) + put_num(s, 0x1e0 | (n-6), 9); + else + put_num(s, 0xff80 | (n-37), 16); +} + + +static int encode_packet(J2kEncoderContext *s, J2kResLevel *rlevel, int precno, + uint8_t *expn, int numgbits) +{ + int bandno, empty = 1; + + // init bitstream + *s->buf = 0; + s->bit_index = 0; + + // header + + // is the packet empty? 
+ for (bandno = 0; bandno < rlevel->nbands; bandno++){ + if (rlevel->band[bandno].coord[0][0] < rlevel->band[bandno].coord[0][1] + && rlevel->band[bandno].coord[1][0] < rlevel->band[bandno].coord[1][1]){ + empty = 0; + break; + } + } + + put_bits(s, !empty, 1); + if (empty){ + j2k_flush(s); + return 0; + } + + for (bandno = 0; bandno < rlevel->nbands; bandno++){ + J2kBand *band = rlevel->band + bandno; + J2kPrec *prec = band->prec + precno; + int yi, xi, pos; + int cblknw = prec->xi1 - prec->xi0; + + if (band->coord[0][0] == band->coord[0][1] + || band->coord[1][0] == band->coord[1][1]) + continue; + + for (pos=0, yi = prec->yi0; yi < prec->yi1; yi++){ + for (xi = prec->xi0; xi < prec->xi1; xi++, pos++){ + prec->cblkincl[pos].val = band->cblk[yi * cblknw + xi].ninclpasses == 0; + tag_tree_update(prec->cblkincl + pos); + prec->zerobits[pos].val = expn[bandno] + numgbits - 1 - band->cblk[yi * cblknw + xi].nonzerobits; + tag_tree_update(prec->zerobits + pos); + } + } + + for (pos=0, yi = prec->yi0; yi < prec->yi1; yi++){ + for (xi = prec->xi0; xi < prec->xi1; xi++, pos++){ + int pad = 0, llen, length; + J2kCblk *cblk = band->cblk + yi * cblknw + xi; + + if (s->buf_end - s->buf < 20) // approximately + return -1; + + // inclusion information + tag_tree_code(s, prec->cblkincl + pos, 1); + if (!cblk->ninclpasses) + continue; + // zerobits information + tag_tree_code(s, prec->zerobits + pos, 100); + // number of passes + putnumpasses(s, cblk->ninclpasses); + + length = cblk->passes[cblk->ninclpasses-1].rate; + llen = av_log2(length) - av_log2(cblk->ninclpasses) - 2; + if (llen < 0){ + pad = -llen; + llen = 0; + } + // length of code block + put_bits(s, 1, llen); + put_bits(s, 0, 1); + put_num(s, length, av_log2(length)+1+pad); + } + } + } + j2k_flush(s); + for (bandno = 0; bandno < rlevel->nbands; bandno++){ + J2kBand *band = rlevel->band + bandno; + J2kPrec *prec = band->prec + precno; + int yi, cblknw = prec->xi1 - prec->xi0; + for (yi = prec->yi0; yi < prec->yi1; 
yi++){ + int xi; + for (xi = prec->xi0; xi < prec->xi1; xi++){ + J2kCblk *cblk = band->cblk + yi * cblknw + xi; + if (cblk->ninclpasses){ + if (s->buf_end - s->buf < cblk->passes[cblk->ninclpasses-1].rate) + return -1; + bytestream_put_buffer(&s->buf, cblk->data, cblk->passes[cblk->ninclpasses-1].rate); + } + } + } + } + return 0; +} + +static int encode_packets(J2kEncoderContext *s, J2kTile *tile, int tileno) +{ + int compno, reslevelno, ret; + J2kCodingStyle *codsty = &s->codsty; + J2kQuantStyle *qntsty = &s->qntsty; + + av_log(s->avctx, AV_LOG_DEBUG, "tier2\n"); + // lay-rlevel-comp-pos progression + for (reslevelno = 0; reslevelno < codsty->nreslevels; reslevelno++){ + for (compno = 0; compno < s->ncomponents; compno++){ + int precno; + J2kResLevel *reslevel = s->tile[tileno].comp[compno].reslevel + reslevelno; + for (precno = 0; precno < reslevel->num_precincts_x * reslevel->num_precincts_y; precno++){ + if (ret = encode_packet(s, reslevel, precno, qntsty->expn + (reslevelno ? 3*reslevelno-2 : 0), + qntsty->nguardbits)) + return ret; + } + } + } + av_log(s->avctx, AV_LOG_DEBUG, "after tier2\n"); + return 0; +} + +static int getcut(J2kCblk *cblk, int64_t lambda, int dwt_norm) +{ + int passno, res = 0; + for (passno = 0; passno < cblk->npasses; passno++){ + int dr; + int64_t dd; + + dr = cblk->passes[passno].rate + - (res ? cblk->passes[res-1].rate:0); + dd = cblk->passes[passno].disto + - (res ? 
cblk->passes[res-1].disto:0); + + if (((dd * dwt_norm) >> WMSEDEC_SHIFT) * dwt_norm >= dr * lambda) + res = passno+1; + } + return res; +} + +static void truncpasses(J2kEncoderContext *s, J2kTile *tile) +{ + int compno, reslevelno, bandno, cblkno, lev; + J2kCodingStyle *codsty = &s->codsty; + + for (compno = 0; compno < s->ncomponents; compno++){ + J2kComponent *comp = tile->comp + compno; + + for (reslevelno = 0, lev = codsty->nreslevels-1; reslevelno < codsty->nreslevels; reslevelno++, lev--){ + J2kResLevel *reslevel = comp->reslevel + reslevelno; + + for (bandno = 0; bandno < reslevel->nbands ; bandno++){ + int bandpos = bandno + (reslevelno > 0); + J2kBand *band = reslevel->band + bandno; + + for (cblkno = 0; cblkno < band->cblknx * band->cblkny; cblkno++){ + J2kCblk *cblk = band->cblk + cblkno; + + cblk->ninclpasses = getcut(cblk, s->lambda, + (int64_t)dwt_norms[codsty->transform][bandpos][lev] * (int64_t)band->stepsize >> 13); + } + } + } + } +} + +static int encode_tile(J2kEncoderContext *s, J2kTile *tile, int tileno) +{ + int compno, reslevelno, bandno, ret; + J2kT1Context t1; + J2kCodingStyle *codsty = &s->codsty; + for (compno = 0; compno < s->ncomponents; compno++){ + J2kComponent *comp = s->tile[tileno].comp + compno; + + av_log(s->avctx, AV_LOG_DEBUG,"dwt\n"); + if (ret = ff_dwt_encode(&comp->dwt, comp->data)) + return ret; + av_log(s->avctx, AV_LOG_DEBUG,"after dwt -> tier1\n"); + + for (reslevelno = 0; reslevelno < codsty->nreslevels; reslevelno++){ + J2kResLevel *reslevel = comp->reslevel + reslevelno; + + for (bandno = 0; bandno < reslevel->nbands ; bandno++){ + J2kBand *band = reslevel->band + bandno; + int cblkx, cblky, cblkno=0, xx0, x0, xx1, y0, yy0, yy1, bandpos; + yy0 = bandno == 0 ? 
0 : comp->reslevel[reslevelno-1].coord[1][1] - comp->reslevel[reslevelno-1].coord[1][0]; + y0 = yy0; + yy1 = FFMIN(ff_j2k_ceildiv(band->coord[1][0] + 1, band->codeblock_height) * band->codeblock_height, + band->coord[1][1]) - band->coord[1][0] + yy0; + + if (band->coord[0][0] == band->coord[0][1] || band->coord[1][0] == band->coord[1][1]) + continue; + + bandpos = bandno + (reslevelno > 0); + + for (cblky = 0; cblky < band->cblkny; cblky++){ + if (reslevelno == 0 || bandno == 1) + xx0 = 0; + else + xx0 = comp->reslevel[reslevelno-1].coord[0][1] - comp->reslevel[reslevelno-1].coord[0][0]; + x0 = xx0; + xx1 = FFMIN(ff_j2k_ceildiv(band->coord[0][0] + 1, band->codeblock_width) * band->codeblock_width, + band->coord[0][1]) - band->coord[0][0] + xx0; + + for (cblkx = 0; cblkx < band->cblknx; cblkx++, cblkno++){ + int y, x; + if (codsty->transform == FF_DWT53){ + for (y = yy0; y < yy1; y++){ + int *ptr = t1.data[y-yy0]; + for (x = xx0; x < xx1; x++){ + *ptr++ = comp->data[(comp->coord[0][1] - comp->coord[0][0]) * y + x] << NMSEDEC_FRACBITS; + } + } + } else{ + for (y = yy0; y < yy1; y++){ + int *ptr = t1.data[y-yy0]; + for (x = xx0; x < xx1; x++){ + *ptr = (comp->data[(comp->coord[0][1] - comp->coord[0][0]) * y + x]); + /* scale the value just stored, then advance: reading *ptr and post-incrementing ptr in one expression is unsequenced */ + *ptr = (int64_t)*ptr * (int64_t)(8192 * 8192 / band->stepsize) >> 13 - NMSEDEC_FRACBITS; + ptr++; + } + } + } + encode_cblk(s, &t1, band->cblk + cblkno, tile, xx1 - xx0, yy1 - yy0, + bandpos, codsty->nreslevels - reslevelno - 1); + xx0 = xx1; + xx1 = FFMIN(xx1 + band->codeblock_width, band->coord[0][1] - band->coord[0][0] + x0); + } + yy0 = yy1; + yy1 = FFMIN(yy1 + band->codeblock_height, band->coord[1][1] - band->coord[1][0] + y0); + } + } + } + av_log(s->avctx, AV_LOG_DEBUG, "after tier1\n"); + } + + av_log(s->avctx, AV_LOG_DEBUG, "rate control\n"); + truncpasses(s, tile); + if (ret = encode_packets(s, tile, tileno)) + return ret; + av_log(s->avctx, AV_LOG_DEBUG, "after rate control\n"); + return 0; +} + +void cleanup(J2kEncoderContext *s) +{ + int tileno, 
compno; + J2kCodingStyle *codsty = &s->codsty; + + for (tileno = 0; tileno < s->numXtiles * s->numYtiles; tileno++){ + for (compno = 0; compno < s->ncomponents; compno++){ + J2kComponent *comp = s->tile[tileno].comp + compno; + ff_j2k_cleanup(comp, codsty); + } + av_freep(&s->tile[tileno].comp); + } + av_freep(&s->tile); +} + +static void reinit(J2kEncoderContext *s) +{ + int tileno, compno; + for (tileno = 0; tileno < s->numXtiles * s->numYtiles; tileno++){ + J2kTile *tile = s->tile + tileno; + for (compno = 0; compno < s->ncomponents; compno++) + ff_j2k_reinit(tile->comp + compno, &s->codsty); + } +} + +/** + * encode one picture into buf: SOC, SIZ, COD and QCD, then SOT + SOD + tile data for each tile, then EOC + * @return number of bytes written to buf, -1 if buf is too small, or the nonzero value returned by a failing step + */ +static int encode_frame(AVCodecContext *avctx, + uint8_t *buf, int buf_size, + void *data) +{ + int tileno, ret; + J2kEncoderContext *s = avctx->priv_data; + + // init: + s->buf = s->buf_start = buf; + s->buf_end = buf + buf_size; + + s->picture = data; + + s->lambda = s->picture->quality * LAMBDA_SCALE; + + copy_frame(s); + reinit(s); + + if (s->buf_end - s->buf < 2) + return -1; + bytestream_put_be16(&s->buf, J2K_SOC); + if (ret = put_siz(s)) + return ret; + if (ret = put_cod(s)) + return ret; + if (ret = put_qcd(s, 0)) + return ret; + + for (tileno = 0; tileno < s->numXtiles * s->numYtiles; tileno++){ + uint8_t *psotptr; + /* put_sot() needs 12 bytes and reports lack of space through its pointer return value, which a "< 0" comparison cannot detect; check the space here so the call cannot fail */ + if (s->buf_end - s->buf < 12) + return -1; + psotptr = put_sot(s, tileno); + if (s->buf_end - s->buf < 2) + return -1; + bytestream_put_be16(&s->buf, J2K_SOD); + if (ret = encode_tile(s, s->tile + tileno, tileno)) + return ret; + /* psotptr is 6 bytes past the start of the SOT marker, so this stores the length counted from the marker */ + bytestream_put_be32(&psotptr, s->buf - psotptr + 6); + } + if (s->buf_end - s->buf < 2) + return -1; + bytestream_put_be16(&s->buf, J2K_EOC); + + av_log(s->avctx, AV_LOG_DEBUG, "end\n"); + return s->buf - s->buf_start; +} + +static av_cold int j2kenc_init(AVCodecContext *avctx) +{ + int i, ret; + J2kEncoderContext *s = avctx->priv_data; + J2kCodingStyle *codsty = &s->codsty; + J2kQuantStyle *qntsty = &s->qntsty; + + s->avctx = avctx; + av_log(s->avctx, AV_LOG_DEBUG, "init\n"); + + // defaults: + // TODO: implement setting 
non-standard precinct size + codsty->log2_prec_width = 15; + codsty->log2_prec_height = 15; + codsty->nreslevels = 7; + codsty->log2_cblk_width = 4; + codsty->log2_cblk_height = 4; + codsty->transform = 1; + + qntsty->nguardbits = 1; + + s->tile_width = 256; + s->tile_height = 256; + + if (codsty->transform == FF_DWT53) + qntsty->quantsty = J2K_QSTY_NONE; + else + qntsty->quantsty = J2K_QSTY_SE; + + s->width = avctx->width; + s->height = avctx->height; + + for (i = 0; i < 3; i++) + s->cbps[i] = 8; + + if (avctx->pix_fmt == PIX_FMT_RGB24){ + s->ncomponents = 3; + } else if (avctx->pix_fmt == PIX_FMT_GRAY8){ + s->ncomponents = 1; + } else{ // planar YUV + s->planar = 1; + s->ncomponents = 3; + avcodec_get_chroma_sub_sample(avctx->pix_fmt, + s->chroma_shift, s->chroma_shift + 1); + } + + ff_j2k_init_tier1_luts(); + + init_luts(); + + init_quantization(s); + if (ret=init_tiles(s)) + return ret; + + av_log(s->avctx, AV_LOG_DEBUG, "after init\n"); + + return 0; +} + +static int j2kenc_destroy(AVCodecContext *avctx) +{ + J2kEncoderContext *s = avctx->priv_data; + + cleanup(s); + return 0; +} + +AVCodec jpeg2000_encoder = { + "j2k", + CODEC_TYPE_VIDEO, + CODEC_ID_JPEG2000, + sizeof(J2kEncoderContext), + j2kenc_init, + encode_frame, + j2kenc_destroy, + NULL, + 0, + .pix_fmts = + (enum PixelFormat[]) {PIX_FMT_GRAY8, PIX_FMT_RGB24, + PIX_FMT_YUV422P, PIX_FMT_YUV444P, + PIX_FMT_YUV410P, PIX_FMT_YUV411P, + -1} +}; diff --git a/libavcodec/kbdwin.h b/libavcodec/kbdwin.h index e762105628..4b939756c1 100644 --- a/libavcodec/kbdwin.h +++ b/libavcodec/kbdwin.h @@ -32,4 +32,4 @@ */ void ff_kbd_window_init(float *window, float alpha, int n); -#endif +#endif /* AVCODEC_KBDWIN_H */ diff --git a/libavcodec/lagarith.c b/libavcodec/lagarith.c index d53cd05c6b..02d3533b0c 100644 --- a/libavcodec/lagarith.c +++ b/libavcodec/lagarith.c @@ -20,7 +20,7 @@ */ /** - * @file libavcodec/lagarith.c + * @file * Lagarith lossless decoder * @author Nathan Caldwell */ diff --git 
a/libavcodec/lagarithrac.c b/libavcodec/lagarithrac.c index 0cbc3b84df..56c1d0bcc0 100644 --- a/libavcodec/lagarithrac.c +++ b/libavcodec/lagarithrac.c @@ -21,7 +21,7 @@ */ /** - * @file libavcodec/lagarithrac.c + * @file * Lagarith range decoder * @author Nathan Caldwell * @author David Conrad diff --git a/libavcodec/lagarithrac.h b/libavcodec/lagarithrac.h index d985b60353..2cb7323076 100644 --- a/libavcodec/lagarithrac.h +++ b/libavcodec/lagarithrac.h @@ -21,7 +21,7 @@ */ /** - * @file libavcodec/lagarithrac.h + * @file * Lagarith range decoder * @author Nathan Caldwell * @author David Conrad diff --git a/libavcodec/libfaac.c b/libavcodec/libfaac.c index af85587272..2acc682581 100644 --- a/libavcodec/libfaac.c +++ b/libavcodec/libfaac.c @@ -31,6 +31,13 @@ typedef struct FaacAudioContext { faacEncHandle faac_handle; } FaacAudioContext; +static const int channel_maps[][6] = { + { 2, 0, 1 }, //< C L R + { 2, 0, 1, 3 }, //< C L R Cs + { 2, 0, 1, 3, 4 }, //< C L R Ls Rs + { 2, 0, 1, 4, 5, 3 }, //< C L R Ls Rs LFE +}; + static av_cold int Faac_encode_init(AVCodecContext *avctx) { FaacAudioContext *s = avctx->priv_data; @@ -86,6 +93,9 @@ static av_cold int Faac_encode_init(AVCodecContext *avctx) } faac_cfg->outputFormat = 1; faac_cfg->inputFormat = FAAC_INPUT_16BIT; + if (avctx->channels > 2) + memcpy(faac_cfg->channel_map, channel_maps[avctx->channels-3], + avctx->channels * sizeof(int)); avctx->frame_size = samples_input / avctx->channels; diff --git a/libavcodec/libmp3lame.c b/libavcodec/libmp3lame.c index 5cfb122903..05893830c1 100644 --- a/libavcodec/libmp3lame.c +++ b/libavcodec/libmp3lame.c @@ -24,6 +24,7 @@ * Interface to libmp3lame for mp3 encoding. 
*/ +#include "libavutil/intreadwrite.h" #include "avcodec.h" #include "mpegaudio.h" #include <lame/lame.h> diff --git a/libavcodec/libvpxenc.c b/libavcodec/libvpxenc.c index ef85b3074a..f5c942e0fc 100644 --- a/libavcodec/libvpxenc.c +++ b/libavcodec/libvpxenc.c @@ -30,6 +30,7 @@ #include "avcodec.h" #include "libavutil/base64.h" +#include "libavutil/opt.h" /** * Portion of struct vpx_codec_cx_pkt from vpx_encoder.h. @@ -47,13 +48,49 @@ struct FrameListData { }; typedef struct VP8EncoderContext { + AVClass *av_class; struct vpx_codec_ctx encoder; struct vpx_image rawimg; struct vpx_fixed_buf twopass_stats; - unsigned long deadline; //i.e., RT/GOOD/BEST + int deadline; //i.e., RT/GOOD/BEST struct FrameListData *coded_frame_list; + + int cpuused; + + /** + * VP8 specific flags, see VP8F_* below. + */ + int flags; +#define VP8F_ERROR_RESILIENT 0x00000001 ///< Enable measures appropriate for streaming over lossy links +#define VP8F_AUTO_ALT_REF 0x00000002 ///< Enable automatic alternate reference frame generation + + int arnr_max_frames; + int arnr_strength; + int arnr_type; } VP8Context; +#define V AV_OPT_FLAG_VIDEO_PARAM +#define E AV_OPT_FLAG_ENCODING_PARAM + +static const AVOption options[]={ +{"speed", "", offsetof(VP8Context, cpuused), FF_OPT_TYPE_INT, 3, -16, 16, V|E}, +{"quality", "", offsetof(VP8Context, deadline), FF_OPT_TYPE_INT, VPX_DL_GOOD_QUALITY, INT_MIN, INT_MAX, V|E, "quality"}, +{"best", NULL, 0, FF_OPT_TYPE_CONST, VPX_DL_BEST_QUALITY, INT_MIN, INT_MAX, V|E, "quality"}, +{"good", NULL, 0, FF_OPT_TYPE_CONST, VPX_DL_GOOD_QUALITY, INT_MIN, INT_MAX, V|E, "quality"}, +{"realtime", NULL, 0, FF_OPT_TYPE_CONST, VPX_DL_REALTIME, INT_MIN, INT_MAX, V|E, "quality"}, +{"vp8flags", "", offsetof(VP8Context, flags), FF_OPT_TYPE_FLAGS, 0, 0, UINT_MAX, V|E, "flags"}, +{"error_resilient", "enable error resilience", 0, FF_OPT_TYPE_CONST, VP8F_ERROR_RESILIENT, INT_MIN, INT_MAX, V|E, "flags"}, +{"altref", "enable use of alternate reference frames (VP8/2-pass only)", 0, 
FF_OPT_TYPE_CONST, VP8F_AUTO_ALT_REF, INT_MIN, INT_MAX, V|E, "flags"}, +{"arnr_max_frames", "altref noise reduction max frame count", offsetof(VP8Context, arnr_max_frames), FF_OPT_TYPE_INT, 0, 0, 15, V|E}, +{"arnr_strength", "altref noise reduction filter strength", offsetof(VP8Context, arnr_strength), FF_OPT_TYPE_INT, 3, 0, 6, V|E}, +{"arnr_type", "altref noise reduction filter type", offsetof(VP8Context, arnr_type), FF_OPT_TYPE_INT, 3, 1, 3, V|E}, +{NULL} +}; +static const AVClass class = { "libvpx", av_default_item_name, options, LIBAVUTIL_VERSION_INT }; + +#undef V +#undef E + /** String mappings for enum vp8e_enc_control_id */ static const char *ctlidstr[] = { [VP8E_UPD_ENTROPY] = "VP8E_UPD_ENTROPY", @@ -205,7 +242,6 @@ static av_cold int vp8_init(AVCodecContext *avctx) { VP8Context *ctx = avctx->priv_data; const struct vpx_codec_iface *iface = &vpx_codec_vp8_cx_algo; - int cpuused = 3; struct vpx_codec_enc_cfg enccfg; int res; @@ -224,6 +260,7 @@ static av_cold int vp8_init(AVCodecContext *avctx) enccfg.g_timebase.num = avctx->time_base.num; enccfg.g_timebase.den = avctx->time_base.den; enccfg.g_threads = avctx->thread_count; + enccfg.g_lag_in_frames= FFMIN(avctx->rc_lookahead, 25); //0-25, avoids init failure if (avctx->flags & CODEC_FLAG_PASS1) enccfg.g_pass = VPX_RC_FIRST_PASS; @@ -259,6 +296,7 @@ static av_cold int vp8_init(AVCodecContext *avctx) enccfg.rc_buf_initial_sz = avctx->rc_initial_buffer_occupancy * 1000LL / avctx->bit_rate; enccfg.rc_buf_optimal_sz = enccfg.rc_buf_sz * 5 / 6; + enccfg.rc_undershoot_pct = round(avctx->rc_buffer_aggressivity * 100); //_enc_init() will balk if kf_min_dist differs from max w/VPX_KF_AUTO if (avctx->keyint_min == avctx->gop_size) @@ -294,13 +332,14 @@ static av_cold int vp8_init(AVCodecContext *avctx) enccfg.rc_twopass_stats_in = ctx->twopass_stats; } - ctx->deadline = VPX_DL_GOOD_QUALITY; /* 0-3: For non-zero values the encoder increasingly optimizes for reduced complexity playback on low powered devices at the 
expense of encode quality. */ if (avctx->profile != FF_PROFILE_UNKNOWN) enccfg.g_profile = avctx->profile; + enccfg.g_error_resilient = ctx->flags & VP8F_ERROR_RESILIENT; + dump_enc_cfg(avctx, &enccfg); /* Construct Encoder Context */ res = vpx_codec_enc_init(&ctx->encoder, iface, &enccfg, 0); @@ -311,11 +350,17 @@ static av_cold int vp8_init(AVCodecContext *avctx) //codec control failures are currently treated only as warnings av_log(avctx, AV_LOG_DEBUG, "vpx_codec_control\n"); - codecctl_int(avctx, VP8E_SET_CPUUSED, cpuused); + codecctl_int(avctx, VP8E_SET_CPUUSED, ctx->cpuused); codecctl_int(avctx, VP8E_SET_NOISE_SENSITIVITY, avctx->noise_reduction); codecctl_int(avctx, VP8E_SET_TOKEN_PARTITIONS, av_log2(avctx->slices)); codecctl_int(avctx, VP8E_SET_STATIC_THRESHOLD, avctx->mb_threshold); codecctl_int(avctx, VP8E_SET_CQ_LEVEL, (int)avctx->crf); + codecctl_int(avctx, VP8E_SET_ENABLEAUTOALTREF, !!(ctx->flags & VP8F_AUTO_ALT_REF)); + codecctl_int(avctx, VP8E_SET_ARNR_MAXFRAMES, ctx->arnr_max_frames); + codecctl_int(avctx, VP8E_SET_ARNR_STRENGTH, ctx->arnr_strength); + codecctl_int(avctx, VP8E_SET_ARNR_TYPE, ctx->arnr_type); + + av_log(avctx, AV_LOG_DEBUG, "Using deadline: %d\n", ctx->deadline); //provide dummy value to initialize wrapper, values will be updated each _encode() vpx_img_wrap(&ctx->rawimg, VPX_IMG_FMT_I420, avctx->width, avctx->height, 1, @@ -511,4 +556,5 @@ AVCodec ff_libvpx_encoder = { CODEC_CAP_DELAY, .pix_fmts = (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE}, .long_name = NULL_IF_CONFIG_SMALL("libvpx VP8"), + .priv_class= &class, }; diff --git a/libavcodec/libx264.c b/libavcodec/libx264.c index 838cb703e8..1b6f55f801 100644 --- a/libavcodec/libx264.c +++ b/libavcodec/libx264.c @@ -42,6 +42,7 @@ typedef struct X264Context { int fastfirstpass; char *stats; char *weightp; + char *x264opts; } X264Context; static void X264_log(void *p, int level, const char *fmt, va_list args) @@ -118,6 +119,12 @@ static int X264_frame(AVCodecContext *ctx, 
uint8_t *buf, x4->params.b_tff = frame->top_field_first; x264_encoder_reconfig(x4->enc, &x4->params); } + if (x4->params.vui.i_sar_height != ctx->sample_aspect_ratio.den + || x4->params.vui.i_sar_width != ctx->sample_aspect_ratio.num) { + x4->params.vui.i_sar_height = ctx->sample_aspect_ratio.den; + x4->params.vui.i_sar_width = ctx->sample_aspect_ratio.num; + x264_encoder_reconfig(x4->enc, &x4->params); + } } do { @@ -163,13 +170,6 @@ static av_cold int X264_close(AVCodecContext *avctx) if (x4->enc) x264_encoder_close(x4->enc); - av_free(x4->preset); - av_free(x4->tune); - av_free(x4->profile); - av_free(x4->level); - av_free(x4->stats); - av_free(x4->weightp); - return 0; } @@ -193,7 +193,7 @@ static void check_default_settings(AVCodecContext *avctx) if (score >= 5) { av_log(avctx, AV_LOG_ERROR, "Default settings detected, using medium profile\n"); x4->preset = av_strdup("medium"); - if (avctx->bit_rate == 200*100) + if (avctx->bit_rate == 200*1000) avctx->crf = 23; } } @@ -221,7 +221,6 @@ static av_cold int X264_init(AVCodecContext *avctx) x4->params.i_bframe_adaptive = avctx->b_frame_strategy; x4->params.i_bframe_bias = avctx->bframebias; x4->params.i_bframe_pyramid = avctx->flags2 & CODEC_FLAG2_BPYRAMID ? X264_B_PYRAMID_NORMAL : X264_B_PYRAMID_NONE; - avctx->has_b_frames = avctx->flags2 & CODEC_FLAG2_BPYRAMID ? 
2 : !!avctx->max_b_frames; x4->params.i_keyint_min = avctx->keyint_min; if (x4->params.i_keyint_min > x4->params.i_keyint_max) @@ -344,6 +343,17 @@ static av_cold int X264_init(AVCodecContext *avctx) OPT_STR("level", x4->level); + if(x4->x264opts){ + const char *p= x4->x264opts; + while(p){ + char param[256]={0}, val[256]={0}; + sscanf(p, "%255[^:=]=%255[^:]", param, val); + OPT_STR(param, val); + p= strchr(p, ':'); + p+=!!p; + } + } + if (x4->fastfirstpass) x264_param_apply_fastfirstpass(&x4->params); @@ -367,6 +377,8 @@ static av_cold int X264_init(AVCodecContext *avctx) x4->params.b_interlaced = avctx->flags & CODEC_FLAG_INTERLACED_DCT; + x4->params.b_open_gop = !(avctx->flags & CODEC_FLAG_CLOSED_GOP); + x4->params.i_slice_count = avctx->slices; x4->params.vui.b_fullrange = avctx->pix_fmt == PIX_FMT_YUVJ420P; @@ -407,13 +419,14 @@ static av_cold int X264_init(AVCodecContext *avctx) #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM static const AVOption options[] = { - {"preset", "Set the encoding preset", OFFSET(preset), FF_OPT_TYPE_STRING, 0, 0, 0, VE}, - {"tune", "Tune the encoding params", OFFSET(tune), FF_OPT_TYPE_STRING, 0, 0, 0, VE}, - {"fastfirstpass", "Use fast settings when encoding first pass", OFFSET(fastfirstpass), FF_OPT_TYPE_INT, 1, 0, 1, VE}, - {"profile", "Set profile restrictions", OFFSET(profile), FF_OPT_TYPE_STRING, 0, 0, 0, VE}, - {"level", "Specify level (as defined by Annex A)", OFFSET(level), FF_OPT_TYPE_STRING, 0, 0, 0, VE}, - {"passlogfile", "Filename for 2 pass stats", OFFSET(stats), FF_OPT_TYPE_STRING, 0, 0, 0, VE}, - {"wpredp", "Weighted prediction for P-frames", OFFSET(weightp), FF_OPT_TYPE_STRING, 0, 0, 0, VE}, + {"preset", "Set the encoding preset", OFFSET(preset), FF_OPT_TYPE_STRING, {.str=NULL}, 0, 0, VE}, + {"tune", "Tune the encoding params", OFFSET(tune), FF_OPT_TYPE_STRING, {.str=NULL}, 0, 0, VE}, + {"fastfirstpass", "Use fast settings when encoding first pass", OFFSET(fastfirstpass), FF_OPT_TYPE_INT, {.dbl=1}, 
0, 1, VE}, + {"profile", "Set profile restrictions", OFFSET(profile), FF_OPT_TYPE_STRING, {.str=NULL}, 0, 0, VE}, + {"level", "Specify level (as defined by Annex A)", OFFSET(level), FF_OPT_TYPE_STRING, {.str=NULL}, 0, 0, VE}, + {"passlogfile", "Filename for 2 pass stats", OFFSET(stats), FF_OPT_TYPE_STRING, {.str=NULL}, 0, 0, VE}, + {"wpredp", "Weighted prediction for P-frames", OFFSET(weightp), FF_OPT_TYPE_STRING, {.str=NULL}, 0, 0, VE}, + {"x264opts", "x264 options", OFFSET(x264opts), FF_OPT_TYPE_STRING, {.str=NULL}, 0, 0, VE}, { NULL }, }; diff --git a/libavcodec/mathops.h b/libavcodec/mathops.h index 4d88ed14c9..0477cfdce3 100644 --- a/libavcodec/mathops.h +++ b/libavcodec/mathops.h @@ -23,6 +23,7 @@ #define AVCODEC_MATHOPS_H #include "libavutil/common.h" +#include "config.h" #if ARCH_ARM # include "arm/mathops.h" @@ -40,16 +41,17 @@ /* generic implementation */ +#ifndef MUL64 +# define MUL64(a,b) ((int64_t)(a) * (int64_t)(b)) +#endif + #ifndef MULL -# define MULL(a,b,s) (((int64_t)(a) * (int64_t)(b)) >> (s)) +# define MULL(a,b,s) (MUL64(a, b) >> (s)) #endif #ifndef MULH -//gcc 3.4 creates an incredibly bloated mess out of this -//# define MULH(a,b) (((int64_t)(a) * (int64_t)(b))>>32) - static av_always_inline int MULH(int a, int b){ - return ((int64_t)(a) * (int64_t)(b))>>32; + return MUL64(a, b) >> 32; } #endif @@ -59,10 +61,6 @@ static av_always_inline unsigned UMULH(unsigned a, unsigned b){ } #endif -#ifndef MUL64 -# define MUL64(a,b) ((int64_t)(a) * (int64_t)(b)) -#endif - #ifndef MAC64 # define MAC64(d, a, b) ((d) += MUL64(a, b)) #endif diff --git a/libavcodec/mdec.c b/libavcodec/mdec.c index 30cd3ab176..5f540f05f2 100644 --- a/libavcodec/mdec.c +++ b/libavcodec/mdec.c @@ -126,7 +126,8 @@ static inline int decode_mb(MDECContext *a, DCTELEM block[6][64]){ a->dsp.clear_blocks(block[0]); for(i=0; i<6; i++){ - if( mdec_decode_block_intra(a, block[ block_index[i] ], block_index[i]) < 0) + if( mdec_decode_block_intra(a, block[ block_index[i] ], block_index[i]) < 
0 || + get_bits_left(&a->gb) < 0) return -1; } return 0; @@ -252,6 +253,7 @@ static av_cold int decode_init_thread_copy(AVCodecContext *avctx){ return 0; } + static av_cold int decode_end(AVCodecContext *avctx){ MDECContext * const a = avctx->priv_data; diff --git a/libavcodec/mjpegdec.c b/libavcodec/mjpegdec.c index afcc1b74a7..4e58feffad 100644 --- a/libavcodec/mjpegdec.c +++ b/libavcodec/mjpegdec.c @@ -881,9 +881,12 @@ static int mjpeg_decode_scan(MJpegDecodeContext *s, int nb_components, int Ah, i } } - if (s->restart_interval && !--s->restart_count) { + if (s->restart_interval && show_bits(&s->gb, 8) == 0xFF){/* skip RSTn */ + --s->restart_count; align_get_bits(&s->gb); - skip_bits(&s->gb, 16); /* skip RSTn */ + while(show_bits(&s->gb, 8) == 0xFF) + skip_bits(&s->gb, 8); + skip_bits(&s->gb, 8); for (i=0; i<nb_components; i++) /* reset dc */ s->last_dc[i] = 1024; } diff --git a/libavcodec/mjpegenc.c b/libavcodec/mjpegenc.c index db477a0ab6..b721ab3580 100644 --- a/libavcodec/mjpegenc.c +++ b/libavcodec/mjpegenc.c @@ -162,7 +162,7 @@ static void jpeg_put_comments(MpegEncContext *s) put_marker(p, APP0); put_bits(p, 16, 16); ff_put_string(p, "JFIF", 1); /* this puts the trailing zero-byte too */ - put_bits(p, 16, 0x0201); /* v 1.02 */ + put_bits(p, 16, 0x0102); /* v 1.02 */ put_bits(p, 8, 0); /* units type: 0 - aspect ratio */ put_bits(p, 16, s->avctx->sample_aspect_ratio.num); put_bits(p, 16, s->avctx->sample_aspect_ratio.den); diff --git a/libavcodec/mlpdec.c b/libavcodec/mlpdec.c index 50826d0ff5..b13d0795cd 100644 --- a/libavcodec/mlpdec.c +++ b/libavcodec/mlpdec.c @@ -340,6 +340,7 @@ static int read_major_sync(MLPDecodeContext *m, GetBitContext *gb) m->avctx->channel_layout = ff_truehd_layout(mh.channels_thd_stream1); } if (m->avctx->channels && + !m->avctx->request_channels && !m->avctx->request_channel_layout && av_get_channel_layout_nb_channels(m->avctx->channel_layout) != m->avctx->channels) { m->avctx->channel_layout = 0; av_log_ask_for_sample(m->avctx, 
"Unknown channel layout."); diff --git a/libavcodec/mlpdsp.c b/libavcodec/mlpdsp.c index a0647eee69..7ec8dd24e6 100644 --- a/libavcodec/mlpdsp.c +++ b/libavcodec/mlpdsp.c @@ -19,8 +19,8 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#include "libavcodec/mlp.h" #include "dsputil.h" +#include "mlp.h" static void ff_mlp_filter_channel(int32_t *state, const int32_t *coeff, int firorder, int iirorder, diff --git a/libavcodec/motionpixels_tablegen.h b/libavcodec/motionpixels_tablegen.h index 5d6df52af1..b56dec64a4 100644 --- a/libavcodec/motionpixels_tablegen.h +++ b/libavcodec/motionpixels_tablegen.h @@ -20,8 +20,8 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#ifndef MOTIONPIXELS_TABLEGEN_H -#define MOTIONPIXELS_TABLEGEN_H +#ifndef AVCODEC_MOTIONPIXELS_TABLEGEN_H +#define AVCODEC_MOTIONPIXELS_TABLEGEN_H #include <stdint.h> @@ -88,4 +88,4 @@ static void motionpixels_tableinit(void) } #endif /* CONFIG_HARDCODED_TABLES */ -#endif /* MOTIONPIXELS_TABLEGEN_H */ +#endif /* AVCODEC_MOTIONPIXELS_TABLEGEN_H */ diff --git a/libavcodec/mp3_header_compress_bsf.c b/libavcodec/mp3_header_compress_bsf.c index 006a3679e9..417a2940d5 100644 --- a/libavcodec/mp3_header_compress_bsf.c +++ b/libavcodec/mp3_header_compress_bsf.c @@ -18,8 +18,9 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#include "libavutil/intreadwrite.h" #include "avcodec.h" -#include "mpegaudio.h" +#include "mpegaudiodecheader.h" static int mp3_header_compress(AVBitStreamFilterContext *bsfc, AVCodecContext *avctx, const char *args, diff --git a/libavcodec/mp3_header_decompress_bsf.c b/libavcodec/mp3_header_decompress_bsf.c index f096d5e4fd..a9c23b17ce 100644 --- a/libavcodec/mp3_header_decompress_bsf.c +++ b/libavcodec/mp3_header_decompress_bsf.c @@ -18,8 +18,9 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#include "libavutil/intreadwrite.h" #include "avcodec.h" 
-#include "mpegaudio.h" +#include "mpegaudiodecheader.h" #include "mpegaudiodata.h" diff --git a/libavcodec/mpc.c b/libavcodec/mpc.c index d9a1fb776a..c2975ec732 100644 --- a/libavcodec/mpc.c +++ b/libavcodec/mpc.c @@ -29,6 +29,7 @@ #include "avcodec.h" #include "get_bits.h" #include "dsputil.h" +#include "mpegaudiodsp.h" #include "mpegaudio.h" #include "mpc.h" @@ -36,7 +37,7 @@ void ff_mpc_init(void) { - ff_mpa_synth_init(ff_mpa_synth_window); + ff_mpa_synth_init_fixed(ff_mpa_synth_window_fixed); } /** @@ -51,8 +52,9 @@ static void mpc_synth(MPCContext *c, int16_t *out, int channels) for(ch = 0; ch < channels; ch++){ samples_ptr = samples + ch; for(i = 0; i < SAMPLES_PER_BAND; i++) { - ff_mpa_synth_filter(c->synth_buf[ch], &(c->synth_buf_offset[ch]), - ff_mpa_synth_window, &dither_state, + ff_mpa_synth_filter_fixed(&c->mpadsp, + c->synth_buf[ch], &(c->synth_buf_offset[ch]), + ff_mpa_synth_window_fixed, &dither_state, samples_ptr, channels, c->sb_samples[ch][i]); samples_ptr += 32 * channels; diff --git a/libavcodec/mpc.h b/libavcodec/mpc.h index 2d9755a593..cd5769234c 100644 --- a/libavcodec/mpc.h +++ b/libavcodec/mpc.h @@ -34,8 +34,7 @@ #include "get_bits.h" #include "dsputil.h" #include "mpegaudio.h" - -#include "mpcdata.h" +#include "mpegaudiodsp.h" #define BANDS 32 #define SAMPLES_PER_BAND 36 @@ -52,6 +51,7 @@ typedef struct { typedef struct { DSPContext dsp; + MPADSPContext mpadsp; GetBitContext gb; int IS, MSS, gapless; int lastframelen; diff --git a/libavcodec/mpc7.c b/libavcodec/mpc7.c index 04657e5ff2..bb21469356 100644 --- a/libavcodec/mpc7.c +++ b/libavcodec/mpc7.c @@ -29,7 +29,7 @@ #include "avcodec.h" #include "get_bits.h" #include "dsputil.h" -#include "mpegaudio.h" +#include "mpegaudiodsp.h" #include "libavutil/audioconvert.h" #include "mpc.h" @@ -68,6 +68,7 @@ static av_cold int mpc7_decode_init(AVCodecContext * avctx) memset(c->oldDSCF, 0, sizeof(c->oldDSCF)); av_lfg_init(&c->rnd, 0xDEADBEEF); dsputil_init(&c->dsp, avctx); + 
ff_mpadsp_init(&c->mpadsp); c->dsp.bswap_buf((uint32_t*)buf, (const uint32_t*)avctx->extradata, 4); ff_mpc_init(); init_get_bits(&gb, buf, 128); diff --git a/libavcodec/mpc8.c b/libavcodec/mpc8.c index cf65ffe904..2864b1a010 100644 --- a/libavcodec/mpc8.c +++ b/libavcodec/mpc8.c @@ -29,11 +29,10 @@ #include "avcodec.h" #include "get_bits.h" #include "dsputil.h" -#include "mpegaudio.h" +#include "mpegaudiodsp.h" #include "libavutil/audioconvert.h" #include "mpc.h" -#include "mpcdata.h" #include "mpc8data.h" #include "mpc8huff.h" @@ -120,6 +119,7 @@ static av_cold int mpc8_decode_init(AVCodecContext * avctx) memset(c->oldDSCF, 0, sizeof(c->oldDSCF)); av_lfg_init(&c->rnd, 0xDEADBEEF); dsputil_init(&c->dsp, avctx); + ff_mpadsp_init(&c->mpadsp); ff_mpc_init(); diff --git a/libavcodec/mpcdata.h b/libavcodec/mpcdata.h index a2212ecb25..03df3da3b5 100644 --- a/libavcodec/mpcdata.h +++ b/libavcodec/mpcdata.h @@ -22,8 +22,6 @@ #ifndef AVCODEC_MPCDATA_H #define AVCODEC_MPCDATA_H -#include <stdint.h> - static const float mpc_CC[18] = { 65536.0000, 21845.3333, 13107.2000, 9362.2857, 7281.7778, 4369.0667, 2114.0645, 1040.2539, 516.0315, 257.0039, 128.2505, 64.0626, 32.0156, 16.0039, 8.0010, diff --git a/libavcodec/mpeg12.c b/libavcodec/mpeg12.c index c3394b9c81..219d5c854d 100644 --- a/libavcodec/mpeg12.c +++ b/libavcodec/mpeg12.c @@ -30,6 +30,7 @@ #include "avcodec.h" #include "dsputil.h" #include "mpegvideo.h" +#include "libavutil/avassert.h" #include "mpeg12.h" #include "mpeg12data.h" @@ -1331,19 +1332,17 @@ static int mpeg_decode_postinit(AVCodecContext *avctx){ avctx->ticks_per_frame=2; //MPEG-2 aspect if(s->aspect_ratio_info > 1){ - AVRational dar= + AVRational dar = av_mul_q( - av_div_q( - ff_mpeg2_aspect[s->aspect_ratio_info], - (AVRational){s1->pan_scan.width, s1->pan_scan.height} - ), + av_div_q(ff_mpeg2_aspect[s->aspect_ratio_info], + (AVRational){s1->pan_scan.width, s1->pan_scan.height}), (AVRational){s->width, s->height}); - //we ignore the spec here and guess a bit 
as reality does not match the spec, see for example + // we ignore the spec here and guess a bit as reality does not match the spec, see for example // res_change_ffmpeg_aspect.ts and sequence-display-aspect.mpg - //issue1613, 621, 562 - if( (s1->pan_scan.width == 0 )||(s1->pan_scan.height == 0) - || (av_cmp_q(dar,(AVRational){4,3})&&av_cmp_q(dar,(AVRational){16,9}))){ + // issue1613, 621, 562 + if((s1->pan_scan.width == 0 ) || (s1->pan_scan.height == 0) || + (av_cmp_q(dar,(AVRational){4,3}) && av_cmp_q(dar,(AVRational){16,9}))) { s->avctx->sample_aspect_ratio= av_div_q( ff_mpeg2_aspect[s->aspect_ratio_info], @@ -1670,7 +1669,7 @@ static int mpeg_field_start(MpegEncContext *s, const uint8_t *buf, int buf_size) *s->current_picture_ptr->pan_scan= s1->pan_scan; - if (HAVE_PTHREADS && avctx->active_thread_type&FF_THREAD_FRAME) + if (HAVE_PTHREADS && (avctx->active_thread_type & FF_THREAD_FRAME)) ff_thread_finish_setup(avctx); }else{ //second field int i; @@ -2004,7 +2003,7 @@ static int slice_end(AVCodecContext *avctx, AVFrame *pict) *pict= *(AVFrame*)s->current_picture_ptr; ff_print_debug_info(s, pict); } else { - if (avctx->active_thread_type&FF_THREAD_FRAME) + if (avctx->active_thread_type & FF_THREAD_FRAME) s->picture_number++; /* latency of 1 frame for I- and P-frames */ /* XXX: use another variable than picture_number */ @@ -2179,14 +2178,13 @@ static void mpeg_decode_gop(AVCodecContext *avctx, Mpeg1Context *s1 = avctx->priv_data; MpegEncContext *s = &s1->mpeg_enc_ctx; - int drop_frame_flag; int time_code_hours, time_code_minutes; int time_code_seconds, time_code_pictures; int broken_link; init_get_bits(&s->gb, buf, buf_size*8); - drop_frame_flag = get_bits1(&s->gb); + skip_bits1(&s->gb); /* drop_frame_flag */ time_code_hours=get_bits(&s->gb,5); time_code_minutes = get_bits(&s->gb,6); @@ -2340,8 +2338,9 @@ static int decode_chunks(AVCodecContext *avctx, buf_ptr = ff_find_start_code(buf_ptr,buf_end, &start_code); if (start_code > 0x1ff){ if(s2->pict_type != 
AV_PICTURE_TYPE_B || avctx->skip_frame <= AVDISCARD_DEFAULT){ - if(HAVE_THREADS && avctx->active_thread_type&FF_THREAD_SLICE){ + if(HAVE_THREADS && (avctx->active_thread_type & FF_THREAD_SLICE)){ int i; + av_assert0(avctx->thread_count > 1); avctx->execute(avctx, slice_decode_thread, &s2->thread_context[0], NULL, s->slice_count, sizeof(void*)); for(i=0; i<s->slice_count; i++) @@ -2516,8 +2515,9 @@ static int decode_chunks(AVCodecContext *avctx, break; } - if(HAVE_THREADS && avctx->active_thread_type&FF_THREAD_SLICE){ + if(HAVE_THREADS && (avctx->active_thread_type & FF_THREAD_SLICE)){ int threshold= (s2->mb_height*s->slice_count + avctx->thread_count/2) / avctx->thread_count; + av_assert0(avctx->thread_count > 1); if(threshold <= mb_y){ MpegEncContext *thread_context= s2->thread_context[s->slice_count]; diff --git a/libavcodec/mpeg12enc.c b/libavcodec/mpeg12enc.c index 487551171c..41344562b2 100644 --- a/libavcodec/mpeg12enc.c +++ b/libavcodec/mpeg12enc.c @@ -761,10 +761,9 @@ void ff_mpeg1_encode_init(MpegEncContext *s) if(mv==0) len= ff_mpeg12_mbMotionVectorTable[0][1]; else{ - int val, bit_size, range, code; + int val, bit_size, code; bit_size = f_code - 1; - range = 1 << bit_size; val=mv; if (val < 0) diff --git a/libavcodec/mpeg4video.h b/libavcodec/mpeg4video.h index 96db36683e..f1e6a4d172 100644 --- a/libavcodec/mpeg4video.h +++ b/libavcodec/mpeg4video.h @@ -196,4 +196,4 @@ static inline int ff_mpeg4_pred_dc(MpegEncContext * s, int n, int level, int *di return ret; } -#endif +#endif /* AVCODEC_MPEG4VIDEO_H */ diff --git a/libavcodec/mpeg4videodec.c b/libavcodec/mpeg4videodec.c index a9d67f220c..6b7b4bfbc0 100644 --- a/libavcodec/mpeg4videodec.c +++ b/libavcodec/mpeg4videodec.c @@ -397,14 +397,13 @@ int mpeg4_decode_video_packet_header(MpegEncContext *s) header_extension= get_bits1(&s->gb); } if(header_extension){ - int time_increment; int time_incr=0; while (get_bits1(&s->gb) != 0) time_incr++; check_marker(&s->gb, "before time_increment in video packed 
header"); - time_increment= get_bits(&s->gb, s->time_increment_bits); + skip_bits(&s->gb, s->time_increment_bits); /* time_increment */ check_marker(&s->gb, "before vop_coding_type in video packed header"); skip_bits(&s->gb, 2); /* vop coding type */ @@ -1805,16 +1804,14 @@ no_cplx_est: if (s->scalability) { GetBitContext bak= *gb; - int ref_layer_id; - int ref_layer_sampling_dir; int h_sampling_factor_n; int h_sampling_factor_m; int v_sampling_factor_n; int v_sampling_factor_m; s->hierachy_type= get_bits1(gb); - ref_layer_id= get_bits(gb, 4); - ref_layer_sampling_dir= get_bits1(gb); + skip_bits(gb, 4); /* ref_layer_id */ + skip_bits1(gb); /* ref_layer_sampling_dir */ h_sampling_factor_n= get_bits(gb, 5); h_sampling_factor_m= get_bits(gb, 5); v_sampling_factor_n= get_bits(gb, 5); @@ -1993,15 +1990,13 @@ static int decode_vop_header(MpegEncContext *s, GetBitContext *gb){ if (s->shape != RECT_SHAPE) { if (s->vol_sprite_usage != 1 || s->pict_type != AV_PICTURE_TYPE_I) { - int width, height, hor_spat_ref, ver_spat_ref; - - width = get_bits(gb, 13); + skip_bits(gb, 13); /* width */ skip_bits1(gb); /* marker */ - height = get_bits(gb, 13); + skip_bits(gb, 13); /* height */ skip_bits1(gb); /* marker */ - hor_spat_ref = get_bits(gb, 13); /* hor_spat_ref */ + skip_bits(gb, 13); /* hor_spat_ref */ skip_bits1(gb); /* marker */ - ver_spat_ref = get_bits(gb, 13); /* ver_spat_ref */ + skip_bits(gb, 13); /* ver_spat_ref */ } skip_bits1(gb); /* change_CR_disable */ diff --git a/libavcodec/mpegaudio.h b/libavcodec/mpegaudio.h index 30ef349399..b829cd3f8a 100644 --- a/libavcodec/mpegaudio.h +++ b/libavcodec/mpegaudio.h @@ -30,12 +30,7 @@ # define CONFIG_FLOAT 0 #endif -#include "avcodec.h" -#include "get_bits.h" -#include "dsputil.h" -#include "dct.h" - -#define CONFIG_AUDIO_NONSHORT 0 +#include <stdint.h> /* max frame size, in samples */ #define MPA_FRAME_SIZE 1152 @@ -52,12 +47,6 @@ #define MPA_DUAL 2 #define MPA_MONO 3 -/* header + layer + bitrate + freq + lsf/mpeg25 */ -#define 
SAME_HEADER_MASK \ - (0xffe00000 | (3 << 17) | (0xf << 12) | (3 << 10) | (3 << 19)) - -#define MP3_MASK 0xFFFE0CCF - #ifndef FRAC_BITS #define FRAC_BITS 23 /* fractional bits for sb_samples and dct */ #define WFRAC_BITS 16 /* fractional bits for window */ @@ -68,137 +57,19 @@ #define FIX(a) ((int)((a) * FRAC_ONE)) #if CONFIG_FLOAT -typedef float OUT_INT; -#define OUT_FMT AV_SAMPLE_FMT_FLT -#elif CONFIG_MPEGAUDIO_HP && CONFIG_AUDIO_NONSHORT -typedef int32_t OUT_INT; -#define OUT_MAX INT32_MAX -#define OUT_MIN INT32_MIN -#define OUT_SHIFT (WFRAC_BITS + FRAC_BITS - 31) -#define OUT_FMT AV_SAMPLE_FMT_S32 -#else -typedef int16_t OUT_INT; -#define OUT_MAX INT16_MAX -#define OUT_MIN INT16_MIN -#define OUT_SHIFT (WFRAC_BITS + FRAC_BITS - 15) -#define OUT_FMT AV_SAMPLE_FMT_S16 -#endif - -#if CONFIG_FLOAT # define INTFLOAT float typedef float MPA_INT; +typedef float OUT_INT; #elif FRAC_BITS <= 15 # define INTFLOAT int typedef int16_t MPA_INT; +typedef int16_t OUT_INT; #else # define INTFLOAT int typedef int32_t MPA_INT; +typedef int16_t OUT_INT; #endif -#define BACKSTEP_SIZE 512 -#define EXTRABYTES 24 - -/* layer 3 "granule" */ -typedef struct GranuleDef { - uint8_t scfsi; - int part2_3_length; - int big_values; - int global_gain; - int scalefac_compress; - uint8_t block_type; - uint8_t switch_point; - int table_select[3]; - int subblock_gain[3]; - uint8_t scalefac_scale; - uint8_t count1table_select; - int region_size[3]; /* number of huffman codes in each region */ - int preflag; - int short_start, long_end; /* long/short band indexes */ - uint8_t scale_factors[40]; - INTFLOAT sb_hybrid[SBLIMIT * 18]; /* 576 samples */ -} GranuleDef; - -#define MPA_DECODE_HEADER \ - int frame_size; \ - int error_protection; \ - int layer; \ - int sample_rate; \ - int sample_rate_index; /* between 0 and 8 */ \ - int bit_rate; \ - int nb_channels; \ - int mode; \ - int mode_ext; \ - int lsf; - -typedef struct MPADecodeHeader { - MPA_DECODE_HEADER -} MPADecodeHeader; - -typedef struct 
MPADecodeContext { - MPA_DECODE_HEADER - uint8_t last_buf[2*BACKSTEP_SIZE + EXTRABYTES]; - int last_buf_size; - /* next header (used in free format parsing) */ - uint32_t free_format_next_header; - GetBitContext gb; - GetBitContext in_gb; - DECLARE_ALIGNED(16, MPA_INT, synth_buf)[MPA_MAX_CHANNELS][512 * 2]; - int synth_buf_offset[MPA_MAX_CHANNELS]; - DECLARE_ALIGNED(16, INTFLOAT, sb_samples)[MPA_MAX_CHANNELS][36][SBLIMIT]; - INTFLOAT mdct_buf[MPA_MAX_CHANNELS][SBLIMIT * 18]; /* previous samples, for layer 3 MDCT */ - GranuleDef granules[2][2]; /* Used in Layer 3 */ - int adu_mode; ///< 0 for standard mp3, 1 for adu formatted mp3 - int dither_state; - int error_recognition; - AVCodecContext* avctx; -#if CONFIG_FLOAT - DCTContext dct; -#endif - void (*apply_window_mp3)(MPA_INT *synth_buf, MPA_INT *window, - int *dither_state, OUT_INT *samples, int incr); -} MPADecodeContext; - -/* layer 3 huffman tables */ -typedef struct HuffTable { - int xsize; - const uint8_t *bits; - const uint16_t *codes; -} HuffTable; - int ff_mpa_l2_select_table(int bitrate, int nb_channels, int freq, int lsf); -int ff_mpa_decode_header(AVCodecContext *avctx, uint32_t head, int *sample_rate, int *channels, int *frame_size, int *bitrate); -extern MPA_INT ff_mpa_synth_window[]; -void ff_mpa_synth_init(MPA_INT *window); -void ff_mpa_synth_filter(MPA_INT *synth_buf_ptr, int *synth_buf_offset, - MPA_INT *window, int *dither_state, - OUT_INT *samples, int incr, - INTFLOAT sb_samples[SBLIMIT]); - -void ff_mpa_synth_init_float(MPA_INT *window); -void ff_mpa_synth_filter_float(MPADecodeContext *s, - MPA_INT *synth_buf_ptr, int *synth_buf_offset, - MPA_INT *window, int *dither_state, - OUT_INT *samples, int incr, - INTFLOAT sb_samples[SBLIMIT]); - -void ff_mpegaudiodec_init_mmx(MPADecodeContext *s); -void ff_mpegaudiodec_init_altivec(MPADecodeContext *s); - -/* fast header check for resync */ -static inline int ff_mpa_check_header(uint32_t header){ - /* header */ - if ((header & 0xffe00000) != 
0xffe00000) - return -1; - /* layer check */ - if ((header & (3<<17)) == 0) - return -1; - /* bit rate */ - if ((header & (0xf<<12)) == 0xf<<12) - return -1; - /* frequency */ - if ((header & (3<<10)) == 3<<10) - return -1; - return 0; -} #endif /* AVCODEC_MPEGAUDIO_H */ diff --git a/libavcodec/mpegaudio3.h b/libavcodec/mpegaudio3.h deleted file mode 100644 index c374a59a76..0000000000 --- a/libavcodec/mpegaudio3.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2007 Michael Niedermayer - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -/* layer 3 "granule" */ -typedef struct GranuleDef { - uint8_t scfsi; - int part2_3_length; - int big_values; - int global_gain; - int scalefac_compress; - uint8_t block_type; - uint8_t switch_point; - int table_select[3]; - int subblock_gain[3]; - uint8_t scalefac_scale; - uint8_t count1table_select; - int region_size[3]; /* number of huffman codes in each region */ - int preflag; - int short_start, long_end; /* long/short band indexes */ - uint8_t scale_factors[40]; - int32_t sb_hybrid[SBLIMIT * 18]; /* 576 samples */ -} GranuleDef; - -void ff_mp3_init(void); - -/** - * Compute huffman coded region sizes. 
- */ -void ff_init_short_region(MPADecodeContext *s, GranuleDef *g); - -/** - * Compute huffman coded region sizes. - */ -void ff_init_long_region(MPADecodeContext *s, GranuleDef *g, int ra1, int ra2); - -void ff_compute_band_indexes(MPADecodeContext *s, GranuleDef *g); diff --git a/libavcodec/mpegaudio_parser.c b/libavcodec/mpegaudio_parser.c index cfd92d42eb..94a286fb14 100644 --- a/libavcodec/mpegaudio_parser.c +++ b/libavcodec/mpegaudio_parser.c @@ -21,7 +21,6 @@ */ #include "parser.h" -#include "mpegaudio.h" #include "mpegaudiodecheader.h" @@ -35,49 +34,9 @@ typedef struct MpegAudioParseContext { #define MPA_HEADER_SIZE 4 /* header + layer + bitrate + freq + lsf/mpeg25 */ -#undef SAME_HEADER_MASK /* mpegaudio.h defines different version */ #define SAME_HEADER_MASK \ (0xffe00000 | (3 << 17) | (3 << 10) | (3 << 19)) -/* useful helper to get mpeg audio stream infos. Return -1 if error in - header, otherwise the coded frame size in bytes */ -int ff_mpa_decode_header(AVCodecContext *avctx, uint32_t head, int *sample_rate, int *channels, int *frame_size, int *bit_rate) -{ - MPADecodeHeader s1, *s = &s1; - - if (ff_mpa_check_header(head) != 0) - return -1; - - if (ff_mpegaudio_decode_header(s, head) != 0) { - return -1; - } - - switch(s->layer) { - case 1: - avctx->codec_id = CODEC_ID_MP1; - *frame_size = 384; - break; - case 2: - avctx->codec_id = CODEC_ID_MP2; - *frame_size = 1152; - break; - default: - case 3: - avctx->codec_id = CODEC_ID_MP3; - if (s->lsf) - *frame_size = 576; - else - *frame_size = 1152; - break; - } - - *sample_rate = s->sample_rate; - *channels = s->nb_channels; - *bit_rate = s->bit_rate; - avctx->sub_id = s->layer; - return s->frame_size; -} - static int mpegaudio_parse(AVCodecParserContext *s1, AVCodecContext *avctx, const uint8_t **poutbuf, int *poutbuf_size, diff --git a/libavcodec/mpegaudio_tablegen.c b/libavcodec/mpegaudio_tablegen.c index 0888e78620..90c9de430a 100644 --- a/libavcodec/mpegaudio_tablegen.c +++ 
b/libavcodec/mpegaudio_tablegen.c @@ -33,9 +33,9 @@ int main(void) WRITE_ARRAY("static const", int8_t, table_4_3_exp); WRITE_ARRAY("static const", uint32_t, table_4_3_value); - WRITE_ARRAY("static const", uint32_t, exp_table); + WRITE_ARRAY("static const", uint32_t, exp_table_fixed); WRITE_ARRAY("static const", float, exp_table_float); - WRITE_2D_ARRAY("static const", uint32_t, expval_table); + WRITE_2D_ARRAY("static const", uint32_t, expval_table_fixed); WRITE_2D_ARRAY("static const", float, expval_table_float); return 0; diff --git a/libavcodec/mpegaudio_tablegen.h b/libavcodec/mpegaudio_tablegen.h index 01c4174a60..291e40b9aa 100644 --- a/libavcodec/mpegaudio_tablegen.h +++ b/libavcodec/mpegaudio_tablegen.h @@ -20,8 +20,8 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#ifndef MPEGAUDIO_TABLEGEN_H -#define MPEGAUDIO_TABLEGEN_H +#ifndef AVCODEC_MPEGAUDIO_TABLEGEN_H +#define AVCODEC_MPEGAUDIO_TABLEGEN_H #include <stdint.h> #include <math.h> @@ -33,8 +33,8 @@ #else static int8_t table_4_3_exp[TABLE_4_3_SIZE]; static uint32_t table_4_3_value[TABLE_4_3_SIZE]; -static uint32_t exp_table[512]; -static uint32_t expval_table[512][16]; +static uint32_t exp_table_fixed[512]; +static uint32_t expval_table_fixed[512][16]; static float exp_table_float[512]; static float expval_table_float[512][16]; @@ -59,13 +59,13 @@ static void mpegaudio_tableinit(void) for (exponent = 0; exponent < 512; exponent++) { for (value = 0; value < 16; value++) { double f = (double)value * cbrtf(value) * pow(2, (exponent - 400) * 0.25 + FRAC_BITS + 5); - expval_table[exponent][value] = llrint(f); + expval_table_fixed[exponent][value] = llrint(f); expval_table_float[exponent][value] = f; } - exp_table[exponent] = expval_table[exponent][1]; + exp_table_fixed[exponent] = expval_table_fixed[exponent][1]; exp_table_float[exponent] = expval_table_float[exponent][1]; } } #endif /* CONFIG_HARDCODED_TABLES */ -#endif /* MPEGAUDIO_TABLEGEN_H */ +#endif /* 
AVCODEC_MPEGAUDIO_TABLEGEN_H */ diff --git a/libavcodec/mpegaudiodata.h b/libavcodec/mpegaudiodata.h index 5626e3df50..9d65a1e2a6 100644 --- a/libavcodec/mpegaudiodata.h +++ b/libavcodec/mpegaudiodata.h @@ -27,7 +27,7 @@ #ifndef AVCODEC_MPEGAUDIODATA_H #define AVCODEC_MPEGAUDIODATA_H -#include "libavutil/common.h" +#include <stdint.h> #define MODE_EXT_MS_STEREO 2 #define MODE_EXT_I_STEREO 1 diff --git a/libavcodec/mpegaudiodec.c b/libavcodec/mpegaudiodec.c index b1c9ef93a0..56a48ce71b 100644 --- a/libavcodec/mpegaudiodec.c +++ b/libavcodec/mpegaudiodec.c @@ -27,8 +27,8 @@ #include "libavutil/audioconvert.h" #include "avcodec.h" #include "get_bits.h" -#include "dsputil.h" #include "mathops.h" +#include "mpegaudiodsp.h" /* * TODO: @@ -38,25 +38,71 @@ #include "mpegaudio.h" #include "mpegaudiodecheader.h" +#define BACKSTEP_SIZE 512 +#define EXTRABYTES 24 + +/* layer 3 "granule" */ +typedef struct GranuleDef { + uint8_t scfsi; + int part2_3_length; + int big_values; + int global_gain; + int scalefac_compress; + uint8_t block_type; + uint8_t switch_point; + int table_select[3]; + int subblock_gain[3]; + uint8_t scalefac_scale; + uint8_t count1table_select; + int region_size[3]; /* number of huffman codes in each region */ + int preflag; + int short_start, long_end; /* long/short band indexes */ + uint8_t scale_factors[40]; + INTFLOAT sb_hybrid[SBLIMIT * 18]; /* 576 samples */ +} GranuleDef; + +typedef struct MPADecodeContext { + MPA_DECODE_HEADER + uint8_t last_buf[2*BACKSTEP_SIZE + EXTRABYTES]; + int last_buf_size; + /* next header (used in free format parsing) */ + uint32_t free_format_next_header; + GetBitContext gb; + GetBitContext in_gb; + DECLARE_ALIGNED(32, MPA_INT, synth_buf)[MPA_MAX_CHANNELS][512 * 2]; + int synth_buf_offset[MPA_MAX_CHANNELS]; + DECLARE_ALIGNED(32, INTFLOAT, sb_samples)[MPA_MAX_CHANNELS][36][SBLIMIT]; + INTFLOAT mdct_buf[MPA_MAX_CHANNELS][SBLIMIT * 18]; /* previous samples, for layer 3 MDCT */ + GranuleDef granules[2][2]; /* Used in Layer 3 */ 
+#ifdef DEBUG + int frame_count; +#endif + int adu_mode; ///< 0 for standard mp3, 1 for adu formatted mp3 + int dither_state; + int error_recognition; + AVCodecContext* avctx; + MPADSPContext mpadsp; +} MPADecodeContext; + #if CONFIG_FLOAT # define SHR(a,b) ((a)*(1.0f/(1<<(b)))) -# define compute_antialias compute_antialias_float # define FIXR_OLD(a) ((int)((a) * FRAC_ONE + 0.5)) # define FIXR(x) ((float)(x)) # define FIXHR(x) ((float)(x)) # define MULH3(x, y, s) ((s)*(y)*(x)) # define MULLx(x, y, s) ((y)*(x)) # define RENAME(a) a ## _float +# define OUT_FMT AV_SAMPLE_FMT_FLT #else # define SHR(a,b) ((a)>>(b)) -# define compute_antialias compute_antialias_integer /* WARNING: only correct for posititive numbers */ # define FIXR_OLD(a) ((int)((a) * FRAC_ONE + 0.5)) # define FIXR(a) ((int)((a) * FRAC_ONE + 0.5)) # define FIXHR(a) ((int)((a) * (1LL<<32) + 0.5)) # define MULH3(x, y, s) MULH((s)*(x), y) # define MULLx(x, y, s) MULL(x,y,s) -# define RENAME(a) a +# define RENAME(a) a ## _fixed +# define OUT_FMT AV_SAMPLE_FMT_S16 #endif /****************/ @@ -66,16 +112,6 @@ #include "mpegaudiodata.h" #include "mpegaudiodectab.h" -#if CONFIG_FLOAT -# include "fft.h" -#else -# include "dct32.c" -#endif - -static void compute_antialias(MPADecodeContext *s, GranuleDef *g); -static void apply_window_mp3_c(MPA_INT *synth_buf, MPA_INT *window, - int *dither_state, OUT_INT *samples, int incr); - /* vlc structure for decoding layer 3 huffman tables */ static VLC huff_vlc[16]; static VLC_TYPE huff_vlc_tables[ @@ -97,8 +133,7 @@ static uint16_t band_index_long[9][23]; /* intensity stereo coef table */ static INTFLOAT is_table[2][16]; static INTFLOAT is_table_lsf[2][2][16]; -static int32_t csa_table[8][4]; -static float csa_table_float[8][4]; +static INTFLOAT csa_table[8][4]; static INTFLOAT mdct_win[8][36]; static int16_t division_tab3[1<<6 ]; @@ -124,8 +159,6 @@ static const int32_t scale_factor_mult2[3][3] = { SCALE_GEN(4.0 / 9.0), /* 9 steps */ }; -DECLARE_ALIGNED(16, MPA_INT, 
RENAME(ff_mpa_synth_window))[512+256]; - /** * Convert region offsets to region sizes and truncate * size to big_values. @@ -236,27 +269,6 @@ static inline int l3_unscale(int value, int exponent) return m; } -/* all integer n^(4/3) computation code */ -#define DEV_ORDER 13 - -#define POW_FRAC_BITS 24 -#define POW_FRAC_ONE (1 << POW_FRAC_BITS) -#define POW_FIX(a) ((int)((a) * POW_FRAC_ONE)) -#define POW_MULL(a,b) (((int64_t)(a) * (int64_t)(b)) >> POW_FRAC_BITS) - -static int dev_4_3_coefs[DEV_ORDER]; - -static av_cold void int_pow_init(void) -{ - int i, a; - - a = POW_FIX(1.0); - for(i=0;i<DEV_ORDER;i++) { - a = POW_MULL(a, POW_FIX(4.0 / 3.0) - i * POW_FIX(1.0)) / (i + 1); - dev_4_3_coefs[i] = a; - } -} - static av_cold int decode_init(AVCodecContext * avctx) { MPADecodeContext *s = avctx->priv_data; @@ -264,14 +276,8 @@ static av_cold int decode_init(AVCodecContext * avctx) int i, j, k; s->avctx = avctx; - s->apply_window_mp3 = apply_window_mp3_c; -#if HAVE_MMX && CONFIG_FLOAT - ff_mpegaudiodec_init_mmx(s); -#endif -#if CONFIG_FLOAT - ff_dct_init(&s->dct, 5, DCT_II); -#endif - if (HAVE_ALTIVEC && CONFIG_FLOAT) ff_mpegaudiodec_init_altivec(s); + + ff_mpadsp_init(&s->mpadsp); avctx->sample_fmt= OUT_FMT; s->error_recognition= avctx->error_recognition; @@ -358,7 +364,6 @@ static av_cold int decode_init(AVCodecContext * avctx) /* compute n ^ (4/3) and store it in mantissa/exp format */ - int_pow_init(); mpegaudio_tableinit(); for (i = 0; i < 4; i++) @@ -401,8 +406,9 @@ static av_cold int decode_init(AVCodecContext * avctx) k = i & 1; is_table_lsf[j][k ^ 1][i] = FIXR(f); is_table_lsf[j][k][i] = FIXR(1.0); - av_dlog(avctx, "is_table_lsf %d %d: %x %x\n", - i, j, is_table_lsf[j][0][i], is_table_lsf[j][1][i]); + av_dlog(avctx, "is_table_lsf %d %d: %f %f\n", + i, j, (float) is_table_lsf[j][0][i], + (float) is_table_lsf[j][1][i]); } } @@ -411,14 +417,17 @@ static av_cold int decode_init(AVCodecContext * avctx) ci = ci_table[i]; cs = 1.0 / sqrt(1.0 + ci * ci); ca = cs * ci; 
+#if !CONFIG_FLOAT csa_table[i][0] = FIXHR(cs/4); csa_table[i][1] = FIXHR(ca/4); csa_table[i][2] = FIXHR(ca/4) + FIXHR(cs/4); csa_table[i][3] = FIXHR(ca/4) - FIXHR(cs/4); - csa_table_float[i][0] = cs; - csa_table_float[i][1] = ca; - csa_table_float[i][2] = ca + cs; - csa_table_float[i][3] = ca - cs; +#else + csa_table[i][0] = cs; + csa_table[i][1] = ca; + csa_table[i][2] = ca + cs; + csa_table[i][3] = ca - cs; +#endif } /* compute mdct windows */ @@ -466,183 +475,6 @@ static av_cold int decode_init(AVCodecContext * avctx) return 0; } - -#if CONFIG_FLOAT -static inline float round_sample(float *sum) -{ - float sum1=*sum; - *sum = 0; - return sum1; -} - -/* signed 16x16 -> 32 multiply add accumulate */ -#define MACS(rt, ra, rb) rt+=(ra)*(rb) - -/* signed 16x16 -> 32 multiply */ -#define MULS(ra, rb) ((ra)*(rb)) - -#define MLSS(rt, ra, rb) rt-=(ra)*(rb) - -#else - -static inline int round_sample(int64_t *sum) -{ - int sum1; - sum1 = (int)((*sum) >> OUT_SHIFT); - *sum &= (1<<OUT_SHIFT)-1; - return av_clip(sum1, OUT_MIN, OUT_MAX); -} - -# define MULS(ra, rb) MUL64(ra, rb) -# define MACS(rt, ra, rb) MAC64(rt, ra, rb) -# define MLSS(rt, ra, rb) MLS64(rt, ra, rb) -#endif - -#define SUM8(op, sum, w, p) \ -{ \ - op(sum, (w)[0 * 64], (p)[0 * 64]); \ - op(sum, (w)[1 * 64], (p)[1 * 64]); \ - op(sum, (w)[2 * 64], (p)[2 * 64]); \ - op(sum, (w)[3 * 64], (p)[3 * 64]); \ - op(sum, (w)[4 * 64], (p)[4 * 64]); \ - op(sum, (w)[5 * 64], (p)[5 * 64]); \ - op(sum, (w)[6 * 64], (p)[6 * 64]); \ - op(sum, (w)[7 * 64], (p)[7 * 64]); \ -} - -#define SUM8P2(sum1, op1, sum2, op2, w1, w2, p) \ -{ \ - INTFLOAT tmp;\ - tmp = p[0 * 64];\ - op1(sum1, (w1)[0 * 64], tmp);\ - op2(sum2, (w2)[0 * 64], tmp);\ - tmp = p[1 * 64];\ - op1(sum1, (w1)[1 * 64], tmp);\ - op2(sum2, (w2)[1 * 64], tmp);\ - tmp = p[2 * 64];\ - op1(sum1, (w1)[2 * 64], tmp);\ - op2(sum2, (w2)[2 * 64], tmp);\ - tmp = p[3 * 64];\ - op1(sum1, (w1)[3 * 64], tmp);\ - op2(sum2, (w2)[3 * 64], tmp);\ - tmp = p[4 * 64];\ - op1(sum1, (w1)[4 * 64], 
tmp);\ - op2(sum2, (w2)[4 * 64], tmp);\ - tmp = p[5 * 64];\ - op1(sum1, (w1)[5 * 64], tmp);\ - op2(sum2, (w2)[5 * 64], tmp);\ - tmp = p[6 * 64];\ - op1(sum1, (w1)[6 * 64], tmp);\ - op2(sum2, (w2)[6 * 64], tmp);\ - tmp = p[7 * 64];\ - op1(sum1, (w1)[7 * 64], tmp);\ - op2(sum2, (w2)[7 * 64], tmp);\ -} - -void av_cold RENAME(ff_mpa_synth_init)(MPA_INT *window) -{ - int i, j; - - /* max = 18760, max sum over all 16 coefs : 44736 */ - for(i=0;i<257;i++) { - INTFLOAT v; - v = ff_mpa_enwindow[i]; -#if CONFIG_FLOAT - v *= 1.0 / (1LL<<(16 + FRAC_BITS)); -#endif - window[i] = v; - if ((i & 63) != 0) - v = -v; - if (i != 0) - window[512 - i] = v; - } - - // Needed for avoiding shuffles in ASM implementations - for(i=0; i < 8; i++) - for(j=0; j < 16; j++) - window[512+16*i+j] = window[64*i+32-j]; - - for(i=0; i < 8; i++) - for(j=0; j < 16; j++) - window[512+128+16*i+j] = window[64*i+48-j]; -} - -static void apply_window_mp3_c(MPA_INT *synth_buf, MPA_INT *window, - int *dither_state, OUT_INT *samples, int incr) -{ - register const MPA_INT *w, *w2, *p; - int j; - OUT_INT *samples2; -#if CONFIG_FLOAT - float sum, sum2; -#else - int64_t sum, sum2; -#endif - - /* copy to avoid wrap */ - memcpy(synth_buf + 512, synth_buf, 32 * sizeof(*synth_buf)); - - samples2 = samples + 31 * incr; - w = window; - w2 = window + 31; - - sum = *dither_state; - p = synth_buf + 16; - SUM8(MACS, sum, w, p); - p = synth_buf + 48; - SUM8(MLSS, sum, w + 32, p); - *samples = round_sample(&sum); - samples += incr; - w++; - - /* we calculate two samples at the same time to avoid one memory - access per two sample */ - for(j=1;j<16;j++) { - sum2 = 0; - p = synth_buf + 16 + j; - SUM8P2(sum, MACS, sum2, MLSS, w, w2, p); - p = synth_buf + 48 - j; - SUM8P2(sum, MLSS, sum2, MLSS, w + 32, w2 + 32, p); - - *samples = round_sample(&sum); - samples += incr; - sum += sum2; - *samples2 = round_sample(&sum); - samples2 -= incr; - w++; - w2--; - } - - p = synth_buf + 32; - SUM8(MLSS, sum, w + 32, p); - *samples = 
round_sample(&sum); - *dither_state= sum; -} - - -/* 32 sub band synthesis filter. Input: 32 sub band samples, Output: - 32 samples. */ -/* XXX: optimize by avoiding ring buffer usage */ -#if !CONFIG_FLOAT -void ff_mpa_synth_filter(MPA_INT *synth_buf_ptr, int *synth_buf_offset, - MPA_INT *window, int *dither_state, - OUT_INT *samples, int incr, - INTFLOAT sb_samples[SBLIMIT]) -{ - register MPA_INT *synth_buf; - int offset; - - offset = *synth_buf_offset; - synth_buf = synth_buf_ptr + offset; - - dct32(synth_buf, sb_samples); - apply_window_mp3_c(synth_buf, window, dither_state, samples, incr); - - offset = (offset - 32) & 511; - *synth_buf_offset = offset; -} -#endif - #define C3 FIXHR(0.86602540378443864676/2) /* 0.5 / cos(pi*(2*i+1)/36) */ @@ -1482,11 +1314,26 @@ static void compute_stereo(MPADecodeContext *s, } } -#if !CONFIG_FLOAT -static void compute_antialias_integer(MPADecodeContext *s, - GranuleDef *g) +#if CONFIG_FLOAT +#define AA(j) do { \ + float tmp0 = ptr[-1-j]; \ + float tmp1 = ptr[ j]; \ + ptr[-1-j] = tmp0 * csa_table[j][0] - tmp1 * csa_table[j][1]; \ + ptr[ j] = tmp0 * csa_table[j][1] + tmp1 * csa_table[j][0]; \ + } while (0) +#else +#define AA(j) do { \ + int tmp0 = ptr[-1-j]; \ + int tmp1 = ptr[ j]; \ + int tmp2 = MULH(tmp0 + tmp1, csa_table[j][0]); \ + ptr[-1-j] = 4*(tmp2 - MULH(tmp1, csa_table[j][2])); \ + ptr[ j] = 4*(tmp2 + MULH(tmp0, csa_table[j][3])); \ + } while (0) +#endif + +static void compute_antialias(MPADecodeContext *s, GranuleDef *g) { - int32_t *ptr, *csa; + INTFLOAT *ptr; int n, i; /* we antialias only "long" bands */ @@ -1501,28 +1348,18 @@ static void compute_antialias_integer(MPADecodeContext *s, ptr = g->sb_hybrid + 18; for(i = n;i > 0;i--) { - int tmp0, tmp1, tmp2; - csa = &csa_table[0][0]; -#define INT_AA(j) \ - tmp0 = ptr[-1-j];\ - tmp1 = ptr[ j];\ - tmp2= MULH(tmp0 + tmp1, csa[0+4*j]);\ - ptr[-1-j] = 4*(tmp2 - MULH(tmp1, csa[2+4*j]));\ - ptr[ j] = 4*(tmp2 + MULH(tmp0, csa[3+4*j])); - - INT_AA(0) - INT_AA(1) - INT_AA(2) - 
INT_AA(3) - INT_AA(4) - INT_AA(5) - INT_AA(6) - INT_AA(7) + AA(0); + AA(1); + AA(2); + AA(3); + AA(4); + AA(5); + AA(6); + AA(7); ptr += 18; } } -#endif static void compute_imdct(MPADecodeContext *s, GranuleDef *g, @@ -1618,7 +1455,7 @@ static void compute_imdct(MPADecodeContext *s, /* main layer3 decoding function */ static int mp_decode_layer3(MPADecodeContext *s) { - int nb_granules, main_data_begin, private_bits; + int nb_granules, main_data_begin; int gr, ch, blocksplit_flag, i, j, k, n, bits_pos; GranuleDef *g; int16_t exponents[576]; //FIXME try INTFLOAT @@ -1626,14 +1463,14 @@ static int mp_decode_layer3(MPADecodeContext *s) /* read side info */ if (s->lsf) { main_data_begin = get_bits(&s->gb, 8); - private_bits = get_bits(&s->gb, s->nb_channels); + skip_bits(&s->gb, s->nb_channels); nb_granules = 1; } else { main_data_begin = get_bits(&s->gb, 9); if (s->nb_channels == 2) - private_bits = get_bits(&s->gb, 3); + skip_bits(&s->gb, 3); else - private_bits = get_bits(&s->gb, 5); + skip_bits(&s->gb, 5); nb_granules = 2; for(ch=0;ch<s->nb_channels;ch++) { s->granules[ch][0].scfsi = 0;/* all scale factors are transmitted */ @@ -1920,9 +1757,7 @@ static int mp_decode_frame(MPADecodeContext *s, samples_ptr = samples + ch; for(i=0;i<nb_frames;i++) { RENAME(ff_mpa_synth_filter)( -#if CONFIG_FLOAT - s, -#endif + &s->mpadsp, s->synth_buf[ch], &(s->synth_buf_offset[ch]), RENAME(ff_mpa_synth_window), &s->dither_state, samples_ptr, s->nb_channels, diff --git a/libavcodec/mpegaudiodec_float.c b/libavcodec/mpegaudiodec_float.c index 758ef83e05..44b38b66d9 100644 --- a/libavcodec/mpegaudiodec_float.c +++ b/libavcodec/mpegaudiodec_float.c @@ -22,71 +22,6 @@ #define CONFIG_FLOAT 1 #include "mpegaudiodec.c" -void ff_mpa_synth_filter_float(MPADecodeContext *s, float *synth_buf_ptr, - int *synth_buf_offset, - float *window, int *dither_state, - float *samples, int incr, - float sb_samples[SBLIMIT]) -{ - float *synth_buf; - int offset; - - offset = *synth_buf_offset; - synth_buf = 
synth_buf_ptr + offset; - - s->dct.dct32(synth_buf, sb_samples); - s->apply_window_mp3(synth_buf, window, dither_state, samples, incr); - - offset = (offset - 32) & 511; - *synth_buf_offset = offset; -} - -static void compute_antialias_float(MPADecodeContext *s, - GranuleDef *g) -{ - float *ptr; - int n, i; - - /* we antialias only "long" bands */ - if (g->block_type == 2) { - if (!g->switch_point) - return; - /* XXX: check this for 8000Hz case */ - n = 1; - } else { - n = SBLIMIT - 1; - } - - ptr = g->sb_hybrid + 18; - for(i = n;i > 0;i--) { - float tmp0, tmp1; - float *csa = &csa_table_float[0][0]; -#define FLOAT_AA(j)\ - tmp0= ptr[-1-j];\ - tmp1= ptr[ j];\ - ptr[-1-j] = tmp0 * csa[0+4*j] - tmp1 * csa[1+4*j];\ - ptr[ j] = tmp0 * csa[1+4*j] + tmp1 * csa[0+4*j]; - - FLOAT_AA(0) - FLOAT_AA(1) - FLOAT_AA(2) - FLOAT_AA(3) - FLOAT_AA(4) - FLOAT_AA(5) - FLOAT_AA(6) - FLOAT_AA(7) - - ptr += 18; - } -} - -static av_cold int decode_end(AVCodecContext * avctx) -{ - MPADecodeContext *s = avctx->priv_data; - ff_dct_end(&s->dct); - return 0; -} - #if CONFIG_MP1FLOAT_DECODER AVCodec ff_mp1float_decoder = { @@ -96,7 +31,7 @@ AVCodec ff_mp1float_decoder = sizeof(MPADecodeContext), decode_init, NULL, - decode_end, + .close = NULL, decode_frame, CODEC_CAP_PARSE_ONLY, .flush= flush, @@ -112,7 +47,7 @@ AVCodec ff_mp2float_decoder = sizeof(MPADecodeContext), decode_init, NULL, - decode_end, + .close = NULL, decode_frame, CODEC_CAP_PARSE_ONLY, .flush= flush, @@ -128,7 +63,7 @@ AVCodec ff_mp3float_decoder = sizeof(MPADecodeContext), decode_init, NULL, - decode_end, + .close = NULL, decode_frame, CODEC_CAP_PARSE_ONLY, .flush= flush, @@ -144,7 +79,7 @@ AVCodec ff_mp3adufloat_decoder = sizeof(MPADecodeContext), decode_init, NULL, - decode_end, + .close = NULL, decode_frame_adu, CODEC_CAP_PARSE_ONLY, .flush= flush, diff --git a/libavcodec/mpegaudiodecheader.c b/libavcodec/mpegaudiodecheader.c index 0821dd43c1..1c3b004bcc 100644 --- a/libavcodec/mpegaudiodecheader.c +++ 
b/libavcodec/mpegaudiodecheader.c @@ -108,3 +108,40 @@ int ff_mpegaudio_decode_header(MPADecodeHeader *s, uint32_t header) #endif return 0; } + +int ff_mpa_decode_header(AVCodecContext *avctx, uint32_t head, int *sample_rate, int *channels, int *frame_size, int *bit_rate) +{ + MPADecodeHeader s1, *s = &s1; + + if (ff_mpa_check_header(head) != 0) + return -1; + + if (ff_mpegaudio_decode_header(s, head) != 0) { + return -1; + } + + switch(s->layer) { + case 1: + avctx->codec_id = CODEC_ID_MP1; + *frame_size = 384; + break; + case 2: + avctx->codec_id = CODEC_ID_MP2; + *frame_size = 1152; + break; + default: + case 3: + avctx->codec_id = CODEC_ID_MP3; + if (s->lsf) + *frame_size = 576; + else + *frame_size = 1152; + break; + } + + *sample_rate = s->sample_rate; + *channels = s->nb_channels; + *bit_rate = s->bit_rate; + avctx->sub_id = s->layer; + return s->frame_size; +} diff --git a/libavcodec/mpegaudiodecheader.h b/libavcodec/mpegaudiodecheader.h index 5578618288..aa77f0122e 100644 --- a/libavcodec/mpegaudiodecheader.h +++ b/libavcodec/mpegaudiodecheader.h @@ -27,13 +27,50 @@ #ifndef AVCODEC_MPEGAUDIODECHEADER_H #define AVCODEC_MPEGAUDIODECHEADER_H -#include "libavutil/common.h" -#include "mpegaudio.h" +#include "avcodec.h" +#define MP3_MASK 0xFFFE0CCF + +#define MPA_DECODE_HEADER \ + int frame_size; \ + int error_protection; \ + int layer; \ + int sample_rate; \ + int sample_rate_index; /* between 0 and 8 */ \ + int bit_rate; \ + int nb_channels; \ + int mode; \ + int mode_ext; \ + int lsf; + +typedef struct MPADecodeHeader { + MPA_DECODE_HEADER +} MPADecodeHeader; /* header decoding. MUST check the header before because no consistency check is done there. Return 1 if free format found and that the frame size must be computed externally */ int ff_mpegaudio_decode_header(MPADecodeHeader *s, uint32_t header); +/* useful helper to get mpeg audio stream infos. 
Return -1 if error in + header, otherwise the coded frame size in bytes */ +int ff_mpa_decode_header(AVCodecContext *avctx, uint32_t head, int *sample_rate, int *channels, int *frame_size, int *bitrate); + +/* fast header check for resync */ +static inline int ff_mpa_check_header(uint32_t header){ + /* header */ + if ((header & 0xffe00000) != 0xffe00000) + return -1; + /* layer check */ + if ((header & (3<<17)) == 0) + return -1; + /* bit rate */ + if ((header & (0xf<<12)) == 0xf<<12) + return -1; + /* frequency */ + if ((header & (3<<10)) == 3<<10) + return -1; + return 0; +} + #endif /* AVCODEC_MPEGAUDIODECHEADER_H */ diff --git a/libavcodec/mpegaudiodectab.h b/libavcodec/mpegaudiodectab.h index 234a70e474..accd12b8e2 100644 --- a/libavcodec/mpegaudiodectab.h +++ b/libavcodec/mpegaudiodectab.h @@ -27,12 +27,21 @@ #ifndef AVCODEC_MPEGAUDIODECTAB_H #define AVCODEC_MPEGAUDIODECTAB_H +#include <stddef.h> #include <stdint.h> + #include "mpegaudio.h" /*******************************************************/ /* layer 3 tables */ +/* layer 3 huffman tables */ +typedef struct HuffTable { + int xsize; + const uint8_t *bits; + const uint16_t *codes; +} HuffTable; + /* layer3 scale factor size */ static const uint8_t slen_table[2][16] = { { 0, 0, 0, 0, 3, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4 }, diff --git a/libavcodec/g729.h b/libavcodec/mpegaudiodsp.c index 462cf8f1ac..d98d25bb21 100644 --- a/libavcodec/g729.h +++ b/libavcodec/mpegaudiodsp.c @@ -1,6 +1,5 @@ /* - * G.729 decoder - * Copyright (c) 2008 Vladimir Voroshilov + * Copyright (c) 2011 Mans Rullgard * * This file is part of FFmpeg. 
* @@ -18,12 +17,25 @@ * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#ifndef AVCODEC_G729_H -#define AVCODEC_G729_H -/** - * subframe size - */ -#define SUBFRAME_SIZE 40 +#include "config.h" +#include "mpegaudiodsp.h" +#include "dct.h" +#include "dct32.h" + +void ff_mpadsp_init(MPADSPContext *s) +{ + DCTContext dct; + + ff_dct_init(&dct, 5, DCT_II); + + s->apply_window_float = ff_mpadsp_apply_window_float; + s->apply_window_fixed = ff_mpadsp_apply_window_fixed; + + s->dct32_float = dct.dct32; + s->dct32_fixed = ff_dct32_fixed; -#endif // AVCODEC_G729_H + if (ARCH_ARM) ff_mpadsp_init_arm(s); + if (HAVE_MMX) ff_mpadsp_init_mmx(s); + if (HAVE_ALTIVEC) ff_mpadsp_init_altivec(s); +} diff --git a/libavcodec/mpegaudiodsp.h b/libavcodec/mpegaudiodsp.h new file mode 100644 index 0000000000..8a18db8325 --- /dev/null +++ b/libavcodec/mpegaudiodsp.h @@ -0,0 +1,64 @@ +/* + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_MPEGAUDIODSP_H +#define AVCODEC_MPEGAUDIODSP_H + +#include <stdint.h> + +typedef struct MPADSPContext { + void (*apply_window_float)(float *synth_buf, float *window, + int *dither_state, float *samples, int incr); + void (*apply_window_fixed)(int32_t *synth_buf, int32_t *window, + int *dither_state, int16_t *samples, int incr); + void (*dct32_float)(float *dst, const float *src); + void (*dct32_fixed)(int *dst, const int *src); +} MPADSPContext; + +void ff_mpadsp_init(MPADSPContext *s); + +extern int32_t ff_mpa_synth_window_fixed[]; +extern float ff_mpa_synth_window_float[]; + +void ff_mpa_synth_filter_fixed(MPADSPContext *s, + int32_t *synth_buf_ptr, int *synth_buf_offset, + int32_t *window, int *dither_state, + int16_t *samples, int incr, + int32_t *sb_samples); + +void ff_mpa_synth_filter_float(MPADSPContext *s, + float *synth_buf_ptr, int *synth_buf_offset, + float *window, int *dither_state, + float *samples, int incr, + float *sb_samples); + +void ff_mpadsp_init_arm(MPADSPContext *s); +void ff_mpadsp_init_mmx(MPADSPContext *s); +void ff_mpadsp_init_altivec(MPADSPContext *s); + +void ff_mpa_synth_init_float(float *window); +void ff_mpa_synth_init_fixed(int32_t *window); + +void ff_mpadsp_apply_window_float(float *synth_buf, float *window, + int *dither_state, float *samples, + int incr); +void ff_mpadsp_apply_window_fixed(int32_t *synth_buf, int32_t *window, + int *dither_state, int16_t *samples, + int incr); + +#endif /* AVCODEC_MPEGAUDIODSP_H */ diff --git a/libavcodec/mpegaudiodsp_fixed.c b/libavcodec/mpegaudiodsp_fixed.c new file mode 100644 index 0000000000..3c49a568b7 --- /dev/null +++ b/libavcodec/mpegaudiodsp_fixed.c @@ -0,0 +1,20 @@ +/* + * This file is part of Libav. 
+ * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#define CONFIG_FLOAT 0 +#include "mpegaudiodsp_template.c" diff --git a/libavcodec/mpegaudiodsp_float.c b/libavcodec/mpegaudiodsp_float.c new file mode 100644 index 0000000000..2d8d53ea26 --- /dev/null +++ b/libavcodec/mpegaudiodsp_float.c @@ -0,0 +1,20 @@ +/* + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#define CONFIG_FLOAT 1 +#include "mpegaudiodsp_template.c" diff --git a/libavcodec/mpegaudiodsp_template.c b/libavcodec/mpegaudiodsp_template.c new file mode 100644 index 0000000000..02a34079c8 --- /dev/null +++ b/libavcodec/mpegaudiodsp_template.c @@ -0,0 +1,205 @@ +/* + * Copyright (c) 2001, 2002 Fabrice Bellard + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <stdint.h> + +#include "libavutil/mem.h" +#include "dct32.h" +#include "mathops.h" +#include "mpegaudiodsp.h" +#include "mpegaudio.h" +#include "mpegaudiodata.h" + +#if CONFIG_FLOAT +#define RENAME(n) n##_float + +static inline float round_sample(float *sum) +{ + float sum1=*sum; + *sum = 0; + return sum1; +} + +#define MACS(rt, ra, rb) rt+=(ra)*(rb) +#define MULS(ra, rb) ((ra)*(rb)) +#define MLSS(rt, ra, rb) rt-=(ra)*(rb) + +#else + +#define RENAME(n) n##_fixed +#define OUT_SHIFT (WFRAC_BITS + FRAC_BITS - 15) + +static inline int round_sample(int64_t *sum) +{ + int sum1; + sum1 = (int)((*sum) >> OUT_SHIFT); + *sum &= (1<<OUT_SHIFT)-1; + return av_clip_int16(sum1); +} + +# define MULS(ra, rb) MUL64(ra, rb) +# define MACS(rt, ra, rb) MAC64(rt, ra, rb) +# define MLSS(rt, ra, rb) MLS64(rt, ra, rb) +#endif + +DECLARE_ALIGNED(16, MPA_INT, RENAME(ff_mpa_synth_window))[512+256]; + +#define SUM8(op, sum, w, p) \ +{ \ + op(sum, (w)[0 * 64], (p)[0 * 64]); \ + op(sum, (w)[1 * 64], (p)[1 * 64]); \ + op(sum, (w)[2 * 64], (p)[2 * 64]); \ + op(sum, (w)[3 * 64], (p)[3 * 64]); \ + op(sum, (w)[4 * 64], (p)[4 * 64]); \ + op(sum, (w)[5 * 64], (p)[5 * 64]); \ + op(sum, (w)[6 * 64], (p)[6 * 64]); \ + op(sum, (w)[7 * 64], (p)[7 * 64]); \ +} + +#define SUM8P2(sum1, op1, sum2, op2, w1, w2, p) \ +{ \ + INTFLOAT tmp;\ + tmp = p[0 * 64];\ + op1(sum1, (w1)[0 * 64], tmp);\ + op2(sum2, (w2)[0 * 64], tmp);\ + tmp = p[1 * 64];\ + op1(sum1, (w1)[1 * 64], tmp);\ + op2(sum2, (w2)[1 * 64], tmp);\ + tmp = p[2 * 64];\ + op1(sum1, (w1)[2 * 64], tmp);\ + op2(sum2, (w2)[2 * 64], tmp);\ + tmp = p[3 * 64];\ + op1(sum1, (w1)[3 * 64], tmp);\ + op2(sum2, (w2)[3 * 64], tmp);\ + tmp = p[4 * 64];\ + op1(sum1, (w1)[4 * 64], tmp);\ + op2(sum2, (w2)[4 * 64], tmp);\ + tmp = p[5 * 64];\ 
+ op1(sum1, (w1)[5 * 64], tmp);\ + op2(sum2, (w2)[5 * 64], tmp);\ + tmp = p[6 * 64];\ + op1(sum1, (w1)[6 * 64], tmp);\ + op2(sum2, (w2)[6 * 64], tmp);\ + tmp = p[7 * 64];\ + op1(sum1, (w1)[7 * 64], tmp);\ + op2(sum2, (w2)[7 * 64], tmp);\ +} + +void RENAME(ff_mpadsp_apply_window)(MPA_INT *synth_buf, MPA_INT *window, + int *dither_state, OUT_INT *samples, + int incr) +{ + register const MPA_INT *w, *w2, *p; + int j; + OUT_INT *samples2; +#if CONFIG_FLOAT + float sum, sum2; +#else + int64_t sum, sum2; +#endif + + /* copy to avoid wrap */ + memcpy(synth_buf + 512, synth_buf, 32 * sizeof(*synth_buf)); + + samples2 = samples + 31 * incr; + w = window; + w2 = window + 31; + + sum = *dither_state; + p = synth_buf + 16; + SUM8(MACS, sum, w, p); + p = synth_buf + 48; + SUM8(MLSS, sum, w + 32, p); + *samples = round_sample(&sum); + samples += incr; + w++; + + /* we calculate two samples at the same time to avoid one memory + access per two sample */ + for(j=1;j<16;j++) { + sum2 = 0; + p = synth_buf + 16 + j; + SUM8P2(sum, MACS, sum2, MLSS, w, w2, p); + p = synth_buf + 48 - j; + SUM8P2(sum, MLSS, sum2, MLSS, w + 32, w2 + 32, p); + + *samples = round_sample(&sum); + samples += incr; + sum += sum2; + *samples2 = round_sample(&sum); + samples2 -= incr; + w++; + w2--; + } + + p = synth_buf + 32; + SUM8(MLSS, sum, w + 32, p); + *samples = round_sample(&sum); + *dither_state= sum; +} + +/* 32 sub band synthesis filter. Input: 32 sub band samples, Output: + 32 samples. 
*/ +void RENAME(ff_mpa_synth_filter)(MPADSPContext *s, MPA_INT *synth_buf_ptr, + int *synth_buf_offset, + MPA_INT *window, int *dither_state, + OUT_INT *samples, int incr, + MPA_INT *sb_samples) +{ + MPA_INT *synth_buf; + int offset; + + offset = *synth_buf_offset; + synth_buf = synth_buf_ptr + offset; + + s->RENAME(dct32)(synth_buf, sb_samples); + s->RENAME(apply_window)(synth_buf, window, dither_state, samples, incr); + + offset = (offset - 32) & 511; + *synth_buf_offset = offset; +} + +void av_cold RENAME(ff_mpa_synth_init)(MPA_INT *window) +{ + int i, j; + + /* max = 18760, max sum over all 16 coefs : 44736 */ + for(i=0;i<257;i++) { + INTFLOAT v; + v = ff_mpa_enwindow[i]; +#if CONFIG_FLOAT + v *= 1.0 / (1LL<<(16 + FRAC_BITS)); +#endif + window[i] = v; + if ((i & 63) != 0) + v = -v; + if (i != 0) + window[512 - i] = v; + } + + // Needed for avoiding shuffles in ASM implementations + for(i=0; i < 8; i++) + for(j=0; j < 16; j++) + window[512+16*i+j] = window[64*i+32-j]; + + for(i=0; i < 8; i++) + for(j=0; j < 16; j++) + window[512+128+16*i+j] = window[64*i+48-j]; +} diff --git a/libavcodec/mpegaudioenc.c b/libavcodec/mpegaudioenc.c index 515da6f670..a5859837ef 100644 --- a/libavcodec/mpegaudioenc.c +++ b/libavcodec/mpegaudioenc.c @@ -548,13 +548,11 @@ static void compute_bit_allocation(MpegAudioContext *s, } } } -#if 0 - printf("current=%d max=%d max_sb=%d alloc=%d\n", - current_frame_size, max_frame_size, max_sb, - bit_alloc[max_sb]); -#endif if (max_sb < 0) break; + av_dlog(NULL, "current=%d max=%d max_sb=%d max_ch=%d alloc=%d\n", + current_frame_size, max_frame_size, max_sb, max_ch, + bit_alloc[max_ch][max_sb]); /* find alloc table entry (XXX: not optimal, should use pointer table) */ @@ -782,5 +780,3 @@ AVCodec ff_mp2_encoder = { .supported_samplerates= (const int[]){44100, 48000, 32000, 22050, 24000, 16000, 0}, .long_name = NULL_IF_CONFIG_SMALL("MP2 (MPEG audio layer 2)"), }; - -#undef FIX diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c index 
53230cf6d8..cdbda34185 100644 --- a/libavcodec/mpegvideo.c +++ b/libavcodec/mpegvideo.c @@ -316,7 +316,7 @@ int ff_alloc_picture(MpegEncContext *s, Picture *pic, int shared){ s->prev_pict_types[0]= s->dropable ? AV_PICTURE_TYPE_B : s->pict_type; if(pic->age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->age] == AV_PICTURE_TYPE_B) pic->age= INT_MAX; // Skipped MBs in B-frames are quite rare in MPEG-1/2 and it is a bit tricky to skip them anyway. - pic->owner2 = s; + pic->owner2 = NULL; return 0; fail: //for the FF_ALLOCZ_OR_GOTO macro @@ -527,7 +527,7 @@ int ff_mpeg_update_thread_context(AVCodecContext *dst, const AVCodecContext *src s->last_pict_type= s1->pict_type; if (s1->current_picture_ptr) s->last_lambda_for[s1->pict_type] = s1->current_picture_ptr->quality; - if(s1->pict_type!=AV_PICTURE_TYPE_B){ + if(s1->pict_type!=FF_B_TYPE){ s->last_non_b_pict_type= s1->pict_type; } } @@ -586,7 +586,7 @@ av_cold int MPV_common_init(MpegEncContext *s) return -1; } - if(s->avctx->active_thread_type&FF_THREAD_SLICE && + if((s->encoding || (s->avctx->active_thread_type & FF_THREAD_SLICE)) && (s->avctx->thread_count > MAX_THREADS || (s->avctx->thread_count > s->mb_height && s->mb_height))){ av_log(s->avctx, AV_LOG_ERROR, "too many threads\n"); return -1; @@ -745,7 +745,7 @@ av_cold int MPV_common_init(MpegEncContext *s) s->context_initialized = 1; s->thread_context[0]= s; - if (HAVE_THREADS && s->avctx->active_thread_type&FF_THREAD_SLICE) { + if (s->encoding || (HAVE_THREADS && s->avctx->active_thread_type&FF_THREAD_SLICE)) { threads = s->avctx->thread_count; for(i=1; i<threads; i++){ @@ -763,6 +763,7 @@ av_cold int MPV_common_init(MpegEncContext *s) if(init_duplicate_context(s, s) < 0) goto fail; s->start_mb_y = 0; s->end_mb_y = s->mb_height; + } return 0; @@ -776,7 +777,7 @@ void MPV_common_end(MpegEncContext *s) { int i, j, k; - if (HAVE_THREADS && s->avctx->active_thread_type&FF_THREAD_SLICE) { + if (s->encoding || (HAVE_THREADS && 
s->avctx->active_thread_type&FF_THREAD_SLICE)) { for(i=0; i<s->avctx->thread_count; i++){ free_duplicate_context(s->thread_context[i]); } @@ -955,7 +956,7 @@ void ff_release_unused_pictures(MpegEncContext *s, int remove_current) /* release non reference frames */ for(i=0; i<s->picture_count; i++){ if(s->picture[i].data[0] && !s->picture[i].reference - && s->picture[i].owner2 == s + && (!s->picture[i].owner2 || s->picture[i].owner2 == s) && (remove_current || &s->picture[i] != s->current_picture_ptr) /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){ free_frame_buffer(s, &s->picture[i]); @@ -1182,12 +1183,17 @@ void MPV_frame_end(MpegEncContext *s) && s->current_picture.reference && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) { - int edges = EDGE_BOTTOM | EDGE_TOP, h = s->v_edge_pos; - - s->dsp.draw_edges(s->current_picture.data[0], s->linesize , s->h_edge_pos , h , EDGE_WIDTH , edges); - s->dsp.draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, h>>1, EDGE_WIDTH/2, edges); - s->dsp.draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, h>>1, EDGE_WIDTH/2, edges); - + int hshift = av_pix_fmt_descriptors[s->avctx->pix_fmt].log2_chroma_w; + int vshift = av_pix_fmt_descriptors[s->avctx->pix_fmt].log2_chroma_h; + s->dsp.draw_edges(s->current_picture.data[0], s->linesize , + s->h_edge_pos , s->v_edge_pos, + EDGE_WIDTH , EDGE_WIDTH , EDGE_TOP | EDGE_BOTTOM); + s->dsp.draw_edges(s->current_picture.data[1], s->uvlinesize, + s->h_edge_pos>>hshift, s->v_edge_pos>>vshift, + EDGE_WIDTH>>hshift, EDGE_WIDTH>>vshift, EDGE_TOP | EDGE_BOTTOM); + s->dsp.draw_edges(s->current_picture.data[2], s->uvlinesize, + s->h_edge_pos>>hshift, s->v_edge_pos>>vshift, + EDGE_WIDTH>>hshift, EDGE_WIDTH>>vshift, EDGE_TOP | EDGE_BOTTOM); } emms_c(); @@ -1321,15 +1327,8 @@ void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){ if(s->avctx->debug&(FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)){ int x,y; - av_log(s->avctx,AV_LOG_DEBUG,"New frame, type: 
"); - switch (pict->pict_type) { - case AV_PICTURE_TYPE_I: av_log(s->avctx,AV_LOG_DEBUG,"I\n"); break; - case AV_PICTURE_TYPE_P: av_log(s->avctx,AV_LOG_DEBUG,"P\n"); break; - case AV_PICTURE_TYPE_B: av_log(s->avctx,AV_LOG_DEBUG,"B\n"); break; - case AV_PICTURE_TYPE_S: av_log(s->avctx,AV_LOG_DEBUG,"S\n"); break; - case AV_PICTURE_TYPE_SI: av_log(s->avctx,AV_LOG_DEBUG,"SI\n"); break; - case AV_PICTURE_TYPE_SP: av_log(s->avctx,AV_LOG_DEBUG,"SP\n"); break; - } + av_log(s->avctx, AV_LOG_DEBUG, "New frame, type: %c\n", + av_get_picture_type_char(pict->pict_type)); for(y=0; y<s->mb_height; y++){ for(x=0; x<s->mb_width; x++){ if(s->avctx->debug&FF_DEBUG_SKIP){ @@ -1412,6 +1411,7 @@ void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){ pict->data[i]= s->visualization_buffer[i]; } pict->type= FF_BUFFER_TYPE_COPY; + pict->opaque= NULL; ptr= pict->data[0]; block_height = 16>>v_chroma_shift; @@ -1725,8 +1725,10 @@ static av_always_inline void mpeg_motion_lowres(MpegEncContext *s, if(!CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ uvsx= (uvsx << 2) >> lowres; uvsy= (uvsy << 2) >> lowres; - pix_op[op_index](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy); - pix_op[op_index](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy); + if(h >> s->chroma_y_shift){ + pix_op[op_index](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy); + pix_op[op_index](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy); + } } //FIXME h261 lowres loop filter } @@ -2159,7 +2161,7 @@ void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64], } /* add dct residue */ - if(s->encoding || !( s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO + if(s->encoding || !( s->msmpeg4_version || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){ add_dequant_dct(s, block[0], 0, dest_y , dct_linesize, s->qscale); add_dequant_dct(s, block[1], 1, 
dest_y + block_size, dct_linesize, s->qscale); @@ -2298,14 +2300,19 @@ void ff_draw_horiz_band(MpegEncContext *s, int y, int h){ && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) { int sides = 0, edge_h; + int hshift = av_pix_fmt_descriptors[s->avctx->pix_fmt].log2_chroma_w; + int vshift = av_pix_fmt_descriptors[s->avctx->pix_fmt].log2_chroma_h; if (y==0) sides |= EDGE_TOP; if (y + h >= s->v_edge_pos) sides |= EDGE_BOTTOM; edge_h= FFMIN(h, s->v_edge_pos - y); - s->dsp.draw_edges(s->current_picture_ptr->data[0] + y *s->linesize , s->linesize , s->h_edge_pos , edge_h , EDGE_WIDTH , sides); - s->dsp.draw_edges(s->current_picture_ptr->data[1] + (y>>1)*s->uvlinesize, s->uvlinesize, s->h_edge_pos>>1, edge_h>>1, EDGE_WIDTH/2, sides); - s->dsp.draw_edges(s->current_picture_ptr->data[2] + (y>>1)*s->uvlinesize, s->uvlinesize, s->h_edge_pos>>1, edge_h>>1, EDGE_WIDTH/2, sides); + s->dsp.draw_edges(s->current_picture_ptr->data[0] + y *s->linesize , s->linesize, + s->h_edge_pos , edge_h , EDGE_WIDTH , EDGE_WIDTH , sides); + s->dsp.draw_edges(s->current_picture_ptr->data[1] + (y>>vshift)*s->uvlinesize, s->uvlinesize, + s->h_edge_pos>>hshift, edge_h>>hshift, EDGE_WIDTH>>hshift, EDGE_WIDTH>>vshift, sides); + s->dsp.draw_edges(s->current_picture_ptr->data[2] + (y>>vshift)*s->uvlinesize, s->uvlinesize, + s->h_edge_pos>>hshift, edge_h>>hshift, EDGE_WIDTH>>hshift, EDGE_WIDTH>>vshift, sides); } h= FFMIN(h, s->avctx->height - y); @@ -2638,6 +2645,6 @@ void ff_set_qscale(MpegEncContext * s, int qscale) void MPV_report_decode_progress(MpegEncContext *s) { - if (s->pict_type != AV_PICTURE_TYPE_B && !s->partitioned_frame) + if (s->pict_type != FF_B_TYPE && !s->partitioned_frame && !s->error_occurred) ff_thread_report_progress((AVFrame*)s->current_picture_ptr, s->mb_y, 0); } diff --git a/libavcodec/mpegvideo.h b/libavcodec/mpegvideo.h index c8a38f5b6a..266d3a4c9d 100644 --- a/libavcodec/mpegvideo.h +++ b/libavcodec/mpegvideo.h @@ -209,7 +209,6 @@ typedef struct MpegEncContext { /* the 
following codec id fields are deprecated in favor of codec_id */ int h263_plus; ///< h263 plus headers - int h263_msmpeg4; ///< generate MSMPEG4 compatible stream (deprecated, use msmpeg4_version instead) int h263_flv; ///< use flv h263 header enum CodecID codec_id; /* see CODEC_ID_xxx */ @@ -480,7 +479,7 @@ typedef struct MpegEncContext { int last_bits; ///< temp var used for calculating the above vars /* error concealment / resync */ - int error_count; + int error_count, error_occurred; uint8_t *error_status_table; ///< table of the error status of each MB #define VP_START 1 ///< current MB is the first after a resync marker #define AC_ERROR 2 @@ -848,4 +847,3 @@ void ff_wmv2_encode_mb(MpegEncContext * s, int motion_x, int motion_y); #endif /* AVCODEC_MPEGVIDEO_H */ - diff --git a/libavcodec/mpegvideo_enc.c b/libavcodec/mpegvideo_enc.c index cf5faac2b9..a6e9c7c7be 100644 --- a/libavcodec/mpegvideo_enc.c +++ b/libavcodec/mpegvideo_enc.c @@ -352,7 +352,7 @@ av_cold int MPV_encode_init(AVCodecContext *avctx) } if(avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate){ - av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n"); + av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n"); return -1; } @@ -582,7 +582,7 @@ av_cold int MPV_encode_init(AVCodecContext *avctx) case CODEC_ID_H263: if (!CONFIG_H263_ENCODER) return -1; if (ff_match_2uint16(h263_format, FF_ARRAY_ELEMS(h263_format), s->width, s->height) == 8) { - av_log(avctx, AV_LOG_INFO, "The specified picture size of %dx%d is not valid for the H.263 codec.\nValid sizes are 128x96, 176x144, 352x288, 704x576, and 1408x1152. Try H.263+.\n", s->width, s->height); + av_log(avctx, AV_LOG_ERROR, "The specified picture size of %dx%d is not valid for the H.263 codec.\nValid sizes are 128x96, 176x144, 352x288, 704x576, and 1408x1152. Try H.263+.\n", s->width, s->height); return -1; } s->out_format = FMT_H263; @@ -638,18 +638,8 @@ av_cold int MPV_encode_init(AVCodecContext *avctx) s->low_delay= s->max_b_frames ? 
0 : 1; avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1); break; - case CODEC_ID_MSMPEG4V1: - s->out_format = FMT_H263; - s->h263_msmpeg4 = 1; - s->h263_pred = 1; - s->unrestricted_mv = 1; - s->msmpeg4_version= 1; - avctx->delay=0; - s->low_delay=1; - break; case CODEC_ID_MSMPEG4V2: s->out_format = FMT_H263; - s->h263_msmpeg4 = 1; s->h263_pred = 1; s->unrestricted_mv = 1; s->msmpeg4_version= 2; @@ -658,7 +648,6 @@ av_cold int MPV_encode_init(AVCodecContext *avctx) break; case CODEC_ID_MSMPEG4V3: s->out_format = FMT_H263; - s->h263_msmpeg4 = 1; s->h263_pred = 1; s->unrestricted_mv = 1; s->msmpeg4_version= 3; @@ -668,7 +657,6 @@ av_cold int MPV_encode_init(AVCodecContext *avctx) break; case CODEC_ID_WMV1: s->out_format = FMT_H263; - s->h263_msmpeg4 = 1; s->h263_pred = 1; s->unrestricted_mv = 1; s->msmpeg4_version= 4; @@ -678,7 +666,6 @@ av_cold int MPV_encode_init(AVCodecContext *avctx) break; case CODEC_ID_WMV2: s->out_format = FMT_H263; - s->h263_msmpeg4 = 1; s->h263_pred = 1; s->unrestricted_mv = 1; s->msmpeg4_version= 5; @@ -1239,7 +1226,7 @@ int MPV_encode_picture(AVCodecContext *avctx, { MpegEncContext *s = avctx->priv_data; AVFrame *pic_arg = data; - int i, stuffing_count, context_count = avctx->active_thread_type&FF_THREAD_SLICE ? avctx->thread_count : 1; + int i, stuffing_count, context_count = avctx->thread_count; for(i=0; i<context_count; i++){ int start_y= s->thread_context[i]->start_mb_y; @@ -2772,7 +2759,7 @@ static int encode_picture(MpegEncContext *s, int picture_number) { int i; int bits; - int context_count = s->avctx->active_thread_type&FF_THREAD_SLICE ? 
s->avctx->thread_count : 1; + int context_count = s->avctx->thread_count; s->picture_number = picture_number; @@ -2782,7 +2769,7 @@ static int encode_picture(MpegEncContext *s, int picture_number) /* we need to initialize some time vars before we can encode b-frames */ // RAL: Condition added for MPEG1VIDEO - if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->h263_msmpeg4)) + if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version)) set_frame_distances(s); if(CONFIG_MPEG4_ENCODER && s->codec_id == CODEC_ID_MPEG4) ff_set_mpeg4_time(s); @@ -2951,7 +2938,7 @@ static int encode_picture(MpegEncContext *s, int picture_number) case FMT_H263: if (CONFIG_WMV2_ENCODER && s->codec_id == CODEC_ID_WMV2) ff_wmv2_encode_picture_header(s, picture_number); - else if (CONFIG_MSMPEG4_ENCODER && s->h263_msmpeg4) + else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version) msmpeg4_encode_picture_header(s, picture_number); else if (CONFIG_MPEG4_ENCODER && s->h263_pred) mpeg4_encode_picture_header(s, picture_number); @@ -3807,18 +3794,6 @@ AVCodec ff_h263p_encoder = { .long_name= NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"), }; -AVCodec ff_msmpeg4v1_encoder = { - "msmpeg4v1", - AVMEDIA_TYPE_VIDEO, - CODEC_ID_MSMPEG4V1, - sizeof(MpegEncContext), - MPV_encode_init, - MPV_encode_picture, - MPV_encode_end, - .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE}, - .long_name= NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 1"), -}; - AVCodec ff_msmpeg4v2_encoder = { "msmpeg4v2", AVMEDIA_TYPE_VIDEO, diff --git a/libavcodec/mpegvideo_parser.c b/libavcodec/mpegvideo_parser.c index 6369224d1a..cfc950d017 100644 --- a/libavcodec/mpegvideo_parser.c +++ b/libavcodec/mpegvideo_parser.c @@ -32,7 +32,7 @@ static void mpegvideo_extract_headers(AVCodecParserContext *s, uint32_t start_code; int frame_rate_index, ext_type, bytes_left; int 
frame_rate_ext_n, frame_rate_ext_d; - int picture_structure, top_field_first, repeat_first_field, progressive_frame; + int top_field_first, repeat_first_field, progressive_frame; int horiz_size_ext, vert_size_ext, bit_rate_ext; int did_set_size=0; //FIXME replace the crap with get_bits() @@ -91,7 +91,6 @@ static void mpegvideo_extract_headers(AVCodecParserContext *s, break; case 0x8: /* picture coding extension */ if (bytes_left >= 5) { - picture_structure = buf[2]&3; top_field_first = buf[3] & (1 << 7); repeat_first_field = buf[3] & (1 << 1); progressive_frame = buf[4] & (1 << 7); diff --git a/libavcodec/mqc.c b/libavcodec/mqc.c new file mode 100644 index 0000000000..700b9574c1 --- /dev/null +++ b/libavcodec/mqc.c @@ -0,0 +1,108 @@ +/* + * MQ-coder encoder and decoder common functions + * Copyright (c) 2007 Kamil Nowosad + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * MQ-coder ecoder and decoder common functions + * @file + * @author Kamil Nowosad + */ + +#include "mqc.h" + +typedef struct { + uint16_t qe; + uint8_t nmps; + uint8_t nlps; + uint8_t sw; +} MqcCxState; + +const static MqcCxState cx_states[47] = { + {0x5601, 1, 1, 1}, + {0x3401, 2, 6, 0}, + {0x1801, 3, 9, 0}, + {0x0AC1, 4, 12, 0}, + {0x0521, 5, 29, 0}, + {0x0221, 38, 33, 0}, + {0x5601, 7, 6, 1}, + {0x5401, 8, 14, 0}, + {0x4801, 9, 14, 0}, + {0x3801, 10, 14, 0}, + {0x3001, 11, 17, 0}, + {0x2401, 12, 18, 0}, + {0x1C01, 13, 20, 0}, + {0x1601, 29, 21, 0}, + {0x5601, 15, 14, 1}, + {0x5401, 16, 14, 0}, + {0x5101, 17, 15, 0}, + {0x4801, 18, 16, 0}, + {0x3801, 19, 17, 0}, + {0x3401, 20, 18, 0}, + {0x3001, 21, 19, 0}, + {0x2801, 22, 19, 0}, + {0x2401, 23, 20, 0}, + {0x2201, 24, 21, 0}, + {0x1C01, 25, 22, 0}, + {0x1801, 26, 23, 0}, + {0x1601, 27, 24, 0}, + {0x1401, 28, 25, 0}, + {0x1201, 29, 26, 0}, + {0x1101, 30, 27, 0}, + {0x0AC1, 31, 28, 0}, + {0x09C1, 32, 29, 0}, + {0x08A1, 33, 30, 0}, + {0x0521, 34, 31, 0}, + {0x0441, 35, 32, 0}, + {0x02A1, 36, 33, 0}, + {0x0221, 37, 34, 0}, + {0x0141, 38, 35, 0}, + {0x0111, 39, 36, 0}, + {0x0085, 40, 37, 0}, + {0x0049, 41, 38, 0}, + {0x0025, 42, 39, 0}, + {0x0015, 43, 40, 0}, + {0x0009, 44, 41, 0}, + {0x0005, 45, 42, 0}, + {0x0001, 45, 43, 0}, + {0x5601, 46, 46, 0} +}; + +uint16_t ff_mqc_qe [2*47]; +uint8_t ff_mqc_nlps[2*47]; +uint8_t ff_mqc_nmps[2*47]; + +void ff_mqc_init_contexts(MqcState *mqc) +{ + int i; + memset(mqc->cx_states, 0, sizeof(mqc->cx_states)); + mqc->cx_states[MQC_CX_UNI] = 2 * 46; + mqc->cx_states[MQC_CX_RL] = 2 * 3; + mqc->cx_states[0] = 2 * 4; + + for (i = 0; i < 47; i++){ + ff_mqc_qe[2*i ] = + ff_mqc_qe[2*i+1] = cx_states[i].qe; + + ff_mqc_nlps[2*i ] = 2*cx_states[i].nlps + 
cx_states[i].sw; + ff_mqc_nlps[2*i+1] = 2*cx_states[i].nlps + 1 - cx_states[i].sw; + ff_mqc_nmps[2*i ] = 2*cx_states[i].nmps; + ff_mqc_nmps[2*i+1] = 2*cx_states[i].nmps + 1; + } +} diff --git a/libavcodec/mqc.h b/libavcodec/mqc.h new file mode 100644 index 0000000000..b28c13ec48 --- /dev/null +++ b/libavcodec/mqc.h @@ -0,0 +1,75 @@ +/* + * MQ-coder + * Copyright (c) 2007 Kamil Nowosad + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_MQC_H +#define AVCODEC_MQC_H + +/** + * MQ-coder + * @file + * @author Kamil Nowosad + */ + +#include "avcodec.h" + +#define MQC_CX_UNI 17 +#define MQC_CX_RL 18 + +extern uint16_t ff_mqc_qe[2*47]; +extern uint8_t ff_mqc_nlps[2*47]; +extern uint8_t ff_mqc_nmps[2*47]; + +typedef struct { + uint8_t *bp, *bpstart; + unsigned int a; + unsigned int c; + unsigned int ct; + uint8_t cx_states[19]; +} MqcState; + +/* encoder */ + +/** initialize the encoder */ +void ff_mqc_initenc(MqcState *mqc, uint8_t *bp); + +/** code bit d with context cx */ +void ff_mqc_encode(MqcState *mqc, uint8_t *cxstate, int d); + +/** number of encoded bytes */ +int ff_mqc_length(MqcState *mqc); + +/** flush the encoder [returns number of bytes encoded] */ +int ff_mqc_flush(MqcState *mqc); + +/* decoder */ + +/** initialize the 
decoder */ +void ff_mqc_initdec(MqcState *mqc, uint8_t *bp); + +/** returns decoded bit with context cx */ +int ff_mqc_decode(MqcState *mqc, uint8_t *cxstate); + +/* common */ + +/** initialize the contexts */ +void ff_mqc_init_contexts(MqcState *mqc); + +#endif /* AVCODEC_MQC_H */ diff --git a/libavcodec/mqcdec.c b/libavcodec/mqcdec.c new file mode 100644 index 0000000000..56e22f88c7 --- /dev/null +++ b/libavcodec/mqcdec.c @@ -0,0 +1,93 @@ +/* + * MQ-coder decoder + * Copyright (c) 2007 Kamil Nowosad + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * MQ-coder decoder + * @file + * @author Kamil Nowosad + */ + +#include "mqc.h" + +static void bytein(MqcState *mqc) +{ + if (*mqc->bp == 0xff){ + if (*(mqc->bp+1) > 0x8f) + mqc->c++; + else{ + mqc->bp++; + mqc->c += 2 + 0xfe00 - (*mqc->bp << 9); + } + } else{ + mqc->bp++; + mqc->c += 1 + 0xff00 - (*mqc->bp << 8); + } +} + +static int exchange(MqcState *mqc, uint8_t *cxstate, int lps) +{ + int d; + if ((mqc->a < ff_mqc_qe[*cxstate]) ^ (!lps)){ + if (lps) + mqc->a = ff_mqc_qe[*cxstate]; + d = *cxstate & 1; + *cxstate = ff_mqc_nmps[*cxstate]; + } else{ + if (lps) + mqc->a = ff_mqc_qe[*cxstate]; + d = 1 - (*cxstate & 1); + *cxstate = ff_mqc_nlps[*cxstate]; + } + // renormd: + do{ + if (!(mqc->c & 0xff)){ + mqc->c -= 0x100; + bytein(mqc); + } + mqc->a += mqc->a; + mqc->c += mqc->c; + } while (!(mqc->a & 0x8000)); + return d; +} + +void ff_mqc_initdec(MqcState *mqc, uint8_t *bp) +{ + ff_mqc_init_contexts(mqc); + mqc->bp = bp; + mqc->c = (*mqc->bp ^ 0xff) << 16; + bytein(mqc); + mqc->c = mqc->c << 7; + mqc->a = 0x8000; +} + +int ff_mqc_decode(MqcState *mqc, uint8_t *cxstate) +{ + mqc->a -= ff_mqc_qe[*cxstate]; + if ((mqc->c >> 16) < mqc->a){ + if (mqc->a & 0x8000) + return *cxstate & 1; + else + return exchange(mqc, cxstate, 0); + } else { + mqc->c -= mqc->a << 16; + return exchange(mqc, cxstate, 1); + } +} diff --git a/libavcodec/mqcenc.c b/libavcodec/mqcenc.c new file mode 100644 index 0000000000..97d352be44 --- /dev/null +++ b/libavcodec/mqcenc.c @@ -0,0 +1,119 @@ +/* + * MQ-coder encoder + * Copyright (c) 2007 Kamil Nowosad + * + * This file is part of FFmpeg. 
+ * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * MQ-coder encoder + * @file + * @author Kamil Nowosad + */ + +#include "mqc.h" + +static void byteout(MqcState *mqc) +{ +retry: + if (*mqc->bp == 0xff){ + mqc->bp++; + *mqc->bp = mqc->c >> 20; + mqc->c &= 0xfffff; + mqc->ct = 7; + } else if ((mqc->c & 0x8000000)){ + (*mqc->bp)++; + mqc->c &= 0x7ffffff; + goto retry; + } else{ + mqc->bp++; + *mqc->bp = mqc->c >> 19; + mqc->c &= 0x7ffff; + mqc->ct = 8; + } +} + +static void renorme(MqcState *mqc) +{ + do{ + mqc->a += mqc->a; + mqc->c += mqc->c; + if (!--mqc->ct) + byteout(mqc); + } while (!(mqc->a & 0x8000)); +} + +static void setbits(MqcState *mqc) +{ + int tmp = mqc->c + mqc->a; + mqc->c |= 0xffff; + if (mqc->c >= tmp) + mqc->c -= 0x8000; +} + +void ff_mqc_initenc(MqcState *mqc, uint8_t *bp) +{ + ff_mqc_init_contexts(mqc); + mqc->a = 0x8000; + mqc->c = 0; + mqc->bp = bp-1; + mqc->bpstart = bp; + mqc->ct = 12 + (*mqc->bp == 0xff); +} + +void ff_mqc_encode(MqcState *mqc, uint8_t *cxstate, int d) +{ + int qe; + + qe = ff_mqc_qe[*cxstate]; + mqc->a -= qe; + if ((*cxstate & 1) == d){ + if (!(mqc->a & 0x8000)){ + if (mqc->a < qe) + mqc->a = qe; + else + mqc->c += qe; + *cxstate = ff_mqc_nmps[*cxstate]; + renorme(mqc); + } else + mqc->c += qe; + } else{ + if (mqc->a < qe) + mqc->c += 
qe; + else + mqc->a = qe; + *cxstate = ff_mqc_nlps[*cxstate]; + renorme(mqc); + } +} + +int ff_mqc_length(MqcState *mqc) +{ + return mqc->bp - mqc->bpstart; +} + +int ff_mqc_flush(MqcState *mqc) +{ + setbits(mqc); + mqc->c = mqc->c << mqc->ct; + byteout(mqc); + mqc->c = mqc->c << mqc->ct; + byteout(mqc); + if (*mqc->bp != 0xff) + mqc->bp++; + return mqc->bp - mqc->bpstart; +} diff --git a/libavcodec/msgsmdec.h b/libavcodec/msgsmdec.h index fbf819a49c..3bfd1fd407 100644 --- a/libavcodec/msgsmdec.h +++ b/libavcodec/msgsmdec.h @@ -19,12 +19,12 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#ifndef MSGSMDEC_H -#define MSGSMDEC_H +#ifndef AVCODEC_MSGSMDEC_H +#define AVCODEC_MSGSMDEC_H #include "avcodec.h" int ff_msgsm_decode_block(AVCodecContext *avctx, int16_t *samples, const uint8_t *buf); -#endif +#endif /* AVCODEC_MSGSMDEC_H */ diff --git a/libavcodec/msmpeg4.c b/libavcodec/msmpeg4.c index ac0ad9bc9c..06098b04a1 100644 --- a/libavcodec/msmpeg4.c +++ b/libavcodec/msmpeg4.c @@ -846,22 +846,14 @@ static void msmpeg4_encode_dc(MpegEncContext * s, int level, int n, int *dir_ptr int pred, extquant; int extrabits = 0; - if(s->msmpeg4_version==1){ - int32_t *dc_val; - pred = msmpeg4v1_pred_dc(s, n, &dc_val); - - /* update predictor */ - *dc_val= level; - }else{ - int16_t *dc_val; - pred = msmpeg4_pred_dc(s, n, &dc_val, dir_ptr); + int16_t *dc_val; + pred = msmpeg4_pred_dc(s, n, &dc_val, dir_ptr); - /* update predictor */ - if (n < 4) { - *dc_val = level * s->y_dc_scale; - } else { - *dc_val = level * s->c_dc_scale; - } + /* update predictor */ + if (n < 4) { + *dc_val = level * s->y_dc_scale; + } else { + *dc_val = level * s->c_dc_scale; } /* do the prediction */ @@ -1536,9 +1528,7 @@ int msmpeg4_decode_ext_header(MpegEncContext * s, int buf_size) /* the alt_bitstream reader could read over the end so we need to check it */ if(left>=length && left<length+8) { - int fps; - - fps= get_bits(&s->gb, 5); + skip_bits(&s->gb, 5); /* fps */ 
s->bit_rate= get_bits(&s->gb, 11)*1024; if(s->msmpeg4_version>=3) s->flipflop_rounding= get_bits1(&s->gb); diff --git a/libavcodec/msmpeg4.h b/libavcodec/msmpeg4.h index 28372a0bc8..d8e3727a40 100644 --- a/libavcodec/msmpeg4.h +++ b/libavcodec/msmpeg4.h @@ -54,8 +54,7 @@ int ff_wmv2_decode_mb(MpegEncContext *s, DCTELEM block[6][64]); CONFIG_MSMPEG4V3_DECODER || \ CONFIG_WMV2_DECODER || \ CONFIG_VC1_DECODER) -#define CONFIG_MSMPEG4_ENCODER (CONFIG_MSMPEG4V1_ENCODER || \ - CONFIG_MSMPEG4V2_ENCODER || \ +#define CONFIG_MSMPEG4_ENCODER (CONFIG_MSMPEG4V2_ENCODER || \ CONFIG_MSMPEG4V3_ENCODER || \ CONFIG_WMV2_ENCODER) diff --git a/libavcodec/nellymoser.h b/libavcodec/nellymoser.h index 88d9aa6245..027fc7ed23 100644 --- a/libavcodec/nellymoser.h +++ b/libavcodec/nellymoser.h @@ -54,4 +54,4 @@ extern const int16_t ff_nelly_delta_table[32]; void ff_nelly_get_sample_bits(const float *buf, int *bits); -#endif +#endif /* AVCODEC_NELLYMOSER_H */ diff --git a/libavcodec/options.c b/libavcodec/options.c index 8aff6379ac..d9a7ea657a 100644 --- a/libavcodec/options.c +++ b/libavcodec/options.c @@ -353,8 +353,8 @@ static const AVOption options[]={ {"brd_scale", "downscales frames for dynamic B-frame decision", OFFSET(brd_scale), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, 0, 10, V|E}, {"crf", "enables constant quality mode, and selects the quality (x264/VP8)", OFFSET(crf), FF_OPT_TYPE_FLOAT, {.dbl = DEFAULT }, 0, 63, V|E}, {"cqp", "constant quantization parameter rate control method", OFFSET(cqp), FF_OPT_TYPE_INT, {.dbl = -1 }, INT_MIN, INT_MAX, V|E}, -{"keyint_min", "minimum interval between IDR-frames (x264)", OFFSET(keyint_min), FF_OPT_TYPE_INT, {.dbl = 25 }, INT_MIN, INT_MAX, V|E}, -{"refs", "reference frames to consider for motion compensation (Snow)", OFFSET(refs), FF_OPT_TYPE_INT, {.dbl = 1 }, INT_MIN, INT_MAX, V|E}, +{"keyint_min", "minimum interval between IDR-frames", OFFSET(keyint_min), FF_OPT_TYPE_INT, {.dbl = 25 }, INT_MIN, INT_MAX, V|E}, +{"refs", "reference frames to consider 
for motion compensation", OFFSET(refs), FF_OPT_TYPE_INT, {.dbl = 1 }, INT_MIN, INT_MAX, V|E}, {"chromaoffset", "chroma qp offset from luma", OFFSET(chromaoffset), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, V|E}, {"bframebias", "influences how often B-frames are used", OFFSET(bframebias), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, V|E}, {"trellis", "rate-distortion optimal quantization", OFFSET(trellis), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, V|A|E}, @@ -442,7 +442,12 @@ static const AVOption options[]={ {"em", "Emergency", 0, FF_OPT_TYPE_CONST, {.dbl = AV_AUDIO_SERVICE_TYPE_EMERGENCY }, INT_MIN, INT_MAX, A|E, "audio_service_type"}, {"vo", "Voice Over", 0, FF_OPT_TYPE_CONST, {.dbl = AV_AUDIO_SERVICE_TYPE_VOICE_OVER }, INT_MIN, INT_MAX, A|E, "audio_service_type"}, {"ka", "Karaoke", 0, FF_OPT_TYPE_CONST, {.dbl = AV_AUDIO_SERVICE_TYPE_KARAOKE }, INT_MIN, INT_MAX, A|E, "audio_service_type"}, -{"request_sample_fmt", "sample format audio decoders should prefer", OFFSET(request_sample_fmt), FF_OPT_TYPE_INT, {.dbl = AV_SAMPLE_FMT_NONE }, AV_SAMPLE_FMT_NONE, AV_SAMPLE_FMT_NB-1, A|D}, +{"request_sample_fmt", "sample format audio decoders should prefer", OFFSET(request_sample_fmt), FF_OPT_TYPE_INT, {.dbl = AV_SAMPLE_FMT_NONE }, AV_SAMPLE_FMT_NONE, AV_SAMPLE_FMT_NB-1, A|D, "request_sample_fmt"}, +{"u8" , "8-bit unsigned integer", 0, FF_OPT_TYPE_CONST, {.dbl = AV_SAMPLE_FMT_U8 }, INT_MIN, INT_MAX, A|D, "request_sample_fmt"}, +{"s16", "16-bit signed integer", 0, FF_OPT_TYPE_CONST, {.dbl = AV_SAMPLE_FMT_S16 }, INT_MIN, INT_MAX, A|D, "request_sample_fmt"}, +{"s32", "32-bit signed integer", 0, FF_OPT_TYPE_CONST, {.dbl = AV_SAMPLE_FMT_S32 }, INT_MIN, INT_MAX, A|D, "request_sample_fmt"}, +{"flt", "32-bit float", 0, FF_OPT_TYPE_CONST, {.dbl = AV_SAMPLE_FMT_FLT }, INT_MIN, INT_MAX, A|D, "request_sample_fmt"}, +{"dbl", "64-bit double", 0, FF_OPT_TYPE_CONST, {.dbl = AV_SAMPLE_FMT_DBL }, INT_MIN, INT_MAX, A|D, "request_sample_fmt"}, {NULL}, }; diff 
--git a/libavcodec/pcm_tablegen.h b/libavcodec/pcm_tablegen.h index 3d641b05bb..1387210a58 100644 --- a/libavcodec/pcm_tablegen.h +++ b/libavcodec/pcm_tablegen.h @@ -20,8 +20,8 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#ifndef PCM_TABLEGEN_H -#define PCM_TABLEGEN_H +#ifndef AVCODEC_PCM_TABLEGEN_H +#define AVCODEC_PCM_TABLEGEN_H #include <stdint.h> #include "libavutil/attributes.h" @@ -116,4 +116,4 @@ static void pcm_ulaw_tableinit(void) } #endif /* CONFIG_HARDCODED_TABLES */ -#endif /* PCM_TABLEGEN_H */ +#endif /* AVCODEC_PCM_TABLEGEN_H */ diff --git a/libavcodec/pgssubdec.c b/libavcodec/pgssubdec.c index a91cfe7bf6..315dbbd779 100644 --- a/libavcodec/pgssubdec.c +++ b/libavcodec/pgssubdec.c @@ -30,8 +30,6 @@ #include "libavutil/colorspace.h" #include "libavutil/imgutils.h" -//#define DEBUG_PACKET_CONTENTS - #define RGBA(r,g,b,a) (((a) << 24) | ((r) << 16) | ((g) << 8) | (b)) enum SegmentType { @@ -403,21 +401,18 @@ static int decode(AVCodecContext *avctx, void *data, int *data_size, const uint8_t *buf_end; uint8_t segment_type; int segment_length; - -#ifdef DEBUG_PACKET_CONTENTS int i; - av_log(avctx, AV_LOG_INFO, "PGS sub packet:\n"); + av_dlog(avctx, "PGS sub packet:\n"); for (i = 0; i < buf_size; i++) { - av_log(avctx, AV_LOG_INFO, "%02x ", buf[i]); + av_dlog(avctx, "%02x ", buf[i]); if (i % 16 == 15) - av_log(avctx, AV_LOG_INFO, "\n"); + av_dlog(avctx, "\n"); } if (i & 15) - av_log(avctx, AV_LOG_INFO, "\n"); -#endif + av_dlog(avctx, "\n"); *data_size = 0; diff --git a/libavcodec/pngdec.c b/libavcodec/pngdec.c index 100b60cd1e..05ba027802 100644 --- a/libavcodec/pngdec.c +++ b/libavcodec/pngdec.c @@ -379,7 +379,7 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *p; uint8_t *crow_buf_base = NULL; uint32_t tag, length; - int ret, crc; + int ret; FFSWAP(AVFrame *, s->current_picture, s->last_picture); avctx->coded_frame= s->current_picture; @@ -433,7 +433,7 @@ static int decode_frame(AVCodecContext *avctx, 
s->compression_type = *s->bytestream++; s->filter_type = *s->bytestream++; s->interlace_type = *s->bytestream++; - crc = bytestream_get_be32(&s->bytestream); + s->bytestream += 4; /* crc */ s->state |= PNG_IHDR; av_dlog(avctx, "width=%d height=%d depth=%d color_type=%d compression_type=%d filter_type=%d interlace_type=%d\n", s->width, s->height, s->bit_depth, s->color_type, @@ -467,8 +467,7 @@ static int decode_frame(AVCodecContext *avctx, } else if (s->bit_depth == 16 && s->color_type == PNG_COLOR_TYPE_RGB) { avctx->pix_fmt = PIX_FMT_RGB48BE; - } else if (s->bit_depth == 1 && - s->color_type == PNG_COLOR_TYPE_GRAY) { + } else if (s->bit_depth == 1) { avctx->pix_fmt = PIX_FMT_MONOBLACK; } else if (s->color_type == PNG_COLOR_TYPE_PALETTE) { avctx->pix_fmt = PIX_FMT_PAL8; @@ -504,7 +503,7 @@ static int decode_frame(AVCodecContext *avctx, s->image_buf = p->data[0]; s->image_linesize = p->linesize[0]; /* copy the palette if needed */ - if (s->color_type == PNG_COLOR_TYPE_PALETTE) + if (avctx->pix_fmt == PIX_FMT_PAL8) memcpy(p->data[1], s->palette, 256 * sizeof(uint32_t)); /* empty row is used if differencing to the first row */ s->last_row = av_mallocz(s->row_size); @@ -529,8 +528,7 @@ static int decode_frame(AVCodecContext *avctx, s->state |= PNG_IDAT; if (png_decode_idat(s, length) < 0) goto fail; - /* skip crc */ - crc = bytestream_get_be32(&s->bytestream); + s->bytestream += 4; /* crc */ break; case MKTAG('P', 'L', 'T', 'E'): { @@ -550,7 +548,7 @@ static int decode_frame(AVCodecContext *avctx, s->palette[i] = (0xff << 24); } s->state |= PNG_PLTE; - crc = bytestream_get_be32(&s->bytestream); + s->bytestream += 4; /* crc */ } break; case MKTAG('t', 'R', 'N', 'S'): @@ -566,13 +564,13 @@ static int decode_frame(AVCodecContext *avctx, v = *s->bytestream++; s->palette[i] = (s->palette[i] & 0x00ffffff) | (v << 24); } - crc = bytestream_get_be32(&s->bytestream); + s->bytestream += 4; /* crc */ } break; case MKTAG('I', 'E', 'N', 'D'): if (!(s->state & PNG_ALLIMAGE)) goto 
fail; - crc = bytestream_get_be32(&s->bytestream); + s->bytestream += 4; /* crc */ goto exit_loop; default: /* skip tag */ diff --git a/libavcodec/ppc/Makefile b/libavcodec/ppc/Makefile index 35ea0c38f8..8e37fc791d 100644 --- a/libavcodec/ppc/Makefile +++ b/libavcodec/ppc/Makefile @@ -7,11 +7,7 @@ ALTIVEC-OBJS-$(CONFIG_VP5_DECODER) += ppc/vp3dsp_altivec.o ALTIVEC-OBJS-$(CONFIG_VP6_DECODER) += ppc/vp3dsp_altivec.o ALTIVEC-OBJS-$(CONFIG_VP8_DECODER) += ppc/vp8dsp_altivec.o -ALTIVEC-OBJS-$(CONFIG_MP1FLOAT_DECODER) += ppc/mpegaudiodec_altivec.o -ALTIVEC-OBJS-$(CONFIG_MP2FLOAT_DECODER) += ppc/mpegaudiodec_altivec.o -ALTIVEC-OBJS-$(CONFIG_MP3FLOAT_DECODER) += ppc/mpegaudiodec_altivec.o -ALTIVEC-OBJS-$(CONFIG_MP3ON4FLOAT_DECODER) += ppc/mpegaudiodec_altivec.o -ALTIVEC-OBJS-$(CONFIG_MP3ADUFLOAT_DECODER) += ppc/mpegaudiodec_altivec.o +ALTIVEC-OBJS-$(CONFIG_MPEGAUDIODSP) += ppc/mpegaudiodec_altivec.o FFT-OBJS-$(HAVE_GNU_AS) += ppc/fft_altivec_s.o \ diff --git a/libavcodec/ppc/h264_altivec.c b/libavcodec/ppc/h264_altivec.c index 9df18888ad..9ba6bbaf2e 100644 --- a/libavcodec/ppc/h264_altivec.c +++ b/libavcodec/ppc/h264_altivec.c @@ -527,7 +527,7 @@ static void ff_h264_idct8_dc_add_altivec(uint8_t *dst, DCTELEM *block, int strid h264_idct_dc_add_internal(dst, block, stride, 8); } -static void ff_h264_idct_add16_altivec(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){ +static void ff_h264_idct_add16_altivec(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[15*8]){ int i; for(i=0; i<16; i++){ int nnz = nnzc[ scan8[i] ]; @@ -538,7 +538,7 @@ static void ff_h264_idct_add16_altivec(uint8_t *dst, const int *block_offset, DC } } -static void ff_h264_idct_add16intra_altivec(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){ +static void ff_h264_idct_add16intra_altivec(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[15*8]){ int 
i; for(i=0; i<16; i++){ if(nnzc[ scan8[i] ]) ff_h264_idct_add_altivec(dst + block_offset[i], block + i*16, stride); @@ -546,7 +546,7 @@ static void ff_h264_idct_add16intra_altivec(uint8_t *dst, const int *block_offse } } -static void ff_h264_idct8_add4_altivec(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){ +static void ff_h264_idct8_add4_altivec(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[15*8]){ int i; for(i=0; i<16; i+=4){ int nnz = nnzc[ scan8[i] ]; @@ -557,13 +557,15 @@ static void ff_h264_idct8_add4_altivec(uint8_t *dst, const int *block_offset, DC } } -static void ff_h264_idct_add8_altivec(uint8_t **dest, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){ - int i; - for(i=16; i<16+8; i++){ - if(nnzc[ scan8[i] ]) - ff_h264_idct_add_altivec(dest[(i&4)>>2] + block_offset[i], block + i*16, stride); - else if(block[i*16]) - h264_idct_dc_add_altivec(dest[(i&4)>>2] + block_offset[i], block + i*16, stride); +static void ff_h264_idct_add8_altivec(uint8_t **dest, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[15*8]){ + int i, j; + for (j = 1; j < 3; j++) { + for(i = j * 16; i < j * 16 + 4; i++){ + if(nnzc[ scan8[i] ]) + ff_h264_idct_add_altivec(dest[j-1] + block_offset[i], block + i*16, stride); + else if(block[i*16]) + h264_idct_dc_add_altivec(dest[j-1] + block_offset[i], block + i*16, stride); + } } } diff --git a/libavcodec/ppc/h264_template_altivec.c b/libavcodec/ppc/h264_template_altivec.c index 29e4d40d4f..2573e9c6f7 100644 --- a/libavcodec/ppc/h264_template_altivec.c +++ b/libavcodec/ppc/h264_template_altivec.c @@ -18,8 +18,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -//#define DEBUG_ALIGNMENT -#ifdef DEBUG_ALIGNMENT +#ifdef DEBUG #define ASSERT_ALIGNED(ptr) assert(((unsigned long)ptr&0x0000000F)); #else #define ASSERT_ALIGNED(ptr) ; diff --git 
a/libavcodec/ppc/mpegaudiodec_altivec.c b/libavcodec/ppc/mpegaudiodec_altivec.c index e087d4add1..2de5dd133a 100644 --- a/libavcodec/ppc/mpegaudiodec_altivec.c +++ b/libavcodec/ppc/mpegaudiodec_altivec.c @@ -21,9 +21,8 @@ #include "dsputil_altivec.h" #include "util_altivec.h" - -#define CONFIG_FLOAT 1 -#include "libavcodec/mpegaudio.h" +#include "libavcodec/dsputil.h" +#include "libavcodec/mpegaudiodsp.h" #define MACS(rt, ra, rb) rt+=(ra)*(rb) #define MLSS(rt, ra, rb) rt-=(ra)*(rb) @@ -124,7 +123,7 @@ static void apply_window_mp3(float *in, float *win, int *unused, float *out, *out = sum; } -void ff_mpegaudiodec_init_altivec(MPADecodeContext *s) +void ff_mpadsp_init_altivec(MPADSPContext *s) { - s->apply_window_mp3 = apply_window_mp3; + s->apply_window_float = apply_window_mp3; } diff --git a/libavcodec/pthread.c b/libavcodec/pthread.c index e0eea4b1f1..e84f2aea4c 100644 --- a/libavcodec/pthread.c +++ b/libavcodec/pthread.c @@ -55,7 +55,7 @@ typedef struct ThreadContext { } ThreadContext; /// Max number of frame buffers that can be allocated when using frame threads. -#define MAX_BUFFERS 32 +#define MAX_BUFFERS (32+1) /** * Context used by codec threads and stored in their AVCodecContext thread_opaque. 
diff --git a/libavcodec/put_bits.h b/libavcodec/put_bits.h index d301d0afcc..79016912d5 100644 --- a/libavcodec/put_bits.h +++ b/libavcodec/put_bits.h @@ -34,6 +34,7 @@ #include "libavutil/intreadwrite.h" #include "libavutil/log.h" #include "mathops.h" +#include "config.h" //#define ALT_BITSTREAM_WRITER //#define ALIGNED_BITSTREAM_WRITER diff --git a/libavcodec/qdm2.c b/libavcodec/qdm2.c index a64870a3f9..b9252bab40 100644 --- a/libavcodec/qdm2.c +++ b/libavcodec/qdm2.c @@ -38,7 +38,8 @@ #include "avcodec.h" #include "get_bits.h" #include "dsputil.h" -#include "fft.h" +#include "rdft.h" +#include "mpegaudiodsp.h" #include "mpegaudio.h" #include "qdm2data.h" @@ -170,9 +171,10 @@ typedef struct { float output_buffer[1024]; /// Synthesis filter - DECLARE_ALIGNED(16, MPA_INT, synth_buf)[MPA_MAX_CHANNELS][512*2]; + MPADSPContext mpadsp; + DECLARE_ALIGNED(32, float, synth_buf)[MPA_MAX_CHANNELS][512*2]; int synth_buf_offset[MPA_MAX_CHANNELS]; - DECLARE_ALIGNED(16, int32_t, sb_samples)[MPA_MAX_CHANNELS][128][SBLIMIT]; + DECLARE_ALIGNED(32, float, sb_samples)[MPA_MAX_CHANNELS][128][SBLIMIT]; /// Mixed temporary data used in decoding float tone_level[MPA_MAX_CHANNELS][30][64]; @@ -329,11 +331,6 @@ static av_cold void qdm2_init_vlc(void) } } - -/* for floating point to fixed point conversion */ -static const float f2i_scale = (float) (1 << (FRAC_BITS - 15)); - - static int qdm2_get_vlc (GetBitContext *gb, VLC *vlc, int flag, int depth) { int value; @@ -482,8 +479,8 @@ static void build_sb_samples_from_noise (QDM2Context *q, int sb) for (ch = 0; ch < q->nb_channels; ch++) for (j = 0; j < 64; j++) { - q->sb_samples[ch][j * 2][sb] = (int32_t)(f2i_scale * SB_DITHERING_NOISE(sb,q->noise_idx) * q->tone_level[ch][sb][j] + .5); - q->sb_samples[ch][j * 2 + 1][sb] = (int32_t)(f2i_scale * SB_DITHERING_NOISE(sb,q->noise_idx) * q->tone_level[ch][sb][j] + .5); + q->sb_samples[ch][j * 2][sb] = SB_DITHERING_NOISE(sb,q->noise_idx) * q->tone_level[ch][sb][j]; + q->sb_samples[ch][j * 2 + 1][sb] 
= SB_DITHERING_NOISE(sb,q->noise_idx) * q->tone_level[ch][sb][j]; } } @@ -923,11 +920,11 @@ static void synthfilt_build_sb_samples (QDM2Context *q, GetBitContext *gb, int l for (chs = 0; chs < q->nb_channels; chs++) for (k = 0; k < run; k++) if ((j + k) < 128) - q->sb_samples[chs][j + k][sb] = (int32_t)(f2i_scale * q->tone_level[chs][sb][((j + k)/2)] * tmp[k][chs] + .5); + q->sb_samples[chs][j + k][sb] = q->tone_level[chs][sb][((j + k)/2)] * tmp[k][chs]; } else { for (k = 0; k < run; k++) if ((j + k) < 128) - q->sb_samples[ch][j + k][sb] = (int32_t)(f2i_scale * q->tone_level[ch][sb][(j + k)/2] * samples[k] + .5); + q->sb_samples[ch][j + k][sb] = q->tone_level[ch][sb][(j + k)/2] * samples[k]; } j += run; @@ -1601,7 +1598,7 @@ static void qdm2_calculate_fft (QDM2Context *q, int channel, int sub_packet) */ static void qdm2_synthesis_filter (QDM2Context *q, int index) { - OUT_INT samples[MPA_MAX_CHANNELS * MPA_FRAME_SIZE]; + float samples[MPA_MAX_CHANNELS * MPA_FRAME_SIZE]; int i, k, ch, sb_used, sub_sampling, dither_state = 0; /* copy sb_samples */ @@ -1613,11 +1610,12 @@ static void qdm2_synthesis_filter (QDM2Context *q, int index) q->sb_samples[ch][(8 * index) + i][k] = 0; for (ch = 0; ch < q->nb_channels; ch++) { - OUT_INT *samples_ptr = samples + ch; + float *samples_ptr = samples + ch; for (i = 0; i < 8; i++) { - ff_mpa_synth_filter(q->synth_buf[ch], &(q->synth_buf_offset[ch]), - ff_mpa_synth_window, &dither_state, + ff_mpa_synth_filter_float(&q->mpadsp, + q->synth_buf[ch], &(q->synth_buf_offset[ch]), + ff_mpa_synth_window_float, &dither_state, samples_ptr, q->nb_channels, q->sb_samples[ch][(8 * index) + i]); samples_ptr += 32 * q->nb_channels; @@ -1629,7 +1627,7 @@ static void qdm2_synthesis_filter (QDM2Context *q, int index) for (ch = 0; ch < q->channels; ch++) for (i = 0; i < q->frame_size; i++) - q->output_buffer[q->channels * i + ch] += (float)(samples[q->nb_channels * sub_sampling * i + ch] >> (sizeof(OUT_INT)*8-16)); + q->output_buffer[q->channels * i + 
ch] += (1 << 23) * samples[q->nb_channels * sub_sampling * i + ch]; } @@ -1646,7 +1644,7 @@ static av_cold void qdm2_init(QDM2Context *q) { initialized = 1; qdm2_init_vlc(); - ff_mpa_synth_init(ff_mpa_synth_window); + ff_mpa_synth_init_float(ff_mpa_synth_window_float); softclip_table_init(); rnd_table_init(); init_noise_samples(); @@ -1863,6 +1861,7 @@ static av_cold int qdm2_decode_init(AVCodecContext *avctx) } ff_rdft_init(&s->rdft_ctx, s->fft_order, IDFT_C2R); + ff_mpadsp_init(&s->mpadsp); qdm2_init(s); diff --git a/libavcodec/qdm2_tablegen.h b/libavcodec/qdm2_tablegen.h index 6718f58b09..f215b15ffc 100644 --- a/libavcodec/qdm2_tablegen.h +++ b/libavcodec/qdm2_tablegen.h @@ -20,8 +20,8 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#ifndef QDM2_TABLEGEN_H -#define QDM2_TABLEGEN_H +#ifndef AVCODEC_QDM2_TABLEGEN_H +#define AVCODEC_QDM2_TABLEGEN_H #include <stdint.h> #include <math.h> @@ -99,4 +99,4 @@ static av_cold void init_noise_samples(void) { } #endif /* CONFIG_HARDCODED_TABLES */ -#endif /* QDM2_TABLEGEN_H */ +#endif /* AVCODEC_QDM2_TABLEGEN_H */ diff --git a/libavcodec/qtrleenc.c b/libavcodec/qtrleenc.c index d43ff7b06b..6258b143ad 100644 --- a/libavcodec/qtrleenc.c +++ b/libavcodec/qtrleenc.c @@ -231,10 +231,11 @@ static void qtrle_encode_line(QtrleEncContext *s, AVFrame *p, int line, uint8_t else if (rlecode > 0) { /* bulk copy */ if (s->avctx->pix_fmt == PIX_FMT_GRAY8) { + int j; // QT grayscale colorspace has 0=white and 255=black, we will // ignore the palette that is included in the AVFrame because // PIX_FMT_GRAY8 has defined color mapping - for (int j = 0; j < rlecode*s->pixel_size; ++j) + for (j = 0; j < rlecode*s->pixel_size; ++j) bytestream_put_byte(buf, *(this_line + i*s->pixel_size + j) ^ 0xff); } else { bytestream_put_buffer(buf, this_line + i*s->pixel_size, rlecode*s->pixel_size); @@ -244,8 +245,9 @@ static void qtrle_encode_line(QtrleEncContext *s, AVFrame *p, int line, uint8_t else { /* repeat the bits 
*/ if (s->avctx->pix_fmt == PIX_FMT_GRAY8) { + int j; // QT grayscale colorspace has 0=white and 255=black, ... - for (int j = 0; j < s->pixel_size; ++j) + for (j = 0; j < s->pixel_size; ++j) bytestream_put_byte(buf, *(this_line + i*s->pixel_size + j) ^ 0xff); } else { bytestream_put_buffer(buf, this_line + i*s->pixel_size, s->pixel_size); diff --git a/libavcodec/raw.c b/libavcodec/raw.c index 7481b2fba9..0bc04df071 100644 --- a/libavcodec/raw.c +++ b/libavcodec/raw.c @@ -123,7 +123,6 @@ const PixelFormatTag ff_raw_pix_fmt_tags[] = { { PIX_FMT_YUYV422, MKTAG('y', 'u', 'v', '2') }, { PIX_FMT_YUYV422, MKTAG('y', 'u', 'v', 's') }, { PIX_FMT_YUYV422, MKTAG('D', 'V', 'O', 'O') }, /* Digital Voodoo SD 8 Bit */ - { PIX_FMT_PAL8, MKTAG('W', 'R', 'A', 'W') }, { PIX_FMT_RGB555LE,MKTAG('L', '5', '5', '5') }, { PIX_FMT_RGB565LE,MKTAG('L', '5', '6', '5') }, { PIX_FMT_RGB565BE,MKTAG('B', '5', '6', '5') }, diff --git a/libavcodec/rawdec.c b/libavcodec/rawdec.c index e6d3c21519..d6791c39e1 100644 --- a/libavcodec/rawdec.c +++ b/libavcodec/rawdec.c @@ -29,23 +29,34 @@ #include "raw.h" #include "libavutil/intreadwrite.h" #include "libavutil/imgutils.h" +#include "libavutil/opt.h" typedef struct RawVideoContext { + AVClass *av_class; uint32_t palette[AVPALETTE_COUNT]; unsigned char * buffer; /* block of memory for holding one frame */ int length; /* number of bytes in buffer */ int flip; AVFrame pic; ///< AVCodecContext.coded_frame + int tff; } RawVideoContext; +static const AVOption options[]={ +{"top", "top field first", offsetof(RawVideoContext, tff), FF_OPT_TYPE_INT, {.dbl = -1}, -1, 1, AV_OPT_FLAG_DECODING_PARAM|AV_OPT_FLAG_VIDEO_PARAM}, +{NULL} +}; +static const AVClass class = { "rawdec", NULL, options, LIBAVUTIL_VERSION_INT }; + static const PixelFormatTag pix_fmt_bps_avi[] = { + { PIX_FMT_MONOWHITE, 1 }, + { PIX_FMT_PAL8, 2 }, { PIX_FMT_PAL8, 4 }, { PIX_FMT_PAL8, 8 }, { PIX_FMT_RGB444, 12 }, { PIX_FMT_RGB555, 15 }, { PIX_FMT_RGB555, 16 }, { PIX_FMT_BGR24, 24 }, - { 
PIX_FMT_RGB32, 32 }, + { PIX_FMT_BGRA, 32 }, { PIX_FMT_NONE, 0 }, }; @@ -79,11 +90,18 @@ static av_cold int raw_init_decoder(AVCodecContext *avctx) if (avctx->codec_tag == MKTAG('r','a','w',' ')) avctx->pix_fmt = ff_find_pix_fmt(pix_fmt_bps_mov, avctx->bits_per_coded_sample); + else if (avctx->codec_tag == MKTAG('W','R','A','W')) + avctx->pix_fmt = ff_find_pix_fmt(pix_fmt_bps_avi, avctx->bits_per_coded_sample); else if (avctx->codec_tag) avctx->pix_fmt = ff_find_pix_fmt(ff_raw_pix_fmt_tags, avctx->codec_tag); else if (avctx->pix_fmt == PIX_FMT_NONE && avctx->bits_per_coded_sample) avctx->pix_fmt = ff_find_pix_fmt(pix_fmt_bps_avi, avctx->bits_per_coded_sample); + if (avctx->pix_fmt == PIX_FMT_NONE) { + av_log(avctx, AV_LOG_ERROR, "Pixel format was not specified and cannot be detected\n"); + return AVERROR(EINVAL); + } + ff_set_systematic_pal2(context->palette, avctx->pix_fmt); context->length = avpicture_get_size(avctx->pix_fmt, avctx->width, avctx->height); if((avctx->bits_per_coded_sample == 4 || avctx->bits_per_coded_sample == 2) && @@ -99,7 +117,7 @@ static av_cold int raw_init_decoder(AVCodecContext *avctx) avctx->coded_frame= &context->pic; if((avctx->extradata_size >= 9 && !memcmp(avctx->extradata + avctx->extradata_size - 9, "BottomUp", 9)) || - avctx->codec_tag == MKTAG( 3 , 0 , 0 , 0 )) + avctx->codec_tag == MKTAG(3, 0, 0, 0) || avctx->codec_tag == MKTAG('W','R','A','W')) context->flip=1; return 0; @@ -121,12 +139,18 @@ static int raw_decode(AVCodecContext *avctx, AVFrame * frame = (AVFrame *) data; AVPicture * picture = (AVPicture *) data; + frame->pict_type = avctx->coded_frame->pict_type; frame->interlaced_frame = avctx->coded_frame->interlaced_frame; frame->top_field_first = avctx->coded_frame->top_field_first; frame->reordered_opaque = avctx->reordered_opaque; frame->pkt_pts = avctx->pkt->pts; frame->pkt_pos = avctx->pkt->pos; + if(context->tff>=0){ + frame->interlaced_frame = 1; + frame->top_field_first = context->tff; + } + //2bpp and 4bpp raw in 
avi and mov (yes this is ugly ...) if (context->buffer) { int i; @@ -207,4 +231,5 @@ AVCodec ff_rawvideo_decoder = { raw_close_decoder, raw_decode, .long_name = NULL_IF_CONFIG_SMALL("raw video"), + .priv_class= &class, }; diff --git a/libavcodec/rdft.h b/libavcodec/rdft.h index cf50aba39c..5fb03232a1 100644 --- a/libavcodec/rdft.h +++ b/libavcodec/rdft.h @@ -71,4 +71,4 @@ void ff_rdft_end(RDFTContext *s); void ff_rdft_init_arm(RDFTContext *s); -#endif +#endif /* AVCODEC_RDFT_H */ diff --git a/libavcodec/resample.c b/libavcodec/resample.c index 1fb19b0232..10413c8d0e 100644 --- a/libavcodec/resample.c +++ b/libavcodec/resample.c @@ -108,6 +108,39 @@ static void mono_to_stereo(short *output, short *input, int n1) } } +/* +5.1 to stereo input: [fl, fr, c, lfe, rl, rr] +- Left = front_left + rear_gain * rear_left + center_gain * center +- Right = front_right + rear_gain * rear_right + center_gain * center +Where rear_gain is usually around 0.5-1.0 and + center_gain is almost always 0.7 (-3 dB) +*/ +static void surround_to_stereo(short **output, short *input, int channels, int samples) +{ + int i; + short l, r; + + for (i = 0; i < samples; i++) { + int fl,fr,c,rl,rr,lfe; + fl = input[0]; + fr = input[1]; + c = input[2]; + lfe = input[3]; + rl = input[4]; + rr = input[5]; + + l = av_clip_int16(fl + (0.5 * rl) + (0.7 * c)); + r = av_clip_int16(fr + (0.5 * rr) + (0.7 * c)); + + /* output l & r. */ + *output[0]++ = l; + *output[1]++ = r; + + /* increment input. 
*/ + input += channels; + } +} + static void deinterleave(short **output, short *input, int channels, int samples) { int i, j; @@ -147,6 +180,21 @@ static void ac3_5p1_mux(short *output, short *input1, short *input2, int n) } } +#define SUPPORT_RESAMPLE(ch1, ch2, ch3, ch4, ch5, ch6, ch7, ch8) \ + ch8<<7 | ch7<<6 | ch6<<5 | ch5<<4 | ch4<<3 | ch3<<2 | ch2<<1 | ch1<<0 + +static const uint8_t supported_resampling[MAX_CHANNELS] = { + //ouput channels:1 2 3 4 5 6 7 8 + SUPPORT_RESAMPLE(1, 1, 0, 0, 0, 0, 0, 0), // 1 input channel + SUPPORT_RESAMPLE(1, 1, 0, 0, 0, 1, 0, 0), // 2 input channels + SUPPORT_RESAMPLE(0, 0, 1, 0, 0, 0, 0, 0), // 3 input channels + SUPPORT_RESAMPLE(0, 0, 0, 1, 0, 0, 0, 0), // 4 input channels + SUPPORT_RESAMPLE(0, 0, 0, 0, 1, 0, 0, 0), // 5 input channels + SUPPORT_RESAMPLE(0, 1, 0, 0, 0, 1, 0, 0), // 6 input channels + SUPPORT_RESAMPLE(0, 0, 0, 0, 0, 0, 1, 0), // 7 input channels + SUPPORT_RESAMPLE(0, 0, 0, 0, 0, 0, 0, 1), // 8 input channels +}; + ReSampleContext *av_audio_resample_init(int output_channels, int input_channels, int output_rate, int input_rate, enum AVSampleFormat sample_fmt_out, @@ -162,11 +210,15 @@ ReSampleContext *av_audio_resample_init(int output_channels, int input_channels, MAX_CHANNELS); return NULL; } - if (output_channels > 2 && - !(output_channels == 6 && input_channels == 2) && - output_channels != input_channels) { - av_log(NULL, AV_LOG_ERROR, - "Resampling output channel count must be 1 or 2 for mono input; 1, 2 or 6 for stereo input; or N for N channel input.\n"); + if (!(supported_resampling[input_channels-1] & (1<<(output_channels-1)))) { + int i; + av_log(NULL, AV_LOG_ERROR, "Unsupported audio resampling. Allowed " + "output channels for %d input channel%s", input_channels, + input_channels > 1 ? 
"s:" : ":"); + for (i = 0; i < MAX_CHANNELS; i++) + if (supported_resampling[input_channels-1] & (1<<i)) + av_log(NULL, AV_LOG_ERROR, " %d", i + 1); + av_log(NULL, AV_LOG_ERROR, "\n"); return NULL; } @@ -312,6 +364,10 @@ int audio_resample(ReSampleContext *s, short *output, short *input, int nb_sampl } else if (s->output_channels >= 2 && s->input_channels == 1) { buftmp3[0] = bufout[0]; memcpy(buftmp2[0], input, nb_samples * sizeof(short)); + } else if (s->input_channels == 6 && s->output_channels ==2) { + buftmp3[0] = bufout[0]; + buftmp3[1] = bufout[1]; + surround_to_stereo(buftmp2, input, s->input_channels, nb_samples); } else if (s->output_channels >= s->input_channels && s->input_channels >= 2) { for (i = 0; i < s->input_channels; i++) { buftmp3[i] = bufout[i]; @@ -341,7 +397,8 @@ int audio_resample(ReSampleContext *s, short *output, short *input, int nb_sampl mono_to_stereo(output, buftmp3[0], nb_samples1); } else if (s->output_channels == 6 && s->input_channels == 2) { ac3_5p1_mux(output, buftmp3[0], buftmp3[1], nb_samples1); - } else if (s->output_channels == s->input_channels && s->input_channels >= 2) { + } else if ((s->output_channels == s->input_channels && s->input_channels >= 2) || + (s->output_channels == 2 && s->input_channels == 6)) { interleave(output, buftmp3, s->output_channels, nb_samples1); } diff --git a/libavcodec/rv10.c b/libavcodec/rv10.c index cbecfa8a87..2f822a8ac2 100644 --- a/libavcodec/rv10.c +++ b/libavcodec/rv10.c @@ -235,7 +235,7 @@ int rv_decode_dc(MpegEncContext *s, int n) /* read RV 1.0 compatible frame header */ static int rv10_decode_picture_header(MpegEncContext *s) { - int mb_count, pb_frame, marker, unk, mb_xy; + int mb_count, pb_frame, marker, mb_xy; marker = get_bits1(&s->gb); @@ -282,7 +282,7 @@ static int rv10_decode_picture_header(MpegEncContext *s) s->mb_y = 0; mb_count = s->mb_width * s->mb_height; } - unk= get_bits(&s->gb, 3); /* ignored */ + skip_bits(&s->gb, 3); /* ignored */ s->f_code = 1; s->unrestricted_mv = 1; 
diff --git a/libavcodec/s302m.c b/libavcodec/s302m.c index d8b2b38a02..98a5e73a8c 100644 --- a/libavcodec/s302m.c +++ b/libavcodec/s302m.c @@ -29,7 +29,7 @@ static int s302m_parse_frame_header(AVCodecContext *avctx, const uint8_t *buf, int buf_size) { uint32_t h; - int frame_size, channels, id, bits; + int frame_size, channels, bits; if (buf_size <= AES3_HEADER_LEN) { av_log(avctx, AV_LOG_ERROR, "frame is too short\n"); @@ -48,7 +48,6 @@ static int s302m_parse_frame_header(AVCodecContext *avctx, const uint8_t *buf, h = AV_RB32(buf); frame_size = (h >> 16) & 0xffff; channels = ((h >> 14) & 0x0003) * 2 + 2; - id = (h >> 6) & 0x00ff; bits = ((h >> 4) & 0x0003) * 4 + 16; if (AES3_HEADER_LEN + frame_size != buf_size || bits > 24) { @@ -108,7 +107,7 @@ static int s302m_decode_frame(AVCodecContext *avctx, void *data, *o++ = (av_reverse[buf[6] & 0xf0] << 28) | (av_reverse[buf[5]] << 20) | (av_reverse[buf[4]] << 12) | - (av_reverse[buf[3] & 0x0f] << 8); + (av_reverse[buf[3] & 0x0f] << 4); buf += 7; } *data_size = (uint8_t*) o - (uint8_t*) data; @@ -131,7 +130,7 @@ static int s302m_decode_frame(AVCodecContext *avctx, void *data, av_reverse[buf[0]]; *o++ = (av_reverse[buf[4] & 0xf0] << 12) | (av_reverse[buf[3]] << 4) | - av_reverse[buf[2] & 0x0f]; + (av_reverse[buf[2]] >> 4); buf += 5; } *data_size = (uint8_t*) o - (uint8_t*) data; diff --git a/libavcodec/sh4/dsputil_sh4.h b/libavcodec/sh4/dsputil_sh4.h index 2c1f77c770..2e554e7370 100644 --- a/libavcodec/sh4/dsputil_sh4.h +++ b/libavcodec/sh4/dsputil_sh4.h @@ -25,4 +25,4 @@ void idct_sh4(DCTELEM *block); void dsputil_init_align(DSPContext* c, AVCodecContext *avctx); -#endif +#endif /* AVCODEC_SH4_DSPUTIL_SH4_H */ diff --git a/libavcodec/shorten.c b/libavcodec/shorten.c index f50da4e87f..f593d0a164 100644 --- a/libavcodec/shorten.c +++ b/libavcodec/shorten.c @@ -26,7 +26,6 @@ * */ -#define DEBUG #include <limits.h> #include "avcodec.h" #include "get_bits.h" @@ -196,7 +195,6 @@ static int decode_wave_header(AVCodecContext 
*avctx, uint8_t *header, int header { GetBitContext hb; int len; - int chunk_size; short wave_format; init_get_bits(&hb, header, header_size*8); @@ -205,7 +203,7 @@ static int decode_wave_header(AVCodecContext *avctx, uint8_t *header, int header return -1; } - chunk_size = get_le32(&hb); + skip_bits_long(&hb, 32); /* chunk_size */ if (get_le32(&hb) != MKTAG('W','A','V','E')) { av_log(avctx, AV_LOG_ERROR, "missing WAVE tag\n"); @@ -305,7 +303,6 @@ static int shorten_decode_frame(AVCodecContext *avctx, s->bitstream_size= buf_size; if(buf_size < s->max_framesize){ - //av_dlog(avctx, "wanna more data ... %d\n", buf_size); *data_size = 0; return input_buf_size; } diff --git a/libavcodec/sinewin.h b/libavcodec/sinewin.h index 33f7d68b40..61135fd6a2 100644 --- a/libavcodec/sinewin.h +++ b/libavcodec/sinewin.h @@ -56,4 +56,4 @@ extern SINETABLE(4096); extern SINETABLE_CONST float * const ff_sine_windows[13]; -#endif +#endif /* AVCODEC_SINEWIN_H */ diff --git a/libavcodec/sinewin_tablegen.h b/libavcodec/sinewin_tablegen.h index 1c85d79f2f..f587595c21 100644 --- a/libavcodec/sinewin_tablegen.h +++ b/libavcodec/sinewin_tablegen.h @@ -20,6 +20,9 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#ifndef AVCODEC_SINEWIN_TABLEGEN_H +#define AVCODEC_SINEWIN_TABLEGEN_H + #include <assert.h> // do not use libavutil/libm.h since this is compiled both // for the host and the target and config.h is only valid for the target @@ -58,3 +61,5 @@ av_cold void ff_init_ff_sine_windows(int index) { ff_sine_window_init(ff_sine_windows[index], 1 << index); #endif } + +#endif /* AVCODEC_SINEWIN_TABLEGEN_H */ diff --git a/libavcodec/snow.c b/libavcodec/snow.c index 4ee1722e4c..43a71904eb 100644 --- a/libavcodec/snow.c +++ b/libavcodec/snow.c @@ -1976,9 +1976,15 @@ static int frame_start(SnowContext *s){ int h= s->avctx->height; if(s->current_picture.data[0]){ - s->dsp.draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH , 
EDGE_TOP|EDGE_BOTTOM); - s->dsp.draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2, EDGE_TOP|EDGE_BOTTOM); - s->dsp.draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2, EDGE_TOP|EDGE_BOTTOM); + s->dsp.draw_edges(s->current_picture.data[0], + s->current_picture.linesize[0], w , h , + EDGE_WIDTH , EDGE_WIDTH , EDGE_TOP | EDGE_BOTTOM); + s->dsp.draw_edges(s->current_picture.data[1], + s->current_picture.linesize[1], w>>1, h>>1, + EDGE_WIDTH/2, EDGE_WIDTH/2, EDGE_TOP | EDGE_BOTTOM); + s->dsp.draw_edges(s->current_picture.data[2], + s->current_picture.linesize[2], w>>1, h>>1, + EDGE_WIDTH/2, EDGE_WIDTH/2, EDGE_TOP | EDGE_BOTTOM); } release_buffer(s->avctx); diff --git a/libavcodec/sp5xdec.c b/libavcodec/sp5xdec.c index 1af978f21b..0b56c101db 100644 --- a/libavcodec/sp5xdec.c +++ b/libavcodec/sp5xdec.c @@ -38,15 +38,12 @@ static int sp5x_decode_frame(AVCodecContext *avctx, int buf_size = avpkt->size; AVPacket avpkt_recoded; const int qscale = 5; - const uint8_t *buf_ptr; uint8_t *recoded; int i = 0, j = 0; if (!avctx->width || !avctx->height) return -1; - buf_ptr = buf; - recoded = av_mallocz(buf_size + 1024); if (!recoded) return -1; diff --git a/libavcodec/sparc/dsputil_vis.h b/libavcodec/sparc/dsputil_vis.h index 97ff965da2..e1cbcb49e3 100644 --- a/libavcodec/sparc/dsputil_vis.h +++ b/libavcodec/sparc/dsputil_vis.h @@ -26,4 +26,4 @@ void ff_simple_idct_put_vis(uint8_t *dest, int line_size, DCTELEM *data); void ff_simple_idct_add_vis(uint8_t *dest, int line_size, DCTELEM *data); void ff_simple_idct_vis(DCTELEM *data); -#endif +#endif /* AVCODEC_SPARC_DSPUTIL_VIS_H */ diff --git a/libavcodec/svq1dec.c b/libavcodec/svq1dec.c index 66536145b2..3ef8dc4c0e 100644 --- a/libavcodec/svq1dec.c +++ b/libavcodec/svq1dec.c @@ -33,7 +33,6 @@ */ -//#define DEBUG_SVQ1 #include "avcodec.h" #include "dsputil.h" #include "mpegvideo.h" @@ -238,9 +237,9 @@ static int svq1_decode_block_intra 
(GetBitContext *bitbuf, uint8_t *pixels, int } if ((stages > 0) && (level >= 4)) { -#ifdef DEBUG_SVQ1 - av_log(s->avctx, AV_LOG_INFO, "Error (svq1_decode_block_intra): invalid vector: stages=%i level=%i\n",stages,level); -#endif + av_dlog(NULL, + "Error (svq1_decode_block_intra): invalid vector: stages=%i level=%i\n", + stages, level); return -1; /* invalid vector */ } @@ -288,9 +287,9 @@ static int svq1_decode_block_non_intra (GetBitContext *bitbuf, uint8_t *pixels, if (stages == -1) continue; /* skip vector */ if ((stages > 0) && (level >= 4)) { -#ifdef DEBUG_SVQ1 - av_log(s->avctx, AV_LOG_INFO, "Error (svq1_decode_block_non_intra): invalid vector: stages=%i level=%i\n",stages,level); -#endif + av_dlog(NULL, + "Error (svq1_decode_block_non_intra): invalid vector: stages=%i level=%i\n", + stages, level); return -1; /* invalid vector */ } @@ -499,9 +498,7 @@ static int svq1_decode_delta_block (MpegEncContext *s, GetBitContext *bitbuf, if (result != 0) { -#ifdef DEBUG_SVQ1 - av_log(s->avctx, AV_LOG_INFO, "Error in svq1_motion_inter_block %i\n",result); -#endif + av_dlog(s->avctx, "Error in svq1_motion_inter_block %i\n", result); break; } result = svq1_decode_block_non_intra (bitbuf, current, pitch); @@ -512,9 +509,7 @@ static int svq1_decode_delta_block (MpegEncContext *s, GetBitContext *bitbuf, if (result != 0) { -#ifdef DEBUG_SVQ1 - av_log(s->avctx, AV_LOG_INFO, "Error in svq1_motion_inter_4v_block %i\n",result); -#endif + av_dlog(s->avctx, "Error in svq1_motion_inter_4v_block %i\n", result); break; } result = svq1_decode_block_non_intra (bitbuf, current, pitch); @@ -554,9 +549,8 @@ static void svq1_parse_string (GetBitContext *bitbuf, uint8_t *out) { static int svq1_decode_frame_header (GetBitContext *bitbuf,MpegEncContext *s) { int frame_size_code; - int temporal_reference; - temporal_reference = get_bits (bitbuf, 8); + skip_bits(bitbuf, 8); /* temporal_reference */ /* frame type */ s->pict_type= get_bits (bitbuf, 2)+1; @@ -661,9 +655,7 @@ static int 
svq1_decode_frame(AVCodecContext *avctx, if (result != 0) { -#ifdef DEBUG_SVQ1 - av_log(s->avctx, AV_LOG_INFO, "Error in svq1_decode_frame_header %i\n",result); -#endif + av_dlog(s->avctx, "Error in svq1_decode_frame_header %i\n",result); return result; } @@ -715,9 +707,7 @@ static int svq1_decode_frame(AVCodecContext *avctx, result = svq1_decode_block_intra (&s->gb, &current[x], linesize); if (result != 0) { -//#ifdef DEBUG_SVQ1 av_log(s->avctx, AV_LOG_INFO, "Error in svq1_decode_block %i (keyframe)\n",result); -//#endif goto err; } } @@ -733,9 +723,7 @@ static int svq1_decode_frame(AVCodecContext *avctx, linesize, pmv, x, y); if (result != 0) { -#ifdef DEBUG_SVQ1 - av_log(s->avctx, AV_LOG_INFO, "Error in svq1_decode_delta_block %i\n",result); -#endif + av_dlog(s->avctx, "Error in svq1_decode_delta_block %i\n",result); goto err; } } diff --git a/libavcodec/svq3.c b/libavcodec/svq3.c index ab9eab2309..b1f3601ee7 100644 --- a/libavcodec/svq3.c +++ b/libavcodec/svq3.c @@ -635,8 +635,9 @@ static int svq3_decode_mb(SVQ3Context *svq3, unsigned int mb_type) memset(h->intra4x4_pred_mode+h->mb2br_xy[mb_xy], DC_PRED, 8); } if (!IS_SKIP(mb_type) || s->pict_type == AV_PICTURE_TYPE_B) { - memset(h->non_zero_count_cache + 8, 0, 4*9*sizeof(uint8_t)); - s->dsp.clear_blocks(h->mb); + memset(h->non_zero_count_cache + 8, 0, 14*8*sizeof(uint8_t)); + s->dsp.clear_blocks(h->mb+ 0); + s->dsp.clear_blocks(h->mb+384); } if (!IS_INTRA16x16(mb_type) && (!IS_SKIP(mb_type) || s->pict_type == AV_PICTURE_TYPE_B)) { @@ -656,8 +657,8 @@ static int svq3_decode_mb(SVQ3Context *svq3, unsigned int mb_type) } } if (IS_INTRA16x16(mb_type)) { - AV_ZERO128(h->mb_luma_dc+0); - AV_ZERO128(h->mb_luma_dc+8); + AV_ZERO128(h->mb_luma_dc[0]+0); + AV_ZERO128(h->mb_luma_dc[0]+8); if (svq3_decode_block(&s->gb, h->mb_luma_dc, 0, 1)){ av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding intra luma dc\n"); return -1; @@ -683,20 +684,23 @@ static int svq3_decode_mb(SVQ3Context *svq3, unsigned int mb_type) } if ((cbp & 
0x30)) { - for (i = 0; i < 2; ++i) { - if (svq3_decode_block(&s->gb, &h->mb[16*(16 + 4*i)], 0, 3)){ + for (i = 1; i < 3; ++i) { + if (svq3_decode_block(&s->gb, &h->mb[16*16*i], 0, 3)){ av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding chroma dc block\n"); return -1; } } if ((cbp & 0x20)) { - for (i = 0; i < 8; i++) { - h->non_zero_count_cache[ scan8[16+i] ] = 1; - - if (svq3_decode_block(&s->gb, &h->mb[16*(16 + i)], 1, 1)){ - av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding chroma ac block\n"); - return -1; + for (i = 1; i < 3; i++) { + for (j = 0; j < 4; j++) { + k = 16*i + j; + h->non_zero_count_cache[ scan8[k] ] = 1; + + if (svq3_decode_block(&s->gb, &h->mb[16*k], 1, 1)){ + av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding chroma ac block\n"); + return -1; + } } } } @@ -819,20 +823,12 @@ static av_cold int svq3_decode_init(AVCodecContext *avctx) avctx->pix_fmt = avctx->codec->pix_fmts[0]; if (!s->context_initialized) { - s->width = avctx->width; - s->height = avctx->height; h->chroma_qp[0] = h->chroma_qp[1] = 4; - svq3->halfpel_flag = 1; + svq3->halfpel_flag = 1; svq3->thirdpel_flag = 1; - svq3->unknown_flag = 0; - - if (MPV_common_init(s) < 0) - return -1; + svq3->unknown_flag = 0; - h->b_stride = 4*s->mb_width; - - ff_h264_alloc_tables(h); /* prowl for the "SEQH" marker in the extradata */ extradata = (unsigned char *)avctx->extradata; @@ -920,6 +916,16 @@ static av_cold int svq3_decode_init(AVCodecContext *avctx) #endif } } + + s->width = avctx->width; + s->height = avctx->height; + + if (MPV_common_init(s) < 0) + return -1; + + h->b_stride = 4*s->mb_width; + + ff_h264_alloc_tables(h); } return 0; diff --git a/libavcodec/tableprint.h b/libavcodec/tableprint.h index 97a667db33..d81b9a387b 100644 --- a/libavcodec/tableprint.h +++ b/libavcodec/tableprint.h @@ -23,8 +23,9 @@ #ifndef AVCODEC_TABLEPRINT_H #define AVCODEC_TABLEPRINT_H -#include <stdint.h> +#include <inttypes.h> #include <stdio.h> + #include "libavutil/common.h" #define 
WRITE_1D_FUNC_ARGV(type, linebrk, fmtstr, ...)\ @@ -70,9 +71,6 @@ void write_uint32_t_2d_array(const void *, int, int); void write_float_2d_array (const void *, int, int); /** \} */ // end of printfuncs group -/** Write a standard file header */ -void write_fileheader(void); - #define WRITE_ARRAY(prefix, type, name) \ do { \ const size_t array_size = FF_ARRAY_ELEMS(name); \ @@ -92,4 +90,22 @@ void write_fileheader(void); printf("};\n"); \ } while(0) + +WRITE_1D_FUNC(int8_t, "%3"PRIi8, 15) +WRITE_1D_FUNC(uint8_t, "0x%02"PRIx8, 15) +WRITE_1D_FUNC(uint16_t, "0x%08"PRIx16, 7) +WRITE_1D_FUNC(uint32_t, "0x%08"PRIx32, 7) +WRITE_1D_FUNC(float, "%.18e", 3) + +WRITE_2D_FUNC(int8_t) +WRITE_2D_FUNC(uint8_t) +WRITE_2D_FUNC(uint32_t) +WRITE_2D_FUNC(float) + +static inline void write_fileheader(void) +{ + printf("/* This file was automatically generated. */\n"); + printf("#include <stdint.h>\n"); +} + #endif /* AVCODEC_TABLEPRINT_H */ diff --git a/libavcodec/targa.c b/libavcodec/targa.c index 6f9bc13151..3125504b7c 100644 --- a/libavcodec/targa.c +++ b/libavcodec/targa.c @@ -108,18 +108,18 @@ static int decode_frame(AVCodecContext *avctx, AVFrame * const p= (AVFrame*)&s->picture; uint8_t *dst; int stride; - int idlen, pal, compr, x, y, w, h, bpp, flags; + int idlen, compr, y, w, h, bpp, flags; int first_clr, colors, csize; /* parse image header */ CHECK_BUFFER_SIZE(buf, buf_end, 18, "header"); idlen = *buf++; - pal = *buf++; + buf++; /* pal */ compr = *buf++; first_clr = AV_RL16(buf); buf += 2; colors = AV_RL16(buf); buf += 2; csize = *buf++; - x = AV_RL16(buf); buf += 2; + buf += 2; /* x */ y = AV_RL16(buf); buf += 2; w = AV_RL16(buf); buf += 2; h = AV_RL16(buf); buf += 2; @@ -217,6 +217,7 @@ static int decode_frame(AVCodecContext *avctx, CHECK_BUFFER_SIZE(buf, buf_end, img_size, "image data"); for(y = 0; y < s->height; y++){ #if HAVE_BIGENDIAN + int x; if((s->bpp + 1) >> 3 == 2){ uint16_t *dst16 = (uint16_t*)dst; for(x = 0; x < s->width; x++) diff --git a/libavcodec/targa.h 
b/libavcodec/targa.h index 5bc2a4407c..158a5ea0f4 100644 --- a/libavcodec/targa.h +++ b/libavcodec/targa.h @@ -16,6 +16,9 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#ifndef AVCODEC_TARGA_H +#define AVCODEC_TARGA_H + /** * @file * targa file common definitions @@ -34,3 +37,5 @@ enum TargaCompr { TGA_BW = 3, // black & white or grayscale TGA_RLE = 8, // flag pointing that data is RLE-coded }; + +#endif /* AVCODEC_TARGA_H */ diff --git a/libavcodec/targaenc.c b/libavcodec/targaenc.c index 7bdaca3bec..1171f605a2 100644 --- a/libavcodec/targaenc.c +++ b/libavcodec/targaenc.c @@ -20,6 +20,7 @@ */ #include "libavutil/intreadwrite.h" +#include "libavutil/pixdesc.h" #include "avcodec.h" #include "rle.h" #include "targa.h" @@ -119,7 +120,7 @@ static int targa_encode_frame(AVCodecContext *avctx, break; default: av_log(avctx, AV_LOG_ERROR, "Pixel format '%s' not supported.\n", - avcodec_get_pix_fmt_name(avctx->pix_fmt)); + av_get_pix_fmt_name(avctx->pix_fmt)); return AVERROR(EINVAL); } bpp = outbuf[16] >> 3; diff --git a/libavcodec/tiff.c b/libavcodec/tiff.c index 683527bedf..c54eaee346 100644 --- a/libavcodec/tiff.c +++ b/libavcodec/tiff.c @@ -39,6 +39,8 @@ typedef struct TiffContext { int width, height; unsigned int bpp, bppcount; + uint32_t palette[256]; + int palette_is_set; int le; enum TiffCompr compr; int invert; @@ -255,11 +257,15 @@ static int init_image(TiffContext *s) av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n"); return ret; } - if (s->bpp == 8 && s->picture.data[1]){ - /* make default grayscale pal */ - pal = (uint32_t *) s->picture.data[1]; - for (i = 0; i < 256; i++) - pal[i] = i * 0x010101; + if (s->avctx->pix_fmt == PIX_FMT_PAL8) { + if (s->palette_is_set) { + memcpy(s->picture.data[1], s->palette, sizeof(s->palette)); + } else { + /* make default grayscale pal */ + pal = (uint32_t *) s->picture.data[1]; + for (i = 0; i < 256; i++) + pal[i] = i * 0x010101; + } } return 0; } @@ -442,11 +448,7 @@ static int 
tiff_decode_tag(TiffContext *s, const uint8_t *start, const uint8_t * s->fill_order = value - 1; break; case TIFF_PAL: - if(s->avctx->pix_fmt != PIX_FMT_PAL8){ - av_log(s->avctx, AV_LOG_ERROR, "Palette met but this is not palettized format\n"); - return -1; - } - pal = (uint32_t *) s->picture.data[1]; + pal = (uint32_t *) s->palette; off = type_sizes[type]; rp = buf; gp = buf + count / 3 * off; @@ -459,6 +461,7 @@ static int tiff_decode_tag(TiffContext *s, const uint8_t *start, const uint8_t * j |= tget(&bp, type, s->le) >> off; pal[i] = j; } + s->palette_is_set = 1; break; case TIFF_PLANAR: if(value == 2){ @@ -474,7 +477,6 @@ static int tiff_decode_tag(TiffContext *s, const uint8_t *start, const uint8_t * if(s->compr == TIFF_G4) s->fax_opts = value; break; - default: av_log(s->avctx, AV_LOG_DEBUG, "Unknown or unsupported tag %d/0X%0X\n", tag, tag); } diff --git a/libavcodec/truemotion2.c b/libavcodec/truemotion2.c index 122049c957..2b9a0cba72 100644 --- a/libavcodec/truemotion2.c +++ b/libavcodec/truemotion2.c @@ -201,7 +201,6 @@ static inline int tm2_read_header(TM2Context *ctx, const uint8_t *buf) { uint32_t magic; const uint8_t *obuf; - int length; obuf = buf; @@ -212,19 +211,6 @@ static inline int tm2_read_header(TM2Context *ctx, const uint8_t *buf) /* av_log (ctx->avctx, AV_LOG_ERROR, "TM2 old header: not implemented (yet)\n"); */ return 40; } else if(magic == 0x00000101) { /* new header */ - int w, h, size, flags, xr, yr; - - length = AV_RL32(buf); - buf += 4; - - init_get_bits(&ctx->gb, buf, 32 * 8); - size = get_bits_long(&ctx->gb, 31); - h = get_bits(&ctx->gb, 15); - w = get_bits(&ctx->gb, 15); - flags = get_bits_long(&ctx->gb, 31); - yr = get_bits(&ctx->gb, 9); - xr = get_bits(&ctx->gb, 9); - return 40; } else { av_log (ctx->avctx, AV_LOG_ERROR, "Not a TM2 header: 0x%08X\n", magic); diff --git a/libavcodec/tscc.c b/libavcodec/tscc.c index 3a69a6aaf8..b12a608555 100644 --- a/libavcodec/tscc.c +++ b/libavcodec/tscc.c @@ -73,7 +73,6 @@ static int 
decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPac int buf_size = avpkt->size; CamtasiaContext * const c = avctx->priv_data; const unsigned char *encoded = buf; - unsigned char *outptr; int zret; // Zlib return code int len = buf_size; @@ -87,8 +86,6 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPac return -1; } - outptr = c->pic.data[0]; // Output image pointer - zret = inflateReset(&(c->zstream)); if (zret != Z_OK) { av_log(avctx, AV_LOG_ERROR, "Inflate reset error: %d\n", zret); diff --git a/libavcodec/utils.c b/libavcodec/utils.c index b169c9b9d1..1b86213029 100644 --- a/libavcodec/utils.c +++ b/libavcodec/utils.c @@ -122,7 +122,7 @@ typedef struct InternalBuffer{ enum PixelFormat pix_fmt; }InternalBuffer; -#define INTERNAL_BUFFER_SIZE 32 +#define INTERNAL_BUFFER_SIZE (32+1) void avcodec_align_dimensions2(AVCodecContext *s, int *width, int *height, int linesize_align[4]){ int w_align= 1; @@ -576,7 +576,7 @@ int attribute_align_arg avcodec_open(AVCodecContext *avctx, AVCodec *codec) } } - if (avctx->codec->max_lowres < avctx->lowres) { + if (avctx->codec->max_lowres < avctx->lowres || avctx->lowres < 0) { av_log(avctx, AV_LOG_ERROR, "The maximum value for lowres supported by the decoder is %d\n", avctx->codec->max_lowres); ret = AVERROR(EINVAL); @@ -762,9 +762,8 @@ int attribute_align_arg avcodec_decode_video2(AVCodecContext *avctx, AVFrame *pi if((avctx->coded_width||avctx->coded_height) && av_image_check_size(avctx->coded_width, avctx->coded_height, 0, avctx)) return -1; - avctx->pkt = avpkt; - if((avctx->codec->capabilities & CODEC_CAP_DELAY) || avpkt->size || (avctx->active_thread_type&FF_THREAD_FRAME)){ + avctx->pkt = avpkt; if (HAVE_PTHREADS && avctx->active_thread_type&FF_THREAD_FRAME) ret = ff_thread_decode_frame(avctx, picture, got_picture_ptr, avpkt); @@ -775,6 +774,8 @@ int attribute_align_arg avcodec_decode_video2(AVCodecContext *avctx, AVFrame *pi if(!avctx->has_b_frames){ picture->pkt_pos= 
avpkt->pos; + } + //FIXME these should be under if(!avctx->has_b_frames) if (!picture->sample_aspect_ratio.num) picture->sample_aspect_ratio = avctx->sample_aspect_ratio; if (!picture->width) @@ -783,7 +784,6 @@ int attribute_align_arg avcodec_decode_video2(AVCodecContext *avctx, AVFrame *pi picture->height = avctx->height; if (picture->format == PIX_FMT_NONE) picture->format = avctx->pix_fmt; - } } emms_c(); //needed to avoid an emms_c() call before every return; @@ -914,6 +914,9 @@ av_cold int avcodec_close(AVCodecContext *avctx) avctx->codec->close(avctx); avcodec_default_free_buffers(avctx); avctx->coded_frame = NULL; + if (avctx->codec && avctx->codec->priv_class) + av_opt_free(avctx->priv_data); + av_opt_free(avctx); av_freep(&avctx->priv_data); if(avctx->codec && avctx->codec->encode) av_freep(&avctx->extradata); @@ -960,14 +963,18 @@ AVCodec *avcodec_find_encoder_by_name(const char *name) AVCodec *avcodec_find_decoder(enum CodecID id) { - AVCodec *p; + AVCodec *p, *experimental=NULL; p = first_avcodec; while (p) { - if (p->decode != NULL && p->id == id) - return p; + if (p->decode != NULL && p->id == id) { + if (p->capabilities & CODEC_CAP_EXPERIMENTAL && !experimental) { + experimental = p; + } else + return p; + } p = p->next; } - return NULL; + return experimental; } AVCodec *avcodec_find_decoder_by_name(const char *name) @@ -1064,7 +1071,7 @@ void avcodec_string(char *buf, int buf_size, AVCodecContext *enc, int encode) if (enc->pix_fmt != PIX_FMT_NONE) { snprintf(buf + strlen(buf), buf_size - strlen(buf), ", %s", - avcodec_get_pix_fmt_name(enc->pix_fmt)); + av_get_pix_fmt_name(enc->pix_fmt)); } if (enc->width) { snprintf(buf + strlen(buf), buf_size - strlen(buf), diff --git a/libavcodec/v210enc.c b/libavcodec/v210enc.c index d1b3d9f858..fd47d7a5e7 100644 --- a/libavcodec/v210enc.c +++ b/libavcodec/v210enc.c @@ -22,7 +22,7 @@ */ #include "avcodec.h" -#include "libavcodec/bytestream.h" +#include "bytestream.h" static av_cold int encode_init(AVCodecContext 
*avctx) { diff --git a/libavcodec/vaapi_mpeg2.c b/libavcodec/vaapi_mpeg2.c index 3b3f6e0444..6c92a0ff72 100644 --- a/libavcodec/vaapi_mpeg2.c +++ b/libavcodec/vaapi_mpeg2.c @@ -109,7 +109,7 @@ static int vaapi_mpeg2_decode_slice(AVCodecContext *avctx, const uint8_t *buffer MpegEncContext * const s = avctx->priv_data; VASliceParameterBufferMPEG2 *slice_param; GetBitContext gb; - uint32_t start_code, quantiser_scale_code, intra_slice_flag, macroblock_offset; + uint32_t start_code av_unused, quantiser_scale_code, intra_slice_flag, macroblock_offset; av_dlog(avctx, "vaapi_mpeg2_decode_slice(): buffer %p, size %d\n", buffer, size); diff --git a/libavcodec/vc1dec.c b/libavcodec/vc1dec.c index 922481406f..7093363084 100644 --- a/libavcodec/vc1dec.c +++ b/libavcodec/vc1dec.c @@ -3398,7 +3398,7 @@ static av_cold int vc1_decode_init(AVCodecContext *avctx) VC1Context *v = avctx->priv_data; MpegEncContext *s = &v->s; GetBitContext gb; - int i; + int i, cur_width, cur_height; if (!avctx->extradata_size || !avctx->extradata) return -1; if (!(avctx->flags & CODEC_FLAG_GRAY)) @@ -3419,8 +3419,8 @@ static av_cold int vc1_decode_init(AVCodecContext *avctx) if (vc1_init_common(v) < 0) return -1; ff_vc1dsp_init(&v->vc1dsp); - avctx->coded_width = avctx->width; - avctx->coded_height = avctx->height; + cur_width = avctx->coded_width = avctx->width; + cur_height = avctx->coded_height = avctx->height; if (avctx->codec_id == CODEC_ID_WMV3) { int count = 0; @@ -3491,6 +3491,19 @@ static av_cold int vc1_decode_init(AVCodecContext *avctx) } v->res_sprite = (avctx->codec_tag == MKTAG('W','V','P','2')); } + // Sequence header information may not have been parsed + // yet when ff_msmpeg4_decode_init was called the fist time + // above. If sequence information changes, we need to call + // it again. 
+ if (cur_width != avctx->width || + cur_height != avctx->height) { + MPV_common_end(s); + if(ff_msmpeg4_decode_init(avctx) < 0) + return -1; + avctx->coded_width = avctx->width; + avctx->coded_height = avctx->height; + } + avctx->profile = v->profile; if (v->profile == PROFILE_ADVANCED) avctx->level = v->level; diff --git a/libavcodec/version.h b/libavcodec/version.h index 434d7b93d7..7cbf020d90 100644 --- a/libavcodec/version.h +++ b/libavcodec/version.h @@ -21,7 +21,7 @@ #define AVCODEC_VERSION_H #define LIBAVCODEC_VERSION_MAJOR 52 -#define LIBAVCODEC_VERSION_MINOR 121 +#define LIBAVCODEC_VERSION_MINOR 122 #define LIBAVCODEC_VERSION_MICRO 0 #define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \ @@ -98,5 +98,8 @@ #ifndef FF_API_FLAC_GLOBAL_OPTS #define FF_API_FLAC_GLOBAL_OPTS (LIBAVCODEC_VERSION_MAJOR < 54) #endif +#ifndef FF_API_GET_PIX_FMT_NAME +#define FF_API_GET_PIX_FMT_NAME (LIBAVCODEC_VERSION_MAJOR < 54) +#endif #endif /* AVCODEC_VERSION_H */ diff --git a/libavcodec/vmdav.c b/libavcodec/vmdav.c index 8d8bc61e42..934a52b939 100644 --- a/libavcodec/vmdav.c +++ b/libavcodec/vmdav.c @@ -199,7 +199,6 @@ static void vmd_decode(VmdVideoContext *s) int frame_x, frame_y; int frame_width, frame_height; - int dp_size; frame_x = AV_RL16(&s->buf[6]); frame_y = AV_RL16(&s->buf[8]); @@ -247,7 +246,6 @@ static void vmd_decode(VmdVideoContext *s) } dp = &s->frame.data[0][frame_y * s->frame.linesize[0] + frame_x]; - dp_size = s->frame.linesize[0] * s->avctx->height; pp = &s->prev_frame.data[0][frame_y * s->prev_frame.linesize[0] + frame_x]; switch (meth) { case 1: diff --git a/libavcodec/vorbis.c b/libavcodec/vorbis.c index a805452eab..fc830293cc 100644 --- a/libavcodec/vorbis.c +++ b/libavcodec/vorbis.c @@ -20,9 +20,6 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#undef V_DEBUG -//#define V_DEBUG - #define ALT_BITSTREAM_READER_LE #include "avcodec.h" #include "get_bits.h" @@ -57,7 +54,7 @@ int 
ff_vorbis_len2vlc(uint8_t *bits, uint32_t *codes, unsigned num) unsigned i, j, p, code; -#ifdef V_DEBUG +#ifdef DEBUG GetBitContext gb; #endif @@ -74,7 +71,7 @@ int ff_vorbis_len2vlc(uint8_t *bits, uint32_t *codes, unsigned num) for (i = 0; i < bits[p]; ++i) exit_at_level[i+1] = 1 << i; -#ifdef V_DEBUG +#ifdef DEBUG av_log(NULL, AV_LOG_INFO, " %u. of %u code len %d code %d - ", p, num, bits[p], codes[p]); init_get_bits(&gb, (uint8_t *)&codes[p], bits[p]); for (i = 0; i < bits[p]; ++i) @@ -102,7 +99,7 @@ int ff_vorbis_len2vlc(uint8_t *bits, uint32_t *codes, unsigned num) exit_at_level[j] = code + (1 << (j - 1)); codes[p] = code; -#ifdef V_DEBUG +#ifdef DEBUG av_log(NULL, AV_LOG_INFO, " %d. code len %d code %d - ", p, bits[p], codes[p]); init_get_bits(&gb, (uint8_t *)&codes[p], bits[p]); for (i = 0; i < bits[p]; ++i) diff --git a/libavcodec/vorbisdec.c b/libavcodec/vorbisdec.c index f4b743e8ab..e012f611a9 100644 --- a/libavcodec/vorbisdec.c +++ b/libavcodec/vorbisdec.c @@ -20,10 +20,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#undef V_DEBUG -//#define V_DEBUG -//#define AV_DEBUG(...) av_log(NULL, AV_LOG_INFO, __VA_ARGS__) - +#include <inttypes.h> #include <math.h> #define ALT_BITSTREAM_READER_LE @@ -41,10 +38,6 @@ #define V_MAX_VLCS (1 << 16) #define V_MAX_PARTITIONS (1 << 20) -#ifndef V_DEBUG -#define AV_DEBUG(...) -#endif - #undef NDEBUG #include <assert.h> @@ -245,7 +238,7 @@ static int vorbis_parse_setup_hdr_codebooks(vorbis_context *vc) vc->codebook_count = get_bits(gb, 8) + 1; - AV_DEBUG(" Codebooks: %d \n", vc->codebook_count); + av_dlog(NULL, " Codebooks: %d \n", vc->codebook_count); vc->codebooks = av_mallocz(vc->codebook_count * sizeof(*vc->codebooks)); tmp_vlc_bits = av_mallocz(V_MAX_VLCS * sizeof(*tmp_vlc_bits)); @@ -256,7 +249,7 @@ static int vorbis_parse_setup_hdr_codebooks(vorbis_context *vc) vorbis_codebook *codebook_setup = &vc->codebooks[cb]; unsigned ordered, t, entries, used_entries = 0; - AV_DEBUG(" %u. 
Codebook\n", cb); + av_dlog(NULL, " %u. Codebook\n", cb); if (get_bits(gb, 24) != 0x564342) { av_log(vc->avccontext, AV_LOG_ERROR, @@ -281,17 +274,17 @@ static int vorbis_parse_setup_hdr_codebooks(vorbis_context *vc) ordered = get_bits1(gb); - AV_DEBUG(" codebook_dimensions %d, codebook_entries %u\n", - codebook_setup->dimensions, entries); + av_dlog(NULL, " codebook_dimensions %d, codebook_entries %u\n", + codebook_setup->dimensions, entries); if (!ordered) { unsigned ce, flag; unsigned sparse = get_bits1(gb); - AV_DEBUG(" not ordered \n"); + av_dlog(NULL, " not ordered \n"); if (sparse) { - AV_DEBUG(" sparse \n"); + av_dlog(NULL, " sparse \n"); used_entries = 0; for (ce = 0; ce < entries; ++ce) { @@ -303,7 +296,7 @@ static int vorbis_parse_setup_hdr_codebooks(vorbis_context *vc) tmp_vlc_bits[ce] = 0; } } else { - AV_DEBUG(" not sparse \n"); + av_dlog(NULL, " not sparse \n"); used_entries = entries; for (ce = 0; ce < entries; ++ce) @@ -313,17 +306,17 @@ static int vorbis_parse_setup_hdr_codebooks(vorbis_context *vc) unsigned current_entry = 0; unsigned current_length = get_bits(gb, 5) + 1; - AV_DEBUG(" ordered, current length: %u\n", current_length); //FIXME + av_dlog(NULL, " ordered, current length: %u\n", current_length); //FIXME used_entries = entries; for (; current_entry < used_entries && current_length <= 32; ++current_length) { unsigned i, number; - AV_DEBUG(" number bits: %u ", ilog(entries - current_entry)); + av_dlog(NULL, " number bits: %u ", ilog(entries - current_entry)); number = get_bits(gb, ilog(entries - current_entry)); - AV_DEBUG(" number: %u\n", number); + av_dlog(NULL, " number: %u\n", number); for (i = current_entry; i < number+current_entry; ++i) if (i < used_entries) @@ -339,7 +332,8 @@ static int vorbis_parse_setup_hdr_codebooks(vorbis_context *vc) codebook_setup->lookup_type = get_bits(gb, 4); - AV_DEBUG(" lookup type: %d : %s \n", codebook_setup->lookup_type, codebook_setup->lookup_type ? 
"vq" : "no lookup"); + av_dlog(NULL, " lookup type: %d : %s \n", codebook_setup->lookup_type, + codebook_setup->lookup_type ? "vq" : "no lookup"); // If the codebook is used for (inverse) VQ, calculate codevectors. @@ -352,14 +346,17 @@ static int vorbis_parse_setup_hdr_codebooks(vorbis_context *vc) unsigned codebook_value_bits = get_bits(gb, 4) + 1; unsigned codebook_sequence_p = get_bits1(gb); - AV_DEBUG(" We expect %d numbers for building the codevectors. \n", codebook_lookup_values); - AV_DEBUG(" delta %f minmum %f \n", codebook_delta_value, codebook_minimum_value); + av_dlog(NULL, " We expect %d numbers for building the codevectors. \n", + codebook_lookup_values); + av_dlog(NULL, " delta %f minmum %f \n", + codebook_delta_value, codebook_minimum_value); for (i = 0; i < codebook_lookup_values; ++i) { codebook_multiplicands[i] = get_bits(gb, codebook_value_bits); - AV_DEBUG(" multiplicands*delta+minmum : %e \n", (float)codebook_multiplicands[i]*codebook_delta_value+codebook_minimum_value); - AV_DEBUG(" multiplicand %u\n", codebook_multiplicands[i]); + av_dlog(NULL, " multiplicands*delta+minmum : %e \n", + (float)codebook_multiplicands[i] * codebook_delta_value + codebook_minimum_value); + av_dlog(NULL, " multiplicand %u\n", codebook_multiplicands[i]); } // Weed out unused vlcs and build codevector vector @@ -374,9 +371,7 @@ static int vorbis_parse_setup_hdr_codebooks(vorbis_context *vc) float last = 0.0; unsigned lookup_offset = i; -#ifdef V_DEBUG - av_log(vc->avccontext, AV_LOG_INFO, "Lookup offset %u ,", i); -#endif + av_dlog(vc->avccontext, "Lookup offset %u ,", i); for (k = 0; k < dim; ++k) { unsigned multiplicand_offset = lookup_offset % codebook_lookup_values; @@ -387,12 +382,11 @@ static int vorbis_parse_setup_hdr_codebooks(vorbis_context *vc) } tmp_vlc_bits[j] = tmp_vlc_bits[i]; -#ifdef V_DEBUG - av_log(vc->avccontext, AV_LOG_INFO, "real lookup offset %u, vector: ", j); + av_dlog(vc->avccontext, "real lookup offset %u, vector: ", j); for (k = 0; k < dim; 
++k) - av_log(vc->avccontext, AV_LOG_INFO, " %f ", codebook_setup->codevectors[j * dim + k]); - av_log(vc->avccontext, AV_LOG_INFO, "\n"); -#endif + av_dlog(vc->avccontext, " %f ", + codebook_setup->codevectors[j * dim + k]); + av_dlog(vc->avccontext, "\n"); ++j; } @@ -453,8 +447,8 @@ static int vorbis_parse_setup_hdr_tdtransforms(vorbis_context *vc) for (i = 0; i < vorbis_time_count; ++i) { unsigned vorbis_tdtransform = get_bits(gb, 16); - AV_DEBUG(" Vorbis time domain transform %u: %u\n", - vorbis_time_count, vorbis_tdtransform); + av_dlog(NULL, " Vorbis time domain transform %u: %u\n", + vorbis_time_count, vorbis_tdtransform); if (vorbis_tdtransform) { av_log(vc->avccontext, AV_LOG_ERROR, "Vorbis time domain transform data nonzero. \n"); @@ -485,7 +479,7 @@ static int vorbis_parse_setup_hdr_floors(vorbis_context *vc) floor_setup->floor_type = get_bits(gb, 16); - AV_DEBUG(" %d. floor type %d \n", i, floor_setup->floor_type); + av_dlog(NULL, " %d. floor type %d \n", i, floor_setup->floor_type); if (floor_setup->floor_type == 1) { int maximum_class = -1; @@ -495,29 +489,33 @@ static int vorbis_parse_setup_hdr_floors(vorbis_context *vc) floor_setup->data.t1.partitions = get_bits(gb, 5); - AV_DEBUG(" %d.floor: %d partitions \n", i, floor_setup->data.t1.partitions); + av_dlog(NULL, " %d.floor: %d partitions \n", + i, floor_setup->data.t1.partitions); for (j = 0; j < floor_setup->data.t1.partitions; ++j) { floor_setup->data.t1.partition_class[j] = get_bits(gb, 4); if (floor_setup->data.t1.partition_class[j] > maximum_class) maximum_class = floor_setup->data.t1.partition_class[j]; - AV_DEBUG(" %d. floor %d partition class %d \n", i, j, floor_setup->data.t1.partition_class[j]); + av_dlog(NULL, " %d. 
floor %d partition class %d \n", + i, j, floor_setup->data.t1.partition_class[j]); } - AV_DEBUG(" maximum class %d \n", maximum_class); + av_dlog(NULL, " maximum class %d \n", maximum_class); for (j = 0; j <= maximum_class; ++j) { floor_setup->data.t1.class_dimensions[j] = get_bits(gb, 3) + 1; floor_setup->data.t1.class_subclasses[j] = get_bits(gb, 2); - AV_DEBUG(" %d floor %d class dim: %d subclasses %d \n", i, j, floor_setup->data.t1.class_dimensions[j], floor_setup->data.t1.class_subclasses[j]); + av_dlog(NULL, " %d floor %d class dim: %d subclasses %d \n", i, j, + floor_setup->data.t1.class_dimensions[j], + floor_setup->data.t1.class_subclasses[j]); if (floor_setup->data.t1.class_subclasses[j]) { GET_VALIDATED_INDEX(floor_setup->data.t1.class_masterbook[j], 8, vc->codebook_count) - AV_DEBUG(" masterbook: %d \n", floor_setup->data.t1.class_masterbook[j]); + av_dlog(NULL, " masterbook: %d \n", floor_setup->data.t1.class_masterbook[j]); } for (k = 0; k < (1 << floor_setup->data.t1.class_subclasses[j]); ++k) { @@ -526,7 +524,7 @@ static int vorbis_parse_setup_hdr_floors(vorbis_context *vc) VALIDATE_INDEX(bits, vc->codebook_count) floor_setup->data.t1.subclass_books[j][k] = bits; - AV_DEBUG(" book %d. : %d \n", k, floor_setup->data.t1.subclass_books[j][k]); + av_dlog(NULL, " book %d. : %d \n", k, floor_setup->data.t1.subclass_books[j][k]); } } @@ -555,8 +553,8 @@ static int vorbis_parse_setup_hdr_floors(vorbis_context *vc) for (k = 0; k < floor_setup->data.t1.class_dimensions[floor_setup->data.t1.partition_class[j]]; ++k, ++floor1_values) { floor_setup->data.t1.list[floor1_values].x = get_bits(gb, rangebits); - AV_DEBUG(" %u. floor1 Y coord. %d\n", floor1_values, - floor_setup->data.t1.list[floor1_values].x); + av_dlog(NULL, " %u. floor1 Y coord. 
%d\n", floor1_values, + floor_setup->data.t1.list[floor1_values].x); } } @@ -608,28 +606,26 @@ static int vorbis_parse_setup_hdr_floors(vorbis_context *vc) if (!floor_setup->data.t0.lsp) return -1; -#ifdef V_DEBUG /* debug output parsed headers */ - AV_DEBUG("floor0 order: %u\n", floor_setup->data.t0.order); - AV_DEBUG("floor0 rate: %u\n", floor_setup->data.t0.rate); - AV_DEBUG("floor0 bark map size: %u\n", - floor_setup->data.t0.bark_map_size); - AV_DEBUG("floor0 amplitude bits: %u\n", - floor_setup->data.t0.amplitude_bits); - AV_DEBUG("floor0 amplitude offset: %u\n", - floor_setup->data.t0.amplitude_offset); - AV_DEBUG("floor0 number of books: %u\n", - floor_setup->data.t0.num_books); - AV_DEBUG("floor0 book list pointer: %p\n", - floor_setup->data.t0.book_list); + /* debug output parsed headers */ + av_dlog(NULL, "floor0 order: %u\n", floor_setup->data.t0.order); + av_dlog(NULL, "floor0 rate: %u\n", floor_setup->data.t0.rate); + av_dlog(NULL, "floor0 bark map size: %u\n", + floor_setup->data.t0.bark_map_size); + av_dlog(NULL, "floor0 amplitude bits: %u\n", + floor_setup->data.t0.amplitude_bits); + av_dlog(NULL, "floor0 amplitude offset: %u\n", + floor_setup->data.t0.amplitude_offset); + av_dlog(NULL, "floor0 number of books: %u\n", + floor_setup->data.t0.num_books); + av_dlog(NULL, "floor0 book list pointer: %p\n", + floor_setup->data.t0.book_list); { int idx; for (idx = 0; idx < floor_setup->data.t0.num_books; ++idx) { - AV_DEBUG(" Book %d: %u\n", - idx+1, - floor_setup->data.t0.book_list[idx]); + av_dlog(NULL, " Book %d: %u\n", idx + 1, + floor_setup->data.t0.book_list[idx]); } } -#endif } else { av_log(vc->avccontext, AV_LOG_ERROR, "Invalid floor type!\n"); return -1; @@ -648,7 +644,7 @@ static int vorbis_parse_setup_hdr_residues(vorbis_context *vc) vc->residue_count = get_bits(gb, 6)+1; vc->residues = av_mallocz(vc->residue_count * sizeof(*vc->residues)); - AV_DEBUG(" There are %d residues. \n", vc->residue_count); + av_dlog(NULL, " There are %d residues. 
\n", vc->residue_count); for (i = 0; i < vc->residue_count; ++i) { vorbis_residue *res_setup = &vc->residues[i]; @@ -657,7 +653,7 @@ static int vorbis_parse_setup_hdr_residues(vorbis_context *vc) res_setup->type = get_bits(gb, 16); - AV_DEBUG(" %u. residue type %d\n", i, res_setup->type); + av_dlog(NULL, " %u. residue type %d\n", i, res_setup->type); res_setup->begin = get_bits(gb, 24); res_setup->end = get_bits(gb, 24); @@ -684,8 +680,9 @@ static int vorbis_parse_setup_hdr_residues(vorbis_context *vc) if (!res_setup->classifs) return AVERROR(ENOMEM); - AV_DEBUG(" begin %d end %d part.size %d classif.s %d classbook %d \n", res_setup->begin, res_setup->end, res_setup->partition_size, - res_setup->classifications, res_setup->classbook); + av_dlog(NULL, " begin %d end %d part.size %d classif.s %d classbook %d \n", + res_setup->begin, res_setup->end, res_setup->partition_size, + res_setup->classifications, res_setup->classbook); for (j = 0; j < res_setup->classifications; ++j) { high_bits = 0; @@ -694,7 +691,7 @@ static int vorbis_parse_setup_hdr_residues(vorbis_context *vc) high_bits = get_bits(gb, 5); cascade[j] = (high_bits << 3) + low_bits; - AV_DEBUG(" %u class cascade depth: %d\n", j, ilog(cascade[j])); + av_dlog(NULL, " %u class cascade depth: %d\n", j, ilog(cascade[j])); } res_setup->maxpass = 0; @@ -703,8 +700,8 @@ static int vorbis_parse_setup_hdr_residues(vorbis_context *vc) if (cascade[j]&(1 << k)) { GET_VALIDATED_INDEX(res_setup->books[j][k], 8, vc->codebook_count) - AV_DEBUG(" %u class cascade depth %u book: %d\n", - j, k, res_setup->books[j][k]); + av_dlog(NULL, " %u class cascade depth %u book: %d\n", + j, k, res_setup->books[j][k]); if (k>res_setup->maxpass) res_setup->maxpass = k; @@ -727,7 +724,7 @@ static int vorbis_parse_setup_hdr_mappings(vorbis_context *vc) vc->mapping_count = get_bits(gb, 6)+1; vc->mappings = av_mallocz(vc->mapping_count * sizeof(*vc->mappings)); - AV_DEBUG(" There are %d mappings. 
\n", vc->mapping_count); + av_dlog(NULL, " There are %d mappings. \n", vc->mapping_count); for (i = 0; i < vc->mapping_count; ++i) { vorbis_mapping *mapping_setup = &vc->mappings[i]; @@ -756,8 +753,8 @@ static int vorbis_parse_setup_hdr_mappings(vorbis_context *vc) mapping_setup->coupling_steps = 0; } - AV_DEBUG(" %u mapping coupling steps: %d\n", - i, mapping_setup->coupling_steps); + av_dlog(NULL, " %u mapping coupling steps: %d\n", + i, mapping_setup->coupling_steps); if (get_bits(gb, 2)) { av_log(vc->avccontext, AV_LOG_ERROR, "%u. mapping setup data invalid.\n", i); @@ -776,10 +773,9 @@ static int vorbis_parse_setup_hdr_mappings(vorbis_context *vc) GET_VALIDATED_INDEX(mapping_setup->submap_floor[j], 8, vc->floor_count) GET_VALIDATED_INDEX(mapping_setup->submap_residue[j], 8, vc->residue_count) - AV_DEBUG(" %u mapping %u submap : floor %d, residue %d\n", - i, j, - mapping_setup->submap_floor[j], - mapping_setup->submap_residue[j]); + av_dlog(NULL, " %u mapping %u submap : floor %d, residue %d\n", i, j, + mapping_setup->submap_floor[j], + mapping_setup->submap_residue[j]); } } return 0; @@ -814,12 +810,9 @@ static void create_map(vorbis_context *vc, unsigned floor_number) vf->map_size[blockflag] = n; } -#ifdef V_DEBUG for (idx = 0; idx <= n; ++idx) { - AV_DEBUG("floor0 map: map at pos %d is %d\n", - idx, map[idx]); + av_dlog(NULL, "floor0 map: map at pos %d is %d\n", idx, map[idx]); } -#endif } static int vorbis_parse_setup_hdr_modes(vorbis_context *vc) @@ -830,7 +823,7 @@ static int vorbis_parse_setup_hdr_modes(vorbis_context *vc) vc->mode_count = get_bits(gb, 6) + 1; vc->modes = av_mallocz(vc->mode_count * sizeof(*vc->modes)); - AV_DEBUG(" There are %d modes.\n", vc->mode_count); + av_dlog(NULL, " There are %d modes.\n", vc->mode_count); for (i = 0; i < vc->mode_count; ++i) { vorbis_mode *mode_setup = &vc->modes[i]; @@ -840,9 +833,9 @@ static int vorbis_parse_setup_hdr_modes(vorbis_context *vc) mode_setup->transformtype = get_bits(gb, 16); //FIXME check 
GET_VALIDATED_INDEX(mode_setup->mapping, 8, vc->mapping_count); - AV_DEBUG(" %u mode: blockflag %d, windowtype %d, transformtype %d, mapping %d\n", - i, mode_setup->blockflag, mode_setup->windowtype, - mode_setup->transformtype, mode_setup->mapping); + av_dlog(NULL, " %u mode: blockflag %d, windowtype %d, transformtype %d, mapping %d\n", + i, mode_setup->blockflag, mode_setup->windowtype, + mode_setup->transformtype, mode_setup->mapping); } return 0; } @@ -950,7 +943,7 @@ static int vorbis_parse_id_hdr(vorbis_context *vc) ff_mdct_init(&vc->mdct[0], bl0, 1, -vc->scale_bias); ff_mdct_init(&vc->mdct[1], bl1, 1, -vc->scale_bias); - AV_DEBUG(" vorbis version %d \n audio_channels %d \n audio_samplerate %d \n bitrate_max %d \n bitrate_nom %d \n bitrate_min %d \n blk_0 %d blk_1 %d \n ", + av_dlog(NULL, " vorbis version %d \n audio_channels %d \n audio_samplerate %d \n bitrate_max %d \n bitrate_nom %d \n bitrate_min %d \n blk_0 %d blk_1 %d \n ", vc->version, vc->audio_channels, vc->audio_samplerate, vc->bitrate_maximum, vc->bitrate_nominal, vc->bitrate_minimum, vc->blocksize[0], vc->blocksize[1]); /* @@ -979,7 +972,13 @@ static av_cold int vorbis_decode_init(AVCodecContext *avccontext) dsputil_init(&vc->dsp, avccontext); ff_fmt_convert_init(&vc->fmt_conv, avccontext); - vc->scale_bias = 32768.0f; + if (avccontext->request_sample_fmt == AV_SAMPLE_FMT_FLT) { + avccontext->sample_fmt = AV_SAMPLE_FMT_FLT; + vc->scale_bias = 1.0f; + } else { + avccontext->sample_fmt = AV_SAMPLE_FMT_S16; + vc->scale_bias = 32768.0f; + } if (!headers_len) { av_log(avccontext, AV_LOG_ERROR, "Extradata missing.\n"); @@ -1024,9 +1023,6 @@ static av_cold int vorbis_decode_init(AVCodecContext *avccontext) avccontext->channels = vc->audio_channels; avccontext->sample_rate = vc->audio_samplerate; avccontext->frame_size = FFMIN(vc->blocksize[0], vc->blocksize[1]) >> 2; - avccontext->sample_fmt = - avccontext->request_sample_fmt == AV_SAMPLE_FMT_FLT ? 
- AV_SAMPLE_FMT_FLT : AV_SAMPLE_FMT_S16; return 0 ; } @@ -1055,7 +1051,7 @@ static int vorbis_floor0_decode(vorbis_context *vc, "floor0 dec: booknumber too high!\n"); book_idx = 0; } - AV_DEBUG("floor0 dec: booknumber: %u\n", book_idx); + av_dlog(NULL, "floor0 dec: booknumber: %u\n", book_idx); codebook = vc->codebooks[vf->book_list[book_idx]]; /* Invalid codebook! */ if (!codebook.codevectors) @@ -1064,13 +1060,13 @@ static int vorbis_floor0_decode(vorbis_context *vc, while (lsp_len<vf->order) { int vec_off; - AV_DEBUG("floor0 dec: book dimension: %d\n", codebook.dimensions); - AV_DEBUG("floor0 dec: maximum depth: %d\n", codebook.maxdepth); + av_dlog(NULL, "floor0 dec: book dimension: %d\n", codebook.dimensions); + av_dlog(NULL, "floor0 dec: maximum depth: %d\n", codebook.maxdepth); /* read temp vector */ vec_off = get_vlc2(&vc->gb, codebook.vlc.table, codebook.nb_bits, codebook.maxdepth) * codebook.dimensions; - AV_DEBUG("floor0 dec: vector offset: %d\n", vec_off); + av_dlog(NULL, "floor0 dec: vector offset: %d\n", vec_off); /* copy each vector component and add last to it */ for (idx = 0; idx < codebook.dimensions; ++idx) lsp[lsp_len+idx] = codebook.codevectors[vec_off+idx] + last; @@ -1078,14 +1074,12 @@ static int vorbis_floor0_decode(vorbis_context *vc, lsp_len += codebook.dimensions; } -#ifdef V_DEBUG /* DEBUG: output lsp coeffs */ { int idx; for (idx = 0; idx < lsp_len; ++idx) - AV_DEBUG("floor0 dec: coeff at %d is %f\n", idx, lsp[idx]); + av_dlog(NULL, "floor0 dec: coeff at %d is %f\n", idx, lsp[idx]); } -#endif /* synthesize floor output vector */ { @@ -1096,8 +1090,8 @@ static int vorbis_floor0_decode(vorbis_context *vc, for (i = 0; i < order; i++) lsp[i] = 2.0f * cos(lsp[i]); - AV_DEBUG("floor0 synth: map_size = %d; m = %d; wstep = %f\n", - vf->map_size, order, wstep); + av_dlog(NULL, "floor0 synth: map_size = %"PRIu32"; m = %d; wstep = %f\n", + vf->map_size[blockflag], order, wstep); i = 0; while (i < vf->map_size[blockflag]) { @@ -1138,7 +1132,7 @@ 
static int vorbis_floor0_decode(vorbis_context *vc, return 1; } - AV_DEBUG(" Floor0 decoded\n"); + av_dlog(NULL, " Floor0 decoded\n"); return 0; } @@ -1165,7 +1159,7 @@ static int vorbis_floor1_decode(vorbis_context *vc, floor1_Y[0] = get_bits(gb, ilog(range - 1)); floor1_Y[1] = get_bits(gb, ilog(range - 1)); - AV_DEBUG("floor 0 Y %d floor 1 Y %d \n", floor1_Y[0], floor1_Y[1]); + av_dlog(NULL, "floor 0 Y %d floor 1 Y %d \n", floor1_Y[0], floor1_Y[1]); offset = 2; for (i = 0; i < vf->partitions; ++i) { @@ -1175,7 +1169,7 @@ static int vorbis_floor1_decode(vorbis_context *vc, csub = (1 << cbits) - 1; cval = 0; - AV_DEBUG("Cbits %u\n", cbits); + av_dlog(NULL, "Cbits %u\n", cbits); if (cbits) // this reads all subclasses for this partition's class cval = get_vlc2(gb, vc->codebooks[vf->class_masterbook[partition_class]].vlc.table, @@ -1184,8 +1178,8 @@ static int vorbis_floor1_decode(vorbis_context *vc, for (j = 0; j < cdim; ++j) { book = vf->subclass_books[partition_class][cval & csub]; - AV_DEBUG("book %d Cbits %u cval %u bits:%d\n", - book, cbits, cval, get_bits_count(gb)); + av_dlog(NULL, "book %d Cbits %u cval %u bits:%d\n", + book, cbits, cval, get_bits_count(gb)); cval = cval >> cbits; if (book > -1) { @@ -1195,7 +1189,8 @@ static int vorbis_floor1_decode(vorbis_context *vc, floor1_Y[offset+j] = 0; } - AV_DEBUG(" floor(%d) = %d \n", vf->list[offset+j].x, floor1_Y[offset+j]); + av_dlog(NULL, " floor(%d) = %d \n", + vf->list[offset+j].x, floor1_Y[offset+j]); } offset+=cdim; } @@ -1253,15 +1248,15 @@ static int vorbis_floor1_decode(vorbis_context *vc, floor1_Y_final[i] = predicted; } - AV_DEBUG(" Decoded floor(%d) = %u / val %u\n", - vf->list[i].x, floor1_Y_final[i], val); + av_dlog(NULL, " Decoded floor(%d) = %u / val %u\n", + vf->list[i].x, floor1_Y_final[i], val); } // Curve synth - connect the calculated dots and convert from dB scale FIXME optimize ? 
ff_vorbis_floor1_render_list(vf->list, vf->x_list_dim, floor1_Y_final, floor1_flag, vf->multiplier, vec, vf->list[1].x); - AV_DEBUG(" Floor decoded\n"); + av_dlog(NULL, " Floor decoded\n"); return 0; } @@ -1292,7 +1287,7 @@ static av_always_inline int vorbis_residue_decode_internal(vorbis_context *vc, ch_used = ch; } - AV_DEBUG(" residue type 0/1/2 decode begin, ch: %d cpc %d \n", ch, c_p_c); + av_dlog(NULL, " residue type 0/1/2 decode begin, ch: %d cpc %d \n", ch, c_p_c); for (pass = 0; pass <= vr->maxpass; ++pass) { // FIXME OPTIMIZE? uint16_t voffset, partition_count, j_times_ptns_to_read; @@ -1306,7 +1301,7 @@ static av_always_inline int vorbis_residue_decode_internal(vorbis_context *vc, unsigned temp = get_vlc2(gb, vc->codebooks[vr->classbook].vlc.table, vc->codebooks[vr->classbook].nb_bits, 3); - AV_DEBUG("Classword: %u\n", temp); + av_dlog(NULL, "Classword: %u\n", temp); assert(vr->classifications > 1 && temp <= 65536); //needed for inverse[] for (i = 0; i < c_p_c; ++i) { @@ -1351,7 +1346,8 @@ static av_always_inline int vorbis_residue_decode_internal(vorbis_context *vc, for (l = 0; l < dim; ++l, ++voffs) { vec[voffs]+=codebook.codevectors[coffs+l]; // FPMATH - AV_DEBUG(" pass %d offs: %d curr: %f change: %f cv offs.: %d \n", pass, voffs, vec[voffs], codebook.codevectors[coffs+l], coffs); + av_dlog(NULL, " pass %d offs: %d curr: %f change: %f cv offs.: %d \n", + pass, voffs, vec[voffs], codebook.codevectors[coffs+l], coffs); } } } else if (vr_type == 2 && ch == 2 && (voffset & 1) == 0 && (dim & 1) == 0) { // most frequent case optimized @@ -1378,7 +1374,10 @@ static av_always_inline int vorbis_residue_decode_internal(vorbis_context *vc, vec[voffs ] += codebook.codevectors[coffs + l ]; // FPMATH vec[voffs + vlen] += codebook.codevectors[coffs + l + 1]; // FPMATH - AV_DEBUG(" pass %d offs: %d curr: %f change: %f cv offs.: %d+%d \n", pass, voffset / ch + (voffs % ch) * vlen, vec[voffset / ch + (voffs % ch) * vlen], codebook.codevectors[coffs + l], coffs, l); + 
av_dlog(NULL, " pass %d offs: %d curr: %f change: %f cv offs.: %d+%d \n", + pass, voffset / ch + (voffs % ch) * vlen, + vec[voffset / ch + (voffs % ch) * vlen], + codebook.codevectors[coffs + l], coffs, l); } } @@ -1390,7 +1389,10 @@ static av_always_inline int vorbis_residue_decode_internal(vorbis_context *vc, for (l = 0; l < dim; ++l, ++voffs) { vec[voffs / ch + (voffs % ch) * vlen] += codebook.codevectors[coffs + l]; // FPMATH FIXME use if and counter instead of / and % - AV_DEBUG(" pass %d offs: %d curr: %f change: %f cv offs.: %d+%d \n", pass, voffset / ch + (voffs % ch) * vlen, vec[voffset / ch + (voffs % ch) * vlen], codebook.codevectors[coffs + l], coffs, l); + av_dlog(NULL, " pass %d offs: %d curr: %f change: %f cv offs.: %d+%d \n", + pass, voffset / ch + (voffs % ch) * vlen, + vec[voffset / ch + (voffs % ch) * vlen], + codebook.codevectors[coffs + l], coffs, l); } } } @@ -1478,8 +1480,8 @@ static int vorbis_parse_audio_packet(vorbis_context *vc) vc->mode_number = mode_number; mapping = &vc->mappings[vc->modes[mode_number].mapping]; - AV_DEBUG(" Mode number: %u , mapping: %d , blocktype %d\n", mode_number, - vc->modes[mode_number].mapping, vc->modes[mode_number].blockflag); + av_dlog(NULL, " Mode number: %u , mapping: %d , blocktype %d\n", mode_number, + vc->modes[mode_number].mapping, vc->modes[mode_number].blockflag); blockflag = vc->modes[mode_number].blockflag; blocksize = vc->blocksize[blockflag]; @@ -1608,7 +1610,7 @@ static int vorbis_decode_frame(AVCodecContext *avccontext, if (!buf_size) return 0; - AV_DEBUG("packet length %d \n", buf_size); + av_dlog(NULL, "packet length %d \n", buf_size); init_get_bits(gb, buf, buf_size*8); @@ -1625,7 +1627,8 @@ static int vorbis_decode_frame(AVCodecContext *avccontext, return buf_size ; } - AV_DEBUG("parsed %d bytes %d bits, returned %d samples (*ch*bits) \n", get_bits_count(gb)/8, get_bits_count(gb)%8, len); + av_dlog(NULL, "parsed %d bytes %d bits, returned %d samples (*ch*bits) \n", + get_bits_count(gb) / 8, 
get_bits_count(gb) % 8, len); if (vc->audio_channels > 8) { for (i = 0; i < vc->audio_channels; i++) @@ -1636,15 +1639,14 @@ static int vorbis_decode_frame(AVCodecContext *avccontext, len * ff_vorbis_channel_layout_offsets[vc->audio_channels - 1][i]; } - *data_size = len * vc->audio_channels; - if (avccontext->sample_fmt == AV_SAMPLE_FMT_FLT) { - float_interleave(data, channel_ptrs, len, vc->audio_channels); - *data_size *= sizeof(float); - } else { + if (avccontext->sample_fmt == AV_SAMPLE_FMT_FLT) + vc->fmt_conv.float_interleave(data, channel_ptrs, len, vc->audio_channels); + else vc->fmt_conv.float_to_int16_interleave(data, channel_ptrs, len, vc->audio_channels); - *data_size *= 2; - } + + *data_size = len * vc->audio_channels * + (av_get_bits_per_sample_fmt(avccontext->sample_fmt) / 8); return buf_size ; } @@ -1671,5 +1673,8 @@ AVCodec ff_vorbis_decoder = { vorbis_decode_frame, .long_name = NULL_IF_CONFIG_SMALL("Vorbis"), .channel_layouts = ff_vorbis_channel_layouts, + .sample_fmts = (const enum AVSampleFormat[]) { + AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE + }, }; diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c index 6bef32973d..082d8e5829 100644 --- a/libavcodec/vp8.c +++ b/libavcodec/vp8.c @@ -166,12 +166,12 @@ static void get_quants(VP8Context *s) } else base_qi = yac_qi; - s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip(base_qi + ydc_delta , 0, 127)]; - s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip(base_qi , 0, 127)]; - s->qmat[i].luma_dc_qmul[0] = 2 * vp8_dc_qlookup[av_clip(base_qi + y2dc_delta, 0, 127)]; - s->qmat[i].luma_dc_qmul[1] = 155 * vp8_ac_qlookup[av_clip(base_qi + y2ac_delta, 0, 127)] / 100; - s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip(base_qi + uvdc_delta, 0, 127)]; - s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip(base_qi + uvac_delta, 0, 127)]; + s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta , 7)]; + s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi , 7)]; + 
s->qmat[i].luma_dc_qmul[0] = 2 * vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)]; + s->qmat[i].luma_dc_qmul[1] = 155 * vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)] / 100; + s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)]; + s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)]; s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8); s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132); @@ -1612,7 +1612,7 @@ static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size, s->mv_min.x = -MARGIN; s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN; - if (prev_frame && s->segmentation.enabled && s->segmentation.update_map) + if (prev_frame && s->segmentation.enabled && !s->segmentation.update_map) ff_thread_await_progress(prev_frame, mb_y, 0); for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) { diff --git a/libavcodec/vp8.h b/libavcodec/vp8.h index d706b34aa6..468e28e8d5 100644 --- a/libavcodec/vp8.h +++ b/libavcodec/vp8.h @@ -239,4 +239,4 @@ typedef struct { AVFrame frames[5]; } VP8Context; -#endif +#endif /* AVCODEC_VP8_H */ diff --git a/libavcodec/vp8data.h b/libavcodec/vp8data.h index af9b0d9188..9ff21a1a97 100644 --- a/libavcodec/vp8data.h +++ b/libavcodec/vp8data.h @@ -685,4 +685,4 @@ static const uint8_t vp8_mv_default_prob[2][19] = { 128, 130, 130, 74, 148, 180, 203, 236, 254, 254 } }; -#endif +#endif /* AVCODEC_VP8DATA_H */ diff --git a/libavcodec/vqavideo.c b/libavcodec/vqavideo.c index 272d80906e..ca4fd94d75 100644 --- a/libavcodec/vqavideo.c +++ b/libavcodec/vqavideo.c @@ -90,14 +90,6 @@ #define CPLZ_TAG MKBETAG('C', 'P', 'L', 'Z') #define VPTZ_TAG MKBETAG('V', 'P', 'T', 'Z') -#define VQA_DEBUG 0 - -#if VQA_DEBUG -#define vqa_debug printf -#else -static inline void vqa_debug(const char *format, ...) 
{ } -#endif - typedef struct VqaContext { AVCodecContext *avctx; @@ -213,7 +205,7 @@ static void decode_format80(const unsigned char *src, int src_size, while (src_index < src_size) { - vqa_debug(" opcode %02X: ", src[src_index]); + av_dlog(NULL, " opcode %02X: ", src[src_index]); /* 0x80 means that frame is finished */ if (src[src_index] == 0x80) @@ -232,7 +224,7 @@ static void decode_format80(const unsigned char *src, int src_size, src_index += 2; src_pos = AV_RL16(&src[src_index]); src_index += 2; - vqa_debug("(1) copy %X bytes from absolute pos %X\n", count, src_pos); + av_dlog(NULL, "(1) copy %X bytes from absolute pos %X\n", count, src_pos); CHECK_COUNT(); for (i = 0; i < count; i++) dest[dest_index + i] = dest[src_pos + i]; @@ -244,7 +236,7 @@ static void decode_format80(const unsigned char *src, int src_size, count = AV_RL16(&src[src_index]); src_index += 2; color = src[src_index++]; - vqa_debug("(2) set %X bytes to %02X\n", count, color); + av_dlog(NULL, "(2) set %X bytes to %02X\n", count, color); CHECK_COUNT(); memset(&dest[dest_index], color, count); dest_index += count; @@ -254,7 +246,7 @@ static void decode_format80(const unsigned char *src, int src_size, count = (src[src_index++] & 0x3F) + 3; src_pos = AV_RL16(&src[src_index]); src_index += 2; - vqa_debug("(3) copy %X bytes from absolute pos %X\n", count, src_pos); + av_dlog(NULL, "(3) copy %X bytes from absolute pos %X\n", count, src_pos); CHECK_COUNT(); for (i = 0; i < count; i++) dest[dest_index + i] = dest[src_pos + i]; @@ -263,7 +255,7 @@ static void decode_format80(const unsigned char *src, int src_size, } else if (src[src_index] > 0x80) { count = src[src_index++] & 0x3F; - vqa_debug("(4) copy %X bytes from source to dest\n", count); + av_dlog(NULL, "(4) copy %X bytes from source to dest\n", count); CHECK_COUNT(); memcpy(&dest[dest_index], &src[src_index], count); src_index += count; @@ -274,7 +266,7 @@ static void decode_format80(const unsigned char *src, int src_size, count = ((src[src_index] 
& 0x70) >> 4) + 3; src_pos = AV_RB16(&src[src_index]) & 0x0FFF; src_index += 2; - vqa_debug("(5) copy %X bytes from relpos %X\n", count, src_pos); + av_dlog(NULL, "(5) copy %X bytes from relpos %X\n", count, src_pos); CHECK_COUNT(); for (i = 0; i < count; i++) dest[dest_index + i] = dest[dest_index - src_pos + i]; diff --git a/libavcodec/wmadec.c b/libavcodec/wmadec.c index 3da1a60c19..840025d493 100644 --- a/libavcodec/wmadec.c +++ b/libavcodec/wmadec.c @@ -827,7 +827,7 @@ static int wma_decode_superframe(AVCodecContext *avctx, return 0; } if (buf_size < s->block_align) - return 0; + return AVERROR(EINVAL); buf_size = s->block_align; samples = data; diff --git a/libavcodec/wmaprodec.c b/libavcodec/wmaprodec.c index c0492c36f5..e1d942dca2 100644 --- a/libavcodec/wmaprodec.c +++ b/libavcodec/wmaprodec.c @@ -1320,7 +1320,7 @@ static int decode_frame(WMAProDecodeCtx *s) /** no idea what these are for, might be the number of samples that need to be skipped at the beginning or end of a stream */ if (get_bits1(gb)) { - int skip; + int av_unused skip; /** usually true for the first frame */ if (get_bits1(gb)) { diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile index 4775853697..9ea330c930 100644 --- a/libavcodec/x86/Makefile +++ b/libavcodec/x86/Makefile @@ -1,6 +1,8 @@ OBJS-$(CONFIG_MLP_DECODER) += x86/mlpdsp.o OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp.o +YASM-OBJS-$(CONFIG_DCT) += x86/dct32_sse.o + YASM-OBJS-FFT-$(HAVE_AMD3DNOW) += x86/fft_3dn.o YASM-OBJS-FFT-$(HAVE_AMD3DNOWEXT) += x86/fft_3dn2.o YASM-OBJS-FFT-$(HAVE_SSE) += x86/fft_sse.o @@ -10,10 +12,12 @@ YASM-OBJS-$(CONFIG_FFT) += x86/fft_mmx.o \ MMX-OBJS-$(CONFIG_H264DSP) += x86/h264dsp_mmx.o YASM-OBJS-$(CONFIG_H264DSP) += x86/h264_deblock.o \ x86/h264_deblock_10bit.o \ - x86/h264_weight.o \ x86/h264_idct.o \ + x86/h264_idct_10bit.o \ + x86/h264_weight.o \ -YASM-OBJS-$(CONFIG_H264PRED) += x86/h264_intrapred.o +YASM-OBJS-$(CONFIG_H264PRED) += x86/h264_intrapred.o \ + x86/h264_intrapred_10bit.o 
MMX-OBJS-$(CONFIG_H264PRED) += x86/h264_intrapred_init.o YASM-OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp_yasm.o @@ -21,11 +25,7 @@ YASM-OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp_yasm.o MMX-OBJS-$(CONFIG_AC3DSP) += x86/ac3dsp_mmx.o YASM-OBJS-$(CONFIG_AC3DSP) += x86/ac3dsp.o MMX-OBJS-$(CONFIG_CAVS_DECODER) += x86/cavsdsp_mmx.o -MMX-OBJS-$(CONFIG_MP1FLOAT_DECODER) += x86/mpegaudiodec_mmx.o -MMX-OBJS-$(CONFIG_MP2FLOAT_DECODER) += x86/mpegaudiodec_mmx.o -MMX-OBJS-$(CONFIG_MP3FLOAT_DECODER) += x86/mpegaudiodec_mmx.o -MMX-OBJS-$(CONFIG_MP3ON4FLOAT_DECODER) += x86/mpegaudiodec_mmx.o -MMX-OBJS-$(CONFIG_MP3ADUFLOAT_DECODER) += x86/mpegaudiodec_mmx.o +MMX-OBJS-$(CONFIG_MPEGAUDIODSP) += x86/mpegaudiodec_mmx.o MMX-OBJS-$(CONFIG_PNG_DECODER) += x86/png_mmx.o MMX-OBJS-$(CONFIG_ENCODERS) += x86/dsputilenc_mmx.o YASM-OBJS-$(CONFIG_ENCODERS) += x86/dsputilenc_yasm.o @@ -59,4 +59,3 @@ OBJS-$(HAVE_MMX) += x86/dnxhd_mmx.o \ x86/mpegvideo_mmx.o \ x86/simple_idct_mmx.o \ -MMX-OBJS-$(CONFIG_DCT) += x86/dct32_sse.o diff --git a/libavcodec/x86/ac3dsp.asm b/libavcodec/x86/ac3dsp.asm index b67f893f22..99c5df340e 100644 --- a/libavcodec/x86/ac3dsp.asm +++ b/libavcodec/x86/ac3dsp.asm @@ -27,6 +27,11 @@ SECTION_RODATA ; 16777216.0f - used in ff_float_to_fixed24() pf_1_24: times 4 dd 0x4B800000 +; used in ff_ac3_compute_mantissa_size() +cextern ac3_bap_bits +pw_bap_mul1: dw 21846, 21846, 0, 32768, 21846, 21846, 0, 32768 +pw_bap_mul2: dw 5, 7, 0, 7, 5, 7, 0, 7 + SECTION .text ;----------------------------------------------------------------------------- @@ -293,3 +298,51 @@ cglobal float_to_fixed24_sse2, 3,3,9, dst, src, len %endif ja .loop REP_RET + +;------------------------------------------------------------------------------ +; int ff_ac3_compute_mantissa_size(uint16_t mant_cnt[6][16]) +;------------------------------------------------------------------------------ + +%macro PHADDD4 2 ; xmm src, xmm tmp + movhlps %2, %1 + paddd %1, %2 + pshufd %2, %1, 0x1 + paddd %1, %2 +%endmacro + +INIT_XMM 
+cglobal ac3_compute_mantissa_size_sse2, 1,2,4, mant_cnt, sum + movdqa m0, [mant_cntq ] + movdqa m1, [mant_cntq+ 1*16] + paddw m0, [mant_cntq+ 2*16] + paddw m1, [mant_cntq+ 3*16] + paddw m0, [mant_cntq+ 4*16] + paddw m1, [mant_cntq+ 5*16] + paddw m0, [mant_cntq+ 6*16] + paddw m1, [mant_cntq+ 7*16] + paddw m0, [mant_cntq+ 8*16] + paddw m1, [mant_cntq+ 9*16] + paddw m0, [mant_cntq+10*16] + paddw m1, [mant_cntq+11*16] + pmaddwd m0, [ac3_bap_bits ] + pmaddwd m1, [ac3_bap_bits+16] + paddd m0, m1 + PHADDD4 m0, m1 + movd sumd, m0 + movdqa m3, [pw_bap_mul1] + movhpd m0, [mant_cntq +2] + movlpd m0, [mant_cntq+1*32+2] + movhpd m1, [mant_cntq+2*32+2] + movlpd m1, [mant_cntq+3*32+2] + movhpd m2, [mant_cntq+4*32+2] + movlpd m2, [mant_cntq+5*32+2] + pmulhuw m0, m3 + pmulhuw m1, m3 + pmulhuw m2, m3 + paddusw m0, m1 + paddusw m0, m2 + pmaddwd m0, [pw_bap_mul2] + PHADDD4 m0, m1 + movd eax, m0 + add eax, sumd + RET diff --git a/libavcodec/x86/ac3dsp_mmx.c b/libavcodec/x86/ac3dsp_mmx.c index 97d0657aa6..e853b8831a 100644 --- a/libavcodec/x86/ac3dsp_mmx.c +++ b/libavcodec/x86/ac3dsp_mmx.c @@ -42,6 +42,8 @@ extern void ff_float_to_fixed24_3dnow(int32_t *dst, const float *src, unsigned i extern void ff_float_to_fixed24_sse (int32_t *dst, const float *src, unsigned int len); extern void ff_float_to_fixed24_sse2 (int32_t *dst, const float *src, unsigned int len); +extern int ff_ac3_compute_mantissa_size_sse2(uint16_t mant_cnt[6][16]); + av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact) { int mm_flags = av_get_cpu_flags(); @@ -69,6 +71,7 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact) c->ac3_exponent_min = ff_ac3_exponent_min_sse2; c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_sse2; c->float_to_fixed24 = ff_float_to_fixed24_sse2; + c->compute_mantissa_size = ff_ac3_compute_mantissa_size_sse2; if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW)) { c->ac3_lshift_int16 = ff_ac3_lshift_int16_sse2; c->ac3_rshift_int32 = ff_ac3_rshift_int32_sse2; diff --git 
a/libavcodec/x86/dct32_sse.asm b/libavcodec/x86/dct32_sse.asm new file mode 100644 index 0000000000..7f5f815591 --- /dev/null +++ b/libavcodec/x86/dct32_sse.asm @@ -0,0 +1,494 @@ +;****************************************************************************** +;* 32 point SSE-optimized DCT transform +;* Copyright (c) 2010 Vitor Sessak +;* +;* This file is part of FFmpeg. +;* +;* FFmpeg is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* FFmpeg is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. +;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with FFmpeg; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;****************************************************************************** + +%include "x86inc.asm" +%include "x86util.asm" + +SECTION_RODATA 32 + +align 32 +ps_cos_vec: dd 0.500603, 0.505471, 0.515447, 0.531043 + dd 0.553104, 0.582935, 0.622504, 0.674808 + dd -10.190008, -3.407609, -2.057781, -1.484165 + dd -1.169440, -0.972568, -0.839350, -0.744536 + dd 0.502419, 0.522499, 0.566944, 0.646822 + dd 0.788155, 1.060678, 1.722447, 5.101149 + dd 0.509796, 0.601345, 0.899976, 2.562916 + dd 0.509796, 0.601345, 0.899976, 2.562916 + dd 1.000000, 1.000000, 1.306563, 0.541196 + dd 1.000000, 1.000000, 1.306563, 0.541196 + dd 1.000000, 0.707107, 1.000000, -0.707107 + dd 1.000000, 0.707107, 1.000000, -0.707107 + dd 0.707107, 0.707107, 0.707107, 0.707107 + +align 32 +ps_p1p1m1m1: dd 0, 0, 0x80000000, 0x80000000, 0, 0, 0x80000000, 0x80000000 + +%macro BUTTERFLY_SSE 4 + movaps %4, %1 + 
subps %1, %2 + addps %2, %4 + mulps %1, %3 +%endmacro + +%macro BUTTERFLY_AVX 4 + vsubps %4, %1, %2 + vaddps %2, %2, %1 + vmulps %1, %4, %3 +%endmacro + +%macro BUTTERFLY0_SSE 5 + movaps %4, %1 + shufps %1, %1, %5 + xorps %4, %2 + addps %1, %4 + mulps %1, %3 +%endmacro + +%macro BUTTERFLY0_AVX 5 + vshufps %4, %1, %1, %5 + vxorps %1, %1, %2 + vaddps %4, %4, %1 + vmulps %1, %4, %3 +%endmacro + +%macro BUTTERFLY2 4 + BUTTERFLY0 %1, %2, %3, %4, 0x1b +%endmacro + +%macro BUTTERFLY3 4 + BUTTERFLY0 %1, %2, %3, %4, 0xb1 +%endmacro + +%macro BUTTERFLY3V 5 + movaps m%5, m%1 + addps m%1, m%2 + subps m%5, m%2 + SWAP %2, %5 + mulps m%2, [ps_cos_vec+192] + movaps m%5, m%3 + addps m%3, m%4 + subps m%4, m%5 + mulps m%4, [ps_cos_vec+192] +%endmacro + +%macro PASS6_AND_PERMUTE 0 + mov tmpd, [outq+4] + movss m7, [outq+72] + addss m7, [outq+76] + movss m3, [outq+56] + addss m3, [outq+60] + addss m4, m3 + movss m2, [outq+52] + addss m2, m3 + movss m3, [outq+104] + addss m3, [outq+108] + addss m1, m3 + addss m5, m4 + movss [outq+ 16], m1 + movss m1, [outq+100] + addss m1, m3 + movss m3, [outq+40] + movss [outq+ 48], m1 + addss m3, [outq+44] + movss m1, [outq+100] + addss m4, m3 + addss m3, m2 + addss m1, [outq+108] + movss [outq+ 40], m3 + addss m2, [outq+36] + movss m3, [outq+8] + movss [outq+ 56], m2 + addss m3, [outq+12] + movss [outq+ 32], m3 + movss m3, [outq+80] + movss [outq+ 8], m5 + movss [outq+ 80], m1 + movss m2, [outq+52] + movss m5, [outq+120] + addss m5, [outq+124] + movss m1, [outq+64] + addss m2, [outq+60] + addss m0, m5 + addss m5, [outq+116] + mov [outq+64], tmpd + addss m6, m0 + addss m1, m6 + mov tmpd, [outq+12] + mov [outq+ 96], tmpd + movss [outq+ 4], m1 + movss m1, [outq+24] + movss [outq+ 24], m4 + movss m4, [outq+88] + addss m4, [outq+92] + addss m3, m4 + addss m4, [outq+84] + mov tmpd, [outq+108] + addss m1, [outq+28] + addss m0, m1 + addss m1, m5 + addss m6, m3 + addss m3, m0 + addss m0, m7 + addss m5, [outq+20] + addss m7, m1 + movss [outq+ 12], m6 + mov 
[outq+112], tmpd + movss m6, [outq+28] + movss [outq+ 28], m0 + movss m0, [outq+36] + movss [outq+ 36], m7 + addss m1, m4 + movss m7, [outq+116] + addss m0, m2 + addss m7, [outq+124] + movss [outq+ 72], m0 + movss m0, [outq+44] + addss m2, m0 + movss [outq+ 44], m1 + movss [outq+ 88], m2 + addss m0, [outq+60] + mov tmpd, [outq+60] + mov [outq+120], tmpd + movss [outq+104], m0 + addss m4, m5 + addss m5, [outq+68] + movss [outq+52], m4 + movss [outq+60], m5 + movss m4, [outq+68] + movss m5, [outq+20] + movss [outq+ 20], m3 + addss m5, m7 + addss m7, m6 + addss m4, m5 + movss m2, [outq+84] + addss m2, [outq+92] + addss m5, m2 + movss [outq+ 68], m4 + addss m2, m7 + movss m4, [outq+76] + movss [outq+ 84], m2 + movss [outq+ 76], m5 + addss m7, m4 + addss m6, [outq+124] + addss m4, m6 + addss m6, [outq+92] + movss [outq+100], m4 + movss [outq+108], m6 + movss m6, [outq+92] + movss [outq+92], m7 + addss m6, [outq+124] + movss [outq+116], m6 +%endmacro + +%define BUTTERFLY BUTTERFLY_AVX +%define BUTTERFLY0 BUTTERFLY0_AVX + +INIT_YMM +SECTION_TEXT +%ifdef HAVE_AVX +; void ff_dct32_float_avx(FFTSample *out, const FFTSample *in) +cglobal dct32_float_avx, 2,3,8, out, in, tmp + ; pass 1 + vmovaps m4, [inq+0] + vinsertf128 m5, m5, [inq+96], 1 + vinsertf128 m5, m5, [inq+112], 0 + vshufps m5, m5, m5, 0x1b + BUTTERFLY m4, m5, [ps_cos_vec], m6 + + vmovaps m2, [inq+64] + vinsertf128 m6, m6, [inq+32], 1 + vinsertf128 m6, m6, [inq+48], 0 + vshufps m6, m6, m6, 0x1b + BUTTERFLY m2, m6, [ps_cos_vec+32], m0 + + ; pass 2 + + BUTTERFLY m5, m6, [ps_cos_vec+64], m0 + BUTTERFLY m4, m2, [ps_cos_vec+64], m7 + + + ; pass 3 + vperm2f128 m3, m6, m4, 0x31 + vperm2f128 m1, m6, m4, 0x20 + vshufps m3, m3, m3, 0x1b + + BUTTERFLY m1, m3, [ps_cos_vec+96], m6 + + + vperm2f128 m4, m5, m2, 0x20 + vperm2f128 m5, m5, m2, 0x31 + vshufps m5, m5, m5, 0x1b + + BUTTERFLY m4, m5, [ps_cos_vec+96], m6 + + ; pass 4 + vmovaps m6, [ps_p1p1m1m1+0] + vmovaps m2, [ps_cos_vec+128] + + BUTTERFLY2 m5, m6, m2, m7 + BUTTERFLY2 
m4, m6, m2, m7 + BUTTERFLY2 m1, m6, m2, m7 + BUTTERFLY2 m3, m6, m2, m7 + + + ; pass 5 + vshufps m6, m6, m6, 0xcc + vmovaps m2, [ps_cos_vec+160] + + BUTTERFLY3 m5, m6, m2, m7 + BUTTERFLY3 m4, m6, m2, m7 + BUTTERFLY3 m1, m6, m2, m7 + BUTTERFLY3 m3, m6, m2, m7 + + vperm2f128 m6, m3, m3, 0x31 + vmovaps [outq], m3 + + vextractf128 [outq+64], m5, 1 + vextractf128 [outq+32], m5, 0 + + vextractf128 [outq+80], m4, 1 + vextractf128 [outq+48], m4, 0 + + vperm2f128 m0, m1, m1, 0x31 + vmovaps [outq+96], m1 + + vzeroupper + + ; pass 6, no SIMD... +INIT_XMM + PASS6_AND_PERMUTE + RET +%endif + +%define BUTTERFLY BUTTERFLY_SSE +%define BUTTERFLY0 BUTTERFLY0_SSE + +%ifdef ARCH_X86_64 +%define SPILL SWAP +%define UNSPILL SWAP + +%macro PASS5 0 + nop ; FIXME code alignment + SWAP 5, 8 + SWAP 4, 12 + SWAP 6, 14 + SWAP 7, 13 + SWAP 0, 15 + PERMUTE 9,10, 10,12, 11,14, 12,9, 13,11, 14,13 + TRANSPOSE4x4PS 8, 9, 10, 11, 0 + BUTTERFLY3V 8, 9, 10, 11, 0 + addps m10, m11 + TRANSPOSE4x4PS 12, 13, 14, 15, 0 + BUTTERFLY3V 12, 13, 14, 15, 0 + addps m14, m15 + addps m12, m14 + addps m14, m13 + addps m13, m15 +%endmacro + +%macro PASS6 0 + SWAP 9, 12 + SWAP 11, 14 + movss [outq+0x00], m8 + pshuflw m0, m8, 0xe + movss [outq+0x10], m9 + pshuflw m1, m9, 0xe + movss [outq+0x20], m10 + pshuflw m2, m10, 0xe + movss [outq+0x30], m11 + pshuflw m3, m11, 0xe + movss [outq+0x40], m12 + pshuflw m4, m12, 0xe + movss [outq+0x50], m13 + pshuflw m5, m13, 0xe + movss [outq+0x60], m14 + pshuflw m6, m14, 0xe + movaps [outq+0x70], m15 + pshuflw m7, m15, 0xe + addss m0, m1 + addss m1, m2 + movss [outq+0x08], m0 + addss m2, m3 + movss [outq+0x18], m1 + addss m3, m4 + movss [outq+0x28], m2 + addss m4, m5 + movss [outq+0x38], m3 + addss m5, m6 + movss [outq+0x48], m4 + addss m6, m7 + movss [outq+0x58], m5 + movss [outq+0x68], m6 + movss [outq+0x78], m7 + + PERMUTE 1,8, 3,9, 5,10, 7,11, 9,12, 11,13, 13,14, 8,1, 10,3, 12,5, 14,7 + movhlps m0, m1 + pshufd m1, m1, 3 + SWAP 0, 2, 4, 6, 8, 10, 12, 14 + SWAP 1, 3, 5, 7, 9, 11, 
13, 15 +%rep 7 + movhlps m0, m1 + pshufd m1, m1, 3 + addss m15, m1 + SWAP 0, 2, 4, 6, 8, 10, 12, 14 + SWAP 1, 3, 5, 7, 9, 11, 13, 15 +%endrep +%assign i 4 +%rep 15 + addss m0, m1 + movss [outq+i], m0 + SWAP 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 + %assign i i+8 +%endrep +%endmacro + +%else ; ARCH_X86_32 +%macro SPILL 2 ; xmm#, mempos + movaps [outq+(%2-8)*16], m%1 +%endmacro +%macro UNSPILL 2 + movaps m%1, [outq+(%2-8)*16] +%endmacro + +%define PASS6 PASS6_AND_PERMUTE +%macro PASS5 0 + movaps m2, [ps_cos_vec+160] + shufps m3, m3, 0xcc + + BUTTERFLY3 m5, m3, m2, m1 + SPILL 5, 8 + + UNSPILL 1, 9 + BUTTERFLY3 m1, m3, m2, m5 + SPILL 1, 14 + + BUTTERFLY3 m4, m3, m2, m5 + SPILL 4, 12 + + BUTTERFLY3 m7, m3, m2, m5 + SPILL 7, 13 + + UNSPILL 5, 10 + BUTTERFLY3 m5, m3, m2, m7 + SPILL 5, 10 + + UNSPILL 4, 11 + BUTTERFLY3 m4, m3, m2, m7 + SPILL 4, 11 + + BUTTERFLY3 m6, m3, m2, m7 + SPILL 6, 9 + + BUTTERFLY3 m0, m3, m2, m7 + SPILL 0, 15 +%endmacro +%endif + + +INIT_XMM +; void ff_dct32_float_sse(FFTSample *out, const FFTSample *in) +cglobal dct32_float_sse, 2,3,16, out, in, tmp + ; pass 1 + + movaps m0, [inq+0] + movaps m1, [inq+112] + shufps m1, m1, 0x1b + BUTTERFLY m0, m1, [ps_cos_vec], m3 + + movaps m7, [inq+64] + movaps m4, [inq+48] + shufps m4, m4, 0x1b + BUTTERFLY m7, m4, [ps_cos_vec+32], m3 + + ; pass 2 + movaps m2, [ps_cos_vec+64] + BUTTERFLY m1, m4, m2, m3 + SPILL 1, 11 + SPILL 4, 8 + + ; pass 1 + movaps m1, [inq+16] + movaps m6, [inq+96] + shufps m6, m6, 0x1b + BUTTERFLY m1, m6, [ps_cos_vec+16], m3 + + movaps m4, [inq+80] + movaps m5, [inq+32] + shufps m5, m5, 0x1b + BUTTERFLY m4, m5, [ps_cos_vec+48], m3 + + ; pass 2 + BUTTERFLY m0, m7, m2, m3 + + movaps m2, [ps_cos_vec+80] + BUTTERFLY m6, m5, m2, m3 + + BUTTERFLY m1, m4, m2, m3 + + ; pass 3 + movaps m2, [ps_cos_vec+96] + shufps m1, m1, 0x1b + BUTTERFLY m0, m1, m2, m3 + SPILL 0, 15 + SPILL 1, 14 + + UNSPILL 0, 8 + shufps m5, m5, 0x1b + BUTTERFLY m0, m5, m2, m3 + + UNSPILL 1, 11 + shufps m6, m6, 0x1b + 
BUTTERFLY m1, m6, m2, m3 + SPILL 1, 11 + + shufps m4, m4, 0x1b + BUTTERFLY m7, m4, m2, m3 + + ; pass 4 + movaps m3, [ps_p1p1m1m1+0] + movaps m2, [ps_cos_vec+128] + + BUTTERFLY2 m5, m3, m2, m1 + + BUTTERFLY2 m0, m3, m2, m1 + SPILL 0, 9 + + BUTTERFLY2 m6, m3, m2, m1 + SPILL 6, 10 + + UNSPILL 0, 11 + BUTTERFLY2 m0, m3, m2, m1 + SPILL 0, 11 + + BUTTERFLY2 m4, m3, m2, m1 + + BUTTERFLY2 m7, m3, m2, m1 + + UNSPILL 6, 14 + BUTTERFLY2 m6, m3, m2, m1 + + UNSPILL 0, 15 + BUTTERFLY2 m0, m3, m2, m1 + + PASS5 + PASS6 + RET diff --git a/libavcodec/x86/dct32_sse.c b/libavcodec/x86/dct32_sse.c deleted file mode 100644 index 74338d6920..0000000000 --- a/libavcodec/x86/dct32_sse.c +++ /dev/null @@ -1,296 +0,0 @@ -/* - * 32 point SSE-optimized DCT transform - * Copyright (c) 2010 Vitor Sessak - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include <stdint.h> - -#include "libavutil/x86_cpu.h" -#include "libavutil/mem.h" -#include "libavcodec/dsputil.h" -#include "fft.h" - -DECLARE_ALIGNED(16, static const float, b1)[] = { - 0.500603, 0.505471, 0.515447, 0.531043, - 0.553104, 0.582935, 0.622504, 0.674808, - -1.169440, -0.972568, -0.839350, -0.744536, - -10.190008, -3.407609, -2.057781, -1.484165, - 0.502419, 0.522499, 0.566944, 0.646822, - 0.788155, 1.060678, 1.722447, 5.101149, - 0.509796, 0.601345, 0.899976, 2.562916, - 1.000000, 1.000000, 1.306563, 0.541196, - 1.000000, 0.707107, 1.000000, -0.707107 -}; - -DECLARE_ALIGNED(16, static const int32_t, smask)[4] = { - 0, 0, 0x80000000, 0x80000000 -}; - -/* butterfly operator */ -#define BUTTERFLY(a,b,c,tmp) \ - "movaps %%" #a ", %%" #tmp " \n\t" \ - "subps %%" #b ", %%" #a " \n\t" \ - "addps %%" #tmp ", %%" #b " \n\t" \ - "mulps " #c ", %%" #a " \n\t" - -///* Same as BUTTERFLY when vectors a and b overlap */ -#define BUTTERFLY0(val, mask, cos, tmp, shuf) \ - "movaps %%" #val ", %%" #tmp " \n\t" \ - "shufps " #shuf ", %%" #val ",%%" #val " \n\t" \ - "xorps %%" #mask ", %%" #tmp " \n\t" /* flip signs */ \ - "addps %%" #tmp ", %%" #val " \n\t" \ - "mulps %%" #cos ", %%" #val " \n\t" - -#define BUTTERFLY2(val, mask, cos, tmp) BUTTERFLY0(val, mask, cos, tmp, $0x1b) -#define BUTTERFLY3(val, mask, cos, tmp) BUTTERFLY0(val, mask, cos, tmp, $0xb1) - -void ff_dct32_float_sse(FFTSample *out, const FFTSample *in) -{ - int32_t tmp1 = 0; - __asm__ volatile( - /* pass 1 */ - - "movaps (%4), %%xmm0 \n\t" - "movaps 112(%4), %%xmm1 \n\t" - "shufps $0x1b, %%xmm1, %%xmm1 \n\t" - BUTTERFLY(xmm0, xmm1, (%2), xmm3) - - "movaps 64(%4), %%xmm7 \n\t" - "movaps 48(%4), %%xmm4 \n\t" - "shufps $0x1b, %%xmm4, %%xmm4 \n\t" - BUTTERFLY(xmm7, xmm4, 48(%2), xmm3) 
- - - /* pass 2 */ - "movaps 64(%2), %%xmm2 \n\t" - BUTTERFLY(xmm1, xmm4, %%xmm2, xmm3) - "movaps %%xmm1, 48(%1) \n\t" - "movaps %%xmm4, (%1) \n\t" - - /* pass 1 */ - "movaps 16(%4), %%xmm1 \n\t" - "movaps 96(%4), %%xmm6 \n\t" - "shufps $0x1b, %%xmm6, %%xmm6 \n\t" - BUTTERFLY(xmm1, xmm6, 16(%2), xmm3) - - "movaps 80(%4), %%xmm4 \n\t" - "movaps 32(%4), %%xmm5 \n\t" - "shufps $0x1b, %%xmm5, %%xmm5 \n\t" - BUTTERFLY(xmm4, xmm5, 32(%2), xmm3) - - /* pass 2 */ - BUTTERFLY(xmm0, xmm7, %%xmm2, xmm3) - - "movaps 80(%2), %%xmm2 \n\t" - BUTTERFLY(xmm6, xmm5, %%xmm2, xmm3) - - BUTTERFLY(xmm1, xmm4, %%xmm2, xmm3) - - /* pass 3 */ - "movaps 96(%2), %%xmm2 \n\t" - "shufps $0x1b, %%xmm1, %%xmm1 \n\t" - BUTTERFLY(xmm0, xmm1, %%xmm2, xmm3) - "movaps %%xmm0, 112(%1) \n\t" - "movaps %%xmm1, 96(%1) \n\t" - - "movaps 0(%1), %%xmm0 \n\t" - "shufps $0x1b, %%xmm5, %%xmm5 \n\t" - BUTTERFLY(xmm0, xmm5, %%xmm2, xmm3) - - "movaps 48(%1), %%xmm1 \n\t" - "shufps $0x1b, %%xmm6, %%xmm6 \n\t" - BUTTERFLY(xmm1, xmm6, %%xmm2, xmm3) - "movaps %%xmm1, 48(%1) \n\t" - - "shufps $0x1b, %%xmm4, %%xmm4 \n\t" - BUTTERFLY(xmm7, xmm4, %%xmm2, xmm3) - - /* pass 4 */ - "movaps (%3), %%xmm3 \n\t" - "movaps 112(%2), %%xmm2 \n\t" - - BUTTERFLY2(xmm5, xmm3, xmm2, xmm1) - - BUTTERFLY2(xmm0, xmm3, xmm2, xmm1) - "movaps %%xmm0, 16(%1) \n\t" - - BUTTERFLY2(xmm6, xmm3, xmm2, xmm1) - "movaps %%xmm6, 32(%1) \n\t" - - "movaps 48(%1), %%xmm0 \n\t" - BUTTERFLY2(xmm0, xmm3, xmm2, xmm1) - "movaps %%xmm0, 48(%1) \n\t" - - BUTTERFLY2(xmm4, xmm3, xmm2, xmm1) - - BUTTERFLY2(xmm7, xmm3, xmm2, xmm1) - - "movaps 96(%1), %%xmm6 \n\t" - BUTTERFLY2(xmm6, xmm3, xmm2, xmm1) - - "movaps 112(%1), %%xmm0 \n\t" - BUTTERFLY2(xmm0, xmm3, xmm2, xmm1) - - /* pass 5 */ - "movaps 128(%2), %%xmm2 \n\t" - "shufps $0xCC, %%xmm3,%%xmm3 \n\t" - - BUTTERFLY3(xmm5, xmm3, xmm2, xmm1) - "movaps %%xmm5, (%1) \n\t" - - "movaps 16(%1), %%xmm1 \n\t" - BUTTERFLY3(xmm1, xmm3, xmm2, xmm5) - "movaps %%xmm1, 16(%1) \n\t" - - BUTTERFLY3(xmm4, xmm3, xmm2, xmm5) - 
"movaps %%xmm4, 64(%1) \n\t" - - BUTTERFLY3(xmm7, xmm3, xmm2, xmm5) - "movaps %%xmm7, 80(%1) \n\t" - - "movaps 32(%1), %%xmm5 \n\t" - BUTTERFLY3(xmm5, xmm3, xmm2, xmm7) - "movaps %%xmm5, 32(%1) \n\t" - - "movaps 48(%1), %%xmm4 \n\t" - BUTTERFLY3(xmm4, xmm3, xmm2, xmm7) - "movaps %%xmm4, 48(%1) \n\t" - - BUTTERFLY3(xmm6, xmm3, xmm2, xmm7) - "movaps %%xmm6, 96(%1) \n\t" - - BUTTERFLY3(xmm0, xmm3, xmm2, xmm7) - "movaps %%xmm0, 112(%1) \n\t" - - - /* pass 6, no SIMD... */ - "movss 56(%1), %%xmm3 \n\t" - "movl 4(%1), %0 \n\t" - "addss 60(%1), %%xmm3 \n\t" - "movss 72(%1), %%xmm7 \n\t" - "addss %%xmm3, %%xmm4 \n\t" - "movss 52(%1), %%xmm2 \n\t" - "addss %%xmm3, %%xmm2 \n\t" - "movss 24(%1), %%xmm3 \n\t" - "addss 28(%1), %%xmm3 \n\t" - "addss 76(%1), %%xmm7 \n\t" - "addss %%xmm3, %%xmm1 \n\t" - "addss %%xmm4, %%xmm5 \n\t" - "movss %%xmm1, 16(%1) \n\t" - "movss 20(%1), %%xmm1 \n\t" - "addss %%xmm3, %%xmm1 \n\t" - "movss 40(%1), %%xmm3 \n\t" - "movss %%xmm1, 48(%1) \n\t" - "addss 44(%1), %%xmm3 \n\t" - "movss 20(%1), %%xmm1 \n\t" - "addss %%xmm3, %%xmm4 \n\t" - "addss %%xmm2, %%xmm3 \n\t" - "addss 28(%1), %%xmm1 \n\t" - "movss %%xmm3, 40(%1) \n\t" - "addss 36(%1), %%xmm2 \n\t" - "movss 8(%1), %%xmm3 \n\t" - "movss %%xmm2, 56(%1) \n\t" - "addss 12(%1), %%xmm3 \n\t" - "movss %%xmm5, 8(%1) \n\t" - "movss %%xmm3, 32(%1) \n\t" - "movss 52(%1), %%xmm2 \n\t" - "movss 80(%1), %%xmm3 \n\t" - "movss 120(%1), %%xmm5 \n\t" - "movss %%xmm1, 80(%1) \n\t" - "movss %%xmm4, 24(%1) \n\t" - "addss 124(%1), %%xmm5 \n\t" - "movss 64(%1), %%xmm1 \n\t" - "addss 60(%1), %%xmm2 \n\t" - "addss %%xmm5, %%xmm0 \n\t" - "addss 116(%1), %%xmm5 \n\t" - "movl %0, 64(%1) \n\t" - "addss %%xmm0, %%xmm6 \n\t" - "addss %%xmm6, %%xmm1 \n\t" - "movl 12(%1), %0 \n\t" - "movss %%xmm1, 4(%1) \n\t" - "movss 88(%1), %%xmm1 \n\t" - "movl %0, 96(%1) \n\t" - "addss 92(%1), %%xmm1 \n\t" - "movss 104(%1), %%xmm4 \n\t" - "movl 28(%1), %0 \n\t" - "addss 108(%1), %%xmm4 \n\t" - "addss %%xmm4, %%xmm0 \n\t" - "addss %%xmm1, 
%%xmm3 \n\t" - "addss 84(%1), %%xmm1 \n\t" - "addss %%xmm5, %%xmm4 \n\t" - "addss %%xmm3, %%xmm6 \n\t" - "addss %%xmm0, %%xmm3 \n\t" - "addss %%xmm7, %%xmm0 \n\t" - "addss 100(%1), %%xmm5 \n\t" - "addss %%xmm4, %%xmm7 \n\t" - "movl %0, 112(%1) \n\t" - "movss %%xmm0, 28(%1) \n\t" - "movss 36(%1), %%xmm0 \n\t" - "movss %%xmm7, 36(%1) \n\t" - "addss %%xmm1, %%xmm4 \n\t" - "movss 116(%1), %%xmm7 \n\t" - "addss %%xmm2, %%xmm0 \n\t" - "addss 124(%1), %%xmm7 \n\t" - "movss %%xmm0, 72(%1) \n\t" - "movss 44(%1), %%xmm0 \n\t" - "movss %%xmm6, 12(%1) \n\t" - "movss %%xmm3, 20(%1) \n\t" - "addss %%xmm0, %%xmm2 \n\t" - "movss %%xmm4, 44(%1) \n\t" - "movss %%xmm2, 88(%1) \n\t" - "addss 60(%1), %%xmm0 \n\t" - "movl 60(%1), %0 \n\t" - "movl %0, 120(%1) \n\t" - "movss %%xmm0, 104(%1) \n\t" - "addss %%xmm5, %%xmm1 \n\t" - "addss 68(%1), %%xmm5 \n\t" - "movss %%xmm1, 52(%1) \n\t" - "movss %%xmm5, 60(%1) \n\t" - "movss 68(%1), %%xmm1 \n\t" - "movss 100(%1), %%xmm5 \n\t" - "addss %%xmm7, %%xmm5 \n\t" - "addss 108(%1), %%xmm7 \n\t" - "addss %%xmm5, %%xmm1 \n\t" - "movss 84(%1), %%xmm2 \n\t" - "addss 92(%1), %%xmm2 \n\t" - "addss %%xmm2, %%xmm5 \n\t" - "movss %%xmm1, 68(%1) \n\t" - "addss %%xmm7, %%xmm2 \n\t" - "movss 76(%1), %%xmm1 \n\t" - "movss %%xmm2, 84(%1) \n\t" - "movss %%xmm5, 76(%1) \n\t" - "movss 108(%1), %%xmm2 \n\t" - "addss %%xmm1, %%xmm7 \n\t" - "addss 124(%1), %%xmm2 \n\t" - "addss %%xmm2, %%xmm1 \n\t" - "addss 92(%1), %%xmm2 \n\t" - "movss %%xmm1, 100(%1) \n\t" - "movss %%xmm2, 108(%1) \n\t" - "movss 92(%1), %%xmm2 \n\t" - "movss %%xmm7, 92(%1) \n\t" - "addss 124(%1), %%xmm2 \n\t" - "movss %%xmm2, 116(%1) \n\t" - :"+&r"(tmp1) - :"r"(out), "r"(b1), "r"(smask), "r"(in) - :XMM_CLOBBERS("%xmm0", "%xmm1", "%xmm2", "%xmm3", - "%xmm4", "%xmm5", "%xmm6", "%xmm7",) - "memory" - ); -} - diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c index d867dc3e6a..71bbd11741 100644 --- a/libavcodec/x86/dsputil_mmx.c +++ b/libavcodec/x86/dsputil_mmx.c @@ -762,7 +762,7 @@ 
static void h263_h_loop_filter_mmx(uint8_t *src, int stride, int qscale){ /* draw the edges of width 'w' of an image of size width, height this mmx version can only handle w==8 || w==16 */ -static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w, int sides) +static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w, int h, int sides) { uint8_t *ptr, *last_line; int i; @@ -817,7 +817,7 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w, /* top and bottom (and hopefully also the corners) */ if (sides&EDGE_TOP) { - for(i = 0; i < w; i += 4) { + for(i = 0; i < h; i += 4) { ptr= buf - (i + 1) * wrap - w; __asm__ volatile( "1: \n\t" diff --git a/libavcodec/x86/dsputil_yasm.asm b/libavcodec/x86/dsputil_yasm.asm index a41640b0a2..695aba5ec3 100644 --- a/libavcodec/x86/dsputil_yasm.asm +++ b/libavcodec/x86/dsputil_yasm.asm @@ -30,7 +30,7 @@ pb_zz11zz55zz99zzdd: db -1,-1,1,1,-1,-1,5,5,-1,-1,9,9,-1,-1,13,13 pb_revwords: db 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1 pd_16384: times 4 dd 16384 -section .text align=16 +SECTION_TEXT %macro SCALARPRODUCT 1 ; int scalarproduct_int16(int16_t *v1, int16_t *v2, int order, int shift) diff --git a/libavcodec/x86/dsputilenc_yasm.asm b/libavcodec/x86/dsputilenc_yasm.asm index a4f2d0cf51..016b354d6c 100644 --- a/libavcodec/x86/dsputilenc_yasm.asm +++ b/libavcodec/x86/dsputilenc_yasm.asm @@ -59,12 +59,12 @@ SECTION .text %endmacro %macro HADAMARD8 0 - SUMSUB_BADC m0, m1, m2, m3 - SUMSUB_BADC m4, m5, m6, m7 - SUMSUB_BADC m0, m2, m1, m3 - SUMSUB_BADC m4, m6, m5, m7 - SUMSUB_BADC m0, m4, m1, m5 - SUMSUB_BADC m2, m6, m3, m7 + SUMSUB_BADC w, 0, 1, 2, 3 + SUMSUB_BADC w, 4, 5, 6, 7 + SUMSUB_BADC w, 0, 2, 1, 3 + SUMSUB_BADC w, 4, 6, 5, 7 + SUMSUB_BADC w, 0, 4, 1, 5 + SUMSUB_BADC w, 2, 6, 3, 7 %endmacro %macro ABS1_SUM 3 diff --git a/libavcodec/x86/fft.c b/libavcodec/x86/fft.c index 13f7ea6ec2..18964d88ca 100644 --- a/libavcodec/x86/fft.c +++ 
b/libavcodec/x86/fft.c @@ -56,9 +56,13 @@ av_cold void ff_fft_init_mmx(FFTContext *s) #if CONFIG_DCT av_cold void ff_dct_init_mmx(DCTContext *s) { +#if HAVE_YASM int has_vectors = av_get_cpu_flags(); - if (has_vectors & AV_CPU_FLAG_SSE && HAVE_SSE) + if (has_vectors & AV_CPU_FLAG_AVX && HAVE_AVX) + s->dct32 = ff_dct32_float_avx; + else if (has_vectors & AV_CPU_FLAG_SSE && HAVE_SSE) s->dct32 = ff_dct32_float_sse; +#endif } #endif diff --git a/libavcodec/x86/fft.h b/libavcodec/x86/fft.h index 611db8508f..79064c27cd 100644 --- a/libavcodec/x86/fft.h +++ b/libavcodec/x86/fft.h @@ -35,5 +35,6 @@ void ff_imdct_calc_sse(FFTContext *s, FFTSample *output, const FFTSample *input) void ff_imdct_half_sse(FFTContext *s, FFTSample *output, const FFTSample *input); void ff_imdct_half_avx(FFTContext *s, FFTSample *output, const FFTSample *input); void ff_dct32_float_sse(FFTSample *out, const FFTSample *in); +void ff_dct32_float_avx(FFTSample *out, const FFTSample *in); -#endif +#endif /* AVCODEC_X86_FFT_H */ diff --git a/libavcodec/x86/fft_mmx.asm b/libavcodec/x86/fft_mmx.asm index 02b638f144..15fb67654c 100644 --- a/libavcodec/x86/fft_mmx.asm +++ b/libavcodec/x86/fft_mmx.asm @@ -85,7 +85,7 @@ cextern cos_ %+ i %1 %endmacro -section .text align=16 +SECTION_TEXT %macro T2_3DN 4 ; z0, z1, mem0, mem1 mova %1, %3 @@ -299,6 +299,7 @@ IF%1 mova Z(1), m5 INIT_YMM +%ifdef HAVE_AVX align 16 fft8_avx: mova m0, Z(0) @@ -388,6 +389,8 @@ fft32_interleave_avx: jg .deint_loop ret +%endif + INIT_XMM %define movdqa movaps @@ -532,6 +535,7 @@ DEFINE_ARGS z, w, n, o1, o3 INIT_YMM +%ifdef HAVE_AVX %macro INTERL_AVX 5 vunpckhps %3, %2, %1 vunpcklps %2, %2, %1 @@ -545,6 +549,7 @@ INIT_YMM DECL_PASS pass_avx, PASS_BIG 1 DECL_PASS pass_interleave_avx, PASS_BIG 0 +%endif INIT_XMM @@ -634,8 +639,10 @@ cglobal fft_dispatch%3%2, 2,5,8, z, nbits RET %endmacro ; DECL_FFT +%ifdef HAVE_AVX DECL_FFT 6, _avx DECL_FFT 6, _avx, _interleave +%endif DECL_FFT 5, _sse DECL_FFT 5, _sse, _interleave DECL_FFT 4, _3dn @@ 
-847,4 +854,6 @@ DECL_IMDCT _sse, POSROTATESHUF INIT_YMM +%ifdef HAVE_AVX DECL_IMDCT _avx, POSROTATESHUF_AVX +%endif diff --git a/libavcodec/x86/fft_sse.c b/libavcodec/x86/fft_sse.c index 801dc1bc8a..26b933c810 100644 --- a/libavcodec/x86/fft_sse.c +++ b/libavcodec/x86/fft_sse.c @@ -22,6 +22,7 @@ #include "libavutil/x86_cpu.h" #include "libavcodec/dsputil.h" #include "fft.h" +#include "config.h" DECLARE_ASM_CONST(16, int, ff_m1m1m1m1)[4] = { 1 << 31, 1 << 31, 1 << 31, 1 << 31 }; @@ -30,10 +31,12 @@ void ff_fft_dispatch_sse(FFTComplex *z, int nbits); void ff_fft_dispatch_interleave_sse(FFTComplex *z, int nbits); void ff_fft_dispatch_interleave_avx(FFTComplex *z, int nbits); +#if HAVE_AVX void ff_fft_calc_avx(FFTContext *s, FFTComplex *z) { ff_fft_dispatch_interleave_avx(z, s->nbits); } +#endif void ff_fft_calc_sse(FFTContext *s, FFTComplex *z) { diff --git a/libavcodec/x86/fmtconvert.asm b/libavcodec/x86/fmtconvert.asm index dc038dde73..598e412f73 100644 --- a/libavcodec/x86/fmtconvert.asm +++ b/libavcodec/x86/fmtconvert.asm @@ -20,8 +20,9 @@ ;****************************************************************************** %include "x86inc.asm" +%include "x86util.asm" -section .text align=16 +SECTION_TEXT %macro PSWAPD_SSE 2 pshufw %1, %2, 0x4e @@ -89,3 +90,136 @@ FLOAT_TO_INT16_INTERLEAVE6 3dnow %undef pswapd FLOAT_TO_INT16_INTERLEAVE6 3dn2 %undef cvtps2pi + +;----------------------------------------------------------------------------- +; void ff_float_interleave6(float *dst, const float **src, unsigned int len); +;----------------------------------------------------------------------------- + +%macro FLOAT_INTERLEAVE6 2 +cglobal float_interleave6_%1, 2,7,%2, dst, src, src1, src2, src3, src4, src5 +%ifdef ARCH_X86_64 + %define lend r10d + mov lend, r2d +%else + %define lend dword r2m +%endif + mov src1q, [srcq+1*gprsize] + mov src2q, [srcq+2*gprsize] + mov src3q, [srcq+3*gprsize] + mov src4q, [srcq+4*gprsize] + mov src5q, [srcq+5*gprsize] + mov srcq, [srcq] + sub 
src1q, srcq + sub src2q, srcq + sub src3q, srcq + sub src4q, srcq + sub src5q, srcq +.loop: +%ifidn %1, sse + movaps m0, [srcq] + movaps m1, [srcq+src1q] + movaps m2, [srcq+src2q] + movaps m3, [srcq+src3q] + movaps m4, [srcq+src4q] + movaps m5, [srcq+src5q] + + SBUTTERFLYPS 0, 1, 6 + SBUTTERFLYPS 2, 3, 6 + SBUTTERFLYPS 4, 5, 6 + + movaps m6, m4 + shufps m4, m0, 0xe4 + movlhps m0, m2 + movhlps m6, m2 + movaps [dstq ], m0 + movaps [dstq+16], m4 + movaps [dstq+32], m6 + + movaps m6, m5 + shufps m5, m1, 0xe4 + movlhps m1, m3 + movhlps m6, m3 + movaps [dstq+48], m1 + movaps [dstq+64], m5 + movaps [dstq+80], m6 +%else ; mmx + movq m0, [srcq] + movq m1, [srcq+src1q] + movq m2, [srcq+src2q] + movq m3, [srcq+src3q] + movq m4, [srcq+src4q] + movq m5, [srcq+src5q] + + SBUTTERFLY dq, 0, 1, 6 + SBUTTERFLY dq, 2, 3, 6 + SBUTTERFLY dq, 4, 5, 6 + movq [dstq ], m0 + movq [dstq+ 8], m2 + movq [dstq+16], m4 + movq [dstq+24], m1 + movq [dstq+32], m3 + movq [dstq+40], m5 +%endif + add srcq, mmsize + add dstq, mmsize*6 + sub lend, mmsize/4 + jg .loop +%ifidn %1, mmx + emms +%endif + REP_RET +%endmacro + +INIT_MMX +FLOAT_INTERLEAVE6 mmx, 0 +INIT_XMM +FLOAT_INTERLEAVE6 sse, 7 + +;----------------------------------------------------------------------------- +; void ff_float_interleave2(float *dst, const float **src, unsigned int len); +;----------------------------------------------------------------------------- + +%macro FLOAT_INTERLEAVE2 2 +cglobal float_interleave2_%1, 3,4,%2, dst, src, len, src1 + mov src1q, [srcq+gprsize] + mov srcq, [srcq ] + sub src1q, srcq +.loop + MOVPS m0, [srcq ] + MOVPS m1, [srcq+src1q ] + MOVPS m3, [srcq +mmsize] + MOVPS m4, [srcq+src1q+mmsize] + + MOVPS m2, m0 + PUNPCKLDQ m0, m1 + PUNPCKHDQ m2, m1 + + MOVPS m1, m3 + PUNPCKLDQ m3, m4 + PUNPCKHDQ m1, m4 + + MOVPS [dstq ], m0 + MOVPS [dstq+1*mmsize], m2 + MOVPS [dstq+2*mmsize], m3 + MOVPS [dstq+3*mmsize], m1 + + add srcq, mmsize*2 + add dstq, mmsize*4 + sub lend, mmsize/2 + jg .loop +%ifidn %1, mmx + emms 
+%endif + REP_RET +%endmacro + +INIT_MMX +%define MOVPS movq +%define PUNPCKLDQ punpckldq +%define PUNPCKHDQ punpckhdq +FLOAT_INTERLEAVE2 mmx, 0 +INIT_XMM +%define MOVPS movaps +%define PUNPCKLDQ unpcklps +%define PUNPCKHDQ unpckhps +FLOAT_INTERLEAVE2 sse, 5 diff --git a/libavcodec/x86/fmtconvert_mmx.c b/libavcodec/x86/fmtconvert_mmx.c index ea41f730e8..ba2c2c9bd5 100644 --- a/libavcodec/x86/fmtconvert_mmx.c +++ b/libavcodec/x86/fmtconvert_mmx.c @@ -235,11 +235,44 @@ static void float_to_int16_interleave_3dn2(int16_t *dst, const float **src, long float_to_int16_interleave_3dnow(dst, src, len, channels); } +#if HAVE_YASM +void ff_float_interleave2_mmx(float *dst, const float **src, unsigned int len); +void ff_float_interleave2_sse(float *dst, const float **src, unsigned int len); + +void ff_float_interleave6_mmx(float *dst, const float **src, unsigned int len); +void ff_float_interleave6_sse(float *dst, const float **src, unsigned int len); + +static void float_interleave_mmx(float *dst, const float **src, + unsigned int len, int channels) +{ + if (channels == 2) { + ff_float_interleave2_mmx(dst, src, len); + } else if (channels == 6) + ff_float_interleave6_mmx(dst, src, len); + else + ff_float_interleave_c(dst, src, len, channels); +} + +static void float_interleave_sse(float *dst, const float **src, + unsigned int len, int channels) +{ + if (channels == 2) { + ff_float_interleave2_sse(dst, src, len); + } else if (channels == 6) + ff_float_interleave6_sse(dst, src, len); + else + ff_float_interleave_c(dst, src, len, channels); +} +#endif + void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx) { int mm_flags = av_get_cpu_flags(); if (mm_flags & AV_CPU_FLAG_MMX) { +#if HAVE_YASM + c->float_interleave = float_interleave_mmx; +#endif if(mm_flags & AV_CPU_FLAG_3DNOW){ if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ @@ -256,6 +289,9 @@ void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx) c->int32_to_float_fmul_scalar = 
int32_to_float_fmul_scalar_sse; c->float_to_int16 = float_to_int16_sse; c->float_to_int16_interleave = float_to_int16_interleave_sse; +#if HAVE_YASM + c->float_interleave = float_interleave_sse; +#endif } if(mm_flags & AV_CPU_FLAG_SSE2){ c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse2; diff --git a/libavcodec/x86/h264_deblock_10bit.asm b/libavcodec/x86/h264_deblock_10bit.asm index f5a13f1250..699fc4a687 100644 --- a/libavcodec/x86/h264_deblock_10bit.asm +++ b/libavcodec/x86/h264_deblock_10bit.asm @@ -165,7 +165,7 @@ cglobal deblock_v_luma_10_%1, 5,5,8*(mmsize/16) SUB rsp, pad shl r2d, 2 shl r3d, 2 - LOAD_AB m4, m5, r2, r3 + LOAD_AB m4, m5, r2d, r3d mov r3, 32/mmsize mov r2, r0 sub r0, r1 @@ -222,7 +222,7 @@ cglobal deblock_h_luma_10_%1, 5,6,8*(mmsize/16) SUB rsp, pad shl r2d, 2 shl r3d, 2 - LOAD_AB m4, m5, r2, r3 + LOAD_AB m4, m5, r2d, r3d mov r3, r1 mova am, m4 add r3, r1 @@ -352,7 +352,7 @@ cglobal deblock_v_luma_10_%1, 5,5,15 %define mask2 m11 shl r2d, 2 shl r3d, 2 - LOAD_AB m12, m13, r2, r3 + LOAD_AB m12, m13, r2d, r3d mov r2, r0 sub r0, r1 sub r0, r1 @@ -380,7 +380,7 @@ cglobal deblock_v_luma_10_%1, 5,5,15 cglobal deblock_h_luma_10_%1, 5,7,15 shl r2d, 2 shl r3d, 2 - LOAD_AB m12, m13, r2, r3 + LOAD_AB m12, m13, r2d, r3d mov r2, r1 add r2, r1 add r2, r1 @@ -836,6 +836,13 @@ DEBLOCK_LUMA_INTRA avx mova [r0+2*r1], m2 %endmacro +%macro CHROMA_V_LOAD_TC 2 + movd %1, [%2] + punpcklbw %1, %1 + punpcklwd %1, %1 + psraw %1, 6 +%endmacro + %macro DEBLOCK_CHROMA 1 ;----------------------------------------------------------------------------- ; void deblock_v_chroma( uint16_t *pix, int stride, int alpha, int beta, int8_t *tc0 ) @@ -851,10 +858,10 @@ cglobal deblock_v_chroma_10_%1, 5,7-(mmsize/16),8*(mmsize/16) .loop: %endif CHROMA_V_LOAD r5 - LOAD_AB m4, m5, r2, r3 + LOAD_AB m4, m5, r2d, r3d LOAD_MASK m0, m1, m2, m3, m4, m5, m7, m6, m4 pxor m4, m4 - LOAD_TC m6, r4 + CHROMA_V_LOAD_TC m6, r4 psubw m6, [pw_3] pmaxsw m6, m4 pand m7, m6 @@ -885,7 +892,7 @@ 
cglobal deblock_v_chroma_intra_10_%1, 4,6-(mmsize/16),8*(mmsize/16) .loop: %endif CHROMA_V_LOAD r4 - LOAD_AB m4, m5, r2, r3 + LOAD_AB m4, m5, r2d, r3d LOAD_MASK m0, m1, m2, m3, m4, m5, m7, m6, m4 CHROMA_DEBLOCK_P0_Q0_INTRA m1, m2, m0, m3, m7, m5, m6 CHROMA_V_STORE diff --git a/libavcodec/x86/h264_i386.h b/libavcodec/x86/h264_i386.h index 26c163be3c..2f55879ce1 100644 --- a/libavcodec/x86/h264_i386.h +++ b/libavcodec/x86/h264_i386.h @@ -36,7 +36,7 @@ #if ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS) static int decode_significance_x86(CABACContext *c, int max_coeff, uint8_t *significant_coeff_ctx_base, - int *index){ + int *index, x86_reg last_off){ void *end= significant_coeff_ctx_base + max_coeff - 1; int minusstart= -(int)significant_coeff_ctx_base; int minusindex= 4-(int)index; @@ -52,10 +52,12 @@ static int decode_significance_x86(CABACContext *c, int max_coeff, "test $1, %%edx \n\t" " jz 3f \n\t" + "add %7, %1 \n\t" - BRANCHLESS_GET_CABAC("%%edx", "%3", "61(%1)", "%%ebx", + BRANCHLESS_GET_CABAC("%%edx", "%3", "(%1)", "%%ebx", "%%bx", "%%esi", "%%eax", "%%al") + "sub %7, %1 \n\t" "mov %2, %%"REG_a" \n\t" "movl %4, %%ecx \n\t" "add %1, %%"REG_c" \n\t" @@ -82,7 +84,7 @@ static int decode_significance_x86(CABACContext *c, int max_coeff, "movl %%esi, "RANGE "(%3) \n\t" "movl %%ebx, "LOW "(%3) \n\t" :"=&a"(coeff_count), "+r"(significant_coeff_ctx_base), "+m"(index) - :"r"(c), "m"(minusstart), "m"(end), "m"(minusindex) + :"r"(c), "m"(minusstart), "m"(end), "m"(minusindex), "m"(last_off) : "%"REG_c, "%ebx", "%edx", "%esi", "memory" ); return coeff_count; @@ -90,7 +92,7 @@ static int decode_significance_x86(CABACContext *c, int max_coeff, static int decode_significance_8x8_x86(CABACContext *c, uint8_t *significant_coeff_ctx_base, - int *index, const uint8_t *sig_off){ + int *index, x86_reg last_off, const uint8_t *sig_off){ int minusindex= 4-(int)index; int coeff_count; x86_reg last=0; @@ -114,8 +116,9 @@ static int 
decode_significance_8x8_x86(CABACContext *c, "movzbl "MANGLE(last_coeff_flag_offset_8x8)"(%%edi), %%edi\n\t" "add %5, %%"REG_D" \n\t" + "add %7, %%"REG_D" \n\t" - BRANCHLESS_GET_CABAC("%%edx", "%3", "15(%%"REG_D")", "%%ebx", + BRANCHLESS_GET_CABAC("%%edx", "%3", "(%%"REG_D")", "%%ebx", "%%bx", "%%esi", "%%eax", "%%al") "mov %2, %%"REG_a" \n\t" @@ -142,7 +145,7 @@ static int decode_significance_8x8_x86(CABACContext *c, "movl %%esi, "RANGE "(%3) \n\t" "movl %%ebx, "LOW "(%3) \n\t" :"=&a"(coeff_count),"+m"(last), "+m"(index) - :"r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base), "m"(sig_off) + :"r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base), "m"(sig_off), "m"(last_off) : "%"REG_c, "%ebx", "%edx", "%esi", "%"REG_D, "memory" ); return coeff_count; diff --git a/libavcodec/x86/h264_idct.asm b/libavcodec/x86/h264_idct.asm index e90b0b1186..04dabc3a2d 100644 --- a/libavcodec/x86/h264_idct.asm +++ b/libavcodec/x86/h264_idct.asm @@ -32,14 +32,18 @@ SECTION_RODATA ; FIXME this table is a duplicate from h264data.h, and will be removed once the tables from, h264 have been split -scan8_mem: db 4+1*8, 5+1*8, 4+2*8, 5+2*8 - db 6+1*8, 7+1*8, 6+2*8, 7+2*8 - db 4+3*8, 5+3*8, 4+4*8, 5+4*8 - db 6+3*8, 7+3*8, 6+4*8, 7+4*8 - db 1+1*8, 2+1*8 - db 1+2*8, 2+2*8 - db 1+4*8, 2+4*8 - db 1+5*8, 2+5*8 +scan8_mem: db 4+ 1*8, 5+ 1*8, 4+ 2*8, 5+ 2*8 + db 6+ 1*8, 7+ 1*8, 6+ 2*8, 7+ 2*8 + db 4+ 3*8, 5+ 3*8, 4+ 4*8, 5+ 4*8 + db 6+ 3*8, 7+ 3*8, 6+ 4*8, 7+ 4*8 + db 4+ 6*8, 5+ 6*8, 4+ 7*8, 5+ 7*8 + db 6+ 6*8, 7+ 6*8, 6+ 7*8, 7+ 7*8 + db 4+ 8*8, 5+ 8*8, 4+ 9*8, 5+ 9*8 + db 6+ 8*8, 7+ 8*8, 6+ 9*8, 7+ 9*8 + db 4+11*8, 5+11*8, 4+12*8, 5+12*8 + db 6+11*8, 7+11*8, 6+12*8, 7+12*8 + db 4+13*8, 5+13*8, 4+14*8, 5+14*8 + db 6+13*8, 7+13*8, 6+14*8, 7+14*8 %ifdef PIC %define scan8 r11 %else @@ -59,11 +63,11 @@ SECTION .text movq m2, [%2+16] movq m3, [%2+24] - IDCT4_1D 0, 1, 2, 3, 4, 5 + IDCT4_1D w, 0, 1, 2, 3, 4, 5 mova m6, [pw_32] TRANSPOSE4x4W 0, 1, 2, 3, 4 paddw m0, m6 - IDCT4_1D 0, 1, 2, 3, 4, 5 + 
IDCT4_1D w, 0, 1, 2, 3, 4, 5 pxor m7, m7 STORE_DIFFx2 m0, m1, m4, m5, m7, 6, %1, %3 @@ -73,7 +77,7 @@ SECTION .text INIT_MMX ; ff_h264_idct_add_mmx(uint8_t *dst, int16_t *block, int stride) -cglobal h264_idct_add_mmx, 3, 3, 0 +cglobal h264_idct_add_8_mmx, 3, 3, 0 IDCT4_ADD r0, r1, r2 RET @@ -118,14 +122,14 @@ cglobal h264_idct_add_mmx, 3, 3, 0 mova m2, %1 mova m5, %2 - SUMSUB_BA m5, m2 - SUMSUB_BA m6, m5 - SUMSUB_BA m4, m2 - SUMSUB_BA m7, m6 - SUMSUB_BA m0, m4 - SUMSUB_BA m3, m2 - SUMSUB_BA m1, m5 - SWAP 7, 6, 4, 5, 2, 3, 1, 0 ; 70315246 -> 01234567 + SUMSUB_BA w, 5, 2 + SUMSUB_BA w, 6, 5 + SUMSUB_BA w, 4, 2 + SUMSUB_BA w, 7, 6 + SUMSUB_BA w, 0, 4 + SUMSUB_BA w, 3, 2 + SUMSUB_BA w, 1, 5 + SWAP 7, 6, 4, 5, 2, 3, 1, 0 ; 70315246 -> 01234567 %endmacro %macro IDCT8_1D_FULL 1 @@ -177,7 +181,7 @@ cglobal h264_idct_add_mmx, 3, 3, 0 INIT_MMX ; ff_h264_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride) -cglobal h264_idct8_add_mmx, 3, 4, 0 +cglobal h264_idct8_add_8_mmx, 3, 4, 0 %assign pad 128+4-(stack_offset&7) SUB rsp, pad @@ -237,7 +241,7 @@ cglobal h264_idct8_add_mmx, 3, 4, 0 INIT_XMM ; ff_h264_idct8_add_sse2(uint8_t *dst, int16_t *block, int stride) -cglobal h264_idct8_add_sse2, 3, 4, 10 +cglobal h264_idct8_add_8_sse2, 3, 4, 10 IDCT8_ADD_SSE r0, r1, r2, r3 RET @@ -261,7 +265,7 @@ cglobal h264_idct8_add_sse2, 3, 4, 10 packuswb m1, m1 %endmacro -%macro DC_ADD_MMX2_OP 3-4 +%macro DC_ADD_MMX2_OP 4 %1 m2, [%2 ] %1 m3, [%2+%3 ] %1 m4, [%2+%3*2] @@ -282,13 +286,13 @@ cglobal h264_idct8_add_sse2, 3, 4, 10 INIT_MMX ; ff_h264_idct_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride) -cglobal h264_idct_dc_add_mmx2, 3, 3, 0 +cglobal h264_idct_dc_add_8_mmx2, 3, 3, 0 DC_ADD_MMX2_INIT r1, r2 DC_ADD_MMX2_OP movh, r0, r2, r1 RET ; ff_h264_idct8_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride) -cglobal h264_idct8_dc_add_mmx2, 3, 3, 0 +cglobal h264_idct8_dc_add_8_mmx2, 3, 3, 0 DC_ADD_MMX2_INIT r1, r2 DC_ADD_MMX2_OP mova, r0, r2, r1 lea r0, [r0+r2*4] @@ -297,7 +301,7 @@ cglobal 
h264_idct8_dc_add_mmx2, 3, 3, 0 ; ff_h264_idct_add16_mmx(uint8_t *dst, const int *block_offset, ; DCTELEM *block, int stride, const uint8_t nnzc[6*8]) -cglobal h264_idct_add16_mmx, 5, 7, 0 +cglobal h264_idct_add16_8_mmx, 5, 7, 0 xor r5, r5 %ifdef PIC lea r11, [scan8_mem] @@ -319,7 +323,7 @@ cglobal h264_idct_add16_mmx, 5, 7, 0 ; ff_h264_idct8_add4_mmx(uint8_t *dst, const int *block_offset, ; DCTELEM *block, int stride, const uint8_t nnzc[6*8]) -cglobal h264_idct8_add4_mmx, 5, 7, 0 +cglobal h264_idct8_add4_8_mmx, 5, 7, 0 %assign pad 128+4-(stack_offset&7) SUB rsp, pad @@ -351,7 +355,7 @@ cglobal h264_idct8_add4_mmx, 5, 7, 0 ; ff_h264_idct_add16_mmx2(uint8_t *dst, const int *block_offset, ; DCTELEM *block, int stride, const uint8_t nnzc[6*8]) -cglobal h264_idct_add16_mmx2, 5, 7, 0 +cglobal h264_idct_add16_8_mmx2, 5, 7, 0 xor r5, r5 %ifdef PIC lea r11, [scan8_mem] @@ -398,7 +402,7 @@ cglobal h264_idct_add16_mmx2, 5, 7, 0 ; ff_h264_idct_add16intra_mmx(uint8_t *dst, const int *block_offset, ; DCTELEM *block, int stride, const uint8_t nnzc[6*8]) -cglobal h264_idct_add16intra_mmx, 5, 7, 0 +cglobal h264_idct_add16intra_8_mmx, 5, 7, 0 xor r5, r5 %ifdef PIC lea r11, [scan8_mem] @@ -421,7 +425,7 @@ cglobal h264_idct_add16intra_mmx, 5, 7, 0 ; ff_h264_idct_add16intra_mmx2(uint8_t *dst, const int *block_offset, ; DCTELEM *block, int stride, const uint8_t nnzc[6*8]) -cglobal h264_idct_add16intra_mmx2, 5, 7, 0 +cglobal h264_idct_add16intra_8_mmx2, 5, 7, 0 xor r5, r5 %ifdef PIC lea r11, [scan8_mem] @@ -466,7 +470,7 @@ cglobal h264_idct_add16intra_mmx2, 5, 7, 0 ; ff_h264_idct8_add4_mmx2(uint8_t *dst, const int *block_offset, ; DCTELEM *block, int stride, const uint8_t nnzc[6*8]) -cglobal h264_idct8_add4_mmx2, 5, 7, 0 +cglobal h264_idct8_add4_8_mmx2, 5, 7, 0 %assign pad 128+4-(stack_offset&7) SUB rsp, pad @@ -529,7 +533,7 @@ cglobal h264_idct8_add4_mmx2, 5, 7, 0 INIT_XMM ; ff_h264_idct8_add4_sse2(uint8_t *dst, const int *block_offset, ; DCTELEM *block, int stride, const uint8_t 
nnzc[6*8]) -cglobal h264_idct8_add4_sse2, 5, 7, 10 +cglobal h264_idct8_add4_8_sse2, 5, 7, 10 xor r5, r5 %ifdef PIC lea r11, [scan8_mem] @@ -607,7 +611,7 @@ h264_idct_add8_mmx_plane: ; ff_h264_idct_add8_mmx(uint8_t **dest, const int *block_offset, ; DCTELEM *block, int stride, const uint8_t nnzc[6*8]) -cglobal h264_idct_add8_mmx, 5, 7, 0 +cglobal h264_idct_add8_8_mmx, 5, 7, 0 mov r5, 16 add r2, 512 %ifdef PIC @@ -617,6 +621,8 @@ cglobal h264_idct_add8_mmx, 5, 7, 0 mov r10, r0 %endif call h264_idct_add8_mmx_plane + mov r5, 32 + add r2, 384 %ifdef ARCH_X86_64 add r10, gprsize %else @@ -668,7 +674,7 @@ h264_idct_add8_mmx2_plane ; ff_h264_idct_add8_mmx2(uint8_t **dest, const int *block_offset, ; DCTELEM *block, int stride, const uint8_t nnzc[6*8]) -cglobal h264_idct_add8_mmx2, 5, 7, 0 +cglobal h264_idct_add8_8_mmx2, 5, 7, 0 mov r5, 16 add r2, 512 %ifdef ARCH_X86_64 @@ -678,6 +684,8 @@ cglobal h264_idct_add8_mmx2, 5, 7, 0 lea r11, [scan8_mem] %endif call h264_idct_add8_mmx2_plane + mov r5, 32 + add r2, 384 %ifdef ARCH_X86_64 add r10, gprsize %else @@ -715,10 +723,10 @@ x264_add8x4_idct_sse2: movhps m1, [r2+40] movhps m2, [r2+48] movhps m3, [r2+56] - IDCT4_1D 0,1,2,3,4,5 + IDCT4_1D w,0,1,2,3,4,5 TRANSPOSE2x4x4W 0,1,2,3,4 paddw m0, [pw_32] - IDCT4_1D 0,1,2,3,4,5 + IDCT4_1D w,0,1,2,3,4,5 pxor m7, m7 STORE_DIFFx2 m0, m1, m4, m5, m7, 6, r0, r3 lea r0, [r0+r3*2] @@ -744,7 +752,7 @@ x264_add8x4_idct_sse2: ; ff_h264_idct_add16_sse2(uint8_t *dst, const int *block_offset, ; DCTELEM *block, int stride, const uint8_t nnzc[6*8]) -cglobal h264_idct_add16_sse2, 5, 5, 8 +cglobal h264_idct_add16_8_sse2, 5, 5, 8 %ifdef ARCH_X86_64 mov r10, r0 %endif @@ -791,7 +799,7 @@ cglobal h264_idct_add16_sse2, 5, 5, 8 ; ff_h264_idct_add16intra_sse2(uint8_t *dst, const int *block_offset, ; DCTELEM *block, int stride, const uint8_t nnzc[6*8]) -cglobal h264_idct_add16intra_sse2, 5, 7, 8 +cglobal h264_idct_add16intra_8_sse2, 5, 7, 8 %ifdef ARCH_X86_64 mov r10, r0 %endif @@ -810,12 +818,12 @@ cglobal 
h264_idct_add16intra_sse2, 5, 7, 8 test r0, r0 jz .try%1dc %ifdef ARCH_X86_64 - mov r0d, dword [r1+%1*8+64] + mov r0d, dword [r1+(%1&1)*8+64*(1+(%1>>1))] add r0, [r10] %else mov r0, r0m mov r0, [r0] - add r0, dword [r1+%1*8+64] + add r0, dword [r1+(%1&1)*8+64*(1+(%1>>1))] %endif call x264_add8x4_idct_sse2 jmp .cycle%1end @@ -824,43 +832,45 @@ cglobal h264_idct_add16intra_sse2, 5, 7, 8 or r0w, word [r2+32] jz .cycle%1end %ifdef ARCH_X86_64 - mov r0d, dword [r1+%1*8+64] + mov r0d, dword [r1+(%1&1)*8+64*(1+(%1>>1))] add r0, [r10] %else mov r0, r0m mov r0, [r0] - add r0, dword [r1+%1*8+64] + add r0, dword [r1+(%1&1)*8+64*(1+(%1>>1))] %endif call h264_idct_dc_add8_mmx2 .cycle%1end -%if %1 < 3 +%if %1 == 1 + add r2, 384+64 +%elif %1 < 3 add r2, 64 %endif %endmacro ; ff_h264_idct_add8_sse2(uint8_t **dest, const int *block_offset, ; DCTELEM *block, int stride, const uint8_t nnzc[6*8]) -cglobal h264_idct_add8_sse2, 5, 7, 8 +cglobal h264_idct_add8_8_sse2, 5, 7, 8 add r2, 512 %ifdef ARCH_X86_64 mov r10, r0 %endif - add8_sse2_cycle 0, 0x09 - add8_sse2_cycle 1, 0x11 + add8_sse2_cycle 0, 0x34 + add8_sse2_cycle 1, 0x3c %ifdef ARCH_X86_64 add r10, gprsize %else add r0mp, gprsize %endif - add8_sse2_cycle 2, 0x21 - add8_sse2_cycle 3, 0x29 + add8_sse2_cycle 2, 0x5c + add8_sse2_cycle 3, 0x64 RET ;void ff_h264_luma_dc_dequant_idct_mmx(DCTELEM *output, DCTELEM *input, int qmul) %macro WALSH4_1D 5 - SUMSUB_BADC m%4, m%3, m%2, m%1, m%5 - SUMSUB_BADC m%4, m%2, m%3, m%1, m%5 + SUMSUB_BADC w, %4, %3, %2, %1, %5 + SUMSUB_BADC w, %4, %2, %3, %1, %5 SWAP %1, %4, %3 %endmacro diff --git a/libavcodec/x86/h264_idct_10bit.asm b/libavcodec/x86/h264_idct_10bit.asm new file mode 100644 index 0000000000..54636a95d0 --- /dev/null +++ b/libavcodec/x86/h264_idct_10bit.asm @@ -0,0 +1,565 @@ +;***************************************************************************** +;* MMX/SSE2/AVX-optimized 10-bit H.264 iDCT code +;***************************************************************************** +;* 
Copyright (C) 2005-2011 x264 project +;* +;* Authors: Daniel Kang <daniel.d.kang@gmail.com> +;* +;* This file is part of Libav. +;* +;* Libav is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* Libav is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. +;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with Libav; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;****************************************************************************** + +%include "x86inc.asm" +%include "x86util.asm" + +SECTION_RODATA + +pw_pixel_max: times 8 dw ((1 << 10)-1) +pd_32: times 4 dd 32 +scan8_mem: db 4+ 1*8, 5+ 1*8, 4+ 2*8, 5+ 2*8 + db 6+ 1*8, 7+ 1*8, 6+ 2*8, 7+ 2*8 + db 4+ 3*8, 5+ 3*8, 4+ 4*8, 5+ 4*8 + db 6+ 3*8, 7+ 3*8, 6+ 4*8, 7+ 4*8 + db 4+ 6*8, 5+ 6*8, 4+ 7*8, 5+ 7*8 + db 6+ 6*8, 7+ 6*8, 6+ 7*8, 7+ 7*8 + db 4+ 8*8, 5+ 8*8, 4+ 9*8, 5+ 9*8 + db 6+ 8*8, 7+ 8*8, 6+ 9*8, 7+ 9*8 + db 4+11*8, 5+11*8, 4+12*8, 5+12*8 + db 6+11*8, 7+11*8, 6+12*8, 7+12*8 + db 4+13*8, 5+13*8, 4+14*8, 5+14*8 + db 6+13*8, 7+13*8, 6+14*8, 7+14*8 + +%ifdef PIC +%define scan8 r11 +%else +%define scan8 scan8_mem +%endif + +SECTION .text + +;----------------------------------------------------------------------------- +; void h264_idct_add(pixel *dst, dctcoef *block, int stride) +;----------------------------------------------------------------------------- +%macro STORE_DIFFx2 6 + psrad %1, 6 + psrad %2, 6 + packssdw %1, %2 + movq %3, [%5] + movhps %3, [%5+%6] + paddsw %1, %3 + CLIPW %1, %4, [pw_pixel_max] + movq [%5], %1 
+ movhps [%5+%6], %1 +%endmacro + +%macro STORE_DIFF16 5 + psrad %1, 6 + psrad %2, 6 + packssdw %1, %2 + paddsw %1, [%5] + CLIPW %1, %3, %4 + mova [%5], %1 +%endmacro + +;dst, in, stride +%macro IDCT4_ADD_10 3 + mova m0, [%2+ 0] + mova m1, [%2+16] + mova m2, [%2+32] + mova m3, [%2+48] + IDCT4_1D d,0,1,2,3,4,5 + TRANSPOSE4x4D 0,1,2,3,4 + paddd m0, [pd_32] + IDCT4_1D d,0,1,2,3,4,5 + pxor m5, m5 + STORE_DIFFx2 m0, m1, m4, m5, %1, %3 + lea %1, [%1+%3*2] + STORE_DIFFx2 m2, m3, m4, m5, %1, %3 +%endmacro + +%macro IDCT_ADD_10 1 +cglobal h264_idct_add_10_%1, 3,3 + IDCT4_ADD_10 r0, r1, r2 + RET +%endmacro + +INIT_XMM +IDCT_ADD_10 sse2 +%ifdef HAVE_AVX +INIT_AVX +IDCT_ADD_10 avx +%endif + +;----------------------------------------------------------------------------- +; h264_idct_add16(pixel *dst, const int *block_offset, dctcoef *block, int stride, const uint8_t nnzc[6*8]) +;----------------------------------------------------------------------------- +;;;;;;; NO FATE SAMPLES TRIGGER THIS +%macro ADD4x4IDCT 1 +add4x4_idct_%1: + add r5, r0 + mova m0, [r2+ 0] + mova m1, [r2+16] + mova m2, [r2+32] + mova m3, [r2+48] + IDCT4_1D d,0,1,2,3,4,5 + TRANSPOSE4x4D 0,1,2,3,4 + paddd m0, [pd_32] + IDCT4_1D d,0,1,2,3,4,5 + pxor m5, m5 + STORE_DIFFx2 m0, m1, m4, m5, r5, r3 + lea r5, [r5+r3*2] + STORE_DIFFx2 m2, m3, m4, m5, r5, r3 + ret +%endmacro + +INIT_XMM +ALIGN 16 +ADD4x4IDCT sse2 +%ifdef HAVE_AVX +INIT_AVX +ALIGN 16 +ADD4x4IDCT avx +%endif + +%macro ADD16_OP 3 + cmp byte [r4+%3], 0 + jz .skipblock%2 + mov r5d, [r1+%2*4] + call add4x4_idct_%1 +.skipblock%2: +%if %2<15 + add r2, 64 +%endif +%endmacro + +%macro IDCT_ADD16_10 1 +cglobal h264_idct_add16_10_%1, 5,6 + ADD16_OP %1, 0, 4+1*8 + ADD16_OP %1, 1, 5+1*8 + ADD16_OP %1, 2, 4+2*8 + ADD16_OP %1, 3, 5+2*8 + ADD16_OP %1, 4, 6+1*8 + ADD16_OP %1, 5, 7+1*8 + ADD16_OP %1, 6, 6+2*8 + ADD16_OP %1, 7, 7+2*8 + ADD16_OP %1, 8, 4+3*8 + ADD16_OP %1, 9, 5+3*8 + ADD16_OP %1, 10, 4+4*8 + ADD16_OP %1, 11, 5+4*8 + ADD16_OP %1, 12, 6+3*8 + ADD16_OP %1, 
13, 7+3*8 + ADD16_OP %1, 14, 6+4*8 + ADD16_OP %1, 15, 7+4*8 + REP_RET +%endmacro + +INIT_XMM +IDCT_ADD16_10 sse2 +%ifdef HAVE_AVX +INIT_AVX +IDCT_ADD16_10 avx +%endif + +;----------------------------------------------------------------------------- +; void h264_idct_dc_add(pixel *dst, dctcoef *block, int stride) +;----------------------------------------------------------------------------- +%macro IDCT_DC_ADD_OP_10 3 + pxor m5, m5 +%if avx_enabled + paddw m1, m0, [%1+0 ] + paddw m2, m0, [%1+%2 ] + paddw m3, m0, [%1+%2*2] + paddw m4, m0, [%1+%3 ] +%else + mova m1, [%1+0 ] + mova m2, [%1+%2 ] + mova m3, [%1+%2*2] + mova m4, [%1+%3 ] + paddw m1, m0 + paddw m2, m0 + paddw m3, m0 + paddw m4, m0 +%endif + CLIPW m1, m5, m6 + CLIPW m2, m5, m6 + CLIPW m3, m5, m6 + CLIPW m4, m5, m6 + mova [%1+0 ], m1 + mova [%1+%2 ], m2 + mova [%1+%2*2], m3 + mova [%1+%3 ], m4 +%endmacro + +INIT_MMX +cglobal h264_idct_dc_add_10_mmx2,3,3 + movd m0, [r1] + paddd m0, [pd_32] + psrad m0, 6 + lea r1, [r2*3] + pshufw m0, m0, 0 + mova m6, [pw_pixel_max] + IDCT_DC_ADD_OP_10 r0, r2, r1 + RET + +;----------------------------------------------------------------------------- +; void h264_idct8_dc_add(pixel *dst, dctcoef *block, int stride) +;----------------------------------------------------------------------------- +%macro IDCT8_DC_ADD 1 +cglobal h264_idct8_dc_add_10_%1,3,3,7 + mov r1d, [r1] + add r1, 32 + sar r1, 6 + movd m0, r1d + lea r1, [r2*3] + SPLATW m0, m0, 0 + mova m6, [pw_pixel_max] + IDCT_DC_ADD_OP_10 r0, r2, r1 + lea r0, [r0+r2*4] + IDCT_DC_ADD_OP_10 r0, r2, r1 + RET +%endmacro + +INIT_XMM +IDCT8_DC_ADD sse2 +%ifdef HAVE_AVX +INIT_AVX +IDCT8_DC_ADD avx +%endif + +;----------------------------------------------------------------------------- +; h264_idct_add16intra(pixel *dst, const int *block_offset, dctcoef *block, int stride, const uint8_t nnzc[6*8]) +;----------------------------------------------------------------------------- +%macro AC 2 +.ac%2 + mov r5d, [r1+(%2+0)*4] + call 
add4x4_idct_%1 + mov r5d, [r1+(%2+1)*4] + add r2, 64 + call add4x4_idct_%1 + add r2, 64 + jmp .skipadd%2 +%endmacro + +%assign last_block 16 +%macro ADD16_OP_INTRA 3 + cmp word [r4+%3], 0 + jnz .ac%2 + mov r5d, [r2+ 0] + or r5d, [r2+64] + jz .skipblock%2 + mov r5d, [r1+(%2+0)*4] + call idct_dc_add_%1 +.skipblock%2: +%if %2<last_block-2 + add r2, 128 +%endif +.skipadd%2: +%endmacro + +%macro IDCT_ADD16INTRA_10 1 +idct_dc_add_%1: + add r5, r0 + movq m0, [r2+ 0] + movhps m0, [r2+64] + paddd m0, [pd_32] + psrad m0, 6 + pshufhw m0, m0, 0 + pshuflw m0, m0, 0 + lea r6, [r3*3] + mova m6, [pw_pixel_max] + IDCT_DC_ADD_OP_10 r5, r3, r6 + ret + +cglobal h264_idct_add16intra_10_%1,5,7,8 + ADD16_OP_INTRA %1, 0, 4+1*8 + ADD16_OP_INTRA %1, 2, 4+2*8 + ADD16_OP_INTRA %1, 4, 6+1*8 + ADD16_OP_INTRA %1, 6, 6+2*8 + ADD16_OP_INTRA %1, 8, 4+3*8 + ADD16_OP_INTRA %1, 10, 4+4*8 + ADD16_OP_INTRA %1, 12, 6+3*8 + ADD16_OP_INTRA %1, 14, 6+4*8 + REP_RET + AC %1, 8 + AC %1, 10 + AC %1, 12 + AC %1, 14 + AC %1, 0 + AC %1, 2 + AC %1, 4 + AC %1, 6 +%endmacro + +INIT_XMM +IDCT_ADD16INTRA_10 sse2 +%ifdef HAVE_AVX +INIT_AVX +IDCT_ADD16INTRA_10 avx +%endif + +%assign last_block 36 +;----------------------------------------------------------------------------- +; h264_idct_add8(pixel **dst, const int *block_offset, dctcoef *block, int stride, const uint8_t nnzc[6*8]) +;----------------------------------------------------------------------------- +%macro IDCT_ADD8 1 +cglobal h264_idct_add8_10_%1,5,7 +%ifdef ARCH_X86_64 + mov r10, r0 +%endif + add r2, 1024 + mov r0, [r0] + ADD16_OP_INTRA %1, 16, 4+ 6*8 + ADD16_OP_INTRA %1, 18, 4+ 7*8 + add r2, 1024-128*2 +%ifdef ARCH_X86_64 + mov r0, [r10+gprsize] +%else + mov r0, r0m + mov r0, [r0+gprsize] +%endif + ADD16_OP_INTRA %1, 32, 4+11*8 + ADD16_OP_INTRA %1, 34, 4+12*8 + REP_RET + AC %1, 16 + AC %1, 18 + AC %1, 32 + AC %1, 34 + +%endmacro ; IDCT_ADD8 + +INIT_XMM +IDCT_ADD8 sse2 +%ifdef HAVE_AVX +INIT_AVX +IDCT_ADD8 avx +%endif + 
+;----------------------------------------------------------------------------- +; void h264_idct8_add(pixel *dst, dctcoef *block, int stride) +;----------------------------------------------------------------------------- +%macro IDCT8_1D 2 + SWAP 0, 1 + psrad m4, m5, 1 + psrad m1, m0, 1 + paddd m4, m5 + paddd m1, m0 + paddd m4, m7 + paddd m1, m5 + psubd m4, m0 + paddd m1, m3 + + psubd m0, m3 + psubd m5, m3 + paddd m0, m7 + psubd m5, m7 + psrad m3, 1 + psrad m7, 1 + psubd m0, m3 + psubd m5, m7 + + SWAP 1, 7 + psrad m1, m7, 2 + psrad m3, m4, 2 + paddd m3, m0 + psrad m0, 2 + paddd m1, m5 + psrad m5, 2 + psubd m0, m4 + psubd m7, m5 + + SWAP 5, 6 + psrad m4, m2, 1 + psrad m6, m5, 1 + psubd m4, m5 + paddd m6, m2 + + mova m2, %1 + mova m5, %2 + SUMSUB_BA d, 5, 2 + SUMSUB_BA d, 6, 5 + SUMSUB_BA d, 4, 2 + SUMSUB_BA d, 7, 6 + SUMSUB_BA d, 0, 4 + SUMSUB_BA d, 3, 2 + SUMSUB_BA d, 1, 5 + SWAP 7, 6, 4, 5, 2, 3, 1, 0 ; 70315246 -> 01234567 +%endmacro + +%macro IDCT8_1D_FULL 1 + mova m7, [%1+112*2] + mova m6, [%1+ 96*2] + mova m5, [%1+ 80*2] + mova m3, [%1+ 48*2] + mova m2, [%1+ 32*2] + mova m1, [%1+ 16*2] + IDCT8_1D [%1], [%1+ 64*2] +%endmacro + +; %1=int16_t *block, %2=int16_t *dstblock +%macro IDCT8_ADD_SSE_START 2 + IDCT8_1D_FULL %1 +%ifdef ARCH_X86_64 + TRANSPOSE4x4D 0,1,2,3,8 + mova [%2 ], m0 + TRANSPOSE4x4D 4,5,6,7,8 + mova [%2+8*2], m4 +%else + mova [%1], m7 + TRANSPOSE4x4D 0,1,2,3,7 + mova m7, [%1] + mova [%2 ], m0 + mova [%2+16*2], m1 + mova [%2+32*2], m2 + mova [%2+48*2], m3 + TRANSPOSE4x4D 4,5,6,7,3 + mova [%2+ 8*2], m4 + mova [%2+24*2], m5 + mova [%2+40*2], m6 + mova [%2+56*2], m7 +%endif +%endmacro + +; %1=uint8_t *dst, %2=int16_t *block, %3=int stride +%macro IDCT8_ADD_SSE_END 3 + IDCT8_1D_FULL %2 + mova [%2 ], m6 + mova [%2+16*2], m7 + + pxor m7, m7 + STORE_DIFFx2 m0, m1, m6, m7, %1, %3 + lea %1, [%1+%3*2] + STORE_DIFFx2 m2, m3, m6, m7, %1, %3 + mova m0, [%2 ] + mova m1, [%2+16*2] + lea %1, [%1+%3*2] + STORE_DIFFx2 m4, m5, m6, m7, %1, %3 + lea %1, [%1+%3*2] + 
STORE_DIFFx2 m0, m1, m6, m7, %1, %3 +%endmacro + +%macro IDCT8_ADD 1 +cglobal h264_idct8_add_10_%1, 3,4,16 +%ifndef UNIX64 + %assign pad 16-gprsize-(stack_offset&15) + sub rsp, pad + call h264_idct8_add1_10_%1 + add rsp, pad + RET +%endif + +ALIGN 16 +; TODO: does not need to use stack +h264_idct8_add1_10_%1: +%assign pad 256+16-gprsize + sub rsp, pad + add dword [r1], 32 + +%ifdef ARCH_X86_64 + IDCT8_ADD_SSE_START r1, rsp + SWAP 1, 9 + SWAP 2, 10 + SWAP 3, 11 + SWAP 5, 13 + SWAP 6, 14 + SWAP 7, 15 + IDCT8_ADD_SSE_START r1+16, rsp+128 + PERMUTE 1,9, 2,10, 3,11, 5,1, 6,2, 7,3, 9,13, 10,14, 11,15, 13,5, 14,6, 15,7 + IDCT8_1D [rsp], [rsp+128] + SWAP 0, 8 + SWAP 1, 9 + SWAP 2, 10 + SWAP 3, 11 + SWAP 4, 12 + SWAP 5, 13 + SWAP 6, 14 + SWAP 7, 15 + IDCT8_1D [rsp+16], [rsp+144] + psrad m8, 6 + psrad m0, 6 + packssdw m8, m0 + paddsw m8, [r0] + pxor m0, m0 + CLIPW m8, m0, [pw_pixel_max] + mova [r0], m8 + mova m8, [pw_pixel_max] + STORE_DIFF16 m9, m1, m0, m8, r0+r2 + lea r0, [r0+r2*2] + STORE_DIFF16 m10, m2, m0, m8, r0 + STORE_DIFF16 m11, m3, m0, m8, r0+r2 + lea r0, [r0+r2*2] + STORE_DIFF16 m12, m4, m0, m8, r0 + STORE_DIFF16 m13, m5, m0, m8, r0+r2 + lea r0, [r0+r2*2] + STORE_DIFF16 m14, m6, m0, m8, r0 + STORE_DIFF16 m15, m7, m0, m8, r0+r2 +%else + IDCT8_ADD_SSE_START r1, rsp + IDCT8_ADD_SSE_START r1+16, rsp+128 + lea r3, [r0+8] + IDCT8_ADD_SSE_END r0, rsp, r2 + IDCT8_ADD_SSE_END r3, rsp+16, r2 +%endif ; ARCH_X86_64 + + add rsp, pad + ret +%endmacro + +INIT_XMM +IDCT8_ADD sse2 +%ifdef HAVE_AVX +INIT_AVX +IDCT8_ADD avx +%endif + +;----------------------------------------------------------------------------- +; h264_idct8_add4(pixel **dst, const int *block_offset, dctcoef *block, int stride, const uint8_t nnzc[6*8]) +;----------------------------------------------------------------------------- +;;;;;;; NO FATE SAMPLES TRIGGER THIS +%macro IDCT8_ADD4_OP 3 + cmp byte [r4+%3], 0 + jz .skipblock%2 + mov r0d, [r6+%2*4] + add r0, r5 + call h264_idct8_add1_10_%1 +.skipblock%2: +%if 
%2<12 + add r1, 256 +%endif +%endmacro + +%macro IDCT8_ADD4 1 +cglobal h264_idct8_add4_10_%1, 0,7,16 + %assign pad 16-gprsize-(stack_offset&15) + SUB rsp, pad + mov r5, r0mp + mov r6, r1mp + mov r1, r2mp + mov r2d, r3m + movifnidn r4, r4mp + IDCT8_ADD4_OP %1, 0, 4+1*8 + IDCT8_ADD4_OP %1, 4, 6+1*8 + IDCT8_ADD4_OP %1, 8, 4+3*8 + IDCT8_ADD4_OP %1, 12, 6+3*8 + ADD rsp, pad + RET +%endmacro ; IDCT8_ADD4 + +INIT_XMM +IDCT8_ADD4 sse2 +%ifdef HAVE_AVX +INIT_AVX +IDCT8_ADD4 avx +%endif diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm index 191eb8ddf2..a9da6a19db 100644 --- a/libavcodec/x86/h264_intrapred.asm +++ b/libavcodec/x86/h264_intrapred.asm @@ -836,7 +836,6 @@ PRED8x8_H ssse3 ;----------------------------------------------------------------------------- ; void pred8x8_top_dc_mmxext(uint8_t *src, int stride) ;----------------------------------------------------------------------------- -%ifdef CONFIG_GPL cglobal pred8x8_top_dc_mmxext, 2,5 sub r0, r1 movq mm0, [r0] @@ -927,7 +926,6 @@ cglobal pred8x8_dc_mmxext, 2,5 movq [r4+r1*1], m1 movq [r4+r1*2], m1 RET -%endif ;----------------------------------------------------------------------------- ; void pred8x8_dc_rv40(uint8_t *src, int stride) @@ -1083,7 +1081,6 @@ cglobal pred8x8_tm_vp8_ssse3, 2,3,6 ;----------------------------------------------------------------------------- ; void pred8x8l_top_dc(uint8_t *src, int has_topleft, int has_topright, int stride) ;----------------------------------------------------------------------------- -%ifdef CONFIG_GPL %macro PRED8x8L_TOP_DC 1 cglobal pred8x8l_top_dc_%1, 4,4 sub r0, r3 @@ -2476,7 +2473,6 @@ PRED8x8L_HORIZONTAL_DOWN sse2 INIT_MMX %define PALIGNR PALIGNR_SSSE3 PRED8x8L_HORIZONTAL_DOWN ssse3 -%endif ;----------------------------------------------------------------------------- ; void pred4x4_dc_mmxext(uint8_t *src, const uint8_t *topright, int stride) @@ -2608,7 +2604,6 @@ cglobal pred4x4_vertical_vp8_mmxext, 3,3 
;----------------------------------------------------------------------------- ; void pred4x4_down_left_mmxext(uint8_t *src, const uint8_t *topright, int stride) ;----------------------------------------------------------------------------- -%ifdef CONFIG_GPL INIT_MMX cglobal pred4x4_down_left_mmxext, 3,3 sub r0, r2 @@ -2786,4 +2781,3 @@ cglobal pred4x4_down_right_mmxext, 3,3 psrlq m0, 8 movh [r0+r2*1], m0 RET -%endif diff --git a/libavcodec/x86/h264_intrapred_10bit.asm b/libavcodec/x86/h264_intrapred_10bit.asm new file mode 100644 index 0000000000..5cb593ac38 --- /dev/null +++ b/libavcodec/x86/h264_intrapred_10bit.asm @@ -0,0 +1,337 @@ +;***************************************************************************** +;* MMX/SSE2/AVX-optimized 10-bit H.264 intra prediction code +;***************************************************************************** +;* Copyright (C) 2005-2011 x264 project +;* +;* Authors: Daniel Kang <daniel.d.kang@gmail.com> +;* +;* This file is part of Libav. +;* +;* Libav is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* Libav is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. 
+;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with Libav; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;****************************************************************************** + +%include "x86inc.asm" +%include "x86util.asm" + +SECTION_RODATA + +SECTION .text + +cextern pw_4 +cextern pw_1 + +%macro PRED4x4_LOWPASS 4 + paddw %2, %3 + psrlw %2, 1 + pavgw %1, %4, %2 +%endmacro + +;----------------------------------------------------------------------------- +; void pred4x4_down_right(pixel *src, const pixel *topright, int stride) +;----------------------------------------------------------------------------- +%macro PRED4x4_DR 1 +cglobal pred4x4_down_right_10_%1, 3,3 + sub r0, r2 + lea r1, [r0+r2*2] + movhps m1, [r1-8] + movhps m2, [r0+r2*1-8] + movhps m4, [r0-8] + punpckhwd m2, m4 + movq m3, [r0] + punpckhdq m1, m2 + PALIGNR m3, m1, 10, m1 + mova m1, m3 + movhps m4, [r1+r2*1-8] + PALIGNR m3, m4, 14, m4 + mova m2, m3 + movhps m4, [r1+r2*2-8] + PALIGNR m3, m4, 14, m4 + PRED4x4_LOWPASS m0, m3, m1, m2 + movq [r1+r2*2], m0 + psrldq m0, 2 + movq [r1+r2*1], m0 + psrldq m0, 2 + movq [r0+r2*2], m0 + psrldq m0, 2 + movq [r0+r2*1], m0 + RET +%endmacro + +INIT_XMM +%define PALIGNR PALIGNR_MMX +PRED4x4_DR sse2 +%define PALIGNR PALIGNR_SSSE3 +PRED4x4_DR ssse3 +%ifdef HAVE_AVX +INIT_AVX +PRED4x4_DR avx +%endif + +;----------------------------------------------------------------------------- +; void pred4x4_vertical_right(pixel *src, const pixel *topright, int stride) +;----------------------------------------------------------------------------- +%macro PRED4x4_VR 1 +cglobal pred4x4_vertical_right_10_%1, 3,3,6 + sub r0, r2 + lea r1, [r0+r2*2] + movq m5, [r0] ; ........t3t2t1t0 + movhps m1, [r0-8] + PALIGNR m0, m5, m1, 14, m1 ; ......t3t2t1t0lt + pavgw m5, m0 + movhps m1, [r0+r2*1-8] + PALIGNR m0, m1, 14, m1 ; ....t3t2t1t0ltl0 + mova m1, m0 + movhps m2, 
[r0+r2*2-8] + PALIGNR m0, m2, 14, m2 ; ..t3t2t1t0ltl0l1 + mova m2, m0 + movhps m3, [r1+r2*1-8] + PALIGNR m0, m3, 14, m3 ; t3t2t1t0ltl0l1l2 + PRED4x4_LOWPASS m3, m1, m0, m2 + pslldq m1, m3, 12 + psrldq m3, 4 + movq [r0+r2*1], m5 + movq [r0+r2*2], m3 + PALIGNR m5, m1, 14, m2 + pslldq m1, 2 + movq [r1+r2*1], m5 + PALIGNR m3, m1, 14, m1 + movq [r1+r2*2], m3 + RET +%endmacro + +INIT_XMM +%define PALIGNR PALIGNR_MMX +PRED4x4_VR sse2 +%define PALIGNR PALIGNR_SSSE3 +PRED4x4_VR ssse3 +%ifdef HAVE_AVX +INIT_AVX +PRED4x4_VR avx +%endif + +;----------------------------------------------------------------------------- +; void pred4x4_horizontal_down(pixel *src, const pixel *topright, int stride) +;----------------------------------------------------------------------------- +%macro PRED4x4_HD 1 +cglobal pred4x4_horizontal_down_10_%1, 3,3 + sub r0, r2 + lea r1, [r0+r2*2] + movq m0, [r0-8] ; lt .. + movhps m0, [r0] + pslldq m0, 2 ; t2 t1 t0 lt .. .. .. .. + movq m1, [r1+r2*2-8] ; l3 + movq m3, [r1+r2*1-8] + punpcklwd m1, m3 ; l2 l3 + movq m2, [r0+r2*2-8] ; l1 + movq m3, [r0+r2*1-8] + punpcklwd m2, m3 ; l0 l1 + punpckhdq m1, m2 ; l0 l1 l2 l3 + punpckhqdq m1, m0 ; t2 t1 t0 lt l0 l1 l2 l3 + psrldq m0, m1, 4 ; .. .. t2 t1 t0 lt l0 l1 + psrldq m2, m1, 2 ; .. 
t2 t1 t0 lt l0 l1 l2 + pavgw m5, m1, m2 + PRED4x4_LOWPASS m3, m1, m0, m2 + punpcklwd m5, m3 + psrldq m3, 8 + PALIGNR m3, m5, 12, m4 + movq [r1+r2*2], m5 + movhps [r0+r2*2], m5 + psrldq m5, 4 + movq [r1+r2*1], m5 + movq [r0+r2*1], m3 + RET +%endmacro + +INIT_XMM +%define PALIGNR PALIGNR_MMX +PRED4x4_HD sse2 +%define PALIGNR PALIGNR_SSSE3 +PRED4x4_HD ssse3 +%ifdef HAVE_AVX +INIT_AVX +PRED4x4_HD avx +%endif + +;----------------------------------------------------------------------------- +; void pred4x4_dc(pixel *src, const pixel *topright, int stride) +;----------------------------------------------------------------------------- +%macro HADDD 2 ; sum junk +%if mmsize == 16 + movhlps %2, %1 + paddd %1, %2 + pshuflw %2, %1, 0xE + paddd %1, %2 +%else + pshufw %2, %1, 0xE + paddd %1, %2 +%endif +%endmacro + +%macro HADDW 2 + pmaddwd %1, [pw_1] + HADDD %1, %2 +%endmacro + +INIT_MMX +cglobal pred4x4_dc_10_mmxext, 3,3 + sub r0, r2 + lea r1, [r0+r2*2] + movq m2, [r0+r2*1-8] + paddw m2, [r0+r2*2-8] + paddw m2, [r1+r2*1-8] + paddw m2, [r1+r2*2-8] + psrlq m2, 48 + movq m0, [r0] + HADDW m0, m1 + paddw m0, [pw_4] + paddw m0, m2 + psrlw m0, 3 + SPLATW m0, m0, 0 + movq [r0+r2*1], m0 + movq [r0+r2*2], m0 + movq [r1+r2*1], m0 + movq [r1+r2*2], m0 + RET + +;----------------------------------------------------------------------------- +; void pred4x4_down_left(pixel *src, const pixel *topright, int stride) +;----------------------------------------------------------------------------- +;TODO: more AVX here +%macro PRED4x4_DL 1 +cglobal pred4x4_down_left_10_%1, 3,3 + sub r0, r2 + movq m1, [r0] + movhps m1, [r1] + pslldq m5, m1, 2 + pxor m2, m5, m1 + psrldq m2, 2 + pxor m3, m1, m2 + PRED4x4_LOWPASS m0, m5, m3, m1 + lea r1, [r0+r2*2] + movhps [r1+r2*2], m0 + psrldq m0, 2 + movq [r0+r2*1], m0 + psrldq m0, 2 + movq [r0+r2*2], m0 + psrldq m0, 2 + movq [r1+r2*1], m0 + RET +%endmacro + +INIT_XMM +PRED4x4_DL sse2 +%ifdef HAVE_AVX +INIT_AVX +PRED4x4_DL avx +%endif + 
+;----------------------------------------------------------------------------- +; void pred4x4_vertical_left(pixel *src, const pixel *topright, int stride) +;----------------------------------------------------------------------------- +%macro PRED4x4_VL 1 +cglobal pred4x4_vertical_left_10_%1, 3,3 + sub r0, r2 + movu m1, [r0] + movhps m1, [r1] + psrldq m3, m1, 2 + psrldq m2, m1, 4 + pavgw m4, m3, m1 + PRED4x4_LOWPASS m0, m1, m2, m3 + lea r1, [r0+r2*2] + movq [r0+r2*1], m4 + movq [r0+r2*2], m0 + psrldq m4, 2 + psrldq m0, 2 + movq [r1+r2*1], m4 + movq [r1+r2*2], m0 + RET +%endmacro + +INIT_XMM +PRED4x4_VL sse2 +%ifdef HAVE_AVX +INIT_AVX +PRED4x4_VL avx +%endif + +;----------------------------------------------------------------------------- +; void pred4x4_horizontal_up(pixel *src, const pixel *topright, int stride) +;----------------------------------------------------------------------------- +INIT_MMX +cglobal pred4x4_horizontal_up_10_mmxext, 3,3 + sub r0, r2 + lea r1, [r0+r2*2] + movq m0, [r0+r2*1-8] + punpckhwd m0, [r0+r2*2-8] + movq m1, [r1+r2*1-8] + punpckhwd m1, [r1+r2*2-8] + punpckhdq m0, m1 + pshufw m1, m1, 0xFF + movq [r1+r2*2], m1 + movd [r1+r2*1+4], m1 + pshufw m2, m0, 11111001b + movq m1, m2 + pavgw m2, m0 + + pshufw m5, m0, 11111110b + PRED4x4_LOWPASS m3, m0, m5, m1 + movq m6, m2 + punpcklwd m6, m3 + movq [r0+r2*1], m6 + psrlq m2, 16 + psrlq m3, 16 + punpcklwd m2, m3 + movq [r0+r2*2], m2 + psrlq m2, 32 + movd [r1+r2*1], m2 + RET + + + +;----------------------------------------------------------------------------- +; void pred8x8_vertical(pixel *src, int stride) +;----------------------------------------------------------------------------- +INIT_XMM +cglobal pred8x8_vertical_10_sse2, 2,2 + sub r0, r1 + mova m0, [r0] +%rep 3 + mova [r0+r1*1], m0 + mova [r0+r1*2], m0 + lea r0, [r0+r1*2] +%endrep + mova [r0+r1*1], m0 + mova [r0+r1*2], m0 + RET + +;----------------------------------------------------------------------------- +; void 
pred8x8_horizontal(pixel *src, int stride) +;----------------------------------------------------------------------------- +INIT_XMM +cglobal pred8x8_horizontal_10_sse2, 2,3 + mov r2, 4 +.loop: + movq m0, [r0+r1*0-8] + movq m1, [r0+r1*1-8] + pshuflw m0, m0, 0xff + pshuflw m1, m1, 0xff + punpcklqdq m0, m0 + punpcklqdq m1, m1 + mova [r0+r1*0], m0 + mova [r0+r1*1], m1 + lea r0, [r0+r1*2] + dec r2 + jg .loop + REP_RET diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c index fa5b86fc3e..d71f71e861 100644 --- a/libavcodec/x86/h264_intrapred_init.c +++ b/libavcodec/x86/h264_intrapred_init.c @@ -21,6 +21,31 @@ #include "libavutil/cpu.h" #include "libavcodec/h264pred.h" +#define PRED4x4(TYPE, DEPTH, OPT) \ +void ff_pred4x4_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, const uint8_t *topright, int stride); + +PRED4x4(dc, 10, mmxext) +PRED4x4(down_left, 10, sse2) +PRED4x4(down_left, 10, avx) +PRED4x4(down_right, 10, sse2) +PRED4x4(down_right, 10, ssse3) +PRED4x4(down_right, 10, avx) +PRED4x4(vertical_left, 10, sse2) +PRED4x4(vertical_left, 10, avx) +PRED4x4(vertical_right, 10, sse2) +PRED4x4(vertical_right, 10, ssse3) +PRED4x4(vertical_right, 10, avx) +PRED4x4(horizontal_up, 10, mmxext) +PRED4x4(horizontal_down, 10, sse2) +PRED4x4(horizontal_down, 10, ssse3) +PRED4x4(horizontal_down, 10, avx) + +#define PRED8x8(TYPE, DEPTH, OPT) \ +void ff_pred8x8_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, int stride); + +PRED8x8(vertical, 10, sse2) +PRED8x8(horizontal, 10, sse2) + void ff_pred16x16_vertical_mmx (uint8_t *src, int stride); void ff_pred16x16_vertical_sse (uint8_t *src, int stride); void ff_pred16x16_horizontal_mmx (uint8_t *src, int stride); @@ -98,140 +123,160 @@ void ff_pred4x4_vertical_vp8_mmxext(uint8_t *src, const uint8_t *topright, int s void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth) { int mm_flags = av_get_cpu_flags(); - const int high_depth = bit_depth > 8; - - if (high_depth) - return; 
#if HAVE_YASM - if (mm_flags & AV_CPU_FLAG_MMX) { - h->pred16x16[VERT_PRED8x8] = ff_pred16x16_vertical_mmx; - h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_mmx; - h->pred8x8 [VERT_PRED8x8] = ff_pred8x8_vertical_mmx; - h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_mmx; - if (codec_id == CODEC_ID_VP8) { - h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_tm_vp8_mmx; - h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_mmx; - h->pred4x4 [TM_VP8_PRED ] = ff_pred4x4_tm_vp8_mmx; - } else { - h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_plane_mmx; - if (codec_id == CODEC_ID_SVQ3) { - h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_mmx; - } else if (codec_id == CODEC_ID_RV40) { - h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_rv40_mmx; + if (bit_depth == 8) { + if (mm_flags & AV_CPU_FLAG_MMX) { + h->pred16x16[VERT_PRED8x8 ] = ff_pred16x16_vertical_mmx; + h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_mmx; + h->pred8x8 [VERT_PRED8x8 ] = ff_pred8x8_vertical_mmx; + h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_mmx; + if (codec_id == CODEC_ID_VP8) { + h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_tm_vp8_mmx; + h->pred8x8 [PLANE_PRED8x8 ] = ff_pred8x8_tm_vp8_mmx; + h->pred4x4 [TM_VP8_PRED ] = ff_pred4x4_tm_vp8_mmx; } else { - h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_h264_mmx; + h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_plane_mmx; + if (codec_id == CODEC_ID_SVQ3) { + h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_mmx; + } else if (codec_id == CODEC_ID_RV40) { + h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_rv40_mmx; + } else { + h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_h264_mmx; + } } } - } - if (mm_flags & AV_CPU_FLAG_MMX2) { - h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_mmxext; - h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_mmxext; - h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_mmxext; -#if CONFIG_GPL - h->pred8x8l [TOP_DC_PRED ] = ff_pred8x8l_top_dc_mmxext; - h->pred8x8l [DC_PRED ] = ff_pred8x8l_dc_mmxext; - h->pred8x8l 
[HOR_PRED ] = ff_pred8x8l_horizontal_mmxext; - h->pred8x8l [VERT_PRED ] = ff_pred8x8l_vertical_mmxext; - h->pred8x8l [DIAG_DOWN_RIGHT_PRED] = ff_pred8x8l_down_right_mmxext; - h->pred8x8l [VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_mmxext; - h->pred8x8l [HOR_UP_PRED ] = ff_pred8x8l_horizontal_up_mmxext; - h->pred8x8l [DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_mmxext; - h->pred8x8l [HOR_DOWN_PRED ] = ff_pred8x8l_horizontal_down_mmxext; - h->pred4x4 [DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_mmxext; - h->pred4x4 [VERT_RIGHT_PRED ] = ff_pred4x4_vertical_right_mmxext; - h->pred4x4 [HOR_DOWN_PRED ] = ff_pred4x4_horizontal_down_mmxext; -#endif - h->pred4x4 [DC_PRED ] = ff_pred4x4_dc_mmxext; -#if CONFIG_GPL - if (codec_id == CODEC_ID_VP8 || codec_id == CODEC_ID_H264) - h->pred4x4 [DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_mmxext; - if (codec_id == CODEC_ID_SVQ3 || codec_id == CODEC_ID_H264) - h->pred4x4 [VERT_LEFT_PRED ] = ff_pred4x4_vertical_left_mmxext; - if (codec_id != CODEC_ID_RV40) { - h->pred4x4 [HOR_UP_PRED ] = ff_pred4x4_horizontal_up_mmxext; - } - if (codec_id == CODEC_ID_SVQ3 || codec_id == CODEC_ID_H264) { - h->pred8x8 [TOP_DC_PRED8x8 ] = ff_pred8x8_top_dc_mmxext; - h->pred8x8 [DC_PRED8x8 ] = ff_pred8x8_dc_mmxext; - } -#endif - if (codec_id == CODEC_ID_VP8) { - h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_tm_vp8_mmxext; - h->pred8x8 [DC_PRED8x8 ] = ff_pred8x8_dc_rv40_mmxext; - h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_mmxext; - h->pred4x4 [TM_VP8_PRED ] = ff_pred4x4_tm_vp8_mmxext; - h->pred4x4 [VERT_PRED ] = ff_pred4x4_vertical_vp8_mmxext; - } else { - h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_plane_mmx2; - if (codec_id == CODEC_ID_SVQ3) { - h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_mmx2; - } else if (codec_id == CODEC_ID_RV40) { - h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_rv40_mmx2; + if (mm_flags & AV_CPU_FLAG_MMX2) { + h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_mmxext; + h->pred16x16[DC_PRED8x8 ] = 
ff_pred16x16_dc_mmxext; + h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_mmxext; + h->pred8x8l [TOP_DC_PRED ] = ff_pred8x8l_top_dc_mmxext; + h->pred8x8l [DC_PRED ] = ff_pred8x8l_dc_mmxext; + h->pred8x8l [HOR_PRED ] = ff_pred8x8l_horizontal_mmxext; + h->pred8x8l [VERT_PRED ] = ff_pred8x8l_vertical_mmxext; + h->pred8x8l [DIAG_DOWN_RIGHT_PRED ] = ff_pred8x8l_down_right_mmxext; + h->pred8x8l [VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_mmxext; + h->pred8x8l [HOR_UP_PRED ] = ff_pred8x8l_horizontal_up_mmxext; + h->pred8x8l [DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_mmxext; + h->pred8x8l [HOR_DOWN_PRED ] = ff_pred8x8l_horizontal_down_mmxext; + h->pred4x4 [DIAG_DOWN_RIGHT_PRED ] = ff_pred4x4_down_right_mmxext; + h->pred4x4 [VERT_RIGHT_PRED ] = ff_pred4x4_vertical_right_mmxext; + h->pred4x4 [HOR_DOWN_PRED ] = ff_pred4x4_horizontal_down_mmxext; + h->pred4x4 [DC_PRED ] = ff_pred4x4_dc_mmxext; + if (codec_id == CODEC_ID_VP8 || codec_id == CODEC_ID_H264) { + h->pred4x4 [DIAG_DOWN_LEFT_PRED] = ff_pred4x4_down_left_mmxext; + } + if (codec_id == CODEC_ID_SVQ3 || codec_id == CODEC_ID_H264) { + h->pred4x4 [VERT_LEFT_PRED ] = ff_pred4x4_vertical_left_mmxext; + } + if (codec_id != CODEC_ID_RV40) { + h->pred4x4 [HOR_UP_PRED ] = ff_pred4x4_horizontal_up_mmxext; + } + if (codec_id == CODEC_ID_SVQ3 || codec_id == CODEC_ID_H264) { + h->pred8x8 [TOP_DC_PRED8x8 ] = ff_pred8x8_top_dc_mmxext; + h->pred8x8 [DC_PRED8x8 ] = ff_pred8x8_dc_mmxext; + } + if (codec_id == CODEC_ID_VP8) { + h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_tm_vp8_mmxext; + h->pred8x8 [DC_PRED8x8 ] = ff_pred8x8_dc_rv40_mmxext; + h->pred8x8 [PLANE_PRED8x8 ] = ff_pred8x8_tm_vp8_mmxext; + h->pred4x4 [TM_VP8_PRED ] = ff_pred4x4_tm_vp8_mmxext; + h->pred4x4 [VERT_PRED ] = ff_pred4x4_vertical_vp8_mmxext; } else { - h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_h264_mmx2; + h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_plane_mmx2; + if (codec_id == CODEC_ID_SVQ3) { + h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_plane_svq3_mmx2; 
+ } else if (codec_id == CODEC_ID_RV40) { + h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_plane_rv40_mmx2; + } else { + h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_plane_h264_mmx2; + } } } - } - if (mm_flags & AV_CPU_FLAG_SSE) { - h->pred16x16[VERT_PRED8x8] = ff_pred16x16_vertical_sse; - } + if (mm_flags & AV_CPU_FLAG_SSE) { + h->pred16x16[VERT_PRED8x8] = ff_pred16x16_vertical_sse; + } - if (mm_flags & AV_CPU_FLAG_SSE2) { - h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_sse2; -#if CONFIG_GPL - h->pred8x8l [DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_sse2; - h->pred8x8l [DIAG_DOWN_RIGHT_PRED] = ff_pred8x8l_down_right_sse2; - h->pred8x8l [VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_sse2; - h->pred8x8l [VERT_LEFT_PRED ] = ff_pred8x8l_vertical_left_sse2; - h->pred8x8l [HOR_DOWN_PRED ] = ff_pred8x8l_horizontal_down_sse2; -#endif - if (codec_id == CODEC_ID_VP8) { - h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_tm_vp8_sse2; - h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_sse2; - } else { - h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_plane_sse2; - if (codec_id == CODEC_ID_SVQ3) { - h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_sse2; - } else if (codec_id == CODEC_ID_RV40) { - h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_rv40_sse2; + if (mm_flags & AV_CPU_FLAG_SSE2) { + h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_sse2; + h->pred8x8l [DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_sse2; + h->pred8x8l [DIAG_DOWN_RIGHT_PRED ] = ff_pred8x8l_down_right_sse2; + h->pred8x8l [VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_sse2; + h->pred8x8l [VERT_LEFT_PRED ] = ff_pred8x8l_vertical_left_sse2; + h->pred8x8l [HOR_DOWN_PRED ] = ff_pred8x8l_horizontal_down_sse2; + if (codec_id == CODEC_ID_VP8) { + h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_tm_vp8_sse2; + h->pred8x8 [PLANE_PRED8x8 ] = ff_pred8x8_tm_vp8_sse2; } else { - h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_h264_sse2; + h->pred8x8 [PLANE_PRED8x8 ] = ff_pred8x8_plane_sse2; + if (codec_id == CODEC_ID_SVQ3) { + 
h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_sse2; + } else if (codec_id == CODEC_ID_RV40) { + h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_rv40_sse2; + } else { + h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_h264_sse2; + } } } - } - if (mm_flags & AV_CPU_FLAG_SSSE3) { - h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_ssse3; - h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_ssse3; - h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_ssse3; -#if CONFIG_GPL - h->pred8x8l [TOP_DC_PRED ] = ff_pred8x8l_top_dc_ssse3; - h->pred8x8l [DC_PRED ] = ff_pred8x8l_dc_ssse3; - h->pred8x8l [HOR_PRED ] = ff_pred8x8l_horizontal_ssse3; - h->pred8x8l [VERT_PRED ] = ff_pred8x8l_vertical_ssse3; - h->pred8x8l [DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_ssse3; - h->pred8x8l [DIAG_DOWN_RIGHT_PRED] = ff_pred8x8l_down_right_ssse3; - h->pred8x8l [VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_ssse3; - h->pred8x8l [VERT_LEFT_PRED ] = ff_pred8x8l_vertical_left_ssse3; - h->pred8x8l [HOR_UP_PRED ] = ff_pred8x8l_horizontal_up_ssse3; - h->pred8x8l [HOR_DOWN_PRED ] = ff_pred8x8l_horizontal_down_ssse3; -#endif - if (codec_id == CODEC_ID_VP8) { - h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_ssse3; - h->pred4x4 [TM_VP8_PRED ] = ff_pred4x4_tm_vp8_ssse3; - } else { - h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_plane_ssse3; - if (codec_id == CODEC_ID_SVQ3) { - h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_ssse3; - } else if (codec_id == CODEC_ID_RV40) { - h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_rv40_ssse3; + if (mm_flags & AV_CPU_FLAG_SSSE3) { + h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_ssse3; + h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_ssse3; + h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_ssse3; + h->pred8x8l [TOP_DC_PRED ] = ff_pred8x8l_top_dc_ssse3; + h->pred8x8l [DC_PRED ] = ff_pred8x8l_dc_ssse3; + h->pred8x8l [HOR_PRED ] = ff_pred8x8l_horizontal_ssse3; + h->pred8x8l [VERT_PRED ] = ff_pred8x8l_vertical_ssse3; + h->pred8x8l [DIAG_DOWN_LEFT_PRED ] = 
ff_pred8x8l_down_left_ssse3; + h->pred8x8l [DIAG_DOWN_RIGHT_PRED ] = ff_pred8x8l_down_right_ssse3; + h->pred8x8l [VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_ssse3; + h->pred8x8l [VERT_LEFT_PRED ] = ff_pred8x8l_vertical_left_ssse3; + h->pred8x8l [HOR_UP_PRED ] = ff_pred8x8l_horizontal_up_ssse3; + h->pred8x8l [HOR_DOWN_PRED ] = ff_pred8x8l_horizontal_down_ssse3; + if (codec_id == CODEC_ID_VP8) { + h->pred8x8 [PLANE_PRED8x8 ] = ff_pred8x8_tm_vp8_ssse3; + h->pred4x4 [TM_VP8_PRED ] = ff_pred4x4_tm_vp8_ssse3; } else { - h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_h264_ssse3; + h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_plane_ssse3; + if (codec_id == CODEC_ID_SVQ3) { + h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_ssse3; + } else if (codec_id == CODEC_ID_RV40) { + h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_rv40_ssse3; + } else { + h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_h264_ssse3; + } } } + } else if (bit_depth == 10) { + if (mm_flags & AV_CPU_FLAG_MMX2) { + h->pred4x4[DC_PRED ] = ff_pred4x4_dc_10_mmxext; + h->pred4x4[HOR_UP_PRED ] = ff_pred4x4_horizontal_up_10_mmxext; + } + if (mm_flags & AV_CPU_FLAG_SSE2) { + h->pred4x4[DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_10_sse2; + h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_sse2; + h->pred4x4[VERT_LEFT_PRED ] = ff_pred4x4_vertical_left_10_sse2; + h->pred4x4[VERT_RIGHT_PRED ] = ff_pred4x4_vertical_right_10_sse2; + h->pred4x4[HOR_DOWN_PRED ] = ff_pred4x4_horizontal_down_10_sse2; + + h->pred8x8[VERT_PRED8x8 ] = ff_pred8x8_vertical_10_sse2; + h->pred8x8[HOR_PRED8x8 ] = ff_pred8x8_horizontal_10_sse2; + } + if (mm_flags & AV_CPU_FLAG_SSSE3) { + h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_ssse3; + h->pred4x4[VERT_RIGHT_PRED ] = ff_pred4x4_vertical_right_10_ssse3; + h->pred4x4[HOR_DOWN_PRED ] = ff_pred4x4_horizontal_down_10_ssse3; + } +#if HAVE_AVX + if (mm_flags & AV_CPU_FLAG_AVX) { + h->pred4x4[DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_10_avx; + 
h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_avx; + h->pred4x4[VERT_RIGHT_PRED ] = ff_pred4x4_vertical_right_10_avx; + h->pred4x4[HOR_DOWN_PRED ] = ff_pred4x4_horizontal_down_10_avx; + } +#endif /* HAVE_AVX */ } -#endif +#endif /* HAVE_YASM */ } diff --git a/libavcodec/x86/h264dsp_mmx.c b/libavcodec/x86/h264dsp_mmx.c index b331f94b5e..3bb4384a5f 100644 --- a/libavcodec/x86/h264dsp_mmx.c +++ b/libavcodec/x86/h264dsp_mmx.c @@ -27,38 +27,61 @@ DECLARE_ALIGNED(8, static const uint64_t, ff_pb_3_1 ) = 0x0103010301030103ULL; /***********************************/ /* IDCT */ +#define IDCT_ADD_FUNC(NUM, DEPTH, OPT) \ +void ff_h264_idct ## NUM ## _add_ ## DEPTH ## _ ## OPT (uint8_t *dst, int16_t *block, int stride); + +IDCT_ADD_FUNC(, 8, mmx) +IDCT_ADD_FUNC(, 10, sse2) +IDCT_ADD_FUNC(_dc, 8, mmx2) +IDCT_ADD_FUNC(_dc, 10, mmx2) +IDCT_ADD_FUNC(8_dc, 8, mmx2) +IDCT_ADD_FUNC(8_dc, 10, sse2) +IDCT_ADD_FUNC(8, 8, mmx) +IDCT_ADD_FUNC(8, 8, sse2) +IDCT_ADD_FUNC(8, 10, sse2) +#if HAVE_AVX +IDCT_ADD_FUNC(, 10, avx) +IDCT_ADD_FUNC(8_dc, 10, avx) +IDCT_ADD_FUNC(8, 10, avx) +#endif + + +#define IDCT_ADD_REP_FUNC(NUM, REP, DEPTH, OPT) \ +void ff_h264_idct ## NUM ## _add ## REP ## _ ## DEPTH ## _ ## OPT \ + (uint8_t *dst, const int *block_offset, \ + DCTELEM *block, int stride, const uint8_t nnzc[6*8]); + +IDCT_ADD_REP_FUNC(8, 4, 8, mmx) +IDCT_ADD_REP_FUNC(8, 4, 8, mmx2) +IDCT_ADD_REP_FUNC(8, 4, 8, sse2) +IDCT_ADD_REP_FUNC(8, 4, 10, sse2) +IDCT_ADD_REP_FUNC(8, 4, 10, avx) +IDCT_ADD_REP_FUNC(, 16, 8, mmx) +IDCT_ADD_REP_FUNC(, 16, 8, mmx2) +IDCT_ADD_REP_FUNC(, 16, 8, sse2) +IDCT_ADD_REP_FUNC(, 16, 10, sse2) +IDCT_ADD_REP_FUNC(, 16intra, 8, mmx) +IDCT_ADD_REP_FUNC(, 16intra, 8, mmx2) +IDCT_ADD_REP_FUNC(, 16intra, 8, sse2) +IDCT_ADD_REP_FUNC(, 16intra, 10, sse2) +#if HAVE_AVX +IDCT_ADD_REP_FUNC(, 16, 10, avx) +IDCT_ADD_REP_FUNC(, 16intra, 10, avx) +#endif + + +#define IDCT_ADD_REP_FUNC2(NUM, REP, DEPTH, OPT) \ +void ff_h264_idct ## NUM ## _add ## REP ## _ ## DEPTH ## _ ## OPT \ + 
(uint8_t **dst, const int *block_offset, \ + DCTELEM *block, int stride, const uint8_t nnzc[6*8]); +IDCT_ADD_REP_FUNC2(, 8, 8, mmx) +IDCT_ADD_REP_FUNC2(, 8, 8, mmx2) +IDCT_ADD_REP_FUNC2(, 8, 8, sse2) +IDCT_ADD_REP_FUNC2(, 8, 10, sse2) +#if HAVE_AVX +IDCT_ADD_REP_FUNC2(, 8, 10, avx) +#endif -void ff_h264_idct_add_mmx (uint8_t *dst, int16_t *block, int stride); -void ff_h264_idct8_add_mmx (uint8_t *dst, int16_t *block, int stride); -void ff_h264_idct8_add_sse2 (uint8_t *dst, int16_t *block, int stride); -void ff_h264_idct_dc_add_mmx2 (uint8_t *dst, int16_t *block, int stride); -void ff_h264_idct8_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride); - -void ff_h264_idct_add16_mmx (uint8_t *dst, const int *block_offset, - DCTELEM *block, int stride, const uint8_t nnzc[6*8]); -void ff_h264_idct8_add4_mmx (uint8_t *dst, const int *block_offset, - DCTELEM *block, int stride, const uint8_t nnzc[6*8]); -void ff_h264_idct_add16_mmx2 (uint8_t *dst, const int *block_offset, - DCTELEM *block, int stride, const uint8_t nnzc[6*8]); -void ff_h264_idct_add16intra_mmx (uint8_t *dst, const int *block_offset, - DCTELEM *block, int stride, const uint8_t nnzc[6*8]); -void ff_h264_idct_add16intra_mmx2(uint8_t *dst, const int *block_offset, - DCTELEM *block, int stride, const uint8_t nnzc[6*8]); -void ff_h264_idct8_add4_mmx2 (uint8_t *dst, const int *block_offset, - DCTELEM *block, int stride, const uint8_t nnzc[6*8]); -void ff_h264_idct8_add4_sse2 (uint8_t *dst, const int *block_offset, - DCTELEM *block, int stride, const uint8_t nnzc[6*8]); -void ff_h264_idct_add8_mmx (uint8_t **dest, const int *block_offset, - DCTELEM *block, int stride, const uint8_t nnzc[6*8]); -void ff_h264_idct_add8_mmx2 (uint8_t **dest, const int *block_offset, - DCTELEM *block, int stride, const uint8_t nnzc[6*8]); - -void ff_h264_idct_add16_sse2 (uint8_t *dst, const int *block_offset, DCTELEM *block, - int stride, const uint8_t nnzc[6*8]); -void ff_h264_idct_add16intra_sse2(uint8_t *dst, const int 
*block_offset, DCTELEM *block, - int stride, const uint8_t nnzc[6*8]); -void ff_h264_idct_add8_sse2 (uint8_t **dest, const int *block_offset, DCTELEM *block, - int stride, const uint8_t nnzc[6*8]); void ff_h264_luma_dc_dequant_idct_mmx (DCTELEM *output, DCTELEM *input, int qmul); void ff_h264_luma_dc_dequant_idct_sse2(DCTELEM *output, DCTELEM *input, int qmul); @@ -252,6 +275,7 @@ LF_IFUNC(v, chroma_intra, depth, avx) LF_FUNCS( uint8_t, 8) LF_FUNCS(uint16_t, 10) +#if ARCH_X86_32 LF_FUNC (v8, luma, 8, mmxext) static void ff_deblock_v_luma_8_mmxext(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) { @@ -266,6 +290,7 @@ static void ff_deblock_v_luma_intra_8_mmxext(uint8_t *pix, int stride, int alpha ff_deblock_v8_luma_intra_8_mmxext(pix+0, stride, alpha, beta); ff_deblock_v8_luma_intra_8_mmxext(pix+8, stride, alpha, beta); } +#endif /* ARCH_X86_32 */ LF_FUNC (v, luma, 10, mmxext) LF_IFUNC(v, luma_intra, 10, mmxext) @@ -311,24 +336,24 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth) } #if HAVE_YASM if (mm_flags & AV_CPU_FLAG_MMX) { - c->h264_idct_dc_add= - c->h264_idct_add= ff_h264_idct_add_mmx; - c->h264_idct8_dc_add= - c->h264_idct8_add= ff_h264_idct8_add_mmx; - - c->h264_idct_add16 = ff_h264_idct_add16_mmx; - c->h264_idct8_add4 = ff_h264_idct8_add4_mmx; - c->h264_idct_add8 = ff_h264_idct_add8_mmx; - c->h264_idct_add16intra= ff_h264_idct_add16intra_mmx; + c->h264_idct_dc_add = + c->h264_idct_add = ff_h264_idct_add_8_mmx; + c->h264_idct8_dc_add = + c->h264_idct8_add = ff_h264_idct8_add_8_mmx; + + c->h264_idct_add16 = ff_h264_idct_add16_8_mmx; + c->h264_idct8_add4 = ff_h264_idct8_add4_8_mmx; + c->h264_idct_add8 = ff_h264_idct_add8_8_mmx; + c->h264_idct_add16intra = ff_h264_idct_add16intra_8_mmx; c->h264_luma_dc_dequant_idct= ff_h264_luma_dc_dequant_idct_mmx; if (mm_flags & AV_CPU_FLAG_MMX2) { - c->h264_idct_dc_add= ff_h264_idct_dc_add_mmx2; - c->h264_idct8_dc_add= ff_h264_idct8_dc_add_mmx2; - c->h264_idct_add16 = ff_h264_idct_add16_mmx2; - 
c->h264_idct8_add4 = ff_h264_idct8_add4_mmx2; - c->h264_idct_add8 = ff_h264_idct_add8_mmx2; - c->h264_idct_add16intra= ff_h264_idct_add16intra_mmx2; + c->h264_idct_dc_add = ff_h264_idct_dc_add_8_mmx2; + c->h264_idct8_dc_add = ff_h264_idct8_dc_add_8_mmx2; + c->h264_idct_add16 = ff_h264_idct_add16_8_mmx2; + c->h264_idct8_add4 = ff_h264_idct8_add4_8_mmx2; + c->h264_idct_add8 = ff_h264_idct_add8_8_mmx2; + c->h264_idct_add16intra= ff_h264_idct_add16intra_8_mmx2; c->h264_v_loop_filter_chroma= ff_deblock_v_chroma_8_mmxext; c->h264_h_loop_filter_chroma= ff_deblock_h_chroma_8_mmxext; @@ -359,8 +384,12 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth) c->biweight_h264_pixels_tab[7]= ff_h264_biweight_4x2_mmx2; if (mm_flags&AV_CPU_FLAG_SSE2) { - c->h264_idct8_add = ff_h264_idct8_add_sse2; - c->h264_idct8_add4= ff_h264_idct8_add4_sse2; + c->h264_idct8_add = ff_h264_idct8_add_8_sse2; + + c->h264_idct_add16 = ff_h264_idct_add16_8_sse2; + c->h264_idct8_add4 = ff_h264_idct8_add4_8_sse2; + c->h264_idct_add8 = ff_h264_idct_add8_8_sse2; + c->h264_idct_add16intra = ff_h264_idct_add16intra_8_sse2; c->h264_luma_dc_dequant_idct= ff_h264_luma_dc_dequant_idct_sse2; c->weight_h264_pixels_tab[0]= ff_h264_weight_16x16_sse2; @@ -381,10 +410,6 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth) c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_sse2; c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_sse2; #endif - - c->h264_idct_add16 = ff_h264_idct_add16_sse2; - c->h264_idct_add8 = ff_h264_idct_add8_sse2; - c->h264_idct_add16intra = ff_h264_idct_add16intra_sse2; } if (mm_flags&AV_CPU_FLAG_SSSE3) { c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_ssse3; @@ -416,7 +441,19 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth) c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_mmxext; c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_mmxext; #endif + c->h264_idct_dc_add= ff_h264_idct_dc_add_10_mmx2; if 
(mm_flags&AV_CPU_FLAG_SSE2) { + c->h264_idct_add = ff_h264_idct_add_10_sse2; + c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_sse2; + + c->h264_idct_add16 = ff_h264_idct_add16_10_sse2; + c->h264_idct_add8 = ff_h264_idct_add8_10_sse2; + c->h264_idct_add16intra= ff_h264_idct_add16intra_10_sse2; +#if HAVE_ALIGNED_STACK + c->h264_idct8_add = ff_h264_idct8_add_10_sse2; + c->h264_idct8_add4 = ff_h264_idct8_add4_10_sse2; +#endif + c->h264_v_loop_filter_chroma= ff_deblock_v_chroma_10_sse2; c->h264_v_loop_filter_chroma_intra= ff_deblock_v_chroma_intra_10_sse2; #if HAVE_ALIGNED_STACK @@ -426,7 +463,20 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth) c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_sse2; #endif } +#if HAVE_AVX if (mm_flags&AV_CPU_FLAG_AVX) { + c->h264_idct_dc_add = + c->h264_idct_add = ff_h264_idct_add_10_avx; + c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_avx; + + c->h264_idct_add16 = ff_h264_idct_add16_10_avx; + c->h264_idct_add8 = ff_h264_idct_add8_10_avx; + c->h264_idct_add16intra= ff_h264_idct_add16intra_10_avx; +#if HAVE_ALIGNED_STACK + c->h264_idct8_add = ff_h264_idct8_add_10_avx; + c->h264_idct8_add4 = ff_h264_idct8_add4_10_avx; +#endif + c->h264_v_loop_filter_chroma= ff_deblock_v_chroma_10_avx; c->h264_v_loop_filter_chroma_intra= ff_deblock_v_chroma_intra_10_avx; #if HAVE_ALIGNED_STACK @@ -436,6 +486,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth) c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_avx; #endif } +#endif /* HAVE_AVX */ } } #endif diff --git a/libavcodec/x86/mpegaudiodec_mmx.c b/libavcodec/x86/mpegaudiodec_mmx.c index 2f34281510..d7f8a0a142 100644 --- a/libavcodec/x86/mpegaudiodec_mmx.c +++ b/libavcodec/x86/mpegaudiodec_mmx.c @@ -21,9 +21,8 @@ #include "libavutil/cpu.h" #include "libavutil/x86_cpu.h" - -#define CONFIG_FLOAT 1 -#include "libavcodec/mpegaudio.h" +#include "libavcodec/dsputil.h" +#include "libavcodec/mpegaudiodsp.h" #define MACS(rt, ra, rb) rt+=(ra)*(rb) 
#define MLSS(rt, ra, rb) rt-=(ra)*(rb) @@ -148,11 +147,11 @@ static void apply_window_mp3(float *in, float *win, int *unused, float *out, *out = sum; } -void ff_mpegaudiodec_init_mmx(MPADecodeContext *s) +void ff_mpadsp_init_mmx(MPADSPContext *s) { int mm_flags = av_get_cpu_flags(); if (mm_flags & AV_CPU_FLAG_SSE2) { - s->apply_window_mp3 = apply_window_mp3; + s->apply_window_float = apply_window_mp3; } } diff --git a/libavcodec/x86/vp8dsp.asm b/libavcodec/x86/vp8dsp.asm index 9b175c1488..14b49705dc 100644 --- a/libavcodec/x86/vp8dsp.asm +++ b/libavcodec/x86/vp8dsp.asm @@ -1106,10 +1106,10 @@ cglobal vp8_idct_dc_add4uv_mmx, 3, 3 ; %5/%6 are temporary registers ; we assume m6/m7 have constant words 20091/17734 loaded in them %macro VP8_IDCT_TRANSFORM4x4_1D 6 - SUMSUB_BA m%3, m%1, m%5 ;t0, t1 + SUMSUB_BA w, %3, %1, %5 ;t0, t1 VP8_MULTIPLY_SUMSUB m%2, m%4, m%5,m%6 ;t2, t3 - SUMSUB_BA m%4, m%3, m%5 ;tmp0, tmp3 - SUMSUB_BA m%2, m%1, m%5 ;tmp1, tmp2 + SUMSUB_BA w, %4, %3, %5 ;tmp0, tmp3 + SUMSUB_BA w, %2, %1, %5 ;tmp1, tmp2 SWAP %4, %1 SWAP %4, %3 %endmacro @@ -1181,8 +1181,8 @@ VP8_IDCT_ADD sse %endmacro %macro HADAMARD4_1D 4 - SUMSUB_BADC m%2, m%1, m%4, m%3 - SUMSUB_BADC m%4, m%2, m%3, m%1 + SUMSUB_BADC w, %2, %1, %4, %3 + SUMSUB_BADC w, %4, %2, %3, %1 SWAP %1, %4, %3 %endmacro diff --git a/libavcodec/x86/x86inc.asm b/libavcodec/x86/x86inc.asm index 53091c14c9..c84d5566a2 100644 --- a/libavcodec/x86/x86inc.asm +++ b/libavcodec/x86/x86inc.asm @@ -63,11 +63,22 @@ %elifidn __OUTPUT_FORMAT__,macho SECTION .text align=%1 fakegot: + %elifidn __OUTPUT_FORMAT__,aout + section .text %else SECTION .rodata align=%1 %endif %endmacro +; aout does not support align= +%macro SECTION_TEXT 0-1 16 + %ifidn __OUTPUT_FORMAT__,aout + SECTION .text + %else + SECTION .text align=%1 + %endif +%endmacro + %ifdef WIN64 %define PIC %elifndef ARCH_X86_64 diff --git a/libavcodec/x86/x86util.asm b/libavcodec/x86/x86util.asm index 7e5b67419a..b8ca348b68 100644 --- a/libavcodec/x86/x86util.asm +++ 
b/libavcodec/x86/x86util.asm @@ -41,6 +41,13 @@ SWAP %2, %4, %3 %endmacro +%macro SBUTTERFLYPS 3 + movaps m%3, m%1 + unpcklps m%1, m%2 + unpckhps m%3, m%2 + SWAP %2, %3 +%endmacro + %macro TRANSPOSE4x4B 5 SBUTTERFLY bw, %1, %2, %5 SBUTTERFLY bw, %3, %4, %5 @@ -74,6 +81,19 @@ SWAP %2, %3 %endmacro +; identical behavior to TRANSPOSE4x4D, but using SSE1 float ops +%macro TRANSPOSE4x4PS 5 + SBUTTERFLYPS %1, %2, %5 + SBUTTERFLYPS %3, %4, %5 + movaps m%5, m%1 + movlhps m%1, m%3 + movhlps m%3, m%5 + movaps m%5, m%2 + movlhps m%2, m%4 + movhlps m%4, m%5 + SWAP %2, %3 +%endmacro + %macro TRANSPOSE8x8W 9-11 %ifdef ARCH_X86_64 SBUTTERFLY wd, %1, %2, %9 @@ -208,6 +228,17 @@ pminub %2, %4 %endmacro +%macro ABSD2_MMX 4 + pxor %3, %3 + pxor %4, %4 + pcmpgtd %3, %1 + pcmpgtd %4, %2 + pxor %1, %3 + pxor %2, %4 + psubd %1, %3 + psubd %2, %4 +%endmacro + %macro ABSB_SSSE3 2 pabsb %1, %1 %endmacro @@ -230,12 +261,7 @@ %macro SPLATB_MMX 3 movd %1, [%2-3] ;to avoid crossing a cacheline punpcklbw %1, %1 -%if mmsize==16 - pshuflw %1, %1, 0xff - punpcklqdq %1, %1 -%else - pshufw %1, %1, 0xff -%endif + SPLATW %1, %1, 3 %endmacro %macro SPLATB_SSSE3 3 @@ -243,125 +269,169 @@ pshufb %1, %3 %endmacro -%macro PALIGNR_MMX 4 - %ifnidn %4, %2 +%macro PALIGNR_MMX 4-5 ; [dst,] src1, src2, imm, tmp + %define %%dst %1 +%if %0==5 +%ifnidn %1, %2 + mova %%dst, %2 +%endif + %rotate 1 +%endif +%ifnidn %4, %2 mova %4, %2 - %endif - %if mmsize == 8 - psllq %1, (8-%3)*8 +%endif +%if mmsize==8 + psllq %%dst, (8-%3)*8 psrlq %4, %3*8 - %else - pslldq %1, 16-%3 +%else + pslldq %%dst, 16-%3 psrldq %4, %3 - %endif - por %1, %4 +%endif + por %%dst, %4 %endmacro -%macro PALIGNR_SSSE3 4 +%macro PALIGNR_SSSE3 4-5 +%if %0==5 + palignr %1, %2, %3, %4 +%else palignr %1, %2, %3 +%endif %endmacro %macro DEINTB 5 ; mask, reg1, mask, reg2, optional src to fill masks from %ifnum %5 - mova m%1, m%5 - mova m%3, m%5 + pand m%3, m%5, m%4 ; src .. y6 .. y4 + pand m%1, m%5, m%2 ; dst .. y6 .. 
y4 %else mova m%1, %5 - mova m%3, m%1 + pand m%3, m%1, m%4 ; src .. y6 .. y4 + pand m%1, m%1, m%2 ; dst .. y6 .. y4 %endif - pand m%1, m%2 ; dst .. y6 .. y4 - pand m%3, m%4 ; src .. y6 .. y4 - psrlw m%2, 8 ; dst .. y7 .. y5 - psrlw m%4, 8 ; src .. y7 .. y5 + psrlw m%2, 8 ; dst .. y7 .. y5 + psrlw m%4, 8 ; src .. y7 .. y5 %endmacro -%macro SUMSUB_BA 2-3 -%if %0==2 - paddw %1, %2 - paddw %2, %2 - psubw %2, %1 +%macro SUMSUB_BA 3-4 +%if %0==3 + padd%1 m%2, m%3 + padd%1 m%3, m%3 + psub%1 m%3, m%2 +%else +%if avx_enabled == 0 + mova m%4, m%2 + padd%1 m%2, m%3 + psub%1 m%3, m%4 %else - mova %3, %1 - paddw %1, %2 - psubw %2, %3 + padd%1 m%4, m%2, m%3 + psub%1 m%3, m%2 + SWAP %2, %4 +%endif %endif %endmacro -%macro SUMSUB_BADC 4-5 -%if %0==5 - SUMSUB_BA %1, %2, %5 - SUMSUB_BA %3, %4, %5 +%macro SUMSUB_BADC 5-6 +%if %0==6 + SUMSUB_BA %1, %2, %3, %6 + SUMSUB_BA %1, %4, %5, %6 %else - paddw %1, %2 - paddw %3, %4 - paddw %2, %2 - paddw %4, %4 - psubw %2, %1 - psubw %4, %3 + padd%1 m%2, m%3 + padd%1 m%4, m%5 + padd%1 m%3, m%3 + padd%1 m%5, m%5 + psub%1 m%3, m%2 + psub%1 m%5, m%4 %endif %endmacro -%macro SUMSUB2_AB 3 - mova %3, %1 - paddw %1, %1 - paddw %1, %2 - psubw %3, %2 - psubw %3, %2 +%macro SUMSUB2_AB 4 +%ifnum %3 + psub%1 m%4, m%2, m%3 + psub%1 m%4, m%3 + padd%1 m%2, m%2 + padd%1 m%2, m%3 +%else + mova m%4, m%2 + padd%1 m%2, m%2 + padd%1 m%2, %3 + psub%1 m%4, %3 + psub%1 m%4, %3 +%endif %endmacro -%macro SUMSUB2_BA 3 - mova m%3, m%1 - paddw m%1, m%2 - paddw m%1, m%2 - psubw m%2, m%3 - psubw m%2, m%3 +%macro SUMSUB2_BA 4 +%if avx_enabled == 0 + mova m%4, m%2 + padd%1 m%2, m%3 + padd%1 m%2, m%3 + psub%1 m%3, m%4 + psub%1 m%3, m%4 +%else + padd%1 m%4, m%2, m%3 + padd%1 m%4, m%3 + psub%1 m%3, m%2 + psub%1 m%3, m%2 + SWAP %2, %4 +%endif %endmacro -%macro SUMSUBD2_AB 4 - mova %4, %1 - mova %3, %2 - psraw %2, 1 ; %2: %2>>1 - psraw %1, 1 ; %1: %1>>1 - paddw %2, %4 ; %2: %2>>1+%1 - psubw %1, %3 ; %1: %1>>1-%2 +%macro SUMSUBD2_AB 5 +%ifnum %4 + psra%1 m%5, m%2, 1 ; %3: %3>>1 + 
psra%1 m%4, m%3, 1 ; %2: %2>>1 + padd%1 m%4, m%2 ; %3: %3>>1+%2 + psub%1 m%5, m%3 ; %2: %2>>1-%3 + SWAP %2, %5 + SWAP %3, %4 +%else + mova %5, m%2 + mova %4, m%3 + psra%1 m%3, 1 ; %3: %3>>1 + psra%1 m%2, 1 ; %2: %2>>1 + padd%1 m%3, %5 ; %3: %3>>1+%2 + psub%1 m%2, %4 ; %2: %2>>1-%3 +%endif %endmacro %macro DCT4_1D 5 %ifnum %5 - SUMSUB_BADC m%4, m%1, m%3, m%2; m%5 - SUMSUB_BA m%3, m%4, m%5 - SUMSUB2_AB m%1, m%2, m%5 + SUMSUB_BADC w, %4, %1, %3, %2, %5 + SUMSUB_BA w, %3, %4, %5 + SUMSUB2_AB w, %1, %2, %5 SWAP %1, %3, %4, %5, %2 %else - SUMSUB_BADC m%4, m%1, m%3, m%2 - SUMSUB_BA m%3, m%4 - mova [%5], m%2 - SUMSUB2_AB m%1, [%5], m%2 + SUMSUB_BADC w, %4, %1, %3, %2 + SUMSUB_BA w, %3, %4 + mova [%5], m%2 + SUMSUB2_AB w, %1, [%5], %2 SWAP %1, %3, %4, %2 %endif %endmacro -%macro IDCT4_1D 5-6 -%ifnum %5 - SUMSUBD2_AB m%2, m%4, m%6, m%5 - ; %2: %2>>1-%4 %4: %2+%4>>1 - SUMSUB_BA m%3, m%1, m%6 - ; %3: %1+%3 %1: %1-%3 - SUMSUB_BADC m%4, m%3, m%2, m%1, m%6 - ; %4: %1+%3 + (%2+%4>>1) - ; %3: %1+%3 - (%2+%4>>1) - ; %2: %1-%3 + (%2>>1-%4) - ; %1: %1-%3 - (%2>>1-%4) +%macro IDCT4_1D 6-7 +%ifnum %6 + SUMSUBD2_AB %1, %3, %5, %7, %6 + ; %3: %3>>1-%5 %5: %3+%5>>1 + SUMSUB_BA %1, %4, %2, %7 + ; %4: %2+%4 %2: %2-%4 + SUMSUB_BADC %1, %5, %4, %3, %2, %7 + ; %5: %2+%4 + (%3+%5>>1) + ; %4: %2+%4 - (%3+%5>>1) + ; %3: %2-%4 + (%3>>1-%5) + ; %2: %2-%4 - (%3>>1-%5) %else - SUMSUBD2_AB m%2, m%4, [%5], [%5+16] - SUMSUB_BA m%3, m%1 - SUMSUB_BADC m%4, m%3, m%2, m%1 +%ifidn %1, w + SUMSUBD2_AB %1, %3, %5, [%6], [%6+16] +%else + SUMSUBD2_AB %1, %3, %5, [%6], [%6+32] +%endif + SUMSUB_BA %1, %4, %2 + SUMSUB_BADC %1, %5, %4, %3, %2 %endif - SWAP %1, %4, %3 - ; %1: %1+%3 + (%2+%4>>1) row0 - ; %2: %1-%3 + (%2>>1-%4) row1 - ; %3: %1-%3 - (%2>>1-%4) row2 - ; %4: %1+%3 - (%2+%4>>1) row3 + SWAP %2, %5, %4 + ; %2: %2+%4 + (%3+%5>>1) row0 + ; %3: %2-%4 + (%3>>1-%5) row1 + ; %4: %2-%4 - (%3>>1-%5) row2 + ; %5: %2+%4 - (%3+%5>>1) row3 %endmacro diff --git a/libavcodec/xvmc_internal.h b/libavcodec/xvmc_internal.h 
index 9bb8909d28..04197cefae 100644 --- a/libavcodec/xvmc_internal.h +++ b/libavcodec/xvmc_internal.h @@ -18,8 +18,8 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#ifndef AVCODEC_INTERNAL_XVMC_H -#define AVCODEC_INTERNAL_XVMC_H +#ifndef AVCODEC_XVMC_INTERNAL_H +#define AVCODEC_XVMC_INTERNAL_H #include "avcodec.h" #include "mpegvideo.h" @@ -30,4 +30,4 @@ int ff_xvmc_field_start(MpegEncContext*s, AVCodecContext *avctx); void ff_xvmc_field_end(MpegEncContext *s); void ff_xvmc_decode_mb(MpegEncContext *s); -#endif /* AVCODEC_INTERNAL_XVMC_H */ +#endif /* AVCODEC_XVMC_INTERNAL_H */ diff --git a/libavcodec/zmbvenc.c b/libavcodec/zmbvenc.c index 55aa7b936b..4c98987fea 100644 --- a/libavcodec/zmbvenc.c +++ b/libavcodec/zmbvenc.c @@ -181,7 +181,7 @@ static int encode_frame(AVCodecContext *avctx, uint8_t *buf, int buf_size, void int x, y, bh2, bw2, xored; uint8_t *tsrc, *tprev; uint8_t *mv; - int mx, my, bv; + int mx, my; bw = (avctx->width + ZMBV_BLOCK - 1) / ZMBV_BLOCK; bh = (avctx->height + ZMBV_BLOCK - 1) / ZMBV_BLOCK; @@ -197,7 +197,7 @@ static int encode_frame(AVCodecContext *avctx, uint8_t *buf, int buf_size, void tsrc = src + x; tprev = prev + x; - bv = zmbv_me(c, tsrc, p->linesize[0], tprev, c->pstride, x, y, &mx, &my, &xored); + zmbv_me(c, tsrc, p->linesize[0], tprev, c->pstride, x, y, &mx, &my, &xored); mv[0] = (mx << 1) | !!xored; mv[1] = my << 1; tprev += mx + my * c->pstride; diff --git a/libavdevice/Makefile b/libavdevice/Makefile index 5cfc5e8ecc..60103a4864 100644 --- a/libavdevice/Makefile +++ b/libavdevice/Makefile @@ -13,11 +13,15 @@ OBJS-$(CONFIG_ALSA_INDEV) += alsa-audio-common.o \ OBJS-$(CONFIG_ALSA_OUTDEV) += alsa-audio-common.o \ alsa-audio-enc.o OBJS-$(CONFIG_BKTR_INDEV) += bktr.o +OBJS-$(CONFIG_DSHOW_INDEV) += dshow.o dshow_enummediatypes.o \ + dshow_enumpins.o dshow_filter.o \ + dshow_pin.o dshow_common.o OBJS-$(CONFIG_DV1394_INDEV) += dv1394.o OBJS-$(CONFIG_FBDEV_INDEV) += fbdev.o OBJS-$(CONFIG_JACK_INDEV) 
+= jack_audio.o OBJS-$(CONFIG_OSS_INDEV) += oss_audio.o OBJS-$(CONFIG_OSS_OUTDEV) += oss_audio.o +OBJS-$(CONFIG_SDL_OUTDEV) += sdl.o OBJS-$(CONFIG_SNDIO_INDEV) += sndio_common.o sndio_dec.o OBJS-$(CONFIG_SNDIO_OUTDEV) += sndio_common.o sndio_enc.o OBJS-$(CONFIG_V4L2_INDEV) += v4l2.o diff --git a/libavdevice/alldevices.c b/libavdevice/alldevices.c index a0c9b08c6f..7846704861 100644 --- a/libavdevice/alldevices.c +++ b/libavdevice/alldevices.c @@ -19,7 +19,6 @@ */ #include "config.h" -#include "libavformat/avformat.h" #include "avdevice.h" #define REGISTER_OUTDEV(X,x) { \ @@ -41,13 +40,17 @@ void avdevice_register_all(void) /* devices */ REGISTER_INOUTDEV (ALSA, alsa); REGISTER_INDEV (BKTR, bktr); + REGISTER_INDEV (DSHOW, dshow); REGISTER_INDEV (DV1394, dv1394); REGISTER_INDEV (FBDEV, fbdev); REGISTER_INDEV (JACK, jack); REGISTER_INOUTDEV (OSS, oss); + REGISTER_OUTDEV (SDL, sdl); REGISTER_INOUTDEV (SNDIO, sndio); REGISTER_INDEV (V4L2, v4l2); +#if FF_API_V4L REGISTER_INDEV (V4L, v4l); +#endif REGISTER_INDEV (VFWCAP, vfwcap); REGISTER_INDEV (X11_GRAB_DEVICE, x11_grab_device); diff --git a/libavdevice/alsa-audio-common.c b/libavdevice/alsa-audio-common.c index c00e973e1a..38466a06ce 100644 --- a/libavdevice/alsa-audio-common.c +++ b/libavdevice/alsa-audio-common.c @@ -29,7 +29,7 @@ */ #include <alsa/asoundlib.h> -#include "libavformat/avformat.h" +#include "avdevice.h" #include "alsa-audio.h" diff --git a/libavdevice/alsa-audio-dec.c b/libavdevice/alsa-audio-dec.c index 8ee0e52642..2424c022d3 100644 --- a/libavdevice/alsa-audio-dec.c +++ b/libavdevice/alsa-audio-dec.c @@ -46,8 +46,9 @@ */ #include <alsa/asoundlib.h> -#include "libavformat/avformat.h" +#include "libavutil/opt.h" +#include "avdevice.h" #include "alsa-audio.h" static av_cold int audio_read_header(AVFormatContext *s1, @@ -56,21 +57,16 @@ static av_cold int audio_read_header(AVFormatContext *s1, AlsaData *s = s1->priv_data; AVStream *st; int ret; - unsigned int sample_rate; enum CodecID codec_id; 
snd_pcm_sw_params_t *sw_params; - if (ap->sample_rate <= 0) { - av_log(s1, AV_LOG_ERROR, "Bad sample rate %d\n", ap->sample_rate); +#if FF_API_FORMAT_PARAMETERS + if (ap->sample_rate > 0) + s->sample_rate = ap->sample_rate; - return AVERROR(EIO); - } - - if (ap->channels <= 0) { - av_log(s1, AV_LOG_ERROR, "Bad channels number %d\n", ap->channels); - - return AVERROR(EIO); - } + if (ap->channels > 0) + s->channels = ap->channels; +#endif st = av_new_stream(s1, 0); if (!st) { @@ -78,10 +74,9 @@ static av_cold int audio_read_header(AVFormatContext *s1, return AVERROR(ENOMEM); } - sample_rate = ap->sample_rate; codec_id = s1->audio_codec_id; - ret = ff_alsa_open(s1, SND_PCM_STREAM_CAPTURE, &sample_rate, ap->channels, + ret = ff_alsa_open(s1, SND_PCM_STREAM_CAPTURE, &s->sample_rate, s->channels, &codec_id); if (ret < 0) { return AVERROR(EIO); @@ -113,8 +108,8 @@ static av_cold int audio_read_header(AVFormatContext *s1, /* take real parameters */ st->codec->codec_type = AVMEDIA_TYPE_AUDIO; st->codec->codec_id = codec_id; - st->codec->sample_rate = sample_rate; - st->codec->channels = ap->channels; + st->codec->sample_rate = s->sample_rate; + st->codec->channels = s->channels; av_set_pts_info(st, 64, 1, 1000000); /* 64 bits pts in us */ return 0; @@ -163,6 +158,19 @@ static int audio_read_packet(AVFormatContext *s1, AVPacket *pkt) return 0; } +static const AVOption options[] = { + { "sample_rate", "", offsetof(AlsaData, sample_rate), FF_OPT_TYPE_INT, {.dbl = 48000}, 1, INT_MAX, AV_OPT_FLAG_DECODING_PARAM }, + { "channels", "", offsetof(AlsaData, channels), FF_OPT_TYPE_INT, {.dbl = 2}, 1, INT_MAX, AV_OPT_FLAG_DECODING_PARAM }, + { NULL }, +}; + +static const AVClass alsa_demuxer_class = { + .class_name = "ALSA demuxer", + .item_name = av_default_item_name, + .option = options, + .version = LIBAVUTIL_VERSION_INT, +}; + AVInputFormat ff_alsa_demuxer = { "alsa", NULL_IF_CONFIG_SMALL("ALSA audio input"), @@ -172,4 +180,5 @@ AVInputFormat ff_alsa_demuxer = { audio_read_packet, 
ff_alsa_close, .flags = AVFMT_NOFILE, + .priv_class = &alsa_demuxer_class, }; diff --git a/libavdevice/alsa-audio-enc.c b/libavdevice/alsa-audio-enc.c index 0bc53b6f54..a53c1763d5 100644 --- a/libavdevice/alsa-audio-enc.c +++ b/libavdevice/alsa-audio-enc.c @@ -38,8 +38,8 @@ */ #include <alsa/asoundlib.h> -#include "libavformat/avformat.h" +#include "avdevice.h" #include "alsa-audio.h" static av_cold int audio_write_header(AVFormatContext *s1) diff --git a/libavdevice/alsa-audio.h b/libavdevice/alsa-audio.h index 8c7c516585..431401bb13 100644 --- a/libavdevice/alsa-audio.h +++ b/libavdevice/alsa-audio.h @@ -32,7 +32,8 @@ #include <alsa/asoundlib.h> #include "config.h" -#include "libavformat/avformat.h" +#include "libavutil/log.h" +#include "avdevice.h" /* XXX: we make the assumption that the soundcard accepts this format */ /* XXX: find better solution with "preinit" method, needed also in @@ -42,12 +43,15 @@ typedef void (*ff_reorder_func)(const void *, void *, int); typedef struct { + AVClass *class; snd_pcm_t *h; int frame_size; ///< preferred size for reads and writes int period_size; ///< bytes per sample * channels ff_reorder_func reorder_func; void *reorder_buf; int reorder_buf_size; ///< in frames + int sample_rate; ///< sample rate set by user + int channels; ///< number of channels set by user } AlsaData; /** diff --git a/libavdevice/avdevice.h b/libavdevice/avdevice.h index 0c29c7b975..0661bcbc2c 100644 --- a/libavdevice/avdevice.h +++ b/libavdevice/avdevice.h @@ -20,9 +20,10 @@ #define AVDEVICE_AVDEVICE_H #include "libavutil/avutil.h" +#include "libavformat/avformat.h" #define LIBAVDEVICE_VERSION_MAJOR 52 -#define LIBAVDEVICE_VERSION_MINOR 4 +#define LIBAVDEVICE_VERSION_MINOR 5 #define LIBAVDEVICE_VERSION_MICRO 0 #define LIBAVDEVICE_VERSION_INT AV_VERSION_INT(LIBAVDEVICE_VERSION_MAJOR, \ @@ -33,6 +34,10 @@ LIBAVDEVICE_VERSION_MICRO) #define LIBAVDEVICE_BUILD LIBAVDEVICE_VERSION_INT +#ifndef FF_API_V4L +#define FF_API_V4L (LIBAVDEVICE_VERSION_MAJOR < 54) 
+#endif + /** * Return the LIBAVDEVICE_VERSION_INT constant. */ diff --git a/libavdevice/bktr.c b/libavdevice/bktr.c index 6378ce7873..f6216e0aa3 100644 --- a/libavdevice/bktr.c +++ b/libavdevice/bktr.c @@ -24,7 +24,9 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#include "libavformat/avformat.h" +#include "libavutil/log.h" +#include "libavutil/opt.h" +#include "libavutil/parseutils.h" #if HAVE_DEV_BKTR_IOCTL_METEOR_H && HAVE_DEV_BKTR_IOCTL_BT848_H # include <dev/bktr/ioctl_meteor.h> # include <dev/bktr/ioctl_bt848.h> @@ -45,14 +47,17 @@ #include <signal.h> #include <stdint.h> #include <strings.h> +#include "avdevice.h" typedef struct { + AVClass *class; int video_fd; int tuner_fd; int width, height; - int frame_rate; - int frame_rate_base; uint64_t per_frame; + int standard; + char *video_size; /**< String describing video size, set by a private option. */ + char *framerate; /**< Set by a private option. */ } VideoData; @@ -243,54 +248,79 @@ static int grab_read_header(AVFormatContext *s1, AVFormatParameters *ap) VideoData *s = s1->priv_data; AVStream *st; int width, height; - int frame_rate; - int frame_rate_base; - int format = -1; + AVRational fps; + int ret = 0; - if (ap->width <= 0 || ap->height <= 0 || ap->time_base.den <= 0) - return -1; +#if FF_API_FORMAT_PARAMETERS + if (ap->standard) { + if (!strcasecmp(ap->standard, "pal")) + s->standard = PAL; + else if (!strcasecmp(ap->standard, "secam")) + s->standard = SECAM; + else if (!strcasecmp(ap->standard, "ntsc")) + s->standard = NTSC; + } +#endif - width = ap->width; - height = ap->height; - frame_rate = ap->time_base.den; - frame_rate_base = ap->time_base.num; + if ((ret = av_parse_video_size(&width, &height, s->video_size)) < 0) { + av_log(s1, AV_LOG_ERROR, "Couldn't parse video size.\n"); + goto out; + } + + if (!s->framerate) + switch (s->standard) { + case PAL: s->framerate = av_strdup("pal"); break; + case NTSC: s->framerate = av_strdup("ntsc"); break; + case 
SECAM: s->framerate = av_strdup("25"); break; + default: + av_log(s1, AV_LOG_ERROR, "Unknown standard.\n"); + ret = AVERROR(EINVAL); + goto out; + } + if ((ret = av_parse_video_rate(&fps, s->framerate)) < 0) { + av_log(s1, AV_LOG_ERROR, "Couldn't parse framerate.\n"); + goto out; + } +#if FF_API_FORMAT_PARAMETERS + if (ap->width > 0) + width = ap->width; + if (ap->height > 0) + height = ap->height; + if (ap->time_base.num) + fps = (AVRational){ap->time_base.den, ap->time_base.num}; +#endif st = av_new_stream(s1, 0); - if (!st) - return AVERROR(ENOMEM); + if (!st) { + ret = AVERROR(ENOMEM); + goto out; + } av_set_pts_info(st, 64, 1, 1000000); /* 64 bits pts in use */ s->width = width; s->height = height; - s->frame_rate = frame_rate; - s->frame_rate_base = frame_rate_base; - s->per_frame = ((uint64_t)1000000 * s->frame_rate_base) / s->frame_rate; + s->per_frame = ((uint64_t)1000000 * fps.den) / fps.num; st->codec->codec_type = AVMEDIA_TYPE_VIDEO; st->codec->pix_fmt = PIX_FMT_YUV420P; st->codec->codec_id = CODEC_ID_RAWVIDEO; st->codec->width = width; st->codec->height = height; - st->codec->time_base.den = frame_rate; - st->codec->time_base.num = frame_rate_base; + st->codec->time_base.den = fps.num; + st->codec->time_base.num = fps.den; - if (ap->standard) { - if (!strcasecmp(ap->standard, "pal")) - format = PAL; - else if (!strcasecmp(ap->standard, "secam")) - format = SECAM; - else if (!strcasecmp(ap->standard, "ntsc")) - format = NTSC; - } - if (bktr_init(s1->filename, width, height, format, - &(s->video_fd), &(s->tuner_fd), -1, 0.0) < 0) - return AVERROR(EIO); + if (bktr_init(s1->filename, width, height, s->standard, + &(s->video_fd), &(s->tuner_fd), -1, 0.0) < 0) { + ret = AVERROR(EIO); + goto out; + } nsignals = 0; last_frame_time = 0; - return 0; +out: + return ret; } static int grab_read_close(AVFormatContext *s1) @@ -311,6 +341,28 @@ static int grab_read_close(AVFormatContext *s1) return 0; } +#define OFFSET(x) offsetof(VideoData, x) +#define DEC 
AV_OPT_FLAG_DECODING_PARAM +static const AVOption options[] = { + { "standard", "", offsetof(VideoData, standard), FF_OPT_TYPE_INT, {.dbl = VIDEO_FORMAT}, PAL, NTSCJ, AV_OPT_FLAG_DECODING_PARAM, "standard" }, + { "PAL", "", 0, FF_OPT_TYPE_CONST, {.dbl = PAL}, 0, 0, AV_OPT_FLAG_DECODING_PARAM, "standard" }, + { "NTSC", "", 0, FF_OPT_TYPE_CONST, {.dbl = NTSC}, 0, 0, AV_OPT_FLAG_DECODING_PARAM, "standard" }, + { "SECAM", "", 0, FF_OPT_TYPE_CONST, {.dbl = SECAM}, 0, 0, AV_OPT_FLAG_DECODING_PARAM, "standard" }, + { "PALN", "", 0, FF_OPT_TYPE_CONST, {.dbl = PALN}, 0, 0, AV_OPT_FLAG_DECODING_PARAM, "standard" }, + { "PALM", "", 0, FF_OPT_TYPE_CONST, {.dbl = PALM}, 0, 0, AV_OPT_FLAG_DECODING_PARAM, "standard" }, + { "NTSCJ", "", 0, FF_OPT_TYPE_CONST, {.dbl = NTSCJ}, 0, 0, AV_OPT_FLAG_DECODING_PARAM, "standard" }, + { "video_size", "A string describing frame size, such as 640x480 or hd720.", OFFSET(video_size), FF_OPT_TYPE_STRING, {.str = "vga"}, 0, 0, DEC }, + { "framerate", "", OFFSET(framerate), FF_OPT_TYPE_STRING, {.str = NULL}, 0, 0, DEC }, + { NULL }, +}; + +static const AVClass bktr_class = { + .class_name = "BKTR grab interface", + .item_name = av_default_item_name, + .option = options, + .version = LIBAVUTIL_VERSION_INT, +}; + AVInputFormat ff_bktr_demuxer = { "bktr", NULL_IF_CONFIG_SMALL("video grab"), @@ -320,4 +372,5 @@ AVInputFormat ff_bktr_demuxer = { grab_read_packet, grab_read_close, .flags = AVFMT_NOFILE, + .priv_class = &bktr_class, }; diff --git a/libavdevice/dshow.c b/libavdevice/dshow.c new file mode 100644 index 0000000000..348fda6ecd --- /dev/null +++ b/libavdevice/dshow.c @@ -0,0 +1,646 @@ +/* + * Directshow capture interface + * Copyright (c) 2010 Ramiro Polla + * + * This file is part of FFmpeg. 
+ * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavformat/timefilter.h" + +#include "avdevice.h" +#include "dshow.h" + +struct dshow_ctx { + IGraphBuilder *graph; + + char *device_name[2]; + + IBaseFilter *device_filter[2]; + IPin *device_pin[2]; + libAVFilter *capture_filter[2]; + libAVPin *capture_pin[2]; + + HANDLE mutex; + HANDLE event; + AVPacketList *pktl; + + unsigned int curbufsize; + unsigned int video_frame_num; + + IMediaControl *control; + + TimeFilter *timefilter; +}; + +static enum PixelFormat dshow_pixfmt(DWORD biCompression, WORD biBitCount) +{ + switch(biCompression) { + case MKTAG('U', 'Y', 'V', 'Y'): + return PIX_FMT_UYVY422; + case MKTAG('Y', 'U', 'Y', '2'): + return PIX_FMT_YUYV422; + case MKTAG('I', '4', '2', '0'): + return PIX_FMT_YUV420P; + case BI_RGB: + switch(biBitCount) { /* 1-8 are untested */ + case 1: + return PIX_FMT_MONOWHITE; + case 4: + return PIX_FMT_RGB4; + case 8: + return PIX_FMT_RGB8; + case 16: + return PIX_FMT_RGB555; + case 24: + return PIX_FMT_BGR24; + case 32: + return PIX_FMT_RGB32; + } + } + return PIX_FMT_NONE; +} + +static enum CodecID dshow_codecid(DWORD biCompression) +{ + switch(biCompression) { + case MKTAG('d', 'v', 's', 'd'): + return CODEC_ID_DVVIDEO; + case MKTAG('M', 'J', 'P', 'G'): + case MKTAG('m', 'j', 'p', 
'g'): + return CODEC_ID_MJPEG; + } + return CODEC_ID_NONE; +} + +static int +dshow_read_close(AVFormatContext *s) +{ + struct dshow_ctx *ctx = s->priv_data; + AVPacketList *pktl; + + if (ctx->control) { + IMediaControl_Stop(ctx->control); + IMediaControl_Release(ctx->control); + } + if (ctx->graph) + IGraphBuilder_Release(ctx->graph); + + /* FIXME remove filters from graph */ + /* FIXME disconnect pins */ + if (ctx->capture_pin[VideoDevice]) + libAVPin_Release(ctx->capture_pin[VideoDevice]); + if (ctx->capture_pin[AudioDevice]) + libAVPin_Release(ctx->capture_pin[AudioDevice]); + if (ctx->capture_filter[VideoDevice]) + libAVFilter_Release(ctx->capture_filter[VideoDevice]); + if (ctx->capture_filter[AudioDevice]) + libAVFilter_Release(ctx->capture_filter[AudioDevice]); + + if (ctx->device_pin[VideoDevice]) + IPin_Release(ctx->device_pin[VideoDevice]); + if (ctx->device_pin[AudioDevice]) + IPin_Release(ctx->device_pin[AudioDevice]); + if (ctx->device_filter[VideoDevice]) + IBaseFilter_Release(ctx->device_filter[VideoDevice]); + if (ctx->device_filter[AudioDevice]) + IBaseFilter_Release(ctx->device_filter[AudioDevice]); + + if (ctx->device_name[0]) + av_free(ctx->device_name[0]); + if (ctx->device_name[1]) + av_free(ctx->device_name[1]); + + if(ctx->mutex) + CloseHandle(ctx->mutex); + if(ctx->event) + CloseHandle(ctx->event); + + pktl = ctx->pktl; + while (pktl) { + AVPacketList *next = pktl->next; + av_destruct_packet(&pktl->pkt); + av_free(pktl); + pktl = next; + } + + return 0; +} + +static char *dup_wchar_to_utf8(wchar_t *w) +{ + char *s = NULL; + int l = WideCharToMultiByte(CP_UTF8, 0, w, -1, 0, 0, 0, 0); + s = av_malloc(l); + if (s) + WideCharToMultiByte(CP_UTF8, 0, w, -1, s, l, 0, 0); + return s; +} + +static int shall_we_drop(AVFormatContext *s) +{ + struct dshow_ctx *ctx = s->priv_data; + const uint8_t dropscore[] = {62, 75, 87, 100}; + const int ndropscores = FF_ARRAY_ELEMS(dropscore); + unsigned int buffer_fullness = 
(ctx->curbufsize*100)/s->max_picture_buffer; + + if(dropscore[++ctx->video_frame_num%ndropscores] <= buffer_fullness) { + av_log(s, AV_LOG_ERROR, + "real-time buffer %d%% full! frame dropped!\n", buffer_fullness); + return 1; + } + + return 0; +} + +static void +callback(void *priv_data, int index, uint8_t *buf, int buf_size, int64_t time) +{ + AVFormatContext *s = priv_data; + struct dshow_ctx *ctx = s->priv_data; + AVPacketList **ppktl, *pktl_next; + +// dump_videohdr(s, vdhdr); + + if(shall_we_drop(s)) + return; + + WaitForSingleObject(ctx->mutex, INFINITE); + + pktl_next = av_mallocz(sizeof(AVPacketList)); + if(!pktl_next) + goto fail; + + if(av_new_packet(&pktl_next->pkt, buf_size) < 0) { + av_free(pktl_next); + goto fail; + } + + pktl_next->pkt.stream_index = index; + pktl_next->pkt.pts = time; + memcpy(pktl_next->pkt.data, buf, buf_size); + + for(ppktl = &ctx->pktl ; *ppktl ; ppktl = &(*ppktl)->next); + *ppktl = pktl_next; + + ctx->curbufsize += buf_size; + + SetEvent(ctx->event); + ReleaseMutex(ctx->mutex); + + return; +fail: + ReleaseMutex(ctx->mutex); + return; +} + +static int +dshow_open_device(AVFormatContext *avctx, ICreateDevEnum *devenum, + enum dshowDeviceType devtype) +{ + struct dshow_ctx *ctx = avctx->priv_data; + IBaseFilter *device_filter = NULL; + IEnumMoniker *classenum = NULL; + IGraphBuilder *graph = ctx->graph; + IEnumPins *pins = 0; + IMoniker *m = NULL; + IPin *device_pin = NULL; + libAVPin *capture_pin = NULL; + libAVFilter *capture_filter = NULL; + const char *device_name = ctx->device_name[devtype]; + int ret = AVERROR(EIO); + IPin *pin; + int r, i; + + const GUID *device_guid[2] = { &CLSID_VideoInputDeviceCategory, + &CLSID_AudioInputDeviceCategory }; + const GUID *mediatype[2] = { &MEDIATYPE_Video, &MEDIATYPE_Audio }; + const char *devtypename = (devtype == VideoDevice) ? 
"video" : "audio"; + const wchar_t *filter_name[2] = { L"Audio capture filter", L"Video capture filter" }; + + r = ICreateDevEnum_CreateClassEnumerator(devenum, device_guid[devtype], + (IEnumMoniker **) &classenum, 0); + if (r != S_OK) { + av_log(avctx, AV_LOG_ERROR, "Could not enumerate %s devices.\n", + devtypename); + goto error; + } + + while (IEnumMoniker_Next(classenum, 1, &m, NULL) == S_OK && !device_filter) { + IPropertyBag *bag = NULL; + char *buf = NULL; + VARIANT var; + + r = IMoniker_BindToStorage(m, 0, 0, &IID_IPropertyBag, (void *) &bag); + if (r != S_OK) + goto fail1; + + var.vt = VT_BSTR; + r = IPropertyBag_Read(bag, L"FriendlyName", &var, NULL); + if (r != S_OK) + goto fail1; + + buf = dup_wchar_to_utf8(var.bstrVal); + + if (strcmp(device_name, buf)) + goto fail1; + + IMoniker_BindToObject(m, 0, 0, &IID_IBaseFilter, (void *) &device_filter); + +fail1: + if (buf) + av_free(buf); + if (bag) + IPropertyBag_Release(bag); + IMoniker_Release(m); + } + + if (!device_filter) { + av_log(avctx, AV_LOG_ERROR, "Could not find %s device.\n", + devtypename); + goto error; + } + ctx->device_filter [devtype] = device_filter; + + r = IGraphBuilder_AddFilter(graph, device_filter, NULL); + if (r != S_OK) { + av_log(avctx, AV_LOG_ERROR, "Could not add device filter to graph.\n"); + goto error; + } + + r = IBaseFilter_EnumPins(device_filter, &pins); + if (r != S_OK) { + av_log(avctx, AV_LOG_ERROR, "Could not enumerate pins.\n"); + goto error; + } + + i = 0; + while (IEnumPins_Next(pins, 1, &pin, NULL) == S_OK && !device_pin) { + IKsPropertySet *p = NULL; + IEnumMediaTypes *types; + PIN_INFO info = {0}; + AM_MEDIA_TYPE *type; + GUID category; + DWORD r2; + + IPin_QueryPinInfo(pin, &info); + IBaseFilter_Release(info.pFilter); + + if (info.dir != PINDIR_OUTPUT) + goto next; + if (IPin_QueryInterface(pin, &IID_IKsPropertySet, (void **) &p) != S_OK) + goto next; + if (IKsPropertySet_Get(p, &ROPSETID_Pin, AMPROPERTY_PIN_CATEGORY, + NULL, 0, &category, sizeof(GUID), &r2) != 
S_OK) + goto next; + if (!IsEqualGUID(&category, &PIN_CATEGORY_CAPTURE)) + goto next; + + if (IPin_EnumMediaTypes(pin, &types) != S_OK) + goto next; + + IEnumMediaTypes_Reset(types); + while (IEnumMediaTypes_Next(types, 1, &type, NULL) == S_OK && !device_pin) { + if (IsEqualGUID(&type->majortype, mediatype[devtype])) { + device_pin = pin; + goto next; + } + CoTaskMemFree(type); + } + +next: + if (types) + IEnumMediaTypes_Release(types); + if (p) + IKsPropertySet_Release(p); + if (device_pin != pin) + IPin_Release(pin); + } + + if (!device_pin) { + av_log(avctx, AV_LOG_ERROR, + "Could not find output pin from %s capture device.\n", devtypename); + goto error; + } + ctx->device_pin[devtype] = device_pin; + + capture_filter = libAVFilter_Create(avctx, callback, devtype); + if (!capture_filter) { + av_log(avctx, AV_LOG_ERROR, "Could not create grabber filter.\n"); + goto error; + } + ctx->capture_filter[devtype] = capture_filter; + + r = IGraphBuilder_AddFilter(graph, (IBaseFilter *) capture_filter, + filter_name[devtype]); + if (r != S_OK) { + av_log(avctx, AV_LOG_ERROR, "Could not add capture filter to graph\n"); + goto error; + } + + libAVPin_AddRef(capture_filter->pin); + capture_pin = capture_filter->pin; + ctx->capture_pin[devtype] = capture_pin; + + r = IGraphBuilder_ConnectDirect(graph, device_pin, (IPin *) capture_pin, NULL); + if (r != S_OK) { + av_log(avctx, AV_LOG_ERROR, "Could not connect pins\n"); + goto error; + } + + ret = 0; + +error: + if (pins) + IEnumPins_Release(pins); + if (classenum) + IEnumMoniker_Release(classenum); + + return ret; +} + +static enum CodecID waveform_codec_id(enum AVSampleFormat sample_fmt) +{ + switch (sample_fmt) { + case AV_SAMPLE_FMT_U8: return CODEC_ID_PCM_U8; + case AV_SAMPLE_FMT_S16: return CODEC_ID_PCM_S16LE; + case AV_SAMPLE_FMT_S32: return CODEC_ID_PCM_S32LE; + default: return CODEC_ID_NONE; /* Should never happen. 
*/ + } +} + +static enum SampleFormat sample_fmt_bits_per_sample(int bits) +{ + switch (bits) { + case 8: return AV_SAMPLE_FMT_U8; + case 16: return AV_SAMPLE_FMT_S16; + case 32: return AV_SAMPLE_FMT_S32; + default: return AV_SAMPLE_FMT_NONE; /* Should never happen. */ + } +} + +static int +dshow_add_device(AVFormatContext *avctx, AVFormatParameters *ap, + enum dshowDeviceType devtype) +{ + struct dshow_ctx *ctx = avctx->priv_data; + AM_MEDIA_TYPE type; + AVCodecContext *codec; + AVStream *st; + int ret = AVERROR(EIO); + + st = av_new_stream(avctx, devtype); + if (!st) { + ret = AVERROR(ENOMEM); + goto error; + } + + ctx->capture_filter[devtype]->stream_index = st->index; + + libAVPin_ConnectionMediaType(ctx->capture_pin[devtype], &type); + + codec = st->codec; + if (devtype == VideoDevice) { + BITMAPINFOHEADER *bih = NULL; + + if (IsEqualGUID(&type.formattype, &FORMAT_VideoInfo)) { + VIDEOINFOHEADER *v = (void *) type.pbFormat; + bih = &v->bmiHeader; + } else if (IsEqualGUID(&type.formattype, &FORMAT_VideoInfo2)) { + VIDEOINFOHEADER2 *v = (void *) type.pbFormat; + bih = &v->bmiHeader; + } + if (!bih) { + av_log(avctx, AV_LOG_ERROR, "Could not get media type.\n"); + goto error; + } + + codec->time_base = ap->time_base; + codec->codec_type = AVMEDIA_TYPE_VIDEO; + codec->width = bih->biWidth; + codec->height = bih->biHeight; + codec->pix_fmt = dshow_pixfmt(bih->biCompression, bih->biBitCount); + if (codec->pix_fmt == PIX_FMT_NONE) { + codec->codec_id = dshow_codecid(bih->biCompression); + if (codec->codec_id == CODEC_ID_NONE) { + av_log(avctx, AV_LOG_ERROR, "Unknown compression type. 
" + "Please report verbose (-v 9) debug information.\n"); + dshow_read_close(avctx); + return AVERROR_PATCHWELCOME; + } + codec->bits_per_coded_sample = bih->biBitCount; + } else { + codec->codec_id = CODEC_ID_RAWVIDEO; + if (bih->biCompression == BI_RGB) { + codec->bits_per_coded_sample = bih->biBitCount; + codec->extradata = av_malloc(9 + FF_INPUT_BUFFER_PADDING_SIZE); + if (codec->extradata) { + codec->extradata_size = 9; + memcpy(codec->extradata, "BottomUp", 9); + } + } + } + } else { + WAVEFORMATEX *fx = NULL; + + if (IsEqualGUID(&type.formattype, &FORMAT_WaveFormatEx)) { + fx = (void *) type.pbFormat; + } + if (!fx) { + av_log(avctx, AV_LOG_ERROR, "Could not get media type.\n"); + goto error; + } + + codec->codec_type = AVMEDIA_TYPE_AUDIO; + codec->sample_fmt = sample_fmt_bits_per_sample(fx->wBitsPerSample); + codec->codec_id = waveform_codec_id(codec->sample_fmt); + codec->sample_rate = fx->nSamplesPerSec; + codec->channels = fx->nChannels; + } + + av_set_pts_info(st, 64, 1, 10000000); + + ret = 0; + +error: + return ret; +} + +static int parse_device_name(AVFormatContext *avctx) +{ + struct dshow_ctx *ctx = avctx->priv_data; + char **device_name = ctx->device_name; + char *name = av_strdup(avctx->filename); + char *tmp = name; + int ret = 1; + char *type; + + while ((type = strtok(tmp, "="))) { + char *token = strtok(NULL, ":"); + tmp = NULL; + + if (!strcmp(type, "video")) { + device_name[0] = token; + } else if (!strcmp(type, "audio")) { + device_name[1] = token; + } else { + device_name[0] = NULL; + device_name[1] = NULL; + break; + } + } + + if (!device_name[0] && !device_name[1]) { + ret = 0; + } else { + if (device_name[0]) + device_name[0] = av_strdup(device_name[0]); + if (device_name[1]) + device_name[1] = av_strdup(device_name[1]); + } + + av_free(name); + return ret; +} + +static int dshow_read_header(AVFormatContext *avctx, AVFormatParameters *ap) +{ + struct dshow_ctx *ctx = avctx->priv_data; + IGraphBuilder *graph = NULL; + ICreateDevEnum 
*devenum = NULL; + IMediaControl *control = NULL; + int ret = AVERROR(EIO); + int r; + + if (!parse_device_name(avctx)) { + av_log(avctx, AV_LOG_ERROR, "Malformed dshow input string.\n"); + goto error; + } + + CoInitialize(0); + + r = CoCreateInstance(&CLSID_FilterGraph, NULL, CLSCTX_INPROC_SERVER, + &IID_IGraphBuilder, (void **) &graph); + if (r != S_OK) { + av_log(avctx, AV_LOG_ERROR, "Could not create capture graph.\n"); + goto error; + } + ctx->graph = graph; + + r = CoCreateInstance(&CLSID_SystemDeviceEnum, NULL, CLSCTX_INPROC_SERVER, + &IID_ICreateDevEnum, (void **) &devenum); + if (r != S_OK) { + av_log(avctx, AV_LOG_ERROR, "Could not enumerate system devices.\n"); + goto error; + } + + if (ctx->device_name[VideoDevice]) { + ret = dshow_open_device(avctx, devenum, VideoDevice); + if (ret < 0) + goto error; + ret = dshow_add_device(avctx, ap, VideoDevice); + if (ret < 0) + goto error; + } + if (ctx->device_name[AudioDevice]) { + ret = dshow_open_device(avctx, devenum, AudioDevice); + if (ret < 0) + goto error; + ret = dshow_add_device(avctx, ap, AudioDevice); + if (ret < 0) + goto error; + } + + ctx->mutex = CreateMutex(NULL, 0, NULL); + if (!ctx->mutex) { + av_log(avctx, AV_LOG_ERROR, "Could not create Mutex\n"); + goto error; + } + ctx->event = CreateEvent(NULL, 1, 0, NULL); + if (!ctx->event) { + av_log(avctx, AV_LOG_ERROR, "Could not create Event\n"); + goto error; + } + + r = IGraphBuilder_QueryInterface(graph, &IID_IMediaControl, (void **) &control); + if (r != S_OK) { + av_log(avctx, AV_LOG_ERROR, "Could not get media control.\n"); + goto error; + } + ctx->control = control; + + r = IMediaControl_Run(control); + if (r == S_FALSE) { + OAFilterState pfs; + r = IMediaControl_GetState(control, 0, &pfs); + } + if (r != S_OK) { + av_log(avctx, AV_LOG_ERROR, "Could not run filter\n"); + goto error; + } + + ret = 0; + +error: + + if (ret < 0) + dshow_read_close(avctx); + + if (devenum) + ICreateDevEnum_Release(devenum); + + return ret; +} + +static int 
dshow_read_packet(AVFormatContext *s, AVPacket *pkt) +{ + struct dshow_ctx *ctx = s->priv_data; + AVPacketList *pktl = NULL; + + while (!pktl) { + WaitForSingleObject(ctx->mutex, INFINITE); + pktl = ctx->pktl; + if (ctx->pktl) { + *pkt = ctx->pktl->pkt; + ctx->pktl = ctx->pktl->next; + av_free(pktl); + } + ResetEvent(ctx->event); + ReleaseMutex(ctx->mutex); + if (!pktl) { + if (s->flags & AVFMT_FLAG_NONBLOCK) { + return AVERROR(EAGAIN); + } else { + WaitForSingleObject(ctx->event, INFINITE); + } + } + } + + ctx->curbufsize -= pkt->size; + + return pkt->size; +} + +AVInputFormat ff_dshow_demuxer = { + "dshow", + NULL_IF_CONFIG_SMALL("DirectShow capture"), + sizeof(struct dshow_ctx), + NULL, + dshow_read_header, + dshow_read_packet, + dshow_read_close, + .flags = AVFMT_NOFILE, +}; diff --git a/libavdevice/dshow.h b/libavdevice/dshow.h new file mode 100644 index 0000000000..4e79680d98 --- /dev/null +++ b/libavdevice/dshow.h @@ -0,0 +1,266 @@ +/* + * DirectShow capture interface + * Copyright (c) 2010 Ramiro Polla + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#define DSHOWDEBUG 0 + +#include "avdevice.h" + +#define COBJMACROS +#include <windows.h> +#include <dshow.h> +#include <dvdmedia.h> + +long ff_copy_dshow_media_type(AM_MEDIA_TYPE *dst, const AM_MEDIA_TYPE *src); +void ff_print_AM_MEDIA_TYPE(const AM_MEDIA_TYPE *type); +void ff_printGUID(const GUID *g); + +#if DSHOWDEBUG +extern const AVClass *ff_dshow_context_class_ptr; +#define dshowdebug(...) av_log(&ff_dshow_context_class_ptr, AV_LOG_DEBUG, __VA_ARGS__) +#else +#define dshowdebug(...) +#endif + +static inline void nothing(void *foo) +{ +} + +struct GUIDoffset { + const GUID *iid; + int offset; +}; + +enum dshowDeviceType { + VideoDevice = 0, + AudioDevice = 1, +}; + +#define DECLARE_QUERYINTERFACE(class, ...) \ +long WINAPI \ +class##_QueryInterface(class *this, const GUID *riid, void **ppvObject) \ +{ \ + struct GUIDoffset ifaces[] = __VA_ARGS__; \ + int i; \ + dshowdebug(AV_STRINGIFY(class)"_QueryInterface(%p, %p, %p)\n", this, riid, ppvObject); \ + ff_printGUID(riid); \ + if (!ppvObject) \ + return E_POINTER; \ + for (i = 0; i < sizeof(ifaces)/sizeof(ifaces[0]); i++) { \ + if (IsEqualGUID(riid, ifaces[i].iid)) { \ + void *obj = (void *) ((uint8_t *) this + ifaces[i].offset); \ + class##_AddRef(this); \ + dshowdebug("\tfound %d with offset %d\n", i, ifaces[i].offset); \ + *ppvObject = (void *) obj; \ + return S_OK; \ + } \ + } \ + dshowdebug("\tE_NOINTERFACE\n"); \ + *ppvObject = NULL; \ + return E_NOINTERFACE; \ +} +#define DECLARE_ADDREF(class) \ +unsigned long WINAPI \ +class##_AddRef(class *this) \ +{ \ + dshowdebug(AV_STRINGIFY(class)"_AddRef(%p)\t%ld\n", this, this->ref+1); \ + return InterlockedIncrement(&this->ref); \ +} +#define DECLARE_RELEASE(class) \ +unsigned long WINAPI \ +class##_Release(class *this) \ +{ \ + long ref = 
InterlockedDecrement(&this->ref); \ + dshowdebug(AV_STRINGIFY(class)"_Release(%p)\t%ld\n", this, ref); \ + if (!ref) \ + class##_Destroy(this); \ + return ref; \ +} + +#define DECLARE_DESTROY(class, func) \ +void class##_Destroy(class *this) \ +{ \ + dshowdebug(AV_STRINGIFY(class)"_Destroy(%p)\n", this); \ + func(this); \ + if (this) { \ + if (this->vtbl) \ + CoTaskMemFree(this->vtbl); \ + CoTaskMemFree(this); \ + } \ +} +#define DECLARE_CREATE(class, setup, ...) \ +class *class##_Create(__VA_ARGS__) \ +{ \ + class *this = CoTaskMemAlloc(sizeof(class)); \ + void *vtbl = CoTaskMemAlloc(sizeof(*this->vtbl)); \ + dshowdebug(AV_STRINGIFY(class)"_Create(%p)\n", this); \ + if (!this || !vtbl) \ + goto fail; \ + ZeroMemory(this, sizeof(class)); \ + ZeroMemory(vtbl, sizeof(*this->vtbl)); \ + this->ref = 1; \ + this->vtbl = vtbl; \ + if (!setup) \ + goto fail; \ + dshowdebug("created "AV_STRINGIFY(class)" %p\n", this); \ + return this; \ +fail: \ + class##_Destroy(this); \ + dshowdebug("could not create "AV_STRINGIFY(class)"\n"); \ + return NULL; \ +} + +#define SETVTBL(vtbl, class, fn) \ + do { (vtbl)->fn = (void *) class##_##fn; } while(0) + +/***************************************************************************** + * Forward Declarations + ****************************************************************************/ +typedef struct libAVPin libAVPin; +typedef struct libAVMemInputPin libAVMemInputPin; +typedef struct libAVEnumPins libAVEnumPins; +typedef struct libAVEnumMediaTypes libAVEnumMediaTypes; +typedef struct libAVFilter libAVFilter; + +/***************************************************************************** + * libAVPin + ****************************************************************************/ +struct libAVPin { + IPinVtbl *vtbl; + long ref; + libAVFilter *filter; + IPin *connectedto; + AM_MEDIA_TYPE type; + IMemInputPinVtbl *imemvtbl; +}; + +long WINAPI libAVPin_QueryInterface (libAVPin *, const GUID *, void **); +unsigned long WINAPI 
libAVPin_AddRef (libAVPin *); +unsigned long WINAPI libAVPin_Release (libAVPin *); +long WINAPI libAVPin_Connect (libAVPin *, IPin *, const AM_MEDIA_TYPE *); +long WINAPI libAVPin_ReceiveConnection (libAVPin *, IPin *, const AM_MEDIA_TYPE *); +long WINAPI libAVPin_Disconnect (libAVPin *); +long WINAPI libAVPin_ConnectedTo (libAVPin *, IPin **); +long WINAPI libAVPin_ConnectionMediaType (libAVPin *, AM_MEDIA_TYPE *); +long WINAPI libAVPin_QueryPinInfo (libAVPin *, PIN_INFO *); +long WINAPI libAVPin_QueryDirection (libAVPin *, PIN_DIRECTION *); +long WINAPI libAVPin_QueryId (libAVPin *, wchar_t **); +long WINAPI libAVPin_QueryAccept (libAVPin *, const AM_MEDIA_TYPE *); +long WINAPI libAVPin_EnumMediaTypes (libAVPin *, IEnumMediaTypes **); +long WINAPI libAVPin_QueryInternalConnections(libAVPin *, IPin **, unsigned long *); +long WINAPI libAVPin_EndOfStream (libAVPin *); +long WINAPI libAVPin_BeginFlush (libAVPin *); +long WINAPI libAVPin_EndFlush (libAVPin *); +long WINAPI libAVPin_NewSegment (libAVPin *, REFERENCE_TIME, REFERENCE_TIME, double); + +long WINAPI libAVMemInputPin_QueryInterface (libAVMemInputPin *, const GUID *, void **); +unsigned long WINAPI libAVMemInputPin_AddRef (libAVMemInputPin *); +unsigned long WINAPI libAVMemInputPin_Release (libAVMemInputPin *); +long WINAPI libAVMemInputPin_GetAllocator (libAVMemInputPin *, IMemAllocator **); +long WINAPI libAVMemInputPin_NotifyAllocator (libAVMemInputPin *, IMemAllocator *, WINBOOL); +long WINAPI libAVMemInputPin_GetAllocatorRequirements(libAVMemInputPin *, ALLOCATOR_PROPERTIES *); +long WINAPI libAVMemInputPin_Receive (libAVMemInputPin *, IMediaSample *); +long WINAPI libAVMemInputPin_ReceiveMultiple (libAVMemInputPin *, IMediaSample **, long, long *); +long WINAPI libAVMemInputPin_ReceiveCanBlock (libAVMemInputPin *); + +void libAVPin_Destroy(libAVPin *); +libAVPin *libAVPin_Create (libAVFilter *filter); + +void libAVMemInputPin_Destroy(libAVMemInputPin *); + 
+/***************************************************************************** + * libAVEnumPins + ****************************************************************************/ +struct libAVEnumPins { + IEnumPinsVtbl *vtbl; + long ref; + int pos; + libAVPin *pin; + libAVFilter *filter; +}; + +long WINAPI libAVEnumPins_QueryInterface(libAVEnumPins *, const GUID *, void **); +unsigned long WINAPI libAVEnumPins_AddRef (libAVEnumPins *); +unsigned long WINAPI libAVEnumPins_Release (libAVEnumPins *); +long WINAPI libAVEnumPins_Next (libAVEnumPins *, unsigned long, IPin **, unsigned long *); +long WINAPI libAVEnumPins_Skip (libAVEnumPins *, unsigned long); +long WINAPI libAVEnumPins_Reset (libAVEnumPins *); +long WINAPI libAVEnumPins_Clone (libAVEnumPins *, libAVEnumPins **); + +void libAVEnumPins_Destroy(libAVEnumPins *); +libAVEnumPins *libAVEnumPins_Create (libAVPin *pin, libAVFilter *filter); + +/***************************************************************************** + * libAVEnumMediaTypes + ****************************************************************************/ +struct libAVEnumMediaTypes { + IEnumPinsVtbl *vtbl; + long ref; + int pos; + AM_MEDIA_TYPE type; +}; + +long WINAPI libAVEnumMediaTypes_QueryInterface(libAVEnumMediaTypes *, const GUID *, void **); +unsigned long WINAPI libAVEnumMediaTypes_AddRef (libAVEnumMediaTypes *); +unsigned long WINAPI libAVEnumMediaTypes_Release (libAVEnumMediaTypes *); +long WINAPI libAVEnumMediaTypes_Next (libAVEnumMediaTypes *, unsigned long, AM_MEDIA_TYPE **, unsigned long *); +long WINAPI libAVEnumMediaTypes_Skip (libAVEnumMediaTypes *, unsigned long); +long WINAPI libAVEnumMediaTypes_Reset (libAVEnumMediaTypes *); +long WINAPI libAVEnumMediaTypes_Clone (libAVEnumMediaTypes *, libAVEnumMediaTypes **); + +void libAVEnumMediaTypes_Destroy(libAVEnumMediaTypes *); +libAVEnumMediaTypes *libAVEnumMediaTypes_Create(const AM_MEDIA_TYPE *type); + 
+/***************************************************************************** + * libAVFilter + ****************************************************************************/ +struct libAVFilter { + IBaseFilterVtbl *vtbl; + long ref; + const wchar_t *name; + libAVPin *pin; + FILTER_INFO info; + FILTER_STATE state; + IReferenceClock *clock; + enum dshowDeviceType type; + void *priv_data; + int stream_index; + int64_t start_time; + void (*callback)(void *priv_data, int index, uint8_t *buf, int buf_size, int64_t time); +}; + +long WINAPI libAVFilter_QueryInterface (libAVFilter *, const GUID *, void **); +unsigned long WINAPI libAVFilter_AddRef (libAVFilter *); +unsigned long WINAPI libAVFilter_Release (libAVFilter *); +long WINAPI libAVFilter_GetClassID (libAVFilter *, CLSID *); +long WINAPI libAVFilter_Stop (libAVFilter *); +long WINAPI libAVFilter_Pause (libAVFilter *); +long WINAPI libAVFilter_Run (libAVFilter *, REFERENCE_TIME); +long WINAPI libAVFilter_GetState (libAVFilter *, DWORD, FILTER_STATE *); +long WINAPI libAVFilter_SetSyncSource (libAVFilter *, IReferenceClock *); +long WINAPI libAVFilter_GetSyncSource (libAVFilter *, IReferenceClock **); +long WINAPI libAVFilter_EnumPins (libAVFilter *, IEnumPins **); +long WINAPI libAVFilter_FindPin (libAVFilter *, const wchar_t *, IPin **); +long WINAPI libAVFilter_QueryFilterInfo(libAVFilter *, FILTER_INFO *); +long WINAPI libAVFilter_JoinFilterGraph(libAVFilter *, IFilterGraph *, const wchar_t *); +long WINAPI libAVFilter_QueryVendorInfo(libAVFilter *, wchar_t **); + +void libAVFilter_Destroy(libAVFilter *); +libAVFilter *libAVFilter_Create (void *, void *, enum dshowDeviceType); diff --git a/libavdevice/dshow_common.c b/libavdevice/dshow_common.c new file mode 100644 index 0000000000..c813dc165e --- /dev/null +++ b/libavdevice/dshow_common.c @@ -0,0 +1,141 @@ +/* + * Directshow capture interface + * Copyright (c) 2010 Ramiro Polla + * + * This file is part of FFmpeg. 
+ * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "dshow.h" + +long ff_copy_dshow_media_type(AM_MEDIA_TYPE *dst, const AM_MEDIA_TYPE *src) +{ + uint8_t *pbFormat = NULL; + + if (src->cbFormat) { + pbFormat = CoTaskMemAlloc(src->cbFormat); + if (!pbFormat) + return E_OUTOFMEMORY; + memcpy(pbFormat, src->pbFormat, src->cbFormat); + } + + *dst = *src; + dst->pUnk = NULL; + dst->pbFormat = pbFormat; + + return S_OK; +} + +void ff_printGUID(const GUID *g) +{ +#if DSHOWDEBUG + const uint32_t *d = (const uint32_t *) &g->Data1; + const uint16_t *w = (const uint16_t *) &g->Data2; + const uint8_t *c = (const uint8_t *) &g->Data4; + + dshowdebug("0x%08x 0x%04x 0x%04x %02x%02x%02x%02x%02x%02x%02x%02x", + d[0], w[0], w[1], + c[0], c[1], c[2], c[3], c[4], c[5], c[6], c[7]); +#endif +} + +static const char *dshow_context_to_name(void *ptr) +{ + return "dshow"; +} +static const AVClass ff_dshow_context_class = { "DirectShow", dshow_context_to_name }; +const AVClass *ff_dshow_context_class_ptr = &ff_dshow_context_class; + +#define dstruct(pctx, sname, var, type) \ + dshowdebug(" "#var":\t%"type"\n", sname->var) + +#if DSHOWDEBUG +static void dump_bih(void *s, BITMAPINFOHEADER *bih) +{ + dshowdebug(" BITMAPINFOHEADER\n"); + dstruct(s, bih, biSize, "lu"); + dstruct(s, bih, biWidth, "ld"); + 
dstruct(s, bih, biHeight, "ld"); + dstruct(s, bih, biPlanes, "d"); + dstruct(s, bih, biBitCount, "d"); + dstruct(s, bih, biCompression, "lu"); + dshowdebug(" biCompression:\t\"%.4s\"\n", + (char*) &bih->biCompression); + dstruct(s, bih, biSizeImage, "lu"); + dstruct(s, bih, biXPelsPerMeter, "lu"); + dstruct(s, bih, biYPelsPerMeter, "lu"); + dstruct(s, bih, biClrUsed, "lu"); + dstruct(s, bih, biClrImportant, "lu"); +} +#endif + +void ff_print_AM_MEDIA_TYPE(const AM_MEDIA_TYPE *type) +{ +#if DSHOWDEBUG + dshowdebug(" majortype\t"); + ff_printGUID(&type->majortype); + dshowdebug("\n"); + dshowdebug(" subtype\t"); + ff_printGUID(&type->subtype); + dshowdebug("\n"); + dshowdebug(" bFixedSizeSamples\t%d\n", type->bFixedSizeSamples); + dshowdebug(" bTemporalCompression\t%d\n", type->bTemporalCompression); + dshowdebug(" lSampleSize\t%lu\n", type->lSampleSize); + dshowdebug(" formattype\t"); + ff_printGUID(&type->formattype); + dshowdebug("\n"); + dshowdebug(" pUnk\t%p\n", type->pUnk); + dshowdebug(" cbFormat\t%lu\n", type->cbFormat); + dshowdebug(" pbFormat\t%p\n", type->pbFormat); + + if (IsEqualGUID(&type->formattype, &FORMAT_VideoInfo)) { + VIDEOINFOHEADER *v = (void *) type->pbFormat; + dshowdebug(" rcSource: left %ld top %ld right %ld bottom %ld\n", + v->rcSource.left, v->rcSource.top, v->rcSource.right, v->rcSource.bottom); + dshowdebug(" rcTarget: left %ld top %ld right %ld bottom %ld\n", + v->rcTarget.left, v->rcTarget.top, v->rcTarget.right, v->rcTarget.bottom); + dshowdebug(" dwBitRate: %lu\n", v->dwBitRate); + dshowdebug(" dwBitErrorRate: %lu\n", v->dwBitErrorRate); + dshowdebug(" AvgTimePerFrame: %"PRId64"\n", v->AvgTimePerFrame); + dump_bih(NULL, &v->bmiHeader); + } else if (IsEqualGUID(&type->formattype, &FORMAT_VideoInfo2)) { + VIDEOINFOHEADER2 *v = (void *) type->pbFormat; + dshowdebug(" rcSource: left %ld top %ld right %ld bottom %ld\n", + v->rcSource.left, v->rcSource.top, v->rcSource.right, v->rcSource.bottom); + dshowdebug(" rcTarget: left %ld top %ld 
right %ld bottom %ld\n", + v->rcTarget.left, v->rcTarget.top, v->rcTarget.right, v->rcTarget.bottom); + dshowdebug(" dwBitRate: %lu\n", v->dwBitRate); + dshowdebug(" dwBitErrorRate: %lu\n", v->dwBitErrorRate); + dshowdebug(" AvgTimePerFrame: %"PRId64"\n", v->AvgTimePerFrame); + dshowdebug(" dwInterlaceFlags: %lu\n", v->dwInterlaceFlags); + dshowdebug(" dwCopyProtectFlags: %lu\n", v->dwCopyProtectFlags); + dshowdebug(" dwPictAspectRatioX: %lu\n", v->dwPictAspectRatioX); + dshowdebug(" dwPictAspectRatioY: %lu\n", v->dwPictAspectRatioY); +// dshowdebug(" dwReserved1: %lu\n", v->u.dwReserved1); /* mingw-w64 is buggy and doesn't name unnamed unions */ + dshowdebug(" dwReserved2: %lu\n", v->dwReserved2); + dump_bih(NULL, &v->bmiHeader); + } else if (IsEqualGUID(&type->formattype, &FORMAT_WaveFormatEx)) { + WAVEFORMATEX *fx = (void *) type->pbFormat; + dshowdebug(" wFormatTag: %u\n", fx->wFormatTag); + dshowdebug(" nChannels: %u\n", fx->nChannels); + dshowdebug(" nSamplesPerSec: %lu\n", fx->nSamplesPerSec); + dshowdebug(" nAvgBytesPerSec: %lu\n", fx->nAvgBytesPerSec); + dshowdebug(" nBlockAlign: %u\n", fx->nBlockAlign); + dshowdebug(" wBitsPerSample: %u\n", fx->wBitsPerSample); + dshowdebug(" cbSize: %u\n", fx->cbSize); + } +#endif +} diff --git a/libavdevice/dshow_enummediatypes.c b/libavdevice/dshow_enummediatypes.c new file mode 100644 index 0000000000..a700133ba6 --- /dev/null +++ b/libavdevice/dshow_enummediatypes.c @@ -0,0 +1,103 @@ +/* + * DirectShow capture interface + * Copyright (c) 2010 Ramiro Polla + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "dshow.h" + +DECLARE_QUERYINTERFACE(libAVEnumMediaTypes, + { {&IID_IUnknown,0}, {&IID_IEnumPins,0} }) +DECLARE_ADDREF(libAVEnumMediaTypes) +DECLARE_RELEASE(libAVEnumMediaTypes) + +long WINAPI +libAVEnumMediaTypes_Next(libAVEnumMediaTypes *this, unsigned long n, + AM_MEDIA_TYPE **types, unsigned long *fetched) +{ + int count = 0; + dshowdebug("libAVEnumMediaTypes_Next(%p)\n", this); + if (!types) + return E_POINTER; + if (!this->pos && n == 1) { + if (!IsEqualGUID(&this->type.majortype, &GUID_NULL)) { + AM_MEDIA_TYPE *type = av_malloc(sizeof(AM_MEDIA_TYPE)); + ff_copy_dshow_media_type(type, &this->type); + *types = type; + count = 1; + } + this->pos = 1; + } + if (fetched) + *fetched = count; + if (!count) + return S_FALSE; + return S_OK; +} +long WINAPI +libAVEnumMediaTypes_Skip(libAVEnumMediaTypes *this, unsigned long n) +{ + dshowdebug("libAVEnumMediaTypes_Skip(%p)\n", this); + if (n) /* Any skip will always fall outside of the only valid type. 
*/ + return S_FALSE; + return S_OK; +} +long WINAPI +libAVEnumMediaTypes_Reset(libAVEnumMediaTypes *this) +{ + dshowdebug("libAVEnumMediaTypes_Reset(%p)\n", this); + this->pos = 0; + return S_OK; +} +long WINAPI +libAVEnumMediaTypes_Clone(libAVEnumMediaTypes *this, libAVEnumMediaTypes **enums) +{ + libAVEnumMediaTypes *new; + dshowdebug("libAVEnumMediaTypes_Clone(%p)\n", this); + if (!enums) + return E_POINTER; + new = libAVEnumMediaTypes_Create(&this->type); + if (!new) + return E_OUTOFMEMORY; + new->pos = this->pos; + *enums = new; + return S_OK; +} + +static int +libAVEnumMediaTypes_Setup(libAVEnumMediaTypes *this, const AM_MEDIA_TYPE *type) +{ + IEnumPinsVtbl *vtbl = this->vtbl; + SETVTBL(vtbl, libAVEnumMediaTypes, QueryInterface); + SETVTBL(vtbl, libAVEnumMediaTypes, AddRef); + SETVTBL(vtbl, libAVEnumMediaTypes, Release); + SETVTBL(vtbl, libAVEnumMediaTypes, Next); + SETVTBL(vtbl, libAVEnumMediaTypes, Skip); + SETVTBL(vtbl, libAVEnumMediaTypes, Reset); + SETVTBL(vtbl, libAVEnumMediaTypes, Clone); + + if (!type) { + this->type.majortype = GUID_NULL; + } else { + ff_copy_dshow_media_type(&this->type, type); + } + + return 1; +} +DECLARE_CREATE(libAVEnumMediaTypes, libAVEnumMediaTypes_Setup(this, type), const AM_MEDIA_TYPE *type) +DECLARE_DESTROY(libAVEnumMediaTypes, nothing) diff --git a/libavdevice/dshow_enumpins.c b/libavdevice/dshow_enumpins.c new file mode 100644 index 0000000000..97890fb88e --- /dev/null +++ b/libavdevice/dshow_enumpins.c @@ -0,0 +1,99 @@ +/* + * DirectShow capture interface + * Copyright (c) 2010 Ramiro Polla + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "dshow.h" + +DECLARE_QUERYINTERFACE(libAVEnumPins, + { {&IID_IUnknown,0}, {&IID_IEnumPins,0} }) +DECLARE_ADDREF(libAVEnumPins) +DECLARE_RELEASE(libAVEnumPins) + +long WINAPI +libAVEnumPins_Next(libAVEnumPins *this, unsigned long n, IPin **pins, + unsigned long *fetched) +{ + int count = 0; + dshowdebug("libAVEnumPins_Next(%p)\n", this); + if (!pins) + return E_POINTER; + if (!this->pos && n == 1) { + libAVPin_AddRef(this->pin); + *pins = (IPin *) this->pin; + count = 1; + this->pos = 1; + } + if (fetched) + *fetched = count; + if (!count) + return S_FALSE; + return S_OK; +} +long WINAPI +libAVEnumPins_Skip(libAVEnumPins *this, unsigned long n) +{ + dshowdebug("libAVEnumPins_Skip(%p)\n", this); + if (n) /* Any skip will always fall outside of the only valid pin. 
*/ + return S_FALSE; + return S_OK; +} +long WINAPI +libAVEnumPins_Reset(libAVEnumPins *this) +{ + dshowdebug("libAVEnumPins_Reset(%p)\n", this); + this->pos = 0; + return S_OK; +} +long WINAPI +libAVEnumPins_Clone(libAVEnumPins *this, libAVEnumPins **pins) +{ + libAVEnumPins *new; + dshowdebug("libAVEnumPins_Clone(%p)\n", this); + if (!pins) + return E_POINTER; + new = libAVEnumPins_Create(this->pin, this->filter); + if (!new) + return E_OUTOFMEMORY; + new->pos = this->pos; + *pins = new; + return S_OK; +} + +static int +libAVEnumPins_Setup(libAVEnumPins *this, libAVPin *pin, libAVFilter *filter) +{ + IEnumPinsVtbl *vtbl = this->vtbl; + SETVTBL(vtbl, libAVEnumPins, QueryInterface); + SETVTBL(vtbl, libAVEnumPins, AddRef); + SETVTBL(vtbl, libAVEnumPins, Release); + SETVTBL(vtbl, libAVEnumPins, Next); + SETVTBL(vtbl, libAVEnumPins, Skip); + SETVTBL(vtbl, libAVEnumPins, Reset); + SETVTBL(vtbl, libAVEnumPins, Clone); + + this->pin = pin; + this->filter = filter; + libAVFilter_AddRef(this->filter); + + return 1; +} +DECLARE_CREATE(libAVEnumPins, libAVEnumPins_Setup(this, pin, filter), + libAVPin *pin, libAVFilter *filter) +DECLARE_DESTROY(libAVEnumPins, nothing) diff --git a/libavdevice/dshow_filter.c b/libavdevice/dshow_filter.c new file mode 100644 index 0000000000..e5a3be854b --- /dev/null +++ b/libavdevice/dshow_filter.c @@ -0,0 +1,196 @@ +/* + * DirectShow capture interface + * Copyright (c) 2010 Ramiro Polla + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "dshow.h" + +DECLARE_QUERYINTERFACE(libAVFilter, + { {&IID_IUnknown,0}, {&IID_IBaseFilter,0} }) +DECLARE_ADDREF(libAVFilter) +DECLARE_RELEASE(libAVFilter) + +long WINAPI +libAVFilter_GetClassID(libAVFilter *this, CLSID *id) +{ + dshowdebug("libAVFilter_GetClassID(%p)\n", this); + /* I'm not creating a ClassID just for this. */ + return E_FAIL; +} +long WINAPI +libAVFilter_Stop(libAVFilter *this) +{ + dshowdebug("libAVFilter_Stop(%p)\n", this); + this->state = State_Stopped; + return S_OK; +} +long WINAPI +libAVFilter_Pause(libAVFilter *this) +{ + dshowdebug("libAVFilter_Pause(%p)\n", this); + this->state = State_Paused; + return S_OK; +} +long WINAPI +libAVFilter_Run(libAVFilter *this, REFERENCE_TIME start) +{ + dshowdebug("libAVFilter_Run(%p) %"PRId64"\n", this, start); + this->state = State_Running; + this->start_time = start; + return S_OK; +} +long WINAPI +libAVFilter_GetState(libAVFilter *this, DWORD ms, FILTER_STATE *state) +{ + dshowdebug("libAVFilter_GetState(%p)\n", this); + if (!state) + return E_POINTER; + *state = this->state; + return S_OK; +} +long WINAPI +libAVFilter_SetSyncSource(libAVFilter *this, IReferenceClock *clock) +{ + dshowdebug("libAVFilter_SetSyncSource(%p)\n", this); + + if (this->clock != clock) { + if (this->clock) + IReferenceClock_Release(this->clock); + this->clock = clock; + if (clock) + IReferenceClock_AddRef(clock); + } + + return S_OK; +} +long WINAPI +libAVFilter_GetSyncSource(libAVFilter *this, IReferenceClock **clock) +{ + dshowdebug("libAVFilter_GetSyncSource(%p)\n", this); + + if (!clock) + return E_POINTER; + if (this->clock) + IReferenceClock_AddRef(this->clock); + *clock = this->clock; + + return S_OK; +} +long WINAPI +libAVFilter_EnumPins(libAVFilter *this, IEnumPins **enumpin) +{ + 
libAVEnumPins *new;
+    dshowdebug("libAVFilter_EnumPins(%p)\n", this);
+
+    if (!enumpin)
+        return E_POINTER;
+    new = libAVEnumPins_Create(this->pin, this);
+    if (!new)
+        return E_OUTOFMEMORY;
+
+    *enumpin = (IEnumPins *) new;
+    return S_OK;
+}
+/*
+ * Look up the filter's pin by identifier.
+ *
+ * Only the identifier L"In" matches. On a match the pin is returned in *pin
+ * with one extra reference; otherwise *pin is set to NULL and
+ * VFW_E_NOT_FOUND is returned. E_POINTER is returned when id or pin is NULL.
+ *
+ * NOTE(review): libAVPin_QueryId reports L"libAV Pin", which this lookup does
+ * not accept -- confirm whether the two identifiers are meant to agree.
+ */
+long WINAPI
+libAVFilter_FindPin(libAVFilter *this, const wchar_t *id, IPin **pin)
+{
+    libAVPin *found = NULL;
+    dshowdebug("libAVFilter_FindPin(%p)\n", this);
+
+    if (!id || !pin)
+        return E_POINTER;
+    /* this->pin comes from an unchecked libAVPin_Create() in
+     * libAVFilter_Setup, so it may be NULL; never AddRef a NULL pin. */
+    if (this->pin && !wcscmp(id, L"In")) {
+        found = this->pin;
+        libAVPin_AddRef(found);
+    }
+    *pin = (IPin *) found;
+    if (!found)
+        return VFW_E_NOT_FOUND;
+
+    return S_OK;
+}
+/*
+ * Copy the filter's FILTER_INFO into *info.
+ *
+ * If the filter currently has a graph, the graph gets an extra reference on
+ * behalf of the caller before the structure is copied.
+ * Returns E_POINTER when info is NULL, S_OK otherwise.
+ */
+long WINAPI
+libAVFilter_QueryFilterInfo(libAVFilter *this, FILTER_INFO *info)
+{
+    dshowdebug("libAVFilter_QueryFilterInfo(%p)\n", this);
+
+    if (!info)
+        return E_POINTER;
+    if (this->info.pGraph)
+        IFilterGraph_AddRef(this->info.pGraph);
+    *info = this->info;
+
+    return S_OK;
+}
+/*
+ * Record the graph the filter has joined (graph is stored without taking a
+ * reference) and, when a name is supplied, the filter's name.
+ *
+ * The name is truncated to fit info.achName instead of overflowing it.
+ * Always returns S_OK.
+ */
+long WINAPI
+libAVFilter_JoinFilterGraph(libAVFilter *this, IFilterGraph *graph,
+                            const wchar_t *name)
+{
+    dshowdebug("libAVFilter_JoinFilterGraph(%p)\n", this);
+
+    this->info.pGraph = graph;
+    if (name) {
+        /* achName is a fixed-size WCHAR array in FILTER_INFO (per the
+         * DirectShow headers); bound the copy and always terminate. */
+        const size_t cap = sizeof(this->info.achName) /
+                           sizeof(this->info.achName[0]);
+        wcsncpy(this->info.achName, name, cap - 1);
+        this->info.achName[cap - 1] = L'\0';
+    }
+
+    return S_OK;
+}
+/*
+ * Return a newly allocated copy of the vendor string L"libAV" in *info.
+ *
+ * Returns E_POINTER when info is NULL and E_OUTOFMEMORY when the string
+ * cannot be duplicated.
+ *
+ * NOTE(review): the string is allocated with wcsdup(); DirectShow callers are
+ * documented to release it with CoTaskMemFree -- confirm the allocator.
+ */
+long WINAPI
+libAVFilter_QueryVendorInfo(libAVFilter *this, wchar_t **info)
+{
+    dshowdebug("libAVFilter_QueryVendorInfo(%p)\n", this);
+
+    if (!info)
+        return E_POINTER;
+    *info = wcsdup(L"libAV");
+    if (!*info)
+        return E_OUTOFMEMORY;
+
+    return S_OK;
+}
+
+static int
+libAVFilter_Setup(libAVFilter *this, void *priv_data, void *callback,
+                  enum dshowDeviceType type)
+{
+    IBaseFilterVtbl *vtbl = this->vtbl;
+    SETVTBL(vtbl, libAVFilter, QueryInterface);
+    SETVTBL(vtbl, libAVFilter, AddRef);
+    SETVTBL(vtbl, libAVFilter, Release);
+    SETVTBL(vtbl, libAVFilter, GetClassID);
+    SETVTBL(vtbl, libAVFilter, Stop);
+    SETVTBL(vtbl, libAVFilter, Pause);
+    SETVTBL(vtbl, libAVFilter, Run);
+    SETVTBL(vtbl, libAVFilter, GetState);
+    SETVTBL(vtbl, libAVFilter, SetSyncSource);
+    SETVTBL(vtbl, libAVFilter, GetSyncSource);
+    SETVTBL(vtbl, libAVFilter, EnumPins);
+    SETVTBL(vtbl,
libAVFilter, FindPin); + SETVTBL(vtbl, libAVFilter, QueryFilterInfo); + SETVTBL(vtbl, libAVFilter, JoinFilterGraph); + SETVTBL(vtbl, libAVFilter, QueryVendorInfo); + + this->pin = libAVPin_Create(this); + + this->priv_data = priv_data; + this->callback = callback; + this->type = type; + + return 1; +} +DECLARE_CREATE(libAVFilter, libAVFilter_Setup(this, priv_data, callback, type), + void *priv_data, void *callback, enum dshowDeviceType type) +DECLARE_DESTROY(libAVFilter, nothing) diff --git a/libavdevice/dshow_pin.c b/libavdevice/dshow_pin.c new file mode 100644 index 0000000000..f31ecc6c99 --- /dev/null +++ b/libavdevice/dshow_pin.c @@ -0,0 +1,361 @@ +/* + * DirectShow capture interface + * Copyright (c) 2010 Ramiro Polla + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "dshow.h" + +#include <stddef.h> +#define imemoffset offsetof(libAVPin, imemvtbl) + +DECLARE_QUERYINTERFACE(libAVPin, + { {&IID_IUnknown,0}, {&IID_IPin,0}, {&IID_IMemInputPin,imemoffset} }) +DECLARE_ADDREF(libAVPin) +DECLARE_RELEASE(libAVPin) + +long WINAPI +libAVPin_Connect(libAVPin *this, IPin *pin, const AM_MEDIA_TYPE *type) +{ + dshowdebug("libAVPin_Connect(%p, %p, %p)\n", this, pin, type); + /* Input pins receive connections. 
*/
+    return S_FALSE;
+}
+/*
+ * Accept a connection offered by the upstream pin.
+ *
+ * The offered media type must have major type MEDIATYPE_Video when the
+ * owning filter is a VideoDevice, and MEDIATYPE_Audio otherwise. On success
+ * the peer pin is referenced and remembered together with a copy of the
+ * media type.
+ *
+ * Returns E_POINTER for a NULL pin or type, VFW_E_ALREADY_CONNECTED when a
+ * peer is already recorded, VFW_E_TYPE_NOT_ACCEPTED for a mismatching major
+ * type, and S_OK otherwise.
+ */
+long WINAPI
+libAVPin_ReceiveConnection(libAVPin *this, IPin *pin,
+                           const AM_MEDIA_TYPE *type)
+{
+    enum dshowDeviceType devtype = this->filter->type;
+    dshowdebug("libAVPin_ReceiveConnection(%p)\n", this);
+
+    /* type is dereferenced below, so it must be rejected when NULL too. */
+    if (!pin || !type)
+        return E_POINTER;
+    if (this->connectedto)
+        return VFW_E_ALREADY_CONNECTED;
+
+    ff_print_AM_MEDIA_TYPE(type);
+    if (devtype == VideoDevice) {
+        if (!IsEqualGUID(&type->majortype, &MEDIATYPE_Video))
+            return VFW_E_TYPE_NOT_ACCEPTED;
+    } else {
+        if (!IsEqualGUID(&type->majortype, &MEDIATYPE_Audio))
+            return VFW_E_TYPE_NOT_ACCEPTED;
+    }
+
+    /* This reference is dropped again in libAVPin_Disconnect. */
+    IPin_AddRef(pin);
+    this->connectedto = pin;
+
+    ff_copy_dshow_media_type(&this->type, type);
+
+    return S_OK;
+}
+/*
+ * Break the current connection.
+ *
+ * Returns VFW_E_NOT_STOPPED unless the owning filter is in State_Stopped,
+ * S_FALSE when no peer is recorded, and S_OK after the peer has been
+ * released and forgotten.
+ */
+long WINAPI
+libAVPin_Disconnect(libAVPin *this)
+{
+    dshowdebug("libAVPin_Disconnect(%p)\n", this);
+
+    if (this->filter->state != State_Stopped)
+        return VFW_E_NOT_STOPPED;
+    if (!this->connectedto)
+        return S_FALSE;
+    /* Balance the IPin_AddRef taken in libAVPin_ReceiveConnection;
+     * without this the peer pin's reference is leaked. */
+    IPin_Release(this->connectedto);
+    this->connectedto = NULL;
+
+    return S_OK;
+}
+/*
+ * Return the peer pin in *pin with one extra reference.
+ *
+ * Returns E_POINTER when pin is NULL and VFW_E_NOT_CONNECTED when no peer is
+ * recorded.
+ */
+long WINAPI
+libAVPin_ConnectedTo(libAVPin *this, IPin **pin)
+{
+    dshowdebug("libAVPin_ConnectedTo(%p)\n", this);
+
+    if (!pin)
+        return E_POINTER;
+    if (!this->connectedto)
+        return VFW_E_NOT_CONNECTED;
+    IPin_AddRef(this->connectedto);
+    *pin = this->connectedto;
+
+    return S_OK;
+}
+/*
+ * Copy the media type stored for the current connection into *type.
+ *
+ * Returns E_POINTER when type is NULL, VFW_E_NOT_CONNECTED when no peer is
+ * recorded, and otherwise whatever ff_copy_dshow_media_type returns.
+ */
+long WINAPI
+libAVPin_ConnectionMediaType(libAVPin *this, AM_MEDIA_TYPE *type)
+{
+    dshowdebug("libAVPin_ConnectionMediaType(%p)\n", this);
+
+    if (!type)
+        return E_POINTER;
+    if (!this->connectedto)
+        return VFW_E_NOT_CONNECTED;
+
+    return ff_copy_dshow_media_type(type, &this->type);
+}
+/*
+ * Fill *info with the owning filter (referenced for the caller when set),
+ * the direction PINDIR_INPUT and the name L"Capture".
+ *
+ * Returns E_POINTER when info is NULL, S_OK otherwise.
+ */
+long WINAPI
+libAVPin_QueryPinInfo(libAVPin *this, PIN_INFO *info)
+{
+    dshowdebug("libAVPin_QueryPinInfo(%p)\n", this);
+
+    if (!info)
+        return E_POINTER;
+
+    if (this->filter)
+        libAVFilter_AddRef(this->filter);
+
+    info->pFilter = (IBaseFilter *) this->filter;
+    info->dir     = PINDIR_INPUT;
+    wcscpy(info->achName, L"Capture");
+
+    return S_OK;
+}
+long WINAPI
+libAVPin_QueryDirection(libAVPin *this, PIN_DIRECTION *dir)
+{
+
dshowdebug("libAVPin_QueryDirection(%p)\n", this); + if (!dir) + return E_POINTER; + *dir = PINDIR_INPUT; + return S_OK; +} +long WINAPI +libAVPin_QueryId(libAVPin *this, wchar_t **id) +{ + dshowdebug("libAVPin_QueryId(%p)\n", this); + + if (!id) + return E_POINTER; + + *id = wcsdup(L"libAV Pin"); + + return S_OK; +} +long WINAPI +libAVPin_QueryAccept(libAVPin *this, const AM_MEDIA_TYPE *type) +{ + dshowdebug("libAVPin_QueryAccept(%p)\n", this); + return S_FALSE; +} +long WINAPI +libAVPin_EnumMediaTypes(libAVPin *this, IEnumMediaTypes **enumtypes) +{ + const AM_MEDIA_TYPE *type = NULL; + libAVEnumMediaTypes *new; + dshowdebug("libAVPin_EnumMediaTypes(%p)\n", this); + + if (!enumtypes) + return E_POINTER; + new = libAVEnumMediaTypes_Create(type); + if (!new) + return E_OUTOFMEMORY; + + *enumtypes = (IEnumMediaTypes *) new; + return S_OK; +} +long WINAPI +libAVPin_QueryInternalConnections(libAVPin *this, IPin **pin, + unsigned long *npin) +{ + dshowdebug("libAVPin_QueryInternalConnections(%p)\n", this); + return E_NOTIMPL; +} +long WINAPI +libAVPin_EndOfStream(libAVPin *this) +{ + dshowdebug("libAVPin_EndOfStream(%p)\n", this); + /* I don't care. */ + return S_OK; +} +long WINAPI +libAVPin_BeginFlush(libAVPin *this) +{ + dshowdebug("libAVPin_BeginFlush(%p)\n", this); + /* I don't care. */ + return S_OK; +} +long WINAPI +libAVPin_EndFlush(libAVPin *this) +{ + dshowdebug("libAVPin_EndFlush(%p)\n", this); + /* I don't care. */ + return S_OK; +} +long WINAPI +libAVPin_NewSegment(libAVPin *this, REFERENCE_TIME start, REFERENCE_TIME stop, + double rate) +{ + dshowdebug("libAVPin_NewSegment(%p)\n", this); + /* I don't care. 
*/ + return S_OK; +} + +static int +libAVPin_Setup(libAVPin *this, libAVFilter *filter) +{ + IPinVtbl *vtbl = this->vtbl; + IMemInputPinVtbl *imemvtbl; + + if (!filter) + return 0; + + imemvtbl = av_malloc(sizeof(IMemInputPinVtbl)); + if (!imemvtbl) + return 0; + + SETVTBL(imemvtbl, libAVMemInputPin, QueryInterface); + SETVTBL(imemvtbl, libAVMemInputPin, AddRef); + SETVTBL(imemvtbl, libAVMemInputPin, Release); + SETVTBL(imemvtbl, libAVMemInputPin, GetAllocator); + SETVTBL(imemvtbl, libAVMemInputPin, NotifyAllocator); + SETVTBL(imemvtbl, libAVMemInputPin, GetAllocatorRequirements); + SETVTBL(imemvtbl, libAVMemInputPin, Receive); + SETVTBL(imemvtbl, libAVMemInputPin, ReceiveMultiple); + SETVTBL(imemvtbl, libAVMemInputPin, ReceiveCanBlock); + + this->imemvtbl = imemvtbl; + + SETVTBL(vtbl, libAVPin, QueryInterface); + SETVTBL(vtbl, libAVPin, AddRef); + SETVTBL(vtbl, libAVPin, Release); + SETVTBL(vtbl, libAVPin, Connect); + SETVTBL(vtbl, libAVPin, ReceiveConnection); + SETVTBL(vtbl, libAVPin, Disconnect); + SETVTBL(vtbl, libAVPin, ConnectedTo); + SETVTBL(vtbl, libAVPin, ConnectionMediaType); + SETVTBL(vtbl, libAVPin, QueryPinInfo); + SETVTBL(vtbl, libAVPin, QueryDirection); + SETVTBL(vtbl, libAVPin, QueryId); + SETVTBL(vtbl, libAVPin, QueryAccept); + SETVTBL(vtbl, libAVPin, EnumMediaTypes); + SETVTBL(vtbl, libAVPin, QueryInternalConnections); + SETVTBL(vtbl, libAVPin, EndOfStream); + SETVTBL(vtbl, libAVPin, BeginFlush); + SETVTBL(vtbl, libAVPin, EndFlush); + SETVTBL(vtbl, libAVPin, NewSegment); + + this->filter = filter; + + return 1; +} +DECLARE_CREATE(libAVPin, libAVPin_Setup(this, filter), libAVFilter *filter) +DECLARE_DESTROY(libAVPin, nothing) + +/***************************************************************************** + * libAVMemInputPin + ****************************************************************************/ +long WINAPI +libAVMemInputPin_QueryInterface(libAVMemInputPin *this, const GUID *riid, + void **ppvObject) +{ + libAVPin *pin = (libAVPin *) 
((uint8_t *) this - imemoffset); + dshowdebug("libAVMemInputPin_QueryInterface(%p)\n", this); + return libAVPin_QueryInterface(pin, riid, ppvObject); +} +unsigned long WINAPI +libAVMemInputPin_AddRef(libAVMemInputPin *this) +{ + libAVPin *pin = (libAVPin *) ((uint8_t *) this - imemoffset); + dshowdebug("libAVMemInputPin_AddRef(%p)\n", this); + return libAVPin_AddRef(pin); +} +unsigned long WINAPI +libAVMemInputPin_Release(libAVMemInputPin *this) +{ + libAVPin *pin = (libAVPin *) ((uint8_t *) this - imemoffset); + dshowdebug("libAVMemInputPin_Release(%p)\n", this); + return libAVPin_Release(pin); +} +long WINAPI +libAVMemInputPin_GetAllocator(libAVMemInputPin *this, IMemAllocator **alloc) +{ + dshowdebug("libAVMemInputPin_GetAllocator(%p)\n", this); + return VFW_E_NO_ALLOCATOR; +} +long WINAPI +libAVMemInputPin_NotifyAllocator(libAVMemInputPin *this, IMemAllocator *alloc, + WINBOOL rdwr) +{ + dshowdebug("libAVMemInputPin_NotifyAllocator(%p)\n", this); + return S_OK; +} +long WINAPI +libAVMemInputPin_GetAllocatorRequirements(libAVMemInputPin *this, + ALLOCATOR_PROPERTIES *props) +{ + dshowdebug("libAVMemInputPin_GetAllocatorRequirements(%p)\n", this); + return E_NOTIMPL; +} +long WINAPI +libAVMemInputPin_Receive(libAVMemInputPin *this, IMediaSample *sample) +{ + libAVPin *pin = (libAVPin *) ((uint8_t *) this - imemoffset); + enum dshowDeviceType devtype = pin->filter->type; + void *priv_data; + uint8_t *buf; + int buf_size; + int index; + int64_t curtime; + + dshowdebug("libAVMemInputPin_Receive(%p)\n", this); + + if (!sample) + return E_POINTER; + + if (devtype == VideoDevice) { + /* PTS from video devices is unreliable. 
*/ + IReferenceClock *clock = pin->filter->clock; + IReferenceClock_GetTime(clock, &curtime); + } else { + int64_t dummy; + IMediaSample_GetTime(sample, &curtime, &dummy); + curtime += pin->filter->start_time; + } + + buf_size = IMediaSample_GetActualDataLength(sample); + IMediaSample_GetPointer(sample, &buf); + priv_data = pin->filter->priv_data; + index = pin->filter->stream_index; + + pin->filter->callback(priv_data, index, buf, buf_size, curtime); + + return S_OK; +} +long WINAPI +libAVMemInputPin_ReceiveMultiple(libAVMemInputPin *this, + IMediaSample **samples, long n, long *nproc) +{ + int i; + dshowdebug("libAVMemInputPin_ReceiveMultiple(%p)\n", this); + + for (i = 0; i < n; i++) + libAVMemInputPin_Receive(this, samples[i]); + + *nproc = n; + return S_OK; +} +long WINAPI +libAVMemInputPin_ReceiveCanBlock(libAVMemInputPin *this) +{ + dshowdebug("libAVMemInputPin_ReceiveCanBlock(%p)\n", this); + /* I swear I will not block. */ + return S_FALSE; +} + +void +libAVMemInputPin_Destroy(libAVMemInputPin *this) +{ + libAVPin *pin = (libAVPin *) ((uint8_t *) this - imemoffset); + dshowdebug("libAVMemInputPin_Destroy(%p)\n", this); + return libAVPin_Destroy(pin); +} diff --git a/libavdevice/dv1394.c b/libavdevice/dv1394.c index a9bc058dc9..63eff2aec8 100644 --- a/libavdevice/dv1394.c +++ b/libavdevice/dv1394.c @@ -30,14 +30,14 @@ #include <time.h> #include <strings.h> -#include "libavformat/avformat.h" - -#undef DV1394_DEBUG - +#include "libavutil/log.h" +#include "libavutil/opt.h" +#include "avdevice.h" #include "libavformat/dv.h" #include "dv1394.h" struct dv1394_data { + AVClass *class; int fd; int channel; int format; @@ -90,15 +90,17 @@ static int dv1394_read_header(AVFormatContext * context, AVFormatParameters * ap if (!dv->dv_demux) goto failed; - if (ap->standard && !strcasecmp(ap->standard, "pal")) - dv->format = DV1394_PAL; - else - dv->format = DV1394_NTSC; +#if FF_API_FORMAT_PARAMETERS + if (ap->standard) { + if (!strcasecmp(ap->standard, "pal")) + 
dv->format = DV1394_PAL; + else + dv->format = DV1394_NTSC; + } if (ap->channel) dv->channel = ap->channel; - else - dv->channel = DV1394_DEFAULT_CHANNEL; +#endif /* Open and initialize DV1394 device */ dv->fd = open(context->filename, O_RDONLY); @@ -172,15 +174,13 @@ restart_poll: av_log(context, AV_LOG_ERROR, "Failed to get status: %s\n", strerror(errno)); return AVERROR(EIO); } -#ifdef DV1394_DEBUG - av_log(context, AV_LOG_DEBUG, "DV1394: status\n" + av_dlog(context, "DV1394: status\n" "\tactive_frame\t%d\n" "\tfirst_clear_frame\t%d\n" "\tn_clear_frames\t%d\n" "\tdropped_frames\t%d\n", s.active_frame, s.first_clear_frame, s.n_clear_frames, s.dropped_frames); -#endif dv->avail = s.n_clear_frames; dv->index = s.first_clear_frame; @@ -195,10 +195,8 @@ restart_poll: } } -#ifdef DV1394_DEBUG - av_log(context, AV_LOG_DEBUG, "index %d, avail %d, done %d\n", dv->index, dv->avail, + av_dlog(context, "index %d, avail %d, done %d\n", dv->index, dv->avail, dv->done); -#endif size = dv_produce_packet(dv->dv_demux, pkt, dv->ring + (dv->index * DV1394_PAL_FRAME_SIZE), @@ -227,6 +225,21 @@ static int dv1394_close(AVFormatContext * context) return 0; } +static const AVOption options[] = { + { "standard", "", offsetof(struct dv1394_data, format), FF_OPT_TYPE_INT, {.dbl = DV1394_NTSC}, DV1394_PAL, DV1394_NTSC, AV_OPT_FLAG_DECODING_PARAM, "standard" }, + { "PAL", "", 0, FF_OPT_TYPE_CONST, {.dbl = DV1394_PAL}, 0, 0, AV_OPT_FLAG_DECODING_PARAM, "standard" }, + { "NTSC", "", 0, FF_OPT_TYPE_CONST, {.dbl = DV1394_NTSC}, 0, 0, AV_OPT_FLAG_DECODING_PARAM, "standard" }, + { "channel", "", offsetof(struct dv1394_data, channel), FF_OPT_TYPE_INT, {.dbl = DV1394_DEFAULT_CHANNEL}, 0, INT_MAX, AV_OPT_FLAG_DECODING_PARAM }, + { NULL }, +}; + +static const AVClass dv1394_class = { + .class_name = "DV1394 indev", + .item_name = av_default_item_name, + .option = options, + .version = LIBAVUTIL_VERSION_INT, +}; + AVInputFormat ff_dv1394_demuxer = { .name = "dv1394", .long_name = 
NULL_IF_CONFIG_SMALL("DV1394 A/V grab"), @@ -234,5 +247,6 @@ AVInputFormat ff_dv1394_demuxer = { .read_header = dv1394_read_header, .read_packet = dv1394_read_packet, .read_close = dv1394_close, - .flags = AVFMT_NOFILE + .flags = AVFMT_NOFILE, + .priv_class = &dv1394_class, }; diff --git a/libavdevice/fbdev.c b/libavdevice/fbdev.c index 4f670e9875..d5ba561db8 100644 --- a/libavdevice/fbdev.c +++ b/libavdevice/fbdev.c @@ -37,9 +37,12 @@ #include <time.h> #include <linux/fb.h> +#include "libavutil/log.h" #include "libavutil/mem.h" +#include "libavutil/opt.h" +#include "libavutil/parseutils.h" #include "libavutil/pixdesc.h" -#include "libavformat/avformat.h" +#include "avdevice.h" struct rgb_pixfmt_map_entry { int bits_per_pixel; @@ -74,8 +77,10 @@ static enum PixelFormat get_pixfmt_from_fb_varinfo(struct fb_var_screeninfo *var } typedef struct { + AVClass *class; ///< class for private options int frame_size; ///< size in bytes of a grabbed frame - AVRational time_base; ///< time base + AVRational fps; ///< framerate + char *framerate; ///< framerate string set by a private option int64_t time_frame; ///< time for the next frame to output (in 1/1000000 units) int fd; ///< framebuffer device file descriptor @@ -97,16 +102,20 @@ av_cold static int fbdev_read_header(AVFormatContext *avctx, enum PixelFormat pix_fmt; int ret, flags = O_RDONLY; + ret = av_parse_video_rate(&fbdev->fps, fbdev->framerate); + if (ret < 0) { + av_log(avctx, AV_LOG_ERROR, "Couldn't parse framerate.\n"); + return ret; + } +#if FF_API_FORMAT_PARAMETERS + if (ap->time_base.num) + fbdev->fps = (AVRational){ap->time_base.den, ap->time_base.num}; +#endif + if (!(st = av_new_stream(avctx, 0))) return AVERROR(ENOMEM); av_set_pts_info(st, 64, 1, 1000000); /* 64 bits pts in microseconds */ - if (ap->time_base.den <= 0) { - av_log(avctx, AV_LOG_ERROR, "Invalid time base %d/%d\n", - ap->time_base.num, ap->time_base.den); - return AVERROR(EINVAL); - } - /* NONBLOCK is ignored by the fbdev driver, only set 
for consistency */ if (avctx->flags & AVFMT_FLAG_NONBLOCK) flags |= O_NONBLOCK; @@ -146,7 +155,6 @@ av_cold static int fbdev_read_header(AVFormatContext *avctx, fbdev->bytes_per_pixel = (fbdev->varinfo.bits_per_pixel + 7) >> 3; fbdev->frame_linesize = fbdev->width * fbdev->bytes_per_pixel; fbdev->frame_size = fbdev->frame_linesize * fbdev->heigth; - fbdev->time_base = ap->time_base; fbdev->time_frame = AV_NOPTS_VALUE; fbdev->data = mmap(NULL, fbdev->fixinfo.smem_len, PROT_READ, MAP_SHARED, fbdev->fd, 0); if (fbdev->data == MAP_FAILED) { @@ -160,15 +168,15 @@ av_cold static int fbdev_read_header(AVFormatContext *avctx, st->codec->width = fbdev->width; st->codec->height = fbdev->heigth; st->codec->pix_fmt = pix_fmt; - st->codec->time_base = ap->time_base; + st->codec->time_base = (AVRational){fbdev->fps.den, fbdev->fps.num}; st->codec->bit_rate = - fbdev->width * fbdev->heigth * fbdev->bytes_per_pixel / av_q2d(ap->time_base) * 8; + fbdev->width * fbdev->heigth * fbdev->bytes_per_pixel * av_q2d(fbdev->fps) * 8; av_log(avctx, AV_LOG_INFO, - "w:%d h:%d bpp:%d pixfmt:%s tb:%d/%d bit_rate:%d\n", + "w:%d h:%d bpp:%d pixfmt:%s fps:%d/%d bit_rate:%d\n", fbdev->width, fbdev->heigth, fbdev->varinfo.bits_per_pixel, av_pix_fmt_descriptors[pix_fmt].name, - ap->time_base.num, ap->time_base.den, + fbdev->fps.num, fbdev->fps.den, st->codec->bit_rate); return 0; @@ -196,7 +204,7 @@ static int fbdev_read_packet(AVFormatContext *avctx, AVPacket *pkt) "time_frame:%"PRId64" curtime:%"PRId64" delay:%"PRId64"\n", fbdev->time_frame, curtime, delay); if (delay <= 0) { - fbdev->time_frame += INT64_C(1000000) * av_q2d(fbdev->time_base); + fbdev->time_frame += INT64_C(1000000) / av_q2d(fbdev->fps); break; } if (avctx->flags & AVFMT_FLAG_NONBLOCK) @@ -240,6 +248,20 @@ av_cold static int fbdev_read_close(AVFormatContext *avctx) return 0; } +#define OFFSET(x) offsetof(FBDevContext, x) +#define DEC AV_OPT_FLAG_DECODING_PARAM +static const AVOption options[] = { + { "framerate","", 
OFFSET(framerate), FF_OPT_TYPE_STRING, {.str = "25"}, 0, 0, DEC }, + { NULL }, +}; + +static const AVClass fbdev_class = { + .class_name = "fbdev indev", + .item_name = av_default_item_name, + .option = options, + .version = LIBAVUTIL_VERSION_INT, +}; + AVInputFormat ff_fbdev_demuxer = { .name = "fbdev", .long_name = NULL_IF_CONFIG_SMALL("Linux framebuffer"), @@ -248,4 +270,5 @@ AVInputFormat ff_fbdev_demuxer = { .read_packet = fbdev_read_packet, .read_close = fbdev_read_close, .flags = AVFMT_NOFILE, + .priv_class = &fbdev_class, }; diff --git a/libavdevice/jack_audio.c b/libavdevice/jack_audio.c index 9062e7f2dd..f78a2c9164 100644 --- a/libavdevice/jack_audio.c +++ b/libavdevice/jack_audio.c @@ -27,8 +27,8 @@ #include "libavutil/log.h" #include "libavutil/fifo.h" #include "libavcodec/avcodec.h" -#include "libavformat/avformat.h" #include "libavformat/timefilter.h" +#include "avdevice.h" /** * Size of the internal FIFO buffers as a number of audio packets diff --git a/libavdevice/libdc1394.c b/libavdevice/libdc1394.c index abd82dc981..20170877dc 100644 --- a/libavdevice/libdc1394.c +++ b/libavdevice/libdc1394.c @@ -2,6 +2,7 @@ * IIDC1394 grab interface (uses libdc1394 and libraw1394) * Copyright (c) 2004 Roman Shaposhnik * Copyright (c) 2008 Alessandro Sappia + * Copyright (c) 2011 Martin Lambers * * This file is part of FFmpeg. 
* @@ -21,54 +22,48 @@ */ #include "config.h" -#include "libavformat/avformat.h" +#include "libavutil/log.h" +#include "libavutil/opt.h" +#include "avdevice.h" + +#include <stdlib.h> +#include <string.h> +#include "libavutil/parseutils.h" +#include "libavutil/pixdesc.h" -#if HAVE_LIBDC1394_2 #include <dc1394/dc1394.h> -#elif HAVE_LIBDC1394_1 -#include <libraw1394/raw1394.h> -#include <libdc1394/dc1394_control.h> - -#define DC1394_VIDEO_MODE_320x240_YUV422 MODE_320x240_YUV422 -#define DC1394_VIDEO_MODE_640x480_YUV411 MODE_640x480_YUV411 -#define DC1394_VIDEO_MODE_640x480_YUV422 MODE_640x480_YUV422 -#define DC1394_FRAMERATE_1_875 FRAMERATE_1_875 -#define DC1394_FRAMERATE_3_75 FRAMERATE_3_75 -#define DC1394_FRAMERATE_7_5 FRAMERATE_7_5 -#define DC1394_FRAMERATE_15 FRAMERATE_15 -#define DC1394_FRAMERATE_30 FRAMERATE_30 -#define DC1394_FRAMERATE_60 FRAMERATE_60 -#define DC1394_FRAMERATE_120 FRAMERATE_120 -#define DC1394_FRAMERATE_240 FRAMERATE_240 -#endif #undef free typedef struct dc1394_data { -#if HAVE_LIBDC1394_1 - raw1394handle_t handle; - dc1394_cameracapture camera; -#elif HAVE_LIBDC1394_2 + AVClass *class; dc1394_t *d; dc1394camera_t *camera; dc1394video_frame_t *frame; -#endif int current_frame; - int fps; + int frame_rate; /**< frames per 1000 seconds (fps * 1000) */ + char *video_size; /**< String describing video size, set by a private option. */ + char *pixel_format; /**< Set by a private option. */ + char *framerate; /**< Set by a private option. */ AVPacket packet; } dc1394_data; -struct dc1394_frame_format { - int width; - int height; - enum PixelFormat pix_fmt; - int frame_size_id; -} dc1394_frame_formats[] = { - { 320, 240, PIX_FMT_UYVY422, DC1394_VIDEO_MODE_320x240_YUV422 }, - { 640, 480, PIX_FMT_UYYVYY411, DC1394_VIDEO_MODE_640x480_YUV411 }, - { 640, 480, PIX_FMT_UYVY422, DC1394_VIDEO_MODE_640x480_YUV422 }, - { 0, 0, 0, 0 } /* gotta be the last one */ +/* The list of color codings that we support. 
+ * We assume big endian for the dc1394 16bit modes: libdc1394 never sets the + * flag little_endian in dc1394video_frame_t. */ +struct dc1394_color_coding { + int pix_fmt; + int score; + uint32_t coding; +} dc1394_color_codings[] = { + { PIX_FMT_GRAY16BE, 1000, DC1394_COLOR_CODING_MONO16 }, + { PIX_FMT_RGB48BE, 1100, DC1394_COLOR_CODING_RGB16 }, + { PIX_FMT_GRAY8, 1200, DC1394_COLOR_CODING_MONO8 }, + { PIX_FMT_RGB24, 1300, DC1394_COLOR_CODING_RGB8 }, + { PIX_FMT_UYYVYY411, 1400, DC1394_COLOR_CODING_YUV411 }, + { PIX_FMT_UYVY422, 1500, DC1394_COLOR_CODING_YUV422 }, + { PIX_FMT_NONE, 0, 0 } /* gotta be the last one */ }; struct dc1394_frame_rate { @@ -86,167 +81,39 @@ struct dc1394_frame_rate { { 0, 0 } /* gotta be the last one */ }; -static inline int dc1394_read_common(AVFormatContext *c, AVFormatParameters *ap, - struct dc1394_frame_format **select_fmt, struct dc1394_frame_rate **select_fps) -{ - dc1394_data* dc1394 = c->priv_data; - AVStream* vst; - struct dc1394_frame_format *fmt; - struct dc1394_frame_rate *fps; - enum PixelFormat pix_fmt = ap->pix_fmt == PIX_FMT_NONE ? PIX_FMT_UYVY422 : ap->pix_fmt; /* defaults */ - int width = !ap->width ? 320 : ap->width; - int height = !ap->height ? 240 : ap->height; - int frame_rate = !ap->time_base.num ? 
30000 : av_rescale(1000, ap->time_base.den, ap->time_base.num); - - for (fmt = dc1394_frame_formats; fmt->width; fmt++) - if (fmt->pix_fmt == pix_fmt && fmt->width == width && fmt->height == height) - break; - - for (fps = dc1394_frame_rates; fps->frame_rate; fps++) - if (fps->frame_rate == frame_rate) - break; - - if (!fps->frame_rate || !fmt->width) { - av_log(c, AV_LOG_ERROR, "Can't find matching camera format for %s, %dx%d@%d:1000fps\n", avcodec_get_pix_fmt_name(pix_fmt), - width, height, frame_rate); - goto out; - } - - /* create a video stream */ - vst = av_new_stream(c, 0); - if (!vst) - goto out; - av_set_pts_info(vst, 64, 1, 1000); - vst->codec->codec_type = AVMEDIA_TYPE_VIDEO; - vst->codec->codec_id = CODEC_ID_RAWVIDEO; - vst->codec->time_base.den = fps->frame_rate; - vst->codec->time_base.num = 1000; - vst->codec->width = fmt->width; - vst->codec->height = fmt->height; - vst->codec->pix_fmt = fmt->pix_fmt; - - /* packet init */ - av_init_packet(&dc1394->packet); - dc1394->packet.size = avpicture_get_size(fmt->pix_fmt, fmt->width, fmt->height); - dc1394->packet.stream_index = vst->index; - dc1394->packet.flags |= AV_PKT_FLAG_KEY; - - dc1394->current_frame = 0; - dc1394->fps = fps->frame_rate; - - vst->codec->bit_rate = av_rescale(dc1394->packet.size * 8, fps->frame_rate, 1000); - *select_fps = fps; - *select_fmt = fmt; - return 0; -out: - return -1; -} - -#if HAVE_LIBDC1394_1 -static int dc1394_v1_read_header(AVFormatContext *c, AVFormatParameters * ap) -{ - dc1394_data* dc1394 = c->priv_data; - AVStream* vst; - nodeid_t* camera_nodes; - int res; - struct dc1394_frame_format *fmt = NULL; - struct dc1394_frame_rate *fps = NULL; - - if (dc1394_read_common(c,ap,&fmt,&fps) != 0) - return -1; - - /* Now let us prep the hardware. 
*/ - dc1394->handle = dc1394_create_handle(0); /* FIXME: gotta have ap->port */ - if (!dc1394->handle) { - av_log(c, AV_LOG_ERROR, "Can't acquire dc1394 handle on port %d\n", 0 /* ap->port */); - goto out; - } - camera_nodes = dc1394_get_camera_nodes(dc1394->handle, &res, 1); - if (!camera_nodes || camera_nodes[ap->channel] == DC1394_NO_CAMERA) { - av_log(c, AV_LOG_ERROR, "There's no IIDC camera on the channel %d\n", ap->channel); - goto out_handle; - } - res = dc1394_dma_setup_capture(dc1394->handle, camera_nodes[ap->channel], - 0, - FORMAT_VGA_NONCOMPRESSED, - fmt->frame_size_id, - SPEED_400, - fps->frame_rate_id, 8, 1, - c->filename, - &dc1394->camera); - dc1394_free_camera_nodes(camera_nodes); - if (res != DC1394_SUCCESS) { - av_log(c, AV_LOG_ERROR, "Can't prepare camera for the DMA capture\n"); - goto out_handle; - } - - res = dc1394_start_iso_transmission(dc1394->handle, dc1394->camera.node); - if (res != DC1394_SUCCESS) { - av_log(c, AV_LOG_ERROR, "Can't start isochronous transmission\n"); - goto out_handle_dma; - } - - return 0; - -out_handle_dma: - dc1394_dma_unlisten(dc1394->handle, &dc1394->camera); - dc1394_dma_release_camera(dc1394->handle, &dc1394->camera); -out_handle: - dc1394_destroy_handle(dc1394->handle); -out: - return -1; -} - -static int dc1394_v1_read_packet(AVFormatContext *c, AVPacket *pkt) -{ - struct dc1394_data *dc1394 = c->priv_data; - int res; - - /* discard stale frame */ - if (dc1394->current_frame++) { - if (dc1394_dma_done_with_buffer(&dc1394->camera) != DC1394_SUCCESS) - av_log(c, AV_LOG_ERROR, "failed to release %d frame\n", dc1394->current_frame); - } - - res = dc1394_dma_single_capture(&dc1394->camera); - - if (res == DC1394_SUCCESS) { - dc1394->packet.data = (uint8_t *)(dc1394->camera.capture_buffer); - dc1394->packet.pts = (dc1394->current_frame * 1000000) / dc1394->fps; - res = dc1394->packet.size; - } else { - av_log(c, AV_LOG_ERROR, "DMA capture failed\n"); - dc1394->packet.data = NULL; - res = -1; - } - - *pkt = 
dc1394->packet; - return res; -} - -static int dc1394_v1_close(AVFormatContext * context) -{ - struct dc1394_data *dc1394 = context->priv_data; - - dc1394_stop_iso_transmission(dc1394->handle, dc1394->camera.node); - dc1394_dma_unlisten(dc1394->handle, &dc1394->camera); - dc1394_dma_release_camera(dc1394->handle, &dc1394->camera); - dc1394_destroy_handle(dc1394->handle); +#define OFFSET(x) offsetof(dc1394_data, x) +#define DEC AV_OPT_FLAG_DECODING_PARAM +static const AVOption options[] = { + { "video_size", "A string describing frame size, such as 640x480 or hd720.", OFFSET(video_size), FF_OPT_TYPE_STRING, {.str = "qvga"}, 0, 0, DEC }, + { "pixel_format", "", OFFSET(pixel_format), FF_OPT_TYPE_STRING, {.str = "uyvy422"}, 0, 0, DEC }, + { "framerate", "", OFFSET(framerate), FF_OPT_TYPE_STRING, {.str = "ntsc"}, 0, 0, DEC }, + { NULL }, +}; - return 0; -} +static const AVClass libdc1394_class = { + .class_name = "libdc1394 indev", + .item_name = av_default_item_name, + .option = options, + .version = LIBAVUTIL_VERSION_INT, +}; -#elif HAVE_LIBDC1394_2 -static int dc1394_v2_read_header(AVFormatContext *c, AVFormatParameters * ap) +static int dc1394_read_header(AVFormatContext *c, AVFormatParameters * ap) { dc1394_data* dc1394 = c->priv_data; + AVStream *vst; + const struct dc1394_color_coding *cc; + const struct dc1394_frame_rate *fr; dc1394camera_list_t *list; - int res, i; - struct dc1394_frame_format *fmt = NULL; - struct dc1394_frame_rate *fps = NULL; - - if (dc1394_read_common(c,ap,&fmt,&fps) != 0) - return -1; + dc1394video_modes_t video_modes; + dc1394video_mode_t video_mode; + dc1394framerates_t frame_rates; + dc1394framerate_t frame_rate; + uint32_t dc1394_width, dc1394_height, dc1394_color_coding; + int rate, best_rate; + int score, max_score; + int final_width, final_height, final_pix_fmt, final_frame_rate; + int res, i, j; + int ret=-1; /* Now let us prep the hardware. 
*/ dc1394->d = dc1394_new(); @@ -265,6 +132,149 @@ static int dc1394_v2_read_header(AVFormatContext *c, AVFormatParameters * ap) /* Freeing list of cameras */ dc1394_camera_free_list (list); + /* Get the list of video modes supported by the camera. */ + res = dc1394_video_get_supported_modes (dc1394->camera, &video_modes); + if (res != DC1394_SUCCESS) { + av_log(c, AV_LOG_ERROR, "Could not get video formats.\n"); + goto out_camera; + } + + if (dc1394->pixel_format) { + if ((ap->pix_fmt = av_get_pix_fmt(dc1394->pixel_format)) == PIX_FMT_NONE) { + av_log(c, AV_LOG_ERROR, "No such pixel format: %s.\n", dc1394->pixel_format); + ret = AVERROR(EINVAL); + goto out; + } + } + + if (dc1394->video_size) { + if ((ret = av_parse_video_size(&ap->width, &ap->height, dc1394->video_size)) < 0) { + av_log(c, AV_LOG_ERROR, "Couldn't parse video size.\n"); + goto out; + } + } + + /* Choose the best mode. */ + rate = (ap->time_base.num ? av_rescale(1000, ap->time_base.den, ap->time_base.num) : -1); + max_score = -1; + for (i = 0; i < video_modes.num; i++) { + if (video_modes.modes[i] == DC1394_VIDEO_MODE_EXIF + || (video_modes.modes[i] >= DC1394_VIDEO_MODE_FORMAT7_MIN + && video_modes.modes[i] <= DC1394_VIDEO_MODE_FORMAT7_MAX)) { + /* These modes are currently not supported as they would require + * much more work. For the remaining modes, the functions + * dc1394_get_image_size_from_video_mode and + * dc1394_get_color_coding_from_video_mode do not need to query the + * camera, and thus cannot fail. */ + continue; + } + dc1394_get_color_coding_from_video_mode (NULL, video_modes.modes[i], + &dc1394_color_coding); + for (cc = dc1394_color_codings; cc->pix_fmt != PIX_FMT_NONE; cc++) + if (cc->coding == dc1394_color_coding) + break; + if (cc->pix_fmt == PIX_FMT_NONE) { + /* We currently cannot handle this color coding. */ + continue; + } + /* Here we know that the mode is supported. Get its frame size and the list + * of frame rates supported by the camera for this mode. 
This list is sorted + * in ascending order according to libdc1394 example programs. */ + dc1394_get_image_size_from_video_mode (NULL, video_modes.modes[i], + &dc1394_width, &dc1394_height); + res = dc1394_video_get_supported_framerates (dc1394->camera, video_modes.modes[i], + &frame_rates); + if (res != DC1394_SUCCESS || frame_rates.num == 0) { + av_log(c, AV_LOG_ERROR, "Cannot get frame rates for video mode.\n"); + goto out_camera; + } + /* Choose the best frame rate. */ + best_rate = -1; + for (j = 0; j < frame_rates.num; j++) { + for (fr = dc1394_frame_rates; fr->frame_rate; fr++) { + if (fr->frame_rate_id == frame_rates.framerates[j]) { + break; + } + } + if (!fr->frame_rate) { + /* This frame rate is not supported. */ + continue; + } + best_rate = fr->frame_rate; + frame_rate = fr->frame_rate_id; + if (ap->time_base.num && rate == fr->frame_rate) { + /* This is the requested frame rate. */ + break; + } + } + if (best_rate == -1) { + /* No supported rate found. */ + continue; + } + /* Here we know that both the mode and the rate are supported. Compute score. 
*/ + if (ap->width && ap->height + && (dc1394_width == ap->width && dc1394_height == ap->height)) { + score = 110000; + } else { + score = dc1394_width * 10; // 1600 - 16000 + } + if (ap->pix_fmt == cc->pix_fmt) { + score += 90000; + } else { + score += cc->score; // 1000 - 1500 + } + if (ap->time_base.num && rate == best_rate) { + score += 70000; + } else { + score += best_rate / 1000; // 1 - 240 + } + if (score > max_score) { + video_mode = video_modes.modes[i]; + final_width = dc1394_width; + final_height = dc1394_height; + final_pix_fmt = cc->pix_fmt; + final_frame_rate = best_rate; + max_score = score; + } + } + if (max_score == -1) { + av_log(c, AV_LOG_ERROR, "No suitable video mode / frame rate available.\n"); + goto out_camera; + } + if (ap->width && ap->height && !(ap->width == final_width && ap->height == final_height)) { + av_log(c, AV_LOG_WARNING, "Requested frame size is not available, using fallback.\n"); + } + if (ap->pix_fmt != PIX_FMT_NONE && ap->pix_fmt != final_pix_fmt) { + av_log(c, AV_LOG_WARNING, "Requested pixel format is not supported, using fallback.\n"); + } + if (ap->time_base.num && rate != final_frame_rate) { + av_log(c, AV_LOG_WARNING, "Requested frame rate is not available, using fallback.\n"); + } + + /* create a video stream */ + vst = av_new_stream(c, 0); + if (!vst) + goto out_camera; + av_set_pts_info(vst, 64, 1, 1000); + vst->codec->codec_type = AVMEDIA_TYPE_VIDEO; + vst->codec->codec_id = CODEC_ID_RAWVIDEO; + vst->codec->time_base.den = final_frame_rate; + vst->codec->time_base.num = 1000; + vst->codec->width = final_width; + vst->codec->height = final_height; + vst->codec->pix_fmt = final_pix_fmt; + + /* packet init */ + av_init_packet(&dc1394->packet); + dc1394->packet.size = avpicture_get_size(final_pix_fmt, final_width, final_height); + dc1394->packet.stream_index = vst->index; + dc1394->packet.flags |= AV_PKT_FLAG_KEY; + + dc1394->current_frame = 0; + dc1394->frame_rate = final_frame_rate; + + vst->codec->bit_rate = 
av_rescale(dc1394->packet.size * 8, final_frame_rate, 1000); + /* Select MAX Speed possible from the cam */ if (dc1394->camera->bmode_capable>0) { dc1394_video_set_operation_mode(dc1394->camera, DC1394_OPERATION_MODE_1394B); @@ -281,13 +291,13 @@ static int dc1394_v2_read_header(AVFormatContext *c, AVFormatParameters * ap) goto out_camera; } - if (dc1394_video_set_mode(dc1394->camera, fmt->frame_size_id) != DC1394_SUCCESS) { + if (dc1394_video_set_mode(dc1394->camera, video_mode) != DC1394_SUCCESS) { av_log(c, AV_LOG_ERROR, "Couldn't set video format\n"); goto out_camera; } - if (dc1394_video_set_framerate(dc1394->camera,fps->frame_rate_id) != DC1394_SUCCESS) { - av_log(c, AV_LOG_ERROR, "Couldn't set framerate %d \n",fps->frame_rate); + if (dc1394_video_set_framerate(dc1394->camera, frame_rate) != DC1394_SUCCESS) { + av_log(c, AV_LOG_ERROR, "Could not set framerate %d.\n", final_frame_rate); goto out_camera; } if (dc1394_capture_setup(dc1394->camera, 10, DC1394_CAPTURE_FLAGS_DEFAULT)!=DC1394_SUCCESS) { @@ -307,10 +317,10 @@ out_camera: dc1394_camera_free (dc1394->camera); out: dc1394_free(dc1394->d); - return -1; + return ret; } -static int dc1394_v2_read_packet(AVFormatContext *c, AVPacket *pkt) +static int dc1394_read_packet(AVFormatContext *c, AVPacket *pkt) { struct dc1394_data *dc1394 = c->priv_data; int res; @@ -324,7 +334,7 @@ static int dc1394_v2_read_packet(AVFormatContext *c, AVPacket *pkt) res = dc1394_capture_dequeue(dc1394->camera, DC1394_CAPTURE_POLICY_WAIT, &dc1394->frame); if (res == DC1394_SUCCESS) { dc1394->packet.data = (uint8_t *)(dc1394->frame->image); - dc1394->packet.pts = (dc1394->current_frame * 1000000) / (dc1394->fps); + dc1394->packet.pts = (dc1394->current_frame * 1000000) / (dc1394->frame_rate); res = dc1394->frame->image_bytes; } else { av_log(c, AV_LOG_ERROR, "DMA capture failed\n"); @@ -336,7 +346,7 @@ static int dc1394_v2_read_packet(AVFormatContext *c, AVPacket *pkt) return res; } -static int dc1394_v2_close(AVFormatContext * 
context) +static int dc1394_close(AVFormatContext * context) { struct dc1394_data *dc1394 = context->priv_data; @@ -350,23 +360,11 @@ static int dc1394_v2_close(AVFormatContext * context) AVInputFormat ff_libdc1394_demuxer = { .name = "libdc1394", - .long_name = NULL_IF_CONFIG_SMALL("dc1394 v.2 A/V grab"), - .priv_data_size = sizeof(struct dc1394_data), - .read_header = dc1394_v2_read_header, - .read_packet = dc1394_v2_read_packet, - .read_close = dc1394_v2_close, - .flags = AVFMT_NOFILE -}; - -#endif -#if HAVE_LIBDC1394_1 -AVInputFormat ff_libdc1394_demuxer = { - .name = "libdc1394", - .long_name = NULL_IF_CONFIG_SMALL("dc1394 v.1 A/V grab"), + .long_name = NULL_IF_CONFIG_SMALL("dc1394 A/V grab"), .priv_data_size = sizeof(struct dc1394_data), - .read_header = dc1394_v1_read_header, - .read_packet = dc1394_v1_read_packet, - .read_close = dc1394_v1_close, - .flags = AVFMT_NOFILE + .read_header = dc1394_read_header, + .read_packet = dc1394_read_packet, + .read_close = dc1394_close, + .flags = AVFMT_NOFILE, + .priv_class = &libdc1394_class, }; -#endif diff --git a/libavdevice/oss_audio.c b/libavdevice/oss_audio.c index fc5d3c3dd1..2fde491a07 100644 --- a/libavdevice/oss_audio.c +++ b/libavdevice/oss_audio.c @@ -37,12 +37,14 @@ #include <sys/select.h> #include "libavutil/log.h" +#include "libavutil/opt.h" #include "libavcodec/avcodec.h" -#include "libavformat/avformat.h" +#include "avdevice.h" #define AUDIO_BLOCK_SIZE 4096 typedef struct { + AVClass *class; int fd; int sample_rate; int channels; @@ -214,15 +216,17 @@ static int audio_read_header(AVFormatContext *s1, AVFormatParameters *ap) AVStream *st; int ret; - if (ap->sample_rate <= 0 || ap->channels <= 0) - return -1; +#if FF_API_FORMAT_PARAMETERS + if (ap->sample_rate > 0) + s->sample_rate = ap->sample_rate; + if (ap->channels > 0) + s->channels = ap->channels; +#endif st = av_new_stream(s1, 0); if (!st) { return AVERROR(ENOMEM); } - s->sample_rate = ap->sample_rate; - s->channels = ap->channels; ret = 
audio_open(s1, 0, s1->filename); if (ret < 0) { @@ -291,6 +295,19 @@ static int audio_read_close(AVFormatContext *s1) } #if CONFIG_OSS_INDEV +static const AVOption options[] = { + { "sample_rate", "", offsetof(AudioData, sample_rate), FF_OPT_TYPE_INT, {.dbl = 48000}, 1, INT_MAX, AV_OPT_FLAG_DECODING_PARAM }, + { "channels", "", offsetof(AudioData, channels), FF_OPT_TYPE_INT, {.dbl = 2}, 1, INT_MAX, AV_OPT_FLAG_DECODING_PARAM }, + { NULL }, +}; + +static const AVClass oss_demuxer_class = { + .class_name = "OSS demuxer", + .item_name = av_default_item_name, + .option = options, + .version = LIBAVUTIL_VERSION_INT, +}; + AVInputFormat ff_oss_demuxer = { "oss", NULL_IF_CONFIG_SMALL("Open Sound System capture"), @@ -300,6 +317,7 @@ AVInputFormat ff_oss_demuxer = { audio_read_packet, audio_read_close, .flags = AVFMT_NOFILE, + .priv_class = &oss_demuxer_class, }; #endif diff --git a/libavdevice/sdl.c b/libavdevice/sdl.c new file mode 100644 index 0000000000..4b2566fc63 --- /dev/null +++ b/libavdevice/sdl.c @@ -0,0 +1,230 @@ +/* + * Copyright (c) 2011 Stefano Sabatini + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * libSDL output device + */ + +#include <SDL.h> +#include "libavutil/avstring.h" +#include "libavutil/opt.h" +#include "libavutil/parseutils.h" +#include "libavutil/pixdesc.h" +#include "avdevice.h" + +typedef struct { + AVClass *class; + SDL_Surface *surface; + SDL_Overlay *overlay; + char *window_title; + char *icon_title; + char *window_size; + int window_width, window_height; + int overlay_width, overlay_height; + int overlay_fmt; + int sdl_was_already_inited; +} SDLContext; + +struct sdl_overlay_pix_fmt_entry { + enum PixelFormat pix_fmt; int overlay_fmt; +} sdl_overlay_pix_fmt_map[] = { + { PIX_FMT_YUV420P, SDL_IYUV_OVERLAY }, + { PIX_FMT_YUYV422, SDL_YUY2_OVERLAY }, + { PIX_FMT_UYVY422, SDL_UYVY_OVERLAY }, + { PIX_FMT_NONE, 0 }, +}; + +static int sdl_write_trailer(AVFormatContext *s) +{ + SDLContext *sdl = s->priv_data; + + av_freep(&sdl->window_title); + av_freep(&sdl->icon_title); + av_freep(&sdl->window_size); + + if (sdl->overlay) { + SDL_FreeYUVOverlay(sdl->overlay); + sdl->overlay = NULL; + } + if (!sdl->sdl_was_already_inited) + SDL_Quit(); + + return 0; +} + +static int sdl_write_header(AVFormatContext *s) +{ + SDLContext *sdl = s->priv_data; + AVStream *st = s->streams[0]; + AVCodecContext *encctx = st->codec; + float sar, dar; /* sample and display aspect ratios */ + int i, ret; + + if (!sdl->window_title) + sdl->window_title = av_strdup(s->filename); + if (!sdl->icon_title) + sdl->icon_title = av_strdup(sdl->window_title); + + if (SDL_WasInit(SDL_INIT_VIDEO)) { + av_log(s, AV_LOG_ERROR, + "SDL video subsystem was already inited, aborting.\n"); + sdl->sdl_was_already_inited = 1; + ret = AVERROR(EINVAL); + goto fail; + } + + if (SDL_Init(SDL_INIT_VIDEO) != 0) { + av_log(s, AV_LOG_ERROR, "Unable to initialize 
SDL: %s\n", SDL_GetError()); + ret = AVERROR(EINVAL); + goto fail; + } + + if ( s->nb_streams > 1 + || encctx->codec_type != AVMEDIA_TYPE_VIDEO + || encctx->codec_id != CODEC_ID_RAWVIDEO) { + av_log(s, AV_LOG_ERROR, "Only supports one rawvideo stream\n"); + ret = AVERROR(EINVAL); + goto fail; + } + + for (i = 0; sdl_overlay_pix_fmt_map[i].pix_fmt != PIX_FMT_NONE; i++) { + if (sdl_overlay_pix_fmt_map[i].pix_fmt == encctx->pix_fmt) { + sdl->overlay_fmt = sdl_overlay_pix_fmt_map[i].overlay_fmt; + break; + } + } + + if (!sdl->overlay_fmt) { + av_log(s, AV_LOG_ERROR, + "Unsupported pixel format '%s', choose one of yuv420p, yuyv422, or uyvy422.\n", + av_get_pix_fmt_name(encctx->pix_fmt)); + ret = AVERROR(EINVAL); + goto fail; + } + + if (sdl->window_size) { + if (av_parse_video_size(&sdl->window_width, &sdl->window_height, + sdl->window_size) < 0) { + av_log(s, AV_LOG_ERROR, "Invalid window size '%s'\n", sdl->window_size); + ret = AVERROR(EINVAL); + goto fail; + } + } + + /* compute overlay width and height from the codec context information */ + sar = st->sample_aspect_ratio.num ? 
av_q2d(st->sample_aspect_ratio) : 1; + dar = sar * (float)encctx->width / (float)encctx->height; + + /* we suppose the screen has a 1/1 sample aspect ratio */ + sdl->overlay_height = encctx->height; + sdl->overlay_width = ((int)rint(sdl->overlay_height * dar)); + if (sdl->overlay_width > encctx->width) { + sdl->overlay_width = encctx->width; + sdl->overlay_height = ((int)rint(sdl->overlay_width / dar)); + } + + if (!sdl->window_width || !sdl->window_height) { + sdl->window_width = sdl->overlay_width; + sdl->window_height = sdl->overlay_height; + } + + SDL_WM_SetCaption(sdl->window_title, sdl->icon_title); + sdl->surface = SDL_SetVideoMode(sdl->window_width, sdl->window_height, + 24, SDL_SWSURFACE); + if (!sdl->surface) { + av_log(s, AV_LOG_ERROR, "Unable to set video mode: %s\n", SDL_GetError()); + ret = AVERROR(EINVAL); + goto fail; + } + + sdl->overlay = SDL_CreateYUVOverlay(sdl->overlay_width, sdl->overlay_height, + sdl->overlay_fmt, sdl->surface); + if (!sdl->overlay || sdl->overlay->pitches[0] < sdl->overlay_width) { + av_log(s, AV_LOG_ERROR, + "SDL does not support an overlay with size of %dx%d pixels.\n", + sdl->overlay_width, sdl->overlay_height); + ret = AVERROR(EINVAL); + goto fail; + } + + av_log(s, AV_LOG_INFO, "w:%d h:%d fmt:%s sar:%f -> w:%d h:%d\n", + encctx->width, encctx->height, av_get_pix_fmt_name(encctx->pix_fmt), sar, + sdl->window_width, sdl->window_height); + return 0; + +fail: + sdl_write_trailer(s); + return ret; +} + +static int sdl_write_packet(AVFormatContext *s, AVPacket *pkt) +{ + SDLContext *sdl = s->priv_data; + AVCodecContext *encctx = s->streams[0]->codec; + SDL_Rect rect = { 0, 0, sdl->window_width, sdl->window_height }; + AVPicture pict; + int i; + + avpicture_fill(&pict, pkt->data, encctx->pix_fmt, encctx->width, encctx->height); + + SDL_FillRect(sdl->surface, &sdl->surface->clip_rect, + SDL_MapRGB(sdl->surface->format, 0, 0, 0)); + SDL_LockYUVOverlay(sdl->overlay); + for (i = 0; i < 3; i++) { + sdl->overlay->pixels [i] = 
pict.data [i]; + sdl->overlay->pitches[i] = pict.linesize[i]; + } + SDL_DisplayYUVOverlay(sdl->overlay, &rect); + SDL_UnlockYUVOverlay(sdl->overlay); + + SDL_UpdateRect(sdl->surface, 0, 0, sdl->overlay_width, sdl->overlay_height); + + return 0; +} + +#define OFFSET(x) offsetof(SDLContext,x) + +static const AVOption options[] = { + { "window_title", "SDL window title", OFFSET(window_title), FF_OPT_TYPE_STRING, {.str = NULL }, 0, 0, AV_OPT_FLAG_ENCODING_PARAM }, + { "icon_title", "SDL iconified window title", OFFSET(icon_title) , FF_OPT_TYPE_STRING, {.str = NULL }, 0, 0, AV_OPT_FLAG_ENCODING_PARAM }, + { "window_size", "SDL window forced size", OFFSET(window_size) , FF_OPT_TYPE_STRING, {.str = NULL }, 0, 0, AV_OPT_FLAG_ENCODING_PARAM }, + { NULL }, +}; + +static const AVClass sdl_class = { + .class_name = "sdl outdev", + .item_name = av_default_item_name, + .option = options, + .version = LIBAVUTIL_VERSION_INT, +}; + +AVOutputFormat ff_sdl_muxer = { + .name = "sdl", + .long_name = NULL_IF_CONFIG_SMALL("SDL output device"), + .priv_data_size = sizeof(SDLContext), + .audio_codec = CODEC_ID_NONE, + .video_codec = CODEC_ID_RAWVIDEO, + .write_header = sdl_write_header, + .write_packet = sdl_write_packet, + .write_trailer = sdl_write_trailer, + .flags = AVFMT_NOFILE, + .priv_class = &sdl_class, +}; diff --git a/libavdevice/sndio_common.c b/libavdevice/sndio_common.c index 7e3b9a3eb9..048e72e8a2 100644 --- a/libavdevice/sndio_common.c +++ b/libavdevice/sndio_common.c @@ -22,7 +22,7 @@ #include <stdint.h> #include <sndio.h> -#include "libavformat/avformat.h" +#include "avdevice.h" #include "sndio_common.h" diff --git a/libavdevice/sndio_common.h b/libavdevice/sndio_common.h index 485bf93240..12218b4b24 100644 --- a/libavdevice/sndio_common.h +++ b/libavdevice/sndio_common.h @@ -25,9 +25,11 @@ #include <stdint.h> #include <sndio.h> -#include "libavformat/avformat.h" +#include "libavutil/log.h" +#include "avdevice.h" typedef struct { + AVClass *class; struct sio_hdl *hdl; enum 
CodecID codec_id; int64_t hwpos; diff --git a/libavdevice/sndio_dec.c b/libavdevice/sndio_dec.c index 9582b5490e..3014fd2787 100644 --- a/libavdevice/sndio_dec.c +++ b/libavdevice/sndio_dec.c @@ -23,6 +23,7 @@ #include <sndio.h> #include "libavformat/avformat.h" +#include "libavutil/opt.h" #include "sndio_common.h" @@ -33,16 +34,17 @@ static av_cold int audio_read_header(AVFormatContext *s1, AVStream *st; int ret; - if (ap->sample_rate <= 0 || ap->channels <= 0) - return AVERROR(EINVAL); +#if FF_API_FORMAT_PARAMETERS + if (ap->sample_rate > 0) + s->sample_rate = ap->sample_rate; + if (ap->channels > 0) + s->channels = ap->channels; +#endif st = av_new_stream(s1, 0); if (!st) return AVERROR(ENOMEM); - s->sample_rate = ap->sample_rate; - s->channels = ap->channels; - ret = ff_sndio_open(s1, 0, s1->filename); if (ret < 0) return ret; @@ -97,6 +99,19 @@ static av_cold int audio_read_close(AVFormatContext *s1) return 0; } +static const AVOption options[] = { + { "sample_rate", "", offsetof(SndioData, sample_rate), FF_OPT_TYPE_INT, {.dbl = 48000}, 1, INT_MAX, AV_OPT_FLAG_DECODING_PARAM }, + { "channels", "", offsetof(SndioData, channels), FF_OPT_TYPE_INT, {.dbl = 2}, 1, INT_MAX, AV_OPT_FLAG_DECODING_PARAM }, + { NULL }, +}; + +static const AVClass sndio_demuxer_class = { + .class_name = "sndio indev", + .item_name = av_default_item_name, + .option = options, + .version = LIBAVUTIL_VERSION_INT, +}; + AVInputFormat ff_sndio_demuxer = { .name = "sndio", .long_name = NULL_IF_CONFIG_SMALL("sndio audio capture"), @@ -105,4 +120,5 @@ AVInputFormat ff_sndio_demuxer = { .read_packet = audio_read_packet, .read_close = audio_read_close, .flags = AVFMT_NOFILE, + .priv_class = &sndio_demuxer_class, }; diff --git a/libavdevice/sndio_enc.c b/libavdevice/sndio_enc.c index 30f19107cd..9ad5cad08a 100644 --- a/libavdevice/sndio_enc.c +++ b/libavdevice/sndio_enc.c @@ -22,8 +22,7 @@ #include <stdint.h> #include <sndio.h> -#include "libavformat/avformat.h" - +#include "avdevice.h" #include 
"sndio_common.h" static av_cold int audio_write_header(AVFormatContext *s1) diff --git a/libavdevice/v4l.c b/libavdevice/v4l.c index 9a155f9df6..28a4e94599 100644 --- a/libavdevice/v4l.c +++ b/libavdevice/v4l.c @@ -19,11 +19,16 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#include "avdevice.h" + +#if FF_API_V4L + #undef __STRICT_ANSI__ //workaround due to broken kernel headers #include "config.h" #include "libavutil/rational.h" #include "libavutil/imgutils.h" -#include "libavformat/avformat.h" +#include "libavutil/log.h" +#include "libavutil/opt.h" #include "libavcodec/dsputil.h" #include <unistd.h> #include <fcntl.h> @@ -34,8 +39,10 @@ #include <linux/videodev.h> #include <time.h> #include <strings.h> +#include "avdevice.h" typedef struct { + AVClass *class; int fd; int frame_format; /* see VIDEO_PALETTE_xxx */ int use_mmap; @@ -49,6 +56,7 @@ typedef struct { struct video_mbuf gb_buffers; struct video_mmap gb_buf; int gb_frame; + int standard; } VideoData; static const struct { @@ -79,6 +87,8 @@ static int grab_read_header(AVFormatContext *s1, AVFormatParameters *ap) int j; int vformat_num = FF_ARRAY_ELEMS(video_formats); + av_log(s1, AV_LOG_WARNING, "V4L input device is deprecated and will be removed in the next release."); + if (ap->time_base.den <= 0) { av_log(s1, AV_LOG_ERROR, "Wrong time base (%d)\n", ap->time_base.den); return -1; @@ -131,13 +141,18 @@ static int grab_read_header(AVFormatContext *s1, AVFormatParameters *ap) } /* set tv standard */ - if (ap->standard && !ioctl(video_fd, VIDIOCGTUNER, &tuner)) { - if (!strcasecmp(ap->standard, "pal")) - tuner.mode = VIDEO_MODE_PAL; - else if (!strcasecmp(ap->standard, "secam")) - tuner.mode = VIDEO_MODE_SECAM; - else - tuner.mode = VIDEO_MODE_NTSC; + if (!ioctl(video_fd, VIDIOCGTUNER, &tuner)) { +#if FF_API_FORMAT_PARAMETERS + if (ap->standard) { + if (!strcasecmp(ap->standard, "pal")) + s->standard = VIDEO_MODE_PAL; + else if (!strcasecmp(ap->standard, "secam")) + 
s->standard = VIDEO_MODE_SECAM; + else + s->standard = VIDEO_MODE_NTSC; + } +#endif + tuner.mode = s->standard; ioctl(video_fd, VIDIOCSTUNER, &tuner); } @@ -339,6 +354,21 @@ static int grab_read_close(AVFormatContext *s1) return 0; } +static const AVOption options[] = { + { "standard", "", offsetof(VideoData, standard), FF_OPT_TYPE_INT, {.dbl = VIDEO_MODE_NTSC}, VIDEO_MODE_PAL, VIDEO_MODE_NTSC, AV_OPT_FLAG_DECODING_PARAM, "standard" }, + { "PAL", "", 0, FF_OPT_TYPE_CONST, {.dbl = VIDEO_MODE_PAL}, 0, 0, AV_OPT_FLAG_DECODING_PARAM, "standard" }, + { "SECAM", "", 0, FF_OPT_TYPE_CONST, {.dbl = VIDEO_MODE_SECAM}, 0, 0, AV_OPT_FLAG_DECODING_PARAM, "standard" }, + { "NTSC", "", 0, FF_OPT_TYPE_CONST, {.dbl = VIDEO_MODE_NTSC}, 0, 0, AV_OPT_FLAG_DECODING_PARAM, "standard" }, + { NULL }, +}; + +static const AVClass v4l_class = { + .class_name = "V4L indev", + .item_name = av_default_item_name, + .option = options, + .version = LIBAVUTIL_VERSION_INT, +}; + AVInputFormat ff_v4l_demuxer = { "video4linux", NULL_IF_CONFIG_SMALL("Video4Linux device grab"), @@ -348,4 +378,6 @@ AVInputFormat ff_v4l_demuxer = { grab_read_packet, grab_read_close, .flags = AVFMT_NOFILE, + .priv_class = &v4l_class, }; +#endif /* FF_API_V4L */ diff --git a/libavdevice/v4l2.c b/libavdevice/v4l2.c index 1f1a4bcfb8..7223654891 100644 --- a/libavdevice/v4l2.c +++ b/libavdevice/v4l2.c @@ -1,15 +1,7 @@ /* - * Video4Linux2 grab interface * Copyright (c) 2000,2001 Fabrice Bellard * Copyright (c) 2006 Luca Abeni * - * Part of this file is based on the V4L2 video capture example - * (http://v4l2spec.bytesex.org/v4l2spec/capture.c) - * - * Thanks to Michael Niedermayer for providing the mapping between - * V4L2_PIX_FMT_* and PIX_FMT_* - * - * * This file is part of FFmpeg. 
* * FFmpeg is free software; you can redistribute it and/or @@ -27,9 +19,19 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +/** + * @file + * Video4Linux2 grab interface + * + * Part of this file is based on the V4L2 video capture example + * (http://v4l2spec.bytesex.org/v4l2spec/capture.c) + * + * Thanks to Michael Niedermayer for providing the mapping between + * V4L2_PIX_FMT_* and PIX_FMT_* + */ + #undef __STRICT_ANSI__ //workaround due to broken kernel headers #include "config.h" -#include "libavformat/avformat.h" #include <unistd.h> #include <fcntl.h> #include <sys/ioctl.h> @@ -44,6 +46,11 @@ #include <time.h> #include <strings.h> #include "libavutil/imgutils.h" +#include "libavutil/log.h" +#include "libavutil/opt.h" +#include "avdevice.h" +#include "libavutil/parseutils.h" +#include "libavutil/pixdesc.h" static const int desired_video_buffers = 256; @@ -54,6 +61,7 @@ enum io_method { }; struct video_data { + AVClass *class; int fd; int frame_format; /* V4L2_PIX_FMT_* */ enum io_method io_method; @@ -64,6 +72,11 @@ struct video_data { int buffers; void **buf_start; unsigned int *buf_len; + char *standard; + int channel; + char *video_size; /**< String describing video size, set by a private option. */ + char *pixel_format; /**< Set by a private option. */ + char *framerate; /**< Set by a private option. 
*/ }; struct buff_data { @@ -110,7 +123,6 @@ static int device_open(AVFormatContext *ctx, uint32_t *capabilities) if (fd < 0) { av_log(ctx, AV_LOG_ERROR, "Cannot open video device %s : %s\n", ctx->filename, strerror(errno)); - return AVERROR(errno); } @@ -126,13 +138,11 @@ static int device_open(AVFormatContext *ctx, uint32_t *capabilities) av_log(ctx, AV_LOG_ERROR, "ioctl(VIDIOC_QUERYCAP): %s\n", strerror(errno)); close(fd); - return AVERROR(err); } if ((cap.capabilities & V4L2_CAP_VIDEO_CAPTURE) == 0) { av_log(ctx, AV_LOG_ERROR, "Not a video capture device\n"); close(fd); - return AVERROR(ENODEV); } *capabilities = cap.capabilities; @@ -144,10 +154,9 @@ static int device_init(AVFormatContext *ctx, int *width, int *height, uint32_t p { struct video_data *s = ctx->priv_data; int fd = s->fd; - struct v4l2_format fmt; + struct v4l2_format fmt = {0}; int res; - memset(&fmt, 0, sizeof(struct v4l2_format)); fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; fmt.fmt.pix.width = *width; fmt.fmt.pix.height = *height; @@ -230,10 +239,9 @@ static enum CodecID fmt_v4l2codec(uint32_t v4l2_fmt) static int mmap_init(AVFormatContext *ctx) { struct video_data *s = ctx->priv_data; - struct v4l2_requestbuffers req; + struct v4l2_requestbuffers req = {0}; int i, res; - memset(&req, 0, sizeof(struct v4l2_requestbuffers)); req.count = desired_video_buffers; req.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; req.memory = V4L2_MEMORY_MMAP; @@ -244,41 +252,35 @@ static int mmap_init(AVFormatContext *ctx) } else { av_log(ctx, AV_LOG_ERROR, "ioctl(VIDIOC_REQBUFS)\n"); } - return AVERROR(errno); } if (req.count < 2) { av_log(ctx, AV_LOG_ERROR, "Insufficient buffer memory\n"); - return AVERROR(ENOMEM); } s->buffers = req.count; s->buf_start = av_malloc(sizeof(void *) * s->buffers); if (s->buf_start == NULL) { av_log(ctx, AV_LOG_ERROR, "Cannot allocate buffer pointers\n"); - return AVERROR(ENOMEM); } s->buf_len = av_malloc(sizeof(unsigned int) * s->buffers); if (s->buf_len == NULL) { av_log(ctx, AV_LOG_ERROR, 
"Cannot allocate buffer sizes\n"); av_free(s->buf_start); - return AVERROR(ENOMEM); } for (i = 0; i < req.count; i++) { - struct v4l2_buffer buf; + struct v4l2_buffer buf = {0}; - memset(&buf, 0, sizeof(struct v4l2_buffer)); buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; buf.memory = V4L2_MEMORY_MMAP; buf.index = i; res = ioctl(s->fd, VIDIOC_QUERYBUF, &buf); if (res < 0) { av_log(ctx, AV_LOG_ERROR, "ioctl(VIDIOC_QUERYBUF)\n"); - return AVERROR(errno); } @@ -292,7 +294,6 @@ static int mmap_init(AVFormatContext *ctx) PROT_READ | PROT_WRITE, MAP_SHARED, s->fd, buf.m.offset); if (s->buf_start[i] == MAP_FAILED) { av_log(ctx, AV_LOG_ERROR, "mmap: %s\n", strerror(errno)); - return AVERROR(errno); } } @@ -307,7 +308,7 @@ static int read_init(AVFormatContext *ctx) static void mmap_release_buffer(AVPacket *pkt) { - struct v4l2_buffer buf; + struct v4l2_buffer buf = {0}; int res, fd; struct buff_data *buf_descriptor = pkt->priv; @@ -315,7 +316,6 @@ static void mmap_release_buffer(AVPacket *pkt) return; } - memset(&buf, 0, sizeof(struct v4l2_buffer)); buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; buf.memory = V4L2_MEMORY_MMAP; buf.index = buf_descriptor->index; @@ -333,11 +333,10 @@ static void mmap_release_buffer(AVPacket *pkt) static int mmap_read_frame(AVFormatContext *ctx, AVPacket *pkt) { struct video_data *s = ctx->priv_data; - struct v4l2_buffer buf; + struct v4l2_buffer buf = {0}; struct buff_data *buf_descriptor; int res; - memset(&buf, 0, sizeof(struct v4l2_buffer)); buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; buf.memory = V4L2_MEMORY_MMAP; @@ -346,17 +345,15 @@ static int mmap_read_frame(AVFormatContext *ctx, AVPacket *pkt) if (res < 0) { if (errno == EAGAIN) { pkt->size = 0; - return AVERROR(EAGAIN); } av_log(ctx, AV_LOG_ERROR, "ioctl(VIDIOC_DQBUF): %s\n", strerror(errno)); return AVERROR(errno); } - assert (buf.index < s->buffers); + assert(buf.index < s->buffers); if (s->frame_size > 0 && buf.bytesused != s->frame_size) { av_log(ctx, AV_LOG_ERROR, "The v4l2 frame is %d bytes, but 
%d bytes are expected\n", buf.bytesused, s->frame_size); - return AVERROR_INVALIDDATA; } @@ -394,9 +391,8 @@ static int mmap_start(AVFormatContext *ctx) int i, res; for (i = 0; i < s->buffers; i++) { - struct v4l2_buffer buf; + struct v4l2_buffer buf = {0}; - memset(&buf, 0, sizeof(struct v4l2_buffer)); buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; buf.memory = V4L2_MEMORY_MMAP; buf.index = i; @@ -404,7 +400,6 @@ static int mmap_start(AVFormatContext *ctx) res = ioctl(s->fd, VIDIOC_QBUF, &buf); if (res < 0) { av_log(ctx, AV_LOG_ERROR, "ioctl(VIDIOC_QBUF): %s\n", strerror(errno)); - return AVERROR(errno); } } @@ -413,7 +408,6 @@ static int mmap_start(AVFormatContext *ctx) res = ioctl(s->fd, VIDIOC_STREAMON, &type); if (res < 0) { av_log(ctx, AV_LOG_ERROR, "ioctl(VIDIOC_STREAMON): %s\n", strerror(errno)); - return AVERROR(errno); } @@ -440,98 +434,111 @@ static void mmap_close(struct video_data *s) static int v4l2_set_parameters(AVFormatContext *s1, AVFormatParameters *ap) { struct video_data *s = s1->priv_data; - struct v4l2_input input; - struct v4l2_standard standard; - struct v4l2_streamparm streamparm = { 0 }; + struct v4l2_input input = {0}; + struct v4l2_standard standard = {0}; + struct v4l2_streamparm streamparm = {0}; struct v4l2_fract *tpf = &streamparm.parm.capture.timeperframe; - int i; + int i, ret; + AVRational fps; streamparm.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; - if (ap->channel>=0) { - /* set tv video input */ - memset (&input, 0, sizeof (input)); - input.index = ap->channel; - if (ioctl(s->fd, VIDIOC_ENUMINPUT, &input) < 0) { - av_log(s1, AV_LOG_ERROR, "The V4L2 driver ioctl enum input failed:\n"); - return AVERROR(EIO); - } + if (s->framerate && (ret = av_parse_video_rate(&fps, s->framerate)) < 0) { + av_log(s1, AV_LOG_ERROR, "Couldn't parse framerate.\n"); + return ret; + } +#if FF_API_FORMAT_PARAMETERS + if (ap->channel > 0) + s->channel = ap->channel; + if (ap->time_base.num) + fps = (AVRational){ap->time_base.den, ap->time_base.num}; +#endif - 
av_log(s1, AV_LOG_DEBUG, "The V4L2 driver set input_id: %d, input: %s\n", - ap->channel, input.name); - if (ioctl(s->fd, VIDIOC_S_INPUT, &input.index) < 0) { - av_log(s1, AV_LOG_ERROR, "The V4L2 driver ioctl set input(%d) failed\n", - ap->channel); - return AVERROR(EIO); - } + /* set tv video input */ + input.index = s->channel; + if (ioctl(s->fd, VIDIOC_ENUMINPUT, &input) < 0) { + av_log(s1, AV_LOG_ERROR, "The V4L2 driver ioctl enum input failed:\n"); + return AVERROR(EIO); + } + + av_log(s1, AV_LOG_DEBUG, "The V4L2 driver set input_id: %d, input: %s\n", + s->channel, input.name); + if (ioctl(s->fd, VIDIOC_S_INPUT, &input.index) < 0) { + av_log(s1, AV_LOG_ERROR, "The V4L2 driver ioctl set input(%d) failed\n", + s->channel); + return AVERROR(EIO); } +#if FF_API_FORMAT_PARAMETERS if (ap->standard) { + av_freep(&s->standard); + s->standard = av_strdup(ap->standard); + } +#endif + + if (s->standard) { av_log(s1, AV_LOG_DEBUG, "The V4L2 driver set standard: %s\n", - ap->standard); + s->standard); /* set tv standard */ - memset (&standard, 0, sizeof (standard)); - for(i=0;;i++) { + for (i = 0;; i++) { standard.index = i; - if (ioctl(s->fd, VIDIOC_ENUMSTD, &standard) < 0) { - av_log(s1, AV_LOG_ERROR, "The V4L2 driver ioctl set standard(%s) failed\n", - ap->standard); - return AVERROR(EIO); - } - - if (!strcasecmp(standard.name, ap->standard)) { + ret = ioctl(s->fd, VIDIOC_ENUMSTD, &standard); + if (ret < 0 || !strcasecmp(standard.name, s->standard)) break; - } + } + if (ret < 0) { + av_log(s1, AV_LOG_ERROR, "Unknown standard '%s'\n", s->standard); + return ret; } av_log(s1, AV_LOG_DEBUG, "The V4L2 driver set standard: %s, id: %"PRIu64"\n", - ap->standard, (uint64_t)standard.id); + s->standard, (uint64_t)standard.id); if (ioctl(s->fd, VIDIOC_S_STD, &standard.id) < 0) { av_log(s1, AV_LOG_ERROR, "The V4L2 driver ioctl set standard(%s) failed\n", - ap->standard); + s->standard); return AVERROR(EIO); } } - if (ap->time_base.num && ap->time_base.den) { + if (fps.num && 
fps.den) { av_log(s1, AV_LOG_DEBUG, "Setting time per frame to %d/%d\n", - ap->time_base.num, ap->time_base.den); - tpf->numerator = ap->time_base.num; - tpf->denominator = ap->time_base.den; + fps.den, fps.num); + tpf->numerator = fps.den; + tpf->denominator = fps.num; if (ioctl(s->fd, VIDIOC_S_PARM, &streamparm) != 0) { av_log(s1, AV_LOG_ERROR, "ioctl set time per frame(%d/%d) failed\n", - ap->time_base.num, ap->time_base.den); + fps.den, fps.num); return AVERROR(EIO); } - if (ap->time_base.den != tpf->denominator || - ap->time_base.num != tpf->numerator) { + if (fps.num != tpf->denominator || + fps.den != tpf->numerator) { av_log(s1, AV_LOG_INFO, "The driver changed the time per frame from %d/%d to %d/%d\n", - ap->time_base.num, ap->time_base.den, + fps.den, fps.num, tpf->numerator, tpf->denominator); } } else { - /* if timebase value is not set in ap, read the timebase value from the driver */ + /* if timebase value is not set, read the timebase value from the driver */ if (ioctl(s->fd, VIDIOC_G_PARM, &streamparm) != 0) { av_log(s1, AV_LOG_ERROR, "ioctl(VIDIOC_G_PARM): %s\n", strerror(errno)); return AVERROR(errno); } } - ap->time_base.num = tpf->numerator; - ap->time_base.den = tpf->denominator; + s1->streams[0]->codec->time_base.den = tpf->denominator; + s1->streams[0]->codec->time_base.num = tpf->numerator; return 0; } static uint32_t device_try_init(AVFormatContext *s1, - const AVFormatParameters *ap, + enum PixelFormat pix_fmt, int *width, int *height, enum CodecID *codec_id) { - uint32_t desired_format = fmt_ff2v4l(ap->pix_fmt, s1->video_codec_id); + uint32_t desired_format = fmt_ff2v4l(pix_fmt, s1->video_codec_id); if (desired_format == 0 || device_init(s1, width, height, desired_format) < 0) { @@ -561,23 +568,41 @@ static int v4l2_read_header(AVFormatContext *s1, AVFormatParameters *ap) { struct video_data *s = s1->priv_data; AVStream *st; - int res; + int res = 0; uint32_t desired_format, capabilities; enum CodecID codec_id; + enum PixelFormat pix_fmt 
= PIX_FMT_NONE; st = av_new_stream(s1, 0); if (!st) { - return AVERROR(ENOMEM); + res = AVERROR(ENOMEM); + goto out; } av_set_pts_info(st, 64, 1, 1000000); /* 64 bits pts in us */ - s->width = ap->width; - s->height = ap->height; + if (s->video_size && (res = av_parse_video_size(&s->width, &s->height, s->video_size)) < 0) { + av_log(s1, AV_LOG_ERROR, "Couldn't parse video size.\n"); + goto out; + } + if (s->pixel_format && (pix_fmt = av_get_pix_fmt(s->pixel_format)) == PIX_FMT_NONE) { + av_log(s1, AV_LOG_ERROR, "No such pixel format: %s.\n", s->pixel_format); + res = AVERROR(EINVAL); + goto out; + } +#if FF_API_FORMAT_PARAMETERS + if (ap->width > 0) + s->width = ap->width; + if (ap->height > 0) + s->height = ap->height; + if (ap->pix_fmt) + pix_fmt = ap->pix_fmt; +#endif capabilities = 0; s->fd = device_open(s1, &capabilities); if (s->fd < 0) { - return AVERROR(EIO); + res = AVERROR(EIO); + goto out; } av_log(s1, AV_LOG_VERBOSE, "[%d]Capabilities: %x\n", s->fd, capabilities); @@ -588,27 +613,29 @@ static int v4l2_read_header(AVFormatContext *s1, AVFormatParameters *ap) fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; if (ioctl(s->fd, VIDIOC_G_FMT, &fmt) < 0) { av_log(s1, AV_LOG_ERROR, "ioctl(VIDIOC_G_FMT): %s\n", strerror(errno)); - return AVERROR(errno); + res = AVERROR(errno); + goto out; } s->width = fmt.fmt.pix.width; s->height = fmt.fmt.pix.height; av_log(s1, AV_LOG_VERBOSE, "Setting frame size to %dx%d\n", s->width, s->height); } - desired_format = device_try_init(s1, ap, &s->width, &s->height, &codec_id); + desired_format = device_try_init(s1, pix_fmt, &s->width, &s->height, &codec_id); if (desired_format == 0) { av_log(s1, AV_LOG_ERROR, "Cannot find a proper format for " - "codec_id %d, pix_fmt %d.\n", s1->video_codec_id, ap->pix_fmt); + "codec_id %d, pix_fmt %d.\n", s1->video_codec_id, pix_fmt); close(s->fd); - return AVERROR(EIO); + res = AVERROR(EIO); + goto out; } - if (av_image_check_size(s->width, s->height, 0, s1) < 0) - return AVERROR(EINVAL); + if ((res = 
av_image_check_size(s->width, s->height, 0, s1) < 0)) + goto out; s->frame_format = desired_format; - if (v4l2_set_parameters(s1, ap) < 0) - return AVERROR(EIO); + if ((res = v4l2_set_parameters(s1, ap) < 0)) + goto out; st->codec->pix_fmt = fmt_v4l2ff(desired_format, codec_id); s->frame_size = avpicture_get_size(st->codec->pix_fmt, s->width, s->height); @@ -624,8 +651,8 @@ static int v4l2_read_header(AVFormatContext *s1, AVFormatParameters *ap) } if (res < 0) { close(s->fd); - - return AVERROR(EIO); + res = AVERROR(EIO); + goto out; } s->top_field_first = first_field(s->fd); @@ -633,11 +660,10 @@ static int v4l2_read_header(AVFormatContext *s1, AVFormatParameters *ap) st->codec->codec_id = codec_id; st->codec->width = s->width; st->codec->height = s->height; - st->codec->time_base.den = ap->time_base.den; - st->codec->time_base.num = ap->time_base.num; st->codec->bit_rate = s->frame_size * 1/av_q2d(st->codec->time_base) * 8; - return 0; +out: + return res; } static int v4l2_read_packet(AVFormatContext *s1, AVPacket *pkt) @@ -680,6 +706,25 @@ static int v4l2_read_close(AVFormatContext *s1) return 0; } +#define OFFSET(x) offsetof(struct video_data, x) +#define DEC AV_OPT_FLAG_DECODING_PARAM + +static const AVOption options[] = { + { "standard", "", OFFSET(standard), FF_OPT_TYPE_STRING, {.str = NULL }, 0, 0, AV_OPT_FLAG_DECODING_PARAM }, + { "channel", "", OFFSET(channel), FF_OPT_TYPE_INT, {.dbl = 0 }, 0, INT_MAX, AV_OPT_FLAG_DECODING_PARAM }, + { "video_size", "A string describing frame size, such as 640x480 or hd720.", OFFSET(video_size), FF_OPT_TYPE_STRING, {.str = NULL}, 0, 0, DEC }, + { "pixel_format", "", OFFSET(pixel_format), FF_OPT_TYPE_STRING, {.str = NULL}, 0, 0, DEC }, + { "framerate", "", OFFSET(framerate), FF_OPT_TYPE_STRING, {.str = NULL}, 0, 0, DEC }, + { NULL }, +}; + +static const AVClass v4l2_class = { + .class_name = "V4L2 indev", + .item_name = av_default_item_name, + .option = options, + .version = LIBAVUTIL_VERSION_INT, +}; + AVInputFormat 
ff_v4l2_demuxer = { "video4linux2", NULL_IF_CONFIG_SMALL("Video4Linux2 device grab"), @@ -689,4 +734,5 @@ AVInputFormat ff_v4l2_demuxer = { v4l2_read_packet, v4l2_read_close, .flags = AVFMT_NOFILE, + .priv_class = &v4l2_class, }; diff --git a/libavdevice/vfwcap.c b/libavdevice/vfwcap.c index 2155db6ece..a8e67e7dda 100644 --- a/libavdevice/vfwcap.c +++ b/libavdevice/vfwcap.c @@ -19,27 +19,29 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#include "libavformat/avformat.h" +#include "libavutil/log.h" +#include "libavutil/opt.h" +#include "libavutil/parseutils.h" #include <windows.h> #include <vfw.h> - -//#define DEBUG_VFW +#include "avdevice.h" /* Defines for VFW missing from MinGW. * Remove this when MinGW incorporates them. */ #define HWND_MESSAGE ((HWND)-3) -#define BI_RGB 0 - /* End of missing MinGW defines */ struct vfw_ctx { + const AVClass *class; HWND hwnd; HANDLE mutex; HANDLE event; AVPacketList *pktl; unsigned int curbufsize; unsigned int frame_num; + char *video_size; /**< A string describing video size, set by a private option. */ + char *framerate; /**< Set by a private option. 
*/ }; static enum PixelFormat vfw_pixfmt(DWORD biCompression, WORD biBitCount) @@ -116,7 +118,7 @@ static void dump_captureparms(AVFormatContext *s, CAPTUREPARMS *cparms) static void dump_videohdr(AVFormatContext *s, VIDEOHDR *vhdr) { -#ifdef DEBUG_VFW +#ifdef DEBUG av_log(s, AV_LOG_DEBUG, "VIDEOHDR\n"); dstruct(s, vhdr, lpData, "p"); dstruct(s, vhdr, dwBufferLength, "lu"); @@ -244,9 +246,8 @@ static int vfw_read_header(AVFormatContext *s, AVFormatParameters *ap) CAPTUREPARMS cparms; DWORD biCompression; WORD biBitCount; - int width; - int height; int ret; + AVRational fps; if (!strcmp(s->filename, "list")) { for (devnum = 0; devnum <= 9; devnum++) { @@ -264,10 +265,10 @@ static int vfw_read_header(AVFormatContext *s, AVFormatParameters *ap) return AVERROR(EIO); } - if(!ap->time_base.den) { - av_log(s, AV_LOG_ERROR, "A time base must be specified.\n"); - return AVERROR(EIO); - } +#if FF_API_FORMAT_PARAMETERS + if (ap->time_base.num) + fps = (AVRational){ap->time_base.den, ap->time_base.num}; +#endif ctx->hwnd = capCreateCaptureWindow(NULL, 0, 0, 0, 0, 0, HWND_MESSAGE, 0); if(!ctx->hwnd) { @@ -318,10 +319,20 @@ static int vfw_read_header(AVFormatContext *s, AVFormatParameters *ap) dump_bih(s, &bi->bmiHeader); - width = ap->width ? ap->width : bi->bmiHeader.biWidth ; - height = ap->height ? 
ap->height : bi->bmiHeader.biHeight; - bi->bmiHeader.biWidth = width ; - bi->bmiHeader.biHeight = height; + + if (ctx->video_size) { + ret = av_parse_video_size(&bi->bmiHeader.biWidth, &bi->bmiHeader.biHeight, ctx->video_size); + if (ret < 0) { + av_log(s, AV_LOG_ERROR, "Couldn't parse video size.\n"); + goto fail_bi; + } + } +#if FF_API_FORMAT_PARAMETERS + if (ap->width > 0) + bi->bmiHeader.biWidth = ap->width; + if (ap->height > 0) + bi->bmiHeader.biHeight = ap->height; +#endif if (0) { /* For testing yet unsupported compressions @@ -356,7 +367,7 @@ static int vfw_read_header(AVFormatContext *s, AVFormatParameters *ap) cparms.fYield = 1; // Spawn a background thread cparms.dwRequestMicroSecPerFrame = - (ap->time_base.num*1000000) / ap->time_base.den; + (fps.den*1000000) / fps.num; cparms.fAbortLeftMouse = 0; cparms.fAbortRightMouse = 0; cparms.fCaptureAudio = 0; @@ -368,10 +379,10 @@ static int vfw_read_header(AVFormatContext *s, AVFormatParameters *ap) goto fail_io; codec = st->codec; - codec->time_base = ap->time_base; + codec->time_base = (AVRational){fps.den, fps.num}; codec->codec_type = AVMEDIA_TYPE_VIDEO; - codec->width = width; - codec->height = height; + codec->width = bi->bmiHeader.biWidth; + codec->height = bi->bmiHeader.biHeight; codec->pix_fmt = vfw_pixfmt(biCompression, biBitCount); if(codec->pix_fmt == PIX_FMT_NONE) { codec->codec_id = vfw_codecid(biCompression); @@ -452,6 +463,21 @@ static int vfw_read_packet(AVFormatContext *s, AVPacket *pkt) return pkt->size; } +#define OFFSET(x) offsetof(struct vfw_ctx, x) +#define DEC AV_OPT_FLAG_DECODING_PARAM +static const AVOption options[] = { + { "video_size", "A string describing frame size, such as 640x480 or hd720.", OFFSET(video_size), FF_OPT_TYPE_STRING, {.str = NULL}, 0, 0, DEC }, + { "framerate", "", OFFSET(framerate), FF_OPT_TYPE_STRING, {.str = "ntsc"}, 0, 0, DEC }, + { NULL }, +}; + +static const AVClass vfw_class = { + .class_name = "VFW indev", + .item_name = av_default_item_name, + .option = 
options, + .version = LIBAVUTIL_VERSION_INT, +}; + AVInputFormat ff_vfwcap_demuxer = { "vfwcap", NULL_IF_CONFIG_SMALL("VFW video capture"), @@ -461,4 +487,5 @@ AVInputFormat ff_vfwcap_demuxer = { vfw_read_packet, vfw_read_close, .flags = AVFMT_NOFILE, + .priv_class = &vfw_class, }; diff --git a/libavdevice/x11grab.c b/libavdevice/x11grab.c index 090af4930f..09c121ee7a 100644 --- a/libavdevice/x11grab.c +++ b/libavdevice/x11grab.c @@ -36,7 +36,9 @@ */ #include "config.h" -#include "libavformat/avformat.h" +#include "libavutil/log.h" +#include "libavutil/opt.h" +#include "libavutil/parseutils.h" #include <time.h> #include <X11/X.h> #include <X11/Xlib.h> @@ -46,16 +48,19 @@ #include <sys/shm.h> #include <X11/extensions/XShm.h> #include <X11/extensions/Xfixes.h> +#include "avdevice.h" /** * X11 Device Demuxer context */ struct x11_grab { + const AVClass *class; /**< Class for private options. */ int frame_size; /**< Size in bytes of a grabbed frame */ AVRational time_base; /**< Time base */ int64_t time_frame; /**< Current time */ + char *video_size; /**< String describing video size, set by a private option. */ int height; /**< Height of the grab frame */ int width; /**< Width of the grab frame */ int x_off; /**< Horizontal top-left corner coordinate */ @@ -66,6 +71,7 @@ struct x11_grab int use_shm; /**< !0 when using XShm extension */ XShmSegmentInfo shminfo; /**< When using XShm, keeps track of XShm infos */ int nomouse; + char *framerate; /**< Set by a private option. 
*/ }; /** @@ -91,6 +97,8 @@ x11grab_read_header(AVFormatContext *s1, AVFormatParameters *ap) int y_off = 0; int use_shm; char *dpyname, *offset; + int ret = 0; + AVRational framerate; dpyname = av_strdup(s1->filename); offset = strchr(dpyname, '+'); @@ -100,23 +108,37 @@ x11grab_read_header(AVFormatContext *s1, AVFormatParameters *ap) *offset= 0; } - av_log(s1, AV_LOG_INFO, "device: %s -> display: %s x: %d y: %d width: %d height: %d\n", s1->filename, dpyname, x_off, y_off, ap->width, ap->height); + if ((ret = av_parse_video_size(&x11grab->width, &x11grab->height, x11grab->video_size)) < 0) { + av_log(s1, AV_LOG_ERROR, "Couldn't parse video size.\n"); + goto out; + } + if ((ret = av_parse_video_rate(&framerate, x11grab->framerate)) < 0) { + av_log(s1, AV_LOG_ERROR, "Could not parse framerate: %s.\n", x11grab->framerate); + goto out; + } +#if FF_API_FORMAT_PARAMETERS + if (ap->width > 0) + x11grab->width = ap->width; + if (ap->height > 0) + x11grab->height = ap->height; + if (ap->time_base.num) + framerate = (AVRational){ap->time_base.den, ap->time_base.num}; +#endif + av_log(s1, AV_LOG_INFO, "device: %s -> display: %s x: %d y: %d width: %d height: %d\n", + s1->filename, dpyname, x_off, y_off, x11grab->width, x11grab->height); dpy = XOpenDisplay(dpyname); av_freep(&dpyname); if(!dpy) { av_log(s1, AV_LOG_ERROR, "Could not open X display.\n"); - return AVERROR(EIO); - } - - if (ap->width <= 0 || ap->height <= 0 || ap->time_base.den <= 0) { - av_log(s1, AV_LOG_ERROR, "AVParameters don't have video size and/or rate. 
Use -s and -r.\n"); - return AVERROR(EIO); + ret = AVERROR(EIO); + goto out; } st = av_new_stream(s1, 0); if (!st) { - return AVERROR(ENOMEM); + ret = AVERROR(ENOMEM); + goto out; } av_set_pts_info(st, 64, 1, 1000000); /* 64 bits pts in us */ @@ -131,13 +153,14 @@ x11grab_read_header(AVFormatContext *s1, AVFormatParameters *ap) ZPixmap, NULL, &x11grab->shminfo, - ap->width, ap->height); + x11grab->width, x11grab->height); x11grab->shminfo.shmid = shmget(IPC_PRIVATE, image->bytes_per_line * image->height, IPC_CREAT|0777); if (x11grab->shminfo.shmid == -1) { av_log(s1, AV_LOG_ERROR, "Fatal: Can't get shared memory!\n"); - return AVERROR(ENOMEM); + ret = AVERROR(ENOMEM); + goto out; } x11grab->shminfo.shmaddr = image->data = shmat(x11grab->shminfo.shmid, 0, 0); x11grab->shminfo.readOnly = False; @@ -145,12 +168,13 @@ x11grab_read_header(AVFormatContext *s1, AVFormatParameters *ap) if (!XShmAttach(dpy, &x11grab->shminfo)) { av_log(s1, AV_LOG_ERROR, "Fatal: Failed to attach shared memory!\n"); /* needs some better error subroutine :) */ - return AVERROR(EIO); + ret = AVERROR(EIO); + goto out; } } else { image = XGetImage(dpy, RootWindow(dpy, DefaultScreen(dpy)), x_off,y_off, - ap->width,ap->height, + x11grab->width, x11grab->height, AllPlanes, ZPixmap); } @@ -173,7 +197,8 @@ x11grab_read_header(AVFormatContext *s1, AVFormatParameters *ap) } else { av_log(s1, AV_LOG_ERROR, "RGB ordering at image depth %i not supported ... aborting\n", image->bits_per_pixel); av_log(s1, AV_LOG_ERROR, "color masks: r 0x%.6lx g 0x%.6lx b 0x%.6lx\n", image->red_mask, image->green_mask, image->blue_mask); - return AVERROR(EIO); + ret = AVERROR(EIO); + goto out; } break; case 24: @@ -188,7 +213,8 @@ x11grab_read_header(AVFormatContext *s1, AVFormatParameters *ap) } else { av_log(s1, AV_LOG_ERROR,"rgb ordering at image depth %i not supported ... 
aborting\n", image->bits_per_pixel); av_log(s1, AV_LOG_ERROR, "color masks: r 0x%.6lx g 0x%.6lx b 0x%.6lx\n", image->red_mask, image->green_mask, image->blue_mask); - return AVERROR(EIO); + ret = AVERROR(EIO); + goto out; } break; case 32: @@ -211,15 +237,14 @@ x11grab_read_header(AVFormatContext *s1, AVFormatParameters *ap) break; default: av_log(s1, AV_LOG_ERROR, "image depth %i not supported ... aborting\n", image->bits_per_pixel); - return -1; + ret = AVERROR(EINVAL); + goto out; } - x11grab->frame_size = ap->width * ap->height * image->bits_per_pixel/8; + x11grab->frame_size = x11grab->width * x11grab->height * image->bits_per_pixel/8; x11grab->dpy = dpy; - x11grab->width = ap->width; - x11grab->height = ap->height; - x11grab->time_base = ap->time_base; - x11grab->time_frame = av_gettime() / av_q2d(ap->time_base); + x11grab->time_base = (AVRational){framerate.den, framerate.num}; + x11grab->time_frame = av_gettime() / av_q2d(x11grab->time_base); x11grab->x_off = x_off; x11grab->y_off = y_off; x11grab->image = image; @@ -227,13 +252,14 @@ x11grab_read_header(AVFormatContext *s1, AVFormatParameters *ap) st->codec->codec_type = AVMEDIA_TYPE_VIDEO; st->codec->codec_id = CODEC_ID_RAWVIDEO; - st->codec->width = ap->width; - st->codec->height = ap->height; + st->codec->width = x11grab->width; + st->codec->height = x11grab->height; st->codec->pix_fmt = input_pixfmt; - st->codec->time_base = ap->time_base; - st->codec->bit_rate = x11grab->frame_size * 1/av_q2d(ap->time_base) * 8; + st->codec->time_base = x11grab->time_base; + st->codec->bit_rate = x11grab->frame_size * 1/av_q2d(x11grab->time_base) * 8; - return 0; +out: + return ret; } /** @@ -436,6 +462,21 @@ x11grab_read_close(AVFormatContext *s1) return 0; } +#define OFFSET(x) offsetof(struct x11_grab, x) +#define DEC AV_OPT_FLAG_DECODING_PARAM +static const AVOption options[] = { + { "video_size", "A string describing frame size, such as 640x480 or hd720.", OFFSET(video_size), FF_OPT_TYPE_STRING, {.str = "vga"}, 0, 
0, DEC }, + { "framerate", "", OFFSET(framerate), FF_OPT_TYPE_STRING, {.str = "ntsc"}, 0, 0, DEC }, + { NULL }, +}; + +static const AVClass x11_class = { + .class_name = "X11grab indev", + .item_name = av_default_item_name, + .option = options, + .version = LIBAVUTIL_VERSION_INT, +}; + /** x11 grabber device demuxer declaration */ AVInputFormat ff_x11_grab_device_demuxer = { @@ -447,4 +488,5 @@ AVInputFormat ff_x11_grab_device_demuxer = x11grab_read_packet, x11grab_read_close, .flags = AVFMT_NOFILE, + .priv_class = &x11_class, }; diff --git a/libavfilter/Makefile b/libavfilter/Makefile index de34089468..2324fb999e 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -6,7 +6,7 @@ FFLIBS-$(CONFIG_MOVIE_FILTER) += avformat avcodec FFLIBS-$(CONFIG_SCALE_FILTER) += swscale FFLIBS-$(CONFIG_MP_FILTER) += avcodec -HEADERS = avcodec.h avfilter.h avfiltergraph.h +HEADERS = avcodec.h avfilter.h avfiltergraph.h vsrc_buffer.h OBJS = allfilters.o \ avfilter.o \ @@ -46,12 +46,14 @@ OBJS-$(CONFIG_OVERLAY_FILTER) += vf_overlay.o OBJS-$(CONFIG_PAD_FILTER) += vf_pad.o OBJS-$(CONFIG_PIXDESCTEST_FILTER) += vf_pixdesctest.o OBJS-$(CONFIG_SCALE_FILTER) += vf_scale.o +OBJS-$(CONFIG_SELECT_FILTER) += vf_select.o OBJS-$(CONFIG_SETDAR_FILTER) += vf_aspect.o OBJS-$(CONFIG_SETPTS_FILTER) += vf_setpts.o OBJS-$(CONFIG_SETSAR_FILTER) += vf_aspect.o OBJS-$(CONFIG_SETTB_FILTER) += vf_settb.o OBJS-$(CONFIG_SHOWINFO_FILTER) += vf_showinfo.o OBJS-$(CONFIG_SLICIFY_FILTER) += vf_slicify.o +OBJS-$(CONFIG_SPLIT_FILTER) += vf_split.o OBJS-$(CONFIG_TRANSPOSE_FILTER) += vf_transpose.o OBJS-$(CONFIG_UNSHARP_FILTER) += vf_unsharp.o OBJS-$(CONFIG_VFLIP_FILTER) += vf_vflip.o diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c index 1973602305..5f1065f23f 100644 --- a/libavfilter/allfilters.c +++ b/libavfilter/allfilters.c @@ -62,12 +62,14 @@ void avfilter_register_all(void) REGISTER_FILTER (PAD, pad, vf); REGISTER_FILTER (PIXDESCTEST, pixdesctest, vf); REGISTER_FILTER (SCALE, scale, vf); 
+ REGISTER_FILTER (SELECT, select, vf); REGISTER_FILTER (SETDAR, setdar, vf); REGISTER_FILTER (SETPTS, setpts, vf); REGISTER_FILTER (SETSAR, setsar, vf); REGISTER_FILTER (SETTB, settb, vf); REGISTER_FILTER (SHOWINFO, showinfo, vf); REGISTER_FILTER (SLICIFY, slicify, vf); + REGISTER_FILTER (SPLIT, split, vf); REGISTER_FILTER (TRANSPOSE, transpose, vf); REGISTER_FILTER (UNSHARP, unsharp, vf); REGISTER_FILTER (VFLIP, vflip, vf); diff --git a/libavfilter/avcodec.c b/libavfilter/avcodec.c index c2f8651106..2010040d14 100644 --- a/libavfilter/avcodec.c +++ b/libavfilter/avcodec.c @@ -40,3 +40,34 @@ void avfilter_copy_frame_props(AVFilterBufferRef *dst, const AVFrame *src) dst->video->pict_type = src->pict_type; } } + +AVFilterBufferRef *avfilter_get_video_buffer_ref_from_frame(const AVFrame *frame, + int perms) +{ + AVFilterBufferRef *picref = + avfilter_get_video_buffer_ref_from_arrays(frame->data, frame->linesize, perms, + frame->width, frame->height, + frame->format); + if (!picref) + return NULL; + avfilter_copy_frame_props(picref, frame); + return picref; +} + +int avfilter_fill_frame_from_video_buffer_ref(AVFrame *frame, + const AVFilterBufferRef *picref) +{ + if (!picref || !picref->video || !frame) + return AVERROR(EINVAL); + + memcpy(frame->data, picref->data, sizeof(frame->data)); + memcpy(frame->linesize, picref->linesize, sizeof(frame->linesize)); + frame->pkt_pos = picref->pos; + frame->interlaced_frame = picref->video->interlaced; + frame->top_field_first = picref->video->top_field_first; + frame->key_frame = picref->video->key_frame; + frame->pict_type = picref->video->pict_type; + frame->sample_aspect_ratio = picref->video->sample_aspect_ratio; + + return 0; +} diff --git a/libavfilter/avcodec.h b/libavfilter/avcodec.h index f438860d0b..dec5ae4a7c 100644 --- a/libavfilter/avcodec.h +++ b/libavfilter/avcodec.h @@ -30,6 +30,7 @@ #include "libavcodec/avcodec.h" // AVFrame #include "avfilter.h" +#include "vsrc_buffer.h" /** * Copy the frame properties of src 
to dst, without copying the actual @@ -37,4 +38,34 @@ */ void avfilter_copy_frame_props(AVFilterBufferRef *dst, const AVFrame *src); +/** + * Create and return a picref reference from the data and properties + * contained in frame. + * + * @param perms permissions to assign to the new buffer reference + */ +AVFilterBufferRef *avfilter_get_video_buffer_ref_from_frame(const AVFrame *frame, int perms); + +/** + * Fill an AVFrame with the information stored in picref. + * + * @param frame an already allocated AVFrame + * @param picref a video buffer reference + * @return 0 in case of success, a negative AVERROR code in case of + * failure + */ +int avfilter_fill_frame_from_video_buffer_ref(AVFrame *frame, + const AVFilterBufferRef *picref); + +/** + * Add frame data to buffer_src. + * + * @param buffer_src pointer to a buffer source context + * @param flags a combination of AV_VSRC_BUF_FLAG_* flags + * @return >= 0 in case of success, a negative AVERROR code in case of + * failure + */ +int av_vsrc_buffer_add_frame(AVFilterContext *buffer_src, + const AVFrame *frame, int flags); + #endif /* AVFILTER_AVCODEC_H */ diff --git a/libavfilter/avfilter.c b/libavfilter/avfilter.c index 72e0a87f8e..3b2e3ca2be 100644 --- a/libavfilter/avfilter.c +++ b/libavfilter/avfilter.c @@ -77,8 +77,8 @@ static void store_in_pool(AVFilterBufferRef *ref) av_assert0(ref->buf->data[0]); - if(pool->count == POOL_SIZE){ - AVFilterBufferRef *ref1= pool->pic[0]; + if (pool->count == POOL_SIZE) { + AVFilterBufferRef *ref1 = pool->pic[0]; av_freep(&ref1->video); av_freep(&ref1->audio); av_freep(&ref1->buf->data[0]); @@ -89,9 +89,9 @@ static void store_in_pool(AVFilterBufferRef *ref) pool->pic[POOL_SIZE-1] = NULL; } - for(i=0; i<POOL_SIZE; i++){ - if(!pool->pic[i]){ - pool->pic[i]= ref; + for (i = 0; i < POOL_SIZE; i++) { + if (!pool->pic[i]) { + pool->pic[i] = ref; pool->count++; break; } @@ -102,8 +102,8 @@ void avfilter_unref_buffer(AVFilterBufferRef *ref) { if (!ref) return; - if 
(!(--ref->buf->refcount)){ - if(!ref->buf->free){ + if (!(--ref->buf->refcount)) { + if (!ref->buf->free) { store_in_pool(ref); return; } @@ -165,6 +165,31 @@ int avfilter_link(AVFilterContext *src, unsigned srcpad, return 0; } +void avfilter_link_free(AVFilterLink **link) +{ + if (!*link) + return; + + if ((*link)->pool) { + int i; + for (i = 0; i < POOL_SIZE; i++) { + if ((*link)->pool->pic[i]) { + AVFilterBufferRef *picref = (*link)->pool->pic[i]; + /* free buffer: picrefs stored in the pool are not + * supposed to contain a free callback */ + av_freep(&picref->buf->data[0]); + av_freep(&picref->buf); + + av_freep(&picref->audio); + av_freep(&picref->video); + av_freep(&picref); + } + } + av_freep(&(*link)->pool); + } + av_freep(link); +} + int avfilter_insert_filter(AVFilterLink *link, AVFilterContext *filt, unsigned filt_srcpad_idx, unsigned filt_dstpad_idx) { @@ -280,10 +305,9 @@ static void ff_dlog_ref(void *ctx, AVFilterBufferRef *ref, int end) av_get_picture_type_char(ref->video->pict_type)); } if (ref->audio) { - av_dlog(ctx, " cl:%"PRId64"d sn:%d s:%d sr:%d p:%d", + av_dlog(ctx, " cl:%"PRId64"d n:%d r:%d p:%d", ref->audio->channel_layout, ref->audio->nb_samples, - ref->audio->size, ref->audio->sample_rate, ref->audio->planar); } @@ -340,7 +364,7 @@ AVFilterBufferRef *avfilter_get_video_buffer(AVFilterLink *link, int perms, int } AVFilterBufferRef * -avfilter_get_video_buffer_ref_from_arrays(uint8_t *data[4], int linesize[4], int perms, +avfilter_get_video_buffer_ref_from_arrays(uint8_t * const data[4], const int linesize[4], int perms, int w, int h, enum PixelFormat format) { AVFilterBuffer *pic = av_mallocz(sizeof(AVFilterBuffer)); @@ -380,16 +404,16 @@ fail: } AVFilterBufferRef *avfilter_get_audio_buffer(AVFilterLink *link, int perms, - enum AVSampleFormat sample_fmt, int size, + enum AVSampleFormat sample_fmt, int nb_samples, int64_t channel_layout, int planar) { AVFilterBufferRef *ret = NULL; if (link->dstpad->get_audio_buffer) - ret = 
link->dstpad->get_audio_buffer(link, perms, sample_fmt, size, channel_layout, planar); + ret = link->dstpad->get_audio_buffer(link, perms, sample_fmt, nb_samples, channel_layout, planar); if (!ret) - ret = avfilter_default_get_audio_buffer(link, perms, sample_fmt, size, channel_layout, planar); + ret = avfilter_default_get_audio_buffer(link, perms, sample_fmt, nb_samples, channel_layout, planar); if (ret) ret->type = AVMEDIA_TYPE_AUDIO; @@ -397,6 +421,48 @@ AVFilterBufferRef *avfilter_get_audio_buffer(AVFilterLink *link, int perms, return ret; } +AVFilterBufferRef * +avfilter_get_audio_buffer_ref_from_arrays(uint8_t *data[8], int linesize[8], int perms, + int nb_samples, enum AVSampleFormat sample_fmt, + int64_t channel_layout, int planar) +{ + AVFilterBuffer *samples = av_mallocz(sizeof(AVFilterBuffer)); + AVFilterBufferRef *samplesref = av_mallocz(sizeof(AVFilterBufferRef)); + + if (!samples || !samplesref) + goto fail; + + samplesref->buf = samples; + samplesref->buf->free = ff_avfilter_default_free_buffer; + if (!(samplesref->audio = av_mallocz(sizeof(AVFilterBufferRefAudioProps)))) + goto fail; + + samplesref->audio->nb_samples = nb_samples; + samplesref->audio->channel_layout = channel_layout; + samplesref->audio->planar = planar; + + /* make sure the buffer gets read permission or it's useless for output */ + samplesref->perms = perms | AV_PERM_READ; + + samples->refcount = 1; + samplesref->type = AVMEDIA_TYPE_AUDIO; + samplesref->format = sample_fmt; + + memcpy(samples->data, data, sizeof(samples->data)); + memcpy(samples->linesize, linesize, sizeof(samples->linesize)); + memcpy(samplesref->data, data, sizeof(samplesref->data)); + memcpy(samplesref->linesize, linesize, sizeof(samplesref->linesize)); + + return samplesref; + +fail: + if (samplesref && samplesref->audio) + av_freep(&samplesref->audio); + av_freep(&samplesref); + av_freep(&samples); + return NULL; +} + int avfilter_request_frame(AVFilterLink *link) { FF_DPRINTF_START(NULL, request_frame); 
ff_dlog_link(NULL, link, 1); @@ -520,6 +586,7 @@ void avfilter_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref) { void (*filter_samples)(AVFilterLink *, AVFilterBufferRef *); AVFilterPad *dst = link->dstpad; + int i; FF_DPRINTF_START(NULL, filter_samples); ff_dlog_link(NULL, link, 1); @@ -536,14 +603,15 @@ void avfilter_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref) link->cur_buf = avfilter_default_get_audio_buffer(link, dst->min_perms, samplesref->format, - samplesref->audio->size, + samplesref->audio->nb_samples, samplesref->audio->channel_layout, samplesref->audio->planar); link->cur_buf->pts = samplesref->pts; link->cur_buf->audio->sample_rate = samplesref->audio->sample_rate; /* Copy actual data into new samples buffer */ - memcpy(link->cur_buf->data[0], samplesref->data[0], samplesref->audio->size); + for (i = 0; samplesref->data[i]; i++) + memcpy(link->cur_buf->data[i], samplesref->data[i], samplesref->linesize[0]); avfilter_unref_buffer(samplesref); } else @@ -683,7 +751,7 @@ void avfilter_free(AVFilterContext *filter) avfilter_formats_unref(&link->in_formats); avfilter_formats_unref(&link->out_formats); } - av_freep(&link); + avfilter_link_free(&link); } for (i = 0; i < filter->output_count; i++) { if ((link = filter->outputs[i])) { @@ -692,7 +760,7 @@ void avfilter_free(AVFilterContext *filter) avfilter_formats_unref(&link->in_formats); avfilter_formats_unref(&link->out_formats); } - av_freep(&link); + avfilter_link_free(&link); } av_freep(&filter->name); diff --git a/libavfilter/avfilter.h b/libavfilter/avfilter.h index 8251f2bf0e..7a844c4a11 100644 --- a/libavfilter/avfilter.h +++ b/libavfilter/avfilter.h @@ -26,7 +26,7 @@ #include "libavutil/samplefmt.h" #define LIBAVFILTER_VERSION_MAJOR 1 -#define LIBAVFILTER_VERSION_MINOR 78 +#define LIBAVFILTER_VERSION_MINOR 79 #define LIBAVFILTER_VERSION_MICRO 0 #define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \ @@ -98,8 +98,7 @@ typedef struct 
AVFilterBuffer { */ typedef struct AVFilterBufferRefAudioProps { int64_t channel_layout; ///< channel layout of audio buffer - int nb_samples; ///< number of audio samples - int size; ///< audio buffer size + int nb_samples; ///< number of audio samples per channel uint32_t sample_rate; ///< audio buffer sample rate int planar; ///< audio buffer - planar or packed } AVFilterBufferRefAudioProps; @@ -234,7 +233,8 @@ typedef struct AVFilterFormats { * Create a list of supported formats. This is intended for use in * AVFilter->query_formats(). * - * @param fmts list of media formats, terminated by -1 + * @param fmts list of media formats, terminated by -1. If NULL an + * empty list is created. * @return the format list, with no existing references */ AVFilterFormats *avfilter_make_format_list(const int *fmts); @@ -372,7 +372,7 @@ struct AVFilterPad { * Input audio pads only. */ AVFilterBufferRef *(*get_audio_buffer)(AVFilterLink *link, int perms, - enum AVSampleFormat sample_fmt, int size, + enum AVSampleFormat sample_fmt, int nb_samples, int64_t channel_layout, int planar); /** @@ -461,7 +461,7 @@ AVFilterBufferRef *avfilter_default_get_video_buffer(AVFilterLink *link, /** default handler for get_audio_buffer() for audio inputs */ AVFilterBufferRef *avfilter_default_get_audio_buffer(AVFilterLink *link, int perms, - enum AVSampleFormat sample_fmt, int size, + enum AVSampleFormat sample_fmt, int nb_samples, int64_t channel_layout, int planar); /** @@ -636,6 +636,11 @@ int avfilter_link(AVFilterContext *src, unsigned srcpad, AVFilterContext *dst, unsigned dstpad); /** + * Free the link in *link, and set its pointer to NULL. + */ +void avfilter_link_free(AVFilterLink **link); + +/** * Negotiate the media format, dimensions, etc of all inputs to a filter. 
* * @param filter the filter to negotiate the properties for its inputs @@ -669,7 +674,7 @@ AVFilterBufferRef *avfilter_get_video_buffer(AVFilterLink *link, int perms, * @param format the pixel format of the image specified by the data and linesize arrays */ AVFilterBufferRef * -avfilter_get_video_buffer_ref_from_arrays(uint8_t *data[4], int linesize[4], int perms, +avfilter_get_video_buffer_ref_from_arrays(uint8_t * const data[4], const int linesize[4], int perms, int w, int h, enum PixelFormat format); /** @@ -679,17 +684,34 @@ avfilter_get_video_buffer_ref_from_arrays(uint8_t *data[4], int linesize[4], int * be requested * @param perms the required access permissions * @param sample_fmt the format of each sample in the buffer to allocate - * @param size the buffer size in bytes + * @param nb_samples the number of samples per channel * @param channel_layout the number and type of channels per sample in the buffer to allocate * @param planar audio data layout - planar or packed * @return A reference to the samples. This must be unreferenced with * avfilter_unref_buffer when you are finished with it. */ AVFilterBufferRef *avfilter_get_audio_buffer(AVFilterLink *link, int perms, - enum AVSampleFormat sample_fmt, int size, + enum AVSampleFormat sample_fmt, int nb_samples, int64_t channel_layout, int planar); /** + * Create an audio buffer reference wrapped around an already + * allocated samples buffer. 
+ * + * @param data pointers to the samples plane buffers + * @param linesize linesize for the samples plane buffers + * @param perms the required access permissions + * @param nb_samples number of samples per channel + * @param sample_fmt the format of each sample in the buffer to allocate + * @param channel_layout the channel layout of the buffer + * @param planar audio data layout - planar or packed + */ +AVFilterBufferRef * +avfilter_get_audio_buffer_ref_from_arrays(uint8_t *data[8], int linesize[8], int perms, + int nb_samples, enum AVSampleFormat sample_fmt, + int64_t channel_layout, int planar); + +/** * Request an input frame from the filter at the other end of the link. * * @param link the input link @@ -861,4 +883,4 @@ static inline void avfilter_insert_outpad(AVFilterContext *f, unsigned index, &f->output_pads, &f->outputs, p); } -#endif /* AVFILTER_AVFILTER_H */ +#endif /* AVFILTER_AVFILTER_H */ diff --git a/libavfilter/avfiltergraph.c b/libavfilter/avfiltergraph.c index bdf22b3df9..60d529ba73 100644 --- a/libavfilter/avfiltergraph.c +++ b/libavfilter/avfiltergraph.c @@ -90,7 +90,7 @@ int ff_avfilter_graph_check_validity(AVFilterGraph *graph, AVClass *log_ctx) av_log(log_ctx, AV_LOG_ERROR, "Input pad \"%s\" for the filter \"%s\" of type \"%s\" not connected to any source\n", filt->input_pads[j].name, filt->name, filt->filter->name); - return -1; + return AVERROR(EINVAL); } } @@ -99,7 +99,7 @@ int ff_avfilter_graph_check_validity(AVFilterGraph *graph, AVClass *log_ctx) av_log(log_ctx, AV_LOG_ERROR, "Output pad \"%s\" for the filter \"%s\" of type \"%s\" not connected to any destination\n", filt->output_pads[j].name, filt->name, filt->filter->name); - return -1; + return AVERROR(EINVAL); } } } @@ -178,7 +178,7 @@ static int query_formats(AVFilterGraph *graph, AVClass *log_ctx) av_log(log_ctx, AV_LOG_ERROR, "Impossible to convert between the formats supported by the filter " "'%s' and the filter '%s'\n", link->src->name, link->dst->name); - return -1; + 
return AVERROR(EINVAL); } } } @@ -216,9 +216,11 @@ static void pick_formats(AVFilterGraph *graph) int ff_avfilter_graph_config_formats(AVFilterGraph *graph, AVClass *log_ctx) { + int ret; + /* find supported formats from sub-filters, and merge along links */ - if (query_formats(graph, log_ctx)) - return -1; + if ((ret = query_formats(graph, log_ctx)) < 0) + return ret; /* Once everything is merged, it's possible that we'll still have * multiple valid media format choices. We pick the first one. */ @@ -227,7 +229,7 @@ int ff_avfilter_graph_config_formats(AVFilterGraph *graph, AVClass *log_ctx) return 0; } -int avfilter_graph_config(AVFilterGraph *graphctx, AVClass *log_ctx) +int avfilter_graph_config(AVFilterGraph *graphctx, void *log_ctx) { int ret; diff --git a/libavfilter/avfiltergraph.h b/libavfilter/avfiltergraph.h index 0140af0801..a975926fd1 100644 --- a/libavfilter/avfiltergraph.h +++ b/libavfilter/avfiltergraph.h @@ -76,7 +76,7 @@ int avfilter_graph_create_filter(AVFilterContext **filt_ctx, AVFilter *filt, * @param log_ctx context used for logging * @return 0 in case of success, a negative AVERROR code otherwise */ -int avfilter_graph_config(AVFilterGraph *graphctx, AVClass *log_ctx); +int avfilter_graph_config(AVFilterGraph *graphctx, void *log_ctx); /** * Free a graph, destroy its links, and set *graph to NULL. @@ -108,16 +108,32 @@ typedef struct AVFilterInOut { } AVFilterInOut; /** + * Create an AVFilterInOut. + * Must be free with avfilter_inout_free(). + */ +AVFilterInOut *avfilter_inout_alloc(void); + +/** + * Free the AVFilterInOut in *inout, and set its pointer to NULL. + * If *inout is NULL, do nothing. + */ +void avfilter_inout_free(AVFilterInOut **inout); + +/** * Add a graph described by a string to a graph. 
* * @param graph the filter graph where to link the parsed graph context * @param filters string to be parsed - * @param inputs linked list to the inputs of the graph - * @param outputs linked list to the outputs of the graph + * @param inputs linked list to the inputs of the graph, may be NULL. + * It is updated to contain the list of open inputs after the parsing, + * should be freed with avfilter_inout_free(). + * @param outputs linked list to the outputs of the graph, may be NULL. + * It is updated to contain the list of open outputs after the parsing, + * should be freed with avfilter_inout_free(). * @return zero on success, a negative AVERROR code on error */ int avfilter_graph_parse(AVFilterGraph *graph, const char *filters, - AVFilterInOut *inputs, AVFilterInOut *outputs, - AVClass *log_ctx); + AVFilterInOut **inputs, AVFilterInOut **outputs, + void *log_ctx); -#endif /* AVFILTER_AVFILTERGRAPH_H */ +#endif /* AVFILTER_AVFILTERGRAPH_H */ diff --git a/libavfilter/defaults.c b/libavfilter/defaults.c index 9ee23e57b7..c39ed64048 100644 --- a/libavfilter/defaults.c +++ b/libavfilter/defaults.c @@ -40,28 +40,30 @@ AVFilterBufferRef *avfilter_default_get_video_buffer(AVFilterLink *link, int per uint8_t *data[4]; int i; AVFilterBufferRef *picref = NULL; - AVFilterPool *pool= link->pool; - - if(pool) for(i=0; i<POOL_SIZE; i++){ - picref= pool->pic[i]; - if(picref && picref->buf->format == link->format && picref->buf->w == w && picref->buf->h == h){ - AVFilterBuffer *pic= picref->buf; - pool->pic[i]= NULL; - pool->count--; - picref->video->w = w; - picref->video->h = h; - picref->perms = perms | AV_PERM_READ; - picref->format= link->format; - pic->refcount = 1; - memcpy(picref->data, pic->data, sizeof(picref->data)); - memcpy(picref->linesize, pic->linesize, sizeof(picref->linesize)); - return picref; + AVFilterPool *pool = link->pool; + + if (pool) { + for (i = 0; i < POOL_SIZE; i++) { + picref = pool->pic[i]; + if (picref && picref->buf->format == link->format && 
picref->buf->w == w && picref->buf->h == h) { + AVFilterBuffer *pic = picref->buf; + pool->pic[i] = NULL; + pool->count--; + picref->video->w = w; + picref->video->h = h; + picref->perms = perms | AV_PERM_READ; + picref->format = link->format; + pic->refcount = 1; + memcpy(picref->data, pic->data, sizeof(picref->data)); + memcpy(picref->linesize, pic->linesize, sizeof(picref->linesize)); + return picref; + } } - }else + } else pool = link->pool = av_mallocz(sizeof(AVFilterPool)); - // +2 is needed for swscaler, +16 to be SIMD-friendly - if ((i=av_image_alloc(data, linesize, w, h, link->format, 16)) < 0) + // align: +2 is needed for swscaler, +16 to be SIMD-friendly + if ((i = av_image_alloc(data, linesize, w, h, link->format, 16)) < 0) return NULL; picref = avfilter_get_video_buffer_ref_from_arrays(data, linesize, @@ -72,88 +74,37 @@ AVFilterBufferRef *avfilter_default_get_video_buffer(AVFilterLink *link, int per } memset(data[0], 128, i); - picref->buf->priv= pool; - picref->buf->free= NULL; + picref->buf->priv = pool; + picref->buf->free = NULL; return picref; } AVFilterBufferRef *avfilter_default_get_audio_buffer(AVFilterLink *link, int perms, - enum AVSampleFormat sample_fmt, int size, + enum AVSampleFormat sample_fmt, int nb_samples, int64_t channel_layout, int planar) { - AVFilterBuffer *samples = av_mallocz(sizeof(AVFilterBuffer)); - AVFilterBufferRef *ref = NULL; - int i, sample_size, chans_nb, bufsize, per_channel_size, step_size = 0; - char *buf; - - if (!samples || !(ref = av_mallocz(sizeof(AVFilterBufferRef)))) - goto fail; - - ref->buf = samples; - ref->format = sample_fmt; - - ref->audio = av_mallocz(sizeof(AVFilterBufferRefAudioProps)); - if (!ref->audio) - goto fail; - - ref->audio->channel_layout = channel_layout; - ref->audio->size = size; - ref->audio->planar = planar; - - /* make sure the buffer gets read permission or it's useless for output */ - ref->perms = perms | AV_PERM_READ; - - samples->refcount = 1; - samples->free = 
ff_avfilter_default_free_buffer; - - sample_size = av_get_bits_per_sample_fmt(sample_fmt) >>3; - chans_nb = av_get_channel_layout_nb_channels(channel_layout); - - per_channel_size = size/chans_nb; - ref->audio->nb_samples = per_channel_size/sample_size; - - /* Set the number of bytes to traverse to reach next sample of a particular channel: - * For planar, this is simply the sample size. - * For packed, this is the number of samples * sample_size. - */ - for (i = 0; i < chans_nb; i++) - samples->linesize[i] = planar > 0 ? per_channel_size : sample_size; - memset(&samples->linesize[chans_nb], 0, (8-chans_nb) * sizeof(samples->linesize[0])); + AVFilterBufferRef *samplesref = NULL; + int linesize[8]; + uint8_t *data[8]; + int nb_channels = av_get_channel_layout_nb_channels(channel_layout); /* Calculate total buffer size, round to multiple of 16 to be SIMD friendly */ - bufsize = (size + 15)&~15; - buf = av_malloc(bufsize); - if (!buf) - goto fail; - - /* For planar, set the start point of each channel's data within the buffer - * For packed, set the start point of the entire buffer only - */ - samples->data[0] = buf; - if (buf && planar) { - for (i = 1; i < chans_nb; i++) { - step_size += per_channel_size; - samples->data[i] = buf + step_size; - } - } else { - for (i = 1; i < chans_nb; i++) - samples->data[i] = buf; - } - - memset(&samples->data[chans_nb], 0, (8-chans_nb) * sizeof(samples->data[0])); - - memcpy(ref->data, samples->data, sizeof(ref->data)); - memcpy(ref->linesize, samples->linesize, sizeof(ref->linesize)); + if (av_samples_alloc(data, linesize, + nb_channels, nb_samples, sample_fmt, + planar, 16) < 0) + return NULL; - return ref; + samplesref = + avfilter_get_audio_buffer_ref_from_arrays(data, linesize, perms, + nb_samples, sample_fmt, + channel_layout, planar); + if (!samplesref) { + av_free(data[0]); + return NULL; + } -fail: - if (ref) - av_free(ref->audio); - av_free(ref); - av_free(samples); - return NULL; + return samplesref; } void 
avfilter_default_start_frame(AVFilterLink *inlink, AVFilterBufferRef *picref) @@ -210,7 +161,7 @@ void avfilter_default_filter_samples(AVFilterLink *inlink, AVFilterBufferRef *sa if (outlink) { outlink->out_buf = avfilter_default_get_audio_buffer(inlink, AV_PERM_WRITE, samplesref->format, - samplesref->audio->size, + samplesref->audio->nb_samples, samplesref->audio->channel_layout, samplesref->audio->planar); outlink->out_buf->pts = samplesref->pts; diff --git a/libavfilter/formats.c b/libavfilter/formats.c index 101ef09e5f..58593fcce0 100644 --- a/libavfilter/formats.c +++ b/libavfilter/formats.c @@ -44,6 +44,8 @@ AVFilterFormats *avfilter_merge_formats(AVFilterFormats *a, AVFilterFormats *b) AVFilterFormats *ret; unsigned i, j, k = 0; + if (a == b) return a; + ret = av_mallocz(sizeof(AVFilterFormats)); /* merge list of formats */ @@ -73,15 +75,18 @@ AVFilterFormats *avfilter_merge_formats(AVFilterFormats *a, AVFilterFormats *b) AVFilterFormats *avfilter_make_format_list(const int *fmts) { AVFilterFormats *formats; - int count; + int count = 0; - for (count = 0; fmts[count] != -1; count++) - ; + if (fmts) + for (count = 0; fmts[count] != -1; count++) + ; formats = av_mallocz(sizeof(AVFilterFormats)); - formats->formats = av_malloc(sizeof(*formats->formats) * count); formats->format_count = count; - memcpy(formats->formats, fmts, sizeof(*formats->formats) * count); + if (count) { + formats->formats = av_malloc(sizeof(*formats->formats) * count); + memcpy(formats->formats, fmts, sizeof(*formats->formats) * count); + } return formats; } diff --git a/libavfilter/gradfun.h b/libavfilter/gradfun.h index 3dacbcb252..3c01085b83 100644 --- a/libavfilter/gradfun.h +++ b/libavfilter/gradfun.h @@ -33,16 +33,16 @@ typedef struct { int chroma_r; ///< blur radius for the chroma planes uint16_t *buf; ///< holds image data for blur algorithm passed into filter. /// DSP functions. 
- void (*filter_line) (uint8_t *dst, uint8_t *src, uint16_t *dc, int width, int thresh, const uint16_t *dithers); - void (*blur_line) (uint16_t *dc, uint16_t *buf, uint16_t *buf1, uint8_t *src, int src_linesize, int width); + void (*filter_line) (uint8_t *dst, const uint8_t *src, const uint16_t *dc, int width, int thresh, const uint16_t *dithers); + void (*blur_line) (uint16_t *dc, uint16_t *buf, const uint16_t *buf1, const uint8_t *src, int src_linesize, int width); } GradFunContext; -void ff_gradfun_filter_line_c(uint8_t *dst, uint8_t *src, uint16_t *dc, int width, int thresh, const uint16_t *dithers); -void ff_gradfun_blur_line_c(uint16_t *dc, uint16_t *buf, uint16_t *buf1, uint8_t *src, int src_linesize, int width); +void ff_gradfun_filter_line_c(uint8_t *dst, const uint8_t *src, const uint16_t *dc, int width, int thresh, const uint16_t *dithers); +void ff_gradfun_blur_line_c(uint16_t *dc, uint16_t *buf, const uint16_t *buf1, const uint8_t *src, int src_linesize, int width); -void ff_gradfun_filter_line_mmx2(uint8_t *dst, uint8_t *src, uint16_t *dc, int width, int thresh, const uint16_t *dithers); -void ff_gradfun_filter_line_ssse3(uint8_t *dst, uint8_t *src, uint16_t *dc, int width, int thresh, const uint16_t *dithers); +void ff_gradfun_filter_line_mmx2(uint8_t *dst, const uint8_t *src, const uint16_t *dc, int width, int thresh, const uint16_t *dithers); +void ff_gradfun_filter_line_ssse3(uint8_t *dst, const uint8_t *src, const uint16_t *dc, int width, int thresh, const uint16_t *dithers); -void ff_gradfun_blur_line_sse2(uint16_t *dc, uint16_t *buf, uint16_t *buf1, uint8_t *src, int src_linesize, int width); +void ff_gradfun_blur_line_sse2(uint16_t *dc, uint16_t *buf, const uint16_t *buf1, const uint8_t *src, int src_linesize, int width); #endif /* AVFILTER_GRADFUN_H */ diff --git a/libavfilter/graphparser.c b/libavfilter/graphparser.c index 4f11529900..d62ba8d205 100644 --- a/libavfilter/graphparser.c +++ b/libavfilter/graphparser.c @@ -36,7 +36,7 @@ */ 
static int link_filter(AVFilterContext *src, int srcpad, AVFilterContext *dst, int dstpad, - AVClass *log_ctx) + void *log_ctx) { int ret; if ((ret = avfilter_link(src, srcpad, dst, dstpad))) { @@ -55,7 +55,7 @@ static int link_filter(AVFilterContext *src, int srcpad, * @return a pointer (that need to be freed after use) to the name * between parenthesis */ -static char *parse_link_name(const char **buf, AVClass *log_ctx) +static char *parse_link_name(const char **buf, void *log_ctx) { const char *start = *buf; char *name; @@ -92,7 +92,7 @@ static char *parse_link_name(const char **buf, AVClass *log_ctx) * @return 0 in case of success, a negative AVERROR code otherwise */ static int create_filter(AVFilterContext **filt_ctx, AVFilterGraph *ctx, int index, - const char *filt_name, const char *args, AVClass *log_ctx) + const char *filt_name, const char *args, void *log_ctx) { AVFilter *filt; char inst_name[30]; @@ -151,7 +151,7 @@ static int create_filter(AVFilterContext **filt_ctx, AVFilterGraph *ctx, int ind * @return 0 in case of success, a negative AVERROR code otherwise */ static int parse_filter(AVFilterContext **filt_ctx, const char **buf, AVFilterGraph *graph, - int index, AVClass *log_ctx) + int index, void *log_ctx) { char *opts = NULL; char *name = av_get_token(buf, "=,;[\n"); @@ -168,13 +168,18 @@ static int parse_filter(AVFilterContext **filt_ctx, const char **buf, AVFilterGr return ret; } -static void free_inout(AVFilterInOut *head) +AVFilterInOut *avfilter_inout_alloc(void) { - while (head) { - AVFilterInOut *next = head->next; - av_free(head->name); - av_free(head); - head = next; + return av_mallocz(sizeof(AVFilterInOut)); +} + +void avfilter_inout_free(AVFilterInOut **inout) +{ + while (*inout) { + AVFilterInOut *next = (*inout)->next; + av_freep(&(*inout)->name); + av_freep(inout); + *inout = next; } } @@ -201,7 +206,7 @@ static void insert_inout(AVFilterInOut **inouts, AVFilterInOut *element) static int link_filter_inouts(AVFilterContext *filt_ctx, 
AVFilterInOut **curr_inputs, - AVFilterInOut **open_inputs, AVClass *log_ctx) + AVFilterInOut **open_inputs, void *log_ctx) { int pad = filt_ctx->input_count, ret; @@ -249,7 +254,7 @@ static int link_filter_inouts(AVFilterContext *filt_ctx, } static int parse_inputs(const char **buf, AVFilterInOut **curr_inputs, - AVFilterInOut **open_outputs, AVClass *log_ctx) + AVFilterInOut **open_outputs, void *log_ctx) { int pad = 0; @@ -284,7 +289,7 @@ static int parse_inputs(const char **buf, AVFilterInOut **curr_inputs, static int parse_outputs(const char **buf, AVFilterInOut **curr_inputs, AVFilterInOut **open_inputs, - AVFilterInOut **open_outputs, AVClass *log_ctx) + AVFilterInOut **open_outputs, void *log_ctx) { int ret, pad = 0; @@ -328,8 +333,8 @@ static int parse_outputs(const char **buf, AVFilterInOut **curr_inputs, } int avfilter_graph_parse(AVFilterGraph *graph, const char *filters, - AVFilterInOut *open_inputs, - AVFilterInOut *open_outputs, AVClass *log_ctx) + AVFilterInOut **open_inputs, AVFilterInOut **open_outputs, + void *log_ctx) { int index = 0, ret; char chr = 0; @@ -341,7 +346,7 @@ int avfilter_graph_parse(AVFilterGraph *graph, const char *filters, const char *filterchain = filters; filters += strspn(filters, WHITESPACES); - if ((ret = parse_inputs(&filters, &curr_inputs, &open_outputs, log_ctx)) < 0) + if ((ret = parse_inputs(&filters, &curr_inputs, open_outputs, log_ctx)) < 0) goto fail; if ((ret = parse_filter(&filter, &filters, graph, index, log_ctx)) < 0) @@ -350,14 +355,14 @@ int avfilter_graph_parse(AVFilterGraph *graph, const char *filters, if (filter->input_count == 1 && !curr_inputs && !index) { /* First input can be omitted if it is "[in]" */ const char *tmp = "[in]"; - if ((ret = parse_inputs(&tmp, &curr_inputs, &open_outputs, log_ctx)) < 0) + if ((ret = parse_inputs(&tmp, &curr_inputs, open_outputs, log_ctx)) < 0) goto fail; } - if ((ret = link_filter_inouts(filter, &curr_inputs, &open_inputs, log_ctx)) < 0) + if ((ret = 
link_filter_inouts(filter, &curr_inputs, open_inputs, log_ctx)) < 0) goto fail; - if ((ret = parse_outputs(&filters, &curr_inputs, &open_inputs, &open_outputs, + if ((ret = parse_outputs(&filters, &curr_inputs, open_inputs, open_outputs, log_ctx)) < 0) goto fail; @@ -382,10 +387,10 @@ int avfilter_graph_parse(AVFilterGraph *graph, const char *filters, goto fail; } - if (open_inputs && !strcmp(open_inputs->name, "out") && curr_inputs) { + if (*open_inputs && !strcmp((*open_inputs)->name, "out") && curr_inputs) { /* Last output can be omitted if it is "[out]" */ const char *tmp = "[out]"; - if ((ret = parse_outputs(&tmp, &curr_inputs, &open_inputs, &open_outputs, + if ((ret = parse_outputs(&tmp, &curr_inputs, open_inputs, open_outputs, log_ctx)) < 0) goto fail; } @@ -396,8 +401,8 @@ int avfilter_graph_parse(AVFilterGraph *graph, const char *filters, for (; graph->filter_count > 0; graph->filter_count--) avfilter_free(graph->filters[graph->filter_count - 1]); av_freep(&graph->filters); - free_inout(open_inputs); - free_inout(open_outputs); - free_inout(curr_inputs); + avfilter_inout_free(open_inputs); + avfilter_inout_free(open_outputs); + avfilter_inout_free(&curr_inputs); return ret; } diff --git a/libavfilter/internal.h b/libavfilter/internal.h index 159e979168..be1e9b08f2 100644 --- a/libavfilter/internal.h +++ b/libavfilter/internal.h @@ -31,7 +31,7 @@ typedef struct AVFilterPool { AVFilterBufferRef *pic[POOL_SIZE]; int count; -}AVFilterPool; +} AVFilterPool; /** * Check for the validity of graph. 
@@ -58,4 +58,4 @@ int ff_avfilter_graph_config_formats(AVFilterGraph *graphctx, AVClass *log_ctx); /** default handler for freeing audio/video buffer when there are no references left */ void ff_avfilter_default_free_buffer(AVFilterBuffer *buf); -#endif /* AVFILTER_INTERNAL_H */ +#endif /* AVFILTER_INTERNAL_H */ diff --git a/libavfilter/libmpcodecs/vf_detc.c b/libavfilter/libmpcodecs/vf_detc.c index 8dd51da794..28d20e09b7 100644 --- a/libavfilter/libmpcodecs/vf_detc.c +++ b/libavfilter/libmpcodecs/vf_detc.c @@ -382,7 +382,7 @@ static void uninit(struct vf_instance *vf) } static struct { - char *name; + const char *name; int (*func)(struct vf_priv_s *p, mp_image_t *new, mp_image_t *old); int needread; } anal_funcs[] = { diff --git a/libavfilter/libmpcodecs/vf_dint.c b/libavfilter/libmpcodecs/vf_dint.c index 7038381221..ac5bf54a54 100644 --- a/libavfilter/libmpcodecs/vf_dint.c +++ b/libavfilter/libmpcodecs/vf_dint.c @@ -32,7 +32,7 @@ struct vf_priv_s { float sense; // first parameter float level; // second parameter unsigned int imgfmt; - char diff; + int diff; uint32_t max; // int dfr; // int rdfr; @@ -73,7 +73,7 @@ static int config (struct vf_instance *vf, vf->priv->diff = 31; mp_msg (MSGT_VFILTER, MSGL_INFO, "Drop-interlaced: %dx%d diff %d / level %u\n", vf->priv->pmpi->width, vf->priv->pmpi->height, - (int)vf->priv->diff, (unsigned int)vf->priv->max); + vf->priv->diff, (unsigned int)vf->priv->max); // vf->priv->rdfr = vf->priv->dfr = 0; vf->priv->was_dint = 0; return vf_next_config(vf,width,height,d_width,d_height,flags,outfmt); @@ -81,10 +81,10 @@ static int config (struct vf_instance *vf, static int put_image (struct vf_instance *vf, mp_image_t *mpi, double pts) { - char rrow0[MAXROWSIZE]; - char rrow1[MAXROWSIZE]; - char rrow2[MAXROWSIZE]; - char *row0 = rrow0, *row1 = rrow1, *row2 = rrow2/*, *row3 = rrow3*/; + int8_t rrow0[MAXROWSIZE]; + int8_t rrow1[MAXROWSIZE]; + int8_t rrow2[MAXROWSIZE]; + int8_t *row0 = rrow0, *row1 = rrow1, *row2 = rrow2/*, *row3 = 
rrow3*/; int rowsize = mpi->width; uint32_t nok = 0, max = vf->priv->max; int diff = vf->priv->diff; diff --git a/libavfilter/libmpcodecs/vf_divtc.c b/libavfilter/libmpcodecs/vf_divtc.c index 25447f0596..4c171d1728 100644 --- a/libavfilter/libmpcodecs/vf_divtc.c +++ b/libavfilter/libmpcodecs/vf_divtc.c @@ -42,7 +42,7 @@ struct vf_priv_s ocount, sum[5]; double threshold; FILE *file; - char *bdata; + int8_t *bdata; unsigned int *csdata; int *history; }; @@ -384,8 +384,8 @@ static int analyze(struct vf_priv_s *p) { int *buf=0, *bp, bufsize=0, n, b, f, i, j, m, s; unsigned int *cbuf=0, *cp; - char *pbuf; - char lbuf[256]; + int8_t *pbuf; + int8_t lbuf[256]; int sum[5]; double d; @@ -598,7 +598,8 @@ static void uninit(struct vf_instance *vf) static int vf_open(vf_instance_t *vf, char *args) { struct vf_priv_s *p; - char *filename="framediff.log", *ap, *q, *a; + const char *filename="framediff.log"; + char *ap, *q, *a; if(args && !(args=av_strdup(args))) { diff --git a/libavfilter/libmpcodecs/vf_gradfun.c b/libavfilter/libmpcodecs/vf_gradfun.c index 2732f55d9a..fd4236cc37 100644 --- a/libavfilter/libmpcodecs/vf_gradfun.c +++ b/libavfilter/libmpcodecs/vf_gradfun.c @@ -188,7 +188,7 @@ static void filter_line_ssse3(uint8_t *dst, uint8_t *src, uint16_t *dc, } #endif // HAVE_SSSE3 -#if HAVE_SSE2 && HAVE_6REGS +#if HAVE_SSE && HAVE_6REGS #define BLURV(load)\ intptr_t x = -2*width;\ __asm__ volatile(\ @@ -231,7 +231,7 @@ static void blur_line_sse2(uint16_t *dc, uint16_t *buf, uint16_t *buf1, BLURV("movdqa"); } } -#endif // HAVE_6REGS && HAVE_SSE2 +#endif // HAVE_6REGS && HAVE_SSE static void filter(struct vf_priv_s *ctx, uint8_t *dst, uint8_t *src, int width, int height, int dstride, int sstride, int r) @@ -385,7 +385,7 @@ static int vf_open(vf_instance_t *vf, char *args) vf->priv->blur_line = blur_line_c; vf->priv->filter_line = filter_line_c; -#if HAVE_SSE2 && HAVE_6REGS +#if HAVE_SSE && HAVE_6REGS if (gCpuCaps.hasSSE2) vf->priv->blur_line = blur_line_sse2; #endif diff --git 
a/libavfilter/libmpcodecs/vf_ilpack.c b/libavfilter/libmpcodecs/vf_ilpack.c index 77555a7b41..db4a849e1f 100644 --- a/libavfilter/libmpcodecs/vf_ilpack.c +++ b/libavfilter/libmpcodecs/vf_ilpack.c @@ -28,6 +28,7 @@ #include "img_format.h" #include "mp_image.h" #include "vf.h" +#include "libavutil/attributes.h" typedef void (pack_func_t)(unsigned char *dst, unsigned char *y, unsigned char *u, unsigned char *v, int w, int us, int vs); @@ -38,7 +39,8 @@ struct vf_priv_s { }; static void pack_nn_C(unsigned char *dst, unsigned char *y, - unsigned char *u, unsigned char *v, int w) + unsigned char *u, unsigned char *v, int w, + int av_unused us, int av_unused vs) { int j; for (j = w/2; j; j--) { @@ -77,7 +79,8 @@ static void pack_li_1_C(unsigned char *dst, unsigned char *y, #if HAVE_MMX static void pack_nn_MMX(unsigned char *dst, unsigned char *y, - unsigned char *u, unsigned char *v, int w) + unsigned char *u, unsigned char *v, int w, + int av_unused us, int av_unused vs) { __asm__ volatile ("" ASMALIGN(4) @@ -103,7 +106,7 @@ static void pack_nn_MMX(unsigned char *dst, unsigned char *y, : "r" (y), "r" (u), "r" (v), "r" (dst), "r" (w/8) : "memory" ); - pack_nn_C(dst, y, u, v, (w&7)); + pack_nn_C(dst, y, u, v, (w&7), 0, 0); } #if HAVE_EBX_AVAILABLE @@ -413,12 +416,12 @@ static int vf_open(vf_instance_t *vf, char *args) vf->priv->mode = 1; if (args) sscanf(args, "%d", &vf->priv->mode); - pack_nn = (pack_func_t *)pack_nn_C; + pack_nn = pack_nn_C; pack_li_0 = pack_li_0_C; pack_li_1 = pack_li_1_C; #if HAVE_MMX if(gCpuCaps.hasMMX) { - pack_nn = (pack_func_t *)pack_nn_MMX; + pack_nn = pack_nn_MMX; #if HAVE_EBX_AVAILABLE pack_li_0 = pack_li_0_MMX; pack_li_1 = pack_li_1_MMX; diff --git a/libavfilter/libmpcodecs/vf_pp7.c b/libavfilter/libmpcodecs/vf_pp7.c index f8b64b658a..c075d6619c 100644 --- a/libavfilter/libmpcodecs/vf_pp7.c +++ b/libavfilter/libmpcodecs/vf_pp7.c @@ -286,8 +286,8 @@ static void filter(struct vf_priv_s *p, uint8_t *dst, uint8_t *src, int dst_stri int x, y; const 
int stride= is_luma ? p->temp_stride : ((width+16+15)&(~15)); uint8_t *p_src= p->src + 8*stride; - DCTELEM *block= p->src; - DCTELEM *temp= p->src + 32; + DCTELEM *block= (DCTELEM *)p->src; + DCTELEM *temp= (DCTELEM *)(p->src + 32); if (!src || !dst) return; // HACK avoid crash for Y8 colourspace for(y=0; y<height; y++){ diff --git a/libavfilter/libmpcodecs/vf_sab.c b/libavfilter/libmpcodecs/vf_sab.c index 377c9e33a8..51e9d78158 100644 --- a/libavfilter/libmpcodecs/vf_sab.c +++ b/libavfilter/libmpcodecs/vf_sab.c @@ -102,7 +102,7 @@ static int allocStuff(FilterParam *f, int width, int height){ swsF.lumH= swsF.lumV= vec; swsF.chrH= swsF.chrV= NULL; f->preFilterContext= sws_getContext( - width, height, PIX_FMT_GRAY8, width, height, PIX_FMT_GRAY8, get_sws_cpuflags()|SWS_POINT, &swsF, NULL, NULL); + width, height, PIX_FMT_GRAY8, width, height, PIX_FMT_GRAY8, SWS_POINT, &swsF, NULL, NULL); sws_freeVec(vec); vec = sws_getGaussianVec(f->strength, 5.0); diff --git a/libavfilter/libmpcodecs/vf_scale.h b/libavfilter/libmpcodecs/vf_scale.h index 91ed103c30..4de3b48ec3 100644 --- a/libavfilter/libmpcodecs/vf_scale.h +++ b/libavfilter/libmpcodecs/vf_scale.h @@ -29,7 +29,6 @@ extern float sws_lum_sharpen; extern int sws_flags; -int get_sws_cpuflags(void); struct SwsContext *sws_getContextFromCmdLine(int srcW, int srcH, int srcFormat, int dstW, int dstH, int dstFormat); #endif /* MPLAYER_VF_SCALE_H */ diff --git a/libavfilter/libmpcodecs/vf_smartblur.c b/libavfilter/libmpcodecs/vf_smartblur.c index 3e20880f04..5bfcb2806e 100644 --- a/libavfilter/libmpcodecs/vf_smartblur.c +++ b/libavfilter/libmpcodecs/vf_smartblur.c @@ -87,7 +87,7 @@ static int allocStuff(FilterParam *f, int width, int height){ swsF.lumH= swsF.lumV= vec; swsF.chrH= swsF.chrV= NULL; f->filterContext= sws_getContext( - width, height, PIX_FMT_GRAY8, width, height, PIX_FMT_GRAY8, SWS_BICUBIC | get_sws_cpuflags(), &swsF, NULL, NULL); + width, height, PIX_FMT_GRAY8, width, height, PIX_FMT_GRAY8, SWS_BICUBIC, &swsF, 
NULL, NULL); sws_freeVec(vec); @@ -183,11 +183,11 @@ static inline void blur(uint8_t *dst, uint8_t *src, int w, int h, int dstStride, static int put_image(struct vf_instance *vf, mp_image_t *mpi, double pts){ int cw= mpi->w >> mpi->chroma_x_shift; int ch= mpi->h >> mpi->chroma_y_shift; - FilterParam *f= &vf->priv; + int threshold = vf->priv->luma.threshold || vf->priv->chroma.threshold; mp_image_t *dmpi=vf_get_image(vf->next,mpi->imgfmt, MP_IMGTYPE_TEMP, MP_IMGFLAG_ACCEPT_STRIDE| - (f->threshold) ? MP_IMGFLAG_READABLE : 0, + (threshold ? MP_IMGFLAG_READABLE : 0), mpi->w,mpi->h); assert(mpi->flags&MP_IMGFLAG_PLANAR); diff --git a/libavfilter/libmpcodecs/vf_unsharp.c b/libavfilter/libmpcodecs/vf_unsharp.c index cd464321f4..db22f78e9d 100644 --- a/libavfilter/libmpcodecs/vf_unsharp.c +++ b/libavfilter/libmpcodecs/vf_unsharp.c @@ -132,7 +132,7 @@ static int config( struct vf_instance *vf, int z, stepsX, stepsY; FilterParam *fp; - char *effect; + const char *effect; // allocate buffers diff --git a/libavfilter/vf_crop.c b/libavfilter/vf_crop.c index 55dc13e9aa..8182a36531 100644 --- a/libavfilter/vf_crop.c +++ b/libavfilter/vf_crop.c @@ -264,11 +264,9 @@ static void start_frame(AVFilterLink *link, AVFilterBufferRef *picref) crop->x &= ~((1 << crop->hsub) - 1); crop->y &= ~((1 << crop->vsub) - 1); -#ifdef DEBUG - av_log(ctx, AV_LOG_DEBUG, - "n:%d t:%f x:%d y:%d x+w:%d y+h:%d\n", - (int)crop->var_values[VAR_N], crop->var_values[VAR_T], crop->x, crop->y, crop->x+crop->w, crop->y+crop->h); -#endif + av_dlog(ctx, "n:%d t:%f x:%d y:%d x+w:%d y+h:%d\n", + (int)crop->var_values[VAR_N], crop->var_values[VAR_T], crop->x, + crop->y, crop->x+crop->w, crop->y+crop->h); ref2->data[0] += crop->y * ref2->linesize[0]; ref2->data[0] += crop->x * crop->max_step[0]; diff --git a/libavfilter/vf_drawtext.c b/libavfilter/vf_drawtext.c index b26029bb8f..cf0eb43344 100644 --- a/libavfilter/vf_drawtext.c +++ b/libavfilter/vf_drawtext.c @@ -49,9 +49,11 @@ typedef struct { const AVClass *class; 
uint8_t *fontfile; ///< font to be used uint8_t *text; ///< text to be drawn - uint8_t *text_priv; ///< used to detect whether text changed + uint8_t *expanded_text; ///< used to contain the strftime()-expanded text + size_t expanded_text_size; ///< size in bytes of the expanded_text buffer int ft_load_flags; ///< flags used for loading fonts, see FT_LOAD_* FT_Vector *positions; ///< positions for each element in the text + size_t nb_positions; ///< number of elements of positions array char *textfile; ///< file with text to be drawn unsigned int x; ///< x position to start drawing text unsigned int y; ///< y position to start drawing text @@ -84,37 +86,37 @@ typedef struct { #define OFFSET(x) offsetof(DrawTextContext, x) static const AVOption drawtext_options[]= { -{"fontfile", "set font file", OFFSET(fontfile), FF_OPT_TYPE_STRING, 0, CHAR_MIN, CHAR_MAX }, -{"text", "set text", OFFSET(text), FF_OPT_TYPE_STRING, 0, CHAR_MIN, CHAR_MAX }, -{"textfile", "set text file", OFFSET(textfile), FF_OPT_TYPE_STRING, 0, CHAR_MIN, CHAR_MAX }, -{"fontcolor","set foreground color", OFFSET(fontcolor_string), FF_OPT_TYPE_STRING, 0, CHAR_MIN, CHAR_MAX }, -{"boxcolor", "set box color", OFFSET(boxcolor_string), FF_OPT_TYPE_STRING, 0, CHAR_MIN, CHAR_MAX }, -{"shadowcolor", "set shadow color", OFFSET(shadowcolor_string), FF_OPT_TYPE_STRING, 0, CHAR_MIN, CHAR_MAX }, -{"box", "set box", OFFSET(draw_box), FF_OPT_TYPE_INT, 0, 0, 1 }, -{"fontsize", "set font size", OFFSET(fontsize), FF_OPT_TYPE_INT, 16, 1, 72 }, -{"x", "set x", OFFSET(x), FF_OPT_TYPE_INT, 0, 0, INT_MAX }, -{"y", "set y", OFFSET(y), FF_OPT_TYPE_INT, 0, 0, INT_MAX }, -{"shadowx", "set x", OFFSET(shadowx), FF_OPT_TYPE_INT, 0, INT_MIN, INT_MAX }, -{"shadowy", "set y", OFFSET(shadowy), FF_OPT_TYPE_INT, 0, INT_MIN, INT_MAX }, -{"tabsize", "set tab size", OFFSET(tabsize), FF_OPT_TYPE_INT, 4, 0, INT_MAX }, +{"fontfile", "set font file", OFFSET(fontfile), FF_OPT_TYPE_STRING, {.str=NULL}, CHAR_MIN, CHAR_MAX }, +{"text", "set text", 
OFFSET(text), FF_OPT_TYPE_STRING, {.str=NULL}, CHAR_MIN, CHAR_MAX }, +{"textfile", "set text file", OFFSET(textfile), FF_OPT_TYPE_STRING, {.str=NULL}, CHAR_MIN, CHAR_MAX }, +{"fontcolor","set foreground color", OFFSET(fontcolor_string), FF_OPT_TYPE_STRING, {.str=NULL}, CHAR_MIN, CHAR_MAX }, +{"boxcolor", "set box color", OFFSET(boxcolor_string), FF_OPT_TYPE_STRING, {.str=NULL}, CHAR_MIN, CHAR_MAX }, +{"shadowcolor", "set shadow color", OFFSET(shadowcolor_string), FF_OPT_TYPE_STRING, {.str=NULL}, CHAR_MIN, CHAR_MAX }, +{"box", "set box", OFFSET(draw_box), FF_OPT_TYPE_INT, {.dbl=0}, 0, 1 }, +{"fontsize", "set font size", OFFSET(fontsize), FF_OPT_TYPE_INT, {.dbl=16}, 1, 72 }, +{"x", "set x", OFFSET(x), FF_OPT_TYPE_INT, {.dbl=0}, 0, INT_MAX }, +{"y", "set y", OFFSET(y), FF_OPT_TYPE_INT, {.dbl=0}, 0, INT_MAX }, +{"shadowx", "set x", OFFSET(shadowx), FF_OPT_TYPE_INT, {.dbl=0}, INT_MIN, INT_MAX }, +{"shadowy", "set y", OFFSET(shadowy), FF_OPT_TYPE_INT, {.dbl=0}, INT_MIN, INT_MAX }, +{"tabsize", "set tab size", OFFSET(tabsize), FF_OPT_TYPE_INT, {.dbl=4}, 0, INT_MAX }, /* FT_LOAD_* flags */ -{"ft_load_flags", "set font loading flags for libfreetype", OFFSET(ft_load_flags), FF_OPT_TYPE_FLAGS, FT_LOAD_DEFAULT|FT_LOAD_RENDER, 0, INT_MAX, 0, "ft_load_flags" }, -{"default", "set default", 0, FF_OPT_TYPE_CONST, FT_LOAD_DEFAULT, INT_MIN, INT_MAX, 0, "ft_load_flags" }, -{"no_scale", "set no_scale", 0, FF_OPT_TYPE_CONST, FT_LOAD_NO_SCALE, INT_MIN, INT_MAX, 0, "ft_load_flags" }, -{"no_hinting", "set no_hinting", 0, FF_OPT_TYPE_CONST, FT_LOAD_NO_HINTING, INT_MIN, INT_MAX, 0, "ft_load_flags" }, -{"render", "set render", 0, FF_OPT_TYPE_CONST, FT_LOAD_RENDER, INT_MIN, INT_MAX, 0, "ft_load_flags" }, -{"no_bitmap", "set no_bitmap", 0, FF_OPT_TYPE_CONST, FT_LOAD_NO_BITMAP, INT_MIN, INT_MAX, 0, "ft_load_flags" }, -{"vertical_layout", "set vertical_layout", 0, FF_OPT_TYPE_CONST, FT_LOAD_VERTICAL_LAYOUT, INT_MIN, INT_MAX, 0, "ft_load_flags" }, -{"force_autohint", "set force_autohint", 0, 
FF_OPT_TYPE_CONST, FT_LOAD_FORCE_AUTOHINT, INT_MIN, INT_MAX, 0, "ft_load_flags" }, -{"crop_bitmap", "set crop_bitmap", 0, FF_OPT_TYPE_CONST, FT_LOAD_CROP_BITMAP, INT_MIN, INT_MAX, 0, "ft_load_flags" }, -{"pedantic", "set pedantic", 0, FF_OPT_TYPE_CONST, FT_LOAD_PEDANTIC, INT_MIN, INT_MAX, 0, "ft_load_flags" }, -{"ignore_global_advance_width", "set ignore_global_advance_width", 0, FF_OPT_TYPE_CONST, FT_LOAD_IGNORE_GLOBAL_ADVANCE_WIDTH, INT_MIN, INT_MAX, 0, "ft_load_flags" }, -{"no_recurse", "set no_recurse", 0, FF_OPT_TYPE_CONST, FT_LOAD_NO_RECURSE, INT_MIN, INT_MAX, 0, "ft_load_flags" }, -{"ignore_transform", "set ignore_transform", 0, FF_OPT_TYPE_CONST, FT_LOAD_IGNORE_TRANSFORM, INT_MIN, INT_MAX, 0, "ft_load_flags" }, -{"monochrome", "set monochrome", 0, FF_OPT_TYPE_CONST, FT_LOAD_MONOCHROME, INT_MIN, INT_MAX, 0, "ft_load_flags" }, -{"linear_design", "set linear_design", 0, FF_OPT_TYPE_CONST, FT_LOAD_LINEAR_DESIGN, INT_MIN, INT_MAX, 0, "ft_load_flags" }, -{"no_autohint", "set no_autohint", 0, FF_OPT_TYPE_CONST, FT_LOAD_NO_AUTOHINT, INT_MIN, INT_MAX, 0, "ft_load_flags" }, +{"ft_load_flags", "set font loading flags for libfreetype", OFFSET(ft_load_flags), FF_OPT_TYPE_FLAGS, {.dbl=FT_LOAD_DEFAULT|FT_LOAD_RENDER}, 0, INT_MAX, 0, "ft_load_flags" }, +{"default", "set default", 0, FF_OPT_TYPE_CONST, {.dbl=FT_LOAD_DEFAULT}, INT_MIN, INT_MAX, 0, "ft_load_flags" }, +{"no_scale", "set no_scale", 0, FF_OPT_TYPE_CONST, {.dbl=FT_LOAD_NO_SCALE}, INT_MIN, INT_MAX, 0, "ft_load_flags" }, +{"no_hinting", "set no_hinting", 0, FF_OPT_TYPE_CONST, {.dbl=FT_LOAD_NO_HINTING}, INT_MIN, INT_MAX, 0, "ft_load_flags" }, +{"render", "set render", 0, FF_OPT_TYPE_CONST, {.dbl=FT_LOAD_RENDER}, INT_MIN, INT_MAX, 0, "ft_load_flags" }, +{"no_bitmap", "set no_bitmap", 0, FF_OPT_TYPE_CONST, {.dbl=FT_LOAD_NO_BITMAP}, INT_MIN, INT_MAX, 0, "ft_load_flags" }, +{"vertical_layout", "set vertical_layout", 0, FF_OPT_TYPE_CONST, {.dbl=FT_LOAD_VERTICAL_LAYOUT}, INT_MIN, INT_MAX, 0, "ft_load_flags" }, 
+{"force_autohint", "set force_autohint", 0, FF_OPT_TYPE_CONST, {.dbl=FT_LOAD_FORCE_AUTOHINT}, INT_MIN, INT_MAX, 0, "ft_load_flags" }, +{"crop_bitmap", "set crop_bitmap", 0, FF_OPT_TYPE_CONST, {.dbl=FT_LOAD_CROP_BITMAP}, INT_MIN, INT_MAX, 0, "ft_load_flags" }, +{"pedantic", "set pedantic", 0, FF_OPT_TYPE_CONST, {.dbl=FT_LOAD_PEDANTIC}, INT_MIN, INT_MAX, 0, "ft_load_flags" }, +{"ignore_global_advance_width", "set ignore_global_advance_width", 0, FF_OPT_TYPE_CONST, {.dbl=FT_LOAD_IGNORE_GLOBAL_ADVANCE_WIDTH}, INT_MIN, INT_MAX, 0, "ft_load_flags" }, +{"no_recurse", "set no_recurse", 0, FF_OPT_TYPE_CONST, {.dbl=FT_LOAD_NO_RECURSE}, INT_MIN, INT_MAX, 0, "ft_load_flags" }, +{"ignore_transform", "set ignore_transform", 0, FF_OPT_TYPE_CONST, {.dbl=FT_LOAD_IGNORE_TRANSFORM}, INT_MIN, INT_MAX, 0, "ft_load_flags" }, +{"monochrome", "set monochrome", 0, FF_OPT_TYPE_CONST, {.dbl=FT_LOAD_MONOCHROME}, INT_MIN, INT_MAX, 0, "ft_load_flags" }, +{"linear_design", "set linear_design", 0, FF_OPT_TYPE_CONST, {.dbl=FT_LOAD_LINEAR_DESIGN}, INT_MIN, INT_MAX, 0, "ft_load_flags" }, +{"no_autohint", "set no_autohint", 0, FF_OPT_TYPE_CONST, {.dbl=FT_LOAD_NO_AUTOHINT}, INT_MIN, INT_MAX, 0, "ft_load_flags" }, {NULL}, }; @@ -349,6 +351,7 @@ static av_cold void uninit(AVFilterContext *ctx) av_freep(&dtext->fontfile); av_freep(&dtext->text); + av_freep(&dtext->expanded_text); av_freep(&dtext->fontcolor_string); av_freep(&dtext->boxcolor_string); av_freep(&dtext->positions); @@ -517,7 +520,7 @@ static inline int is_newline(uint32_t c) static int draw_glyphs(DrawTextContext *dtext, AVFilterBufferRef *picref, int width, int height, const uint8_t rgbcolor[4], const uint8_t yuvcolor[4], int x, int y) { - char *text = dtext->text; + char *text = HAVE_LOCALTIME_R ? 
dtext->expanded_text : dtext->text; uint32_t code = 0; int i; uint8_t *p; @@ -559,45 +562,51 @@ static int draw_text(AVFilterContext *ctx, AVFilterBufferRef *picref, uint32_t code = 0, prev_code = 0; int x = 0, y = 0, i = 0, ret; int text_height, baseline; + char *text = dtext->text; uint8_t *p; - int str_w = 0; + int str_w = 0, len; int y_min = 32000, y_max = -32000; FT_Vector delta; Glyph *glyph = NULL, *prev_glyph = NULL; Glyph dummy = { 0 }; - if (dtext->text != dtext->text_priv) { #if HAVE_LOCALTIME_R - time_t now = time(0); - struct tm ltime; - uint8_t *buf = NULL; - int buflen = 2*strlen(dtext->text) + 1, len; - - localtime_r(&now, <ime); - - while ((buf = av_realloc(buf, buflen))) { - *buf = 1; - if ((len = strftime(buf, buflen, dtext->text, <ime)) != 0 || *buf == 0) - break; - buflen *= 2; - } - if (!buf) - return AVERROR(ENOMEM); - av_freep(&dtext->text); - dtext->text = dtext->text_priv = buf; -#else - dtext->text_priv = dtext->text; + time_t now = time(0); + struct tm ltime; + uint8_t *buf = dtext->expanded_text; + int buf_size = dtext->expanded_text_size; + + if (!buf) { + buf_size = 2*strlen(dtext->text)+1; + buf = av_malloc(buf_size); + } + + localtime_r(&now, <ime); + + do { + *buf = 1; + if (strftime(buf, buf_size, dtext->text, <ime) != 0 || *buf == 0) + break; + buf_size *= 2; + } while ((buf = av_realloc(buf, buf_size))); + + if (!buf) + return AVERROR(ENOMEM); + text = dtext->expanded_text = buf; + dtext->expanded_text_size = buf_size; #endif - if (!(dtext->positions = av_realloc(dtext->positions, - strlen(dtext->text)*sizeof(*dtext->positions)))) + if ((len = strlen(text)) > dtext->nb_positions) { + if (!(dtext->positions = + av_realloc(dtext->positions, len*sizeof(*dtext->positions)))) return AVERROR(ENOMEM); + dtext->nb_positions = len; } x = dtext->x; y = dtext->y; /* load and cache glyphs */ - for (i = 0, p = dtext->text; *p; i++) { + for (i = 0, p = text; *p; i++) { GET_UTF8(code, *p++, continue;); /* get glyph */ @@ -614,7 +623,7 @@ 
static int draw_text(AVFilterContext *ctx, AVFilterBufferRef *picref, /* compute and save position for each glyph */ glyph = NULL; - for (i = 0, p = dtext->text; *p; i++) { + for (i = 0, p = text; *p; i++) { GET_UTF8(code, *p++, continue;); /* skip the \n in the sequence \r\n */ diff --git a/libavfilter/vf_fieldorder.c b/libavfilter/vf_fieldorder.c index 59ca77821a..0913b6950e 100644 --- a/libavfilter/vf_fieldorder.c +++ b/libavfilter/vf_fieldorder.c @@ -153,7 +153,7 @@ static void end_frame(AVFilterLink *inlink) AVFilterBufferRef *inpicref = inlink->cur_buf; AVFilterBufferRef *outpicref = outlink->out_buf; - int h, w, plane, line_step, line_size, line; + int h, plane, line_step, line_size, line; uint8_t *cpy_src, *cpy_dst; if ( inpicref->video->interlaced @@ -162,7 +162,6 @@ static void end_frame(AVFilterLink *inlink) "picture will move %s one line\n", fieldorder->dst_tff ? "up" : "down"); h = inpicref->video->h; - w = inpicref->video->w; for (plane = 0; plane < 4 && inpicref->data[plane]; plane++) { line_step = inpicref->linesize[plane]; line_size = fieldorder->line_size[plane]; diff --git a/libavfilter/vf_gradfun.c b/libavfilter/vf_gradfun.c index 83ed3d79e4..32dd3c1072 100644 --- a/libavfilter/vf_gradfun.c +++ b/libavfilter/vf_gradfun.c @@ -49,7 +49,7 @@ DECLARE_ALIGNED(16, static const uint16_t, dither)[8][8] = { {0x54,0x34,0x4C,0x2C,0x52,0x32,0x4A,0x2A}, }; -void ff_gradfun_filter_line_c(uint8_t *dst, uint8_t *src, uint16_t *dc, int width, int thresh, const uint16_t *dithers) +void ff_gradfun_filter_line_c(uint8_t *dst, const uint8_t *src, const uint16_t *dc, int width, int thresh, const uint16_t *dithers) { int x; for (x = 0; x < width; x++, dc += x & 1) { @@ -63,7 +63,7 @@ void ff_gradfun_filter_line_c(uint8_t *dst, uint8_t *src, uint16_t *dc, int widt } } -void ff_gradfun_blur_line_c(uint16_t *dc, uint16_t *buf, uint16_t *buf1, uint8_t *src, int src_linesize, int width) +void ff_gradfun_blur_line_c(uint16_t *dc, uint16_t *buf, const uint16_t *buf1, const 
uint8_t *src, int src_linesize, int width) { int x, v, old; for (x = 0; x < width; x++) { @@ -74,7 +74,7 @@ void ff_gradfun_blur_line_c(uint16_t *dc, uint16_t *buf, uint16_t *buf1, uint8_t } } -static void filter(GradFunContext *ctx, uint8_t *dst, uint8_t *src, int width, int height, int dst_linesize, int src_linesize, int r) +static void filter(GradFunContext *ctx, uint8_t *dst, const uint8_t *src, int width, int height, int dst_linesize, int src_linesize, int r) { int bstride = FFALIGN(width, 16) / 2; int y; diff --git a/libavfilter/vf_libopencv.c b/libavfilter/vf_libopencv.c index 8401b4dcb7..d21acade0e 100644 --- a/libavfilter/vf_libopencv.c +++ b/libavfilter/vf_libopencv.c @@ -26,7 +26,7 @@ /* #define DEBUG */ #include <opencv/cv.h> -#include <opencv/cxtypes.h> +#include <opencv/cxcore.h> #include "libavutil/avstring.h" #include "libavutil/file.h" #include "avfilter.h" diff --git a/libavfilter/vf_mp.c b/libavfilter/vf_mp.c index 866dbb16d2..0642b44f00 100644 --- a/libavfilter/vf_mp.c +++ b/libavfilter/vf_mp.c @@ -287,15 +287,6 @@ zrmjpeg CpuCaps gCpuCaps; //FIXME initialize this so optims work -//exact copy from vf_scale.c -int get_sws_cpuflags(void){ - return - (gCpuCaps.hasMMX ? SWS_CPU_CAPS_MMX : 0) - | (gCpuCaps.hasMMX2 ? SWS_CPU_CAPS_MMX2 : 0) - | (gCpuCaps.has3DNow ? SWS_CPU_CAPS_3DNOW : 0) - | (gCpuCaps.hasAltiVec ? 
SWS_CPU_CAPS_ALTIVEC : 0); -} - static void sws_getFlagsAndFilterFromCmdLine(int *flags, SwsFilter **srcFilterParam, SwsFilter **dstFilterParam) { static int firstTime=1; @@ -348,7 +339,7 @@ struct SwsContext *sws_getContextFromCmdLine(int srcW, int srcH, int srcFormat, if (srcFormat == IMGFMT_RGB8 || srcFormat == IMGFMT_BGR8) sfmt = PIX_FMT_PAL8; sws_getFlagsAndFilterFromCmdLine(&flags, &srcFilterParam, &dstFilterParam); - return sws_getContext(srcW, srcH, sfmt, dstW, dstH, dfmt, flags | get_sws_cpuflags(), srcFilterParam, dstFilterParam, NULL); + return sws_getContext(srcW, srcH, sfmt, dstW, dstH, dfmt, flags , srcFilterParam, dstFilterParam, NULL); } typedef struct { @@ -891,7 +882,7 @@ static void end_frame(AVFilterLink *inlink) } free_mp_image(mpi); -// avfilter_unref_buffer(inpic); + avfilter_unref_buffer(inpic); } AVFilter avfilter_vf_mp = { diff --git a/libavfilter/vf_scale.c b/libavfilter/vf_scale.c index 9ff93bd411..e172a2e586 100644 --- a/libavfilter/vf_scale.c +++ b/libavfilter/vf_scale.c @@ -270,7 +270,8 @@ static int scale_slice(AVFilterLink *link, struct SwsContext *sws, int y, int h, ScaleContext *scale = link->dst->priv; AVFilterBufferRef *cur_pic = link->cur_buf; AVFilterBufferRef *out_buf = link->dst->outputs[0]->out_buf; - const uint8_t *in[4], *out[4]; + const uint8_t *in[4]; + uint8_t *out[4]; int in_stride[4],out_stride[4]; int i; diff --git a/libavfilter/vf_select.c b/libavfilter/vf_select.c new file mode 100644 index 0000000000..3d05167d13 --- /dev/null +++ b/libavfilter/vf_select.c @@ -0,0 +1,351 @@ +/* + * Copyright (c) 2011 Stefano Sabatini + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * filter for selecting which frame passes in the filterchain + */ + +#include "libavutil/eval.h" +#include "libavutil/fifo.h" +#include "avfilter.h" + +static const char *var_names[] = { + "E", ///< Euler number + "PHI", ///< golden ratio + "PI", ///< greek pi + + "TB", ///< timebase + + "pts", ///< original pts in the file of the frame + "start_pts", ///< first PTS in the stream, expressed in TB units + "prev_pts", ///< previous frame PTS + "prev_selected_pts", ///< previous selected frame PTS + + "t", ///< first PTS in seconds + "start_t", ///< first PTS in the stream, expressed in seconds + "prev_t", ///< previous frame time + "prev_selected_t", ///< previously selected time + + "pict_type", ///< the type of picture in the movie + "PICT_TYPE_I", + "PICT_TYPE_P", + "PICT_TYPE_B", + "PICT_TYPE_S", + "PICT_TYPE_SI", + "PICT_TYPE_SP", + "PICT_TYPE_BI", + + "interlace_type", ///< the frame interlace type + "INTERLACE_TYPE_P", + "INTERLACE_TYPE_T", + "INTERLACE_TYPE_B", + + "n", ///< frame number (starting from zero) + "selected_n", ///< selected frame number (starting from zero) + "prev_selected_n", ///< number of the last selected frame + + "key", ///< tell if the frame is a key frame + "pos", ///< original position in the file of the frame + + NULL +}; + +enum var_name { + VAR_E, + VAR_PHI, + VAR_PI, + + VAR_TB, + + VAR_PTS, + VAR_START_PTS, + VAR_PREV_PTS, + VAR_PREV_SELECTED_PTS, + + VAR_T, + VAR_START_T, + VAR_PREV_T, + VAR_PREV_SELECTED_T, + + VAR_PICT_TYPE, + 
VAR_PICT_TYPE_I, + VAR_PICT_TYPE_P, + VAR_PICT_TYPE_B, + VAR_PICT_TYPE_S, + VAR_PICT_TYPE_SI, + VAR_PICT_TYPE_SP, + VAR_PICT_TYPE_BI, + + VAR_INTERLACE_TYPE, + VAR_INTERLACE_TYPE_P, + VAR_INTERLACE_TYPE_T, + VAR_INTERLACE_TYPE_B, + + VAR_N, + VAR_SELECTED_N, + VAR_PREV_SELECTED_N, + + VAR_KEY, + VAR_POS, + + VAR_VARS_NB +}; + +#define FIFO_SIZE 8 + +typedef struct { + AVExpr *expr; + double var_values[VAR_VARS_NB]; + double select; + int cache_frames; + AVFifoBuffer *pending_frames; ///< FIFO buffer of video frames +} SelectContext; + +static av_cold int init(AVFilterContext *ctx, const char *args, void *opaque) +{ + SelectContext *select = ctx->priv; + int ret; + + if ((ret = av_expr_parse(&select->expr, args ? args : "1", + var_names, NULL, NULL, NULL, NULL, 0, ctx)) < 0) { + av_log(ctx, AV_LOG_ERROR, "Error while parsing expression '%s'\n", args); + return ret; + } + + select->pending_frames = av_fifo_alloc(FIFO_SIZE*sizeof(AVFilterBufferRef*)); + if (!select->pending_frames) { + av_log(ctx, AV_LOG_ERROR, "Failed to allocate pending frames buffer.\n"); + return AVERROR(ENOMEM); + } + return 0; +} + +#define INTERLACE_TYPE_P 0 +#define INTERLACE_TYPE_T 1 +#define INTERLACE_TYPE_B 2 + +static int config_input(AVFilterLink *inlink) +{ + SelectContext *select = inlink->dst->priv; + + select->var_values[VAR_E] = M_E; + select->var_values[VAR_PHI] = M_PHI; + select->var_values[VAR_PI] = M_PI; + + select->var_values[VAR_N] = 0.0; + select->var_values[VAR_SELECTED_N] = 0.0; + + select->var_values[VAR_TB] = av_q2d(inlink->time_base); + + select->var_values[VAR_PREV_PTS] = NAN; + select->var_values[VAR_PREV_SELECTED_PTS] = NAN; + select->var_values[VAR_PREV_SELECTED_T] = NAN; + select->var_values[VAR_START_PTS] = NAN; + select->var_values[VAR_START_T] = NAN; + + select->var_values[VAR_PICT_TYPE_I] = AV_PICTURE_TYPE_I; + select->var_values[VAR_PICT_TYPE_P] = AV_PICTURE_TYPE_P; + select->var_values[VAR_PICT_TYPE_B] = AV_PICTURE_TYPE_B; + select->var_values[VAR_PICT_TYPE_SI] 
= AV_PICTURE_TYPE_SI; + select->var_values[VAR_PICT_TYPE_SP] = AV_PICTURE_TYPE_SP; + + select->var_values[VAR_INTERLACE_TYPE_P] = INTERLACE_TYPE_P; + select->var_values[VAR_INTERLACE_TYPE_T] = INTERLACE_TYPE_T; + select->var_values[VAR_INTERLACE_TYPE_B] = INTERLACE_TYPE_B;; + + return 0; +} + +#define D2TS(d) (isnan(d) ? AV_NOPTS_VALUE : (int64_t)(d)) +#define TS2D(ts) ((ts) == AV_NOPTS_VALUE ? NAN : (double)(ts)) + +static int select_frame(AVFilterContext *ctx, AVFilterBufferRef *picref) +{ + SelectContext *select = ctx->priv; + AVFilterLink *inlink = ctx->inputs[0]; + double res; + + if (isnan(select->var_values[VAR_START_PTS])) + select->var_values[VAR_START_PTS] = TS2D(picref->pts); + + select->var_values[VAR_PTS] = TS2D(picref->pts); + select->var_values[VAR_T ] = picref->pts * av_q2d(inlink->time_base); + select->var_values[VAR_POS] = picref->pos == -1 ? NAN : picref->pos; + select->var_values[VAR_PREV_PTS] = TS2D(picref ->pts); + + select->var_values[VAR_INTERLACE_TYPE] = + !picref->video->interlaced ? INTERLACE_TYPE_P : + picref->video->top_field_first ? INTERLACE_TYPE_T : INTERLACE_TYPE_B; + select->var_values[VAR_PICT_TYPE] = picref->video->pict_type; + + res = av_expr_eval(select->expr, select->var_values, NULL); + av_log(inlink->dst, AV_LOG_DEBUG, + "n:%d pts:%d t:%f pos:%d interlace_type:%c key:%d pict_type:%c " + "-> select:%f\n", + (int)select->var_values[VAR_N], + (int)select->var_values[VAR_PTS], + select->var_values[VAR_T], + (int)select->var_values[VAR_POS], + select->var_values[VAR_INTERLACE_TYPE] == INTERLACE_TYPE_P ? 'P' : + select->var_values[VAR_INTERLACE_TYPE] == INTERLACE_TYPE_T ? 'T' : + select->var_values[VAR_INTERLACE_TYPE] == INTERLACE_TYPE_B ? 
'B' : '?', + (int)select->var_values[VAR_KEY], + av_get_picture_type_char(select->var_values[VAR_PICT_TYPE]), + res); + + select->var_values[VAR_N] += 1.0; + + if (res) { + select->var_values[VAR_PREV_SELECTED_N] = select->var_values[VAR_N]; + select->var_values[VAR_PREV_SELECTED_PTS] = select->var_values[VAR_PTS]; + select->var_values[VAR_PREV_SELECTED_T] = select->var_values[VAR_T]; + select->var_values[VAR_SELECTED_N] += 1.0; + } + return res; +} + +static void start_frame(AVFilterLink *inlink, AVFilterBufferRef *picref) +{ + SelectContext *select = inlink->dst->priv; + + select->select = select_frame(inlink->dst, picref); + if (select->select) { + /* frame was requested through poll_frame */ + if (select->cache_frames) { + if (!av_fifo_space(select->pending_frames)) + av_log(inlink->dst, AV_LOG_ERROR, + "Buffering limit reached, cannot cache more frames\n"); + else + av_fifo_generic_write(select->pending_frames, &picref, + sizeof(picref), NULL); + return; + } + avfilter_start_frame(inlink->dst->outputs[0], avfilter_ref_buffer(picref, ~0)); + } +} + +static void draw_slice(AVFilterLink *inlink, int y, int h, int slice_dir) +{ + SelectContext *select = inlink->dst->priv; + + if (select->select && !select->cache_frames) + avfilter_draw_slice(inlink->dst->outputs[0], y, h, slice_dir); +} + +static void end_frame(AVFilterLink *inlink) +{ + SelectContext *select = inlink->dst->priv; + AVFilterBufferRef *picref = inlink->cur_buf; + + if (select->select) { + if (select->cache_frames) + return; + avfilter_end_frame(inlink->dst->outputs[0]); + } + avfilter_unref_buffer(picref); +} + +static int request_frame(AVFilterLink *outlink) +{ + AVFilterContext *ctx = outlink->src; + SelectContext *select = ctx->priv; + AVFilterLink *inlink = outlink->src->inputs[0]; + select->select = 0; + + if (av_fifo_size(select->pending_frames)) { + AVFilterBufferRef *picref; + av_fifo_generic_read(select->pending_frames, &picref, sizeof(picref), NULL); + avfilter_start_frame(outlink, 
avfilter_ref_buffer(picref, ~0)); + avfilter_draw_slice(outlink, 0, outlink->h, 1); + avfilter_end_frame(outlink); + avfilter_unref_buffer(picref); + return 0; + } + + while (!select->select) { + int ret = avfilter_request_frame(inlink); + if (ret < 0) + return ret; + } + + return 0; +} + +static int poll_frame(AVFilterLink *outlink) +{ + SelectContext *select = outlink->src->priv; + AVFilterLink *inlink = outlink->src->inputs[0]; + int count, ret; + + if (!av_fifo_size(select->pending_frames)) { + if ((count = avfilter_poll_frame(inlink)) <= 0) + return count; + /* request frame from input, and apply select condition to it */ + select->cache_frames = 1; + while (count-- && av_fifo_space(select->pending_frames)) { + ret = avfilter_request_frame(inlink); + if (ret < 0) + break; + } + select->cache_frames = 0; + } + + return av_fifo_size(select->pending_frames)/sizeof(AVFilterBufferRef *); +} + +static av_cold void uninit(AVFilterContext *ctx) +{ + SelectContext *select = ctx->priv; + AVFilterBufferRef *picref; + int i; + + av_expr_free(select->expr); + select->expr = NULL; + + for (i = 0; i < av_fifo_size(select->pending_frames)/sizeof(picref); i++) { + av_fifo_generic_read(select->pending_frames, &picref, sizeof(picref), NULL); + avfilter_unref_buffer(picref); + } + av_fifo_free(select->pending_frames); +} + +AVFilter avfilter_vf_select = { + .name = "select", + .description = NULL_IF_CONFIG_SMALL("Select frames to pass in output."), + .init = init, + .uninit = uninit, + + .priv_size = sizeof(SelectContext), + + .inputs = (AVFilterPad[]) {{ .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .get_video_buffer = avfilter_null_get_video_buffer, + .config_props = config_input, + .start_frame = start_frame, + .draw_slice = draw_slice, + .end_frame = end_frame }, + { .name = NULL }}, + .outputs = (AVFilterPad[]) {{ .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .poll_frame = poll_frame, + .request_frame = request_frame, }, + { .name = NULL}}, +}; diff --git 
a/libavfilter/vf_showinfo.c b/libavfilter/vf_showinfo.c index d512199602..82aa3b9901 100644 --- a/libavfilter/vf_showinfo.c +++ b/libavfilter/vf_showinfo.c @@ -43,19 +43,25 @@ static void end_frame(AVFilterLink *inlink) AVFilterContext *ctx = inlink->dst; ShowInfoContext *showinfo = ctx->priv; AVFilterBufferRef *picref = inlink->cur_buf; - uint32_t plane_crc[4], crc = 0; - int plane; + uint32_t plane_checksum[4] = {0}, checksum = 0; + int i, plane, vsub = av_pix_fmt_descriptors[inlink->format].log2_chroma_h; - for (plane = 0; plane < 4; plane++) { + for (plane = 0; picref->data[plane] && plane < 4; plane++) { size_t linesize = av_image_get_linesize(picref->format, picref->video->w, plane); - plane_crc[plane] = av_adler32_update(0 , picref->data[plane], linesize); - crc = av_adler32_update(crc, picref->data[plane], linesize); + uint8_t *data = picref->data[plane]; + int h = plane == 1 || plane == 2 ? inlink->h >> vsub : inlink->h; + + for (i = 0; i < h; i++) { + plane_checksum[plane] = av_adler32_update(plane_checksum[plane], data, linesize); + checksum = av_adler32_update(checksum, data, linesize); + data += picref->linesize[plane]; + } } av_log(ctx, AV_LOG_INFO, "n:%d pts:%"PRId64" pts_time:%f pos:%"PRId64" " "fmt:%s sar:%d/%d s:%dx%d i:%c iskey:%d type:%c " - "crc:%u plane_crc:[%u %u %u %u]\n", + "checksum:%u plane_checksum:[%u %u %u %u]\n", showinfo->frame, picref->pts, picref ->pts * av_q2d(inlink->time_base), picref->pos, av_pix_fmt_descriptors[picref->format].name, @@ -65,7 +71,7 @@ static void end_frame(AVFilterLink *inlink) picref->video->top_field_first ? 
'T' : 'B', /* Top / Bottom */ picref->video->key_frame, av_get_picture_type_char(picref->video->pict_type), - crc, plane_crc[0], plane_crc[1], plane_crc[2], plane_crc[3]); + checksum, plane_checksum[0], plane_checksum[1], plane_checksum[2], plane_checksum[3]); showinfo->frame++; avfilter_end_frame(inlink->dst->outputs[0]); @@ -83,7 +89,7 @@ AVFilter avfilter_vf_showinfo = { .get_video_buffer = avfilter_null_get_video_buffer, .start_frame = avfilter_null_start_frame, .end_frame = end_frame, - .min_perms = AV_PERM_READ, }, + .min_perms = AV_PERM_READ, }, { .name = NULL}}, .outputs = (AVFilterPad[]) {{ .name = "default", diff --git a/libavfilter/vf_split.c b/libavfilter/vf_split.c new file mode 100644 index 0000000000..cbebf264fa --- /dev/null +++ b/libavfilter/vf_split.c @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2007 Bobby Bingham + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * Video splitter + */ + +#include "avfilter.h" + +static void start_frame(AVFilterLink *inlink, AVFilterBufferRef *picref) +{ + avfilter_start_frame(inlink->dst->outputs[0], + avfilter_ref_buffer(picref, ~AV_PERM_WRITE)); + avfilter_start_frame(inlink->dst->outputs[1], + avfilter_ref_buffer(picref, ~AV_PERM_WRITE)); +} + +static void draw_slice(AVFilterLink *inlink, int y, int h, int slice_dir) +{ + avfilter_draw_slice(inlink->dst->outputs[0], y, h, slice_dir); + avfilter_draw_slice(inlink->dst->outputs[1], y, h, slice_dir); +} + +static void end_frame(AVFilterLink *inlink) +{ + avfilter_end_frame(inlink->dst->outputs[0]); + avfilter_end_frame(inlink->dst->outputs[1]); + + avfilter_unref_buffer(inlink->cur_buf); +} + +AVFilter avfilter_vf_split = { + .name = "split", + .description = NULL_IF_CONFIG_SMALL("Pass on the input to two outputs."), + + .inputs = (AVFilterPad[]) {{ .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .get_video_buffer= avfilter_null_get_video_buffer, + .start_frame = start_frame, + .draw_slice = draw_slice, + .end_frame = end_frame, }, + { .name = NULL}}, + .outputs = (AVFilterPad[]) {{ .name = "output1", + .type = AVMEDIA_TYPE_VIDEO, }, + { .name = "output2", + .type = AVMEDIA_TYPE_VIDEO, }, + { .name = NULL}}, +}; diff --git a/libavfilter/vf_unsharp.c b/libavfilter/vf_unsharp.c index 7aa7a43651..fa75de5d94 100644 --- a/libavfilter/vf_unsharp.c +++ b/libavfilter/vf_unsharp.c @@ -63,7 +63,7 @@ typedef struct { FilterParam chroma; ///< chroma parameters (width, height, amount) } UnsharpContext; -static void unsharpen(uint8_t *dst, uint8_t *src, int dst_stride, int src_stride, int width, int height, FilterParam *fp) +static void unsharpen(uint8_t *dst, const uint8_t *src, int dst_stride, int src_stride, 
int width, int height, FilterParam *fp) { uint32_t **sc = fp->sc; uint32_t sr[(MAX_SIZE * MAX_SIZE) - 1], tmp1, tmp2; @@ -96,7 +96,7 @@ static void unsharpen(uint8_t *dst, uint8_t *src, int dst_stride, int src_stride tmp1 = sc[z + 1][x + fp->steps_x] + tmp2; sc[z + 1][x + fp->steps_x] = tmp2; } if (x >= fp->steps_x && y >= fp->steps_y) { - uint8_t* srx = src - fp->steps_y * src_stride + x - fp->steps_x; + const uint8_t* srx = src - fp->steps_y * src_stride + x - fp->steps_x; uint8_t* dsx = dst - fp->steps_y * dst_stride + x - fp->steps_x; res = (int32_t)*srx + ((((int32_t) * srx - (int32_t)((tmp1 + fp->halfscale) >> fp->scalebits)) * fp->amount) >> 16); diff --git a/libavfilter/vsrc_buffer.c b/libavfilter/vsrc_buffer.c index 84f3b33c3f..246444b3ac 100644 --- a/libavfilter/vsrc_buffer.c +++ b/libavfilter/vsrc_buffer.c @@ -29,8 +29,7 @@ #include "libavutil/imgutils.h" typedef struct { - AVFrame frame; - int has_frame; + AVFilterBufferRef *picref; int h, w; enum PixelFormat pix_fmt; AVRational time_base; ///< time_base to set in the output link @@ -38,92 +37,114 @@ typedef struct { char sws_param[256]; } BufferSourceContext; -int av_vsrc_buffer_add_frame2(AVFilterContext *buffer_filter, AVFrame *frame, - const char *sws_param) +int av_vsrc_buffer_add_video_buffer_ref(AVFilterContext *buffer_filter, + AVFilterBufferRef *picref, int flags) { BufferSourceContext *c = buffer_filter->priv; + AVFilterLink *outlink = buffer_filter->outputs[0]; int ret; - if (c->has_frame) { - av_log(buffer_filter, AV_LOG_ERROR, - "Buffering several frames is not supported. " - "Please consume all available frames before adding a new one.\n" - ); - //return -1; - } - - if(!c->sws_param[0]){ - snprintf(c->sws_param, 255, "%d:%d:%s", c->w, c->h, sws_param); + if (c->picref) { + if (flags & AV_VSRC_BUF_FLAG_OVERWRITE) { + avfilter_unref_buffer(c->picref); + c->picref = NULL; + } else { + av_log(buffer_filter, AV_LOG_ERROR, + "Buffering several frames is not supported. 
" + "Please consume all available frames before adding a new one.\n"); + return AVERROR(EINVAL); + } } - if (frame->width != c->w || frame->height != c->h || frame->format != c->pix_fmt) { - AVFilterContext *scale= buffer_filter->outputs[0]->dst; + if (picref->video->w != c->w || picref->video->h != c->h || picref->format != c->pix_fmt) { + AVFilterContext *scale = buffer_filter->outputs[0]->dst; AVFilterLink *link; + char scale_param[1024]; av_log(buffer_filter, AV_LOG_INFO, "Buffer video input changed from size:%dx%d fmt:%s to size:%dx%d fmt:%s\n", c->w, c->h, av_pix_fmt_descriptors[c->pix_fmt].name, - frame->width, frame->height, av_pix_fmt_descriptors[frame->format].name); + picref->video->w, picref->video->h, av_pix_fmt_descriptors[picref->format].name); - if(!scale || strcmp(scale->filter->name,"scale")){ - AVFilter *f= avfilter_get_by_name("scale"); + if (!scale || strcmp(scale->filter->name, "scale")) { + AVFilter *f = avfilter_get_by_name("scale"); av_log(buffer_filter, AV_LOG_INFO, "Inserting scaler filter\n"); - if(avfilter_open(&scale, f, "Input equalizer") < 0) - return -1; + if ((ret = avfilter_open(&scale, f, "Input equalizer")) < 0) + return ret; - if((ret=avfilter_init_filter(scale, c->sws_param, NULL))<0){ + snprintf(scale_param, sizeof(scale_param)-1, "%d:%d:%s", c->w, c->h, c->sws_param); + if ((ret = avfilter_init_filter(scale, scale_param, NULL)) < 0) { avfilter_free(scale); return ret; } - if((ret=avfilter_insert_filter(buffer_filter->outputs[0], scale, 0, 0))<0){ + if ((ret = avfilter_insert_filter(buffer_filter->outputs[0], scale, 0, 0)) < 0) { avfilter_free(scale); return ret; } scale->outputs[0]->time_base = scale->inputs[0]->time_base; scale->outputs[0]->format= c->pix_fmt; - } else if(!strcmp(scale->filter->name, "scale")) { - snprintf(c->sws_param, 255, "%d:%d:%s", scale->outputs[0]->w, scale->outputs[0]->h, sws_param); - scale->filter->init(scale, c->sws_param, NULL); + } else if (!strcmp(scale->filter->name, "scale")) { + 
snprintf(scale_param, sizeof(scale_param)-1, "%d:%d:%s", + scale->outputs[0]->w, scale->outputs[0]->h, c->sws_param); + scale->filter->init(scale, scale_param, NULL); } - c->pix_fmt = scale->inputs[0]->format = frame->format; - c->w = scale->inputs[0]->w = frame->width; - c->h = scale->inputs[0]->h = frame->height; + c->pix_fmt = scale->inputs[0]->format = picref->format; + c->w = scale->inputs[0]->w = picref->video->w; + c->h = scale->inputs[0]->h = picref->video->h; - link= scale->outputs[0]; + link = scale->outputs[0]; if ((ret = link->srcpad->config_props(link)) < 0) return ret; } - c->frame = *frame; - memcpy(c->frame.data , frame->data , sizeof(frame->data)); - memcpy(c->frame.linesize, frame->linesize, sizeof(frame->linesize)); - c->has_frame = 1; + c->picref = avfilter_get_video_buffer(outlink, AV_PERM_WRITE, + picref->video->w, picref->video->h); + av_image_copy(c->picref->data, c->picref->linesize, + picref->data, picref->linesize, + picref->format, picref->video->w, picref->video->h); + avfilter_copy_buffer_ref_props(c->picref, picref); return 0; } -int av_vsrc_buffer_add_frame(AVFilterContext *buffer_filter, AVFrame *frame) +#if CONFIG_AVCODEC +#include "avcodec.h" + +int av_vsrc_buffer_add_frame(AVFilterContext *buffer_src, + const AVFrame *frame, int flags) { - return av_vsrc_buffer_add_frame2(buffer_filter, frame, ""); + int ret; + AVFilterBufferRef *picref = + avfilter_get_video_buffer_ref_from_frame(frame, AV_PERM_WRITE); + if (!picref) + return AVERROR(ENOMEM); + ret = av_vsrc_buffer_add_video_buffer_ref(buffer_src, picref, flags); + picref->buf->data[0] = NULL; + avfilter_unref_buffer(picref); + + return ret; } +#endif static av_cold int init(AVFilterContext *ctx, const char *args, void *opaque) { BufferSourceContext *c = ctx->priv; char pix_fmt_str[128]; int n = 0; + *c->sws_param = 0; if (!args || - (n = sscanf(args, "%d:%d:%127[^:]:%d:%d:%d:%d", &c->w, &c->h, pix_fmt_str, + (n = sscanf(args, "%d:%d:%127[^:]:%d:%d:%d:%d:%255c", &c->w, &c->h, 
pix_fmt_str, &c->time_base.num, &c->time_base.den, - &c->sample_aspect_ratio.num, &c->sample_aspect_ratio.den)) != 7) { - av_log(ctx, AV_LOG_ERROR, "Expected 7 arguments, but only %d found in '%s'\n", n, args); + &c->sample_aspect_ratio.num, &c->sample_aspect_ratio.den, c->sws_param)) < 7) { + av_log(ctx, AV_LOG_ERROR, "Expected at least 7 arguments, but only %d found in '%s'\n", n, args); return AVERROR(EINVAL); } + if ((c->pix_fmt = av_get_pix_fmt(pix_fmt_str)) == PIX_FMT_NONE) { char *tail; c->pix_fmt = strtol(pix_fmt_str, &tail, 10); @@ -133,10 +154,10 @@ static av_cold int init(AVFilterContext *ctx, const char *args, void *opaque) } } - av_log(ctx, AV_LOG_INFO, "w:%d h:%d pixfmt:%s tb:%d/%d sar:%d/%d\n", + av_log(ctx, AV_LOG_INFO, "w:%d h:%d pixfmt:%s tb:%d/%d sar:%d/%d sws_param:%s\n", c->w, c->h, av_pix_fmt_descriptors[c->pix_fmt].name, c->time_base.num, c->time_base.den, - c->sample_aspect_ratio.num, c->sample_aspect_ratio.den); + c->sample_aspect_ratio.num, c->sample_aspect_ratio.den, c->sws_param); return 0; } @@ -164,31 +185,18 @@ static int config_props(AVFilterLink *link) static int request_frame(AVFilterLink *link) { BufferSourceContext *c = link->src->priv; - AVFilterBufferRef *picref; - if (!c->has_frame) { - av_log(link->src, AV_LOG_ERROR, + if (!c->picref) { + av_log(link->src, AV_LOG_WARNING, "request_frame() called with no available frame!\n"); - //return -1; + return AVERROR(EINVAL); } - /* This picture will be needed unmodified later for decoding the next - * frame */ - picref = avfilter_get_video_buffer(link, AV_PERM_WRITE | AV_PERM_PRESERVE | - AV_PERM_REUSE2, - link->w, link->h); - - av_image_copy(picref->data, picref->linesize, - c->frame.data, c->frame.linesize, - picref->format, link->w, link->h); - avfilter_copy_frame_props(picref, &c->frame); - - avfilter_start_frame(link, avfilter_ref_buffer(picref, ~0)); + avfilter_start_frame(link, avfilter_ref_buffer(c->picref, ~0)); avfilter_draw_slice(link, 0, link->h, 1); 
avfilter_end_frame(link); - avfilter_unref_buffer(picref); - - c->has_frame = 0; + avfilter_unref_buffer(c->picref); + c->picref = NULL; return 0; } @@ -196,7 +204,7 @@ static int request_frame(AVFilterLink *link) static int poll_frame(AVFilterLink *link) { BufferSourceContext *c = link->src->priv; - return !!(c->has_frame); + return !!(c->picref); } AVFilter avfilter_vsrc_buffer = { diff --git a/libavfilter/vsrc_buffer.h b/libavfilter/vsrc_buffer.h index 2dda546e01..b661d414ea 100644 --- a/libavfilter/vsrc_buffer.h +++ b/libavfilter/vsrc_buffer.h @@ -1,5 +1,4 @@ /* - * Memory buffer source filter * Copyright (c) 2008 Vitor Sessak * * This file is part of FFmpeg. @@ -27,12 +26,24 @@ * memory buffer source API for video */ -#include "libavcodec/avcodec.h" /* AVFrame */ #include "avfilter.h" -int av_vsrc_buffer_add_frame(AVFilterContext *buffer_filter, AVFrame *frame); +/** + * Tell av_vsrc_buffer_add_video_buffer_ref() to overwrite the already + * cached video buffer with the new added one, otherwise the function + * will complain and exit. + */ +#define AV_VSRC_BUF_FLAG_OVERWRITE 1 -int av_vsrc_buffer_add_frame2(AVFilterContext *buffer_filter, AVFrame *frame, - const char *sws_param); +/** + * Add video buffer data in picref to buffer_src. 
+ * + * @param buffer_src pointer to a buffer source context + * @param flags a combination of AV_VSRC_BUF_FLAG_* flags + * @return >= 0 in case of success, a negative AVERROR code in case of + * failure + */ +int av_vsrc_buffer_add_video_buffer_ref(AVFilterContext *buffer_src, + AVFilterBufferRef *picref, int flags); #endif /* AVFILTER_VSRC_BUFFER_H */ diff --git a/libavfilter/vsrc_movie.c b/libavfilter/vsrc_movie.c index e36412f480..6c5c831fed 100644 --- a/libavfilter/vsrc_movie.c +++ b/libavfilter/vsrc_movie.c @@ -153,7 +153,7 @@ static int movie_init(AVFilterContext *ctx) movie->w = movie->codec_ctx->width; movie->h = movie->codec_ctx->height; - av_log(ctx, AV_LOG_INFO, "seek_point:%lld format_name:%s file_name:%s stream_index:%d\n", + av_log(ctx, AV_LOG_INFO, "seek_point:%"PRIi64" format_name:%s file_name:%s stream_index:%d\n", movie->seek_point, movie->format_name, movie->file_name, movie->stream_index); diff --git a/libavfilter/x86/gradfun.c b/libavfilter/x86/gradfun.c index 894a44b9ff..05d4a6fd6e 100644 --- a/libavfilter/x86/gradfun.c +++ b/libavfilter/x86/gradfun.c @@ -23,7 +23,7 @@ DECLARE_ALIGNED(16, static const uint16_t, pw_7f)[8] = {0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F}; DECLARE_ALIGNED(16, static const uint16_t, pw_ff)[8] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF}; -void ff_gradfun_filter_line_mmx2(uint8_t *dst, uint8_t *src, uint16_t *dc, int width, int thresh, const uint16_t *dithers) +void ff_gradfun_filter_line_mmx2(uint8_t *dst, const uint8_t *src, const uint16_t *dc, int width, int thresh, const uint16_t *dithers) { #if HAVE_MMX intptr_t x; @@ -71,7 +71,7 @@ void ff_gradfun_filter_line_mmx2(uint8_t *dst, uint8_t *src, uint16_t *dc, int w #endif } -void ff_gradfun_filter_line_ssse3(uint8_t *dst, uint8_t *src, uint16_t *dc, int width, int thresh, const uint16_t *dithers) +void ff_gradfun_filter_line_ssse3(uint8_t *dst, const uint8_t *src, const uint16_t *dc, int width, int thresh, const uint16_t *dithers) { #if HAVE_SSSE3 intptr_t x; @@ 
-118,7 +118,7 @@ void ff_gradfun_filter_line_ssse3(uint8_t *dst, uint8_t *src, uint16_t *dc, int #endif // HAVE_SSSE3 } -void ff_gradfun_blur_line_sse2(uint16_t *dc, uint16_t *buf, uint16_t *buf1, uint8_t *src, int src_linesize, int width) +void ff_gradfun_blur_line_sse2(uint16_t *dc, uint16_t *buf, const uint16_t *buf1, const uint8_t *src, int src_linesize, int width) { #if HAVE_SSE #define BLURV(load)\ diff --git a/libavformat/4xm.c b/libavformat/4xm.c index 699277248d..0b79c761a5 100644 --- a/libavformat/4xm.c +++ b/libavformat/4xm.c @@ -246,7 +246,7 @@ static int fourxm_read_packet(AVFormatContext *s, FourxmDemuxContext *fourxm = s->priv_data; AVIOContext *pb = s->pb; unsigned int fourcc_tag; - unsigned int size, out_size; + unsigned int size; int ret = 0; unsigned int track_number; int packet_read = 0; @@ -295,7 +295,7 @@ static int fourxm_read_packet(AVFormatContext *s, case snd__TAG: track_number = avio_rl32(pb); - out_size= avio_rl32(pb); + avio_skip(pb, 4); size-=8; if (track_number < fourxm->track_count && fourxm->tracks[track_number].channels>0) { diff --git a/libavformat/Makefile b/libavformat/Makefile index cbaa4d6840..05f524690b 100644 --- a/libavformat/Makefile +++ b/libavformat/Makefile @@ -25,7 +25,7 @@ OBJS-$(CONFIG_AC3_MUXER) += rawenc.o OBJS-$(CONFIG_ADTS_MUXER) += adtsenc.o OBJS-$(CONFIG_AEA_DEMUXER) += aea.o pcm.o OBJS-$(CONFIG_AIFF_DEMUXER) += aiffdec.o riff.o pcm.o isom.o -OBJS-$(CONFIG_AIFF_MUXER) += aiffenc.o riff.o +OBJS-$(CONFIG_AIFF_MUXER) += aiffenc.o riff.o isom.o OBJS-$(CONFIG_AMR_DEMUXER) += amr.o OBJS-$(CONFIG_AMR_MUXER) += amr.o OBJS-$(CONFIG_ANM_DEMUXER) += anm.o @@ -234,6 +234,7 @@ OBJS-$(CONFIG_RSO_MUXER) += rsoenc.o rso.o OBJS-$(CONFIG_RPL_DEMUXER) += rpl.o OBJS-$(CONFIG_RTP_MUXER) += rtp.o \ rtpenc_aac.o \ + rtpenc_latm.o \ rtpenc_amr.o \ rtpenc_h263.o \ rtpenc_mpv.o \ @@ -340,9 +341,6 @@ OBJS-$(CONFIG_UDP_PROTOCOL) += udp.o # libavdevice dependencies OBJS-$(CONFIG_JACK_INDEV) += timefilter.o -EXAMPLES = output TESTPROGS = 
timefilter include $(SUBDIR)../subdir.mak - -$(SUBDIR)output-example$(EXESUF): ELIBS = -lswscale diff --git a/libavformat/aacdec.c b/libavformat/aacdec.c index 47e9bf3a18..4ac2cadb8f 100644 --- a/libavformat/aacdec.c +++ b/libavformat/aacdec.c @@ -44,7 +44,7 @@ static int adts_aac_probe(AVProbeData *p) uint32_t header = AV_RB16(buf2); if((header&0xFFF6) != 0xFFF0) break; - fsize = (AV_RB32(buf2+3)>>13) & 0x8FFF; + fsize = (AV_RB32(buf2 + 3) >> 13) & 0x1FFF; if(fsize < 7) break; buf2 += fsize; diff --git a/libavformat/aiffdec.c b/libavformat/aiffdec.c index e691bba29d..9608910340 100644 --- a/libavformat/aiffdec.c +++ b/libavformat/aiffdec.c @@ -20,6 +20,7 @@ */ #include "libavutil/intfloat_readwrite.h" +#include "libavutil/dict.h" #include "avformat.h" #include "pcm.h" #include "aiff.h" @@ -77,7 +78,7 @@ static void get_meta(AVFormatContext *s, const char *key, int size) } size += (size&1)-res; str[res] = 0; - av_metadata_set2(&s->metadata, key, str, AV_METADATA_DONT_STRDUP_VAL); + av_dict_set(&s->metadata, key, str, AV_METADATA_DONT_STRDUP_VAL); }else size+= size&1; diff --git a/libavformat/aiffenc.c b/libavformat/aiffenc.c index cc3cbc4762..34ab0cc246 100644 --- a/libavformat/aiffenc.c +++ b/libavformat/aiffenc.c @@ -22,6 +22,7 @@ #include "avformat.h" #include "aiff.h" #include "avio_internal.h" +#include "isom.h" typedef struct { int64_t form; @@ -61,6 +62,12 @@ static int aiff_write_header(AVFormatContext *s) avio_wb32(pb, 0xA2805140); } + if (enc->channels > 2 && enc->channel_layout) { + ffio_wfourcc(pb, "CHAN"); + avio_wb32(pb, 12); + ff_mov_write_chan(pb, enc->channel_layout); + } + /* Common chunk */ ffio_wfourcc(pb, "COMM"); avio_wb32(pb, aifc ? 
24 : 18); /* size */ diff --git a/libavformat/ape.c b/libavformat/ape.c index 4b37c7972d..bda6b32b1d 100644 --- a/libavformat/ape.c +++ b/libavformat/ape.c @@ -26,8 +26,6 @@ #include "avformat.h" #include "apetag.h" -#define ENABLE_DEBUG 0 - /* The earliest and latest file formats supported by this library */ #define APE_MIN_VERSION 3950 #define APE_MAX_VERSION 3990 @@ -96,19 +94,19 @@ static int ape_probe(AVProbeData * p) static void ape_dumpinfo(AVFormatContext * s, APEContext * ape_ctx) { -#if ENABLE_DEBUG +#ifdef DEBUG int i; av_log(s, AV_LOG_DEBUG, "Descriptor Block:\n\n"); av_log(s, AV_LOG_DEBUG, "magic = \"%c%c%c%c\"\n", ape_ctx->magic[0], ape_ctx->magic[1], ape_ctx->magic[2], ape_ctx->magic[3]); - av_log(s, AV_LOG_DEBUG, "fileversion = %d\n", ape_ctx->fileversion); - av_log(s, AV_LOG_DEBUG, "descriptorlength = %d\n", ape_ctx->descriptorlength); - av_log(s, AV_LOG_DEBUG, "headerlength = %d\n", ape_ctx->headerlength); - av_log(s, AV_LOG_DEBUG, "seektablelength = %d\n", ape_ctx->seektablelength); - av_log(s, AV_LOG_DEBUG, "wavheaderlength = %d\n", ape_ctx->wavheaderlength); - av_log(s, AV_LOG_DEBUG, "audiodatalength = %d\n", ape_ctx->audiodatalength); - av_log(s, AV_LOG_DEBUG, "audiodatalength_high = %d\n", ape_ctx->audiodatalength_high); - av_log(s, AV_LOG_DEBUG, "wavtaillength = %d\n", ape_ctx->wavtaillength); + av_log(s, AV_LOG_DEBUG, "fileversion = %"PRId16"\n", ape_ctx->fileversion); + av_log(s, AV_LOG_DEBUG, "descriptorlength = %"PRIu32"\n", ape_ctx->descriptorlength); + av_log(s, AV_LOG_DEBUG, "headerlength = %"PRIu32"\n", ape_ctx->headerlength); + av_log(s, AV_LOG_DEBUG, "seektablelength = %"PRIu32"\n", ape_ctx->seektablelength); + av_log(s, AV_LOG_DEBUG, "wavheaderlength = %"PRIu32"\n", ape_ctx->wavheaderlength); + av_log(s, AV_LOG_DEBUG, "audiodatalength = %"PRIu32"\n", ape_ctx->audiodatalength); + av_log(s, AV_LOG_DEBUG, "audiodatalength_high = %"PRIu32"\n", ape_ctx->audiodatalength_high); + av_log(s, AV_LOG_DEBUG, "wavtaillength = %"PRIu32"\n", 
ape_ctx->wavtaillength); av_log(s, AV_LOG_DEBUG, "md5 = "); for (i = 0; i < 16; i++) av_log(s, AV_LOG_DEBUG, "%02x", ape_ctx->md5[i]); @@ -116,14 +114,14 @@ static void ape_dumpinfo(AVFormatContext * s, APEContext * ape_ctx) av_log(s, AV_LOG_DEBUG, "\nHeader Block:\n\n"); - av_log(s, AV_LOG_DEBUG, "compressiontype = %d\n", ape_ctx->compressiontype); - av_log(s, AV_LOG_DEBUG, "formatflags = %d\n", ape_ctx->formatflags); - av_log(s, AV_LOG_DEBUG, "blocksperframe = %d\n", ape_ctx->blocksperframe); - av_log(s, AV_LOG_DEBUG, "finalframeblocks = %d\n", ape_ctx->finalframeblocks); - av_log(s, AV_LOG_DEBUG, "totalframes = %d\n", ape_ctx->totalframes); - av_log(s, AV_LOG_DEBUG, "bps = %d\n", ape_ctx->bps); - av_log(s, AV_LOG_DEBUG, "channels = %d\n", ape_ctx->channels); - av_log(s, AV_LOG_DEBUG, "samplerate = %d\n", ape_ctx->samplerate); + av_log(s, AV_LOG_DEBUG, "compressiontype = %"PRIu16"\n", ape_ctx->compressiontype); + av_log(s, AV_LOG_DEBUG, "formatflags = %"PRIu16"\n", ape_ctx->formatflags); + av_log(s, AV_LOG_DEBUG, "blocksperframe = %"PRIu32"\n", ape_ctx->blocksperframe); + av_log(s, AV_LOG_DEBUG, "finalframeblocks = %"PRIu32"\n", ape_ctx->finalframeblocks); + av_log(s, AV_LOG_DEBUG, "totalframes = %"PRIu32"\n", ape_ctx->totalframes); + av_log(s, AV_LOG_DEBUG, "bps = %"PRIu16"\n", ape_ctx->bps); + av_log(s, AV_LOG_DEBUG, "channels = %"PRIu16"\n", ape_ctx->channels); + av_log(s, AV_LOG_DEBUG, "samplerate = %"PRIu32"\n", ape_ctx->samplerate); av_log(s, AV_LOG_DEBUG, "\nSeektable\n\n"); if ((ape_ctx->seektablelength / sizeof(uint32_t)) != ape_ctx->totalframes) { @@ -140,12 +138,14 @@ static void ape_dumpinfo(AVFormatContext * s, APEContext * ape_ctx) av_log(s, AV_LOG_DEBUG, "\nFrames\n\n"); for (i = 0; i < ape_ctx->totalframes; i++) - av_log(s, AV_LOG_DEBUG, "%8d %8lld %8d (%d samples)\n", i, ape_ctx->frames[i].pos, ape_ctx->frames[i].size, ape_ctx->frames[i].nblocks); + av_log(s, AV_LOG_DEBUG, "%8d %8"PRId64" %8d (%d samples)\n", i, + ape_ctx->frames[i].pos, 
ape_ctx->frames[i].size, + ape_ctx->frames[i].nblocks); av_log(s, AV_LOG_DEBUG, "\nCalculated information:\n\n"); - av_log(s, AV_LOG_DEBUG, "junklength = %d\n", ape_ctx->junklength); - av_log(s, AV_LOG_DEBUG, "firstframe = %d\n", ape_ctx->firstframe); - av_log(s, AV_LOG_DEBUG, "totalsamples = %d\n", ape_ctx->totalsamples); + av_log(s, AV_LOG_DEBUG, "junklength = %"PRIu32"\n", ape_ctx->junklength); + av_log(s, AV_LOG_DEBUG, "firstframe = %"PRIu32"\n", ape_ctx->firstframe); + av_log(s, AV_LOG_DEBUG, "totalsamples = %"PRIu32"\n", ape_ctx->totalsamples); #endif } @@ -169,7 +169,8 @@ static int ape_read_header(AVFormatContext * s, AVFormatParameters * ap) ape->fileversion = avio_rl16(pb); if (ape->fileversion < APE_MIN_VERSION || ape->fileversion > APE_MAX_VERSION) { - av_log(s, AV_LOG_ERROR, "Unsupported file version - %d.%02d\n", ape->fileversion / 1000, (ape->fileversion % 1000) / 10); + av_log(s, AV_LOG_ERROR, "Unsupported file version - %"PRId16".%02"PRId16"\n", + ape->fileversion / 1000, (ape->fileversion % 1000) / 10); return -1; } @@ -247,11 +248,12 @@ static int ape_read_header(AVFormatContext * s, AVFormatParameters * ap) return AVERROR(EINVAL); } if(ape->totalframes > UINT_MAX / sizeof(APEFrame)){ - av_log(s, AV_LOG_ERROR, "Too many frames: %d\n", ape->totalframes); + av_log(s, AV_LOG_ERROR, "Too many frames: %"PRIu32"\n", + ape->totalframes); return -1; } if (ape->seektablelength && (ape->seektablelength / sizeof(*ape->seektable)) < ape->totalframes) { - av_log(s, AV_LOG_ERROR, "Number of seek entries is less than number of frames: %zd vs. %d\n", + av_log(s, AV_LOG_ERROR, "Number of seek entries is less than number of frames: %ld vs. 
%"PRIu32"\n", ape->seektablelength / sizeof(*ape->seektable), ape->totalframes); return AVERROR_INVALIDDATA; } @@ -276,7 +278,7 @@ static int ape_read_header(AVFormatContext * s, AVFormatParameters * ap) ape->frames[0].nblocks = ape->blocksperframe; ape->frames[0].skip = 0; for (i = 1; i < ape->totalframes; i++) { - ape->frames[i].pos = ape->seektable[i] + ape->junklength; //ape->frames[i-1].pos + ape->blocksperframe; + ape->frames[i].pos = ape->seektable[i] + ape->junklength; ape->frames[i].nblocks = ape->blocksperframe; ape->frames[i - 1].size = ape->frames[i].pos - ape->frames[i - 1].pos; ape->frames[i].skip = (ape->frames[i].pos - ape->frames[0].pos) & 3; @@ -301,7 +303,9 @@ static int ape_read_header(AVFormatContext * s, AVFormatParameters * ap) avio_seek(pb, 0, SEEK_SET); } - av_log(s, AV_LOG_DEBUG, "Decoding file - v%d.%02d, compression level %d\n", ape->fileversion / 1000, (ape->fileversion % 1000) / 10, ape->compressiontype); + av_log(s, AV_LOG_DEBUG, "Decoding file - v%d.%02d, compression level %"PRIu16"\n", + ape->fileversion / 1000, (ape->fileversion % 1000) / 10, + ape->compressiontype); /* now we are ready: build format streams */ st = av_new_stream(s, 0); diff --git a/libavformat/apetag.c b/libavformat/apetag.c index fb46d2bac2..8d53e4cdf7 100644 --- a/libavformat/apetag.c +++ b/libavformat/apetag.c @@ -21,11 +21,10 @@ */ #include "libavutil/intreadwrite.h" +#include "libavutil/dict.h" #include "avformat.h" #include "apetag.h" -#define ENABLE_DEBUG 0 - #define APE_TAG_VERSION 2000 #define APE_TAG_FOOTER_BYTES 32 #define APE_TAG_FLAG_CONTAINS_HEADER (1 << 31) @@ -35,11 +34,11 @@ static int ape_tag_read_field(AVFormatContext *s) { AVIOContext *pb = s->pb; uint8_t key[1024], *value; - uint32_t size, flags; + uint32_t size; int i, c; size = avio_rl32(pb); /* field size */ - flags = avio_rl32(pb); /* field flags */ + avio_skip(pb, 4); /* field flags */ for (i = 0; i < sizeof(key) - 1; i++) { c = avio_r8(pb); if (c < 0x20 || c > 0x7E) @@ -59,7 +58,7 @@ 
static int ape_tag_read_field(AVFormatContext *s) return AVERROR(ENOMEM); avio_read(pb, value, size); value[size] = 0; - av_metadata_set2(&s->metadata, key, value, AV_METADATA_DONT_STRDUP_VAL); + av_dict_set(&s->metadata, key, value, AV_DICT_DONT_STRDUP_VAL); return 0; } diff --git a/libavformat/applehttp.c b/libavformat/applehttp.c index d0de6bbc3e..dec77f80e6 100644 --- a/libavformat/applehttp.c +++ b/libavformat/applehttp.c @@ -28,6 +28,7 @@ #include "libavutil/avstring.h" #include "libavutil/intreadwrite.h" #include "libavutil/opt.h" +#include "libavutil/dict.h" #include "avformat.h" #include "internal.h" #include <unistd.h> @@ -507,7 +508,7 @@ static int applehttp_read_header(AVFormatContext *s, AVFormatParameters *ap) } avcodec_copy_context(st->codec, v->ctx->streams[j]->codec); if (v->bandwidth) - av_metadata_set2(&st->metadata, "variant_bitrate", bitrate_str, + av_dict_set(&st->metadata, "variant_bitrate", bitrate_str, 0); } stream_offset += v->ctx->nb_streams; diff --git a/libavformat/asfdec.c b/libavformat/asfdec.c index 4a4f1d927c..725e013b17 100644 --- a/libavformat/asfdec.c +++ b/libavformat/asfdec.c @@ -21,8 +21,10 @@ //#define DEBUG +#include "libavutil/bswap.h" #include "libavutil/common.h" #include "libavutil/avstring.h" +#include "libavutil/dict.h" #include "libavcodec/mpegaudio.h" #include "avformat.h" #include "avio_internal.h" @@ -171,7 +173,8 @@ static void get_tag(AVFormatContext *s, const char *key, int type, int len) av_log(s, AV_LOG_DEBUG, "Unsupported value type %d in tag %s.\n", type, key); goto finish; } - av_metadata_set2(&s->metadata, key, value, 0); + if (*value) + av_dict_set(&s->metadata, key, value, 0); finish: av_freep(&value); avio_seek(s->pb, off + len, SEEK_SET); @@ -208,7 +211,6 @@ static int asf_read_stream_properties(AVFormatContext *s, int64_t size) ff_asf_guid g; enum AVMediaType type; int type_specific_size, sizeX; - uint64_t total_size; unsigned int tag1; int64_t pos1, pos2, start_time; int test_for_ext_stream_audio, 
is_dvr_ms_audio=0; @@ -255,7 +257,7 @@ static int asf_read_stream_properties(AVFormatContext *s, int64_t size) return -1; } ff_get_guid(pb, &g); - total_size = avio_rl64(pb); + avio_skip(pb, 8); /* total_size */ type_specific_size = avio_rl32(pb); avio_rl32(pb); st->id = avio_rl16(pb) & 0x7f; /* stream id */ @@ -393,7 +395,7 @@ static int asf_read_ext_stream_properties(AVFormatContext *s, int64_t size) AVIOContext *pb = s->pb; ff_asf_guid g; int ext_len, payload_ext_ct, stream_ct, i; - uint32_t ext_d, leak_rate, stream_num; + uint32_t leak_rate, stream_num; unsigned int stream_languageid_index; avio_rl64(pb); // starttime @@ -427,7 +429,7 @@ static int asf_read_ext_stream_properties(AVFormatContext *s, int64_t size) for (i=0; i<payload_ext_ct; i++){ ff_get_guid(pb, &g); - ext_d=avio_rl16(pb); + avio_skip(pb, 2); ext_len=avio_rl32(pb); avio_skip(pb, ext_len); } @@ -511,7 +513,7 @@ static int asf_read_metadata(AVFormatContext *s, int64_t size) { AVIOContext *pb = s->pb; ASFContext *asf = s->priv_data; - int n, stream_num, name_len, value_len, value_type, value_num; + int n, stream_num, name_len, value_len, value_num; int ret, i; n = avio_rl16(pb); @@ -521,7 +523,7 @@ static int asf_read_metadata(AVFormatContext *s, int64_t size) avio_rl16(pb); //lang_list_index stream_num= avio_rl16(pb); name_len= avio_rl16(pb); - value_type= avio_rl16(pb); + avio_skip(pb, 2); /* value_type */ value_len= avio_rl32(pb); if ((ret = avio_get_str16le(pb, name_len, name, sizeof(name))) < name_len) @@ -626,10 +628,8 @@ static int asf_read_header(AVFormatContext *s, AVFormatParameters *ap) // if so the next iteration will pick it up continue; } else if (!ff_guidcmp(&g, &ff_asf_head1_guid)) { - int v1, v2; ff_get_guid(pb, &g); - v1 = avio_rl32(pb); - v2 = avio_rl16(pb); + avio_skip(pb, 6); continue; } else if (!ff_guidcmp(&g, &ff_asf_marker_header)) { asf_read_marker(s, gsize); @@ -684,7 +684,7 @@ static int asf_read_header(AVFormatContext *s, AVFormatParameters *ap) const char 
primary_tag[3] = { rfc1766[0], rfc1766[1], '\0' }; // ignore country code if any const char *iso6392 = av_convert_lang_to(primary_tag, AV_LANG_ISO639_2_BIBL); if (iso6392) - av_metadata_set2(&st->metadata, "language", iso6392, 0); + av_dict_set(&st->metadata, "language", iso6392, 0); } } } @@ -799,7 +799,7 @@ static int asf_read_frame_header(AVFormatContext *s, AVIOContext *pb){ ASFContext *asf = s->priv_data; int rsize = 1; int num = avio_r8(pb); - int64_t ts0, ts1; + int64_t ts0; asf->packet_segments--; asf->packet_key_frame = num >> 7; @@ -822,7 +822,7 @@ static int asf_read_frame_header(AVFormatContext *s, AVIOContext *pb){ // av_log(s, AV_LOG_DEBUG, "\n"); avio_skip(pb, 10); ts0= avio_rl64(pb); - ts1= avio_rl64(pb); + avio_skip(pb, 8);; avio_skip(pb, 12); avio_rl32(pb); avio_skip(pb, asf->packet_replic_size - 8 - 38 - 4); @@ -849,7 +849,10 @@ static int asf_read_frame_header(AVFormatContext *s, AVIOContext *pb){ } if (asf->packet_flags & 0x01) { DO_2BITS(asf->packet_segsizetype >> 6, asf->packet_frag_size, 0); // 0 is illegal - if(asf->packet_frag_size > asf->packet_size_left - rsize){ + if (rsize > asf->packet_size_left) { + av_log(s, AV_LOG_ERROR, "packet_replic_size is invalid\n"); + return -1; + } else if(asf->packet_frag_size > asf->packet_size_left - rsize){ if (asf->packet_frag_size > asf->packet_size_left - rsize + asf->packet_padsize) { av_log(s, AV_LOG_ERROR, "packet_frag_size is invalid (%d-%d)\n", asf->packet_size_left, rsize); return -1; @@ -1257,21 +1260,22 @@ static int asf_read_seek(AVFormatContext *s, int stream_index, int64_t pts, int if (!asf->index_read) asf_build_simple_index(s, stream_index); - if(!(asf->index_read && st->index_entries)){ - if(av_seek_frame_binary(s, stream_index, pts, flags)<0) - return -1; - }else{ + if((asf->index_read && st->index_entries)){ index= av_index_search_timestamp(st, pts, flags); - if(index<0) - return -1; + if(index >= 0) { + /* find the position */ + pos = st->index_entries[index].pos; - /* find the 
position */ - pos = st->index_entries[index].pos; - - /* do the seek */ - av_log(s, AV_LOG_DEBUG, "SEEKTO: %"PRId64"\n", pos); - avio_seek(s->pb, pos, SEEK_SET); + /* do the seek */ + av_log(s, AV_LOG_DEBUG, "SEEKTO: %"PRId64"\n", pos); + avio_seek(s->pb, pos, SEEK_SET); + asf_reset_header(s); + return 0; + } } + /* no index or seeking by index failed */ + if(av_seek_frame_binary(s, stream_index, pts, flags)<0) + return -1; asf_reset_header(s); return 0; } @@ -1286,4 +1290,5 @@ AVInputFormat ff_asf_demuxer = { asf_read_close, asf_read_seek, asf_read_pts, + .flags = AVFMT_NOBINSEARCH | AVFMT_NOGENSEARCH, }; diff --git a/libavformat/asfenc.c b/libavformat/asfenc.c index fe5a67ce4b..5c929f4547 100644 --- a/libavformat/asfenc.c +++ b/libavformat/asfenc.c @@ -19,10 +19,10 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #include "avformat.h" -#include "metadata.h" #include "riff.h" #include "asf.h" #include "avio_internal.h" +#include "libavutil/dict.h" #undef NDEBUG #include <assert.h> @@ -296,7 +296,7 @@ static int asf_write_header1(AVFormatContext *s, int64_t file_size, int64_t data { ASFContext *asf = s->priv_data; AVIOContext *pb = s->pb; - AVMetadataTag *tags[5]; + AVDictionaryEntry *tags[5]; int header_size, n, extra_size, extra_size2, wav_extra_size, file_time; int has_title; int metadata_count; @@ -307,11 +307,11 @@ static int asf_write_header1(AVFormatContext *s, int64_t file_size, int64_t data ff_metadata_conv(&s->metadata, ff_asf_metadata_conv, NULL); - tags[0] = av_metadata_get(s->metadata, "title" , NULL, 0); - tags[1] = av_metadata_get(s->metadata, "author" , NULL, 0); - tags[2] = av_metadata_get(s->metadata, "copyright", NULL, 0); - tags[3] = av_metadata_get(s->metadata, "comment" , NULL, 0); - tags[4] = av_metadata_get(s->metadata, "rating" , NULL, 0); + tags[0] = av_dict_get(s->metadata, "title" , NULL, 0); + tags[1] = av_dict_get(s->metadata, "author" , NULL, 0); + tags[2] = av_dict_get(s->metadata, "copyright", 
NULL, 0); + tags[3] = av_dict_get(s->metadata, "comment" , NULL, 0); + tags[4] = av_dict_get(s->metadata, "rating" , NULL, 0); duration = asf->duration + PREROLL_TIME * 10000; has_title = tags[0] || tags[1] || tags[2] || tags[3] || tags[4]; @@ -381,10 +381,10 @@ static int asf_write_header1(AVFormatContext *s, int64_t file_size, int64_t data end_header(pb, hpos); } if (metadata_count) { - AVMetadataTag *tag = NULL; + AVDictionaryEntry *tag = NULL; hpos = put_header(pb, &ff_asf_extended_content_header); avio_wl16(pb, metadata_count); - while ((tag = av_metadata_get(s->metadata, "", tag, AV_METADATA_IGNORE_SUFFIX))) { + while ((tag = av_dict_get(s->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) { put_str16(pb, tag->key); avio_wl16(pb, 0); put_str16(pb, tag->value); diff --git a/libavformat/avformat.h b/libavformat/avformat.h index e558096cf2..05cbf0a6b1 100644 --- a/libavformat/avformat.h +++ b/libavformat/avformat.h @@ -23,8 +23,7 @@ /** - * I return the LIBAVFORMAT_VERSION_INT constant. You got - * a fucking problem with that, douchebag? + * Return the LIBAVFORMAT_VERSION_INT constant. */ unsigned avformat_version(void); @@ -41,6 +40,7 @@ const char *avformat_license(void); #include <time.h> #include <stdio.h> /* FILE */ #include "libavcodec/avcodec.h" +#include "libavutil/dict.h" #include "avio.h" #include "libavformat/version.h" @@ -107,21 +107,24 @@ struct AVFormatContext; * variant_bitrate -- the total bitrate of the bitrate variant that the current stream is part of */ -#define AV_METADATA_MATCH_CASE 1 -#define AV_METADATA_IGNORE_SUFFIX 2 -#define AV_METADATA_DONT_STRDUP_KEY 4 -#define AV_METADATA_DONT_STRDUP_VAL 8 -#define AV_METADATA_DONT_OVERWRITE 16 ///< Don't overwrite existing tags. +#if FF_API_OLD_METADATA2 +/** + * @defgroup old_metadata Old metadata API + * The following functions are deprecated, use + * their equivalents from libavutil/dict.h instead. 
+ * @{ + */ -typedef struct { - char *key; - char *value; -}AVMetadataTag; +#define AV_METADATA_MATCH_CASE AV_DICT_MATCH_CASE +#define AV_METADATA_IGNORE_SUFFIX AV_DICT_IGNORE_SUFFIX +#define AV_METADATA_DONT_STRDUP_KEY AV_DICT_DONT_STRDUP_KEY +#define AV_METADATA_DONT_STRDUP_VAL AV_DICT_DONT_STRDUP_VAL +#define AV_METADATA_DONT_OVERWRITE AV_DICT_DONT_OVERWRITE + +typedef attribute_deprecated AVDictionary AVMetadata; +typedef attribute_deprecated AVDictionaryEntry AVMetadataTag; -typedef struct AVMetadata AVMetadata; -#if FF_API_OLD_METADATA2 typedef struct AVMetadataConv AVMetadataConv; -#endif /** * Get a metadata element with matching key. @@ -131,8 +134,8 @@ typedef struct AVMetadataConv AVMetadataConv; * @param flags Allows case as well as suffix-insensitive comparisons. * @return Found tag or NULL, changing key or value leads to undefined behavior. */ -AVMetadataTag * -av_metadata_get(AVMetadata *m, const char *key, const AVMetadataTag *prev, int flags); +attribute_deprecated AVDictionaryEntry * +av_metadata_get(AVDictionary *m, const char *key, const AVDictionaryEntry *prev, int flags); #if FF_API_OLD_METADATA /** @@ -158,30 +161,32 @@ attribute_deprecated int av_metadata_set(AVMetadata **pm, const char *key, const * Passing a NULL value will cause an existing tag to be deleted. * @return >= 0 on success otherwise an error code <0 */ -int av_metadata_set2(AVMetadata **pm, const char *key, const char *value, int flags); +attribute_deprecated int av_metadata_set2(AVDictionary **pm, const char *key, const char *value, int flags); -#if FF_API_OLD_METADATA2 /** * This function is provided for compatibility reason and currently does nothing. */ attribute_deprecated void av_metadata_conv(struct AVFormatContext *ctx, const AVMetadataConv *d_conv, const AVMetadataConv *s_conv); -#endif /** - * Copy metadata from one AVMetadata struct into another. - * @param dst pointer to a pointer to a AVMetadata struct. 
If *dst is NULL, + * Copy metadata from one AVDictionary struct into another. + * @param dst pointer to a pointer to a AVDictionary struct. If *dst is NULL, * this function will allocate a struct for you and put it in *dst - * @param src pointer to source AVMetadata struct + * @param src pointer to source AVDictionary struct * @param flags flags to use when setting metadata in *dst - * @note metadata is read using the AV_METADATA_IGNORE_SUFFIX flag + * @note metadata is read using the AV_DICT_IGNORE_SUFFIX flag */ -void av_metadata_copy(AVMetadata **dst, AVMetadata *src, int flags); +attribute_deprecated void av_metadata_copy(AVDictionary **dst, AVDictionary *src, int flags); /** - * Free all the memory allocated for an AVMetadata struct. + * Free all the memory allocated for an AVDictionary struct. + */ +attribute_deprecated void av_metadata_free(AVDictionary **m); +/** + * @} */ -void av_metadata_free(AVMetadata **m); +#endif /* packet functions */ @@ -242,20 +247,21 @@ typedef struct AVProbeData { #define AVPROBE_PADDING_SIZE 32 ///< extra allocated bytes at the end of the probe buffer typedef struct AVFormatParameters { - AVRational time_base; - int sample_rate; - int channels; - int width; - int height; - enum PixelFormat pix_fmt; - int channel; /**< Used to select DV channel. */ - const char *standard; /**< TV standard, NTSC, PAL, SECAM */ - unsigned int mpeg2ts_raw:1; /**< Force raw MPEG-2 transport stream output, if possible. */ - unsigned int mpeg2ts_compute_pcr:1; /**< Compute exact PCR for each transport - stream packet (only meaningful if - mpeg2ts_raw is TRUE). */ - unsigned int initial_pause:1; /**< Do not begin to play the stream - immediately (RTSP only). 
*/ +#if FF_API_FORMAT_PARAMETERS + attribute_deprecated AVRational time_base; + attribute_deprecated int sample_rate; + attribute_deprecated int channels; + attribute_deprecated int width; + attribute_deprecated int height; + attribute_deprecated enum PixelFormat pix_fmt; + attribute_deprecated int channel; /**< Used to select DV channel. */ + attribute_deprecated const char *standard; /**< deprecated, use demuxer-specific options instead. */ + attribute_deprecated unsigned int mpeg2ts_raw:1; /**< deprecated, use mpegtsraw demuxer */ + /**< deprecated, use mpegtsraw demuxer-specific options instead */ + attribute_deprecated unsigned int mpeg2ts_compute_pcr:1; + attribute_deprecated unsigned int initial_pause:1; /**< Do not begin to play the stream + immediately (RTSP only). */ +#endif unsigned int prealloced_context:1; #if FF_API_PARAMETERS_CODEC_ID attribute_deprecated enum CodecID video_codec_id; @@ -276,6 +282,11 @@ typedef struct AVFormatParameters { #define AVFMT_VARIABLE_FPS 0x0400 /**< Format allows variable fps. */ #define AVFMT_NODIMENSIONS 0x0800 /**< Format does not need width/height */ #define AVFMT_NOSTREAMS 0x1000 /**< Format does not require any streams */ +#define AVFMT_NOBINSEARCH 0x2000 /**< Format does not allow to fallback to binary search via read_timestamp */ +#define AVFMT_NOGENSEARCH 0x4000 /**< Format does not allow to fallback to generic search */ +#define AVFMT_TS_NONSTRICT 0x8000 /**< Format does not require strictly + increasing timestamps, but they must + still be monotonic */ typedef struct AVOutputFormat { const char *name; @@ -588,7 +599,7 @@ typedef struct AVStream { */ AVRational sample_aspect_ratio; - AVMetadata *metadata; + AVDictionary *metadata; /* Intended mostly for av_read_frame() support. Not supposed to be used by */ /* external applications; try to use something else if at all possible. 
*/ @@ -630,6 +641,13 @@ typedef struct AVStream { int codec_info_nb_frames; /** + * Stream Identifier + * This is the MPEG-TS stream identifier +1 + * 0 means unknown + */ + int stream_identifier; + + /** * Stream informations used internally by av_find_stream_info() */ #define MAX_STD_TIMEBASES (60*12+5) @@ -666,7 +684,11 @@ typedef struct AVProgram { enum AVDiscard discard; ///< selects which program to discard and which to feed to the caller unsigned int *stream_index; unsigned int nb_stream_indexes; - AVMetadata *metadata; + AVDictionary *metadata; + + int program_num; + int pmt_pid; + int pcr_pid; } AVProgram; #define AVFMTCTX_NOHEADER 0x0001 /**< signal that no header is present @@ -679,7 +701,7 @@ typedef struct AVChapter { #if FF_API_OLD_METADATA attribute_deprecated char *title; ///< chapter title #endif - AVMetadata *metadata; + AVDictionary *metadata; } AVChapter; #if FF_API_MAX_STREAMS @@ -789,10 +811,13 @@ typedef struct AVFormatContext { #define AVFMT_FLAG_IGNDTS 0x0008 ///< Ignore DTS on frames that contain both DTS & PTS #define AVFMT_FLAG_NOFILLIN 0x0010 ///< Do not infer any values from other values, just return what is stored in the container #define AVFMT_FLAG_NOPARSE 0x0020 ///< Do not use AVParsers, you also must set AVFMT_FLAG_NOFILLIN as the fillin code works on frames and no parsing -> no frames. 
Also seeking to frames can not work if parsing to find frame boundaries has been disabled -#define AVFMT_FLAG_RTP_HINT 0x0040 ///< Add RTP hinting to the output file +#if FF_API_FLAG_RTP_HINT +#define AVFMT_FLAG_RTP_HINT 0x0040 ///< Deprecated, use the -movflags rtphint muxer specific AVOption instead +#endif +#define AVFMT_FLAG_MP4A_LATM 0x0080 ///< Enable RTP MP4A-LATM payload #define AVFMT_FLAG_SORT_DTS 0x10000 ///< try to interleave outputted packets by dts (using this flag can slow demuxing down) #define AVFMT_FLAG_PRIV_OPT 0x20000 ///< Enable use of private options by delaying codec open (this could be made default once all code is converted) - +#define AVFMT_FLAG_KEEP_SIDE_DATA 0x40000 ///< Dont merge side data but keep it seperate. int loop_input; /** @@ -868,7 +893,7 @@ typedef struct AVFormatContext { struct AVPacketList *packet_buffer_end; - AVMetadata *metadata; + AVDictionary *metadata; /** * Remaining size available for raw_packet_buffer, in bytes. @@ -885,6 +910,17 @@ typedef struct AVFormatContext { * - decoding: Unused. */ int64_t start_time_realtime; + + /** + * decoding: number of frames used to probe fps + */ + int fps_probe_size; + + /** + * Transport stream id. + * This will be moved into demuxer private options. Thus no API/ABI compatibility + */ + int ts_id; } AVFormatContext; typedef struct AVPacketList { @@ -1138,12 +1174,34 @@ int av_demuxer_open(AVFormatContext *ic, AVFormatParameters *ap); */ AVFormatContext *avformat_alloc_context(void); +#if FF_API_ALLOC_OUTPUT_CONTEXT /** - * Allocate an AVFormatContext. - * avformat_free_context() can be used to free the context and everything - * allocated by the framework within it. 
+ * @deprecated deprecated in favor of avformat_alloc_output_context2() */ -AVFormatContext *avformat_alloc_output_context(const char *format, AVOutputFormat *oformat, const char *filename); +attribute_deprecated +AVFormatContext *avformat_alloc_output_context(const char *format, + AVOutputFormat *oformat, + const char *filename); +#endif + +/** + * Allocate an AVFormatContext for an output format. + * avformat_free_context() can be used to free the context and + * everything allocated by the framework within it. + * + * @param *ctx is set to the created format context, or to NULL in + * case of failure + * @param oformat format to use for allocating the context, if NULL + * format_name and filename are used instead + * @param format_name the name of output format to use for allocating the + * context, if NULL filename is used instead + * @param filename the name of the filename to use for allocating the + * context, may be NULL + * @return >= 0 in case of success, a negative AVERROR code in case of + * failure + */ +int avformat_alloc_output_context2(AVFormatContext **ctx, AVOutputFormat *oformat, + const char *format_name, const char *filename); /** * Read packets of a media file to get stream information. This @@ -1493,6 +1551,9 @@ int av_interleave_packet_per_dts(AVFormatContext *s, AVPacket *out, int av_write_trailer(AVFormatContext *s); #if FF_API_DUMP_FORMAT +/** + * @deprecated Deprecated in favor of av_dump_format(). 
+ */ attribute_deprecated void dump_format(AVFormatContext *ic, int index, const char *url, diff --git a/libavformat/avidec.c b/libavformat/avidec.c index 39686491e2..d3ae8bee73 100644 --- a/libavformat/avidec.c +++ b/libavformat/avidec.c @@ -19,13 +19,11 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -//#define DEBUG -//#define DEBUG_SEEK - #include <strings.h> #include "libavutil/intreadwrite.h" #include "libavutil/bswap.h" #include "libavutil/opt.h" +#include "libavutil/dict.h" #include "avformat.h" #include "avi.h" #include "dv.h" @@ -159,10 +157,8 @@ static int read_braindead_odml_indx(AVFormatContext *s, int frame_num){ int64_t last_pos= -1; int64_t filesize= avio_size(s->pb); -#ifdef DEBUG_SEEK - av_log(s, AV_LOG_ERROR, "longs_pre_entry:%d index_type:%d entries_in_use:%d chunk_id:%X base:%16"PRIX64"\n", - longs_pre_entry,index_type, entries_in_use, chunk_id, base); -#endif + av_dlog(s, "longs_pre_entry:%d index_type:%d entries_in_use:%d chunk_id:%X base:%16"PRIX64"\n", + longs_pre_entry,index_type, entries_in_use, chunk_id, base); if(stream_id >= s->nb_streams || stream_id < 0) return -1; @@ -280,8 +276,8 @@ static int avi_read_tag(AVFormatContext *s, AVStream *st, uint32_t tag, uint32_t AV_WL32(key, tag); - return av_metadata_set2(st ? &st->metadata : &s->metadata, key, value, - AV_METADATA_DONT_STRDUP_VAL); + return av_dict_set(st ? 
&st->metadata : &s->metadata, key, value, + AV_DICT_DONT_STRDUP_VAL); } static void avi_read_info(AVFormatContext *s, uint64_t end) @@ -296,7 +292,7 @@ static void avi_read_info(AVFormatContext *s, uint64_t end) static const char months[12][4] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" }; -static void avi_metadata_creation_time(AVMetadata **metadata, char *date) +static void avi_metadata_creation_time(AVDictionary **metadata, char *date) { char month[4], time[9], buffer[64]; int i, day, year; @@ -307,11 +303,11 @@ static void avi_metadata_creation_time(AVMetadata **metadata, char *date) if (!strcasecmp(month, months[i])) { snprintf(buffer, sizeof(buffer), "%.4d-%.2d-%.2d %s", year, i+1, day, time); - av_metadata_set2(metadata, "creation_time", buffer, 0); + av_dict_set(metadata, "creation_time", buffer, 0); } } else if (date[4] == '/' && date[7] == '/') { date[4] = date[7] = '-'; - av_metadata_set2(metadata, "creation_time", date, 0); + av_dict_set(metadata, "creation_time", date, 0); } } @@ -339,7 +335,7 @@ static void avi_read_nikon(AVFormatContext *s, uint64_t end) break; } if (name) - av_metadata_set2(&s->metadata, name, buffer, 0); + av_dict_set(&s->metadata, name, buffer, 0); avio_skip(s->pb, size); } break; @@ -792,7 +788,7 @@ static int read_gab2_sub(AVStream *st, AVPacket *pkt) { ret = avio_get_str16le(pb, desc_len, desc, sizeof(desc)); avio_skip(pb, desc_len - ret); if (*desc) - av_metadata_set2(&st->metadata, "title", desc, 0); + av_dict_set(&st->metadata, "title", desc, 0); avio_rl16(pb); /* flags? 
*/ avio_rl32(pb); /* data size */ @@ -1178,10 +1174,8 @@ static int avi_read_idx1(AVFormatContext *s, int size) flags = avio_rl32(pb); pos = avio_rl32(pb); len = avio_rl32(pb); -#if defined(DEBUG_SEEK) - av_log(s, AV_LOG_DEBUG, "%d: tag=0x%x flags=0x%x pos=0x%x len=%d/", - i, tag, flags, pos, len); -#endif + av_dlog(s, "%d: tag=0x%x flags=0x%x pos=0x%x len=%d/", + i, tag, flags, pos, len); if(i==0 && pos > avi->movi_list) avi->movi_list= 0; //FIXME better check pos += avi->movi_list; @@ -1250,22 +1244,18 @@ static int avi_load_index(AVFormatContext *s) if (avio_seek(pb, avi->movi_end, SEEK_SET) < 0) goto the_end; // maybe truncated file -#ifdef DEBUG_SEEK - printf("movi_end=0x%"PRIx64"\n", avi->movi_end); -#endif + av_dlog(s, "movi_end=0x%"PRIx64"\n", avi->movi_end); for(;;) { if (url_feof(pb)) break; tag = avio_rl32(pb); size = avio_rl32(pb); -#ifdef DEBUG_SEEK - printf("tag=%c%c%c%c size=0x%x\n", - tag & 0xff, - (tag >> 8) & 0xff, - (tag >> 16) & 0xff, - (tag >> 24) & 0xff, - size); -#endif + av_dlog(s, "tag=%c%c%c%c size=0x%x\n", + tag & 0xff, + (tag >> 8) & 0xff, + (tag >> 16) & 0xff, + (tag >> 24) & 0xff, + size); switch(tag) { case MKTAG('i', 'd', 'x', '1'): if (avi_read_idx1(s, size) < 0) diff --git a/libavformat/avienc.c b/libavformat/avienc.c index 9f488d9600..8a53bb53d3 100644 --- a/libavformat/avienc.c +++ b/libavformat/avienc.c @@ -23,6 +23,7 @@ #include "avio_internal.h" #include "riff.h" #include "libavutil/intreadwrite.h" +#include "libavutil/dict.h" /* * TODO: @@ -157,7 +158,7 @@ static int avi_write_header(AVFormatContext *s) int bitrate, n, i, nb_frames, au_byterate, au_ssize, au_scale; AVCodecContext *stream, *video_enc; int64_t list1, list2, strh, strf; - AVMetadataTag *t = NULL; + AVDictionaryEntry *t = NULL; if (s->nb_streams > AVI_MAX_STREAM_COUNT) { av_log(s, AV_LOG_ERROR, "AVI does not support >%d streams\n", @@ -297,7 +298,7 @@ static int avi_write_header(AVFormatContext *s) return -1; } ff_end_tag(pb, strf); - if ((t = 
av_metadata_get(s->streams[i]->metadata, "title", NULL, 0))) { + if ((t = av_dict_get(s->streams[i]->metadata, "title", NULL, 0))) { avi_write_info_tag(s->pb, "strn", t->value); t = NULL; } @@ -379,7 +380,7 @@ static int avi_write_header(AVFormatContext *s) ffio_wfourcc(pb, "INFO"); ff_metadata_conv(&s->metadata, ff_avi_metadata_conv, NULL); for (i = 0; *ff_avi_tags[i]; i++) { - if ((t = av_metadata_get(s->metadata, ff_avi_tags[i], NULL, AV_METADATA_MATCH_CASE))) + if ((t = av_dict_get(s->metadata, ff_avi_tags[i], NULL, AV_DICT_MATCH_CASE))) avi_write_info_tag(s->pb, t->key, t->value); } ff_end_tag(pb, list2); diff --git a/libavformat/avio.c b/libavformat/avio.c index 16f8c2f2dd..80644795d5 100644 --- a/libavformat/avio.c +++ b/libavformat/avio.c @@ -62,11 +62,11 @@ URLProtocol *av_protocol_next(URLProtocol *p) const char *avio_enum_protocols(void **opaque, int output) { - URLProtocol **p = opaque; - *p = *p ? (*p)->next : first_protocol; - if (!*p) return NULL; - if ((output && (*p)->url_write) || (!output && (*p)->url_read)) - return (*p)->name; + URLProtocol *p = *opaque; + p = p ? p->next : first_protocol; + if (!p) return NULL; + if ((output && p->url_write) || (!output && p->url_read)) + return p->name; return avio_enum_protocols(opaque, output); } diff --git a/libavformat/avio.h b/libavformat/avio.h index 9d805822d4..5885349ca7 100644 --- a/libavformat/avio.h +++ b/libavformat/avio.h @@ -391,6 +391,7 @@ attribute_deprecated int av_register_protocol(URLProtocol *protocol); * freed with av_free(). * * @param buffer Memory block for input/output operations via AVIOContext. + * The buffer must be allocated with av_malloc() and friends. * @param buffer_size The buffer size is very important for performance. * For protocols with fixed blocksize it should be set to this blocksize. * For others a typical size is a cache page, e.g. 4kb. 
diff --git a/libavformat/avio_internal.h b/libavformat/avio_internal.h index 92528f2d18..4c80163d31 100644 --- a/libavformat/avio_internal.h +++ b/libavformat/avio_internal.h @@ -98,4 +98,4 @@ int ffio_open_dyn_packet_buf(AVIOContext **s, int max_packet_size); */ int ffio_fdopen(AVIOContext **s, URLContext *h); -#endif // AVFORMAT_AVIO_INTERNAL_H +#endif /* AVFORMAT_AVIO_INTERNAL_H */ diff --git a/libavformat/aviobuf.c b/libavformat/aviobuf.c index 456205bc76..c6cea6adfb 100644 --- a/libavformat/aviobuf.c +++ b/libavformat/aviobuf.c @@ -115,6 +115,8 @@ AVIOContext *avio_alloc_context( int64_t (*seek)(void *opaque, int64_t offset, int whence)) { AVIOContext *s = av_mallocz(sizeof(AVIOContext)); + if (!s) + return NULL; ffio_init_context(s, buffer, buffer_size, write_flag, opaque, read_packet, write_packet, seek); return s; diff --git a/libavformat/cafdec.c b/libavformat/cafdec.c index ea9a3c17ae..b24ff1d023 100644 --- a/libavformat/cafdec.c +++ b/libavformat/cafdec.c @@ -29,6 +29,7 @@ #include "riff.h" #include "isom.h" #include "libavutil/intreadwrite.h" +#include "libavutil/dict.h" #include "caf.h" typedef struct { @@ -187,7 +188,7 @@ static void read_info_chunk(AVFormatContext *s, int64_t size) char value[1024]; get_strz(pb, key, sizeof(key)); get_strz(pb, value, sizeof(value)); - av_metadata_set2(&s->metadata, key, value, 0); + av_dict_set(&s->metadata, key, value, 0); } } diff --git a/libavformat/cafenc.c b/libavformat/cafenc.c index 0f33c6b592..13636f30ee 100644 --- a/libavformat/cafenc.c +++ b/libavformat/cafenc.c @@ -134,7 +134,11 @@ static int caf_write_header(AVFormatContext *s) avio_wb32(pb, enc->channels); //< mChannelsPerFrame avio_wb32(pb, enc->bits_per_coded_sample); //< mBitsPerChannel - ff_mov_write_chan(s, enc->channel_layout, "chan"); + if (enc->channel_layout) { + ffio_wfourcc(pb, "chan"); + avio_wb64(pb, 12); + ff_mov_write_chan(pb, enc->channel_layout); + } ffio_wfourcc(pb, "data"); //< Audio Data chunk caf->data = avio_tell(pb); diff --git 
a/libavformat/cavsvideodec.c b/libavformat/cavsvideodec.c index 6db9ad9fcf..4a399a26d1 100644 --- a/libavformat/cavsvideodec.c +++ b/libavformat/cavsvideodec.c @@ -65,13 +65,4 @@ static int cavsvideo_probe(AVProbeData *p) return 0; } -AVInputFormat ff_cavsvideo_demuxer = { - "cavsvideo", - NULL_IF_CONFIG_SMALL("raw Chinese AVS video"), - 0, - cavsvideo_probe, - ff_raw_video_read_header, - ff_raw_read_partial_packet, - .flags= AVFMT_GENERIC_INDEX, - .value = CODEC_ID_CAVS, -}; +FF_DEF_RAWVIDEO_DEMUXER(cavsvideo, "raw Chinese AVS video", cavsvideo_probe, NULL, CODEC_ID_CAVS) diff --git a/libavformat/crypto.c b/libavformat/crypto.c index c83bf92895..d52a503e38 100644 --- a/libavformat/crypto.c +++ b/libavformat/crypto.c @@ -97,8 +97,8 @@ static int crypto_open(URLContext *h, const char *uri, int flags) return 0; err: - av_free(c->key); - av_free(c->iv); + av_freep(&c->key); + av_freep(&c->iv); return ret; } diff --git a/libavformat/diracdec.c b/libavformat/diracdec.c index 6c6896163b..6afda533dc 100644 --- a/libavformat/diracdec.c +++ b/libavformat/diracdec.c @@ -31,13 +31,4 @@ static int dirac_probe(AVProbeData *p) return 0; } -AVInputFormat ff_dirac_demuxer = { - "dirac", - NULL_IF_CONFIG_SMALL("raw Dirac"), - 0, - dirac_probe, - ff_raw_video_read_header, - ff_raw_read_partial_packet, - .flags= AVFMT_GENERIC_INDEX, - .value = CODEC_ID_DIRAC, -}; +FF_DEF_RAWVIDEO_DEMUXER(dirac, "raw Dirac", dirac_probe, NULL, CODEC_ID_DIRAC) diff --git a/libavformat/dnxhddec.c b/libavformat/dnxhddec.c index 035e1c4b30..f89782a880 100644 --- a/libavformat/dnxhddec.c +++ b/libavformat/dnxhddec.c @@ -42,13 +42,4 @@ static int dnxhd_probe(AVProbeData *p) return AVPROBE_SCORE_MAX; } -AVInputFormat ff_dnxhd_demuxer = { - "dnxhd", - NULL_IF_CONFIG_SMALL("raw DNxHD (SMPTE VC-3)"), - 0, - dnxhd_probe, - ff_raw_video_read_header, - ff_raw_read_partial_packet, - .flags= AVFMT_GENERIC_INDEX, - .value = CODEC_ID_DNXHD, -}; +FF_DEF_RAWVIDEO_DEMUXER(dnxhd, "raw DNxHD (SMPTE VC-3)", dnxhd_probe, 
NULL, CODEC_ID_DNXHD) diff --git a/libavformat/ffmdec.c b/libavformat/ffmdec.c index 604e75183c..e776d0c384 100644 --- a/libavformat/ffmdec.c +++ b/libavformat/ffmdec.c @@ -163,8 +163,6 @@ static int ffm_read_data(AVFormatContext *s, return size1 - size; } -//#define DEBUG_SEEK - /* ensure that acutal seeking happens between FFM_PACKET_SIZE and file_size - FFM_PACKET_SIZE */ static void ffm_seek1(AVFormatContext *s, int64_t pos1) @@ -175,9 +173,7 @@ static void ffm_seek1(AVFormatContext *s, int64_t pos1) pos = FFMIN(pos1, ffm->file_size - FFM_PACKET_SIZE); pos = FFMAX(pos, FFM_PACKET_SIZE); -#ifdef DEBUG_SEEK - av_log(s, AV_LOG_DEBUG, "seek to %"PRIx64" -> %"PRIx64"\n", pos1, pos); -#endif + av_dlog(s, "seek to %"PRIx64" -> %"PRIx64"\n", pos1, pos); avio_seek(pb, pos, SEEK_SET); } @@ -189,9 +185,7 @@ static int64_t get_dts(AVFormatContext *s, int64_t pos) ffm_seek1(s, pos); avio_skip(pb, 4); dts = avio_rb64(pb); -#ifdef DEBUG_SEEK - av_log(s, AV_LOG_DEBUG, "dts=%0.6f\n", dts / 1000000.0); -#endif + av_dlog(s, "dts=%0.6f\n", dts / 1000000.0); return dts; } @@ -464,9 +458,7 @@ static int ffm_seek(AVFormatContext *s, int stream_index, int64_t wanted_pts, in int64_t pts_min, pts_max, pts; double pos1; -#ifdef DEBUG_SEEK - av_log(s, AV_LOG_DEBUG, "wanted_pts=%0.6f\n", wanted_pts / 1000000.0); -#endif + av_dlog(s, "wanted_pts=%0.6f\n", wanted_pts / 1000000.0); /* find the position using linear interpolation (better than dichotomy in typical cases) */ pos_min = FFM_PACKET_SIZE; diff --git a/libavformat/ffmeta.h b/libavformat/ffmeta.h index f96dbb2efb..ae8778d614 100644 --- a/libavformat/ffmeta.h +++ b/libavformat/ffmeta.h @@ -19,11 +19,11 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#ifndef AVFORMAT_META_H -#define AVFORMAT_META_H +#ifndef AVFORMAT_FFMETA_H +#define AVFORMAT_FFMETA_H #define ID_STRING ";FFMETADATA" #define ID_CHAPTER "[CHAPTER]" #define ID_STREAM "[STREAM]" -#endif /* AVFORMAT_META_H */ +#endif /* AVFORMAT_FFMETA_H 
*/ diff --git a/libavformat/ffmetadec.c b/libavformat/ffmetadec.c index a8bffa09af..0063e6eafa 100644 --- a/libavformat/ffmetadec.c +++ b/libavformat/ffmetadec.c @@ -22,6 +22,7 @@ #include "avformat.h" #include "ffmeta.h" #include "internal.h" +#include "libavutil/dict.h" static int probe(AVProbeData *p) { @@ -93,7 +94,7 @@ static uint8_t *unescape(uint8_t *buf, int size) return ret; } -static int read_tag(uint8_t *line, AVMetadata **m) +static int read_tag(uint8_t *line, AVDictionary **m) { uint8_t *key, *value, *p = line; @@ -117,13 +118,13 @@ static int read_tag(uint8_t *line, AVMetadata **m) return AVERROR(ENOMEM); } - av_metadata_set2(m, key, value, AV_METADATA_DONT_STRDUP_KEY | AV_METADATA_DONT_STRDUP_VAL); + av_dict_set(m, key, value, AV_DICT_DONT_STRDUP_KEY | AV_DICT_DONT_STRDUP_VAL); return 0; } static int read_header(AVFormatContext *s, AVFormatParameters *ap) { - AVMetadata **m = &s->metadata; + AVDictionary **m = &s->metadata; uint8_t line[1024]; while(!url_feof(s->pb)) { diff --git a/libavformat/ffmetaenc.c b/libavformat/ffmetaenc.c index 596185b70c..200c9940a4 100644 --- a/libavformat/ffmetaenc.c +++ b/libavformat/ffmetaenc.c @@ -23,6 +23,7 @@ #include "avformat.h" #include "ffmeta.h" +#include "libavutil/dict.h" static void write_escape_str(AVIOContext *s, const uint8_t *str) @@ -37,10 +38,10 @@ static void write_escape_str(AVIOContext *s, const uint8_t *str) } } -static void write_tags(AVIOContext *s, AVMetadata *m) +static void write_tags(AVIOContext *s, AVDictionary *m) { - AVMetadataTag *t = NULL; - while ((t = av_metadata_get(m, "", t, AV_METADATA_IGNORE_SUFFIX))) { + AVDictionaryEntry *t = NULL; + while ((t = av_dict_get(m, "", t, AV_DICT_IGNORE_SUFFIX))) { write_escape_str(s, t->key); avio_w8(s, '='); write_escape_str(s, t->value); diff --git a/libavformat/flacenc.c b/libavformat/flacenc.c index 1674527816..24e0287ef8 100644 --- a/libavformat/flacenc.c +++ b/libavformat/flacenc.c @@ -22,7 +22,6 @@ #include "libavcodec/flac.h" #include 
"avformat.h" #include "flacenc.h" -#include "metadata.h" #include "vorbiscomment.h" #include "libavcodec/bytestream.h" @@ -39,7 +38,7 @@ static int flac_write_block_padding(AVIOContext *pb, unsigned int n_padding_byte return 0; } -static int flac_write_block_comment(AVIOContext *pb, AVMetadata **m, +static int flac_write_block_comment(AVIOContext *pb, AVDictionary **m, int last_block, int bitexact) { const char *vendor = bitexact ? "ffmpeg" : LIBAVFORMAT_IDENT; diff --git a/libavformat/flvdec.c b/libavformat/flvdec.c index 44d21683ba..4fb562f4e2 100644 --- a/libavformat/flvdec.c +++ b/libavformat/flvdec.c @@ -25,6 +25,7 @@ */ #include "libavutil/avstring.h" +#include "libavutil/dict.h" #include "libavcodec/bytestream.h" #include "libavcodec/mpeg4audio.h" #include "avformat.h" @@ -259,17 +260,17 @@ static int amf_parse_object(AVFormatContext *s, AVStream *astream, AVStream *vst if(amf_type == AMF_DATA_TYPE_BOOL) { av_strlcpy(str_val, num_val > 0 ? "true" : "false", sizeof(str_val)); - av_metadata_set2(&s->metadata, key, str_val, 0); + av_dict_set(&s->metadata, key, str_val, 0); } else if(amf_type == AMF_DATA_TYPE_NUMBER) { snprintf(str_val, sizeof(str_val), "%.f", num_val); - av_metadata_set2(&s->metadata, key, str_val, 0); + av_dict_set(&s->metadata, key, str_val, 0); if(!strcmp(key, "duration")) s->duration = num_val * AV_TIME_BASE; else if(!strcmp(key, "videodatarate") && vcodec && 0 <= (int)(num_val * 1024.0)) vcodec->bit_rate = num_val * 1024.0; else if(!strcmp(key, "audiodatarate") && acodec && 0 <= (int)(num_val * 1024.0)) acodec->bit_rate = num_val * 1024.0; } else if (amf_type == AMF_DATA_TYPE_STRING) - av_metadata_set2(&s->metadata, key, str_val, 0); + av_dict_set(&s->metadata, key, str_val, 0); } return 0; @@ -375,7 +376,7 @@ static int flv_read_packet(AVFormatContext *s, AVPacket *pkt) size = avio_rb24(s->pb); dts = avio_rb24(s->pb); dts |= avio_r8(s->pb) << 24; -// av_log(s, AV_LOG_DEBUG, "type:%d, size:%d, dts:%d\n", type, size, dts); + av_dlog(s, 
"type:%d, size:%d, dts:%"PRId64"\n", type, size, dts); if (url_feof(s->pb)) return AVERROR_EOF; avio_skip(s->pb, 3); /* stream id, always 0 */ @@ -421,7 +422,7 @@ static int flv_read_packet(AVFormatContext *s, AVPacket *pkt) st= create_stream(s, is_audio); s->ctx_flags &= ~AVFMTCTX_NOHEADER; } -// av_log(s, AV_LOG_DEBUG, "%d %X %d \n", is_audio, flags, st->discard); + av_dlog(s, "%d %X %d \n", is_audio, flags, st->discard); if( (st->discard >= AVDISCARD_NONKEY && !((flags & FLV_VIDEO_FRAMETYPE_MASK) == FLV_FRAME_KEY || is_audio)) ||(st->discard >= AVDISCARD_BIDIR && ((flags & FLV_VIDEO_FRAMETYPE_MASK) == FLV_FRAME_DISP_INTER && !is_audio)) || st->discard >= AVDISCARD_ALL diff --git a/libavformat/flvenc.c b/libavformat/flvenc.c index 3e933ce922..cea0d02e3c 100644 --- a/libavformat/flvenc.c +++ b/libavformat/flvenc.c @@ -25,6 +25,7 @@ #include "internal.h" #include "avc.h" #include "metadata.h" +#include "libavutil/dict.h" #undef NDEBUG #include <assert.h> @@ -179,7 +180,7 @@ static int flv_write_header(AVFormatContext *s) int i; double framerate = 0.0; int metadata_size_pos, data_size; - AVMetadataTag *tag = NULL; + AVDictionaryEntry *tag = NULL; for(i=0; i<s->nb_streams; i++){ AVCodecContext *enc = s->streams[i]->codec; @@ -276,7 +277,7 @@ static int flv_write_header(AVFormatContext *s) put_amf_double(pb, audio_enc->codec_tag); } - while ((tag = av_metadata_get(s->metadata, "", tag, AV_METADATA_IGNORE_SUFFIX))) { + while ((tag = av_dict_get(s->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) { put_amf_string(pb, tag->key); avio_w8(pb, AMF_DATA_TYPE_STRING); put_amf_string(pb, tag->value); @@ -440,7 +441,7 @@ static int flv_write_packet(AVFormatContext *s, AVPacket *pkt) av_free(data); - return 0; + return pb->error; } AVOutputFormat ff_flv_muxer = { diff --git a/libavformat/gif.c b/libavformat/gif.c index f5f8147e4f..0960705cf3 100644 --- a/libavformat/gif.c +++ b/libavformat/gif.c @@ -295,9 +295,7 @@ static int gif_write_video(AVFormatContext *s, AVCodecContext *enc, 
const uint8_t *buf, int size) { AVIOContext *pb = s->pb; - GIFContext *gif = s->priv_data; int jiffies; - int64_t delay; /* graphic control extension block */ avio_w8(pb, 0x21); @@ -307,8 +305,6 @@ static int gif_write_video(AVFormatContext *s, /* 1 jiffy is 1/70 s */ /* the delay_time field indicates the number of jiffies - 1 */ - delay = gif->file_time - gif->time; - /* XXX: should use delay, in order to be more accurate */ /* instead of using the same rounded value each time */ /* XXX: don't even remember if I really use it for now */ diff --git a/libavformat/h261dec.c b/libavformat/h261dec.c index 1478ed663f..354a7c78c1 100644 --- a/libavformat/h261dec.c +++ b/libavformat/h261dec.c @@ -62,14 +62,4 @@ static int h261_probe(AVProbeData *p) return 0; } -AVInputFormat ff_h261_demuxer = { - "h261", - NULL_IF_CONFIG_SMALL("raw H.261"), - 0, - h261_probe, - ff_raw_video_read_header, - ff_raw_read_partial_packet, - .flags= AVFMT_GENERIC_INDEX, - .extensions = "h261", - .value = CODEC_ID_H261, -}; +FF_DEF_RAWVIDEO_DEMUXER(h261, "raw H.261", h261_probe, "h261", CODEC_ID_H261) diff --git a/libavformat/h263dec.c b/libavformat/h263dec.c index 634923d885..b07e9782b8 100644 --- a/libavformat/h263dec.c +++ b/libavformat/h263dec.c @@ -64,14 +64,4 @@ static int h263_probe(AVProbeData *p) return 0; } -AVInputFormat ff_h263_demuxer = { - "h263", - NULL_IF_CONFIG_SMALL("raw H.263"), - 0, - h263_probe, - ff_raw_video_read_header, - ff_raw_read_partial_packet, - .flags= AVFMT_GENERIC_INDEX, -// .extensions = "h263", //FIXME remove after writing mpeg4_probe - .value = CODEC_ID_H263, -}; +FF_DEF_RAWVIDEO_DEMUXER(h263, "raw H.263", h263_probe, NULL, CODEC_ID_H263) diff --git a/libavformat/h264dec.c b/libavformat/h264dec.c index cec8f85275..e7c6d70deb 100644 --- a/libavformat/h264dec.c +++ b/libavformat/h264dec.c @@ -67,14 +67,4 @@ static int h264_probe(AVProbeData *p) return 0; } -AVInputFormat ff_h264_demuxer = { - "h264", - NULL_IF_CONFIG_SMALL("raw H.264 video format"), - 0, - 
h264_probe, - ff_raw_video_read_header, - ff_raw_read_partial_packet, - .flags= AVFMT_GENERIC_INDEX, - .extensions = "h26l,h264,264", //FIXME remove after writing mpeg4_probe - .value = CODEC_ID_H264, -}; +FF_DEF_RAWVIDEO_DEMUXER(h264 , "raw H.264 video format", h264_probe, "h26l,h264,264", CODEC_ID_H264) diff --git a/libavformat/id3v1.c b/libavformat/id3v1.c index 37d46956ef..2d1e806920 100644 --- a/libavformat/id3v1.c +++ b/libavformat/id3v1.c @@ -21,6 +21,7 @@ #include "id3v1.h" #include "libavcodec/avcodec.h" +#include "libavutil/dict.h" const char * const ff_id3v1_genre_str[ID3v1_GENRE_MAX + 1] = { [0] = "Blues", @@ -191,7 +192,7 @@ static void get_string(AVFormatContext *s, const char *key, *q = '\0'; if (*str) - av_metadata_set2(&s->metadata, key, str, 0); + av_dict_set(&s->metadata, key, str, 0); } /** @@ -215,11 +216,11 @@ static int parse_tag(AVFormatContext *s, const uint8_t *buf) get_string(s, "comment", buf + 97, 30); if (buf[125] == 0 && buf[126] != 0) { snprintf(str, sizeof(str), "%d", buf[126]); - av_metadata_set2(&s->metadata, "track", str, 0); + av_dict_set(&s->metadata, "track", str, 0); } genre = buf[127]; if (genre <= ID3v1_GENRE_MAX) - av_metadata_set2(&s->metadata, "genre", ff_id3v1_genre_str[genre], 0); + av_dict_set(&s->metadata, "genre", ff_id3v1_genre_str[genre], 0); return 0; } diff --git a/libavformat/id3v2.c b/libavformat/id3v2.c index 95353276b5..930ab5c870 100644 --- a/libavformat/id3v2.c +++ b/libavformat/id3v2.c @@ -1,5 +1,4 @@ /* - * ID3v2 header parser * Copyright (c) 2003 Fabrice Bellard * * This file is part of FFmpeg. 
@@ -19,11 +18,19 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +/** + * @file + * ID3v2 header parser + * + * Specifications available at: + * http://id3.org/Developer_Information + */ + #include "id3v2.h" #include "id3v1.h" #include "libavutil/avstring.h" #include "libavutil/intreadwrite.h" -#include "metadata.h" +#include "libavutil/dict.h" #include "avio_internal.h" int ff_id3v2_match(const uint8_t *buf, const char * magic) @@ -133,7 +140,7 @@ static void read_ttag(AVFormatContext *s, AVIOContext *pb, int taglen, const cha val = dst; if (val) - av_metadata_set2(&s->metadata, key, val, AV_METADATA_DONT_OVERWRITE); + av_dict_set(&s->metadata, key, val, AV_DICT_DONT_OVERWRITE); } static int is_number(const char *str) @@ -142,44 +149,44 @@ static int is_number(const char *str) return !*str; } -static AVMetadataTag* get_date_tag(AVMetadata *m, const char *tag) +static AVDictionaryEntry* get_date_tag(AVDictionary *m, const char *tag) { - AVMetadataTag *t; - if ((t = av_metadata_get(m, tag, NULL, AV_METADATA_MATCH_CASE)) && + AVDictionaryEntry *t; + if ((t = av_dict_get(m, tag, NULL, AV_DICT_MATCH_CASE)) && strlen(t->value) == 4 && is_number(t->value)) return t; return NULL; } -static void merge_date(AVMetadata **m) +static void merge_date(AVDictionary **m) { - AVMetadataTag *t; + AVDictionaryEntry *t; char date[17] = {0}; // YYYY-MM-DD hh:mm if (!(t = get_date_tag(*m, "TYER")) && !(t = get_date_tag(*m, "TYE"))) return; av_strlcpy(date, t->value, 5); - av_metadata_set2(m, "TYER", NULL, 0); - av_metadata_set2(m, "TYE", NULL, 0); + av_dict_set(m, "TYER", NULL, 0); + av_dict_set(m, "TYE", NULL, 0); if (!(t = get_date_tag(*m, "TDAT")) && !(t = get_date_tag(*m, "TDA"))) goto finish; snprintf(date + 4, sizeof(date) - 4, "-%.2s-%.2s", t->value + 2, t->value); - av_metadata_set2(m, "TDAT", NULL, 0); - av_metadata_set2(m, "TDA", NULL, 0); + av_dict_set(m, "TDAT", NULL, 0); + av_dict_set(m, "TDA", NULL, 0); if (!(t = get_date_tag(*m, 
"TIME")) && !(t = get_date_tag(*m, "TIM"))) goto finish; snprintf(date + 10, sizeof(date) - 10, " %.2s:%.2s", t->value, t->value + 2); - av_metadata_set2(m, "TIME", NULL, 0); - av_metadata_set2(m, "TIM", NULL, 0); + av_dict_set(m, "TIME", NULL, 0); + av_dict_set(m, "TIM", NULL, 0); finish: if (date[0]) - av_metadata_set2(m, "date", date, 0); + av_dict_set(m, "date", date, 0); } static void ff_id3v2_parse(AVFormatContext *s, int len, uint8_t version, uint8_t flags) @@ -221,7 +228,7 @@ static void ff_id3v2_parse(AVFormatContext *s, int len, uint8_t version, uint8_t avio_skip(s->pb, get_size(s->pb, 4)); while (len >= taghdrlen) { - unsigned int tflags; + unsigned int tflags = 0; int tunsync = 0; if (isv34) { @@ -238,7 +245,7 @@ static void ff_id3v2_parse(AVFormatContext *s, int len, uint8_t version, uint8_t tag[3] = 0; tlen = avio_rb24(s->pb); } - if (tlen > (1<<28)) + if (tlen > (1<<28) || !tlen) break; len -= taghdrlen + tlen; @@ -248,6 +255,8 @@ static void ff_id3v2_parse(AVFormatContext *s, int len, uint8_t version, uint8_t next = avio_tell(s->pb) + tlen; if (tflags & ID3v2_FLAG_DATALEN) { + if (tlen < 4) + break; avio_rb32(s->pb); tlen -= 4; } @@ -259,6 +268,10 @@ static void ff_id3v2_parse(AVFormatContext *s, int len, uint8_t version, uint8_t if (unsync || tunsync) { int i, j; av_fast_malloc(&buffer, &buffer_size, tlen); + if (!buffer) { + av_log(s, AV_LOG_ERROR, "Failed to alloc %d bytes\n", tlen); + goto seek; + } for (i = 0, j = 0; i < tlen; i++, j++) { buffer[j] = avio_r8(s->pb); if (j > 0 && !buffer[j] && buffer[j - 1] == 0xff) { @@ -279,6 +292,7 @@ static void ff_id3v2_parse(AVFormatContext *s, int len, uint8_t version, uint8_t break; } /* Skip to end of tag */ +seek: avio_seek(s->pb, next, SEEK_SET); } diff --git a/libavformat/iff.c b/libavformat/iff.c index da4e858501..db988a6ecd 100644 --- a/libavformat/iff.c +++ b/libavformat/iff.c @@ -1,5 +1,4 @@ /* - * IFF (.iff) file demuxer * Copyright (c) 2008 Jaikrishnan Menon <realityman@gmx.net> * Copyright (c) 
2010 Peter Ross <pross@xvid.org> * Copyright (c) 2010 Sebastian Vater <cdgs.basty@googlemail.com> @@ -31,6 +30,7 @@ #include "libavcodec/bytestream.h" #include "libavutil/intreadwrite.h" +#include "libavutil/dict.h" #include "avformat.h" #define ID_8SVX MKTAG('8','S','V','X') @@ -60,8 +60,6 @@ #define RIGHT 4 #define STEREO 6 -#define PACKET_SIZE 1024 - /** * This number of bytes if added at the beginning of each AVPacket * which contain additional information about video properties @@ -88,7 +86,8 @@ typedef struct { uint32_t body_size; uint32_t sent_bytes; uint32_t audio_frame_count; - unsigned compression; ///< delta compression method used + svx8_compression_type svx8_compression; + bitmap_compression_type bitmap_compression; ///< delta compression method used unsigned bpp; ///< bits per plane to decode (differs from bits_per_coded_sample if HAM) unsigned ham; ///< 0 if non-HAM or number of hold bits (6 for bpp > 6, 4 otherwise) unsigned flags; ///< 1 for EHB, 0 is no extra half darkening @@ -96,19 +95,6 @@ typedef struct { unsigned masking; ///< masking method used } IffDemuxContext; - -static void interleave_stereo(const uint8_t *src, uint8_t *dest, int size) -{ - uint8_t *end = dest + size; - size = size>>1; - - while(dest < end) { - *dest++ = *src; - *dest++ = *(src+size); - src++; - } -} - /* Metadata string read */ static int get_metadata(AVFormatContext *s, const char *const tag, @@ -124,7 +110,7 @@ static int get_metadata(AVFormatContext *s, return AVERROR(EIO); } buf[data_size] = 0; - av_metadata_set2(&s->metadata, tag, buf, AV_METADATA_DONT_STRDUP_VAL); + av_dict_set(&s->metadata, tag, buf, AV_DICT_DONT_STRDUP_VAL); return 0; } @@ -146,7 +132,6 @@ static int iff_read_header(AVFormatContext *s, AVStream *st; uint8_t *buf; uint32_t chunk_id, data_size; - int compression = -1; uint32_t screenmode = 0; unsigned transparency = 0; unsigned masking = 0; // no mask @@ -178,7 +163,7 @@ static int iff_read_header(AVFormatContext *s, st->codec->sample_rate = 
avio_rb16(pb); if (data_size >= 16) { avio_skip(pb, 1); - compression = avio_r8(pb); + iff->svx8_compression = avio_r8(pb); } break; @@ -209,6 +194,7 @@ static int iff_read_header(AVFormatContext *s, break; case ID_BMHD: + iff->bitmap_compression = -1; st->codec->codec_type = AVMEDIA_TYPE_VIDEO; if (data_size <= 8) return AVERROR_INVALIDDATA; @@ -219,7 +205,7 @@ static int iff_read_header(AVFormatContext *s, if (data_size >= 10) masking = avio_r8(pb); if (data_size >= 11) - compression = avio_r8(pb); + iff->bitmap_compression = avio_r8(pb); if (data_size >= 14) { avio_skip(pb, 1); // padding transparency = avio_rb16(pb); @@ -231,21 +217,10 @@ static int iff_read_header(AVFormatContext *s, break; case ID_ANNO: - case ID_TEXT: - metadata_tag = "comment"; - break; - - case ID_AUTH: - metadata_tag = "artist"; - break; - - case ID_COPYRIGHT: - metadata_tag = "copyright"; - break; - - case ID_NAME: - metadata_tag = "title"; - break; + case ID_TEXT: metadata_tag = "comment"; break; + case ID_AUTH: metadata_tag = "artist"; break; + case ID_COPYRIGHT: metadata_tag = "copyright"; break; + case ID_NAME: metadata_tag = "title"; break; } if (metadata_tag) { @@ -263,9 +238,9 @@ static int iff_read_header(AVFormatContext *s, case AVMEDIA_TYPE_AUDIO: av_set_pts_info(st, 32, 1, st->codec->sample_rate); - switch(compression) { + switch (iff->svx8_compression) { case COMP_NONE: - st->codec->codec_id = CODEC_ID_PCM_S8; + st->codec->codec_id = CODEC_ID_8SVX_RAW; break; case COMP_FIB: st->codec->codec_id = CODEC_ID_8SVX_FIB; @@ -274,17 +249,17 @@ static int iff_read_header(AVFormatContext *s, st->codec->codec_id = CODEC_ID_8SVX_EXP; break; default: - av_log(s, AV_LOG_ERROR, "unknown compression method\n"); + av_log(s, AV_LOG_ERROR, + "Unknown SVX8 compression method '%d'\n", iff->svx8_compression); return -1; } - st->codec->bits_per_coded_sample = 8; + st->codec->bits_per_coded_sample = iff->svx8_compression == COMP_NONE ? 
8 : 4; st->codec->bit_rate = st->codec->channels * st->codec->sample_rate * st->codec->bits_per_coded_sample; st->codec->block_align = st->codec->channels * st->codec->bits_per_coded_sample; break; case AVMEDIA_TYPE_VIDEO: - iff->compression = compression; iff->bpp = st->codec->bits_per_coded_sample; if ((screenmode & 0x800 /* Hold And Modify */) && iff->bpp <= 8) { iff->ham = iff->bpp > 6 ? 6 : 4; @@ -302,14 +277,14 @@ static int iff_read_header(AVFormatContext *s, } buf = st->codec->extradata; bytestream_put_be16(&buf, IFF_EXTRA_VIDEO_SIZE); - bytestream_put_byte(&buf, iff->compression); + bytestream_put_byte(&buf, iff->bitmap_compression); bytestream_put_byte(&buf, iff->bpp); bytestream_put_byte(&buf, iff->ham); bytestream_put_byte(&buf, iff->flags); bytestream_put_be16(&buf, iff->transparency); bytestream_put_byte(&buf, iff->masking); - switch (compression) { + switch (iff->bitmap_compression) { case BITMAP_RAW: st->codec->codec_id = CODEC_ID_IFF_ILBM; break; @@ -317,7 +292,8 @@ static int iff_read_header(AVFormatContext *s, st->codec->codec_id = CODEC_ID_IFF_BYTERUN1; break; default: - av_log(s, AV_LOG_ERROR, "unknown compression method\n"); + av_log(s, AV_LOG_ERROR, + "Unknown bitmap compression method '%d'\n", iff->bitmap_compression); return AVERROR_INVALIDDATA; } break; @@ -339,15 +315,8 @@ static int iff_read_packet(AVFormatContext *s, if(iff->sent_bytes >= iff->body_size) return AVERROR(EIO); - if(st->codec->channels == 2) { - uint8_t sample_buffer[PACKET_SIZE]; - - ret = avio_read(pb, sample_buffer, PACKET_SIZE); - if(av_new_packet(pkt, PACKET_SIZE) < 0) { - av_log(s, AV_LOG_ERROR, "cannot allocate packet\n"); - return AVERROR(ENOMEM); - } - interleave_stereo(sample_buffer, pkt->data, PACKET_SIZE); + if (st->codec->codec_type == AVMEDIA_TYPE_AUDIO) { + ret = av_get_packet(pb, pkt, iff->body_size); } else if (st->codec->codec_type == AVMEDIA_TYPE_VIDEO) { uint8_t *buf; @@ -359,22 +328,14 @@ static int iff_read_packet(AVFormatContext *s, 
bytestream_put_be16(&buf, 2); ret = avio_read(pb, buf, iff->body_size); } else { - ret = av_get_packet(pb, pkt, PACKET_SIZE); + av_abort(); } if(iff->sent_bytes == 0) pkt->flags |= AV_PKT_FLAG_KEY; + iff->sent_bytes = iff->body_size; - if(st->codec->codec_type == AVMEDIA_TYPE_AUDIO) { - iff->sent_bytes += PACKET_SIZE; - } else { - iff->sent_bytes = iff->body_size; - } pkt->stream_index = 0; - if(st->codec->codec_type == AVMEDIA_TYPE_AUDIO) { - pkt->pts = iff->audio_frame_count; - iff->audio_frame_count += ret / st->codec->channels; - } return ret; } diff --git a/libavformat/img2.c b/libavformat/img2.c index 19292c39ec..11ebf12d43 100644 --- a/libavformat/img2.c +++ b/libavformat/img2.c @@ -22,12 +22,17 @@ #include "libavutil/intreadwrite.h" #include "libavutil/avstring.h" +#include "libavutil/log.h" +#include "libavutil/opt.h" +#include "libavutil/pixdesc.h" +#include "libavutil/parseutils.h" #include "avformat.h" #include "avio_internal.h" #include "internal.h" #include <strings.h> typedef struct { + const AVClass *class; /**< Class for private options. */ int img_first; int img_last; int img_number; @@ -35,6 +40,9 @@ typedef struct { int is_pipe; int split_planes; /**< use independent file for each Y, U, V plane */ char path[1024]; + char *pixel_format; /**< Set by a private option. */ + char *video_size; /**< Set by a private option. */ + char *framerate; /**< Set by a private option. 
*/ } VideoData; typedef struct { @@ -77,6 +85,7 @@ static const IdStrMap img_tags[] = { { CODEC_ID_SUNRAST , "sunras"}, { CODEC_ID_JPEG2000 , "j2k"}, { CODEC_ID_JPEG2000 , "jp2"}, + { CODEC_ID_JPEG2000 , "jpc"}, { CODEC_ID_DPX , "dpx"}, { CODEC_ID_PICTOR , "pic"}, { CODEC_ID_NONE , NULL} @@ -201,8 +210,11 @@ enum CodecID av_guess_image2_codec(const char *filename){ static int read_header(AVFormatContext *s1, AVFormatParameters *ap) { VideoData *s = s1->priv_data; - int first_index, last_index; + int first_index, last_index, ret = 0; + int width = 0, height = 0; AVStream *st; + enum PixelFormat pix_fmt = PIX_FMT_NONE; + AVRational framerate; s1->ctx_flags |= AVFMTCTX_NOHEADER; @@ -211,6 +223,29 @@ static int read_header(AVFormatContext *s1, AVFormatParameters *ap) return AVERROR(ENOMEM); } + if (s->pixel_format && (pix_fmt = av_get_pix_fmt(s->pixel_format)) == PIX_FMT_NONE) { + av_log(s1, AV_LOG_ERROR, "No such pixel format: %s.\n", s->pixel_format); + return AVERROR(EINVAL); + } + if (s->video_size && (ret = av_parse_video_size(&width, &height, s->video_size)) < 0) { + av_log(s, AV_LOG_ERROR, "Could not parse video size: %s.\n", s->video_size); + return ret; + } + if ((ret = av_parse_video_rate(&framerate, s->framerate)) < 0) { + av_log(s, AV_LOG_ERROR, "Could not parse framerate: %s.\n", s->framerate); + return ret; + } +#if FF_API_FORMAT_PARAMETERS + if (ap->pix_fmt != PIX_FMT_NONE) + pix_fmt = ap->pix_fmt; + if (ap->width > 0) + width = ap->width; + if (ap->height > 0) + height = ap->height; + if (ap->time_base.num) + framerate = (AVRational){ap->time_base.den, ap->time_base.num}; +#endif + av_strlcpy(s->path, s1->filename, sizeof(s->path)); s->img_number = 0; s->img_count = 0; @@ -223,15 +258,11 @@ static int read_header(AVFormatContext *s1, AVFormatParameters *ap) st->need_parsing = AVSTREAM_PARSE_FULL; } - if (!ap->time_base.num) { - av_set_pts_info(st, 60, 1, 25); - } else { - av_set_pts_info(st, 60, ap->time_base.num, ap->time_base.den); - } + 
av_set_pts_info(st, 60, framerate.den, framerate.num); - if(ap->width && ap->height){ - st->codec->width = ap->width; - st->codec->height= ap->height; + if (width && height) { + st->codec->width = width; + st->codec->height = height; } if (!s->is_pipe) { @@ -257,8 +288,8 @@ static int read_header(AVFormatContext *s1, AVFormatParameters *ap) st->codec->codec_type = AVMEDIA_TYPE_VIDEO; st->codec->codec_id = av_str2id(img_tags, s->path); } - if(st->codec->codec_type == AVMEDIA_TYPE_VIDEO && ap->pix_fmt != PIX_FMT_NONE) - st->codec->pix_fmt = ap->pix_fmt; + if(st->codec->codec_type == AVMEDIA_TYPE_VIDEO && pix_fmt != PIX_FMT_NONE) + st->codec->pix_fmt = pix_fmt; return 0; } @@ -429,6 +460,22 @@ static int write_packet(AVFormatContext *s, AVPacket *pkt) #endif /* CONFIG_IMAGE2_MUXER || CONFIG_IMAGE2PIPE_MUXER */ +#define OFFSET(x) offsetof(VideoData, x) +#define DEC AV_OPT_FLAG_DECODING_PARAM +static const AVOption options[] = { + { "pixel_format", "", OFFSET(pixel_format), FF_OPT_TYPE_STRING, {.str = NULL}, 0, 0, DEC }, + { "video_size", "", OFFSET(video_size), FF_OPT_TYPE_STRING, {.str = NULL}, 0, 0, DEC }, + { "framerate", "", OFFSET(framerate), FF_OPT_TYPE_STRING, {.str = "25"}, 0, 0, DEC }, + { NULL }, +}; + +static const AVClass img2_class = { + .class_name = "image2 demuxer", + .item_name = av_default_item_name, + .option = options, + .version = LIBAVUTIL_VERSION_INT, +}; + /* input */ #if CONFIG_IMAGE2_DEMUXER AVInputFormat ff_image2_demuxer = { @@ -439,6 +486,7 @@ AVInputFormat ff_image2_demuxer = { .read_header = read_header, .read_packet = read_packet, .flags = AVFMT_NOFILE, + .priv_class = &img2_class, }; #endif #if CONFIG_IMAGE2PIPE_DEMUXER @@ -448,6 +496,7 @@ AVInputFormat ff_image2pipe_demuxer = { .priv_data_size = sizeof(VideoData), .read_header = read_header, .read_packet = read_packet, + .priv_class = &img2_class, }; #endif diff --git a/libavformat/ingenientdec.c b/libavformat/ingenientdec.c index eb1e6f6521..4a7ff09dbe 100644 --- 
a/libavformat/ingenientdec.c +++ b/libavformat/ingenientdec.c @@ -61,11 +61,12 @@ static int ingenient_read_packet(AVFormatContext *s, AVPacket *pkt) AVInputFormat ff_ingenient_demuxer = { "ingenient", NULL_IF_CONFIG_SMALL("raw Ingenient MJPEG"), - 0, + sizeof(FFRawVideoDemuxerContext), NULL, ff_raw_video_read_header, ingenient_read_packet, .flags= AVFMT_GENERIC_INDEX, .extensions = "cgi", // FIXME .value = CODEC_ID_MJPEG, + .priv_class = &ff_rawvideo_demuxer_class, }; diff --git a/libavformat/internal.h b/libavformat/internal.h index d75e0676e5..64992dadfa 100644 --- a/libavformat/internal.h +++ b/libavformat/internal.h @@ -26,6 +26,12 @@ #define MAX_URL_SIZE 4096 +#ifdef DEBUG +# define hex_dump_debug(class, buf, size) av_hex_dump_log(class, AV_LOG_DEBUG, buf, size) +#else +# define hex_dump_debug(class, buf, size) +#endif + typedef struct AVCodecTag { enum CodecID id; unsigned int tag; @@ -128,10 +134,12 @@ int ff_url_join(char *str, int size, const char *proto, * @param dest_type the destination address type, may be NULL * @param port the destination port of the media stream, 0 if unknown * @param ttl the time to live of the stream, 0 if not multicast + * @param fmt the AVFormatContext, which might contain options modifying + * the generated SDP */ void ff_sdp_write_media(char *buff, int size, AVCodecContext *c, const char *dest_addr, const char *dest_type, - int port, int ttl); + int port, int ttl, AVFormatContext *fmt); /** * Write a packet to another muxer than the one the user originally diff --git a/libavformat/ipmovie.c b/libavformat/ipmovie.c index 8ea59a3b08..471d6a4bc0 100644 --- a/libavformat/ipmovie.c +++ b/libavformat/ipmovie.c @@ -35,17 +35,6 @@ #include "libavutil/intreadwrite.h" #include "avformat.h" -/* debugging support: #define DEBUG_IPMOVIE as non-zero to see extremely - * verbose information about the demux process */ -#define DEBUG_IPMOVIE 0 - -#if DEBUG_IPMOVIE -#undef printf -#define debug_ipmovie printf -#else -static inline void 
debug_ipmovie(const char *format, ...) { } -#endif - #define CHUNK_PREAMBLE_SIZE 4 #define OPCODE_PREAMBLE_SIZE 4 @@ -150,8 +139,8 @@ static int load_ipmovie_packet(IPMVEContext *s, AVIOContext *pb, s->audio_frame_count += (s->audio_chunk_size - 6) / s->audio_channels; - debug_ipmovie("sending audio frame with pts %"PRId64" (%d audio frames)\n", - pkt->pts, s->audio_frame_count); + av_dlog(NULL, "sending audio frame with pts %"PRId64" (%d audio frames)\n", + pkt->pts, s->audio_frame_count); chunk_type = CHUNK_VIDEO; @@ -184,8 +173,7 @@ static int load_ipmovie_packet(IPMVEContext *s, AVIOContext *pb, pkt->stream_index = s->video_stream_index; pkt->pts = s->video_pts; - debug_ipmovie("sending video frame with pts %"PRId64"\n", - pkt->pts); + av_dlog(NULL, "sending video frame with pts %"PRId64"\n", pkt->pts); s->video_pts += s->frame_pts_inc; @@ -233,36 +221,36 @@ static int process_ipmovie_chunk(IPMVEContext *s, AVIOContext *pb, chunk_size = AV_RL16(&chunk_preamble[0]); chunk_type = AV_RL16(&chunk_preamble[2]); - debug_ipmovie("chunk type 0x%04X, 0x%04X bytes: ", chunk_type, chunk_size); + av_dlog(NULL, "chunk type 0x%04X, 0x%04X bytes: ", chunk_type, chunk_size); switch (chunk_type) { case CHUNK_INIT_AUDIO: - debug_ipmovie("initialize audio\n"); + av_dlog(NULL, "initialize audio\n"); break; case CHUNK_AUDIO_ONLY: - debug_ipmovie("audio only\n"); + av_dlog(NULL, "audio only\n"); break; case CHUNK_INIT_VIDEO: - debug_ipmovie("initialize video\n"); + av_dlog(NULL, "initialize video\n"); break; case CHUNK_VIDEO: - debug_ipmovie("video (and audio)\n"); + av_dlog(NULL, "video (and audio)\n"); break; case CHUNK_SHUTDOWN: - debug_ipmovie("shutdown\n"); + av_dlog(NULL, "shutdown\n"); break; case CHUNK_END: - debug_ipmovie("end\n"); + av_dlog(NULL, "end\n"); break; default: - debug_ipmovie("invalid chunk\n"); + av_dlog(NULL, "invalid chunk\n"); chunk_type = CHUNK_BAD; break; @@ -288,29 +276,29 @@ static int process_ipmovie_chunk(IPMVEContext *s, AVIOContext *pb, chunk_size 
-= OPCODE_PREAMBLE_SIZE; chunk_size -= opcode_size; if (chunk_size < 0) { - debug_ipmovie("chunk_size countdown just went negative\n"); + av_dlog(NULL, "chunk_size countdown just went negative\n"); chunk_type = CHUNK_BAD; break; } - debug_ipmovie(" opcode type %02X, version %d, 0x%04X bytes: ", - opcode_type, opcode_version, opcode_size); + av_dlog(NULL, " opcode type %02X, version %d, 0x%04X bytes: ", + opcode_type, opcode_version, opcode_size); switch (opcode_type) { case OPCODE_END_OF_STREAM: - debug_ipmovie("end of stream\n"); + av_dlog(NULL, "end of stream\n"); avio_skip(pb, opcode_size); break; case OPCODE_END_OF_CHUNK: - debug_ipmovie("end of chunk\n"); + av_dlog(NULL, "end of chunk\n"); avio_skip(pb, opcode_size); break; case OPCODE_CREATE_TIMER: - debug_ipmovie("create timer\n"); + av_dlog(NULL, "create timer\n"); if ((opcode_version > 0) || (opcode_size > 6)) { - debug_ipmovie("bad create_timer opcode\n"); + av_dlog(NULL, "bad create_timer opcode\n"); chunk_type = CHUNK_BAD; break; } @@ -320,14 +308,15 @@ static int process_ipmovie_chunk(IPMVEContext *s, AVIOContext *pb, break; } s->frame_pts_inc = ((uint64_t)AV_RL32(&scratch[0])) * AV_RL16(&scratch[4]); - debug_ipmovie(" %.2f frames/second (timer div = %d, subdiv = %d)\n", - 1000000.0/s->frame_pts_inc, AV_RL32(&scratch[0]), AV_RL16(&scratch[4])); + av_dlog(NULL, " %.2f frames/second (timer div = %d, subdiv = %d)\n", + 1000000.0 / s->frame_pts_inc, AV_RL32(&scratch[0]), + AV_RL16(&scratch[4])); break; case OPCODE_INIT_AUDIO_BUFFERS: - debug_ipmovie("initialize audio buffers\n"); + av_dlog(NULL, "initialize audio buffers\n"); if ((opcode_version > 1) || (opcode_size > 10)) { - debug_ipmovie("bad init_audio_buffers opcode\n"); + av_dlog(NULL, "bad init_audio_buffers opcode\n"); chunk_type = CHUNK_BAD; break; } @@ -349,23 +338,22 @@ static int process_ipmovie_chunk(IPMVEContext *s, AVIOContext *pb, s->audio_type = CODEC_ID_PCM_S16LE; else s->audio_type = CODEC_ID_PCM_U8; - debug_ipmovie("audio: %d bits, %d 
Hz, %s, %s format\n", - s->audio_bits, - s->audio_sample_rate, - (s->audio_channels == 2) ? "stereo" : "mono", - (s->audio_type == CODEC_ID_INTERPLAY_DPCM) ? - "Interplay audio" : "PCM"); + av_dlog(NULL, "audio: %d bits, %d Hz, %s, %s format\n", + s->audio_bits, s->audio_sample_rate, + (s->audio_channels == 2) ? "stereo" : "mono", + (s->audio_type == CODEC_ID_INTERPLAY_DPCM) ? + "Interplay audio" : "PCM"); break; case OPCODE_START_STOP_AUDIO: - debug_ipmovie("start/stop audio\n"); + av_dlog(NULL, "start/stop audio\n"); avio_skip(pb, opcode_size); break; case OPCODE_INIT_VIDEO_BUFFERS: - debug_ipmovie("initialize video buffers\n"); + av_dlog(NULL, "initialize video buffers\n"); if ((opcode_version > 2) || (opcode_size > 8)) { - debug_ipmovie("bad init_video_buffers opcode\n"); + av_dlog(NULL, "bad init_video_buffers opcode\n"); chunk_type = CHUNK_BAD; break; } @@ -381,8 +369,8 @@ static int process_ipmovie_chunk(IPMVEContext *s, AVIOContext *pb, } else { s->video_bpp = 16; } - debug_ipmovie("video resolution: %d x %d\n", - s->video_width, s->video_height); + av_dlog(NULL, "video resolution: %d x %d\n", + s->video_width, s->video_height); break; case OPCODE_UNKNOWN_06: @@ -392,17 +380,17 @@ static int process_ipmovie_chunk(IPMVEContext *s, AVIOContext *pb, case OPCODE_UNKNOWN_13: case OPCODE_UNKNOWN_14: case OPCODE_UNKNOWN_15: - debug_ipmovie("unknown (but documented) opcode %02X\n", opcode_type); + av_dlog(NULL, "unknown (but documented) opcode %02X\n", opcode_type); avio_skip(pb, opcode_size); break; case OPCODE_SEND_BUFFER: - debug_ipmovie("send buffer\n"); + av_dlog(NULL, "send buffer\n"); avio_skip(pb, opcode_size); break; case OPCODE_AUDIO_FRAME: - debug_ipmovie("audio frame\n"); + av_dlog(NULL, "audio frame\n"); /* log position and move on for now */ s->audio_chunk_offset = avio_tell(pb); @@ -411,26 +399,26 @@ static int process_ipmovie_chunk(IPMVEContext *s, AVIOContext *pb, break; case OPCODE_SILENCE_FRAME: - debug_ipmovie("silence frame\n"); + av_dlog(NULL, 
"silence frame\n"); avio_skip(pb, opcode_size); break; case OPCODE_INIT_VIDEO_MODE: - debug_ipmovie("initialize video mode\n"); + av_dlog(NULL, "initialize video mode\n"); avio_skip(pb, opcode_size); break; case OPCODE_CREATE_GRADIENT: - debug_ipmovie("create gradient\n"); + av_dlog(NULL, "create gradient\n"); avio_skip(pb, opcode_size); break; case OPCODE_SET_PALETTE: - debug_ipmovie("set palette\n"); + av_dlog(NULL, "set palette\n"); /* check for the logical maximum palette size * (3 * 256 + 4 bytes) */ if (opcode_size > 0x304) { - debug_ipmovie("demux_ipmovie: set_palette opcode too large\n"); + av_dlog(NULL, "demux_ipmovie: set_palette opcode too large\n"); chunk_type = CHUNK_BAD; break; } @@ -444,7 +432,7 @@ static int process_ipmovie_chunk(IPMVEContext *s, AVIOContext *pb, last_color = first_color + AV_RL16(&scratch[2]) - 1; /* sanity check (since they are 16 bit values) */ if ((first_color > 0xFF) || (last_color > 0xFF)) { - debug_ipmovie("demux_ipmovie: set_palette indexes out of range (%d -> %d)\n", + av_dlog(NULL, "demux_ipmovie: set_palette indexes out of range (%d -> %d)\n", first_color, last_color); chunk_type = CHUNK_BAD; break; @@ -463,12 +451,12 @@ static int process_ipmovie_chunk(IPMVEContext *s, AVIOContext *pb, break; case OPCODE_SET_PALETTE_COMPRESSED: - debug_ipmovie("set palette compressed\n"); + av_dlog(NULL, "set palette compressed\n"); avio_skip(pb, opcode_size); break; case OPCODE_SET_DECODING_MAP: - debug_ipmovie("set decoding map\n"); + av_dlog(NULL, "set decoding map\n"); /* log position and move on for now */ s->decode_map_chunk_offset = avio_tell(pb); @@ -477,7 +465,7 @@ static int process_ipmovie_chunk(IPMVEContext *s, AVIOContext *pb, break; case OPCODE_VIDEO_DATA: - debug_ipmovie("set video data\n"); + av_dlog(NULL, "set video data\n"); /* log position and move on for now */ s->video_chunk_offset = avio_tell(pb); @@ -486,7 +474,7 @@ static int process_ipmovie_chunk(IPMVEContext *s, AVIOContext *pb, break; default: - 
debug_ipmovie("*** unknown opcode type\n"); + av_dlog(NULL, "*** unknown opcode type\n"); chunk_type = CHUNK_BAD; break; diff --git a/libavformat/isom.c b/libavformat/isom.c index 3259128d3a..dcc3b74af3 100644 --- a/libavformat/isom.c +++ b/libavformat/isom.c @@ -27,7 +27,6 @@ #include "internal.h" #include "isom.h" #include "riff.h" -#include "avio_internal.h" #include "libavcodec/mpeg4audio.h" #include "libavcodec/mpegaudiodata.h" @@ -460,16 +459,18 @@ void ff_mov_read_chan(AVFormatContext *s, int64_t size, AVCodecContext *codec) uint32_t layout_tag; AVIOContext *pb = s->pb; const MovChannelLayout *layouts = mov_channel_layout; - if (size != 12) { + layout_tag = avio_rb32(pb); + size -= 4; + if (layout_tag == 0) { //< kCAFChannelLayoutTag_UseChannelDescriptions // Channel descriptions not implemented av_log_ask_for_sample(s, "Unimplemented container channel layout.\n"); avio_skip(pb, size); return; } - layout_tag = avio_rb32(pb); if (layout_tag == 0x10000) { //< kCAFChannelLayoutTag_UseChannelBitmap codec->channel_layout = avio_rb32(pb); - avio_skip(pb, 4); + size -= 4; + avio_skip(pb, size); return; } while (layouts->channel_layout) { @@ -481,27 +482,20 @@ void ff_mov_read_chan(AVFormatContext *s, int64_t size, AVCodecContext *codec) } if (!codec->channel_layout) av_log(s, AV_LOG_WARNING, "Unknown container channel layout.\n"); - avio_skip(pb, 8); + avio_skip(pb, size); } -void ff_mov_write_chan(AVFormatContext *s, int64_t channel_layout, - const char *chunk_type) +void ff_mov_write_chan(AVIOContext *pb, int64_t channel_layout) { - AVIOContext *pb = s->pb; const MovChannelLayout *layouts; uint32_t layout_tag = 0; - if (!channel_layout) - return; - for (layouts = mov_channel_layout; layouts->channel_layout; layouts++) if (channel_layout == layouts->channel_layout) { layout_tag = layouts->layout_tag; break; } - ffio_wfourcc(pb, chunk_type); - avio_wb64(pb, 12); //< mChunkSize if (layout_tag) { avio_wb32(pb, layout_tag); //< mChannelLayoutTag avio_wb32(pb, 0); //< 
mChannelBitmap diff --git a/libavformat/isom.h b/libavformat/isom.h index 6e42a1f841..43fa477a7d 100644 --- a/libavformat/isom.h +++ b/libavformat/isom.h @@ -155,7 +155,6 @@ enum CodecID ff_mov_get_lpcm_codec_id(int bps, int flags); int ff_mov_read_stsd_entries(MOVContext *c, AVIOContext *pb, int entries); void ff_mov_read_chan(AVFormatContext *s, int64_t size, AVCodecContext *codec); -void ff_mov_write_chan(AVFormatContext *s, int64_t channel_layout, - const char *chunk_type); +void ff_mov_write_chan(AVIOContext *pb, int64_t channel_layout); #endif /* AVFORMAT_ISOM_H */ diff --git a/libavformat/m4vdec.c b/libavformat/m4vdec.c index f830c7afcf..88f838022e 100644 --- a/libavformat/m4vdec.c +++ b/libavformat/m4vdec.c @@ -49,14 +49,4 @@ static int mpeg4video_probe(AVProbeData *probe_packet) return 0; } -AVInputFormat ff_m4v_demuxer = { - "m4v", - NULL_IF_CONFIG_SMALL("raw MPEG-4 video format"), - 0, - mpeg4video_probe, /** probing for MPEG-4 data */ - ff_raw_video_read_header, - ff_raw_read_partial_packet, - .flags= AVFMT_GENERIC_INDEX, - .extensions = "m4v", - .value = CODEC_ID_MPEG4, -}; +FF_DEF_RAWVIDEO_DEMUXER(m4v, "raw MPEG-4 video format", mpeg4video_probe, "m4v", CODEC_ID_MPEG4) diff --git a/libavformat/matroska.c b/libavformat/matroska.c index c7e9663316..52481d7556 100644 --- a/libavformat/matroska.c +++ b/libavformat/matroska.c @@ -99,3 +99,27 @@ const AVMetadataConv ff_mkv_metadata_conv[] = { { "PART_NUMBER" , "track" }, { 0 } }; + +const char * const matroska_video_stereo_mode[MATROSKA_VIDEO_STEREO_MODE_COUNT] = { + "mono", + "left_right", + "bottom_top", + "top_bottom", + "checkerboard_rl", + "checkerboard_lr", + "row_interleaved_rl", + "row_interleaved_lr", + "col_interleaved_rl", + "col_interleaved_lr", + "anaglyph_cyan_red", + "right_left", + "anaglyph_green_magenta", + "block_lr", + "block_rl", +}; + +const char * const matroska_video_stereo_plane[MATROSKA_VIDEO_STEREO_PLANE_COUNT] = { + "left", + "right", + "background", +}; diff --git
a/libavformat/matroska.h b/libavformat/matroska.h index 0d0d0bcff4..ab7e3269fa 100644 --- a/libavformat/matroska.h +++ b/libavformat/matroska.h @@ -77,8 +77,13 @@ #define MATROSKA_ID_TRACKNUMBER 0xD7 #define MATROSKA_ID_TRACKUID 0x73C5 #define MATROSKA_ID_TRACKTYPE 0x83 -#define MATROSKA_ID_TRACKAUDIO 0xE1 -#define MATROSKA_ID_TRACKVIDEO 0xE0 +#define MATROSKA_ID_TRACKVIDEO 0xE0 +#define MATROSKA_ID_TRACKAUDIO 0xE1 +#define MATROSKA_ID_TRACKOPERATION 0xE2 +#define MATROSKA_ID_TRACKCOMBINEPLANES 0xE3 +#define MATROSKA_ID_TRACKPLANE 0xE4 +#define MATROSKA_ID_TRACKPLANEUID 0xE5 +#define MATROSKA_ID_TRACKPLANETYPE 0xE6 #define MATROSKA_ID_CODECID 0x86 #define MATROSKA_ID_CODECPRIVATE 0x63A2 #define MATROSKA_ID_CODECNAME 0x258688 @@ -111,7 +116,7 @@ #define MATROSKA_ID_VIDEOPIXELCROPR 0x54DD #define MATROSKA_ID_VIDEODISPLAYUNIT 0x54B2 #define MATROSKA_ID_VIDEOFLAGINTERLACED 0x9A -#define MATROSKA_ID_VIDEOSTEREOMODE 0x53B9 +#define MATROSKA_ID_VIDEOSTEREOMODE 0x53B8 #define MATROSKA_ID_VIDEOASPECTRATIO 0x54B3 #define MATROSKA_ID_VIDEOCOLORSPACE 0x2EB524 @@ -218,6 +223,24 @@ typedef enum { MATROSKA_TRACK_ENCODING_COMP_HEADERSTRIP = 3, } MatroskaTrackEncodingCompAlgo; +typedef enum { + MATROSKA_VIDEO_STEREOMODE_TYPE_MONO = 0, + MATROSKA_VIDEO_STEREOMODE_TYPE_LEFT_RIGHT = 1, + MATROSKA_VIDEO_STEREOMODE_TYPE_BOTTOM_TOP = 2, + MATROSKA_VIDEO_STEREOMODE_TYPE_TOP_BOTTOM = 3, + MATROSKA_VIDEO_STEREOMODE_TYPE_CHECKERBOARD_RL = 4, + MATROSKA_VIDEO_STEREOMODE_TYPE_CHECKERBOARD_LR = 5, + MATROSKA_VIDEO_STEREOMODE_TYPE_ROW_INTERLEAVED_RL = 6, + MATROSKA_VIDEO_STEREOMODE_TYPE_ROW_INTERLEAVED_LR = 7, + MATROSKA_VIDEO_STEREOMODE_TYPE_COL_INTERLEAVED_RL = 8, + MATROSKA_VIDEO_STEREOMODE_TYPE_COL_INTERLEAVED_LR = 9, + MATROSKA_VIDEO_STEREOMODE_TYPE_ANAGLYPH_CYAN_RED = 10, + MATROSKA_VIDEO_STEREOMODE_TYPE_RIGHT_LEFT = 11, + MATROSKA_VIDEO_STEREOMODE_TYPE_ANAGLYPH_GREEN_MAG = 12, + MATROSKA_VIDEO_STEREOMODE_TYPE_BOTH_EYES_BLOCK_LR = 13, + MATROSKA_VIDEO_STEREOMODE_TYPE_BOTH_EYES_BLOCK_RL = 
14, +} MatroskaVideoStereoModeType; + /* * Matroska Codec IDs, strings */ @@ -235,8 +258,13 @@ typedef struct CodecMime{ /* max. depth in the EBML tree structure */ #define EBML_MAX_DEPTH 16 +#define MATROSKA_VIDEO_STEREO_MODE_COUNT 15 +#define MATROSKA_VIDEO_STEREO_PLANE_COUNT 3 + extern const CodecTags ff_mkv_codec_tags[]; extern const CodecMime ff_mkv_mime_tags[]; extern const AVMetadataConv ff_mkv_metadata_conv[]; +extern const char * const matroska_video_stereo_mode[MATROSKA_VIDEO_STEREO_MODE_COUNT]; +extern const char * const matroska_video_stereo_plane[MATROSKA_VIDEO_STEREO_PLANE_COUNT]; #endif /* AVFORMAT_MATROSKA_H */ diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c index ab67e110c2..efb80a017d 100644 --- a/libavformat/matroskadec.c +++ b/libavformat/matroskadec.c @@ -42,6 +42,7 @@ #include "libavutil/intreadwrite.h" #include "libavutil/avstring.h" #include "libavutil/lzo.h" +#include "libavutil/dict.h" #if CONFIG_ZLIB #include <zlib.h> #endif @@ -111,7 +112,8 @@ typedef struct { uint64_t display_height; uint64_t pixel_width; uint64_t pixel_height; - uint64_t fourcc; + EbmlBin color_space; + uint64_t stereo_mode; } MatroskaTrackVideo; typedef struct { @@ -132,6 +134,15 @@ typedef struct { } MatroskaTrackAudio; typedef struct { + uint64_t uid; + uint64_t type; +} MatroskaTrackPlane; + +typedef struct { + EbmlList combine_planes; +} MatroskaTrackOperation; + +typedef struct { uint64_t num; uint64_t uid; uint64_t type; @@ -145,6 +156,7 @@ typedef struct { uint64_t flag_forced; MatroskaTrackVideo video; MatroskaTrackAudio audio; + MatroskaTrackOperation operation; EbmlList encodings; AVStream *stream; @@ -290,14 +302,14 @@ static EbmlSyntax matroska_track_video[] = { { MATROSKA_ID_VIDEODISPLAYHEIGHT, EBML_UINT, 0, offsetof(MatroskaTrackVideo,display_height) }, { MATROSKA_ID_VIDEOPIXELWIDTH, EBML_UINT, 0, offsetof(MatroskaTrackVideo,pixel_width) }, { MATROSKA_ID_VIDEOPIXELHEIGHT, EBML_UINT, 0, offsetof(MatroskaTrackVideo,pixel_height) }, - { 
MATROSKA_ID_VIDEOCOLORSPACE, EBML_UINT, 0, offsetof(MatroskaTrackVideo,fourcc) }, + { MATROSKA_ID_VIDEOCOLORSPACE, EBML_BIN, 0, offsetof(MatroskaTrackVideo,color_space) }, + { MATROSKA_ID_VIDEOSTEREOMODE, EBML_UINT, 0, offsetof(MatroskaTrackVideo,stereo_mode) }, { MATROSKA_ID_VIDEOPIXELCROPB, EBML_NONE }, { MATROSKA_ID_VIDEOPIXELCROPT, EBML_NONE }, { MATROSKA_ID_VIDEOPIXELCROPL, EBML_NONE }, { MATROSKA_ID_VIDEOPIXELCROPR, EBML_NONE }, { MATROSKA_ID_VIDEODISPLAYUNIT, EBML_NONE }, { MATROSKA_ID_VIDEOFLAGINTERLACED,EBML_NONE }, - { MATROSKA_ID_VIDEOSTEREOMODE, EBML_NONE }, { MATROSKA_ID_VIDEOASPECTRATIO, EBML_NONE }, { 0 } }; @@ -329,6 +341,22 @@ static EbmlSyntax matroska_track_encodings[] = { { 0 } }; +static EbmlSyntax matroska_track_plane[] = { + { MATROSKA_ID_TRACKPLANEUID, EBML_UINT, 0, offsetof(MatroskaTrackPlane,uid) }, + { MATROSKA_ID_TRACKPLANETYPE, EBML_UINT, 0, offsetof(MatroskaTrackPlane,type) }, + { 0 } +}; + +static EbmlSyntax matroska_track_combine_planes[] = { + { MATROSKA_ID_TRACKPLANE, EBML_NEST, sizeof(MatroskaTrackPlane), offsetof(MatroskaTrackOperation,combine_planes), {.n=matroska_track_plane} }, + { 0 } +}; + +static EbmlSyntax matroska_track_operation[] = { + { MATROSKA_ID_TRACKCOMBINEPLANES, EBML_NEST, 0, 0, {.n=matroska_track_combine_planes} }, + { 0 } +}; + static EbmlSyntax matroska_track[] = { { MATROSKA_ID_TRACKNUMBER, EBML_UINT, 0, offsetof(MatroskaTrack,num) }, { MATROSKA_ID_TRACKNAME, EBML_UTF8, 0, offsetof(MatroskaTrack,name) }, @@ -343,6 +371,7 @@ static EbmlSyntax matroska_track[] = { { MATROSKA_ID_TRACKFLAGFORCED, EBML_UINT, 0, offsetof(MatroskaTrack,flag_forced), {.u=0} }, { MATROSKA_ID_TRACKVIDEO, EBML_NEST, 0, offsetof(MatroskaTrack,video), {.n=matroska_track_video} }, { MATROSKA_ID_TRACKAUDIO, EBML_NEST, 0, offsetof(MatroskaTrack,audio), {.n=matroska_track_audio} }, + { MATROSKA_ID_TRACKOPERATION, EBML_NEST, 0, offsetof(MatroskaTrack,operation), {.n=matroska_track_operation} }, { MATROSKA_ID_TRACKCONTENTENCODINGS,EBML_NEST, 0, 
0, {.n=matroska_track_encodings} }, { MATROSKA_ID_TRACKFLAGENABLED, EBML_NONE }, { MATROSKA_ID_TRACKFLAGLACING, EBML_NONE }, @@ -1009,7 +1038,8 @@ static void matroska_fix_ass_packet(MatroskaDemuxContext *matroska, char *line, *layer, *ptr = pkt->data, *end = ptr+pkt->size; for (; *ptr!=',' && ptr<end-1; ptr++); if (*ptr == ',') - layer = ++ptr; + ptr++; + layer = ptr; for (; *ptr!=',' && ptr<end-1; ptr++); if (*ptr == ',') { int64_t end_pts = pkt->pts + display_duration; @@ -1044,7 +1074,7 @@ static void matroska_merge_packets(AVPacket *out, AVPacket *in) } static void matroska_convert_tag(AVFormatContext *s, EbmlList *list, - AVMetadata **metadata, char *prefix) + AVDictionary **metadata, char *prefix) { MatroskaTag *tags = list->elem; char key[1024]; @@ -1060,14 +1090,14 @@ static void matroska_convert_tag(AVFormatContext *s, EbmlList *list, if (prefix) snprintf(key, sizeof(key), "%s/%s", prefix, tags[i].name); else av_strlcpy(key, tags[i].name, sizeof(key)); if (tags[i].def || !lang) { - av_metadata_set2(metadata, key, tags[i].string, 0); + av_dict_set(metadata, key, tags[i].string, 0); if (tags[i].sub.nb_elem) matroska_convert_tag(s, &tags[i].sub, metadata, key); } if (lang) { av_strlcat(key, "-", sizeof(key)); av_strlcat(key, lang, sizeof(key)); - av_metadata_set2(metadata, key, tags[i].string, 0); + av_dict_set(metadata, key, tags[i].string, 0); if (tags[i].sub.nb_elem) matroska_convert_tag(s, &tags[i].sub, metadata, key); } @@ -1202,20 +1232,25 @@ static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap) uint64_t max_start = 0; Ebml ebml = { 0 }; AVStream *st; - int i, j, res; + int i, j, k, res; matroska->ctx = s; /* First read the EBML header. 
*/ if (ebml_parse(matroska, ebml_syntax, &ebml) || ebml.version > EBML_VERSION || ebml.max_size > sizeof(uint64_t) - || ebml.id_length > sizeof(uint32_t) || ebml.doctype_version > 2) { + || ebml.id_length > sizeof(uint32_t) || ebml.doctype_version > 3) { av_log(matroska->ctx, AV_LOG_ERROR, "EBML header using unsupported features\n" "(EBML version %"PRIu64", doctype %s, doc version %"PRIu64")\n", ebml.version, ebml.doctype, ebml.doctype_version); ebml_free(ebml_syntax, &ebml); return AVERROR_PATCHWELCOME; + } else if (ebml.doctype_version == 3) { + av_log(matroska->ctx, AV_LOG_WARNING, + "EBML header using unsupported features\n" + "(EBML version %"PRIu64", doctype %s, doc version %"PRIu64")\n", + ebml.version, ebml.doctype, ebml.doctype_version); } for (i = 0; i < FF_ARRAY_ELEMS(matroska_doctypes); i++) if (!strcmp(ebml.doctype, matroska_doctypes[i])) @@ -1235,7 +1270,7 @@ static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap) if (matroska->duration) matroska->ctx->duration = matroska->duration * matroska->time_scale * 1000 / AV_TIME_BASE; - av_metadata_set2(&s->metadata, "title", matroska->title, 0); + av_dict_set(&s->metadata, "title", matroska->title, 0); tracks = matroska->tracks.elem; for (i=0; i < matroska->tracks.nb_elem; i++) { @@ -1246,6 +1281,7 @@ static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap) uint8_t *extradata = NULL; int extradata_size = 0; int extradata_offset = 0; + uint32_t fourcc = 0; AVIOContext b; /* Apply some sanity checks. 
*/ @@ -1267,6 +1303,8 @@ static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap) track->video.display_width = track->video.pixel_width; if (!track->video.display_height) track->video.display_height = track->video.pixel_height; + if (track->video.color_space.size == 4) + fourcc = AV_RL32(track->video.color_space.data); } else if (track->type == MATROSKA_TRACK_TYPE_AUDIO) { if (!track->audio.out_samplerate) track->audio.out_samplerate = track->audio.samplerate; @@ -1326,8 +1364,8 @@ static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap) && track->codec_priv.size >= 40 && track->codec_priv.data != NULL) { track->ms_compat = 1; - track->video.fourcc = AV_RL32(track->codec_priv.data + 16); - codec_id = ff_codec_get_id(ff_codec_bmp_tags, track->video.fourcc); + fourcc = AV_RL32(track->codec_priv.data + 16); + codec_id = ff_codec_get_id(ff_codec_bmp_tags, fourcc); extradata_offset = 40; } else if (!strcmp(track->codec_id, "A_MS/ACM") && track->codec_priv.size >= 14 @@ -1343,8 +1381,8 @@ static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap) } else if (!strcmp(track->codec_id, "V_QUICKTIME") && (track->codec_priv.size >= 86) && (track->codec_priv.data != NULL)) { - track->video.fourcc = AV_RL32(track->codec_priv.data); - codec_id=ff_codec_get_id(codec_movvideo_tags, track->video.fourcc); + fourcc = AV_RL32(track->codec_priv.data); + codec_id = ff_codec_get_id(codec_movvideo_tags, fourcc); } else if (codec_id == CODEC_ID_PCM_S16BE) { switch (track->audio.bitdepth) { case 8: codec_id = CODEC_ID_PCM_U8; break; @@ -1433,8 +1471,8 @@ static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap) st->codec->codec_id = codec_id; st->start_time = 0; if (strcmp(track->language, "und")) - av_metadata_set2(&st->metadata, "language", track->language, 0); - av_metadata_set2(&st->metadata, "title", track->name, 0); + av_dict_set(&st->metadata, "language", track->language, 0); + av_dict_set(&st->metadata, "title", 
track->name, 0); if (track->flag_default) st->disposition |= AV_DISPOSITION_DEFAULT; @@ -1462,8 +1500,10 @@ static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap) } if (track->type == MATROSKA_TRACK_TYPE_VIDEO) { + MatroskaTrackPlane *planes = track->operation.combine_planes.elem; + st->codec->codec_type = AVMEDIA_TYPE_VIDEO; - st->codec->codec_tag = track->video.fourcc; + st->codec->codec_tag = fourcc; st->codec->width = track->video.pixel_width; st->codec->height = track->video.pixel_height; av_reduce(&st->sample_aspect_ratio.num, @@ -1475,6 +1515,25 @@ static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap) st->need_parsing = AVSTREAM_PARSE_HEADERS; if (track->default_duration) st->avg_frame_rate = av_d2q(1000000000.0/track->default_duration, INT_MAX); + + /* export stereo mode flag as metadata tag */ + if (track->video.stereo_mode && track->video.stereo_mode < MATROSKA_VIDEO_STEREO_MODE_COUNT) + av_dict_set(&st->metadata, "stereo_mode", matroska_video_stereo_mode[track->video.stereo_mode], 0); + + /* if we have virtual track, mark the real tracks */ + for (j=0; j < track->operation.combine_planes.nb_elem; j++) { + char buf[32]; + if (planes[j].type >= MATROSKA_VIDEO_STEREO_PLANE_COUNT) + continue; + snprintf(buf, sizeof(buf), "%s_%d", + matroska_video_stereo_plane[planes[j].type], i); + for (k=0; k < matroska->tracks.nb_elem; k++) + if (planes[j].uid == tracks[k].uid) { + av_dict_set(&s->streams[k]->metadata, + "stereo_mode", buf, 0); + break; + } + } } else if (track->type == MATROSKA_TRACK_TYPE_AUDIO) { st->codec->codec_type = AVMEDIA_TYPE_AUDIO; st->codec->sample_rate = track->audio.out_samplerate; @@ -1495,7 +1554,7 @@ static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap) AVStream *st = av_new_stream(s, 0); if (st == NULL) break; - av_metadata_set2(&st->metadata, "filename",attachements[j].filename, 0); + av_dict_set(&st->metadata, "filename",attachements[j].filename, 0); st->codec->codec_id = 
CODEC_ID_NONE; st->codec->codec_type = AVMEDIA_TYPE_ATTACHMENT; st->codec->extradata = av_malloc(attachements[j].bin.size); @@ -1523,7 +1582,7 @@ static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap) ff_new_chapter(s, chapters[i].uid, (AVRational){1, 1000000000}, chapters[i].start, chapters[i].end, chapters[i].title); - av_metadata_set2(&chapters[i].chapter->metadata, + av_dict_set(&chapters[i].chapter->metadata, "title", chapters[i].title, 0); max_start = chapters[i].start; } diff --git a/libavformat/matroskaenc.c b/libavformat/matroskaenc.c index 387cead8cd..9e42019415 100644 --- a/libavformat/matroskaenc.c +++ b/libavformat/matroskaenc.c @@ -30,6 +30,7 @@ #include "libavutil/intreadwrite.h" #include "libavutil/random_seed.h" #include "libavutil/lfg.h" +#include "libavutil/dict.h" #include "libavcodec/xiph.h" #include "libavcodec/mpeg4audio.h" #include <strings.h> @@ -523,7 +524,7 @@ static int mkv_write_tracks(AVFormatContext *s) int bit_depth = av_get_bits_per_sample(codec->codec_id); int sample_rate = codec->sample_rate; int output_sample_rate = 0; - AVMetadataTag *tag; + AVDictionaryEntry *tag; if (!bit_depth) bit_depth = av_get_bits_per_sample_fmt(codec->sample_fmt); @@ -536,9 +537,9 @@ static int mkv_write_tracks(AVFormatContext *s) put_ebml_uint (pb, MATROSKA_ID_TRACKUID , i + 1); put_ebml_uint (pb, MATROSKA_ID_TRACKFLAGLACING , 0); // no lacing (yet) - if ((tag = av_metadata_get(st->metadata, "title", NULL, 0))) + if ((tag = av_dict_get(st->metadata, "title", NULL, 0))) put_ebml_string(pb, MATROSKA_ID_TRACKNAME, tag->value); - tag = av_metadata_get(st->metadata, "language", NULL, 0); + tag = av_dict_get(st->metadata, "language", NULL, 0); put_ebml_string(pb, MATROSKA_ID_TRACKLANGUAGE, tag ? tag->value:"und"); if (st->disposition) @@ -586,12 +587,38 @@ static int mkv_write_tracks(AVFormatContext *s) // XXX: interlace flag? 
put_ebml_uint (pb, MATROSKA_ID_VIDEOPIXELWIDTH , codec->width); put_ebml_uint (pb, MATROSKA_ID_VIDEOPIXELHEIGHT, codec->height); + + if ((tag = av_dict_get(st->metadata, "stereo_mode", NULL, 0)) || + (tag = av_dict_get( s->metadata, "stereo_mode", NULL, 0))) { + // save stereo mode flag + uint64_t st_mode = MATROSKA_VIDEO_STEREO_MODE_COUNT; + + for (j=0; j<MATROSKA_VIDEO_STEREO_MODE_COUNT; j++) + if (!strcmp(tag->value, matroska_video_stereo_mode[j])){ + st_mode = j; + break; + } + + if ((mkv->mode == MODE_WEBM && st_mode > 3 && st_mode != 11) + || st_mode >= MATROSKA_VIDEO_STEREO_MODE_COUNT) { + av_log(s, AV_LOG_ERROR, + "The specified stereo mode is not valid.\n"); + return AVERROR(EINVAL); + } else + put_ebml_uint(pb, MATROSKA_ID_VIDEOSTEREOMODE, st_mode); + } + if (st->sample_aspect_ratio.num) { int d_width = codec->width*av_q2d(st->sample_aspect_ratio); put_ebml_uint(pb, MATROSKA_ID_VIDEODISPLAYWIDTH , d_width); put_ebml_uint(pb, MATROSKA_ID_VIDEODISPLAYHEIGHT, codec->height); put_ebml_uint(pb, MATROSKA_ID_VIDEODISPLAYUNIT, 3); } + + if (codec->codec_id == CODEC_ID_RAWVIDEO) { + uint32_t color_space = av_le2ne32(codec->codec_tag); + put_ebml_binary(pb, MATROSKA_ID_VIDEOCOLORSPACE, &color_space, sizeof(color_space)); + } end_ebml_master(pb, subinfo); break; @@ -656,7 +683,7 @@ static int mkv_write_chapters(AVFormatContext *s) for (i = 0; i < s->nb_chapters; i++) { ebml_master chapteratom, chapterdisplay; AVChapter *c = s->chapters[i]; - AVMetadataTag *t = NULL; + AVDictionaryEntry *t = NULL; chapteratom = start_ebml_master(pb, MATROSKA_ID_CHAPTERATOM, 0); put_ebml_uint(pb, MATROSKA_ID_CHAPTERUID, c->id); @@ -666,7 +693,7 @@ static int mkv_write_chapters(AVFormatContext *s) av_rescale_q(c->end, c->time_base, scale)); put_ebml_uint(pb, MATROSKA_ID_CHAPTERFLAGHIDDEN , 0); put_ebml_uint(pb, MATROSKA_ID_CHAPTERFLAGENABLED, 1); - if ((t = av_metadata_get(c->metadata, "title", NULL, 0))) { + if ((t = av_dict_get(c->metadata, "title", NULL, 0))) { chapterdisplay = 
start_ebml_master(pb, MATROSKA_ID_CHAPTERDISPLAY, 0); put_ebml_string(pb, MATROSKA_ID_CHAPSTRING, t->value); put_ebml_string(pb, MATROSKA_ID_CHAPLANG , "und"); @@ -679,7 +706,7 @@ static int mkv_write_chapters(AVFormatContext *s) return 0; } -static void mkv_write_simpletag(AVIOContext *pb, AVMetadataTag *t) +static void mkv_write_simpletag(AVIOContext *pb, AVDictionaryEntry *t) { uint8_t *key = av_strdup(t->key); uint8_t *p = key; @@ -709,12 +736,12 @@ static void mkv_write_simpletag(AVIOContext *pb, AVMetadataTag *t) av_freep(&key); } -static int mkv_write_tag(AVFormatContext *s, AVMetadata *m, unsigned int elementid, +static int mkv_write_tag(AVFormatContext *s, AVDictionary *m, unsigned int elementid, unsigned int uid, ebml_master *tags) { MatroskaMuxContext *mkv = s->priv_data; ebml_master tag, targets; - AVMetadataTag *t = NULL; + AVDictionaryEntry *t = NULL; int ret; if (!tags->pos) { @@ -730,8 +757,8 @@ static int mkv_write_tag(AVFormatContext *s, AVMetadata *m, unsigned int element put_ebml_uint(s->pb, elementid, uid); end_ebml_master(s->pb, targets); - while ((t = av_metadata_get(m, "", t, AV_METADATA_IGNORE_SUFFIX))) - if (strcasecmp(t->key, "title")) + while ((t = av_dict_get(m, "", t, AV_DICT_IGNORE_SUFFIX))) + if (strcasecmp(t->key, "title") && strcasecmp(t->key, "stereo_mode")) mkv_write_simpletag(s->pb, t); end_ebml_master(s->pb, tag); @@ -745,7 +772,7 @@ static int mkv_write_tags(AVFormatContext *s) ff_metadata_conv_ctx(s, ff_mkv_metadata_conv, NULL); - if (av_metadata_get(s->metadata, "", NULL, AV_METADATA_IGNORE_SUFFIX)) { + if (av_dict_get(s->metadata, "", NULL, AV_DICT_IGNORE_SUFFIX)) { ret = mkv_write_tag(s, s->metadata, 0, 0, &tags); if (ret < 0) return ret; } @@ -753,7 +780,7 @@ static int mkv_write_tags(AVFormatContext *s) for (i = 0; i < s->nb_streams; i++) { AVStream *st = s->streams[i]; - if (!av_metadata_get(st->metadata, "", 0, AV_METADATA_IGNORE_SUFFIX)) + if (!av_dict_get(st->metadata, "", 0, AV_DICT_IGNORE_SUFFIX)) continue; ret = 
mkv_write_tag(s, st->metadata, MATROSKA_ID_TAGTARGETS_TRACKUID, i + 1, &tags); @@ -763,7 +790,7 @@ static int mkv_write_tags(AVFormatContext *s) for (i = 0; i < s->nb_chapters; i++) { AVChapter *ch = s->chapters[i]; - if (!av_metadata_get(ch->metadata, "", NULL, AV_METADATA_IGNORE_SUFFIX)) + if (!av_dict_get(ch->metadata, "", NULL, AV_DICT_IGNORE_SUFFIX)) continue; ret = mkv_write_tag(s, ch->metadata, MATROSKA_ID_TAGTARGETS_CHAPTERUID, ch->id, &tags); @@ -780,7 +807,7 @@ static int mkv_write_header(AVFormatContext *s) MatroskaMuxContext *mkv = s->priv_data; AVIOContext *pb = s->pb; ebml_master ebml_header, segment_info; - AVMetadataTag *tag; + AVDictionaryEntry *tag; int ret, i; if (!strcmp(s->oformat->name, "webm")) mkv->mode = MODE_WEBM; @@ -817,7 +844,7 @@ static int mkv_write_header(AVFormatContext *s) segment_info = start_ebml_master(pb, MATROSKA_ID_INFO, 0); put_ebml_uint(pb, MATROSKA_ID_TIMECODESCALE, 1000000); - if ((tag = av_metadata_get(s->metadata, "title", NULL, 0))) + if ((tag = av_dict_get(s->metadata, "title", NULL, 0))) put_ebml_string(pb, MATROSKA_ID_TITLE, tag->value); if (!(s->streams[0]->codec->flags & CODEC_FLAG_BITEXACT)) { uint32_t segment_uid[4]; @@ -1199,7 +1226,7 @@ AVOutputFormat ff_webm_muxer = { mkv_write_header, mkv_write_packet, mkv_write_trailer, - .flags = AVFMT_GLOBALHEADER | AVFMT_VARIABLE_FPS, + .flags = AVFMT_GLOBALHEADER | AVFMT_VARIABLE_FPS | AVFMT_TS_NONSTRICT, }; #endif diff --git a/libavformat/metadata.c b/libavformat/metadata.c index 68d42fd257..8caeffa316 100644 --- a/libavformat/metadata.c +++ b/libavformat/metadata.c @@ -21,71 +21,20 @@ #include <strings.h> #include "avformat.h" #include "metadata.h" +#include "libavutil/dict.h" -AVMetadataTag * -av_metadata_get(AVMetadata *m, const char *key, const AVMetadataTag *prev, int flags) +#if FF_API_OLD_METADATA2 +AVDictionaryEntry * +av_metadata_get(AVDictionary *m, const char *key, const AVDictionaryEntry *prev, int flags) { - unsigned int i, j; - - if(!m) - return NULL; - - 
if(prev) i= prev - m->elems + 1; - else i= 0; - - for(; i<m->count; i++){ - const char *s= m->elems[i].key; - if(flags & AV_METADATA_MATCH_CASE) for(j=0; s[j] == key[j] && key[j]; j++); - else for(j=0; toupper(s[j]) == toupper(key[j]) && key[j]; j++); - if(key[j]) - continue; - if(s[j] && !(flags & AV_METADATA_IGNORE_SUFFIX)) - continue; - return &m->elems[i]; - } - return NULL; + return av_dict_get(m, key, prev, flags); } -int av_metadata_set2(AVMetadata **pm, const char *key, const char *value, int flags) +int av_metadata_set2(AVDictionary **pm, const char *key, const char *value, int flags) { - AVMetadata *m= *pm; - AVMetadataTag *tag= av_metadata_get(m, key, NULL, flags); - - if(!m) - m=*pm= av_mallocz(sizeof(*m)); - - if(tag){ - if (flags & AV_METADATA_DONT_OVERWRITE) - return 0; - av_free(tag->value); - av_free(tag->key); - *tag= m->elems[--m->count]; - }else{ - AVMetadataTag *tmp= av_realloc(m->elems, (m->count+1) * sizeof(*m->elems)); - if(tmp){ - m->elems= tmp; - }else - return AVERROR(ENOMEM); - } - if(value){ - if(flags & AV_METADATA_DONT_STRDUP_KEY){ - m->elems[m->count].key = key; - }else - m->elems[m->count].key = av_strdup(key ); - if(flags & AV_METADATA_DONT_STRDUP_VAL){ - m->elems[m->count].value= value; - }else - m->elems[m->count].value= av_strdup(value); - m->count++; - } - if(!m->count) { - av_free(m->elems); - av_freep(pm); - } - - return 0; + return av_dict_set(pm, key, value, flags); } - +#endif #if FF_API_OLD_METADATA int av_metadata_set(AVMetadata **pm, const char *key, const char *value) { @@ -94,41 +43,38 @@ int av_metadata_set(AVMetadata **pm, const char *key, const char *value) #endif #if FF_API_OLD_METADATA2 + void av_metadata_conv(AVFormatContext *ctx, const AVMetadataConv *d_conv, const AVMetadataConv *s_conv) { return; } -#endif -void av_metadata_free(AVMetadata **pm) +void av_metadata_free(AVDictionary **pm) { - AVMetadata *m= *pm; + av_dict_free(pm); +} - if(m){ - while(m->count--){ - av_free(m->elems[m->count].key); - 
av_free(m->elems[m->count].value); - } - av_free(m->elems); - } - av_freep(pm); +void av_metadata_copy(AVDictionary **dst, AVDictionary *src, int flags) +{ + av_dict_copy(dst, src, flags); } +#endif -void ff_metadata_conv(AVMetadata **pm, const AVMetadataConv *d_conv, +void ff_metadata_conv(AVDictionary **pm, const AVMetadataConv *d_conv, const AVMetadataConv *s_conv) { /* TODO: use binary search to look up the two conversion tables if the tables are getting big enough that it would matter speed wise */ const AVMetadataConv *sc, *dc; - AVMetadataTag *mtag = NULL; - AVMetadata *dst = NULL; + AVDictionaryEntry *mtag = NULL; + AVDictionary *dst = NULL; const char *key; if (d_conv == s_conv) return; - while((mtag=av_metadata_get(*pm, "", mtag, AV_METADATA_IGNORE_SUFFIX))) { + while ((mtag = av_dict_get(*pm, "", mtag, AV_DICT_IGNORE_SUFFIX))) { key = mtag->key; if (s_conv) for (sc=s_conv; sc->native; sc++) @@ -142,9 +88,9 @@ void ff_metadata_conv(AVMetadata **pm, const AVMetadataConv *d_conv, key = dc->native; break; } - av_metadata_set2(&dst, key, mtag->value, 0); + av_dict_set(&dst, key, mtag->value, 0); } - av_metadata_free(pm); + av_dict_free(pm); *pm = dst; } @@ -161,10 +107,3 @@ void ff_metadata_conv_ctx(AVFormatContext *ctx, const AVMetadataConv *d_conv, ff_metadata_conv(&ctx->programs[i]->metadata, d_conv, s_conv); } -void av_metadata_copy(AVMetadata **dst, AVMetadata *src, int flags) -{ - AVMetadataTag *t = NULL; - - while ((t = av_metadata_get(src, "", t, AV_METADATA_IGNORE_SUFFIX))) - av_metadata_set2(dst, t->key, t->value, flags); -} diff --git a/libavformat/metadata.h b/libavformat/metadata.h index dd9b69005c..aa96118f80 100644 --- a/libavformat/metadata.h +++ b/libavformat/metadata.h @@ -29,11 +29,7 @@ #include "avformat.h" - -struct AVMetadata{ - int count; - AVMetadataTag *elems; -}; +#include "libavutil/dict.h" struct AVMetadataConv{ const char *native; @@ -48,7 +44,7 @@ void ff_metadata_demux_compat(AVFormatContext *s); void 
ff_metadata_mux_compat(AVFormatContext *s); #endif -void ff_metadata_conv(AVMetadata **pm, const AVMetadataConv *d_conv, +void ff_metadata_conv(AVDictionary **pm, const AVMetadataConv *d_conv, const AVMetadataConv *s_conv); void ff_metadata_conv_ctx(AVFormatContext *ctx, const AVMetadataConv *d_conv, const AVMetadataConv *s_conv); diff --git a/libavformat/mmf.c b/libavformat/mmf.c index fc6fcc3caa..89e33b4ce2 100644 --- a/libavformat/mmf.c +++ b/libavformat/mmf.c @@ -186,13 +186,13 @@ static int mmf_read_header(AVFormatContext *s, unsigned int tag; AVIOContext *pb = s->pb; AVStream *st; - int64_t file_size, size; + int64_t size; int rate, params; tag = avio_rl32(pb); if (tag != MKTAG('M', 'M', 'M', 'D')) return -1; - file_size = avio_rb32(pb); + avio_skip(pb, 4); /* file_size */ /* Skip some unused chunks that may or may not be present */ for(;; avio_skip(pb, size)) { @@ -263,12 +263,10 @@ static int mmf_read_packet(AVFormatContext *s, AVPacket *pkt) { MMFContext *mmf = s->priv_data; - AVStream *st; int ret, size; if (url_feof(s->pb)) return AVERROR(EIO); - st = s->streams[0]; size = MAX_SIZE; if(size > mmf->data_size) diff --git a/libavformat/mms.h b/libavformat/mms.h index 788851e24f..0117089d24 100644 --- a/libavformat/mms.h +++ b/libavformat/mms.h @@ -60,4 +60,5 @@ typedef struct { int ff_mms_asf_header_parser(MMSContext * mms); int ff_mms_read_data(MMSContext *mms, uint8_t *buf, const int size); int ff_mms_read_header(MMSContext * mms, uint8_t * buf, const int size); -#endif + +#endif /* AVFORMAT_MMS_H */ diff --git a/libavformat/mmsh.c b/libavformat/mmsh.c index 70664b9776..105e53c82b 100644 --- a/libavformat/mmsh.c +++ b/libavformat/mmsh.c @@ -28,7 +28,7 @@ #include <string.h> #include "libavutil/intreadwrite.h" #include "libavutil/avstring.h" -#include "libavformat/internal.h" +#include "internal.h" #include "mms.h" #include "asf.h" #include "http.h" @@ -231,7 +231,7 @@ static int mmsh_open(URLContext *h, const char *uri, int flags) host, sizeof(host), 
&port, path, sizeof(path), location); if (port<0) port = 80; // default mmsh protocol port - ff_url_join(httpname, sizeof(httpname), "http", NULL, host, port, path); + ff_url_join(httpname, sizeof(httpname), "http", NULL, host, port, "%s", path); if (ffurl_alloc(&mms->mms_hd, httpname, AVIO_RDONLY) < 0) { return AVERROR(EIO); diff --git a/libavformat/mmst.c b/libavformat/mmst.c index 775ab7ea7f..7c1041ec85 100644 --- a/libavformat/mmst.c +++ b/libavformat/mmst.c @@ -152,7 +152,7 @@ static int send_command_packet(MMSTContext *mmst) return 0; } -static void mms_put_utf16(MMSContext *mms, uint8_t *src) +static void mms_put_utf16(MMSContext *mms, const uint8_t *src) { AVIOContext bic; int size = mms->write_out_ptr - mms->out_buffer; diff --git a/libavformat/mov.c b/libavformat/mov.c index c16add7713..302ab18857 100644 --- a/libavformat/mov.c +++ b/libavformat/mov.c @@ -23,11 +23,11 @@ #include <limits.h> //#define DEBUG -//#define DEBUG_METADATA //#define MOV_EXPORT_ALL_METADATA #include "libavutil/intreadwrite.h" #include "libavutil/avstring.h" +#include "libavutil/dict.h" #include "avformat.h" #include "avio_internal.h" #include "riff.h" @@ -79,15 +79,15 @@ typedef struct MOVParseTableEntry { static const MOVParseTableEntry mov_default_parse_table[]; -static int mov_metadata_trkn(MOVContext *c, AVIOContext *pb, unsigned len) +static int mov_metadata_track_or_disc_number(MOVContext *c, AVIOContext *pb, unsigned len, const char *type) { char buf[16]; avio_rb16(pb); // unknown snprintf(buf, sizeof(buf), "%d", avio_rb16(pb)); - av_metadata_set2(&c->fc->metadata, "track", buf, 0); + av_dict_set(&c->fc->metadata, type, buf, 0); - avio_rb16(pb); // total tracks + avio_rb16(pb); // total tracks/discs return 0; } @@ -138,7 +138,7 @@ static int mov_read_udta_string(MOVContext *c, AVIOContext *pb, MOVAtom atom) const char *key = NULL; uint16_t str_size, langcode = 0; uint32_t data_type = 0; - int (*parse)(MOVContext*, AVIOContext*, unsigned) = NULL; + int (*parse)(MOVContext*, 
AVIOContext*, unsigned, const char *) = NULL; switch (atom.type) { case MKTAG(0xa9,'n','a','m'): key = "title"; break; @@ -164,7 +164,9 @@ static int mov_read_udta_string(MOVContext *c, AVIOContext *pb, MOVAtom atom) case MKTAG( 't','v','e','n'): key = "episode_id";break; case MKTAG( 't','v','n','n'): key = "network"; break; case MKTAG( 't','r','k','n'): key = "track"; - parse = mov_metadata_trkn; break; + parse = mov_metadata_track_or_disc_number; break; + case MKTAG( 'd','i','s','k'): key = "disc"; + parse = mov_metadata_track_or_disc_number; break; } if (c->itunes_metadata && atom.size > 8) { @@ -199,7 +201,7 @@ static int mov_read_udta_string(MOVContext *c, AVIOContext *pb, MOVAtom atom) str_size = FFMIN3(sizeof(str)-1, str_size, atom.size); if (parse) - parse(c, pb, str_size); + parse(c, pb, str_size, key); else { if (data_type == 3 || (data_type == 0 && langcode < 0x800)) { // MAC Encoded mov_read_mac_string(c, pb, str_size, str, sizeof(str)); @@ -207,17 +209,15 @@ static int mov_read_udta_string(MOVContext *c, AVIOContext *pb, MOVAtom atom) avio_read(pb, str, str_size); str[str_size] = 0; } - av_metadata_set2(&c->fc->metadata, key, str, 0); + av_dict_set(&c->fc->metadata, key, str, 0); if (*language && strcmp(language, "und")) { snprintf(key2, sizeof(key2), "%s-%s", key, language); - av_metadata_set2(&c->fc->metadata, key2, str, 0); + av_dict_set(&c->fc->metadata, key2, str, 0); } } -#ifdef DEBUG_METADATA - av_log(c->fc, AV_LOG_DEBUG, "lang \"%3s\" ", language); - av_log(c->fc, AV_LOG_DEBUG, "tag \"%s\" value \"%s\" atom \"%.4s\" %d %lld\n", - key, str, (char*)&atom.type, str_size, atom.size); -#endif + av_dlog(c->fc, "lang \"%3s\" ", language); + av_dlog(c->fc, "tag \"%s\" value \"%s\" atom \"%.4s\" %d %"PRId64"\n", + key, str, (char*)&atom.type, str_size, atom.size); return 0; } @@ -431,7 +431,7 @@ static int mov_read_hdlr(MOVContext *c, AVIOContext *pb, MOVAtom atom) { AVStream *st; uint32_t type; - uint32_t ctype; + uint32_t av_unused ctype; if 
(c->fc->nb_streams < 1) // meta before first trak return 0; @@ -467,21 +467,21 @@ static int mov_read_hdlr(MOVContext *c, AVIOContext *pb, MOVAtom atom) int ff_mov_read_esds(AVFormatContext *fc, AVIOContext *pb, MOVAtom atom) { AVStream *st; - int tag, len; + int tag; if (fc->nb_streams < 1) return 0; st = fc->streams[fc->nb_streams-1]; avio_rb32(pb); /* version + flags */ - len = ff_mp4_read_descr(fc, pb, &tag); + ff_mp4_read_descr(fc, pb, &tag); if (tag == MP4ESDescrTag) { avio_rb16(pb); /* ID */ avio_r8(pb); /* priority */ } else avio_rb16(pb); /* ID */ - len = ff_mp4_read_descr(fc, pb, &tag); + ff_mp4_read_descr(fc, pb, &tag); if (tag == MP4DecConfigDescrTag) ff_mp4_read_dec_config_descr(fc, st, pb); return 0; @@ -513,6 +513,19 @@ static int mov_read_dac3(MOVContext *c, AVIOContext *pb, MOVAtom atom) return 0; } +static int mov_read_wfex(MOVContext *c, AVIOContext *pb, MOVAtom atom) +{ + AVStream *st; + + if (c->fc->nb_streams < 1) + return 0; + st = c->fc->streams[c->fc->nb_streams-1]; + + ff_get_wav_header(pb, st->codec, atom.size); + + return 0; +} + static int mov_read_pasp(MOVContext *c, AVIOContext *pb, MOVAtom atom) { const int num = avio_rb32(pb); @@ -558,10 +571,10 @@ static int mov_read_ftyp(MOVContext *c, AVIOContext *pb, MOVAtom atom) if (strcmp(type, "qt ")) c->isom = 1; av_log(c->fc, AV_LOG_DEBUG, "ISO: File Type Major Brand: %.4s\n",(char *)&type); - av_metadata_set2(&c->fc->metadata, "major_brand", type, 0); + av_dict_set(&c->fc->metadata, "major_brand", type, 0); minor_ver = avio_rb32(pb); /* minor version */ snprintf(minor_ver_str, sizeof(minor_ver_str), "%d", minor_ver); - av_metadata_set2(&c->fc->metadata, "minor_version", minor_ver_str, 0); + av_dict_set(&c->fc->metadata, "minor_version", minor_ver_str, 0); comp_brand_size = atom.size - 8; if (comp_brand_size < 0) @@ -571,7 +584,7 @@ static int mov_read_ftyp(MOVContext *c, AVIOContext *pb, MOVAtom atom) return AVERROR(ENOMEM); avio_read(pb, comp_brands_str, comp_brand_size); 
comp_brands_str[comp_brand_size] = 0; - av_metadata_set2(&c->fc->metadata, "compatible_brands", comp_brands_str, 0); + av_dict_set(&c->fc->metadata, "compatible_brands", comp_brands_str, 0); av_freep(&comp_brands_str); return 0; @@ -595,7 +608,7 @@ static int mov_read_moof(MOVContext *c, AVIOContext *pb, MOVAtom atom) return mov_read_default(c, pb, atom); } -static void mov_metadata_creation_time(AVMetadata **metadata, time_t time) +static void mov_metadata_creation_time(AVDictionary **metadata, time_t time) { char buffer[32]; if (time) { @@ -604,7 +617,7 @@ static void mov_metadata_creation_time(AVMetadata **metadata, time_t time) ptm = gmtime(&time); if (!ptm) return; strftime(buffer, sizeof(buffer), "%Y-%m-%d %H:%M:%S", ptm); - av_metadata_set2(metadata, "creation_time", buffer, 0); + av_dict_set(metadata, "creation_time", buffer, 0); } } @@ -641,7 +654,7 @@ static int mov_read_mdhd(MOVContext *c, AVIOContext *pb, MOVAtom atom) lang = avio_rb16(pb); /* language */ if (ff_mov_lang_to_iso639(lang, language)) - av_metadata_set2(&st->metadata, "language", language, 0); + av_dict_set(&st->metadata, "language", language, 0); avio_rb16(pb); /* quality */ return 0; @@ -718,7 +731,7 @@ static int mov_read_enda(MOVContext *c, AVIOContext *pb, MOVAtom atom) return 0; st = c->fc->streams[c->fc->nb_streams-1]; - little_endian = avio_rb16(pb); + little_endian = avio_rb16(pb) & 0xFF; av_dlog(c->fc, "enda %d\n", little_endian); if (little_endian == 1) { switch (st->codec->codec_id) { @@ -1525,10 +1538,11 @@ static void mov_build_index(MOVContext *mov, AVStream *st) /* adjust first dts according to edit list */ if (sc->time_offset && mov->time_scale > 0) { - int rescaled = sc->time_offset < 0 ? 
av_rescale(sc->time_offset, sc->time_scale, mov->time_scale) : sc->time_offset; - current_dts = -rescaled; + if (sc->time_offset < 0) + sc->time_offset = av_rescale(sc->time_offset, sc->time_scale, mov->time_scale); + current_dts = -sc->time_offset; if (sc->ctts_data && sc->stts_data && - sc->ctts_data[0].duration / sc->stts_data[0].duration > 16) { + sc->ctts_data[0].duration / FFMAX(sc->stts_data[0].duration, 1) > 16) { /* more than 16 frames delay, dts are likely wrong this happens with files created by iMovie */ sc->wrong_dts = 1; @@ -1690,13 +1704,13 @@ static void mov_build_index(MOVContext *mov, AVStream *st) } } -static int mov_open_dref(AVIOContext **pb, char *src, MOVDref *ref) +static int mov_open_dref(AVIOContext **pb, const char *src, MOVDref *ref) { /* try relative path, we do not try the absolute because it can leak information about our system to an attacker */ if (ref->nlvl_to > 0 && ref->nlvl_from > 0) { char filename[1024]; - char *src_path; + const char *src_path; int i, l; /* find a source dir */ @@ -1914,6 +1928,10 @@ static int mov_read_tkhd(MOVContext *c, AVIOContext *pb, MOVAtom atom) sc->width = width >> 16; sc->height = height >> 16; + if (display_matrix[0][0] == -65536 && display_matrix[1][1] == -65536) { + av_dict_set(&st->metadata, "rotate", "180", 0); + } + // transform the display width/height according to the matrix // skip this if the display matrix is the default identity matrix // or if it is rotating the picture, ex iPhone 3GS @@ -2056,7 +2074,7 @@ static int mov_read_trun(MOVContext *c, AVIOContext *pb, MOVAtom atom) if (flags & 0x001) data_offset = avio_rb32(pb); if (flags & 0x004) first_sample_flags = avio_rb32(pb); - dts = st->duration; + dts = st->duration - sc->time_offset; offset = frag->base_data_offset + data_offset; distance = 0; av_dlog(c->fc, "first sample flags 0x%x\n", first_sample_flags); @@ -2085,7 +2103,7 @@ static int mov_read_trun(MOVContext *c, AVIOContext *pb, MOVAtom atom) offset += sample_size; } 
frag->moof_offset = offset; - st->duration = dts; + st->duration = dts + sc->time_offset; return 0; } @@ -2149,9 +2167,6 @@ static int mov_read_cmov(MOVContext *c, AVIOContext *pb, MOVAtom atom) goto free_and_return; atom.type = MKTAG('m','o','o','v'); atom.size = moov_len; -#ifdef DEBUG -// { int fd = open("/tmp/uncompheader.mov", O_WRONLY | O_CREAT); write(fd, moov_data, moov_len); close(fd); } -#endif ret = mov_read_default(c, &ctx, atom); free_and_return: av_free(moov_data); @@ -2265,6 +2280,7 @@ static const MOVParseTableEntry mov_default_parse_table[] = { { MKTAG('e','s','d','s'), mov_read_esds }, { MKTAG('d','a','c','3'), mov_read_dac3 }, /* AC-3 info */ { MKTAG('w','i','d','e'), mov_read_wide }, /* place holder */ +{ MKTAG('w','f','e','x'), mov_read_wfex }, { MKTAG('c','m','o','v'), mov_read_cmov }, { MKTAG('c','h','a','n'), mov_read_chan }, { 0, NULL } diff --git a/libavformat/movenc.c b/libavformat/movenc.c index 1b61706da3..ebfcacb107 100644 --- a/libavformat/movenc.c +++ b/libavformat/movenc.c @@ -32,10 +32,27 @@ #include "libavcodec/put_bits.h" #include "internal.h" #include "libavutil/avstring.h" +#include "libavutil/opt.h" +#include "libavutil/dict.h" +#include "rtpenc.h" #undef NDEBUG #include <assert.h> +static const AVOption options[] = { + { "movflags", "MOV muxer flags", offsetof(MOVMuxContext, flags), FF_OPT_TYPE_FLAGS, {.dbl = 0}, INT_MIN, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM, "movflags" }, + { "rtphint", "Add RTP hint tracks", 0, FF_OPT_TYPE_CONST, {.dbl = FF_MOV_FLAG_RTP_HINT}, INT_MIN, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM, "movflags" }, + FF_RTP_FLAG_OPTS(MOVMuxContext, rtp_flags), + { NULL }, +}; + +static const AVClass mov_muxer_class = { + .class_name = "MOV/3GP/MP4/3G2 muxer", + .item_name = av_default_item_name, + .option = options, + .version = LIBAVUTIL_VERSION_INT, +}; + //FIXME support 64 bit variant with wide placeholders static int64_t updateSize(AVIOContext *pb, int64_t pos) { @@ -1274,20 +1291,49 @@ static int 
mov_write_tapt_tag(AVIOContext *pb, MOVTrack *track) // This box seems important for the psp playback ... without it the movie seems to hang static int mov_write_edts_tag(AVIOContext *pb, MOVTrack *track) { - avio_wb32(pb, 0x24); /* size */ + int64_t duration = av_rescale_rnd(track->trackDuration, MOV_TIMESCALE, + track->timescale, AV_ROUND_UP); + int version = duration < INT32_MAX ? 0 : 1; + int entry_size, entry_count, size; + int64_t delay, start_ct = track->cluster[0].cts; + delay = av_rescale_rnd(track->cluster[0].dts + start_ct, MOV_TIMESCALE, + track->timescale, AV_ROUND_DOWN); + version |= delay < INT32_MAX ? 0 : 1; + + entry_size = (version == 1) ? 20 : 12; + entry_count = 1 + (delay > 0); + size = 24 + entry_count * entry_size; + + /* write the atom data */ + avio_wb32(pb, size); ffio_wfourcc(pb, "edts"); - avio_wb32(pb, 0x1c); /* size */ + avio_wb32(pb, size - 8); ffio_wfourcc(pb, "elst"); - avio_wb32(pb, 0x0); - avio_wb32(pb, 0x1); + avio_w8(pb, version); + avio_wb24(pb, 0); /* flags */ - /* duration ... 
doesn't seem to effect psp */ - avio_wb32(pb, av_rescale_rnd(track->trackDuration, MOV_TIMESCALE, - track->timescale, AV_ROUND_UP)); + avio_wb32(pb, entry_count); + if (delay > 0) { /* add an empty edit to delay presentation */ + if (version == 1) { + avio_wb64(pb, delay); + avio_wb64(pb, -1); + } else { + avio_wb32(pb, delay); + avio_wb32(pb, -1); + } + avio_wb32(pb, 0x00010000); + } - avio_wb32(pb, track->cluster[0].cts); /* first pts is cts since dts is 0 */ + /* duration */ + if (version == 1) { + avio_wb64(pb, duration); + avio_wb64(pb, start_ct); + } else { + avio_wb32(pb, duration); + avio_wb32(pb, start_ct); + } avio_wb32(pb, 0x00010000); - return 0x24; + return size; } static int mov_write_tref_tag(AVIOContext *pb, MOVTrack *track) @@ -1319,12 +1365,12 @@ static int mov_write_uuid_tag_psp(AVIOContext *pb, MOVTrack *mov) return 0x34; } -static int mov_write_udta_sdp(AVIOContext *pb, AVCodecContext *ctx, int index) +static int mov_write_udta_sdp(AVIOContext *pb, AVFormatContext *ctx, int index) { char buf[1000] = ""; int len; - ff_sdp_write_media(buf, sizeof(buf), ctx, NULL, NULL, 0, 0); + ff_sdp_write_media(buf, sizeof(buf), ctx->streams[0]->codec, NULL, NULL, 0, 0, ctx); av_strlcatf(buf, sizeof(buf), "a=control:streamid=%d\r\n", index); len = strlen(buf); @@ -1344,7 +1390,7 @@ static int mov_write_trak_tag(AVIOContext *pb, MOVTrack *track, AVStream *st) avio_wb32(pb, 0); /* size */ ffio_wfourcc(pb, "trak"); mov_write_tkhd_tag(pb, track, st); - if (track->mode == MODE_PSP || track->flags & MOV_TRACK_CTTS) + if (track->mode == MODE_PSP || track->flags & MOV_TRACK_CTTS || track->cluster[0].dts) mov_write_edts_tag(pb, track); // PSP Movies require edts box if (track->tref_tag) mov_write_tref_tag(pb, track); @@ -1352,7 +1398,7 @@ static int mov_write_trak_tag(AVIOContext *pb, MOVTrack *track, AVStream *st) if (track->mode == MODE_PSP) mov_write_uuid_tag_psp(pb,track); // PSP Movies require this uuid box if (track->tag == MKTAG('r','t','p',' ')) - 
mov_write_udta_sdp(pb, track->rtp_ctx->streams[0]->codec, track->trackID); + mov_write_udta_sdp(pb, track->rtp_ctx, track->trackID); if (track->enc->codec_type == AVMEDIA_TYPE_VIDEO && track->mode == MODE_MOV) { double sample_aspect_ratio = av_q2d(st->sample_aspect_ratio); if (0.0 != sample_aspect_ratio && 1.0 != sample_aspect_ratio) @@ -1491,15 +1537,15 @@ static int mov_write_string_metadata(AVFormatContext *s, AVIOContext *pb, int long_style) { int l, lang = 0, len, len2; - AVMetadataTag *t, *t2 = NULL; + AVDictionaryEntry *t, *t2 = NULL; char tag2[16]; - if (!(t = av_metadata_get(s->metadata, tag, NULL, 0))) + if (!(t = av_dict_get(s->metadata, tag, NULL, 0))) return 0; len = strlen(t->key); snprintf(tag2, sizeof(tag2), "%s-", tag); - while ((t2 = av_metadata_get(s->metadata, tag2, t2, AV_METADATA_IGNORE_SUFFIX))) { + while ((t2 = av_dict_get(s->metadata, tag2, t2, AV_DICT_IGNORE_SUFFIX))) { len2 = strlen(t2->key); if (len2 == len+4 && !strcmp(t->value, t2->value) && (l=ff_mov_iso639_to_lang(&t2->key[len2-3], 1)) >= 0) { @@ -1514,7 +1560,7 @@ static int mov_write_string_metadata(AVFormatContext *s, AVIOContext *pb, static int mov_write_trkn_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s) { - AVMetadataTag *t = av_metadata_get(s->metadata, "track", NULL, 0); + AVDictionaryEntry *t = av_dict_get(s->metadata, "track", NULL, 0); int size = 0, track = t ? 
atoi(t->value) : 0; if (track) { avio_wb32(pb, 32); /* size */ @@ -1606,7 +1652,7 @@ static int mov_write_3gp_udta_tag(AVIOContext *pb, AVFormatContext *s, const char *tag, const char *str) { int64_t pos = avio_tell(pb); - AVMetadataTag *t = av_metadata_get(s->metadata, str, NULL, 0); + AVDictionaryEntry *t = av_dict_get(s->metadata, str, NULL, 0); if (!t || !utf8len(t->value)) return 0; avio_wb32(pb, 0); /* size */ @@ -1618,7 +1664,7 @@ static int mov_write_3gp_udta_tag(AVIOContext *pb, AVFormatContext *s, avio_wb16(pb, language_code("eng")); /* language */ avio_write(pb, t->value, strlen(t->value)+1); /* UTF8 string value */ if (!strcmp(tag, "albm") && - (t = av_metadata_get(s->metadata, "track", NULL, 0))) + (t = av_dict_get(s->metadata, "track", NULL, 0))) avio_w8(pb, atoi(t->value)); } return updateSize(pb, pos); @@ -1637,10 +1683,10 @@ static int mov_write_chpl_tag(AVIOContext *pb, AVFormatContext *s) for (i = 0; i < nb_chapters; i++) { AVChapter *c = s->chapters[i]; - AVMetadataTag *t; + AVDictionaryEntry *t; avio_wb64(pb, av_rescale_q(c->start, c->time_base, (AVRational){1,10000000})); - if ((t = av_metadata_get(c->metadata, "title", NULL, 0))) { + if ((t = av_dict_get(c->metadata, "title", NULL, 0))) { int len = FFMIN(strlen(t->value), 255); avio_w8(pb, len); avio_write(pb, t->value, len); @@ -1718,7 +1764,7 @@ static void mov_write_psp_udta_tag(AVIOContext *pb, static int mov_write_uuidusmt_tag(AVIOContext *pb, AVFormatContext *s) { - AVMetadataTag *title = av_metadata_get(s->metadata, "title", NULL, 0); + AVDictionaryEntry *title = av_dict_get(s->metadata, "title", NULL, 0); int64_t pos, pos2; if (title) { @@ -2072,13 +2118,13 @@ static void mov_create_chapter_track(AVFormatContext *s, int tracknum) for (i = 0; i < s->nb_chapters; i++) { AVChapter *c = s->chapters[i]; - AVMetadataTag *t; + AVDictionaryEntry *t; int64_t end = av_rescale_q(c->end, c->time_base, (AVRational){1,MOV_TIMESCALE}); pkt.pts = pkt.dts = av_rescale_q(c->start, c->time_base, 
(AVRational){1,MOV_TIMESCALE}); pkt.duration = end - pkt.dts; - if ((t = av_metadata_get(c->metadata, "title", NULL, 0))) { + if ((t = av_dict_get(c->metadata, "title", NULL, 0))) { len = strlen(t->value); pkt.size = len+2; pkt.data = av_malloc(pkt.size); @@ -2125,7 +2171,15 @@ static int mov_write_header(AVFormatContext *s) if (mov->mode & (MODE_MOV|MODE_IPOD) && s->nb_chapters) mov->chapter_track = mov->nb_streams++; +#if FF_API_FLAG_RTP_HINT if (s->flags & AVFMT_FLAG_RTP_HINT) { + av_log(s, AV_LOG_WARNING, "The RTP_HINT flag is deprecated, enable it " + "via the -movflags rtphint muxer option " + "instead.\n"); + mov->flags |= FF_MOV_FLAG_RTP_HINT; + } +#endif + if (mov->flags & FF_MOV_FLAG_RTP_HINT) { /* Add hint tracks for each audio and video stream */ hint_track = mov->nb_streams; for (i = 0; i < s->nb_streams; i++) { @@ -2144,7 +2198,7 @@ static int mov_write_header(AVFormatContext *s) for(i=0; i<s->nb_streams; i++){ AVStream *st= s->streams[i]; MOVTrack *track= &mov->tracks[i]; - AVMetadataTag *lang = av_metadata_get(st->metadata, "language", NULL,0); + AVDictionaryEntry *lang = av_dict_get(st->metadata, "language", NULL,0); track->enc = st->codec; track->language = ff_mov_iso639_to_lang(lang?lang->value:"und", mov->mode!=MODE_MOV); @@ -2221,7 +2275,7 @@ static int mov_write_header(AVFormatContext *s) if (mov->chapter_track) mov_create_chapter_track(s, mov->chapter_track); - if (s->flags & AVFMT_FLAG_RTP_HINT) { + if (mov->flags & FF_MOV_FLAG_RTP_HINT) { /* Initialize the hint tracks for each audio and video stream */ for (i = 0; i < s->nb_streams; i++) { AVStream *st = s->streams[i]; @@ -2298,6 +2352,7 @@ AVOutputFormat ff_mov_muxer = { mov_write_trailer, .flags = AVFMT_GLOBALHEADER, .codec_tag = (const AVCodecTag* const []){codec_movvideo_tags, codec_movaudio_tags, 0}, + .priv_class = &mov_muxer_class, }; #endif #if CONFIG_TGP_MUXER @@ -2314,6 +2369,7 @@ AVOutputFormat ff_tgp_muxer = { mov_write_trailer, .flags = AVFMT_GLOBALHEADER, .codec_tag = (const 
AVCodecTag* const []){codec_3gp_tags, 0}, + .priv_class = &mov_muxer_class, }; #endif #if CONFIG_MP4_MUXER @@ -2330,6 +2386,7 @@ AVOutputFormat ff_mp4_muxer = { mov_write_trailer, .flags = AVFMT_GLOBALHEADER, .codec_tag = (const AVCodecTag* const []){ff_mp4_obj_type, 0}, + .priv_class = &mov_muxer_class, }; #endif #if CONFIG_PSP_MUXER @@ -2346,6 +2403,7 @@ AVOutputFormat ff_psp_muxer = { mov_write_trailer, .flags = AVFMT_GLOBALHEADER, .codec_tag = (const AVCodecTag* const []){ff_mp4_obj_type, 0}, + .priv_class = &mov_muxer_class, }; #endif #if CONFIG_TG2_MUXER @@ -2362,6 +2420,7 @@ AVOutputFormat ff_tg2_muxer = { mov_write_trailer, .flags = AVFMT_GLOBALHEADER, .codec_tag = (const AVCodecTag* const []){codec_3gp_tags, 0}, + .priv_class = &mov_muxer_class, }; #endif #if CONFIG_IPOD_MUXER @@ -2378,5 +2437,6 @@ AVOutputFormat ff_ipod_muxer = { mov_write_trailer, .flags = AVFMT_GLOBALHEADER, .codec_tag = (const AVCodecTag* const []){codec_ipod_tags, 0}, + .priv_class = &mov_muxer_class, }; #endif diff --git a/libavformat/movenc.h b/libavformat/movenc.h index 182c5edc8c..610683fa6a 100644 --- a/libavformat/movenc.h +++ b/libavformat/movenc.h @@ -101,6 +101,7 @@ typedef struct MOVIndex { } MOVTrack; typedef struct MOVMuxContext { + const AVClass *av_class; int mode; int64_t time; int nb_streams; @@ -108,8 +109,13 @@ typedef struct MOVMuxContext { int64_t mdat_pos; uint64_t mdat_size; MOVTrack *tracks; + + int flags; + int rtp_flags; } MOVMuxContext; +#define FF_MOV_FLAG_RTP_HINT 1 + int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt); int ff_mov_init_hinting(AVFormatContext *s, int index, int src_index); diff --git a/libavformat/mp3dec.c b/libavformat/mp3dec.c index 1306888b46..64f6ea8c0e 100644 --- a/libavformat/mp3dec.c +++ b/libavformat/mp3dec.c @@ -21,6 +21,7 @@ #include "libavutil/avstring.h" #include "libavutil/intreadwrite.h" +#include "libavutil/dict.h" #include "avformat.h" #include "id3v2.h" #include "id3v1.h" @@ -149,7 +150,7 @@ static int 
mp3_read_header(AVFormatContext *s, off = avio_tell(s->pb); - if (!av_metadata_get(s->metadata, "", NULL, AV_METADATA_IGNORE_SUFFIX)) + if (!av_dict_get(s->metadata, "", NULL, AV_DICT_IGNORE_SUFFIX)) ff_id3v1_read(s); if (mp3_parse_vbr_tags(s, st, off) < 0) diff --git a/libavformat/mp3enc.c b/libavformat/mp3enc.c index 082450779f..50342bb950 100644 --- a/libavformat/mp3enc.c +++ b/libavformat/mp3enc.c @@ -27,22 +27,24 @@ #include "libavutil/avstring.h" #include "libavutil/intreadwrite.h" #include "libavutil/opt.h" +#include "libavcodec/mpegaudio.h" #include "libavcodec/mpegaudiodata.h" #include "libavcodec/mpegaudiodecheader.h" #include "libavformat/avio_internal.h" +#include "libavutil/dict.h" static int id3v1_set_string(AVFormatContext *s, const char *key, uint8_t *buf, int buf_size) { - AVMetadataTag *tag; - if ((tag = av_metadata_get(s->metadata, key, NULL, 0))) + AVDictionaryEntry *tag; + if ((tag = av_dict_get(s->metadata, key, NULL, 0))) av_strlcpy(buf, tag->value, buf_size); return !!tag; } static int id3v1_create_tag(AVFormatContext *s, uint8_t *buf) { - AVMetadataTag *tag; + AVDictionaryEntry *tag; int i, count = 0; memset(buf, 0, ID3v1_TAG_SIZE); /* fail safe */ @@ -54,13 +56,13 @@ static int id3v1_create_tag(AVFormatContext *s, uint8_t *buf) count += id3v1_set_string(s, "TALB", buf + 63, 30); //album count += id3v1_set_string(s, "TDRL", buf + 93, 4); //date count += id3v1_set_string(s, "comment", buf + 97, 30); - if ((tag = av_metadata_get(s->metadata, "TRCK", NULL, 0))) { //track + if ((tag = av_dict_get(s->metadata, "TRCK", NULL, 0))) { //track buf[125] = 0; buf[126] = atoi(tag->value); count++; } buf[127] = 0xFF; /* default to unknown genre */ - if ((tag = av_metadata_get(s->metadata, "TCON", NULL, 0))) { //genre + if ((tag = av_dict_get(s->metadata, "TCON", NULL, 0))) { //genre for(i = 0; i <= ID3v1_GENRE_MAX; i++) { if (!strcasecmp(tag->value, ff_id3v1_genre_str[i])) { buf[127] = i; @@ -185,7 +187,7 @@ static const AVClass mp3_muxer_class = { 
.version = LIBAVUTIL_VERSION_INT, }; -static int id3v2_check_write_tag(AVFormatContext *s, AVMetadataTag *t, const char table[][4], +static int id3v2_check_write_tag(AVFormatContext *s, AVDictionaryEntry *t, const char table[][4], enum ID3v2Encoding enc) { uint32_t tag; @@ -344,7 +346,7 @@ static void mp3_fix_xing(AVFormatContext *s) static int mp3_write_header(struct AVFormatContext *s) { MP3Context *mp3 = s->priv_data; - AVMetadataTag *t = NULL; + AVDictionaryEntry *t = NULL; int totlen = 0, enc = mp3->id3v2_version == 3 ? ID3v2_ENCODING_UTF16BOM : ID3v2_ENCODING_UTF8; int64_t size_pos, cur_pos; @@ -361,7 +363,7 @@ static int mp3_write_header(struct AVFormatContext *s) if (mp3->id3v2_version == 4) ff_metadata_conv(&s->metadata, ff_id3v2_4_metadata_conv, NULL); - while ((t = av_metadata_get(s->metadata, "", t, AV_METADATA_IGNORE_SUFFIX))) { + while ((t = av_dict_get(s->metadata, "", t, AV_DICT_IGNORE_SUFFIX))) { int ret; if ((ret = id3v2_check_write_tag(s, t, ff_id3v2_tags, enc)) > 0) { diff --git a/libavformat/mpc.c b/libavformat/mpc.c index 3544cf11cf..4d6854f13f 100644 --- a/libavformat/mpc.c +++ b/libavformat/mpc.c @@ -23,6 +23,7 @@ #include "avformat.h" #include "apetag.h" #include "id3v1.h" +#include "libavutil/dict.h" #define MPC_FRAMESIZE 1152 #define DELAY_FRAMES 32 @@ -96,7 +97,7 @@ static int mpc_read_header(AVFormatContext *s, AVFormatParameters *ap) if (s->pb->seekable) { int64_t pos = avio_tell(s->pb); ff_ape_parse_tag(s); - if (!av_metadata_get(s->metadata, "", NULL, AV_METADATA_IGNORE_SUFFIX)) + if (!av_dict_get(s->metadata, "", NULL, AV_DICT_IGNORE_SUFFIX)) ff_id3v1_read(s); avio_seek(s->pb, pos, SEEK_SET); } diff --git a/libavformat/mpeg.c b/libavformat/mpeg.c index f9f818951f..1bc4480cd8 100644 --- a/libavformat/mpeg.c +++ b/libavformat/mpeg.c @@ -23,8 +23,6 @@ #include "internal.h" #include "mpeg.h" -//#define DEBUG_SEEK - #undef NDEBUG #include <assert.h> @@ -572,10 +570,9 @@ static int mpegps_read_packet(AVFormatContext *s, pkt->dts = dts; 
pkt->pos = dummy_pos; pkt->stream_index = st->index; -#if 0 - av_log(s, AV_LOG_DEBUG, "%d: pts=%0.3f dts=%0.3f size=%d\n", - pkt->stream_index, pkt->pts / 90000.0, pkt->dts / 90000.0, pkt->size); -#endif + av_dlog(s, "%d: pts=%0.3f dts=%0.3f size=%d\n", + pkt->stream_index, pkt->pts / 90000.0, pkt->dts / 90000.0, + pkt->size); return 0; } @@ -587,18 +584,13 @@ static int64_t mpegps_read_dts(AVFormatContext *s, int stream_index, int64_t pos, pts, dts; pos = *ppos; -#ifdef DEBUG_SEEK - printf("read_dts: pos=0x%"PRIx64" next=%d -> ", pos, find_next); -#endif if (avio_seek(s->pb, pos, SEEK_SET) < 0) return AV_NOPTS_VALUE; for(;;) { len = mpegps_read_pes_header(s, &pos, &startcode, &pts, &dts); if (len < 0) { -#ifdef DEBUG_SEEK - printf("none (ret=%d)\n", len); -#endif + av_dlog(s, "none (ret=%d)\n", len); return AV_NOPTS_VALUE; } if (startcode == s->streams[stream_index]->id && @@ -607,9 +599,8 @@ static int64_t mpegps_read_dts(AVFormatContext *s, int stream_index, } avio_skip(s->pb, len); } -#ifdef DEBUG_SEEK - printf("pos=0x%"PRIx64" dts=0x%"PRIx64" %0.3f\n", pos, dts, dts / 90000.0); -#endif + av_dlog(s, "pos=0x%"PRIx64" dts=0x%"PRIx64" %0.3f\n", + pos, dts, dts / 90000.0); *ppos = pos; return dts; } diff --git a/libavformat/mpegenc.c b/libavformat/mpegenc.c index e42fac4ceb..9c808d0f1e 100644 --- a/libavformat/mpegenc.c +++ b/libavformat/mpegenc.c @@ -25,7 +25,6 @@ #include "mpeg.h" #define MAX_PAYLOAD_SIZE 4096 -//#define DEBUG_SEEK #undef NDEBUG #include <assert.h> @@ -1078,10 +1077,8 @@ retry: best_dts= pkt_desc->dts; } -#if 0 - av_log(ctx, AV_LOG_DEBUG, "bumping scr, scr:%f, dts:%f\n", - scr/90000.0, best_dts/90000.0); -#endif + av_dlog(ctx, "bumping scr, scr:%f, dts:%f\n", + scr / 90000.0, best_dts / 90000.0); if(best_dts == INT64_MAX) return 0; diff --git a/libavformat/mpegts.c b/libavformat/mpegts.c index 6452861e38..89109c0689 100644 --- a/libavformat/mpegts.c +++ b/libavformat/mpegts.c @@ -19,12 +19,13 @@ * Foundation, Inc., 51 Franklin Street, Fifth 
Floor, Boston, MA 02110-1301 USA */ -//#define DEBUG -//#define DEBUG_SEEK //#define USE_SYNCPOINT_SEARCH #include "libavutil/crc.h" #include "libavutil/intreadwrite.h" +#include "libavutil/log.h" +#include "libavutil/dict.h" +#include "libavutil/opt.h" #include "libavcodec/bytestream.h" #include "avformat.h" #include "mpegts.h" @@ -86,6 +87,7 @@ struct Program { }; struct MpegTSContext { + const AVClass *class; /* user data */ AVFormatContext *stream; /** raw packet size, including FEC if present */ @@ -122,6 +124,19 @@ struct MpegTSContext { MpegTSFilter *pids[NB_PID_MAX]; }; +static const AVOption options[] = { + {"compute_pcr", "Compute exact PCR for each transport stream packet.", offsetof(MpegTSContext, mpeg2ts_compute_pcr), FF_OPT_TYPE_INT, + {.dbl = 0}, 0, 1, AV_OPT_FLAG_DECODING_PARAM }, + { NULL }, +}; + +static const AVClass mpegtsraw_class = { + .class_name = "mpegtsraw demuxer", + .item_name = av_default_item_name, + .option = options, + .version = LIBAVUTIL_VERSION_INT, +}; + /* TS stream handling */ enum MpegTSState { @@ -205,6 +220,17 @@ static void add_pid_to_pmt(MpegTSContext *ts, unsigned int programid, unsigned i p->pids[p->nb_pids++] = pid; } +static void set_pcr_pid(AVFormatContext *s, unsigned int programid, unsigned int pid) +{ + int i; + for(i=0; i<s->nb_programs; i++) { + if(s->programs[i]->id == programid) { + s->programs[i]->pcr_pid = pid; + break; + } + } +} + /** * \brief discard_pid() decides if the pid is to be discarded according * to caller's programs selection @@ -928,7 +954,7 @@ int ff_parse_mpeg2_descriptor(AVFormatContext *fc, AVStream *st, int stream_type language[1] = get8(pp, desc_end); language[2] = get8(pp, desc_end); language[3] = 0; - av_metadata_set2(&st->metadata, "language", language, 0); + av_dict_set(&st->metadata, "language", language, 0); break; case 0x59: /* subtitling descriptor */ language[0] = get8(pp, desc_end); @@ -957,7 +983,7 @@ int ff_parse_mpeg2_descriptor(AVFormatContext *fc, AVStream *st, int 
stream_type } } *pp += 4; - av_metadata_set2(&st->metadata, "language", language, 0); + av_dict_set(&st->metadata, "language", language, 0); break; case 0x0a: /* ISO 639 language descriptor */ for (i = 0; i + 4 <= desc_len; i += 4) { @@ -973,7 +999,7 @@ int ff_parse_mpeg2_descriptor(AVFormatContext *fc, AVStream *st, int stream_type } if (i) { language[i - 1] = 0; - av_metadata_set2(&st->metadata, "language", language, 0); + av_dict_set(&st->metadata, "language", language, 0); } break; case 0x05: /* registration descriptor */ @@ -983,6 +1009,9 @@ int ff_parse_mpeg2_descriptor(AVFormatContext *fc, AVStream *st, int stream_type stream_type == STREAM_TYPE_PRIVATE_DATA) mpegts_find_stream_type(st, st->codec->codec_tag, REGD_types); break; + case 0x52: /* stream identifier descriptor */ + st->stream_identifier = 1 + get8(pp, desc_end); + break; default: break; } @@ -1004,10 +1033,8 @@ static void pmt_cb(MpegTSFilter *filter, const uint8_t *section, int section_len int mp4_dec_config_descr_len = 0; int mp4_es_id = 0; -#ifdef DEBUG av_dlog(ts->stream, "PMT: len %i\n", section_len); - av_hex_dump_log(ts->stream, AV_LOG_DEBUG, (uint8_t *)section, section_len); -#endif + hex_dump_debug(ts->stream, (uint8_t *)section, section_len); p_end = section + section_len - 4; p = section; @@ -1025,6 +1052,7 @@ static void pmt_cb(MpegTSFilter *filter, const uint8_t *section, int section_len if (pcr_pid < 0) return; add_pid_to_pmt(ts, h->id, pcr_pid); + set_pcr_pid(ts->stream, h->id, pcr_pid); av_dlog(ts->stream, "pcr_pid=0x%x\n", pcr_pid); @@ -1123,11 +1151,11 @@ static void pat_cb(MpegTSFilter *filter, const uint8_t *section, int section_len SectionHeader h1, *h = &h1; const uint8_t *p, *p_end; int sid, pmt_pid; + AVProgram *program; -#ifdef DEBUG av_dlog(ts->stream, "PAT:\n"); - av_hex_dump_log(ts->stream, AV_LOG_DEBUG, (uint8_t *)section, section_len); -#endif + hex_dump_debug(ts->stream, (uint8_t *)section, section_len); + p_end = section + section_len - 4; p = section; if 
(parse_section_header(h, &p, p_end) < 0) @@ -1135,6 +1163,8 @@ static void pat_cb(MpegTSFilter *filter, const uint8_t *section, int section_len if (h->tid != PAT_TID) return; + ts->stream->ts_id = h->id; + clear_programs(ts); for(;;) { sid = get16(&p, p_end); @@ -1149,7 +1179,9 @@ static void pat_cb(MpegTSFilter *filter, const uint8_t *section, int section_len if (sid == 0x0000) { /* NIT info */ } else { - av_new_program(ts->stream, sid); + program = av_new_program(ts->stream, sid); + program->program_num = sid; + program->pmt_pid = pmt_pid; if (ts->pids[pmt_pid]) mpegts_close_filter(ts, ts->pids[pmt_pid]); mpegts_open_section_filter(ts, pmt_pid, pmt_cb, ts, 1); @@ -1168,10 +1200,8 @@ static void sdt_cb(MpegTSFilter *filter, const uint8_t *section, int section_len int onid, val, sid, desc_list_len, desc_tag, desc_len, service_type; char *name, *provider_name; -#ifdef DEBUG av_dlog(ts->stream, "SDT:\n"); - av_hex_dump_log(ts->stream, AV_LOG_DEBUG, (uint8_t *)section, section_len); -#endif + hex_dump_debug(ts->stream, (uint8_t *)section, section_len); p_end = section + section_len - 4; p = section; @@ -1222,8 +1252,8 @@ static void sdt_cb(MpegTSFilter *filter, const uint8_t *section, int section_len if (name) { AVProgram *program = av_new_program(ts->stream, sid); if(program) { - av_metadata_set2(&program->metadata, "service_name", name, 0); - av_metadata_set2(&program->metadata, "service_provider", provider_name, 0); + av_dict_set(&program->metadata, "service_name", name, 0); + av_dict_set(&program->metadata, "service_provider", provider_name, 0); } } av_free(name); @@ -1460,13 +1490,16 @@ static int mpegts_read_header(AVFormatContext *s, int len; int64_t pos; +#if FF_API_FORMAT_PARAMETERS if (ap) { - ts->mpeg2ts_compute_pcr = ap->mpeg2ts_compute_pcr; + if (ap->mpeg2ts_compute_pcr) + ts->mpeg2ts_compute_pcr = ap->mpeg2ts_compute_pcr; if(ap->mpeg2ts_raw){ av_log(s, AV_LOG_ERROR, "use mpegtsraw_demuxer!\n"); return -1; } } +#endif /* read the first 1024 bytes to get 
packet size */ pos = avio_tell(pb); @@ -1544,10 +1577,8 @@ static int mpegts_read_header(AVFormatContext *s, s->bit_rate = (TS_PACKET_SIZE * 8) * 27e6 / ts->pcr_incr; st->codec->bit_rate = s->bit_rate; st->start_time = ts->cur_pcr; -#if 0 - av_log(ts->stream, AV_LOG_DEBUG, "start=%0.3f pcr=%0.3f incr=%d\n", - st->start_time / 1000000.0, pcrs[0] / 27e6, ts->pcr_incr); -#endif + av_dlog(ts->stream, "start=%0.3f pcr=%0.3f incr=%d\n", + st->start_time / 1000000.0, pcrs[0] / 27e6, ts->pcr_incr); } avio_seek(pb, pos, SEEK_SET); @@ -1888,4 +1919,5 @@ AVInputFormat ff_mpegtsraw_demuxer = { #ifdef USE_SYNCPOINT_SEARCH .read_seek2 = read_seek2, #endif + .priv_class = &mpegtsraw_class, }; diff --git a/libavformat/mpegtsenc.c b/libavformat/mpegtsenc.c index e7d70015aa..7e9647257f 100644 --- a/libavformat/mpegtsenc.c +++ b/libavformat/mpegtsenc.c @@ -21,6 +21,7 @@ #include "libavutil/bswap.h" #include "libavutil/crc.h" +#include "libavutil/dict.h" #include "libavutil/opt.h" #include "libavcodec/mpegvideo.h" #include "avformat.h" @@ -53,6 +54,7 @@ typedef struct MpegTSService { } MpegTSService; typedef struct MpegTSWrite { + const AVClass *av_class; MpegTSSection pat; /* MPEG2 pat table */ MpegTSSection sdt; /* MPEG2 sdt table context */ MpegTSService **services; @@ -243,7 +245,7 @@ static void mpegts_write_pmt(AVFormatContext *s, MpegTSService *service) for(i = 0; i < s->nb_streams; i++) { AVStream *st = s->streams[i]; MpegTSWriteStream *ts_st = st->priv_data; - AVMetadataTag *lang = av_metadata_get(st->metadata, "language", NULL,0); + AVDictionaryEntry *lang = av_dict_get(st->metadata, "language", NULL,0); switch(st->codec->codec_id) { case CODEC_ID_MPEG1VIDEO: case CODEC_ID_MPEG2VIDEO: @@ -442,7 +444,7 @@ static int mpegts_write_header(AVFormatContext *s) MpegTSWriteStream *ts_st; MpegTSService *service; AVStream *st, *pcr_st = NULL; - AVMetadataTag *title, *provider; + AVDictionaryEntry *title, *provider; int i, j; const char *service_name; const char *provider_name; @@ 
-451,11 +453,11 @@ static int mpegts_write_header(AVFormatContext *s) ts->tsid = ts->transport_stream_id; ts->onid = ts->original_network_id; /* allocate a single DVB service */ - title = av_metadata_get(s->metadata, "service_name", NULL, 0); + title = av_dict_get(s->metadata, "service_name", NULL, 0); if (!title) - title = av_metadata_get(s->metadata, "title", NULL, 0); + title = av_dict_get(s->metadata, "title", NULL, 0); service_name = title ? title->value : DEFAULT_SERVICE_NAME; - provider = av_metadata_get(s->metadata, "service_provider", NULL, 0); + provider = av_dict_get(s->metadata, "service_provider", NULL, 0); provider_name = provider ? provider->value : DEFAULT_PROVIDER_NAME; service = mpegts_add_service(ts, ts->service_id, provider_name, service_name); service->pmt.write_packet = section_write_packet; diff --git a/libavformat/mpegvideodec.c b/libavformat/mpegvideodec.c index d1ba1f8c2c..d38d6955e4 100644 --- a/libavformat/mpegvideodec.c +++ b/libavformat/mpegvideodec.c @@ -55,13 +55,4 @@ static int mpegvideo_probe(AVProbeData *p) return 0; } -AVInputFormat ff_mpegvideo_demuxer = { - "mpegvideo", - NULL_IF_CONFIG_SMALL("raw MPEG video"), - 0, - mpegvideo_probe, - ff_raw_video_read_header, - ff_raw_read_partial_packet, - .flags= AVFMT_GENERIC_INDEX, - .value = CODEC_ID_MPEG1VIDEO, -}; +FF_DEF_RAWVIDEO_DEMUXER(mpegvideo, "raw MPEG video", mpegvideo_probe, NULL, CODEC_ID_MPEG1VIDEO) diff --git a/libavformat/mxfdec.c b/libavformat/mxfdec.c index 6b44b8f3e3..8548c792f6 100644 --- a/libavformat/mxfdec.c +++ b/libavformat/mxfdec.c @@ -309,7 +309,7 @@ static int mxf_read_packet(AVFormatContext *s, AVPacket *pkt) if (klv_read_packet(&klv, s->pb) < 0) return -1; PRINT_KEY(s, "read packet", klv.key); - av_dlog(s, "size %lld offset %#llx\n", klv.length, klv.offset); + av_dlog(s, "size %"PRIu64" offset %#"PRIx64"\n", klv.length, klv.offset); if (IS_KLV_KEY(klv.key, mxf_encrypted_triplet_key)) { int res = mxf_decrypt_triplet(s, pkt, &klv); if (res < 0) { @@ -522,8 
+522,8 @@ static int mxf_read_index_table_segment(void *arg, AVIOContext *pb, int tag, int case 0x3F06: av_dlog(NULL, "IndexSID %d\n", avio_rb32(pb)); break; case 0x3F07: av_dlog(NULL, "BodySID %d\n", avio_rb32(pb)); break; case 0x3F0B: av_dlog(NULL, "IndexEditRate %d/%d\n", avio_rb32(pb), avio_rb32(pb)); break; - case 0x3F0C: av_dlog(NULL, "IndexStartPosition %lld\n", avio_rb64(pb)); break; - case 0x3F0D: av_dlog(NULL, "IndexDuration %lld\n", avio_rb64(pb)); break; + case 0x3F0C: av_dlog(NULL, "IndexStartPosition %"PRIu64"\n", avio_rb64(pb)); break; + case 0x3F0D: av_dlog(NULL, "IndexDuration %"PRIu64"\n", avio_rb64(pb)); break; } return 0; } @@ -920,7 +920,7 @@ static int mxf_read_header(AVFormatContext *s, AVFormatParameters *ap) if (klv_read_packet(&klv, s->pb) < 0) return -1; PRINT_KEY(s, "read header", klv.key); - av_dlog(s, "size %lld offset %#llx\n", klv.length, klv.offset); + av_dlog(s, "size %"PRIu64" offset %#"PRIx64"\n", klv.length, klv.offset); if (IS_KLV_KEY(klv.key, mxf_encrypted_triplet_key) || IS_KLV_KEY(klv.key, mxf_essence_element_key)) { /* FIXME avoid seek */ diff --git a/libavformat/network.h b/libavformat/network.h index f2a0c97d77..f7e19b196e 100644 --- a/libavformat/network.h +++ b/libavformat/network.h @@ -21,7 +21,10 @@ #ifndef AVFORMAT_NETWORK_H #define AVFORMAT_NETWORK_H +#include <errno.h> + #include "config.h" +#include "libavutil/error.h" #include "os_support.h" #if HAVE_WINSOCK2_H @@ -33,7 +36,8 @@ #define ECONNREFUSED WSAECONNREFUSED #define EINPROGRESS WSAEINPROGRESS -static inline int ff_neterrno() { +static inline int ff_neterrno(void) +{ int err = WSAGetLastError(); switch (err) { case WSAEWOULDBLOCK: diff --git a/libavformat/nsvdec.c b/libavformat/nsvdec.c index 300a8fd7f8..4898187f3a 100644 --- a/libavformat/nsvdec.c +++ b/libavformat/nsvdec.c @@ -20,10 +20,10 @@ */ #include "avformat.h" #include "riff.h" +#include "libavutil/dict.h" +#include "libavutil/intreadwrite.h" -//#define DEBUG //#define DEBUG_DUMP_INDEX // XXX 
dumbdriving-271.nsv breaks with it commented!! -//#define DEBUG_SEEK #define CHECK_SUBSEQUENT_NSVS //#define DISABLE_AUDIO @@ -267,7 +267,8 @@ static int nsv_parse_NSVf_header(AVFormatContext *s, AVFormatParameters *ap) { NSVContext *nsv = s->priv_data; AVIOContext *pb = s->pb; - unsigned int file_size, size; + unsigned int av_unused file_size; + unsigned int size; int64_t duration; int strings_size; int table_entries; @@ -327,7 +328,7 @@ static int nsv_parse_NSVf_header(AVFormatContext *s, AVFormatParameters *ap) break; *p++ = '\0'; av_dlog(s, "NSV NSVf INFO: %s='%s'\n", token, value); - av_metadata_set2(&s->metadata, token, value, 0); + av_dict_set(&s->metadata, token, value, 0); } av_free(strings); } @@ -531,7 +532,7 @@ static int nsv_read_header(AVFormatContext *s, AVFormatParameters *ap) err = nsv_read_chunk(s, 1); av_dlog(s, "parsed header\n"); - return 0; + return err; } static int nsv_read_chunk(AVFormatContext *s, int fill_header) @@ -546,7 +547,7 @@ static int nsv_read_chunk(AVFormatContext *s, int fill_header) uint32_t vsize; uint16_t asize; uint16_t auxsize; - uint32_t auxtag; + uint32_t av_unused auxtag; av_dlog(s, "%s(%d)\n", __FUNCTION__, fill_header); @@ -736,10 +737,8 @@ static int nsv_read_close(AVFormatContext *s) static int nsv_probe(AVProbeData *p) { - int i; - int score; - int vsize, asize, auxcount; - score = 0; + int i, score = 0; + av_dlog(NULL, "nsv_probe(), buf_size %d\n", p->buf_size); /* check file header */ /* streamed files might not have any header */ @@ -751,19 +750,14 @@ static int nsv_probe(AVProbeData *p) /* seems the servers don't bother starting clean chunks... 
*/ /* sometimes even the first header is at 9KB or something :^) */ for (i = 1; i < p->buf_size - 3; i++) { - if (p->buf[i+0] == 'N' && p->buf[i+1] == 'S' && - p->buf[i+2] == 'V' && p->buf[i+3] == 's') { - score = AVPROBE_SCORE_MAX/5; + if (AV_RL32(p->buf + i) == AV_RL32("NSVs")) { /* Get the chunk size and check if at the end we are getting 0xBEEF */ - auxcount = p->buf[i+19]; - vsize = p->buf[i+20] | p->buf[i+21] << 8; - asize = p->buf[i+22] | p->buf[i+23] << 8; - vsize = (vsize << 4) | (auxcount >> 4); - if ((asize + vsize + i + 23) < p->buf_size - 2) { - if (p->buf[i+23+asize+vsize+1] == 0xEF && - p->buf[i+23+asize+vsize+2] == 0xBE) - return AVPROBE_SCORE_MAX-20; - } + int vsize = AV_RL24(p->buf+i+19) >> 4; + int asize = AV_RL16(p->buf+i+22); + int offset = i + 23 + asize + vsize + 1; + if (offset <= p->buf_size - 2 && AV_RL16(p->buf + offset) == 0xBEEF) + return 4*AVPROBE_SCORE_MAX/5; + score = AVPROBE_SCORE_MAX/5; } } /* so we'll have more luck on extension... */ diff --git a/libavformat/nutdec.c b/libavformat/nutdec.c index 2957a2b173..8af0c48bfc 100644 --- a/libavformat/nutdec.c +++ b/libavformat/nutdec.c @@ -23,6 +23,7 @@ #include <strings.h> #include "libavutil/avstring.h" #include "libavutil/bswap.h" +#include "libavutil/dict.h" #include "libavutil/tree.h" #include "avio_internal.h" #include "nut.h" @@ -194,7 +195,6 @@ static int decode_main_header(NUTContext *nut){ uint64_t tmp, end; unsigned int stream_count; int i, j, tmp_stream, tmp_mul, tmp_pts, tmp_size, count, tmp_res, tmp_head_idx; - int64_t tmp_match; end= get_packetheader(nut, bc, 1, MAIN_STARTCODE); end += avio_tell(bc); @@ -222,7 +222,6 @@ static int decode_main_header(NUTContext *nut){ tmp_pts=0; tmp_mul=1; tmp_stream=0; - tmp_match= 1-(1LL<<62); tmp_head_idx= 0; for(i=0; i<256;){ int tmp_flags = ffio_read_varlen(bc); @@ -236,7 +235,7 @@ static int decode_main_header(NUTContext *nut){ else tmp_res = 0; if(tmp_fields>5) count = ffio_read_varlen(bc); else count = tmp_mul - tmp_size; - 
if(tmp_fields>6) tmp_match = get_s(bc); + if(tmp_fields>6) get_s(bc); if(tmp_fields>7) tmp_head_idx= ffio_read_varlen(bc); while(tmp_fields-- > 8) @@ -407,7 +406,7 @@ static int decode_info_header(NUTContext *nut){ const char *type; AVChapter *chapter= NULL; AVStream *st= NULL; - AVMetadata **metadata = NULL; + AVDictionary **metadata = NULL; end= get_packetheader(nut, bc, 1, INFO_STARTCODE); end += avio_tell(bc); @@ -465,7 +464,7 @@ static int decode_info_header(NUTContext *nut){ } if(metadata && strcasecmp(name,"Uses") && strcasecmp(name,"Depends") && strcasecmp(name,"Replaces")) - av_metadata_set2(metadata, name, str_value, 0); + av_dict_set(metadata, name, str_value, 0); } } diff --git a/libavformat/nutenc.c b/libavformat/nutenc.c index 4756ca2a1a..6ec9bcd8fd 100644 --- a/libavformat/nutenc.c +++ b/libavformat/nutenc.c @@ -21,6 +21,7 @@ #include "libavutil/intreadwrite.h" #include "libavutil/tree.h" +#include "libavutil/dict.h" #include "libavcodec/mpegaudiodata.h" #include "nut.h" #include "internal.h" @@ -432,7 +433,7 @@ static int add_info(AVIOContext *bc, const char *type, const char *value){ static int write_globalinfo(NUTContext *nut, AVIOContext *bc){ AVFormatContext *s= nut->avf; - AVMetadataTag *t = NULL; + AVDictionaryEntry *t = NULL; AVIOContext *dyn_bc; uint8_t *dyn_buf=NULL; int count=0, dyn_size; @@ -440,7 +441,7 @@ static int write_globalinfo(NUTContext *nut, AVIOContext *bc){ if(ret < 0) return ret; - while ((t = av_metadata_get(s->metadata, "", t, AV_METADATA_IGNORE_SUFFIX))) + while ((t = av_dict_get(s->metadata, "", t, AV_DICT_IGNORE_SUFFIX))) count += add_info(dyn_bc, t->key, t->value); ff_put_v(bc, 0); //stream_if_plus1 @@ -491,7 +492,7 @@ static int write_chapter(NUTContext *nut, AVIOContext *bc, int id) { AVIOContext *dyn_bc; uint8_t *dyn_buf = NULL; - AVMetadataTag *t = NULL; + AVDictionaryEntry *t = NULL; AVChapter *ch = nut->avf->chapters[id]; int ret, dyn_size, count = 0; @@ -504,7 +505,7 @@ static int write_chapter(NUTContext *nut, 
AVIOContext *bc, int id) put_tt(nut, nut->chapter[id].time_base, bc, ch->start); // chapter_start ff_put_v(bc, ch->end - ch->start); // chapter_len - while ((t = av_metadata_get(ch->metadata, "", t, AV_METADATA_IGNORE_SUFFIX))) + while ((t = av_dict_get(ch->metadata, "", t, AV_DICT_IGNORE_SUFFIX))) count += add_info(dyn_bc, t->key, t->value); ff_put_v(bc, count); diff --git a/libavformat/oggdec.c b/libavformat/oggdec.c index c799ce5205..655da35dd4 100644 --- a/libavformat/oggdec.c +++ b/libavformat/oggdec.c @@ -193,18 +193,17 @@ static int ogg_read_page(AVFormatContext *s, int *str) AVIOContext *bc = s->pb; struct ogg *ogg = s->priv_data; struct ogg_stream *os; - int i = 0; + int ret, i = 0; int flags, nsegs; uint64_t gp; uint32_t serial; - uint32_t seq; - uint32_t crc; int size, idx; uint8_t sync[4]; int sp = 0; - if (avio_read (bc, sync, 4) < 4) - return -1; + ret = avio_read (bc, sync, 4); + if (ret < 4) + return ret < 0 ? ret : AVERROR_EOF; do{ int c; @@ -216,23 +215,22 @@ static int ogg_read_page(AVFormatContext *s, int *str) c = avio_r8(bc); if (url_feof(bc)) - return -1; + return AVERROR_EOF; sync[sp++ & 3] = c; }while (i++ < MAX_PAGE_SIZE); if (i >= MAX_PAGE_SIZE){ av_log (s, AV_LOG_INFO, "ogg, can't find sync word\n"); - return -1; + return AVERROR_INVALIDDATA; } if (avio_r8(bc) != 0) /* version */ - return -1; + return AVERROR_INVALIDDATA; flags = avio_r8(bc); gp = avio_rl64 (bc); serial = avio_rl32 (bc); - seq = avio_rl32 (bc); - crc = avio_rl32 (bc); + avio_skip(bc, 8); /* seq, crc */ nsegs = avio_r8(bc); idx = ogg_find_stream (ogg, serial); @@ -251,7 +249,7 @@ static int ogg_read_page(AVFormatContext *s, int *str) idx = ogg_new_stream(s, serial, 1); } if (idx < 0) - return -1; + return idx; } os = ogg->streams + idx; @@ -260,8 +258,9 @@ static int ogg_read_page(AVFormatContext *s, int *str) if(os->psize > 0) ogg_new_buf(ogg, idx); - if (avio_read (bc, os->segments, nsegs) < nsegs) - return -1; + ret = avio_read (bc, os->segments, nsegs); + if (ret < 
nsegs) + return ret < 0 ? ret : AVERROR_EOF; os->nsegs = nsegs; os->segp = 0; @@ -292,8 +291,9 @@ static int ogg_read_page(AVFormatContext *s, int *str) os->buf = nb; } - if (avio_read (bc, os->buf + os->bufpos, size) < size) - return -1; + ret = avio_read (bc, os->buf + os->bufpos, size); + if (ret < size) + return ret < 0 ? ret : AVERROR_EOF; os->bufpos += size; os->granule = gp; @@ -309,35 +309,32 @@ static int ogg_packet(AVFormatContext *s, int *str, int *dstart, int *dsize, int64_t *fpos) { struct ogg *ogg = s->priv_data; - int idx, i; + int idx, i, ret; struct ogg_stream *os; int complete = 0; int segp = 0, psize = 0; -#if 0 - av_log (s, AV_LOG_DEBUG, "ogg_packet: curidx=%i\n", ogg->curidx); -#endif + av_dlog(s, "ogg_packet: curidx=%i\n", ogg->curidx); do{ idx = ogg->curidx; while (idx < 0){ - if (ogg_read_page (s, &idx) < 0) - return -1; + ret = ogg_read_page (s, &idx); + if (ret < 0) + return ret; } os = ogg->streams + idx; -#if 0 - av_log (s, AV_LOG_DEBUG, - "ogg_packet: idx=%d pstart=%d psize=%d segp=%d nsegs=%d\n", + av_dlog(s, "ogg_packet: idx=%d pstart=%d psize=%d segp=%d nsegs=%d\n", idx, os->pstart, os->psize, os->segp, os->nsegs); -#endif if (!os->codec){ if (os->header < 0){ os->codec = ogg_find_codec (os->buf, os->bufpos); if (!os->codec){ + av_log(s, AV_LOG_WARNING, "Codec not found\n"); os->header = 0; return 0; } @@ -364,11 +361,6 @@ static int ogg_packet(AVFormatContext *s, int *str, int *dstart, int *dsize, } }while (!complete); -#if 0 - av_log (s, AV_LOG_DEBUG, - "ogg_packet: idx %i, frame size %i, start %i\n", - idx, os->psize, os->pstart); -#endif if (os->granule == -1) av_log(s, AV_LOG_WARNING, "Page at %"PRId64" is missing granule\n", os->page_pos); @@ -418,6 +410,8 @@ static int ogg_packet(AVFormatContext *s, int *str, int *dstart, int *dsize, *fpos = os->sync_pos; os->pstart += os->psize; os->psize = 0; + if(os->pstart == os->bufpos) + os->bufpos = os->pstart = 0; os->sync_pos = os->page_pos; } @@ -439,15 +433,15 @@ static int 
ogg_packet(AVFormatContext *s, int *str, int *dstart, int *dsize, static int ogg_get_headers(AVFormatContext *s) { struct ogg *ogg = s->priv_data; + int ret; do{ - if (ogg_packet (s, NULL, NULL, NULL, NULL) < 0) - return -1; + ret = ogg_packet (s, NULL, NULL, NULL, NULL); + if (ret < 0) + return ret; }while (!ogg->headers); -#if 0 - av_log (s, AV_LOG_DEBUG, "found headers\n"); -#endif + av_dlog(s, "found headers\n"); return 0; } @@ -503,11 +497,12 @@ static int ogg_get_length(AVFormatContext *s) static int ogg_read_header(AVFormatContext *s, AVFormatParameters *ap) { struct ogg *ogg = s->priv_data; - int i; + int ret, i; ogg->curidx = -1; //linear headers seek from start - if (ogg_get_headers (s) < 0){ - return -1; + ret = ogg_get_headers (s); + if (ret < 0){ + return ret; } for (i = 0; i < ogg->nstreams; i++) diff --git a/libavformat/oggdec.h b/libavformat/oggdec.h index 82efd97aec..184a628622 100644 --- a/libavformat/oggdec.h +++ b/libavformat/oggdec.h @@ -112,7 +112,7 @@ extern const struct ogg_codec ff_speex_codec; extern const struct ogg_codec ff_theora_codec; extern const struct ogg_codec ff_vorbis_codec; -int ff_vorbis_comment(AVFormatContext *ms, AVMetadata **m, const uint8_t *buf, int size); +int ff_vorbis_comment(AVFormatContext *ms, AVDictionary **m, const uint8_t *buf, int size); static inline int ogg_find_stream (struct ogg * ogg, int serial) diff --git a/libavformat/oggenc.c b/libavformat/oggenc.c index 45e677fb08..04f536f157 100644 --- a/libavformat/oggenc.c +++ b/libavformat/oggenc.c @@ -241,7 +241,7 @@ static int ogg_buffer_data(AVFormatContext *s, AVStream *st, } static uint8_t *ogg_write_vorbiscomment(int offset, int bitexact, - int *header_len, AVMetadata **m, int framing_bit) + int *header_len, AVDictionary **m, int framing_bit) { const char *vendor = bitexact ? 
"ffmpeg" : LIBAVFORMAT_IDENT; int size; @@ -267,7 +267,7 @@ static uint8_t *ogg_write_vorbiscomment(int offset, int bitexact, static int ogg_build_flac_headers(AVCodecContext *avctx, OGGStreamContext *oggstream, int bitexact, - AVMetadata **m) + AVDictionary **m) { enum FLACExtradataFormat format; uint8_t *streaminfo; @@ -307,7 +307,7 @@ static int ogg_build_flac_headers(AVCodecContext *avctx, static int ogg_build_speex_headers(AVCodecContext *avctx, OGGStreamContext *oggstream, int bitexact, - AVMetadata **m) + AVDictionary **m) { uint8_t *p; diff --git a/libavformat/oggparsecelt.c b/libavformat/oggparsecelt.c index bbb695f438..2adc06046a 100644 --- a/libavformat/oggparsecelt.c +++ b/libavformat/oggparsecelt.c @@ -41,8 +41,8 @@ static int celt_header(AVFormatContext *s, int idx) /* Main header */ - uint32_t version, header_size, sample_rate, nb_channels, frame_size; - uint32_t overlap, bytes_per_packet, extra_headers; + uint32_t version, header_size av_unused, sample_rate, nb_channels, frame_size; + uint32_t overlap, bytes_per_packet av_unused, extra_headers; uint8_t *extradata; extradata = av_malloc(2 * sizeof(uint32_t) + diff --git a/libavformat/oggparseogm.c b/libavformat/oggparseogm.c index dda5be601a..9526c3acc1 100644 --- a/libavformat/oggparseogm.c +++ b/libavformat/oggparseogm.c @@ -39,7 +39,6 @@ ogm_header(AVFormatContext *s, int idx) const uint8_t *p = os->buf + os->pstart; uint64_t time_unit; uint64_t spu; - uint32_t default_len; if(!(*p & 1)) return 0; @@ -74,8 +73,7 @@ ogm_header(AVFormatContext *s, int idx) time_unit = bytestream_get_le64(&p); spu = bytestream_get_le64(&p); - default_len = bytestream_get_le32(&p); - + p += 4; /* default_len */ p += 8; /* buffersize + bits_per_sample */ if(st->codec->codec_type == AVMEDIA_TYPE_VIDEO){ diff --git a/libavformat/oggparsevorbis.c b/libavformat/oggparsevorbis.c index 830f0bca37..86951f3e2f 100644 --- a/libavformat/oggparsevorbis.c +++ b/libavformat/oggparsevorbis.c @@ -25,6 +25,7 @@ #include <stdlib.h> 
#include "libavutil/avstring.h" #include "libavutil/bswap.h" +#include "libavutil/dict.h" #include "libavcodec/get_bits.h" #include "libavcodec/bytestream.h" #include "avformat.h" @@ -57,8 +58,8 @@ static int ogm_chapter(AVFormatContext *as, uint8_t *key, uint8_t *val) if (!chapter) return 0; - av_metadata_set2(&chapter->metadata, "title", val, - AV_METADATA_DONT_STRDUP_VAL); + av_dict_set(&chapter->metadata, "title", val, + AV_DICT_DONT_STRDUP_VAL); } else return 0; @@ -67,7 +68,7 @@ static int ogm_chapter(AVFormatContext *as, uint8_t *key, uint8_t *val) } int -ff_vorbis_comment(AVFormatContext * as, AVMetadata **m, const uint8_t *buf, int size) +ff_vorbis_comment(AVFormatContext * as, AVDictionary **m, const uint8_t *buf, int size) { const uint8_t *p = buf; const uint8_t *end = buf + size; @@ -127,9 +128,9 @@ ff_vorbis_comment(AVFormatContext * as, AVMetadata **m, const uint8_t *buf, int ct[vl] = 0; if (!ogm_chapter(as, tt, ct)) - av_metadata_set2(m, tt, ct, - AV_METADATA_DONT_STRDUP_KEY | - AV_METADATA_DONT_STRDUP_VAL); + av_dict_set(m, tt, ct, + AV_DICT_DONT_STRDUP_KEY | + AV_DICT_DONT_STRDUP_VAL); } } diff --git a/libavformat/oma.c b/libavformat/oma.c index f29b16a4d6..aac96d14e5 100644 --- a/libavformat/oma.c +++ b/libavformat/oma.c @@ -80,6 +80,8 @@ static int oma_read_header(AVFormatContext *s, ff_id3v2_read(s, ID3v2_EA3_MAGIC); ret = avio_read(s->pb, buf, EA3_HEADER_SIZE); + if (ret < EA3_HEADER_SIZE) + return -1; if (memcmp(buf, ((const uint8_t[]){'E', 'A', '3'}),3) || buf[4] != 0 || buf[5] != EA3_HEADER_SIZE) { av_log(s, AV_LOG_ERROR, "Couldn't find the EA3 header !\n"); diff --git a/libavformat/options.c b/libavformat/options.c index a6f78817f4..ffd82ba207 100644 --- a/libavformat/options.c +++ b/libavformat/options.c @@ -49,8 +49,12 @@ static const AVOption options[]={ {"nofillin", "do not fill in missing values that can be exactly calculated", 0, FF_OPT_TYPE_CONST, {.dbl = AVFMT_FLAG_NOFILLIN }, INT_MIN, INT_MAX, D, "fflags"}, {"noparse", "disable 
AVParsers, this needs nofillin too", 0, FF_OPT_TYPE_CONST, {.dbl = AVFMT_FLAG_NOPARSE }, INT_MIN, INT_MAX, D, "fflags"}, {"igndts", "ignore dts", 0, FF_OPT_TYPE_CONST, {.dbl = AVFMT_FLAG_IGNDTS }, INT_MIN, INT_MAX, D, "fflags"}, -{"rtphint", "add rtp hinting", 0, FF_OPT_TYPE_CONST, {.dbl = AVFMT_FLAG_RTP_HINT }, INT_MIN, INT_MAX, E, "fflags"}, +#if FF_API_FLAG_RTP_HINT +{"rtphint", "add rtp hinting (deprecated, use the -movflags rtphint option instead)", 0, FF_OPT_TYPE_CONST, {.dbl = AVFMT_FLAG_RTP_HINT }, INT_MIN, INT_MAX, E, "fflags"}, +#endif {"sortdts", "try to interleave outputted packets by dts", 0, FF_OPT_TYPE_CONST, {.dbl = AVFMT_FLAG_SORT_DTS }, INT_MIN, INT_MAX, D, "fflags"}, +{"keepside", "dont merge side data", 0, FF_OPT_TYPE_CONST, {.dbl = AVFMT_FLAG_KEEP_SIDE_DATA }, INT_MIN, INT_MAX, D, "fflags"}, +{"latm", "enable RTP MP4A-LATM payload", 0, FF_OPT_TYPE_CONST, {.dbl = AVFMT_FLAG_MP4A_LATM }, INT_MIN, INT_MAX, E, "fflags"}, {"analyzeduration", "how many microseconds are analyzed to estimate duration", OFFSET(max_analyze_duration), FF_OPT_TYPE_INT, {.dbl = 5*AV_TIME_BASE }, 0, INT_MAX, D}, {"cryptokey", "decryption key", OFFSET(key), FF_OPT_TYPE_BINARY, {.dbl = 0}, 0, 0, D}, {"indexmem", "max memory used for timestamp index (per stream)", OFFSET(max_index_size), FF_OPT_TYPE_INT, {.dbl = 1<<20 }, 0, INT_MAX, D}, @@ -58,6 +62,7 @@ static const AVOption options[]={ {"fdebug", "print specific debug info", OFFSET(debug), FF_OPT_TYPE_FLAGS, {.dbl = DEFAULT }, 0, INT_MAX, E|D, "fdebug"}, {"ts", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_FDEBUG_TS }, INT_MIN, INT_MAX, E|D, "fdebug"}, {"max_delay", "maximum muxing or demuxing delay in microseconds", OFFSET(max_delay), FF_OPT_TYPE_INT, {.dbl = DEFAULT }, 0, INT_MAX, E|D}, +{"fpsprobesize", "number of frames used to probe fps", OFFSET(fps_probe_size), FF_OPT_TYPE_INT, {.dbl = -1}, -1, INT_MAX-1, D}, {NULL}, }; diff --git a/libavformat/pcmdec.c b/libavformat/pcmdec.c index 6441d89c5c..ab3b739ccb 100644 --- 
a/libavformat/pcmdec.c +++ b/libavformat/pcmdec.c @@ -50,7 +50,7 @@ static int raw_read_packet(AVFormatContext *s, AVPacket *pkt) AVInputFormat ff_pcm_ ## name ## _demuxer = {\ #name,\ NULL_IF_CONFIG_SMALL(long_name),\ - 0,\ + sizeof(RawAudioDemuxerContext),\ NULL,\ ff_raw_read_header,\ raw_read_packet,\ @@ -59,6 +59,7 @@ AVInputFormat ff_pcm_ ## name ## _demuxer = {\ .flags= AVFMT_GENERIC_INDEX,\ .extensions = ext,\ .value = codec,\ + .priv_class = &ff_rawaudio_demuxer_class,\ }; PCMDEF(f64be, "PCM 64 bit floating-point big-endian format", diff --git a/libavformat/r3d.c b/libavformat/r3d.c index 5e027d158b..af74a9ee56 100644 --- a/libavformat/r3d.c +++ b/libavformat/r3d.c @@ -22,6 +22,7 @@ //#define DEBUG #include "libavutil/intreadwrite.h" +#include "libavutil/dict.h" #include "avformat.h" typedef struct { @@ -43,7 +44,7 @@ static int read_atom(AVFormatContext *s, Atom *atom) if (atom->size < 8) return -1; atom->tag = avio_rl32(s->pb); - av_dlog(s, "atom %d %.4s offset %#llx\n", + av_dlog(s, "atom %u %.4s offset %#"PRIx64"\n", atom->size, (char*)&atom->tag, atom->offset); return atom->size; } @@ -52,7 +53,8 @@ static int r3d_read_red1(AVFormatContext *s) { AVStream *st = av_new_stream(s, 0); char filename[258]; - int tmp, tmp2; + int tmp; + int av_unused tmp2; if (!st) return AVERROR(ENOMEM); @@ -97,7 +99,7 @@ static int r3d_read_red1(AVFormatContext *s) avio_read(s->pb, filename, 257); filename[sizeof(filename)-1] = 0; - av_metadata_set2(&st->metadata, "filename", filename, 0); + av_dict_set(&st->metadata, "filename", filename, 0); av_dlog(s, "filename %s\n", filename); av_dlog(s, "resolution %dx%d\n", st->codec->width, st->codec->height); @@ -131,7 +133,7 @@ static int r3d_read_rdvo(AVFormatContext *s, Atom *atom) if (st->codec->time_base.den) st->duration = (uint64_t)r3d->video_offsets_count* st->time_base.den*st->codec->time_base.num/st->codec->time_base.den; - av_dlog(s, "duration %lld\n", st->duration); + av_dlog(s, "duration %"PRId64"\n", st->duration); 
return 0; } @@ -139,7 +141,7 @@ static int r3d_read_rdvo(AVFormatContext *s, Atom *atom) static void r3d_read_reos(AVFormatContext *s) { R3DContext *r3d = s->priv_data; - int tmp; + int av_unused tmp; r3d->rdvo_offset = avio_rb32(s->pb); avio_rb32(s->pb); // rdvs offset @@ -176,7 +178,7 @@ static int r3d_read_header(AVFormatContext *s, AVFormatParameters *ap) } s->data_offset = avio_tell(s->pb); - av_dlog(s, "data offset %#llx\n", s->data_offset); + av_dlog(s, "data offset %#"PRIx64"\n", s->data_offset); if (!s->pb->seekable) return 0; // find REOB/REOF/REOS to load index @@ -209,7 +211,8 @@ static int r3d_read_header(AVFormatContext *s, AVFormatParameters *ap) static int r3d_read_redv(AVFormatContext *s, AVPacket *pkt, Atom *atom) { AVStream *st = s->streams[0]; - int tmp, tmp2; + int tmp; + int av_unused tmp2; uint64_t pos = avio_tell(s->pb); unsigned dts; int ret; @@ -255,7 +258,7 @@ static int r3d_read_redv(AVFormatContext *s, AVPacket *pkt, Atom *atom) if (st->codec->time_base.den) pkt->duration = (uint64_t)st->time_base.den* st->codec->time_base.num/st->codec->time_base.den; - av_dlog(s, "pkt dts %lld duration %d\n", pkt->dts, pkt->duration); + av_dlog(s, "pkt dts %"PRId64" duration %d\n", pkt->dts, pkt->duration); return 0; } @@ -263,7 +266,8 @@ static int r3d_read_redv(AVFormatContext *s, AVPacket *pkt, Atom *atom) static int r3d_read_reda(AVFormatContext *s, AVPacket *pkt, Atom *atom) { AVStream *st = s->streams[1]; - int tmp, tmp2, samples, size; + int av_unused tmp, tmp2; + int samples, size; uint64_t pos = avio_tell(s->pb); unsigned dts; int ret; @@ -299,7 +303,7 @@ static int r3d_read_reda(AVFormatContext *s, AVPacket *pkt, Atom *atom) pkt->stream_index = 1; pkt->dts = dts; pkt->duration = av_rescale(samples, st->time_base.den, st->codec->sample_rate); - av_dlog(s, "pkt dts %lld duration %d samples %d sample rate %d\n", + av_dlog(s, "pkt dts %"PRId64" duration %d samples %d sample rate %d\n", pkt->dts, pkt->duration, samples, st->codec->sample_rate); 
return 0; @@ -356,7 +360,8 @@ static int r3d_seek(AVFormatContext *s, int stream_index, int64_t sample_time, i frame_num = sample_time*st->codec->time_base.den/ ((int64_t)st->codec->time_base.num*st->time_base.den); - av_dlog(s, "seek frame num %d timestamp %lld\n", frame_num, sample_time); + av_dlog(s, "seek frame num %d timestamp %"PRId64"\n", + frame_num, sample_time); if (frame_num < r3d->video_offsets_count) { avio_seek(s->pb, r3d->video_offsets_count, SEEK_SET); diff --git a/libavformat/rawdec.c b/libavformat/rawdec.c index 0387e3d266..a4e009b7e0 100644 --- a/libavformat/rawdec.c +++ b/libavformat/rawdec.c @@ -23,6 +23,9 @@ #include "avformat.h" #include "avio_internal.h" #include "rawdec.h" +#include "libavutil/opt.h" +#include "libavutil/parseutils.h" +#include "libavutil/pixdesc.h" /* raw input */ int ff_raw_read_header(AVFormatContext *s, AVFormatParameters *ap) @@ -43,26 +46,64 @@ int ff_raw_read_header(AVFormatContext *s, AVFormatParameters *ap) st->codec->codec_id = id; switch(st->codec->codec_type) { - case AVMEDIA_TYPE_AUDIO: - st->codec->sample_rate = ap->sample_rate; - if(ap->channels) st->codec->channels = ap->channels; - else st->codec->channels = 1; + case AVMEDIA_TYPE_AUDIO: { + RawAudioDemuxerContext *s1 = s->priv_data; + +#if FF_API_FORMAT_PARAMETERS + if (ap->sample_rate) + st->codec->sample_rate = ap->sample_rate; + if (ap->channels) + st->codec->channels = ap->channels; + else st->codec->channels = 1; +#endif + + if (s1->sample_rate) + st->codec->sample_rate = s1->sample_rate; + if (s1->channels) + st->codec->channels = s1->channels; + st->codec->bits_per_coded_sample = av_get_bits_per_sample(st->codec->codec_id); assert(st->codec->bits_per_coded_sample > 0); st->codec->block_align = st->codec->bits_per_coded_sample*st->codec->channels/8; av_set_pts_info(st, 64, 1, st->codec->sample_rate); break; - case AVMEDIA_TYPE_VIDEO: - if(ap->time_base.num) - av_set_pts_info(st, 64, ap->time_base.num, ap->time_base.den); - else - av_set_pts_info(st, 
64, 1, 25); - st->codec->width = ap->width; - st->codec->height = ap->height; - st->codec->pix_fmt = ap->pix_fmt; - if(st->codec->pix_fmt == PIX_FMT_NONE) - st->codec->pix_fmt= PIX_FMT_YUV420P; - break; + } + case AVMEDIA_TYPE_VIDEO: { + FFRawVideoDemuxerContext *s1 = s->priv_data; + int width = 0, height = 0, ret = 0; + enum PixelFormat pix_fmt; + AVRational framerate; + + if (s1->video_size && (ret = av_parse_video_size(&width, &height, s1->video_size)) < 0) { + av_log(s, AV_LOG_ERROR, "Couldn't parse video size.\n"); + goto fail; + } + if ((pix_fmt = av_get_pix_fmt(s1->pixel_format)) == PIX_FMT_NONE) { + av_log(s, AV_LOG_ERROR, "No such pixel format: %s.\n", s1->pixel_format); + ret = AVERROR(EINVAL); + goto fail; + } + if ((ret = av_parse_video_rate(&framerate, s1->framerate)) < 0) { + av_log(s, AV_LOG_ERROR, "Could not parse framerate: %s.\n", s1->framerate); + goto fail; + } +#if FF_API_FORMAT_PARAMETERS + if (ap->width > 0) + width = ap->width; + if (ap->height > 0) + height = ap->height; + if (ap->pix_fmt) + pix_fmt = ap->pix_fmt; + if (ap->time_base.num) + framerate = (AVRational){ap->time_base.den, ap->time_base.num}; +#endif + av_set_pts_info(st, 64, framerate.den, framerate.num); + st->codec->width = width; + st->codec->height = height; + st->codec->pix_fmt = pix_fmt; +fail: + return ret; + } default: return -1; } @@ -111,45 +152,82 @@ int ff_raw_video_read_header(AVFormatContext *s, AVFormatParameters *ap) { AVStream *st; + FFRawVideoDemuxerContext *s1 = s->priv_data; + AVRational framerate; + int ret = 0; + st = av_new_stream(s, 0); - if (!st) - return AVERROR(ENOMEM); + if (!st) { + ret = AVERROR(ENOMEM); + goto fail; + } st->codec->codec_type = AVMEDIA_TYPE_VIDEO; st->codec->codec_id = s->iformat->value; st->need_parsing = AVSTREAM_PARSE_FULL; - /* for MJPEG, specify frame rate */ - /* for MPEG-4 specify it, too (most MPEG-4 streams do not have the fixed_vop_rate set ...)*/ - if (ap->time_base.num) { - st->codec->time_base= ap->time_base; - } else 
if ( st->codec->codec_id == CODEC_ID_MJPEG || - st->codec->codec_id == CODEC_ID_MPEG4 || - st->codec->codec_id == CODEC_ID_DIRAC || - st->codec->codec_id == CODEC_ID_DNXHD || - st->codec->codec_id == CODEC_ID_VC1 || - st->codec->codec_id == CODEC_ID_H264) { - st->codec->time_base= (AVRational){1,25}; + if ((ret = av_parse_video_rate(&framerate, s1->framerate)) < 0) { + av_log(s, AV_LOG_ERROR, "Could not parse framerate: %s.\n", s1->framerate); + goto fail; } +#if FF_API_FORMAT_PARAMETERS + if (ap->time_base.num) + framerate = (AVRational){ap->time_base.den, ap->time_base.num}; +#endif + + st->codec->time_base = (AVRational){framerate.den, framerate.num}; av_set_pts_info(st, 64, 1, 1200000); - return 0; +fail: + return ret; } /* Note: Do not forget to add new entries to the Makefile as well. */ +static const AVOption audio_options[] = { + { "sample_rate", "", offsetof(RawAudioDemuxerContext, sample_rate), FF_OPT_TYPE_INT, {.dbl = 0}, 0, INT_MAX, AV_OPT_FLAG_DECODING_PARAM }, + { "channels", "", offsetof(RawAudioDemuxerContext, channels), FF_OPT_TYPE_INT, {.dbl = 0}, 0, INT_MAX, AV_OPT_FLAG_DECODING_PARAM }, + { NULL }, +}; + +const AVClass ff_rawaudio_demuxer_class = { + .class_name = "rawaudio demuxer", + .item_name = av_default_item_name, + .option = audio_options, + .version = LIBAVUTIL_VERSION_INT, +}; + +#define OFFSET(x) offsetof(FFRawVideoDemuxerContext, x) +#define DEC AV_OPT_FLAG_DECODING_PARAM +static const AVOption video_options[] = { + { "video_size", "A string describing frame size, such as 640x480 or hd720.", OFFSET(video_size), FF_OPT_TYPE_STRING, {.str = NULL}, 0, 0, DEC }, + { "pixel_format", "", OFFSET(pixel_format), FF_OPT_TYPE_STRING, {.str = "yuv420p"}, 0, 0, DEC }, + { "framerate", "", OFFSET(framerate), FF_OPT_TYPE_STRING, {.str = "25"}, 0, 0, DEC }, + { NULL }, +}; +#undef OFFSET +#undef DEC + +const AVClass ff_rawvideo_demuxer_class = { + .class_name = "rawvideo demuxer", + .item_name = av_default_item_name, + .option = video_options, + 
.version = LIBAVUTIL_VERSION_INT, +}; + #if CONFIG_G722_DEMUXER AVInputFormat ff_g722_demuxer = { "g722", NULL_IF_CONFIG_SMALL("raw G.722"), - 0, + sizeof(RawAudioDemuxerContext), NULL, ff_raw_read_header, ff_raw_read_partial_packet, .flags= AVFMT_GENERIC_INDEX, .extensions = "g722,722", .value = CODEC_ID_ADPCM_G722, + .priv_class = &ff_rawaudio_demuxer_class, }; #endif @@ -168,17 +246,7 @@ AVInputFormat ff_gsm_demuxer = { #endif #if CONFIG_MJPEG_DEMUXER -AVInputFormat ff_mjpeg_demuxer = { - "mjpeg", - NULL_IF_CONFIG_SMALL("raw MJPEG video"), - 0, - NULL, - ff_raw_video_read_header, - ff_raw_read_partial_packet, - .flags= AVFMT_GENERIC_INDEX, - .extensions = "mjpg,mjpeg", - .value = CODEC_ID_MJPEG, -}; +FF_DEF_RAWVIDEO_DEMUXER(mjpeg, "raw MJPEG video", NULL, "mjpg,mjpeg", CODEC_ID_MJPEG) #endif #if CONFIG_MLP_DEMUXER @@ -224,14 +292,5 @@ AVInputFormat ff_shorten_demuxer = { #endif #if CONFIG_VC1_DEMUXER -AVInputFormat ff_vc1_demuxer = { - "vc1", - NULL_IF_CONFIG_SMALL("raw VC-1"), - 0, - NULL /* vc1_probe */, - ff_raw_video_read_header, - ff_raw_read_partial_packet, - .extensions = "vc1", - .value = CODEC_ID_VC1, -}; +FF_DEF_RAWVIDEO_DEMUXER(vc1, "raw VC-1", NULL, "vc1", CODEC_ID_VC1) #endif diff --git a/libavformat/rawdec.h b/libavformat/rawdec.h index 0b0cf1beef..6cb5af2b0a 100644 --- a/libavformat/rawdec.h +++ b/libavformat/rawdec.h @@ -23,6 +23,23 @@ #define AVFORMAT_RAWDEC_H #include "avformat.h" +#include "libavutil/log.h" + +typedef struct RawAudioDemuxerContext { + AVClass *class; + int sample_rate; + int channels; +} RawAudioDemuxerContext; + +typedef struct FFRawVideoDemuxerContext { + const AVClass *class; /**< Class for private options. */ + char *video_size; /**< String describing video size, set by a private option. */ + char *pixel_format; /**< Set by a private option. */ + char *framerate; /**< String describing framerate, set by a private option. 
*/ +} FFRawVideoDemuxerContext; + +extern const AVClass ff_rawaudio_demuxer_class; +extern const AVClass ff_rawvideo_demuxer_class; int ff_raw_read_header(AVFormatContext *s, AVFormatParameters *ap); @@ -32,4 +49,18 @@ int ff_raw_audio_read_header(AVFormatContext *s, AVFormatParameters *ap); int ff_raw_video_read_header(AVFormatContext *s, AVFormatParameters *ap); +#define FF_DEF_RAWVIDEO_DEMUXER(shortname, longname, probe, ext, id)\ +AVInputFormat ff_ ## shortname ## _demuxer = {\ + .name = #shortname,\ + .long_name = NULL_IF_CONFIG_SMALL(longname),\ + .read_probe = probe,\ + .read_header = ff_raw_video_read_header,\ + .read_packet = ff_raw_read_partial_packet,\ + .extensions = ext,\ + .flags = AVFMT_GENERIC_INDEX,\ + .value = id,\ + .priv_data_size = sizeof(FFRawVideoDemuxerContext),\ + .priv_class = &ff_rawvideo_demuxer_class,\ +}; + #endif /* AVFORMAT_RAWDEC_H */ diff --git a/libavformat/rawvideodec.c b/libavformat/rawvideodec.c index 127119f18a..36f5d954ae 100644 --- a/libavformat/rawvideodec.c +++ b/libavformat/rawvideodec.c @@ -47,11 +47,12 @@ static int rawvideo_read_packet(AVFormatContext *s, AVPacket *pkt) AVInputFormat ff_rawvideo_demuxer = { "rawvideo", NULL_IF_CONFIG_SMALL("raw video format"), - 0, + sizeof(FFRawVideoDemuxerContext), NULL, ff_raw_read_header, rawvideo_read_packet, .flags= AVFMT_GENERIC_INDEX, .extensions = "yuv,cif,qcif,rgb", .value = CODEC_ID_RAWVIDEO, + .priv_class = &ff_rawvideo_demuxer_class, }; diff --git a/libavformat/riff.c b/libavformat/riff.c index 833a6bf9e8..544c29f116 100644 --- a/libavformat/riff.c +++ b/libavformat/riff.c @@ -250,6 +250,7 @@ const AVCodecTag ff_codec_bmp_tags[] = { { CODEC_ID_ZMBV, MKTAG('Z', 'M', 'B', 'V') }, { CODEC_ID_KMVC, MKTAG('K', 'M', 'V', 'C') }, { CODEC_ID_CAVS, MKTAG('C', 'A', 'V', 'S') }, + { CODEC_ID_JPEG2000, MKTAG('m', 'j', 'p', '2') }, { CODEC_ID_JPEG2000, MKTAG('M', 'J', '2', 'C') }, { CODEC_ID_VMNC, MKTAG('V', 'M', 'n', 'c') }, { CODEC_ID_TARGA, MKTAG('t', 'g', 'a', ' ') }, @@ -536,6 
+537,7 @@ int ff_get_wav_header(AVIOContext *pb, AVCodecContext *codec, int size) } codec->extradata_size = cbSize; if (cbSize > 0) { + av_free(codec->extradata); codec->extradata = av_mallocz(codec->extradata_size + FF_INPUT_BUFFER_PADDING_SIZE); if (!codec->extradata) return AVERROR(ENOMEM); diff --git a/libavformat/rl2.c b/libavformat/rl2.c index 93d4c34459..e41406791e 100644 --- a/libavformat/rl2.c +++ b/libavformat/rl2.c @@ -80,8 +80,6 @@ static av_cold int rl2_read_header(AVFormatContext *s, unsigned int audio_frame_counter = 0; unsigned int video_frame_counter = 0; unsigned int back_size; - int data_size; - unsigned short encoding_method; unsigned short sound_rate; unsigned short rate; unsigned short channels; @@ -98,14 +96,14 @@ static av_cold int rl2_read_header(AVFormatContext *s, avio_skip(pb,4); /* skip FORM tag */ back_size = avio_rl32(pb); /**< get size of the background frame */ signature = avio_rb32(pb); - data_size = avio_rb32(pb); + avio_skip(pb, 4); /* data size */ frame_count = avio_rl32(pb); /* disallow back_sizes and frame_counts that may lead to overflows later */ if(back_size > INT_MAX/2 || frame_count > INT_MAX / sizeof(uint32_t)) return AVERROR_INVALIDDATA; - encoding_method = avio_rl16(pb); + avio_skip(pb, 2); /* encoding mentod */ sound_rate = avio_rl16(pb); rate = avio_rl16(pb); channels = avio_rl16(pb); diff --git a/libavformat/rmdec.c b/libavformat/rmdec.c index 6fb362473e..c2eee815c2 100644 --- a/libavformat/rmdec.c +++ b/libavformat/rmdec.c @@ -21,6 +21,7 @@ #include "libavutil/avstring.h" #include "libavutil/intreadwrite.h" +#include "libavutil/dict.h" #include "avformat.h" #include "riff.h" #include "rm.h" @@ -104,7 +105,7 @@ static void rm_read_metadata(AVFormatContext *s, int wide) for (i=0; i<FF_ARRAY_ELEMS(ff_rm_metadata); i++) { int len = wide ? 
avio_rb16(s->pb) : avio_r8(s->pb); get_strl(s->pb, buf, sizeof(buf), len); - av_metadata_set2(&s->metadata, ff_rm_metadata[i], buf, 0); + av_dict_set(&s->metadata, ff_rm_metadata[i], buf, 0); } } @@ -280,7 +281,7 @@ ff_rm_read_mdpr_codecdata (AVFormatContext *s, AVIOContext *pb, if (rm_read_audio_stream_info(s, pb, st, rst, 0)) return -1; } else { - int fps, fps2; + int fps; if (avio_rl32(pb) != MKTAG('V', 'I', 'D', 'O')) { fail1: av_log(st->codec, AV_LOG_ERROR, "Unsupported video codec\n"); @@ -298,7 +299,7 @@ ff_rm_read_mdpr_codecdata (AVFormatContext *s, AVIOContext *pb, fps= avio_rb16(pb); st->codec->codec_type = AVMEDIA_TYPE_VIDEO; avio_rb32(pb); - fps2= avio_rb16(pb); + avio_skip(pb, 2); avio_rb16(pb); if ((ret = rm_read_extradata(pb, st->codec, codec_data_size - (avio_tell(pb) - codec_pos))) < 0) diff --git a/libavformat/rmenc.c b/libavformat/rmenc.c index e08b677987..575d895a6a 100644 --- a/libavformat/rmenc.c +++ b/libavformat/rmenc.c @@ -21,6 +21,7 @@ #include "avformat.h" #include "avio_internal.h" #include "rm.h" +#include "libavutil/dict.h" typedef struct { int nb_packets; @@ -71,7 +72,7 @@ static int rv10_write_header(AVFormatContext *ctx, const char *desc, *mimetype; int nb_packets, packet_total_size, packet_max_size, size, packet_avg_size, i; int bit_rate, v, duration, flags, data_pos; - AVMetadataTag *tag; + AVDictionaryEntry *tag; start_ptr = s->buf_ptr; @@ -127,13 +128,13 @@ static int rv10_write_header(AVFormatContext *ctx, ffio_wfourcc(s,"CONT"); size = 4 * 2 + 10; for(i=0; i<FF_ARRAY_ELEMS(ff_rm_metadata); i++) { - tag = av_metadata_get(ctx->metadata, ff_rm_metadata[i], NULL, 0); + tag = av_dict_get(ctx->metadata, ff_rm_metadata[i], NULL, 0); if(tag) size += strlen(tag->value); } avio_wb32(s,size); avio_wb16(s,0); for(i=0; i<FF_ARRAY_ELEMS(ff_rm_metadata); i++) { - tag = av_metadata_get(ctx->metadata, ff_rm_metadata[i], NULL, 0); + tag = av_dict_get(ctx->metadata, ff_rm_metadata[i], NULL, 0); put_str(s, tag ? 
tag->value : ""); } diff --git a/libavformat/rpl.c b/libavformat/rpl.c index 9702fc7035..151893cdd7 100644 --- a/libavformat/rpl.c +++ b/libavformat/rpl.c @@ -20,6 +20,7 @@ */ #include "libavutil/avstring.h" +#include "libavutil/dict.h" #include "avformat.h" #include <stdlib.h> @@ -131,11 +132,11 @@ static int rpl_read_header(AVFormatContext *s, AVFormatParameters *ap) // for the text in a few cases; samples needed.) error |= read_line(pb, line, sizeof(line)); // ARMovie error |= read_line(pb, line, sizeof(line)); // movie name - av_metadata_set2(&s->metadata, "title" , line, 0); + av_dict_set(&s->metadata, "title" , line, 0); error |= read_line(pb, line, sizeof(line)); // date/copyright - av_metadata_set2(&s->metadata, "copyright", line, 0); + av_dict_set(&s->metadata, "copyright", line, 0); error |= read_line(pb, line, sizeof(line)); // author and other - av_metadata_set2(&s->metadata, "author" , line, 0); + av_dict_set(&s->metadata, "author" , line, 0); // video headers vst = av_new_stream(s, 0); @@ -299,9 +300,9 @@ static int rpl_read_packet(AVFormatContext *s, AVPacket *pkt) stream->codec->codec_tag == 124) { // We have to split Escape 124 frames because there are // multiple frames per chunk in Escape 124 samples. 
- uint32_t frame_size, frame_flags; + uint32_t frame_size; - frame_flags = avio_rl32(pb); + avio_skip(pb, 4); /* flags */ frame_size = avio_rl32(pb); if (avio_seek(pb, -8, SEEK_CUR) < 0) return AVERROR(EIO); diff --git a/libavformat/rtmppkt.c b/libavformat/rtmppkt.c index 53c6a5fe19..4b6d549f74 100644 --- a/libavformat/rtmppkt.c +++ b/libavformat/rtmppkt.c @@ -233,9 +233,11 @@ int ff_rtmp_packet_write(URLContext *h, RTMPPacket *pkt, int ff_rtmp_packet_create(RTMPPacket *pkt, int channel_id, RTMPPacketType type, int timestamp, int size) { - pkt->data = av_malloc(size); - if (!pkt->data) - return AVERROR(ENOMEM); + if (size) { + pkt->data = av_malloc(size); + if (!pkt->data) + return AVERROR(ENOMEM); + } pkt->data_size = size; pkt->channel_id = channel_id; pkt->type = type; diff --git a/libavformat/rtmpproto.c b/libavformat/rtmpproto.c index 94361ca80d..3701a77176 100644 --- a/libavformat/rtmpproto.c +++ b/libavformat/rtmpproto.c @@ -690,7 +690,7 @@ static int get_packet(URLContext *s, int for_header) return AVERROR_EOF; for (;;) { - RTMPPacket rpkt; + RTMPPacket rpkt = { 0 }; if ((ret = ff_rtmp_packet_read(rt->stream, &rpkt, rt->chunk_size, rt->prev_pkt[0])) <= 0) { if (ret == 0) { diff --git a/libavformat/rtpdec.c b/libavformat/rtpdec.c index 0a9a8bc8d6..9fc30d7b66 100644 --- a/libavformat/rtpdec.c +++ b/libavformat/rtpdec.c @@ -321,7 +321,7 @@ int rtp_check_and_send_back_rr(RTPDemuxContext *s, int count) avio_flush(pb); len = avio_close_dyn_buf(pb, &buf); if ((len > 0) && buf) { - int result; + int av_unused result; av_dlog(s->ic, "sending %d bytes of RR\n", len); result= ffurl_write(s->rtp_ctx, buf, len); av_dlog(s->ic, "result from ffurl_write: %d\n", result); diff --git a/libavformat/rtpdec_asf.c b/libavformat/rtpdec_asf.c index b2ac2b7ac0..58c9068a87 100644 --- a/libavformat/rtpdec_asf.c +++ b/libavformat/rtpdec_asf.c @@ -113,7 +113,7 @@ int ff_wms_parse_sdp_a_line(AVFormatContext *s, const char *p) ret = av_open_input_stream(&rt->asf_ctx, &pb, "", 
&ff_asf_demuxer, NULL); if (ret < 0) return ret; - av_metadata_copy(&s->metadata, rt->asf_ctx->metadata, 0); + av_dict_copy(&s->metadata, rt->asf_ctx->metadata, 0); rt->asf_pb_pos = avio_tell(&pb); av_free(buf); rt->asf_ctx->pb = NULL; diff --git a/libavformat/rtpdec_latm.c b/libavformat/rtpdec_latm.c index 2b366a0dc2..4bf2c3fb23 100644 --- a/libavformat/rtpdec_latm.c +++ b/libavformat/rtpdec_latm.c @@ -108,8 +108,7 @@ static int parse_fmtp_config(AVStream *st, char *value) int len = ff_hex_to_data(NULL, value), i, ret = 0; GetBitContext gb; uint8_t *config; - int audio_mux_version, same_time_framing, num_sub_frames, - num_programs, num_layers; + int audio_mux_version, same_time_framing, num_programs, num_layers; /* Pad this buffer, too, to avoid out of bounds reads with get_bits below */ config = av_mallocz(len + FF_INPUT_BUFFER_PADDING_SIZE); @@ -119,7 +118,7 @@ static int parse_fmtp_config(AVStream *st, char *value) init_get_bits(&gb, config, len*8); audio_mux_version = get_bits(&gb, 1); same_time_framing = get_bits(&gb, 1); - num_sub_frames = get_bits(&gb, 6); + skip_bits(&gb, 6); /* num_sub_frames */ num_programs = get_bits(&gb, 4); num_layers = get_bits(&gb, 3); if (audio_mux_version != 0 || same_time_framing != 1 || num_programs != 0 || diff --git a/libavformat/rtpenc.c b/libavformat/rtpenc.c index 71ccdabf4a..60629b098b 100644 --- a/libavformat/rtpenc.c +++ b/libavformat/rtpenc.c @@ -23,11 +23,24 @@ #include "mpegts.h" #include "internal.h" #include "libavutil/random_seed.h" +#include "libavutil/opt.h" #include "rtpenc.h" //#define DEBUG +static const AVOption options[] = { + FF_RTP_FLAG_OPTS(RTPMuxContext, flags), + { NULL }, +}; + +static const AVClass rtp_muxer_class = { + .class_name = "RTP muxer", + .item_name = av_default_item_name, + .option = options, + .version = LIBAVUTIL_VERSION_INT, +}; + #define RTCP_SR_SIZE 28 static int is_supported(enum CodecID id) @@ -404,7 +417,10 @@ static int rtp_write_packet(AVFormatContext *s1, AVPacket *pkt) 
ff_rtp_send_mpegvideo(s1, pkt->data, size); break; case CODEC_ID_AAC: - ff_rtp_send_aac(s1, pkt->data, size); + if (s->flags & FF_RTP_FLAG_MP4A_LATM) + ff_rtp_send_latm(s1, pkt->data, size); + else + ff_rtp_send_aac(s1, pkt->data, size); break; case CODEC_ID_AMR_NB: case CODEC_ID_AMR_WB: @@ -455,4 +471,5 @@ AVOutputFormat ff_rtp_muxer = { rtp_write_header, rtp_write_packet, rtp_write_trailer, + .priv_class = &rtp_muxer_class, }; diff --git a/libavformat/rtpenc.h b/libavformat/rtpenc.h index b9663c55b0..cba0d02d91 100644 --- a/libavformat/rtpenc.h +++ b/libavformat/rtpenc.h @@ -25,6 +25,7 @@ #include "rtp.h" struct RTPMuxContext { + const AVClass *av_class; AVFormatContext *ic; AVStream *st; int payload_type; @@ -56,15 +57,24 @@ struct RTPMuxContext { * (1, 2 or 4) */ int nal_length_size; + + int flags; }; typedef struct RTPMuxContext RTPMuxContext; +#define FF_RTP_FLAG_MP4A_LATM 1 + +#define FF_RTP_FLAG_OPTS(ctx, fieldname) \ + { "rtpflags", "RTP muxer flags", offsetof(ctx, fieldname), FF_OPT_TYPE_FLAGS, {.dbl = 0}, INT_MIN, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM, "rtpflags" }, \ + { "latm", "Use MP4A-LATM packetization instead of MPEG4-GENERIC for AAC", 0, FF_OPT_TYPE_CONST, {.dbl = FF_RTP_FLAG_MP4A_LATM}, INT_MIN, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM, "rtpflags" } \ + void ff_rtp_send_data(AVFormatContext *s1, const uint8_t *buf1, int len, int m); void ff_rtp_send_h264(AVFormatContext *s1, const uint8_t *buf1, int size); void ff_rtp_send_h263(AVFormatContext *s1, const uint8_t *buf1, int size); void ff_rtp_send_aac(AVFormatContext *s1, const uint8_t *buff, int size); +void ff_rtp_send_latm(AVFormatContext *s1, const uint8_t *buff, int size); void ff_rtp_send_amr(AVFormatContext *s1, const uint8_t *buff, int size); void ff_rtp_send_mpegvideo(AVFormatContext *s1, const uint8_t *buf1, int size); void ff_rtp_send_xiph(AVFormatContext *s1, const uint8_t *buff, int size); diff --git a/libavformat/rtpenc_chain.c b/libavformat/rtpenc_chain.c index bd3d1a875d..7f1a8868eb 
100644 --- a/libavformat/rtpenc_chain.c +++ b/libavformat/rtpenc_chain.c @@ -23,6 +23,7 @@ #include "avio_internal.h" #include "rtpenc_chain.h" #include "avio_internal.h" +#include "libavutil/opt.h" AVFormatContext *ff_rtp_chain_mux_open(AVFormatContext *s, AVStream *st, URLContext *handle, int packet_size) @@ -48,6 +49,14 @@ AVFormatContext *ff_rtp_chain_mux_open(AVFormatContext *s, AVStream *st, rtpctx->max_delay = s->max_delay; /* Copy other stream parameters. */ rtpctx->streams[0]->sample_aspect_ratio = st->sample_aspect_ratio; + rtpctx->flags |= s->flags & AVFMT_FLAG_MP4A_LATM; + + av_set_parameters(rtpctx, NULL); + /* Copy the rtpflags values straight through */ + if (s->oformat->priv_class && + av_find_opt(s->priv_data, "rtpflags", NULL, 0, 0)) + av_set_int(rtpctx->priv_data, "rtpflags", + av_get_int(s->priv_data, "rtpflags", NULL)); /* Set the synchronized start time. */ rtpctx->start_time_realtime = s->start_time_realtime; diff --git a/libavformat/rtpenc_h264.c b/libavformat/rtpenc_h264.c index 697def61c2..11074d0d51 100644 --- a/libavformat/rtpenc_h264.c +++ b/libavformat/rtpenc_h264.c @@ -29,6 +29,24 @@ #include "avc.h" #include "rtpenc.h" +static const uint8_t *avc_mp4_find_startcode(const uint8_t *start, const uint8_t *end, int nal_length_size) +{ + int res = 0; + + if (end - start < nal_length_size) { + return NULL; + } + while (nal_length_size--) { + res = (res << 8) | *start++; + } + + if (end - start < res) { + return NULL; + } + + return res + start; +} + static void nal_send(AVFormatContext *s1, const uint8_t *buf, int size, int last) { RTPMuxContext *s = s1->priv_data; @@ -66,12 +84,20 @@ void ff_rtp_send_h264(AVFormatContext *s1, const uint8_t *buf1, int size) RTPMuxContext *s = s1->priv_data; s->timestamp = s->cur_timestamp; - r = ff_avc_find_startcode(buf1, buf1 + size); + r = s->nal_length_size ? (avc_mp4_find_startcode(buf1, buf1 + size, s->nal_length_size) ? 
buf1 : buf1 + size) : ff_avc_find_startcode(buf1, buf1 + size); while (r < buf1 + size) { const uint8_t *r1; - while(!*(r++)); - r1 = ff_avc_find_startcode(r, buf1 + size); + if (s->nal_length_size) { + r1 = avc_mp4_find_startcode(r, buf1 + size, s->nal_length_size); + if (!r1) { + r1 = buf1 + size; + } + r += s->nal_length_size; + } else { + while(!*(r++)); + r1 = ff_avc_find_startcode(r, buf1 + size); + } nal_send(s1, r, r1 - r, (r1 == buf1 + size)); r = r1; } diff --git a/libavformat/rtpenc_latm.c b/libavformat/rtpenc_latm.c new file mode 100644 index 0000000000..4430c44afe --- /dev/null +++ b/libavformat/rtpenc_latm.c @@ -0,0 +1,61 @@ +/* + * RTP Packetization of MPEG-4 Audio (RFC 3016) + * Copyright (c) 2011 Juan Carlos Rodriguez <ing.juancarlosrodriguez@hotmail.com> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "avformat.h" +#include "rtpenc.h" + +void ff_rtp_send_latm(AVFormatContext *s1, const uint8_t *buff, int size) +{ + /* MP4A-LATM + * The RTP payload format specification is described in RFC 3016 + * The encoding specifications are provided in ISO/IEC 14496-3 */ + + RTPMuxContext *s = s1->priv_data; + int header_size; + int offset = 0; + int len = 0; + + /* skip ADTS header, if present */ + if ((s1->streams[0]->codec->extradata_size) == 0) { + size -= 7; + buff += 7; + } + + /* PayloadLengthInfo() */ + header_size = size/0xFF + 1; + memset(s->buf, 0xFF, header_size - 1); + s->buf[header_size - 1] = size % 0xFF; + + s->timestamp = s->cur_timestamp; + + /* PayloadMux() */ + while (size > 0) { + len = FFMIN(size, s->max_payload_size - (!offset ? header_size : 0)); + size -= len; + if (!offset) { + memcpy(s->buf + header_size, buff, len); + ff_rtp_send_data(s1, s->buf, header_size + len, !size); + } else { + ff_rtp_send_data(s1, buff + offset, len, !size); + } + offset += len; + } +} diff --git a/libavformat/rtsp.c b/libavformat/rtsp.c index 74e4b31f43..f5a638423f 100644 --- a/libavformat/rtsp.c +++ b/libavformat/rtsp.c @@ -24,6 +24,7 @@ #include "libavutil/intreadwrite.h" #include "libavutil/parseutils.h" #include "libavutil/random_seed.h" +#include "libavutil/dict.h" #include "avformat.h" #include "avio_internal.h" @@ -45,7 +46,6 @@ #include "url.h" //#define DEBUG -//#define DEBUG_RTP_TCP /* Timeout values for socket poll, in ms, * and read_packet(), in seconds */ @@ -282,11 +282,11 @@ static void sdp_parse_line(AVFormatContext *s, SDPParseState *s1, } break; case 's': - av_metadata_set2(&s->metadata, "title", p, 0); + av_dict_set(&s->metadata, "title", p, 0); break; case 'i': if (s->nb_streams == 0) { - av_metadata_set2(&s->metadata, 
"comment", p, 0); + av_dict_set(&s->metadata, "comment", p, 0); break; } break; @@ -860,9 +860,7 @@ int ff_rtsp_read_reply(AVFormatContext *s, RTSPMessageHeader *reply, q = buf; for (;;) { ret = ffurl_read_complete(rt->rtsp_hd, &ch, 1); -#ifdef DEBUG_RTP_TCP av_dlog(s, "ret=%d c=%02x [%c]\n", ret, ch, ch); -#endif if (ret != 1) return AVERROR_EOF; if (ch == '\n') diff --git a/libavformat/rtsp.h b/libavformat/rtsp.h index 56160cefc2..6b060923e4 100644 --- a/libavformat/rtsp.h +++ b/libavformat/rtsp.h @@ -28,6 +28,8 @@ #include "network.h" #include "httpauth.h" +#include "libavutil/log.h" + /** * Network layer over which RTP/etc packet data will be transported. */ @@ -196,6 +198,7 @@ enum RTSPServerType { * @todo Use AVIOContext instead of URLContext */ typedef struct RTSPState { + const AVClass *class; /**< Class for private options. */ URLContext *rtsp_hd; /* RTSP TCP connection handle */ /** number of items in the 'rtsp_streams' variable */ @@ -336,6 +339,16 @@ typedef struct RTSPState { * Whether the server supports the GET_PARAMETER method. */ int get_parameter_supported; + + /** + * Do not begin to play the stream immediately. + */ + int initial_pause; + + /** + * Option flags for the chained RTP muxer. 
+ */ + int rtp_muxer_flags; } RTSPState; /** diff --git a/libavformat/rtspdec.c b/libavformat/rtspdec.c index 454a31c3f9..cf95915110 100644 --- a/libavformat/rtspdec.c +++ b/libavformat/rtspdec.c @@ -21,6 +21,7 @@ #include "libavutil/avstring.h" #include "libavutil/intreadwrite.h" +#include "libavutil/opt.h" #include "avformat.h" #include "internal.h" @@ -30,9 +31,6 @@ #include "rdt.h" #include "url.h" -//#define DEBUG -//#define DEBUG_RTP_TCP - static int rtsp_read_play(AVFormatContext *s) { RTSPState *rt = s->priv_data; @@ -165,7 +163,12 @@ static int rtsp_read_header(AVFormatContext *s, return AVERROR(ENOMEM); rt->real_setup = rt->real_setup_cache + s->nb_streams; - if (ap->initial_pause) { +#if FF_API_FORMAT_PARAMETERS + if (ap->initial_pause) + rt->initial_pause = ap->initial_pause; +#endif + + if (rt->initial_pause) { /* do not start immediately */ } else { if (rtsp_read_play(s) < 0) { @@ -185,9 +188,7 @@ int ff_rtsp_tcp_read_packet(AVFormatContext *s, RTSPStream **prtsp_st, int id, len, i, ret; RTSPStream *rtsp_st; -#ifdef DEBUG_RTP_TCP av_dlog(s, "tcp_read_packet:\n"); -#endif redo: for (;;) { RTSPMessageHeader reply; @@ -206,9 +207,7 @@ redo: return -1; id = buf[0]; len = AV_RB16(buf + 1); -#ifdef DEBUG_RTP_TCP av_dlog(s, "id=%d len=%d\n", id, len); -#endif if (len > buf_size || len < 12) goto redo; /* get the data */ @@ -399,6 +398,18 @@ static int rtsp_read_close(AVFormatContext *s) return 0; } +static const AVOption options[] = { + { "initial_pause", "Don't start playing the stream immediately", offsetof(RTSPState, initial_pause), FF_OPT_TYPE_INT, {.dbl = 0}, 0, 1, AV_OPT_FLAG_DECODING_PARAM }, + { NULL }, +}; + +const AVClass rtsp_demuxer_class = { + .class_name = "RTSP demuxer", + .item_name = av_default_item_name, + .option = options, + .version = LIBAVUTIL_VERSION_INT, +}; + AVInputFormat ff_rtsp_demuxer = { "rtsp", NULL_IF_CONFIG_SMALL("RTSP input format"), @@ -411,4 +422,5 @@ AVInputFormat ff_rtsp_demuxer = { .flags = AVFMT_NOFILE, .read_play = 
rtsp_read_play, .read_pause = rtsp_read_pause, + .priv_class = &rtsp_demuxer_class, }; diff --git a/libavformat/rtspenc.c b/libavformat/rtspenc.c index 1cbcb49750..9120d187b4 100644 --- a/libavformat/rtspenc.c +++ b/libavformat/rtspenc.c @@ -33,9 +33,23 @@ #include "libavutil/intreadwrite.h" #include "libavutil/avstring.h" #include "url.h" +#include "libavutil/opt.h" +#include "rtpenc.h" #define SDP_MAX_SIZE 16384 +static const AVOption options[] = { + FF_RTP_FLAG_OPTS(RTSPState, rtp_muxer_flags), + { NULL }, +}; + +static const AVClass rtsp_muxer_class = { + .class_name = "RTSP muxer", + .item_name = av_default_item_name, + .option = options, + .version = LIBAVUTIL_VERSION_INT, +}; + int ff_rtsp_setup_output_streams(AVFormatContext *s, const char *addr) { RTSPState *rt = s->priv_data; @@ -238,5 +252,6 @@ AVOutputFormat ff_rtsp_muxer = { rtsp_write_packet, rtsp_write_close, .flags = AVFMT_NOFILE | AVFMT_GLOBALHEADER, + .priv_class = &rtsp_muxer_class, }; diff --git a/libavformat/sauce.c b/libavformat/sauce.c index 41e991e3e7..21cc95bccb 100644 --- a/libavformat/sauce.c +++ b/libavformat/sauce.c @@ -25,6 +25,7 @@ */ #include "libavutil/intreadwrite.h" +#include "libavutil/dict.h" #include "avformat.h" #include "sauce.h" @@ -32,7 +33,7 @@ int ff_sauce_read(AVFormatContext *avctx, uint64_t *fsize, int *got_width, int g { AVIOContext *pb = avctx->pb; char buf[36]; - int datatype, filetype, t1, t2, nb_comments, flags; + int datatype, filetype, t1, t2, nb_comments; uint64_t start_pos = avio_size(pb) - 128; avio_seek(pb, start_pos, SEEK_SET); @@ -44,7 +45,7 @@ int ff_sauce_read(AVFormatContext *avctx, uint64_t *fsize, int *got_width, int g #define GET_SAUCE_META(name,size) \ if (avio_read(pb, buf, size) == size && buf[0]) { \ buf[size] = 0; \ - av_metadata_set2(&avctx->metadata, name, buf, 0); \ + av_dict_set(&avctx->metadata, name, buf, 0); \ } GET_SAUCE_META("title", 35) @@ -57,7 +58,7 @@ int ff_sauce_read(AVFormatContext *avctx, uint64_t *fsize, int *got_width, int g 
t1 = avio_rl16(pb); t2 = avio_rl16(pb); nb_comments = avio_r8(pb); - flags = avio_r8(pb); + avio_skip(pb, 1); /* flags */ avio_skip(pb, 4); GET_SAUCE_META("encoder", 22); @@ -95,7 +96,7 @@ int ff_sauce_read(AVFormatContext *avctx, uint64_t *fsize, int *got_width, int g str[65*i + 64] = '\n'; } str[65*i] = 0; - av_metadata_set2(&avctx->metadata, "comment", str, AV_METADATA_DONT_STRDUP_VAL); + av_dict_set(&avctx->metadata, "comment", str, AV_DICT_DONT_STRDUP_VAL); } } diff --git a/libavformat/sdp.c b/libavformat/sdp.c index 923c57fbcd..32cef49916 100644 --- a/libavformat/sdp.c +++ b/libavformat/sdp.c @@ -21,8 +21,11 @@ #include <string.h> #include "libavutil/avstring.h" #include "libavutil/base64.h" +#include "libavutil/dict.h" #include "libavutil/parseutils.h" +#include "libavutil/opt.h" #include "libavcodec/xiph.h" +#include "libavcodec/mpeg4audio.h" #include "avformat.h" #include "internal.h" #include "avc.h" @@ -299,7 +302,72 @@ xiph_fail: return NULL; } -static char *sdp_write_media_attributes(char *buff, int size, AVCodecContext *c, int payload_type) +static int latm_context2profilelevel(AVCodecContext *c) +{ + /* MP4A-LATM + * The RTP payload format specification is described in RFC 3016 + * The encoding specifications are provided in ISO/IEC 14496-3 */ + + int profile_level = 0x2B; + + /* TODO: AAC Profile only supports AAC LC Object Type. 
+ * Different Object Types should implement different Profile Levels */ + + if (c->sample_rate <= 24000) { + if (c->channels <= 2) + profile_level = 0x28; // AAC Profile, Level 1 + } else if (c->sample_rate <= 48000) { + if (c->channels <= 2) { + profile_level = 0x29; // AAC Profile, Level 2 + } else if (c->channels <= 5) { + profile_level = 0x2A; // AAC Profile, Level 4 + } + } else if (c->sample_rate <= 96000) { + if (c->channels <= 5) { + profile_level = 0x2B; // AAC Profile, Level 5 + } + } + + return profile_level; +} + +static char *latm_context2config(AVCodecContext *c) +{ + /* MP4A-LATM + * The RTP payload format specification is described in RFC 3016 + * The encoding specifications are provided in ISO/IEC 14496-3 */ + + uint8_t config_byte[6]; + int rate_index; + char *config; + + for (rate_index = 0; rate_index < 16; rate_index++) + if (ff_mpeg4audio_sample_rates[rate_index] == c->sample_rate) + break; + if (rate_index == 16) { + av_log(c, AV_LOG_ERROR, "Unsupported sample rate\n"); + return NULL; + } + + config_byte[0] = 0x40; + config_byte[1] = 0; + config_byte[2] = 0x20 | rate_index; + config_byte[3] = c->channels << 4; + config_byte[4] = 0x3f; + config_byte[5] = 0xc0; + + config = av_malloc(6*2+1); + if (!config) { + av_log(c, AV_LOG_ERROR, "Cannot allocate memory for the config info.\n"); + return NULL; + } + ff_data_to_hex(config, config_byte, 6, 1); + config[12] = 0; + + return config; +} + +static char *sdp_write_media_attributes(char *buff, int size, AVCodecContext *c, int payload_type, AVFormatContext *fmt) { char *config = NULL; @@ -334,24 +402,35 @@ static char *sdp_write_media_attributes(char *buff, int size, AVCodecContext *c, payload_type, config ? 
config : ""); break; case CODEC_ID_AAC: - if (c->extradata_size) { - config = extradata2config(c); + if (fmt && fmt->oformat->priv_class && + av_opt_flag_is_set(fmt->priv_data, "rtpflags", "latm")) { + config = latm_context2config(c); + if (!config) + return NULL; + av_strlcatf(buff, size, "a=rtpmap:%d MP4A-LATM/%d/%d\r\n" + "a=fmtp:%d profile-level-id=%d;cpresent=0;config=%s\r\n", + payload_type, c->sample_rate, c->channels, + payload_type, latm_context2profilelevel(c), config); } else { - /* FIXME: maybe we can forge config information based on the - * codec parameters... - */ - av_log(c, AV_LOG_ERROR, "AAC with no global headers is currently not supported.\n"); - return NULL; + if (c->extradata_size) { + config = extradata2config(c); + } else { + /* FIXME: maybe we can forge config information based on the + * codec parameters... + */ + av_log(c, AV_LOG_ERROR, "AAC with no global headers is currently not supported.\n"); + return NULL; + } + if (config == NULL) { + return NULL; + } + av_strlcatf(buff, size, "a=rtpmap:%d MPEG4-GENERIC/%d/%d\r\n" + "a=fmtp:%d profile-level-id=1;" + "mode=AAC-hbr;sizelength=13;indexlength=3;" + "indexdeltalength=3%s\r\n", + payload_type, c->sample_rate, c->channels, + payload_type, config); } - if (config == NULL) { - return NULL; - } - av_strlcatf(buff, size, "a=rtpmap:%d MPEG4-GENERIC/%d/%d\r\n" - "a=fmtp:%d profile-level-id=1;" - "mode=AAC-hbr;sizelength=13;indexlength=3;" - "indexdeltalength=3%s\r\n", - payload_type, c->sample_rate, c->channels, - payload_type, config); break; case CODEC_ID_PCM_S16BE: if (payload_type >= RTP_PT_PRIVATE) @@ -448,7 +527,7 @@ static char *sdp_write_media_attributes(char *buff, int size, AVCodecContext *c, return buff; } -void ff_sdp_write_media(char *buff, int size, AVCodecContext *c, const char *dest_addr, const char *dest_type, int port, int ttl) +void ff_sdp_write_media(char *buff, int size, AVCodecContext *c, const char *dest_addr, const char *dest_type, int port, int ttl, AVFormatContext *fmt) 
{ const char *type; int payload_type; @@ -471,12 +550,12 @@ void ff_sdp_write_media(char *buff, int size, AVCodecContext *c, const char *des av_strlcatf(buff, size, "b=AS:%d\r\n", c->bit_rate / 1000); } - sdp_write_media_attributes(buff, size, c, payload_type); + sdp_write_media_attributes(buff, size, c, payload_type, fmt); } int av_sdp_create(AVFormatContext *ac[], int n_files, char *buf, int size) { - AVMetadataTag *title = av_metadata_get(ac[0]->metadata, "title", NULL, 0); + AVDictionaryEntry *title = av_dict_get(ac[0]->metadata, "title", NULL, 0); struct sdp_session_level s; int i, j, port, ttl, is_multicast; char dst[32], dst_type[5]; @@ -520,7 +599,8 @@ int av_sdp_create(AVFormatContext *ac[], int n_files, char *buf, int size) for (j = 0; j < ac[i]->nb_streams; j++) { ff_sdp_write_media(buf, size, ac[i]->streams[j]->codec, dst[0] ? dst : NULL, - dst_type, (port > 0) ? port + j * 2 : 0, ttl); + dst_type, (port > 0) ? port + j * 2 : 0, ttl, + ac[i]); if (port <= 0) { av_strlcatf(buf, size, "a=control:streamid=%d\r\n", i + j); @@ -536,7 +616,7 @@ int av_sdp_create(AVFormatContext *ac[], int n_files, char *buf, int size) return AVERROR(ENOSYS); } -void ff_sdp_write_media(char *buff, int size, AVCodecContext *c, const char *dest_addr, const char *dest_type, int port, int ttl) +void ff_sdp_write_media(char *buff, int size, AVCodecContext *c, const char *dest_addr, const char *dest_type, int port, int ttl, AVFormatContext *fmt) { } #endif diff --git a/libavformat/smacker.c b/libavformat/smacker.c index 2603a4d71c..29a66e79fa 100644 --- a/libavformat/smacker.c +++ b/libavformat/smacker.c @@ -233,7 +233,6 @@ static int smacker_read_packet(AVFormatContext *s, AVPacket *pkt) int i; int frame_size = 0; int palchange = 0; - int pos; if (url_feof(s->pb) || smk->cur_frame >= smk->frames) return AVERROR_EOF; @@ -244,7 +243,6 @@ static int smacker_read_packet(AVFormatContext *s, AVPacket *pkt) frame_size = smk->frm_size[smk->cur_frame] & (~3); flags = 
smk->frm_flags[smk->cur_frame]; /* handle palette change event */ - pos = avio_tell(s->pb); if(flags & SMACKER_PAL){ int size, sz, t, off, j, pos; uint8_t *pal = smk->pal; diff --git a/libavformat/sol.c b/libavformat/sol.c index 7ad894e304..c0d2c5d5a2 100644 --- a/libavformat/sol.c +++ b/libavformat/sol.c @@ -85,7 +85,6 @@ static int sol_channels(int magic, int type) static int sol_read_header(AVFormatContext *s, AVFormatParameters *ap) { - int size; unsigned int magic,tag; AVIOContext *pb = s->pb; unsigned int id, channels, rate, type; @@ -99,7 +98,7 @@ static int sol_read_header(AVFormatContext *s, return -1; rate = avio_rl16(pb); type = avio_r8(pb); - size = avio_rl32(pb); + avio_skip(pb, 4); /* size */ if (magic != 0x0B8D) avio_r8(pb); /* newer SOLs contain padding byte */ diff --git a/libavformat/soxdec.c b/libavformat/soxdec.c index 5e1e220f76..16d26ab4a7 100644 --- a/libavformat/soxdec.c +++ b/libavformat/soxdec.c @@ -30,6 +30,7 @@ */ #include "libavutil/intreadwrite.h" +#include "libavutil/dict.h" #include "avformat.h" #include "pcm.h" #include "sox.h" @@ -101,8 +102,8 @@ static int sox_read_header(AVFormatContext *s, } comment[comment_size] = 0; - av_metadata_set2(&s->metadata, "comment", comment, - AV_METADATA_DONT_STRDUP_VAL); + av_dict_set(&s->metadata, "comment", comment, + AV_DICT_DONT_STRDUP_VAL); } avio_skip(pb, header_size - SOX_FIXED_HDR - comment_size); diff --git a/libavformat/soxenc.c b/libavformat/soxenc.c index c93869c691..a07a2068c3 100644 --- a/libavformat/soxenc.c +++ b/libavformat/soxenc.c @@ -30,6 +30,7 @@ */ #include "libavutil/intreadwrite.h" +#include "libavutil/dict.h" #include "avformat.h" #include "avio_internal.h" #include "sox.h" @@ -43,10 +44,10 @@ static int sox_write_header(AVFormatContext *s) SoXContext *sox = s->priv_data; AVIOContext *pb = s->pb; AVCodecContext *enc = s->streams[0]->codec; - AVMetadataTag *comment; + AVDictionaryEntry *comment; size_t comment_len = 0, comment_size; - comment = av_metadata_get(s->metadata, 
"comment", NULL, 0); + comment = av_dict_get(s->metadata, "comment", NULL, 0); if (comment) comment_len = strlen(comment->value); comment_size = (comment_len + 7) & ~7; diff --git a/libavformat/spdif.h b/libavformat/spdif.h index ac5be7c31a..4b11de20d1 100644 --- a/libavformat/spdif.h +++ b/libavformat/spdif.h @@ -19,6 +19,9 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#ifndef AVFORMAT_SPDIF_H +#define AVFORMAT_SPDIF_H + #include <stdint.h> #define SYNCWORD1 0xF872 @@ -55,3 +58,5 @@ static const uint16_t spdif_mpeg_pkt_offset[2][3] = { }; void ff_spdif_bswap_buf16(uint16_t *dst, const uint16_t *src, int w); + +#endif /* AVFORMAT_SPDIF_H */ diff --git a/libavformat/tcp.c b/libavformat/tcp.c index 57be430c23..9b344ab98f 100644 --- a/libavformat/tcp.c +++ b/libavformat/tcp.c @@ -45,6 +45,7 @@ static int tcp_open(URLContext *h, const char *uri, int flags) char buf[256]; int ret; socklen_t optlen; + int timeout = 50; char hostname[1024],proto[1024],path[1024]; char portstr[10]; @@ -57,6 +58,9 @@ static int tcp_open(URLContext *h, const char *uri, int flags) if (p) { if (av_find_info_tag(buf, sizeof(buf), "listen", p)) listen_socket = 1; + if (av_find_info_tag(buf, sizeof(buf), "timeout", p)) { + timeout = strtol(buf, NULL, 10); + } } memset(&hints, 0, sizeof(hints)); hints.ai_family = AF_UNSPEC; @@ -73,6 +77,7 @@ static int tcp_open(URLContext *h, const char *uri, int flags) cur_ai = ai; restart: + ret = AVERROR(EIO); fd = socket(cur_ai->ai_family, cur_ai->ai_socktype, cur_ai->ai_protocol); if (fd < 0) goto fail; @@ -84,29 +89,29 @@ static int tcp_open(URLContext *h, const char *uri, int flags) fd1 = accept(fd, NULL, NULL); closesocket(fd); fd = fd1; + ff_socket_nonblock(fd, 1); } else { redo: + ff_socket_nonblock(fd, 1); ret = connect(fd, cur_ai->ai_addr, cur_ai->ai_addrlen); } - ff_socket_nonblock(fd, 1); - if (ret < 0) { - int timeout=50; struct pollfd p = {fd, POLLOUT, 0}; - if (ff_neterrno() == AVERROR(EINTR)) { + ret = 
ff_neterrno(); + if (ret == AVERROR(EINTR)) { if (url_interrupt_cb()) { ret = AVERROR_EXIT; goto fail1; } goto redo; } - if (ff_neterrno() != AVERROR(EINPROGRESS) && - ff_neterrno() != AVERROR(EAGAIN)) + if (ret != AVERROR(EINPROGRESS) && + ret != AVERROR(EAGAIN)) goto fail; /* wait until we are connected or until abort */ - for(;;) { + while(timeout--) { if (url_interrupt_cb()) { ret = AVERROR_EXIT; goto fail1; @@ -114,14 +119,11 @@ static int tcp_open(URLContext *h, const char *uri, int flags) ret = poll(&p, 1, 100); if (ret > 0) break; - if(!--timeout){ - av_log(NULL, AV_LOG_ERROR, - "TCP open %s:%d timeout\n", - hostname, port); - goto fail; - } } - + if (ret <= 0) { + ret = AVERROR(ETIMEDOUT); + goto fail; + } /* test error */ optlen = sizeof(ret); getsockopt (fd, SOL_SOCKET, SO_ERROR, &ret, &optlen); @@ -129,6 +131,7 @@ static int tcp_open(URLContext *h, const char *uri, int flags) av_log(NULL, AV_LOG_ERROR, "TCP connection to %s:%d failed: %s\n", hostname, port, strerror(ret)); + ret = AVERROR(ret); goto fail; } } @@ -151,7 +154,6 @@ static int tcp_open(URLContext *h, const char *uri, int flags) closesocket(fd); goto restart; } - ret = AVERROR(EIO); fail1: if (fd >= 0) closesocket(fd); diff --git a/libavformat/tta.c b/libavformat/tta.c index d5f8e1ed61..c37039d0da 100644 --- a/libavformat/tta.c +++ b/libavformat/tta.c @@ -22,6 +22,7 @@ #include "libavcodec/get_bits.h" #include "avformat.h" #include "id3v1.h" +#include "libavutil/dict.h" typedef struct { int totalframes, currentframe; @@ -43,7 +44,7 @@ static int tta_read_header(AVFormatContext *s, AVFormatParameters *ap) int i, channels, bps, samplerate, datalen, framelen; uint64_t framepos, start_offset; - if (!av_metadata_get(s->metadata, "", NULL, AV_METADATA_IGNORE_SUFFIX)) + if (!av_dict_get(s->metadata, "", NULL, AV_DICT_IGNORE_SUFFIX)) ff_id3v1_read(s); start_offset = avio_tell(s->pb); diff --git a/libavformat/tty.c b/libavformat/tty.c index b288d15393..ddf283d8a3 100644 --- a/libavformat/tty.c +++ 
b/libavformat/tty.c @@ -26,14 +26,19 @@ #include "libavutil/intreadwrite.h" #include "libavutil/avstring.h" +#include "libavutil/log.h" +#include "libavutil/dict.h" +#include "libavutil/opt.h" +#include "libavutil/parseutils.h" #include "avformat.h" #include "sauce.h" -#define LINE_RATE 6000 /* characters per second */ - typedef struct { + AVClass *class; int chars_per_frame; uint64_t fsize; /**< file size less metadata buffer */ + char *video_size;/**< A string describing video size, set by a private option. */ + char *framerate; /**< Set by a private option. */ } TtyDemuxContext; /** @@ -56,7 +61,7 @@ static int efi_read(AVFormatContext *avctx, uint64_t start_pos) return -1; \ if (avio_read(pb, buf, size) == size) { \ buf[len] = 0; \ - av_metadata_set2(&avctx->metadata, name, buf, 0); \ + av_dict_set(&avctx->metadata, name, buf, 0); \ } GET_EFI_META("filename", 12) @@ -70,23 +75,44 @@ static int read_header(AVFormatContext *avctx, AVFormatParameters *ap) { TtyDemuxContext *s = avctx->priv_data; + int width = 0, height = 0, ret = 0; AVStream *st = av_new_stream(avctx, 0); - if (!st) - return AVERROR(ENOMEM); + AVRational framerate; + + if (!st) { + ret = AVERROR(ENOMEM); + goto fail; + } st->codec->codec_tag = 0; st->codec->codec_type = AVMEDIA_TYPE_VIDEO; st->codec->codec_id = CODEC_ID_ANSI; - if (ap->width) st->codec->width = ap->width; - if (ap->height) st->codec->height = ap->height; - if (!ap->time_base.num) { - av_set_pts_info(st, 60, 1, 25); - } else { - av_set_pts_info(st, 60, ap->time_base.num, ap->time_base.den); + if (s->video_size && (ret = av_parse_video_size(&width, &height, s->video_size)) < 0) { + av_log (avctx, AV_LOG_ERROR, "Couldn't parse video size.\n"); + goto fail; } + if ((ret = av_parse_video_rate(&framerate, s->framerate)) < 0) { + av_log(avctx, AV_LOG_ERROR, "Could not parse framerate: %s.\n", s->framerate); + goto fail; + } +#if FF_API_FORMAT_PARAMETERS + if (ap->width > 0) + width = ap->width; + if (ap->height > 0) + height = 
ap->height; + if (ap->time_base.num) + framerate = (AVRational){ap->time_base.den, ap->time_base.num}; +#endif + st->codec->width = width; + st->codec->height = height; + av_set_pts_info(st, 60, framerate.den, framerate.num); /* simulate tty display speed */ - s->chars_per_frame = FFMAX(av_q2d(st->time_base) * (ap->sample_rate ? ap->sample_rate : LINE_RATE), 1); +#if FF_API_FORMAT_PARAMETERS + if (ap->sample_rate) + s->chars_per_frame = ap->sample_rate; +#endif + s->chars_per_frame = FFMAX(av_q2d(st->time_base)*s->chars_per_frame, 1); if (avctx->pb->seekable) { s->fsize = avio_size(avctx->pb); @@ -98,7 +124,8 @@ static int read_header(AVFormatContext *avctx, avio_seek(avctx->pb, 0, SEEK_SET); } - return 0; +fail: + return ret; } static int read_packet(AVFormatContext *avctx, AVPacket *pkt) @@ -124,6 +151,22 @@ static int read_packet(AVFormatContext *avctx, AVPacket *pkt) return 0; } +#define OFFSET(x) offsetof(TtyDemuxContext, x) +#define DEC AV_OPT_FLAG_DECODING_PARAM +static const AVOption options[] = { + { "chars_per_frame", "", offsetof(TtyDemuxContext, chars_per_frame), FF_OPT_TYPE_INT, {.dbl = 6000}, 1, INT_MAX, AV_OPT_FLAG_DECODING_PARAM}, + { "video_size", "A string describing frame size, such as 640x480 or hd720.", OFFSET(video_size), FF_OPT_TYPE_STRING, {.str = NULL}, 0, 0, DEC }, + { "framerate", "", OFFSET(framerate), FF_OPT_TYPE_STRING, {.str = "25"}, 0, 0, DEC }, + { NULL }, +}; + +static const AVClass tty_demuxer_class = { + .class_name = "TTY demuxer", + .item_name = av_default_item_name, + .option = options, + .version = LIBAVUTIL_VERSION_INT, +}; + AVInputFormat ff_tty_demuxer = { .name = "tty", .long_name = NULL_IF_CONFIG_SMALL("Tele-typewriter"), @@ -131,4 +174,5 @@ AVInputFormat ff_tty_demuxer = { .read_header = read_header, .read_packet = read_packet, .extensions = "ans,art,asc,diz,ice,nfo,txt,vt", + .priv_class = &tty_demuxer_class, }; diff --git a/libavformat/udp.c b/libavformat/udp.c index 021f529d64..78077e6595 100644 --- 
a/libavformat/udp.c +++ b/libavformat/udp.c @@ -35,7 +35,11 @@ #include "network.h" #include "os_support.h" #include "url.h" + +#if HAVE_PTHREADS #include <pthread.h> +#endif + #include <sys/time.h> #ifndef IPV6_ADD_MEMBERSHIP @@ -57,9 +61,10 @@ typedef struct { /* Circular Buffer variables for use in UDP receive code */ int circular_buffer_size; AVFifoBuffer *fifo; - int circular_buffer_available_max; int circular_buffer_error; +#if HAVE_PTHREADS pthread_t circular_buffer_thread; +#endif } UDPContext; #define UDP_TX_BUF_SIZE 32768 @@ -509,6 +514,7 @@ static int udp_open(URLContext *h, const char *uri, int flags) s->udp_fd = udp_fd; +#if HAVE_PTHREADS if (!is_output && s->circular_buffer_size) { /* start the task going */ s->fifo = av_fifo_alloc(s->circular_buffer_size); @@ -517,12 +523,13 @@ static int udp_open(URLContext *h, const char *uri, int flags) goto fail; } } +#endif return 0; fail: if (udp_fd >= 0) closesocket(udp_fd); - av_fifo_free(s->fifo); + av_fifo_free(s->fifo); av_free(s); return AVERROR(EIO); } @@ -532,7 +539,6 @@ static int udp_read(URLContext *h, uint8_t *buf, int size) UDPContext *s = h->priv_data; int ret; int avail; - int left; fd_set rfds; struct timeval tv; @@ -597,7 +603,6 @@ static int udp_close(URLContext *h) if (s->is_multicast && !(h->flags & AVIO_WRONLY)) udp_leave_multicast_group(s->udp_fd, (struct sockaddr *)&s->dest_addr); closesocket(s->udp_fd); - av_log( h, AV_LOG_INFO, "circular_buffer_info max:%d%%\r\n", (s->circular_buffer_available_max*100)/s->circular_buffer_size); av_fifo_free(s->fifo); av_free(s); return 0; diff --git a/libavformat/url.h b/libavformat/url.h index 56366644aa..5ef6a21d7b 100644 --- a/libavformat/url.h +++ b/libavformat/url.h @@ -173,4 +173,4 @@ int ffurl_register_protocol(URLProtocol *protocol, int size); int ff_udp_set_remote_url(URLContext *h, const char *uri); int ff_udp_get_local_port(URLContext *h); -#endif //AVFORMAT_URL_H +#endif /* AVFORMAT_URL_H */ diff --git a/libavformat/utils.c 
b/libavformat/utils.c index 16297af16a..454cd5843a 100644 --- a/libavformat/utils.c +++ b/libavformat/utils.c @@ -27,6 +27,7 @@ #include "libavcodec/internal.h" #include "libavcodec/raw.h" #include "libavutil/opt.h" +#include "libavutil/dict.h" #include "metadata.h" #include "id3v2.h" #include "libavutil/avstring.h" @@ -743,6 +744,7 @@ int av_read_packet(AVFormatContext *s, AVPacket *pkt) s->streams[i]->request_probe = -1; continue; } + st= s->streams[pkt->stream_index]; switch(st->codec->codec_type){ @@ -1516,8 +1518,6 @@ int av_index_search_timestamp(AVStream *st, int64_t wanted_timestamp, wanted_timestamp, flags); } -#define DEBUG_SEEK - int av_seek_frame_binary(AVFormatContext *s, int stream_index, int64_t target_ts, int flags){ AVInputFormat *avif= s->iformat; int64_t av_uninit(pos_min), av_uninit(pos_max), pos, pos_limit; @@ -1529,9 +1529,7 @@ int av_seek_frame_binary(AVFormatContext *s, int stream_index, int64_t target_ts if (stream_index < 0) return -1; -#ifdef DEBUG_SEEK - av_log(s, AV_LOG_DEBUG, "read_seek: %d %"PRId64"\n", stream_index, target_ts); -#endif + av_dlog(s, "read_seek: %d %"PRId64"\n", stream_index, target_ts); ts_max= ts_min= AV_NOPTS_VALUE; @@ -1548,10 +1546,8 @@ int av_seek_frame_binary(AVFormatContext *s, int stream_index, int64_t target_ts if(e->timestamp <= target_ts || e->pos == e->min_distance){ pos_min= e->pos; ts_min= e->timestamp; -#ifdef DEBUG_SEEK - av_log(s, AV_LOG_DEBUG, "using cached pos_min=0x%"PRIx64" dts_min=%"PRId64"\n", - pos_min,ts_min); -#endif + av_dlog(s, "using cached pos_min=0x%"PRIx64" dts_min=%"PRId64"\n", + pos_min,ts_min); }else{ assert(index==0); } @@ -1564,10 +1560,8 @@ int av_seek_frame_binary(AVFormatContext *s, int stream_index, int64_t target_ts pos_max= e->pos; ts_max= e->timestamp; pos_limit= pos_max - e->min_distance; -#ifdef DEBUG_SEEK - av_log(s, AV_LOG_DEBUG, "using cached pos_max=0x%"PRIx64" pos_limit=0x%"PRIx64" dts_max=%"PRId64"\n", - pos_max,pos_limit, ts_max); -#endif + av_dlog(s, "using cached 
pos_max=0x%"PRIx64" pos_limit=0x%"PRIx64" dts_max=%"PRId64"\n", + pos_max,pos_limit, ts_max); } } @@ -1589,9 +1583,7 @@ int64_t av_gen_search(AVFormatContext *s, int stream_index, int64_t target_ts, i int64_t start_pos, filesize; int no_change; -#ifdef DEBUG_SEEK - av_log(s, AV_LOG_DEBUG, "gen_seek: %d %"PRId64"\n", stream_index, target_ts); -#endif + av_dlog(s, "gen_seek: %d %"PRId64"\n", stream_index, target_ts); if(ts_min == AV_NOPTS_VALUE){ pos_min = s->data_offset; @@ -1633,11 +1625,8 @@ int64_t av_gen_search(AVFormatContext *s, int stream_index, int64_t target_ts, i no_change=0; while (pos_min < pos_limit) { -#ifdef DEBUG_SEEK - av_log(s, AV_LOG_DEBUG, "pos_min=0x%"PRIx64" pos_max=0x%"PRIx64" dts_min=%"PRId64" dts_max=%"PRId64"\n", - pos_min, pos_max, - ts_min, ts_max); -#endif + av_dlog(s, "pos_min=0x%"PRIx64" pos_max=0x%"PRIx64" dts_min=%"PRId64" dts_max=%"PRId64"\n", + pos_min, pos_max, ts_min, ts_max); assert(pos_limit <= pos_max); if(no_change==0){ @@ -1664,11 +1653,9 @@ int64_t av_gen_search(AVFormatContext *s, int stream_index, int64_t target_ts, i no_change++; else no_change=0; -#ifdef DEBUG_SEEK - av_log(s, AV_LOG_DEBUG, "%"PRId64" %"PRId64" %"PRId64" / %"PRId64" %"PRId64" %"PRId64" target:%"PRId64" limit:%"PRId64" start:%"PRId64" noc:%d\n", - pos_min, pos, pos_max, ts_min, ts, ts_max, target_ts, pos_limit, - start_pos, no_change); -#endif + av_dlog(s, "%"PRId64" %"PRId64" %"PRId64" / %"PRId64" %"PRId64" %"PRId64" target:%"PRId64" limit:%"PRId64" start:%"PRId64" noc:%d\n", + pos_min, pos, pos_max, ts_min, ts, ts_max, target_ts, + pos_limit, start_pos, no_change); if(ts == AV_NOPTS_VALUE){ av_log(s, AV_LOG_ERROR, "read_timestamp() failed in the middle\n"); return -1; @@ -1687,14 +1674,12 @@ int64_t av_gen_search(AVFormatContext *s, int stream_index, int64_t target_ts, i pos = (flags & AVSEEK_FLAG_BACKWARD) ? pos_min : pos_max; ts = (flags & AVSEEK_FLAG_BACKWARD) ? 
ts_min : ts_max; -#ifdef DEBUG_SEEK pos_min = pos; ts_min = read_timestamp(s, stream_index, &pos_min, INT64_MAX); pos_min++; ts_max = read_timestamp(s, stream_index, &pos_min, INT64_MAX); - av_log(s, AV_LOG_DEBUG, "pos=0x%"PRIx64" %"PRId64"<=%"PRId64"<=%"PRId64"\n", - pos, ts_min, target_ts, ts_max); -#endif + av_dlog(s, "pos=0x%"PRIx64" %"PRId64"<=%"PRId64"<=%"PRId64"\n", + pos, ts_min, target_ts, ts_max); *ts_ret= ts; return pos; } @@ -1813,10 +1798,12 @@ int av_seek_frame(AVFormatContext *s, int stream_index, int64_t timestamp, int f return 0; } - if(s->iformat->read_timestamp) + if(s->iformat->read_timestamp && !(s->iformat->flags & AVFMT_NOBINSEARCH)) return av_seek_frame_binary(s, stream_index, timestamp, flags); - else + else if (!(s->iformat->flags & AVFMT_NOGENSEARCH)) return av_seek_frame_generic(s, stream_index, timestamp, flags); + else + return -1; } int avformat_seek_file(AVFormatContext *s, int stream_index, int64_t min_ts, int64_t ts, int64_t max_ts, int flags) @@ -2085,7 +2072,7 @@ static void av_estimate_timings(AVFormatContext *ic, int64_t old_offset) #if 0 { int i; - AVStream *st; + AVStream av_unused *st; for(i = 0;i < ic->nb_streams; i++) { st = ic->streams[i]; printf("%d: start_time: %0.3f duration: %0.3f\n", @@ -2219,7 +2206,7 @@ enum CodecID av_codec_get_id(const AVCodecTag * const *tags, unsigned int tag) static void compute_chapters_end(AVFormatContext *s) { unsigned int i, j; - int64_t max_time = s->duration + (s->start_time == AV_NOPTS_VALUE) ? 0 : s->start_time; + int64_t max_time = s->duration + ((s->start_time == AV_NOPTS_VALUE) ? 
0 : s->start_time); for (i = 0; i < s->nb_chapters; i++) if (s->chapters[i]->end == AV_NOPTS_VALUE) { @@ -2338,6 +2325,8 @@ int av_find_stream_info(AVFormatContext *ic) the correct fps */ if (av_q2d(st->time_base) > 0.0005) fps_analyze_framecount *= 2; + if (ic->fps_probe_size >= 0) + fps_analyze_framecount = ic->fps_probe_size; /* variable fps and no guess at the real fps */ if( tb_unreliable(st->codec) && !(st->r_frame_rate.num && st->avg_frame_rate.num) && st->info->duration_count < fps_analyze_framecount @@ -2396,8 +2385,9 @@ int av_find_stream_info(AVFormatContext *ic) st = ic->streams[pkt->stream_index]; if (st->codec_info_nb_frames>1) { - if (st->time_base.den > 0 && av_rescale_q(st->info->codec_info_duration, st->time_base, AV_TIME_BASE_Q) >= ic->max_analyze_duration) { - av_log(ic, AV_LOG_WARNING, "max_analyze_duration reached\n"); + int64_t t; + if (st->time_base.den > 0 && (t=av_rescale_q(st->info->codec_info_duration, st->time_base, AV_TIME_BASE_Q)) >= ic->max_analyze_duration) { + av_log(ic, AV_LOG_WARNING, "max_analyze_duration %d reached at %"PRId64"\n", ic->max_analyze_duration, t); break; } st->info->codec_info_duration += pkt->duration; @@ -2655,6 +2645,10 @@ void avformat_free_context(AVFormatContext *s) int i; AVStream *st; + av_opt_free(s); + if (s->iformat && s->iformat->priv_class && s->priv_data) + av_opt_free(s->priv_data); + for(i=0;i<s->nb_streams;i++) { /* free all data in a stream component */ st = s->streams[i]; @@ -2662,7 +2656,7 @@ void avformat_free_context(AVFormatContext *s) av_parser_close(st->parser); av_free_packet(&st->cur_pkt); } - av_metadata_free(&st->metadata); + av_dict_free(&st->metadata); av_free(st->index_entries); av_free(st->codec->extradata); av_free(st->codec->subtitle_header); @@ -2689,12 +2683,12 @@ void avformat_free_context(AVFormatContext *s) #if FF_API_OLD_METADATA av_free(s->chapters[s->nb_chapters]->title); #endif - av_metadata_free(&s->chapters[s->nb_chapters]->metadata); + 
av_dict_free(&s->chapters[s->nb_chapters]->metadata); av_free(s->chapters[s->nb_chapters]); } av_freep(&s->chapters); av_metadata_free(&s->metadata); - av_freep(&s->key); +// av_freep(&s->key); av_free(s); } @@ -2770,9 +2764,7 @@ AVProgram *av_new_program(AVFormatContext *ac, int id) AVProgram *program=NULL; int i; -#ifdef DEBUG_SI - av_log(ac, AV_LOG_DEBUG, "new_program: id=0x%04x\n", id); -#endif + av_dlog(ac, "new_program: id=0x%04x\n", id); for(i=0; i<ac->nb_programs; i++) if(ac->programs[i]->id == id) @@ -2808,7 +2800,7 @@ AVChapter *ff_new_chapter(AVFormatContext *s, int id, AVRational time_base, int6 #if FF_API_OLD_METADATA av_free(chapter->title); #endif - av_metadata_set2(&chapter->metadata, "title", title, 0); + av_dict_set(&chapter->metadata, "title", title, 0); chapter->id = id; chapter->time_base= time_base; chapter->start = start; @@ -2836,29 +2828,36 @@ int av_set_parameters(AVFormatContext *s, AVFormatParameters *ap) return 0; } -AVFormatContext *avformat_alloc_output_context(const char *format, AVOutputFormat *oformat, const char *filename){ - AVFormatContext *s= avformat_alloc_context(); - if(!s) +int avformat_alloc_output_context2(AVFormatContext **avctx, AVOutputFormat *oformat, + const char *format, const char *filename) +{ + AVFormatContext *s = avformat_alloc_context(); + int ret = 0; + + *avctx = NULL; + if (!s) goto nomem; - if(!oformat){ + if (!oformat) { if (format) { oformat = av_guess_format(format, NULL, NULL); if (!oformat) { av_log(s, AV_LOG_ERROR, "Requested output format '%s' is not a suitable output format\n", format); + ret = AVERROR(EINVAL); goto error; } } else { oformat = av_guess_format(NULL, filename, NULL); if (!oformat) { + ret = AVERROR(EINVAL); av_log(s, AV_LOG_ERROR, "Unable to find a suitable output format for '%s'\n", - filename); + filename); goto error; } } } - s->oformat= oformat; + s->oformat = oformat; if (s->oformat->priv_data_size > 0) { s->priv_data = av_mallocz(s->oformat->priv_data_size); if (!s->priv_data) 
@@ -2870,15 +2869,27 @@ AVFormatContext *avformat_alloc_output_context(const char *format, AVOutputForma } else s->priv_data = NULL; - if(filename) + if (filename) av_strlcpy(s->filename, filename, sizeof(s->filename)); - return s; + *avctx = s; + return 0; nomem: av_log(s, AV_LOG_ERROR, "Out of memory\n"); + ret = AVERROR(ENOMEM); error: avformat_free_context(s); - return NULL; + return ret; +} + +#if FF_API_ALLOC_OUTPUT_CONTEXT +AVFormatContext *avformat_alloc_output_context(const char *format, + AVOutputFormat *oformat, const char *filename) +{ + AVFormatContext *avctx; + int ret = avformat_alloc_output_context2(&avctx, oformat, format, filename); + return ret < 0 ? NULL : avctx; } +#endif static int validate_codec_tag(AVFormatContext *s, AVStream *st) { @@ -2988,7 +2999,7 @@ int av_write_header(AVFormatContext *s) /* set muxer identification string */ if (s->nb_streams && !(s->streams[0]->codec->flags & CODEC_FLAG_BITEXACT)) { - av_metadata_set2(&s->metadata, "encoder", LIBAVFORMAT_IDENT, 0); + av_dict_set(&s->metadata, "encoder", LIBAVFORMAT_IDENT, 0); } if(s->oformat->write_header){ @@ -3061,7 +3072,7 @@ static int compute_pkt_fields2(AVFormatContext *s, AVStream *st, AVPacket *pkt){ pkt->dts= st->pts_buffer[0]; } - if(st->cur_dts && st->cur_dts != AV_NOPTS_VALUE && st->cur_dts >= pkt->dts){ + if(st->cur_dts && st->cur_dts != AV_NOPTS_VALUE && ((!(s->oformat->flags & AVFMT_TS_NONSTRICT) && st->cur_dts >= pkt->dts) || st->cur_dts > pkt->dts)){ av_log(s, AV_LOG_ERROR, "Application provided invalid, non monotonically increasing dts to muxer in stream %d: %"PRId64" >= %"PRId64"\n", st->index, st->cur_dts, pkt->dts); @@ -3269,6 +3280,8 @@ fail: av_freep(&s->streams[i]->priv_data); av_freep(&s->streams[i]->index_entries); } + if (s->iformat && s->iformat->priv_class) + av_opt_free(s->priv_data); av_freep(&s->priv_data); return ret; } @@ -3308,13 +3321,13 @@ static void print_fps(double d, const char *postfix){ else av_log(NULL, AV_LOG_INFO, ", %1.0fk %s", d/1000, 
postfix); } -static void dump_metadata(void *ctx, AVMetadata *m, const char *indent) +static void dump_metadata(void *ctx, AVDictionary *m, const char *indent) { - if(m && !(m->count == 1 && av_metadata_get(m, "language", NULL, 0))){ - AVMetadataTag *tag=NULL; + if(m && !(m->count == 1 && av_dict_get(m, "language", NULL, 0))){ + AVDictionaryEntry *tag=NULL; av_log(ctx, AV_LOG_INFO, "%sMetadata:\n", indent); - while((tag=av_metadata_get(m, "", tag, AV_METADATA_IGNORE_SUFFIX))) { + while((tag=av_dict_get(m, "", tag, AV_DICT_IGNORE_SUFFIX))) { if(strcmp("language", tag->key)) av_log(ctx, AV_LOG_INFO, "%s %-16s: %s\n", indent, tag->key, tag->value); } @@ -3328,7 +3341,7 @@ static void dump_stream_format(AVFormatContext *ic, int i, int index, int is_out int flags = (is_output ? ic->oformat->flags : ic->iformat->flags); AVStream *st = ic->streams[i]; int g = av_gcd(st->time_base.num, st->time_base.den); - AVMetadataTag *lang = av_metadata_get(st->metadata, "language", NULL, 0); + AVDictionaryEntry *lang = av_dict_get(st->metadata, "language", NULL, 0); avcodec_string(buf, sizeof(buf), st->codec, is_output); av_log(NULL, AV_LOG_INFO, " Stream #%d.%d", index, i); /* the pid is an important information, so we display it */ @@ -3452,7 +3465,7 @@ void av_dump_format(AVFormatContext *ic, if(ic->nb_programs) { int j, k, total = 0; for(j=0; j<ic->nb_programs; j++) { - AVMetadataTag *name = av_metadata_get(ic->programs[j]->metadata, + AVDictionaryEntry *name = av_dict_get(ic->programs[j]->metadata, "name", NULL, 0); av_log(NULL, AV_LOG_INFO, " Program %d %s\n", ic->programs[j]->id, name ? 
name->value : ""); diff --git a/libavformat/version.h b/libavformat/version.h index eec52b2363..7292007610 100644 --- a/libavformat/version.h +++ b/libavformat/version.h @@ -24,7 +24,7 @@ #include "libavutil/avutil.h" #define LIBAVFORMAT_VERSION_MAJOR 52 -#define LIBAVFORMAT_VERSION_MINOR 108 +#define LIBAVFORMAT_VERSION_MINOR 109 #define LIBAVFORMAT_VERSION_MICRO 0 #define LIBAVFORMAT_VERSION_INT AV_VERSION_INT(LIBAVFORMAT_VERSION_MAJOR, \ @@ -113,5 +113,14 @@ #ifndef FF_API_SDP_CREATE #define FF_API_SDP_CREATE (LIBAVFORMAT_VERSION_MAJOR < 54) #endif +#ifndef FF_API_ALLOC_OUTPUT_CONTEXT +#define FF_API_ALLOC_OUTPUT_CONTEXT (LIBAVFORMAT_VERSION_MAJOR < 54) +#endif +#ifndef FF_API_FORMAT_PARAMETERS +#define FF_API_FORMAT_PARAMETERS (LIBAVFORMAT_VERSION_MAJOR < 54) +#endif +#ifndef FF_API_FLAG_RTP_HINT +#define FF_API_FLAG_RTP_HINT (LIBAVFORMAT_VERSION_MAJOR < 54) +#endif -#endif //AVFORMAT_VERSION_H +#endif /* AVFORMAT_VERSION_H */ diff --git a/libavformat/vorbiscomment.c b/libavformat/vorbiscomment.c index 59a403f209..9b38e6a791 100644 --- a/libavformat/vorbiscomment.c +++ b/libavformat/vorbiscomment.c @@ -23,6 +23,7 @@ #include "metadata.h" #include "vorbiscomment.h" #include "libavcodec/bytestream.h" +#include "libavutil/dict.h" /** * VorbisComment metadata conversion mapping. 
@@ -36,15 +37,15 @@ const AVMetadataConv ff_vorbiscomment_metadata_conv[] = { { 0 } }; -int ff_vorbiscomment_length(AVMetadata *m, const char *vendor_string, +int ff_vorbiscomment_length(AVDictionary *m, const char *vendor_string, unsigned *count) { int len = 8; len += strlen(vendor_string); *count = 0; if (m) { - AVMetadataTag *tag = NULL; - while ((tag = av_metadata_get(m, "", tag, AV_METADATA_IGNORE_SUFFIX))) { + AVDictionaryEntry *tag = NULL; + while ((tag = av_dict_get(m, "", tag, AV_DICT_IGNORE_SUFFIX))) { len += 4 +strlen(tag->key) + 1 + strlen(tag->value); (*count)++; } @@ -52,15 +53,15 @@ int ff_vorbiscomment_length(AVMetadata *m, const char *vendor_string, return len; } -int ff_vorbiscomment_write(uint8_t **p, AVMetadata **m, +int ff_vorbiscomment_write(uint8_t **p, AVDictionary **m, const char *vendor_string, const unsigned count) { bytestream_put_le32(p, strlen(vendor_string)); bytestream_put_buffer(p, vendor_string, strlen(vendor_string)); if (*m) { - AVMetadataTag *tag = NULL; + AVDictionaryEntry *tag = NULL; bytestream_put_le32(p, count); - while ((tag = av_metadata_get(*m, "", tag, AV_METADATA_IGNORE_SUFFIX))) { + while ((tag = av_dict_get(*m, "", tag, AV_DICT_IGNORE_SUFFIX))) { unsigned int len1 = strlen(tag->key); unsigned int len2 = strlen(tag->value); bytestream_put_le32(p, len1+1+len2); diff --git a/libavformat/vorbiscomment.h b/libavformat/vorbiscomment.h index 3dfe9d226b..b147092071 100644 --- a/libavformat/vorbiscomment.h +++ b/libavformat/vorbiscomment.h @@ -35,13 +35,13 @@ * @param count Pointer to store the number of tags in m because m->count is "not allowed" * @return The length in bytes. */ -int ff_vorbiscomment_length(AVMetadata *m, const char *vendor_string, +int ff_vorbiscomment_length(AVDictionary *m, const char *vendor_string, unsigned *count); /** * Writes a VorbisComment into a buffer. The buffer, p, must have enough * data to hold the whole VorbisComment. 
The minimum size required can be - * obtained by passing the same AVMetadata and vendor_string to + * obtained by passing the same AVDictionary and vendor_string to * ff_vorbiscomment_length() * * @param p The buffer in which to write. @@ -49,7 +49,7 @@ int ff_vorbiscomment_length(AVMetadata *m, const char *vendor_string, * @param vendor_string The vendor string to write. * @param count The number of tags in m because m->count is "not allowed" */ -int ff_vorbiscomment_write(uint8_t **p, AVMetadata **m, +int ff_vorbiscomment_write(uint8_t **p, AVDictionary **m, const char *vendor_string, const unsigned count); extern const AVMetadataConv ff_vorbiscomment_metadata_conv[]; diff --git a/libavformat/vqf.c b/libavformat/vqf.c index 94ba543ba5..e06f39349b 100644 --- a/libavformat/vqf.c +++ b/libavformat/vqf.c @@ -21,6 +21,7 @@ #include "avformat.h" #include "libavutil/intreadwrite.h" +#include "libavutil/dict.h" typedef struct VqfContext { int frame_bit_len; @@ -56,7 +57,7 @@ static void add_metadata(AVFormatContext *s, const char *tag, return; avio_read(s->pb, buf, len); buf[len] = 0; - av_metadata_set2(&s->metadata, tag, buf, AV_METADATA_DONT_STRDUP_VAL); + av_dict_set(&s->metadata, tag, buf, AV_DICT_DONT_STRDUP_VAL); } static int vqf_read_header(AVFormatContext *s, AVFormatParameters *ap) diff --git a/libavformat/wav.c b/libavformat/wav.c index 588aff512b..c5dbd631b4 100644 --- a/libavformat/wav.c +++ b/libavformat/wav.c @@ -26,6 +26,7 @@ #include "avio_internal.h" #include "pcm.h" #include "riff.h" +#include "metadata.h" typedef struct { int64_t data; @@ -184,6 +185,115 @@ static int wav_probe(AVProbeData *p) return 0; } +static int wav_parse_fmt_tag(AVFormatContext *s, int64_t size, AVStream **st) +{ + AVIOContext *pb = s->pb; + int ret; + + /* parse fmt header */ + *st = av_new_stream(s, 0); + if (!*st) + return AVERROR(ENOMEM); + + ret = ff_get_wav_header(pb, (*st)->codec, size); + if (ret < 0) + return ret; + (*st)->need_parsing = AVSTREAM_PARSE_FULL; + + 
av_set_pts_info(*st, 64, 1, (*st)->codec->sample_rate); + + return 0; +} + +static inline int wav_parse_bext_string(AVFormatContext *s, const char *key, int length) +{ + char temp[257]; + int ret; + + if ((ret = avio_read(s->pb, temp, length)) < 0) + return ret; + + temp[length] = 0; + + if (strlen(temp)) + return av_dict_set(&s->metadata, key, temp, 0); + + return 0; +} + +static int wav_parse_bext_tag(AVFormatContext *s, int64_t size) +{ + char temp[131], *coding_history; + int ret, x; + uint64_t time_reference; + int64_t umid_parts[8], umid_mask = 0; + + if ((ret = wav_parse_bext_string(s, "description", 256)) < 0 || + (ret = wav_parse_bext_string(s, "originator", 32)) < 0 || + (ret = wav_parse_bext_string(s, "originator_reference", 32)) < 0 || + (ret = wav_parse_bext_string(s, "origination_date", 10)) < 0 || + (ret = wav_parse_bext_string(s, "origination_time", 8)) < 0) + return ret; + + time_reference = avio_rl64(s->pb); + snprintf(temp, sizeof(temp), "%"PRIu64, time_reference); + if ((ret = av_dict_set(&s->metadata, "time_reference", temp, 0)) < 0) + return ret; + + /* check if version is >= 1, in which case an UMID may be present */ + if (avio_rl16(s->pb) >= 1) { + for (x = 0; x < 8; x++) + umid_mask |= umid_parts[x] = avio_rb64(s->pb); + + if (umid_mask) { + /* the string formatting below is per SMPTE 330M-2004 Annex C */ + if (umid_parts[4] == 0 && umid_parts[5] == 0 && umid_parts[6] == 0 && umid_parts[7] == 0) { + /* basic UMID */ + snprintf(temp, sizeof(temp), "0x%016"PRIX64"%016"PRIX64"%016"PRIX64"%016"PRIX64, + umid_parts[0], umid_parts[1], umid_parts[2], umid_parts[3]); + } else { + /* extended UMID */ + snprintf(temp, sizeof(temp), "0x%016"PRIX64"%016"PRIX64"%016"PRIX64"%016"PRIX64 + "0x%016"PRIX64"%016"PRIX64"%016"PRIX64"%016"PRIX64, + umid_parts[0], umid_parts[1], umid_parts[2], umid_parts[3], + umid_parts[4], umid_parts[5], umid_parts[6], umid_parts[7]); + } + + if ((ret = av_dict_set(&s->metadata, "umid", temp, 0)) < 0) + return ret; + } + + 
avio_skip(s->pb, 190); + } else + avio_skip(s->pb, 254); + + if (size > 602) { + /* CodingHistory present */ + size -= 602; + + if (!(coding_history = av_malloc(size+1))) + return AVERROR(ENOMEM); + + if ((ret = avio_read(s->pb, coding_history, size)) < 0) + return ret; + + coding_history[size] = 0; + if ((ret = av_dict_set(&s->metadata, "coding_history", coding_history, + AV_METADATA_DONT_STRDUP_VAL)) < 0) + return ret; + } + + return 0; +} + +static const AVMetadataConv wav_metadata_conv[] = { + {"description", "comment" }, + {"originator", "encoded_by" }, + {"origination_date", "date" }, + {"origination_time", "creation_time"}, + {0}, +}; + /* wav input */ static int wav_read_header(AVFormatContext *s, AVFormatParameters *ap) @@ -195,7 +305,8 @@ static int wav_read_header(AVFormatContext *s, AVIOContext *pb = s->pb; AVStream *st; WAVContext *wav = s->priv_data; - int ret; + int ret, got_fmt = 0; + int64_t next_tag_ofs, data_ofs = -1; /* check RIFF header */ tag = avio_rl32(pb); @@ -212,54 +323,89 @@ static int wav_read_header(AVFormatContext *s, if (avio_rl32(pb) != MKTAG('d', 's', '6', '4')) return -1; size = avio_rl32(pb); - if (size < 16) + if (size < 24) return -1; avio_rl64(pb); /* RIFF size */ data_size = avio_rl64(pb); sample_count = avio_rl64(pb); - avio_skip(pb, size - 16); /* skip rest of ds64 chunk */ + if (data_size < 0 || sample_count < 0) { + av_log(s, AV_LOG_ERROR, "negative data_size and/or sample_count in " + "ds64: data_size = %"PRId64", sample_count = %"PRId64"\n", + data_size, sample_count); + return AVERROR_INVALIDDATA; + } + avio_skip(pb, size - 24); /* skip rest of ds64 chunk */ } - /* parse fmt header */ - size = find_tag(pb, MKTAG('f', 'm', 't', ' ')); - if (size < 0) - return -1; - st = av_new_stream(s, 0); - if (!st) - return AVERROR(ENOMEM); + for (;;) { + size = next_tag(pb, &tag); + next_tag_ofs = avio_tell(pb) + size; - ret = ff_get_wav_header(pb, st->codec, size); - if (ret < 0) - return ret; - st->need_parsing = 
AVSTREAM_PARSE_FULL; + if (url_feof(pb)) + break; - av_set_pts_info(st, 64, 1, st->codec->sample_rate); + switch (tag) { + case MKTAG('f', 'm', 't', ' '): + /* only parse the first 'fmt ' tag found */ + if (!got_fmt && (ret = wav_parse_fmt_tag(s, size, &st) < 0)) { + return ret; + } else if (got_fmt) + av_log(s, AV_LOG_WARNING, "found more than one 'fmt ' tag\n"); - for (;;) { - if (url_feof(pb)) - return -1; - size = next_tag(pb, &tag); - if (tag == MKTAG('d', 'a', 't', 'a')){ + got_fmt = 1; + break; + case MKTAG('d', 'a', 't', 'a'): + if (!got_fmt) { + av_log(s, AV_LOG_ERROR, "found no 'fmt ' tag before the 'data' tag\n"); + return AVERROR_INVALIDDATA; + } + + if (rf64) { + next_tag_ofs = wav->data_end = avio_tell(pb) + data_size; + } else { + data_size = size; + next_tag_ofs = wav->data_end = size ? next_tag_ofs : INT64_MAX; + } + + data_ofs = avio_tell(pb); + + /* don't look for footer metadata if we can't seek or if we don't + * know where the data tag ends + */ + if (!pb->seekable || (!rf64 && !size)) + goto break_loop; + break; + case MKTAG('f','a','c','t'): + if(!sample_count) + sample_count = avio_rl32(pb); + break; + case MKTAG('b','e','x','t'): + if ((ret = wav_parse_bext_tag(s, size)) < 0) + return ret; + break; + } + + /* seek to next tag unless we know that we'll run into EOF */ + if ((avio_size(pb) > 0 && next_tag_ofs >= avio_size(pb)) || + avio_seek(pb, next_tag_ofs, SEEK_SET) < 0) { break; - }else if (tag == MKTAG('f','a','c','t') && !sample_count){ - sample_count = avio_rl32(pb); - size -= 4; } - avio_skip(pb, size); } - if (rf64) - size = data_size; - if (size < 0) - return -1; - if (!size) { - wav->data_end = INT64_MAX; - } else - wav->data_end= avio_tell(pb) + size; +break_loop: + if (data_ofs < 0) { + av_log(s, AV_LOG_ERROR, "no 'data' tag found\n"); + return AVERROR_INVALIDDATA; + } + + avio_seek(pb, data_ofs, SEEK_SET); if (!sample_count && st->codec->channels && av_get_bits_per_sample(st->codec->codec_id)) - sample_count = (size<<3) / 
(st->codec->channels * (uint64_t)av_get_bits_per_sample(st->codec->codec_id)); + sample_count = (data_size<<3) / (st->codec->channels * (uint64_t)av_get_bits_per_sample(st->codec->codec_id)); if (sample_count) st->duration = sample_count; + + ff_metadata_conv_ctx(s, NULL, wav_metadata_conv); + return 0; } diff --git a/libavformat/wc3movie.c b/libavformat/wc3movie.c index 0f2e077859..eb2eae1c9c 100644 --- a/libavformat/wc3movie.c +++ b/libavformat/wc3movie.c @@ -28,6 +28,7 @@ */ #include "libavutil/intreadwrite.h" +#include "libavutil/dict.h" #include "avformat.h" #define FORM_TAG MKTAG('F', 'O', 'R', 'M') @@ -130,8 +131,8 @@ static int wc3_read_header(AVFormatContext *s, if ((ret = avio_read(pb, buffer, size)) != size) return AVERROR(EIO); buffer[size] = 0; - av_metadata_set2(&s->metadata, "title", buffer, - AV_METADATA_DONT_STRDUP_VAL); + av_dict_set(&s->metadata, "title", buffer, + AV_DICT_DONT_STRDUP_VAL); break; case SIZE_TAG: diff --git a/libavformat/wtvdec.c b/libavformat/wtvdec.c index 5cbec0576a..5fe7e9fe12 100644 --- a/libavformat/wtvdec.c +++ b/libavformat/wtvdec.c @@ -431,7 +431,7 @@ static void get_attachment(AVFormatContext *s, AVIOContext *pb, int length) st = av_new_stream(s, 0); if (!st) goto done; - av_metadata_set2(&st->metadata, "title", description, 0); + av_dict_set(&st->metadata, "title", description, 0); st->codec->codec_id = CODEC_ID_MJPEG; st->codec->codec_type = AVMEDIA_TYPE_ATTACHMENT; st->codec->extradata = av_mallocz(filesize); @@ -494,7 +494,7 @@ static void get_tag(AVFormatContext *s, AVIOContext *pb, const char *key, int ty return; } - av_metadata_set2(&s->metadata, key, buf, 0); + av_dict_set(&s->metadata, key, buf, 0); av_freep(&buf); } @@ -818,7 +818,7 @@ static int parse_chunks(AVFormatContext *s, int mode, int64_t seekts, int *len_p avio_read(pb, language, 3); if (language[0]) { language[3] = 0; - av_metadata_set2(&st->metadata, "language", language, 0); + av_dict_set(&st->metadata, "language", language, 0); if 
(!strcmp(language, "nar") || !strcmp(language, "NAR")) st->disposition |= AV_DISPOSITION_VISUAL_IMPAIRED; } diff --git a/libavformat/wv.c b/libavformat/wv.c index 52ec9264a0..9da416e580 100644 --- a/libavformat/wv.c +++ b/libavformat/wv.c @@ -21,6 +21,7 @@ #include "libavutil/audioconvert.h" #include "libavutil/intreadwrite.h" +#include "libavutil/dict.h" #include "avformat.h" #include "apetag.h" #include "id3v1.h" @@ -226,7 +227,7 @@ static int wv_read_header(AVFormatContext *s, if(s->pb->seekable) { int64_t cur = avio_tell(s->pb); ff_ape_parse_tag(s); - if(!av_metadata_get(s->metadata, "", NULL, AV_METADATA_IGNORE_SUFFIX)) + if(!av_dict_get(s->metadata, "", NULL, AV_DICT_IGNORE_SUFFIX)) ff_id3v1_read(s); avio_seek(s->pb, cur, SEEK_SET); } diff --git a/libavformat/yuv4mpeg.c b/libavformat/yuv4mpeg.c index a852568a64..90b222d1d4 100644 --- a/libavformat/yuv4mpeg.c +++ b/libavformat/yuv4mpeg.c @@ -94,7 +94,7 @@ static int yuv4_write_packet(AVFormatContext *s, AVPacket *pkt) AVPicture *picture; int* first_pkt = s->priv_data; int width, height, h_chroma_shift, v_chroma_shift; - int i, m; + int i; char buf2[Y4M_LINE_MAX+1]; char buf1[20]; uint8_t *ptr, *ptr1, *ptr2; @@ -114,7 +114,7 @@ static int yuv4_write_packet(AVFormatContext *s, AVPacket *pkt) /* construct frame header */ - m = snprintf(buf1, sizeof(buf1), "%s\n", Y4M_FRAME_MAGIC); + snprintf(buf1, sizeof(buf1), "%s\n", Y4M_FRAME_MAGIC); avio_write(pb, buf1, strlen(buf1)); width = st->codec->width; @@ -154,6 +154,12 @@ static int yuv4_write_header(AVFormatContext *s) if (s->nb_streams != 1) return AVERROR(EIO); + if (s->streams[0]->codec->codec_id != CODEC_ID_RAWVIDEO) { + av_log(s, AV_LOG_ERROR, + "A non-rawvideo stream was selected, but yuv4mpeg only handles rawvideo streams\n"); + return AVERROR(EINVAL); + } + if (s->streams[0]->codec->pix_fmt == PIX_FMT_YUV411P) { av_log(s, AV_LOG_ERROR, "Warning: generating rarely used 4:1:1 YUV stream, some mjpegtools might not work.\n"); } diff --git a/libavutil/Makefile 
b/libavutil/Makefile index 6fd261489e..f6f5fd3b91 100644 --- a/libavutil/Makefile +++ b/libavutil/Makefile @@ -27,6 +27,7 @@ HEADERS = adler32.h \ mathematics.h \ md5.h \ mem.h \ + dict.h \ opt.h \ parseutils.h \ pixdesc.h \ @@ -61,6 +62,7 @@ OBJS = adler32.o \ mathematics.o \ md5.o \ mem.o \ + dict.o \ opt.o \ parseutils.o \ pixdesc.o \ @@ -76,7 +78,7 @@ OBJS-$(ARCH_ARM) += arm/cpu.o OBJS-$(ARCH_PPC) += ppc/cpu.o OBJS-$(ARCH_X86) += x86/cpu.o -TESTPROGS = adler32 aes base64 cpu crc des lls md5 pca sha tree +TESTPROGS = adler32 aes base64 cpu crc des eval lls md5 pca sha tree TESTPROGS-$(HAVE_LZO1X_999_COMPRESS) += lzo DIRS = arm bfin sh4 x86 diff --git a/libavutil/arm/intmath.h b/libavutil/arm/intmath.h index 8f03d4bf90..4130177549 100644 --- a/libavutil/arm/intmath.h +++ b/libavutil/arm/intmath.h @@ -33,12 +33,12 @@ #define FASTDIV FASTDIV static av_always_inline av_const int FASTDIV(int a, int b) { - int r, t; - __asm__ volatile("cmp %3, #2 \n\t" - "ldr %1, [%4, %3, lsl #2] \n\t" - "lsrle %0, %2, #1 \n\t" - "smmulgt %0, %1, %2 \n\t" - : "=&r"(r), "=&r"(t) : "r"(a), "r"(b), "r"(ff_inverse)); + int r; + __asm__ ("cmp %2, #2 \n\t" + "ldr %0, [%3, %2, lsl #2] \n\t" + "lsrle %0, %1, #1 \n\t" + "smmulgt %0, %0, %1 \n\t" + : "=&r"(r) : "r"(a), "r"(b), "r"(ff_inverse) : "cc"); return r; } @@ -46,7 +46,7 @@ static av_always_inline av_const int FASTDIV(int a, int b) static av_always_inline av_const uint8_t av_clip_uint8_arm(int a) { unsigned x; - __asm__ volatile ("usat %0, #8, %1" : "=r"(x) : "r"(a)); + __asm__ ("usat %0, #8, %1" : "=r"(x) : "r"(a)); return x; } @@ -54,7 +54,7 @@ static av_always_inline av_const uint8_t av_clip_uint8_arm(int a) static av_always_inline av_const uint8_t av_clip_int8_arm(int a) { unsigned x; - __asm__ volatile ("ssat %0, #8, %1" : "=r"(x) : "r"(a)); + __asm__ ("ssat %0, #8, %1" : "=r"(x) : "r"(a)); return x; } @@ -62,7 +62,7 @@ static av_always_inline av_const uint8_t av_clip_int8_arm(int a) static av_always_inline av_const uint16_t 
av_clip_uint16_arm(int a) { unsigned x; - __asm__ volatile ("usat %0, #16, %1" : "=r"(x) : "r"(a)); + __asm__ ("usat %0, #16, %1" : "=r"(x) : "r"(a)); return x; } @@ -70,18 +70,27 @@ static av_always_inline av_const uint16_t av_clip_uint16_arm(int a) static av_always_inline av_const int16_t av_clip_int16_arm(int a) { int x; - __asm__ volatile ("ssat %0, #16, %1" : "=r"(x) : "r"(a)); + __asm__ ("ssat %0, #16, %1" : "=r"(x) : "r"(a)); return x; } +#define av_clip_uintp2 av_clip_uintp2_arm +static av_always_inline av_const unsigned av_clip_uintp2_arm(int a, int p) +{ + unsigned x; + __asm__ ("usat %0, %2, %1" : "=r"(x) : "r"(a), "i"(p)); + return x; +} + + #else /* HAVE_ARMV6 */ #define FASTDIV FASTDIV static av_always_inline av_const int FASTDIV(int a, int b) { int r, t; - __asm__ volatile("umull %1, %0, %2, %3" - : "=&r"(r), "=&r"(t) : "r"(a), "r"(ff_inverse[b])); + __asm__ ("umull %1, %0, %2, %3" + : "=&r"(r), "=&r"(t) : "r"(a), "r"(ff_inverse[b])); return r; } @@ -91,10 +100,11 @@ static av_always_inline av_const int FASTDIV(int a, int b) static av_always_inline av_const int32_t av_clipl_int32_arm(int64_t a) { int x, y; - __asm__ volatile ("adds %1, %R2, %Q2, lsr #31 \n\t" - "mvnne %1, #1<<31 \n\t" - "eorne %0, %1, %R2, asr #31 \n\t" - : "=r"(x), "=&r"(y) : "r"(a)); + __asm__ ("adds %1, %R2, %Q2, lsr #31 \n\t" + "mvnne %1, #1<<31 \n\t" + "moveq %0, %Q2 \n\t" + "eorne %0, %1, %R2, asr #31 \n\t" + : "=r"(x), "=&r"(y) : "r"(a)); return x; } diff --git a/libavutil/arm/intreadwrite.h b/libavutil/arm/intreadwrite.h index 339f8856fa..0292aabafd 100644 --- a/libavutil/arm/intreadwrite.h +++ b/libavutil/arm/intreadwrite.h @@ -55,22 +55,21 @@ static av_always_inline void AV_WN32(void *p, uint32_t v) #define AV_RN64 AV_RN64 static av_always_inline uint64_t AV_RN64(const void *p) { - union { uint64_t v; uint32_t hl[2]; } v; - __asm__ ("ldr %0, %2 \n\t" - "ldr %1, %3 \n\t" - : "=&r"(v.hl[0]), "=r"(v.hl[1]) + uint64_t v; + __asm__ ("ldr %Q0, %1 \n\t" + "ldr %R0, %2 \n\t" + : 
"=&r"(v) : "m"(*(const uint32_t*)p), "m"(*((const uint32_t*)p+1))); - return v.v; + return v; } #define AV_WN64 AV_WN64 static av_always_inline void AV_WN64(void *p, uint64_t v) { - union { uint64_t v; uint32_t hl[2]; } vv = { v }; - __asm__ ("str %2, %0 \n\t" - "str %3, %1 \n\t" + __asm__ ("str %Q2, %0 \n\t" + "str %R2, %1 \n\t" : "=m"(*(uint32_t*)p), "=m"(*((uint32_t*)p+1)) - : "r"(vv.hl[0]), "r"(vv.hl[1])); + : "r"(v)); } #endif /* HAVE_INLINE_ASM */ diff --git a/libavutil/audioconvert.c b/libavutil/audioconvert.c index 8bc43f8936..60743de22a 100644 --- a/libavutil/audioconvert.c +++ b/libavutil/audioconvert.c @@ -23,8 +23,8 @@ * audio conversion routines */ -#include "libavutil/avutil.h" -#include "libavutil/avstring.h" +#include "avstring.h" +#include "avutil.h" #include "audioconvert.h" static const char * const channel_names[] = { diff --git a/libavutil/avassert.h b/libavutil/avassert.h index 8dd4878c11..e100d0bfdd 100644 --- a/libavutil/avassert.h +++ b/libavutil/avassert.h @@ -63,4 +63,4 @@ #define av_assert2(cond) ((void)0) #endif -#endif +#endif /* AVUTIL_AVASSERT_H */ diff --git a/libavutil/avutil.h b/libavutil/avutil.h index 3f2c839e3e..bb8f198746 100644 --- a/libavutil/avutil.h +++ b/libavutil/avutil.h @@ -40,7 +40,7 @@ #define AV_VERSION(a, b, c) AV_VERSION_DOT(a, b, c) #define LIBAVUTIL_VERSION_MAJOR 50 -#define LIBAVUTIL_VERSION_MINOR 41 +#define LIBAVUTIL_VERSION_MINOR 42 #define LIBAVUTIL_VERSION_MICRO 0 #define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \ @@ -61,6 +61,9 @@ #define FF_API_OLD_EVAL_NAMES (LIBAVUTIL_VERSION_MAJOR < 52) #endif +#ifndef FF_API_GET_BITS_PER_SAMPLE_FMT +#define FF_API_GET_BITS_PER_SAMPLE_FMT (LIBAVUTIL_VERSION_MAJOR < 52) +#endif /** * Return the LIBAVUTIL_VERSION_INT constant. 
diff --git a/libavutil/cpu.c b/libavutil/cpu.c index 66262b9afd..c439a830c5 100644 --- a/libavutil/cpu.c +++ b/libavutil/cpu.c @@ -19,10 +19,15 @@ #include "cpu.h" #include "config.h" +static int flags, checked; + +void av_force_cpu_flags(int arg){ + flags = arg; + checked = 1; +} + int av_get_cpu_flags(void) { - static int flags, checked; - if (checked) return flags; diff --git a/libavutil/cpu.h b/libavutil/cpu.h index ff0c2e64ac..da93fb5f69 100644 --- a/libavutil/cpu.h +++ b/libavutil/cpu.h @@ -46,9 +46,16 @@ */ int av_get_cpu_flags(void); + +/** + * Disables cpu detection and forces the specified flags. + */ +void av_force_cpu_flags(int flags); + + /* The following CPU-specific functions shall not be called directly. */ int ff_get_cpu_flags_arm(void); int ff_get_cpu_flags_ppc(void); int ff_get_cpu_flags_x86(void); -#endif /* AVUTIL_CPU_H */ +#endif /* AVUTIL_CPU_H */ diff --git a/libavutil/dict.c b/libavutil/dict.c new file mode 100644 index 0000000000..332eccd679 --- /dev/null +++ b/libavutil/dict.c @@ -0,0 +1,110 @@ +/* + * copyright (c) 2009 Michael Niedermayer + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <strings.h> +#include "dict.h" +#include "internal.h" +#include "mem.h" + +AVDictionaryEntry * +av_dict_get(AVDictionary *m, const char *key, const AVDictionaryEntry *prev, int flags) +{ + unsigned int i, j; + + if(!m) + return NULL; + + if(prev) i= prev - m->elems + 1; + else i= 0; + + for(; i<m->count; i++){ + const char *s= m->elems[i].key; + if(flags & AV_DICT_MATCH_CASE) for(j=0; s[j] == key[j] && key[j]; j++); + else for(j=0; toupper(s[j]) == toupper(key[j]) && key[j]; j++); + if(key[j]) + continue; + if(s[j] && !(flags & AV_DICT_IGNORE_SUFFIX)) + continue; + return &m->elems[i]; + } + return NULL; +} + +int av_dict_set(AVDictionary **pm, const char *key, const char *value, int flags) +{ + AVDictionary *m = *pm; + AVDictionaryEntry *tag = av_dict_get(m, key, NULL, flags); + + if(!m) + m = *pm = av_mallocz(sizeof(*m)); + + if(tag) { + if (flags & AV_DICT_DONT_OVERWRITE) + return 0; + av_free(tag->value); + av_free(tag->key); + *tag = m->elems[--m->count]; + } else { + AVDictionaryEntry *tmp = av_realloc(m->elems, (m->count+1) * sizeof(*m->elems)); + if(tmp) { + m->elems = tmp; + } else + return AVERROR(ENOMEM); + } + if (value) { + if (flags & AV_DICT_DONT_STRDUP_KEY) { + m->elems[m->count].key = key; + } else + m->elems[m->count].key = av_strdup(key ); + if (flags & AV_DICT_DONT_STRDUP_VAL) { + m->elems[m->count].value = value; + } else + m->elems[m->count].value = av_strdup(value); + m->count++; + } + if (!m->count) { + av_free(m->elems); + av_freep(pm); + } + + return 0; +} + +void av_dict_free(AVDictionary **pm) +{ + AVDictionary *m = *pm; + + if (m) { + while(m->count--) { + av_free(m->elems[m->count].key); + av_free(m->elems[m->count].value); + } + av_free(m->elems); + } + av_freep(pm); +} + +void 
av_dict_copy(AVDictionary **dst, AVDictionary *src, int flags) +{ + AVDictionaryEntry *t = NULL; + + while ((t = av_dict_get(src, "", t, AV_DICT_IGNORE_SUFFIX))) + av_dict_set(dst, t->key, t->value, flags); +} diff --git a/libavutil/dict.h b/libavutil/dict.h new file mode 100644 index 0000000000..19cc0915d8 --- /dev/null +++ b/libavutil/dict.h @@ -0,0 +1,78 @@ +/* + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file Public dictionary API. + */ + +#ifndef AVUTIL_DICT_H +#define AVUTIL_DICT_H + +#define AV_DICT_MATCH_CASE 1 +#define AV_DICT_IGNORE_SUFFIX 2 +#define AV_DICT_DONT_STRDUP_KEY 4 +#define AV_DICT_DONT_STRDUP_VAL 8 +#define AV_DICT_DONT_OVERWRITE 16 ///< Don't overwrite existing entries. + +typedef struct { + char *key; + char *value; +} AVDictionaryEntry; + +typedef struct AVDictionary AVDictionary; + +/** + * Get a dictionary entry with matching key. + * + * @param prev Set to the previous matching element to find the next. + * If set to NULL the first matching element is returned. + * @param flags Allows case as well as suffix-insensitive comparisons. + * @return Found entry or NULL, changing key or value leads to undefined behavior. 
+ */ +AVDictionaryEntry * +av_dict_get(AVDictionary *m, const char *key, const AVDictionaryEntry *prev, int flags); + +/** + * Set the given entry in *pm, overwriting an existing entry. + * + * @param pm pointer to a pointer to a dictionary struct. If *pm is NULL + * a dictionary struct is allocated and put in *pm. + * @param key entry key to add to *pm (will be av_strduped depending on flags) + * @param value entry value to add to *pm (will be av_strduped depending on flags). + * Passing a NULL value will cause an existing tag to be deleted. + * @return >= 0 on success otherwise an error code <0 + */ +int av_dict_set(AVDictionary **pm, const char *key, const char *value, int flags); + +/** + * Copy entries from one AVDictionary struct into another. + * @param dst pointer to a pointer to a AVDictionary struct. If *dst is NULL, + * this function will allocate a struct for you and put it in *dst + * @param src pointer to source AVDictionary struct + * @param flags flags to use when setting entries in *dst + * @note metadata is read using the AV_DICT_IGNORE_SUFFIX flag + */ +void av_dict_copy(AVDictionary **dst, AVDictionary *src, int flags); + +/** + * Free all the memory allocated for an AVDictionary struct. 
+ */ +void av_dict_free(AVDictionary **m); + +#endif // AVUTIL_DICT_H diff --git a/libavutil/error.c b/libavutil/error.c index d6419ca9b8..42de50dc6a 100644 --- a/libavutil/error.c +++ b/libavutil/error.c @@ -16,6 +16,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#undef _GNU_SOURCE #include "avutil.h" #include "avstring.h" diff --git a/libavutil/eval.c b/libavutil/eval.c index 98b4e0ac52..36cc76d3bf 100644 --- a/libavutil/eval.c +++ b/libavutil/eval.c @@ -26,7 +26,7 @@ * see http://joe.hotchkiss.com/programming/eval/eval.html */ -#include "libavutil/avutil.h" +#include "avutil.h" #include "eval.h" typedef struct Parser { @@ -75,7 +75,10 @@ double av_strtod(const char *numstr, char **tail) { double d; char *next; - d = strtod(numstr, &next); + if(numstr[0]=='0' && (numstr[1]|0x20)=='x') { + d = strtol(numstr, &next, 16); + } else + d = strtod(numstr, &next); /* if parsing succeeded, check for and interpret postfixes */ if (next!=numstr) { if (*next >= 'E' && *next <= 'z') { @@ -122,7 +125,7 @@ struct AVExpr { e_mod, e_max, e_min, e_eq, e_gt, e_gte, e_pow, e_mul, e_div, e_add, e_last, e_st, e_while, e_floor, e_ceil, e_trunc, - e_sqrt, + e_sqrt, e_not, } type; double value; // is sign in other types union { @@ -150,6 +153,7 @@ static double eval_expr(Parser *p, AVExpr *e) case e_ceil : return e->value * ceil (eval_expr(p, e->param[0])); case e_trunc: return e->value * trunc(eval_expr(p, e->param[0])); case e_sqrt: return e->value * sqrt (eval_expr(p, e->param[0])); + case e_not: return e->value * eval_expr(p, e->param[0]) == 0; case e_while: { double d = NAN; while (eval_expr(p, e->param[0])) @@ -285,6 +289,8 @@ static int parse_primary(AVExpr **e, Parser *p) else if (strmatch(next, "ceil" )) d->type = e_ceil; else if (strmatch(next, "trunc" )) d->type = e_trunc; else if (strmatch(next, "sqrt" )) d->type = e_sqrt; + else if (strmatch(next, "not" )) d->type = e_not; + else if (strmatch(next, "pow" )) d->type = e_pow; else { for 
(i=0; p->func1_names && p->func1_names[i]; i++) { if (strmatch(next, p->func1_names[i])) { @@ -453,6 +459,7 @@ static int verify_expr(AVExpr *e) case e_ceil: case e_trunc: case e_sqrt: + case e_not: return verify_expr(e->param[0]); default: return verify_expr(e->param[0]) && verify_expr(e->param[1]); } @@ -634,6 +641,13 @@ int main(void) "ceil(-123.123)", "sqrt(1764)", "sqrt(-1)", + "not(1)", + "not(NAN)", + "not(0)", + "pow(0,1.23)", + "pow(PI,1.23)", + "PI^1.23", + "pow(-1,1.23)", NULL }; diff --git a/libavutil/imgutils.c b/libavutil/imgutils.c index 0f449ff427..57b3103d6a 100644 --- a/libavutil/imgutils.c +++ b/libavutil/imgutils.c @@ -23,7 +23,7 @@ #include "imgutils.h" #include "internal.h" -#include "libavutil/pixdesc.h" +#include "pixdesc.h" void av_image_fill_max_pixsteps(int max_pixsteps[4], int max_pixstep_comps[4], const AVPixFmtDescriptor *pixdesc) @@ -43,24 +43,41 @@ void av_image_fill_max_pixsteps(int max_pixsteps[4], int max_pixstep_comps[4], } } +static inline +int image_get_linesize(int width, int plane, + int max_step, int max_step_comp, + const AVPixFmtDescriptor *desc) +{ + int s, shifted_w, linesize; + + if (width < 0) + return AVERROR(EINVAL); + s = (max_step_comp == 1 || max_step_comp == 2) ? 
desc->log2_chroma_w : 0; + shifted_w = ((width + (1 << s) - 1)) >> s; + if (shifted_w && max_step > INT_MAX / shifted_w) + return AVERROR(EINVAL); + linesize = max_step * shifted_w; + if (desc->flags & PIX_FMT_BITSTREAM) + linesize = (linesize + 7) >> 3; + return linesize; +} + int av_image_get_linesize(enum PixelFormat pix_fmt, int width, int plane) { const AVPixFmtDescriptor *desc = &av_pix_fmt_descriptors[pix_fmt]; int max_step [4]; /* max pixel step for each plane */ int max_step_comp[4]; /* the component for each plane which has the max pixel step */ - int s; - if (desc->flags & PIX_FMT_BITSTREAM) - return (width * (desc->comp[0].step_minus1+1) + 7) >> 3; + if ((unsigned)pix_fmt >= PIX_FMT_NB || desc->flags & PIX_FMT_HWACCEL) + return AVERROR(EINVAL); av_image_fill_max_pixsteps(max_step, max_step_comp, desc); - s = (max_step_comp[plane] == 1 || max_step_comp[plane] == 2) ? desc->log2_chroma_w : 0; - return max_step[plane] * (((width + (1 << s) - 1)) >> s); + return image_get_linesize(width, plane, max_step[plane], max_step_comp[plane], desc); } int av_image_fill_linesizes(int linesizes[4], enum PixelFormat pix_fmt, int width) { - int i; + int i, ret; const AVPixFmtDescriptor *desc = &av_pix_fmt_descriptors[pix_fmt]; int max_step [4]; /* max pixel step for each plane */ int max_step_comp[4]; /* the component for each plane which has the max pixel step */ @@ -70,20 +87,11 @@ int av_image_fill_linesizes(int linesizes[4], enum PixelFormat pix_fmt, int widt if ((unsigned)pix_fmt >= PIX_FMT_NB || desc->flags & PIX_FMT_HWACCEL) return AVERROR(EINVAL); - if (desc->flags & PIX_FMT_BITSTREAM) { - if (width > (INT_MAX -7) / (desc->comp[0].step_minus1+1)) - return AVERROR(EINVAL); - linesizes[0] = (width * (desc->comp[0].step_minus1+1) + 7) >> 3; - return 0; - } - av_image_fill_max_pixsteps(max_step, max_step_comp, desc); for (i = 0; i < 4; i++) { - int s = (max_step_comp[i] == 1 || max_step_comp[i] == 2) ? 
desc->log2_chroma_w : 0; - int shifted_w = ((width + (1 << s) - 1)) >> s; - if (max_step[i] > INT_MAX / shifted_w) - return AVERROR(EINVAL); - linesizes[i] = max_step[i] * shifted_w; + if ((ret = image_get_linesize(width, i, max_step[i], max_step_comp[i], desc)) < 0) + return ret; + linesizes[i] = ret; } return 0; diff --git a/libavutil/imgutils.h b/libavutil/imgutils.h index c181863579..150f2ca3af 100644 --- a/libavutil/imgutils.h +++ b/libavutil/imgutils.h @@ -24,8 +24,8 @@ * misc image utilities */ -#include "libavutil/pixdesc.h" #include "avutil.h" +#include "pixdesc.h" /** * Compute the max pixel step for each plane of an image with a diff --git a/libavutil/internal.h b/libavutil/internal.h index b5cc2a6cd1..eecc1280df 100644 --- a/libavutil/internal.h +++ b/libavutil/internal.h @@ -37,6 +37,13 @@ #include "config.h" #include "attributes.h" #include "timer.h" +#include "cpu.h" +#include "dict.h" + +struct AVDictionary { + int count; + AVDictionaryEntry *elems; +}; #ifndef attribute_align_arg #if ARCH_X86_32 && AV_GCC_VERSION_AT_LEAST(4,2) @@ -141,7 +148,6 @@ #define strncpy strncpy_is_forbidden_due_to_security_issues_use_av_strlcpy #undef exit #define exit exit_is_forbidden -#ifndef LIBAVFORMAT_BUILD #undef printf #define printf please_use_av_log_instead_of_printf #undef fprintf @@ -150,7 +156,6 @@ #define puts please_use_av_log_instead_of_puts #undef perror #define perror please_use_av_log_instead_of_perror -#endif #define FF_ALLOC_OR_GOTO(ctx, p, size, label)\ {\ @@ -222,4 +227,19 @@ # define ONLY_IF_THREADS_ENABLED(x) NULL #endif +#if HAVE_MMX +/** + * Empty mmx state. + * this must be called between any dsp function and float/double code. 
+ * for example sin(); dsp->idct_put(); emms_c(); cos() + */ +static av_always_inline void emms_c(void) +{ + if(av_get_cpu_flags() & AV_CPU_FLAG_MMX) + __asm__ volatile ("emms" ::: "memory"); +} +#else /* HAVE_MMX */ +#define emms_c() +#endif /* HAVE_MMX */ + #endif /* AVUTIL_INTERNAL_H */ diff --git a/libavutil/inverse.c b/libavutil/inverse.c index 74df1c2473..74c7a933ea 100644 --- a/libavutil/inverse.c +++ b/libavutil/inverse.c @@ -19,7 +19,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#include "libavutil/intmath.h" +#include <stdint.h> /* a*inverse[b]>>32 == a/b for all 0<=a<=16909558 && 2<=b<=256 * for a>16909558, is an overestimate by less than 1 part in 1<<24 */ diff --git a/libavutil/mem.c b/libavutil/mem.c index de77e73ddb..fa2c388ff4 100644 --- a/libavutil/mem.c +++ b/libavutil/mem.c @@ -65,6 +65,8 @@ void free(void *ptr); memory allocator. You do not need to suppress this file because the linker will do it automatically. */ +#define MAX_MALLOC_SIZE INT_MAX + void *av_malloc(FF_INTERNAL_MEM_TYPE size) { void *ptr = NULL; @@ -73,7 +75,7 @@ void *av_malloc(FF_INTERNAL_MEM_TYPE size) #endif /* let's disallow possible ambiguous cases */ - if(size > (INT_MAX-32) ) + if (size > (MAX_MALLOC_SIZE-32)) return NULL; #if CONFIG_MEMALIGN_HACK @@ -84,6 +86,7 @@ void *av_malloc(FF_INTERNAL_MEM_TYPE size) ptr = (char*)ptr + diff; ((char*)ptr)[-1]= diff; #elif HAVE_POSIX_MEMALIGN + if (size) //OSX on SDK 10.6 has a broken posix_memalign implementation if (posix_memalign(&ptr,ALIGN,size)) ptr = NULL; #elif HAVE_MEMALIGN @@ -127,7 +130,7 @@ void *av_realloc(void *ptr, FF_INTERNAL_MEM_TYPE size) #endif /* let's disallow possible ambiguous cases */ - if(size > (INT_MAX-16) ) + if (size > (MAX_MALLOC_SIZE-16)) return NULL; #if CONFIG_MEMALIGN_HACK diff --git a/libavutil/opt.c b/libavutil/opt.c index 720ad99e8e..d57a547377 100644 --- a/libavutil/opt.c +++ b/libavutil/opt.c @@ -45,7 +45,7 @@ const AVOption *av_find_opt(void *v, const 
char *name, const char *unit, int mas const AVOption *av_next_option(void *obj, const AVOption *last) { if (last && last[1].name) return ++last; - else if (last) return NULL; + else if (last || !(*(AVClass**)obj)->option->name) return NULL; else return (*(AVClass**)obj)->option; } @@ -261,7 +261,7 @@ static int av_get_number(void *obj, const char *name, const AVOption **o_out, do { const AVOption *o= av_find_opt(obj, name, NULL, 0, 0); void *dst; - if (!o || o->offset<=0) + if (!o || (o->offset<=0 && o->type != FF_OPT_TYPE_CONST)) goto error; dst= ((uint8_t*)obj) + o->offset; @@ -277,6 +277,7 @@ static int av_get_number(void *obj, const char *name, const AVOption **o_out, do case FF_OPT_TYPE_RATIONAL: *intnum= ((AVRational*)dst)->num; *den = ((AVRational*)dst)->den; return 0; + case FF_OPT_TYPE_CONST: *intnum= o->default_val.dbl;return 0; } error: *den=*intnum=0; @@ -289,7 +290,8 @@ double av_get_double(void *obj, const char *name, const AVOption **o_out) double num=1; int den=1; - av_get_number(obj, name, o_out, &num, &den, &intnum); + if (av_get_number(obj, name, o_out, &num, &den, &intnum) < 0) + return NAN; return num*intnum/den; } @@ -299,7 +301,8 @@ AVRational av_get_q(void *obj, const char *name, const AVOption **o_out) double num=1; int den=1; - av_get_number(obj, name, o_out, &num, &den, &intnum); + if (av_get_number(obj, name, o_out, &num, &den, &intnum) < 0) + return (AVRational){0, 0}; if (num == 1.0 && (int)intnum == intnum) return (AVRational){intnum, den}; else @@ -312,10 +315,21 @@ int64_t av_get_int(void *obj, const char *name, const AVOption **o_out) double num=1; int den=1; - av_get_number(obj, name, o_out, &num, &den, &intnum); + if (av_get_number(obj, name, o_out, &num, &den, &intnum) < 0) + return -1; return num*intnum/den; } +int av_opt_flag_is_set(void *obj, const char *field_name, const char *flag_name) +{ + const AVOption *field = av_find_opt(obj, field_name, NULL, 0, 0); + const AVOption *flag = av_find_opt(obj, flag_name, NULL, 0, 0); + 
+ if (!field || !flag || flag->type != FF_OPT_TYPE_CONST) + return 0; + return av_get_int(obj, field_name, NULL) & (int) flag->default_val.dbl; +} + static void opt_list(void *obj, void *av_log_obj, const char *unit, int req_flags, int rej_flags) { @@ -439,8 +453,10 @@ void av_opt_set_defaults2(void *s, int mask, int flags) } break; case FF_OPT_TYPE_STRING: + av_set_string3(s, opt->name, opt->default_val.str, 1, NULL); + break; case FF_OPT_TYPE_BINARY: - /* Cannot set default for string as default_val is of type * double */ + /* Cannot set default for binary */ break; default: av_log(s, AV_LOG_DEBUG, "AVOption type %d of option %s not implemented yet\n", opt->type, opt->name); @@ -514,6 +530,14 @@ int av_set_options_string(void *ctx, const char *opts, return count; } +void av_opt_free(void *obj) +{ + const AVOption *o = NULL; + while ((o = av_next_option(obj, o))) + if (o->type == FF_OPT_TYPE_STRING || o->type == FF_OPT_TYPE_BINARY) + av_freep((uint8_t *)obj + o->offset); +} + #ifdef TEST #undef printf diff --git a/libavutil/opt.h b/libavutil/opt.h index b04c7905d6..872b5547c1 100644 --- a/libavutil/opt.h +++ b/libavutil/opt.h @@ -176,4 +176,19 @@ void av_opt_set_defaults2(void *s, int mask, int flags); int av_set_options_string(void *ctx, const char *opts, const char *key_val_sep, const char *pairs_sep); +/** + * Free all string and binary options in obj. + */ +void av_opt_free(void *obj); + +/** + * Check whether a particular flag is set in a flags field. + * + * @param field_name the name of the flag field option + * @param flag_name the name of the flag to check + * @return non-zero if the flag is set, zero if the flag isn't set, + * isn't of the right type, or the flags field doesn't exist. 
+ */ +int av_opt_flag_is_set(void *obj, const char *field_name, const char *flag_name); + #endif /* AVUTIL_OPT_H */ diff --git a/libavutil/parseutils.c b/libavutil/parseutils.c index d67d31bffe..b2404b5c0f 100644 --- a/libavutil/parseutils.c +++ b/libavutil/parseutils.c @@ -24,11 +24,12 @@ #include <strings.h> #include <sys/time.h> #include <time.h> + +#include "avstring.h" +#include "avutil.h" +#include "eval.h" +#include "random_seed.h" #include "parseutils.h" -#include "libavutil/avutil.h" -#include "libavutil/eval.h" -#include "libavutil/avstring.h" -#include "libavutil/random_seed.h" typedef struct { const char *abbr; diff --git a/libavutil/parseutils.h b/libavutil/parseutils.h index c3986af20c..d31212e257 100644 --- a/libavutil/parseutils.h +++ b/libavutil/parseutils.h @@ -19,7 +19,7 @@ #ifndef AVUTIL_PARSEUTILS_H #define AVUTIL_PARSEUTILS_H -#include "libavutil/rational.h" +#include "rational.h" /** * @file diff --git a/libavutil/pixdesc.c b/libavutil/pixdesc.c index f8f99289f9..491955a020 100644 --- a/libavutil/pixdesc.c +++ b/libavutil/pixdesc.c @@ -878,6 +878,52 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[PIX_FMT_NB] = { }, .flags = PIX_FMT_BE, }, + [PIX_FMT_YUV444P10LE] = { + .name = "yuv444p10le", + .nb_components= 3, + .log2_chroma_w= 0, + .log2_chroma_h= 0, + .comp = { + {0,1,1,0,9}, /* Y */ + {1,1,1,0,9}, /* U */ + {2,1,1,0,9}, /* V */ + }, + }, + [PIX_FMT_YUV444P10BE] = { + .name = "yuv444p10be", + .nb_components= 3, + .log2_chroma_w= 0, + .log2_chroma_h= 0, + .comp = { + {0,1,1,0,9}, /* Y */ + {1,1,1,0,9}, /* U */ + {2,1,1,0,9}, /* V */ + }, + .flags = PIX_FMT_BE, + }, + [PIX_FMT_YUV444P9LE] = { + .name = "yuv444p9le", + .nb_components= 3, + .log2_chroma_w= 0, + .log2_chroma_h= 0, + .comp = { + {0,1,1,0,8}, /* Y */ + {1,1,1,0,8}, /* U */ + {2,1,1,0,8}, /* V */ + }, + }, + [PIX_FMT_YUV444P9BE] = { + .name = "yuv444p9be", + .nb_components= 3, + .log2_chroma_w= 0, + .log2_chroma_h= 0, + .comp = { + {0,1,1,0,8}, /* Y */ + {1,1,1,0,8}, /* U */ + 
{2,1,1,0,8}, /* V */ + }, + .flags = PIX_FMT_BE, + }, [PIX_FMT_DXVA2_VLD] = { .name = "dxva2_vld", .log2_chroma_w = 1, @@ -906,6 +952,12 @@ static enum PixelFormat get_pix_fmt_internal(const char *name) return PIX_FMT_NONE; } +const char *av_get_pix_fmt_name(enum PixelFormat pix_fmt) +{ + return (unsigned)pix_fmt < PIX_FMT_NB ? + av_pix_fmt_descriptors[pix_fmt].name : NULL; +} + #if HAVE_BIGENDIAN # define X_NE(be, le) be #else diff --git a/libavutil/pixdesc.h b/libavutil/pixdesc.h index 727e47f06a..f0de9981ec 100644 --- a/libavutil/pixdesc.h +++ b/libavutil/pixdesc.h @@ -23,6 +23,7 @@ #define AVUTIL_PIXDESC_H #include <inttypes.h> +#include "pixfmt.h" typedef struct AVComponentDescriptor{ uint16_t plane :2; ///< which of the 4 planes contains the component @@ -142,6 +143,14 @@ void av_write_image_line(const uint16_t *src, uint8_t *data[4], const int linesi enum PixelFormat av_get_pix_fmt(const char *name); /** + * Return the short name for a pixel format, NULL in case pix_fmt is + * unknown. + * + * @see av_get_pix_fmt(), av_get_pix_fmt_string() + */ +const char *av_get_pix_fmt_name(enum PixelFormat pix_fmt); + +/** * Print in buf the string corresponding to the pixel format with * number pix_fmt, or an header if pix_fmt is negative. 
 * diff --git a/libavutil/pixfmt.h b/libavutil/pixfmt.h index 95972f9373..0f170d2094 100644 --- a/libavutil/pixfmt.h +++ b/libavutil/pixfmt.h @@ -136,13 +136,19 @@ enum PixelFormat { PIX_FMT_BGR48BE, ///< packed RGB 16:16:16, 48bpp, 16B, 16G, 16R, the 2-byte value for each R/G/B component is stored as big-endian PIX_FMT_BGR48LE, ///< packed RGB 16:16:16, 48bpp, 16B, 16G, 16R, the 2-byte value for each R/G/B component is stored as little-endian - //the following 6 formats are deprecated and should be replaced by PIX_FMT_YUV420P16* with the bpp stored seperately + //the following 10 formats have the disadvantage of needing 1 format for each bit depth, thus + //If you want to support multiple bit depths, then using PIX_FMT_YUV420P16* with the bpp stored separately + //is better PIX_FMT_YUV420P9BE, ///< planar YUV 4:2:0, 13.5bpp, (1 Cr & Cb sample per 2x2 Y samples), big-endian PIX_FMT_YUV420P9LE, ///< planar YUV 4:2:0, 13.5bpp, (1 Cr & Cb sample per 2x2 Y samples), little-endian PIX_FMT_YUV420P10BE,///< planar YUV 4:2:0, 15bpp, (1 Cr & Cb sample per 2x2 Y samples), big-endian PIX_FMT_YUV420P10LE,///< planar YUV 4:2:0, 15bpp, (1 Cr & Cb sample per 2x2 Y samples), little-endian - PIX_FMT_YUV422P10BE,///< planar YUV 4:2:2, 20bpp, (1 Cr & Cb sample per 2x1 Y samples), big-endian - PIX_FMT_YUV422P10LE,///< planar YUV 4:2:2, 20bpp, (1 Cr & Cb sample per 2x1 Y samples), little-endian + PIX_FMT_YUV422P10BE,///< planar YUV 4:2:2, 20bpp, (1 Cr & Cb sample per 2x1 Y samples), big-endian + PIX_FMT_YUV422P10LE,///< planar YUV 4:2:2, 20bpp, (1 Cr & Cb sample per 2x1 Y samples), little-endian + PIX_FMT_YUV444P9BE, ///< planar YUV 4:4:4, 27bpp, (1 Cr & Cb sample per 1x1 Y samples), big-endian + PIX_FMT_YUV444P9LE, ///< planar YUV 4:4:4, 27bpp, (1 Cr & Cb sample per 1x1 Y samples), little-endian + PIX_FMT_YUV444P10BE,///< planar YUV 4:4:4, 30bpp, (1 Cr & Cb sample per 1x1 Y samples), big-endian + PIX_FMT_YUV444P10LE,///< planar YUV 4:4:4, 30bpp, (1 Cr & Cb sample per 1x1 Y samples), 
little-endian PIX_FMT_NB, ///< number of pixel formats, DO NOT USE THIS if you want to link with shared libav* because the number of formats might differ between versions }; @@ -171,8 +177,10 @@ enum PixelFormat { #define PIX_FMT_BGR444 PIX_FMT_NE(BGR444BE, BGR444LE) #define PIX_FMT_YUV420P9 PIX_FMT_NE(YUV420P9BE , YUV420P9LE) +#define PIX_FMT_YUV444P9 PIX_FMT_NE(YUV444P9BE , YUV444P9LE) #define PIX_FMT_YUV420P10 PIX_FMT_NE(YUV420P10BE, YUV420P10LE) #define PIX_FMT_YUV422P10 PIX_FMT_NE(YUV422P10BE, YUV422P10LE) +#define PIX_FMT_YUV444P10 PIX_FMT_NE(YUV444P10BE, YUV444P10LE) #define PIX_FMT_YUV420P16 PIX_FMT_NE(YUV420P16BE, YUV420P16LE) #define PIX_FMT_YUV422P16 PIX_FMT_NE(YUV422P16BE, YUV422P16LE) #define PIX_FMT_YUV444P16 PIX_FMT_NE(YUV444P16BE, YUV444P16LE) diff --git a/libavutil/samplefmt.c b/libavutil/samplefmt.c index ea004d926a..2df06a619e 100644 --- a/libavutil/samplefmt.c +++ b/libavutil/samplefmt.c @@ -66,42 +66,52 @@ char *av_get_sample_fmt_string (char *buf, int buf_size, enum AVSampleFormat sam return buf; } +int av_get_bytes_per_sample(enum AVSampleFormat sample_fmt) +{ + return sample_fmt < 0 || sample_fmt >= AV_SAMPLE_FMT_NB ? + 0 : sample_fmt_info[sample_fmt].bits >> 3; +} + +#if FF_API_GET_BITS_PER_SAMPLE_FMT int av_get_bits_per_sample_fmt(enum AVSampleFormat sample_fmt) { return sample_fmt < 0 || sample_fmt >= AV_SAMPLE_FMT_NB ? 0 : sample_fmt_info[sample_fmt].bits; } +#endif int av_samples_fill_arrays(uint8_t *pointers[8], int linesizes[8], uint8_t *buf, int nb_channels, int nb_samples, enum AVSampleFormat sample_fmt, int planar, int align) { - int i, step_size = 0; + int i, linesize; int sample_size = av_get_bits_per_sample_fmt(sample_fmt) >> 3; - int channel_step = planar ? 
FFALIGN(nb_samples*sample_size, align) : sample_size; - if(nb_channels * (uint64_t)nb_samples * sample_size >= INT_MAX - align*(uint64_t)nb_channels) + if (nb_channels * (uint64_t)nb_samples * sample_size >= INT_MAX - align*(uint64_t)nb_channels) return AVERROR(EINVAL); + linesize = planar ? FFALIGN(nb_samples*sample_size, align) : + FFALIGN(nb_samples*sample_size*nb_channels, align); if (pointers) { pointers[0] = buf; - for (i = 0; i < nb_channels; i++) { - pointers[i] = buf + step_size; - step_size += channel_step; + for (i = 1; planar && i < nb_channels; i++) { + pointers[i] = pointers[i-1] + linesize; } - memset(&pointers[nb_channels], 0, (8-nb_channels) * sizeof(pointers[0])); + memset(&pointers[i], 0, (8-i) * sizeof(pointers[0])); } if (linesizes) { - linesizes[0] = planar ? sample_size : nb_channels*sample_size; - memset(&linesizes[1], 0, (8-1) * sizeof(linesizes[0])); + linesizes[0] = linesize; + for (i = 1; planar && i < nb_channels; i++) + linesizes[i] = linesizes[0]; + memset(&linesizes[i], 0, (8-i) * sizeof(linesizes[0])); } - return planar ? channel_step * nb_channels : FFALIGN(nb_channels*sample_size*nb_samples, align); + return planar ? linesize * nb_channels : linesize; } int av_samples_alloc(uint8_t *pointers[8], int linesizes[8], - int nb_samples, int nb_channels, + int nb_channels, int nb_samples, enum AVSampleFormat sample_fmt, int planar, int align) { diff --git a/libavutil/samplefmt.h b/libavutil/samplefmt.h index 9b9c0d49a9..b06755d2ce 100644 --- a/libavutil/samplefmt.h +++ b/libavutil/samplefmt.h @@ -60,22 +60,35 @@ enum AVSampleFormat av_get_sample_fmt(const char *name); */ char *av_get_sample_fmt_string(char *buf, int buf_size, enum AVSampleFormat sample_fmt); +#if FF_API_GET_BITS_PER_SAMPLE_FMT /** - * Return sample format bits per sample. + * @deprecated Use av_get_bytes_per_sample() instead. + */ +attribute_deprecated +int av_get_bits_per_sample_fmt(enum AVSampleFormat sample_fmt); +#endif + +/** + * Return number of bytes per sample. 
* * @param sample_fmt the sample format - * @return number of bits per sample or zero if unknown for the given + * @return number of bytes per sample or zero if unknown for the given * sample format */ -int av_get_bits_per_sample_fmt(enum AVSampleFormat sample_fmt); +int av_get_bytes_per_sample(enum AVSampleFormat sample_fmt); /** * Fill channel data pointers and linesizes for samples with sample * format sample_fmt. * * The pointers array is filled with the pointers to the samples data: - * data[c] points to the first sample of channel c. - * data[c] + linesize[0] points to the second sample of channel c + * for planar, set the start point of each plane's data within the buffer, + * for packed, set the start point of the entire buffer only. + * + * The linesize array is filled with the aligned size of each samples + * plane, that is linesize[i] will contain the linesize of the plane i, + * and will be zero for all the unused planes. All linesize values are + * equal. * * @param pointers array to be filled with the pointer for each plane, may be NULL * @param linesizes array to be filled with the linesize, may be NULL @@ -96,6 +109,7 @@ int av_samples_fill_arrays(uint8_t *pointers[8], int linesizes[8], * The allocated samples buffer has to be freed by using * av_freep(&pointers[0]). 
* + * @param nb_channels number of audio channels * @param nb_samples number of samples per channel * @param planar 1 if the samples layout is planar, 0 if packed, * @param align the value to use for buffer size alignment @@ -104,7 +118,7 @@ int av_samples_fill_arrays(uint8_t *pointers[8], int linesizes[8], * @see av_samples_fill_arrays() */ int av_samples_alloc(uint8_t *pointers[8], int linesizes[8], - int nb_samples, int nb_channels, + int nb_channels, int nb_samples, enum AVSampleFormat sample_fmt, int planar, int align); diff --git a/libpostproc/postprocess.c b/libpostproc/postprocess.c index 03e5f194d7..589c2cead6 100644 --- a/libpostproc/postprocess.c +++ b/libpostproc/postprocess.c @@ -767,7 +767,8 @@ pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality) ppMode->maxClippedThreshold= 0.01; ppMode->error=0; - av_strlcpy(temp, name, GET_MODE_BUFFER_SIZE); + memset(temp, 0, GET_MODE_BUFFER_SIZE); + av_strlcpy(temp, name, GET_MODE_BUFFER_SIZE - 1); av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name); @@ -818,12 +819,11 @@ pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality) int plen; int spaceLeft; - if(p==NULL) p= temp, *p=0; //last filter - else p--, *p=','; //not last filter + p--, *p=','; plen= strlen(p); spaceLeft= p - temp + plen; - if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE){ + if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE - 1){ ppMode->error++; break; } diff --git a/libswscale/Makefile b/libswscale/Makefile index 6976079686..8bb06baae2 100644 --- a/libswscale/Makefile +++ b/libswscale/Makefile @@ -5,14 +5,19 @@ FFLIBS = avutil HEADERS = swscale.h -OBJS = options.o rgb2rgb.o swscale.o utils.o yuv2rgb.o +OBJS = options.o rgb2rgb.o swscale.o utils.o yuv2rgb.o \ + swscale_unscaled.o OBJS-$(ARCH_BFIN) += bfin/internal_bfin.o \ bfin/swscale_bfin.o \ bfin/yuv2rgb_bfin.o OBJS-$(CONFIG_MLIB) += mlib/yuv2rgb_mlib.o -OBJS-$(HAVE_ALTIVEC) += ppc/yuv2rgb_altivec.o -OBJS-$(HAVE_MMX) += x86/yuv2rgb_mmx.o +OBJS-$(HAVE_ALTIVEC) += 
ppc/swscale_altivec.o \ + ppc/yuv2rgb_altivec.o \ + ppc/yuv2yuv_altivec.o +OBJS-$(HAVE_MMX) += x86/rgb2rgb.o \ + x86/swscale_mmx.o \ + x86/yuv2rgb_mmx.o OBJS-$(HAVE_VIS) += sparc/yuv2rgb_vis.o TESTPROGS = colorspace swscale diff --git a/libswscale/bfin/internal_bfin.S b/libswscale/bfin/internal_bfin.S index 5af46540a8..cb8d71253c 100644 --- a/libswscale/bfin/internal_bfin.S +++ b/libswscale/bfin/internal_bfin.S @@ -466,8 +466,8 @@ DEFUN_END(yuv2rgb24_line) #define ARG_srcStride 40 DEFUN(uyvytoyv12, mL3, (const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, - long width, long height, - long lumStride, long chromStride, long srcStride)): + int width, int height, + int lumStride, int chromStride, int srcStride)): link 0; [--sp] = (r7:4,p5:4); @@ -539,8 +539,8 @@ DEFUN(uyvytoyv12, mL3, (const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8 DEFUN_END(uyvytoyv12) DEFUN(yuyvtoyv12, mL3, (const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, - long width, long height, - long lumStride, long chromStride, long srcStride)): + int width, int height, + int lumStride, int chromStride, int srcStride)): link 0; [--sp] = (r7:4,p5:4); diff --git a/libswscale/bfin/swscale_bfin.c b/libswscale/bfin/swscale_bfin.c index ce2f1720dd..4b26ba67c2 100644 --- a/libswscale/bfin/swscale_bfin.c +++ b/libswscale/bfin/swscale_bfin.c @@ -38,12 +38,12 @@ #endif int ff_bfin_uyvytoyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, - long width, long height, - long lumStride, long chromStride, long srcStride) L1CODE; + int width, int height, + int lumStride, int chromStride, int srcStride) L1CODE; int ff_bfin_yuyvtoyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, - long width, long height, - long lumStride, long chromStride, long srcStride) L1CODE; + int width, int height, + int lumStride, int chromStride, int srcStride) L1CODE; static int uyvytoyv12_unscaled(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, int srcSliceH, 
uint8_t* dst[], int dstStride[]) @@ -79,15 +79,13 @@ static int yuyvtoyv12_unscaled(SwsContext *c, uint8_t* src[], int srcStride[], i void ff_bfin_get_unscaled_swscale(SwsContext *c) { SwsFunc swScale = c->swScale; - if (c->flags & SWS_CPU_CAPS_BFIN) - if (c->dstFormat == PIX_FMT_YUV420P) - if (c->srcFormat == PIX_FMT_UYVY422) { - av_log (NULL, AV_LOG_VERBOSE, "selecting Blackfin optimized uyvytoyv12_unscaled\n"); - c->swScale = uyvytoyv12_unscaled; - } - if (c->dstFormat == PIX_FMT_YUV420P) - if (c->srcFormat == PIX_FMT_YUYV422) { - av_log (NULL, AV_LOG_VERBOSE, "selecting Blackfin optimized yuyvtoyv12_unscaled\n"); - c->swScale = yuyvtoyv12_unscaled; - } + + if (c->dstFormat == PIX_FMT_YUV420P && c->srcFormat == PIX_FMT_UYVY422) { + av_log (NULL, AV_LOG_VERBOSE, "selecting Blackfin optimized uyvytoyv12_unscaled\n"); + c->swScale = uyvytoyv12_unscaled; + } + if (c->dstFormat == PIX_FMT_YUV420P && c->srcFormat == PIX_FMT_YUYV422) { + av_log (NULL, AV_LOG_VERBOSE, "selecting Blackfin optimized yuyvtoyv12_unscaled\n"); + c->swScale = yuyvtoyv12_unscaled; + } } diff --git a/libswscale/bfin/yuv2rgb_bfin.c b/libswscale/bfin/yuv2rgb_bfin.c index eaa83eaf3b..7a7dc7f0e6 100644 --- a/libswscale/bfin/yuv2rgb_bfin.c +++ b/libswscale/bfin/yuv2rgb_bfin.c @@ -28,6 +28,7 @@ #include <assert.h> #include "config.h" #include <unistd.h> +#include "libavutil/pixdesc.h" #include "libswscale/rgb2rgb.h" #include "libswscale/swscale.h" #include "libswscale/swscale_internal.h" @@ -197,7 +198,7 @@ SwsFunc ff_yuv2rgb_get_func_ptr_bfin(SwsContext *c) } av_log(c, AV_LOG_INFO, "BlackFin accelerated color space converter %s\n", - sws_format_name (c->dstFormat)); + av_get_pix_fmt_name(c->dstFormat)); return f; } diff --git a/libswscale/colorspace-test.c b/libswscale/colorspace-test.c index 50db7d09e3..34095d8532 100644 --- a/libswscale/colorspace-test.c +++ b/libswscale/colorspace-test.c @@ -33,31 +33,6 @@ #define FUNC(s,d,n) {s,d,#n,n} -static int cpu_caps; - -static char *args_parse(int argc, 
char *argv[]) -{ - int o; - - while ((o = getopt(argc, argv, "m23")) != -1) { - switch (o) { - case 'm': - cpu_caps |= SWS_CPU_CAPS_MMX; - break; - case '2': - cpu_caps |= SWS_CPU_CAPS_MMX2; - break; - case '3': - cpu_caps |= SWS_CPU_CAPS_3DNOW; - break; - default: - av_log(NULL, AV_LOG_ERROR, "Unknown option %c\n", o); - } - } - - return argv[optind]; -} - int main(int argc, char **argv) { int i, funcNum; @@ -70,16 +45,14 @@ int main(int argc, char **argv) return -1; av_log(NULL, AV_LOG_INFO, "memory corruption test ...\n"); - args_parse(argc, argv); - av_log(NULL, AV_LOG_INFO, "CPU capabilities forced to %x\n", cpu_caps); - sws_rgb2rgb_init(cpu_caps); + sws_rgb2rgb_init(); for(funcNum=0; ; funcNum++) { struct func_info_s { int src_bpp; int dst_bpp; const char *name; - void (*func)(const uint8_t *src, uint8_t *dst, long src_size); + void (*func)(const uint8_t *src, uint8_t *dst, int src_size); } func_info[] = { FUNC(2, 2, rgb15to16), FUNC(2, 3, rgb15to24), diff --git a/libswscale/options.c b/libswscale/options.c index d3cd0a3190..24e70b96fc 100644 --- a/libswscale/options.c +++ b/libswscale/options.c @@ -48,12 +48,6 @@ static const AVOption options[] = { { "spline", "natural bicubic spline", 0, FF_OPT_TYPE_CONST, {.dbl = SWS_SPLINE }, INT_MIN, INT_MAX, VE, "sws_flags" }, { "print_info", "print info", 0, FF_OPT_TYPE_CONST, {.dbl = SWS_PRINT_INFO }, INT_MIN, INT_MAX, VE, "sws_flags" }, { "accurate_rnd", "accurate rounding", 0, FF_OPT_TYPE_CONST, {.dbl = SWS_ACCURATE_RND }, INT_MIN, INT_MAX, VE, "sws_flags" }, - { "mmx", "MMX SIMD acceleration", 0, FF_OPT_TYPE_CONST, {.dbl = SWS_CPU_CAPS_MMX }, INT_MIN, INT_MAX, VE, "sws_flags" }, - { "mmx2", "MMX2 SIMD acceleration", 0, FF_OPT_TYPE_CONST, {.dbl = SWS_CPU_CAPS_MMX2 }, INT_MIN, INT_MAX, VE, "sws_flags" }, - { "sse2", "SSE2 SIMD acceleration", 0, FF_OPT_TYPE_CONST, {.dbl = SWS_CPU_CAPS_SSE2 }, INT_MIN, INT_MAX, VE, "sws_flags" }, - { "3dnow", "3DNOW SIMD acceleration", 0, FF_OPT_TYPE_CONST, {.dbl = SWS_CPU_CAPS_3DNOW 
}, INT_MIN, INT_MAX, VE, "sws_flags" }, - { "altivec", "AltiVec SIMD acceleration", 0, FF_OPT_TYPE_CONST, {.dbl = SWS_CPU_CAPS_ALTIVEC }, INT_MIN, INT_MAX, VE, "sws_flags" }, - { "bfin", "Blackfin SIMD acceleration", 0, FF_OPT_TYPE_CONST, {.dbl = SWS_CPU_CAPS_BFIN }, INT_MIN, INT_MAX, VE, "sws_flags" }, { "full_chroma_int", "full chroma interpolation", 0 , FF_OPT_TYPE_CONST, {.dbl = SWS_FULL_CHR_H_INT }, INT_MIN, INT_MAX, VE, "sws_flags" }, { "full_chroma_inp", "full chroma input", 0 , FF_OPT_TYPE_CONST, {.dbl = SWS_FULL_CHR_H_INP }, INT_MIN, INT_MAX, VE, "sws_flags" }, { "bitexact", "", 0 , FF_OPT_TYPE_CONST, {.dbl = SWS_BITEXACT }, INT_MIN, INT_MAX, VE, "sws_flags" }, diff --git a/libswscale/ppc/swscale_altivec_template.c b/libswscale/ppc/swscale_altivec.c index c7aa0fd2e6..197000beb9 100644 --- a/libswscale/ppc/swscale_altivec_template.c +++ b/libswscale/ppc/swscale_altivec.c @@ -21,6 +21,13 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#include <inttypes.h> +#include "config.h" +#include "libswscale/swscale.h" +#include "libswscale/swscale_internal.h" +#include "libavutil/cpu.h" +#include "yuv2rgb_altivec.h" + #define vzero vec_splat_s32(0) static inline void @@ -29,13 +36,13 @@ altivec_packIntArrayToCharArray(int *val, uint8_t* dest, int dstW) register int i; vector unsigned int altivec_vectorShiftInt19 = vec_add(vec_splat_u32(10), vec_splat_u32(9)); - if ((unsigned long)dest % 16) { + if ((uintptr_t)dest % 16) { /* badly aligned store, we force store alignment */ /* and will handle load misalignment on val w/ vec_perm */ vector unsigned char perm1; vector signed int v1; for (i = 0 ; (i < dstW) && - (((unsigned long)dest + i) % 16) ; i++) { + (((uintptr_t)dest + i) % 16) ; i++) { int t = val[i] >> 19; dest[i] = (t < 0) ? 0 : ((t > 255) ? 
255 : t); } @@ -85,10 +92,15 @@ altivec_packIntArrayToCharArray(int *val, uint8_t* dest, int dstW) } } -static inline void -yuv2yuvX_altivec_real(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, - const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, - uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW) +static void +yuv2yuvX_altivec_real(SwsContext *c, + const int16_t *lumFilter, const int16_t **lumSrc, + int lumFilterSize, const int16_t *chrFilter, + const int16_t **chrUSrc, const int16_t **chrVSrc, + int chrFilterSize, const int16_t **alpSrc, + uint8_t *dest, uint8_t *uDest, + uint8_t *vDest, uint8_t *aDest, + int dstW, int chrDstW) { const vector signed int vini = {(1 << 18), (1 << 18), (1 << 18), (1 << 18)}; register int i, j; @@ -159,22 +171,22 @@ yuv2yuvX_altivec_real(const int16_t *lumFilter, const int16_t **lumSrc, int lumF vChrFilter = vec_perm(vChrFilter, vChrFilter, perm0); vChrFilter = vec_splat(vChrFilter, 0); // chrFilter[j] is loaded 8 times in vChrFilter - perm = vec_lvsl(0, chrSrc[j]); - l1 = vec_ld(0, chrSrc[j]); - l1_V = vec_ld(VOFW << 1, chrSrc[j]); + perm = vec_lvsl(0, chrUSrc[j]); + l1 = vec_ld(0, chrUSrc[j]); + l1_V = vec_ld(0, chrVSrc[j]); for (i = 0; i < (chrDstW - 7); i+=8) { int offset = i << 2; - vector signed short l2 = vec_ld((i << 1) + 16, chrSrc[j]); - vector signed short l2_V = vec_ld(((i + VOFW) << 1) + 16, chrSrc[j]); + vector signed short l2 = vec_ld((i << 1) + 16, chrUSrc[j]); + vector signed short l2_V = vec_ld((i << 1) + 16, chrVSrc[j]); vector signed int v1 = vec_ld(offset, u); vector signed int v2 = vec_ld(offset + 16, u); vector signed int v1_V = vec_ld(offset, v); vector signed int v2_V = vec_ld(offset + 16, v); - vector signed short ls = vec_perm(l1, l2, perm); // chrSrc[j][i] ... chrSrc[j][i+7] - vector signed short ls_V = vec_perm(l1_V, l2_V, perm); // chrSrc[j][i+VOFW] ... chrSrc[j][i+2055] + vector signed short ls = vec_perm(l1, l2, perm); // chrUSrc[j][i] ... 
chrUSrc[j][i+7] + vector signed short ls_V = vec_perm(l1_V, l2_V, perm); // chrVSrc[j][i] ... chrVSrc[j][i] vector signed int i1 = vec_mule(vChrFilter, ls); vector signed int i2 = vec_mulo(vChrFilter, ls); @@ -182,9 +194,9 @@ yuv2yuvX_altivec_real(const int16_t *lumFilter, const int16_t **lumSrc, int lumF vector signed int i2_V = vec_mulo(vChrFilter, ls_V); vector signed int vf1 = vec_mergeh(i1, i2); - vector signed int vf2 = vec_mergel(i1, i2); // chrSrc[j][i] * chrFilter[j] ... chrSrc[j][i+7] * chrFilter[j] + vector signed int vf2 = vec_mergel(i1, i2); // chrUSrc[j][i] * chrFilter[j] ... chrUSrc[j][i+7] * chrFilter[j] vector signed int vf1_V = vec_mergeh(i1_V, i2_V); - vector signed int vf2_V = vec_mergel(i1_V, i2_V); // chrSrc[j][i] * chrFilter[j] ... chrSrc[j][i+7] * chrFilter[j] + vector signed int vf2_V = vec_mergel(i1_V, i2_V); // chrVSrc[j][i] * chrFilter[j] ... chrVSrc[j][i+7] * chrFilter[j] vector signed int vo1 = vec_add(v1, vf1); vector signed int vo2 = vec_add(v2, vf2); @@ -200,8 +212,8 @@ yuv2yuvX_altivec_real(const int16_t *lumFilter, const int16_t **lumSrc, int lumF l1_V = l2_V; } for ( ; i < chrDstW; i++) { - u[i] += chrSrc[j][i] * chrFilter[j]; - v[i] += chrSrc[j][i + VOFW] * chrFilter[j]; + u[i] += chrUSrc[j][i] * chrFilter[j]; + v[i] += chrVSrc[j][i] * chrFilter[j]; } } altivec_packIntArrayToCharArray(u, uDest, chrDstW); @@ -209,10 +221,10 @@ yuv2yuvX_altivec_real(const int16_t *lumFilter, const int16_t **lumSrc, int lumF } } -static inline void hScale_altivec_real(int16_t *dst, int dstW, - const uint8_t *src, int srcW, - int xInc, const int16_t *filter, - const int16_t *filterPos, int filterSize) +static void hScale_altivec_real(int16_t *dst, int dstW, + const uint8_t *src, int srcW, + int xInc, const int16_t *filter, + const int16_t *filterPos, int filterSize) { register int i; DECLARE_ALIGNED(16, int, tempo)[4]; @@ -389,157 +401,24 @@ static inline void hScale_altivec_real(int16_t *dst, int dstW, } } -static inline int 
yv12toyuy2_unscaled_altivec(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY, - int srcSliceH, uint8_t* dstParam[], int dstStride_a[]) +void ff_sws_init_swScale_altivec(SwsContext *c) { - uint8_t *dst=dstParam[0] + dstStride_a[0]*srcSliceY; - // yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0], srcStride[1], dstStride[0]); - const uint8_t *ysrc = src[0]; - const uint8_t *usrc = src[1]; - const uint8_t *vsrc = src[2]; - const int width = c->srcW; - const int height = srcSliceH; - const int lumStride = srcStride[0]; - const int chromStride = srcStride[1]; - const int dstStride = dstStride_a[0]; - const vector unsigned char yperm = vec_lvsl(0, ysrc); - const int vertLumPerChroma = 2; - register unsigned int y; - - if (width&15) { - yv12toyuy2(ysrc, usrc, vsrc, dst, c->srcW, srcSliceH, lumStride, chromStride, dstStride); - return srcSliceH; - } + enum PixelFormat dstFormat = c->dstFormat; - /* This code assumes: - - 1) dst is 16 bytes-aligned - 2) dstStride is a multiple of 16 - 3) width is a multiple of 16 - 4) lum & chrom stride are multiples of 8 - */ - - for (y=0; y<height; y++) { - int i; - for (i = 0; i < width - 31; i+= 32) { - const unsigned int j = i >> 1; - vector unsigned char v_yA = vec_ld(i, ysrc); - vector unsigned char v_yB = vec_ld(i + 16, ysrc); - vector unsigned char v_yC = vec_ld(i + 32, ysrc); - vector unsigned char v_y1 = vec_perm(v_yA, v_yB, yperm); - vector unsigned char v_y2 = vec_perm(v_yB, v_yC, yperm); - vector unsigned char v_uA = vec_ld(j, usrc); - vector unsigned char v_uB = vec_ld(j + 16, usrc); - vector unsigned char v_u = vec_perm(v_uA, v_uB, vec_lvsl(j, usrc)); - vector unsigned char v_vA = vec_ld(j, vsrc); - vector unsigned char v_vB = vec_ld(j + 16, vsrc); - vector unsigned char v_v = vec_perm(v_vA, v_vB, vec_lvsl(j, vsrc)); - vector unsigned char v_uv_a = vec_mergeh(v_u, v_v); - vector unsigned char v_uv_b = vec_mergel(v_u, v_v); - vector unsigned char v_yuy2_0 = vec_mergeh(v_y1, v_uv_a); - 
vector unsigned char v_yuy2_1 = vec_mergel(v_y1, v_uv_a); - vector unsigned char v_yuy2_2 = vec_mergeh(v_y2, v_uv_b); - vector unsigned char v_yuy2_3 = vec_mergel(v_y2, v_uv_b); - vec_st(v_yuy2_0, (i << 1), dst); - vec_st(v_yuy2_1, (i << 1) + 16, dst); - vec_st(v_yuy2_2, (i << 1) + 32, dst); - vec_st(v_yuy2_3, (i << 1) + 48, dst); - } - if (i < width) { - const unsigned int j = i >> 1; - vector unsigned char v_y1 = vec_ld(i, ysrc); - vector unsigned char v_u = vec_ld(j, usrc); - vector unsigned char v_v = vec_ld(j, vsrc); - vector unsigned char v_uv_a = vec_mergeh(v_u, v_v); - vector unsigned char v_yuy2_0 = vec_mergeh(v_y1, v_uv_a); - vector unsigned char v_yuy2_1 = vec_mergel(v_y1, v_uv_a); - vec_st(v_yuy2_0, (i << 1), dst); - vec_st(v_yuy2_1, (i << 1) + 16, dst); - } - if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) { - usrc += chromStride; - vsrc += chromStride; - } - ysrc += lumStride; - dst += dstStride; - } + if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC)) + return; - return srcSliceH; -} - -static inline int yv12touyvy_unscaled_altivec(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY, - int srcSliceH, uint8_t* dstParam[], int dstStride_a[]) -{ - uint8_t *dst=dstParam[0] + dstStride_a[0]*srcSliceY; - // yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0], srcStride[1], dstStride[0]); - const uint8_t *ysrc = src[0]; - const uint8_t *usrc = src[1]; - const uint8_t *vsrc = src[2]; - const int width = c->srcW; - const int height = srcSliceH; - const int lumStride = srcStride[0]; - const int chromStride = srcStride[1]; - const int dstStride = dstStride_a[0]; - const int vertLumPerChroma = 2; - const vector unsigned char yperm = vec_lvsl(0, ysrc); - register unsigned int y; - - if (width&15) { - yv12touyvy(ysrc, usrc, vsrc, dst, c->srcW, srcSliceH, lumStride, chromStride, dstStride); - return srcSliceH; + c->hScale = hScale_altivec_real; + if (!is16BPS(dstFormat) && !is9_OR_10BPS(dstFormat)) { + c->yuv2yuvX = 
yuv2yuvX_altivec_real; } - /* This code assumes: - - 1) dst is 16 bytes-aligned - 2) dstStride is a multiple of 16 - 3) width is a multiple of 16 - 4) lum & chrom stride are multiples of 8 - */ - - for (y=0; y<height; y++) { - int i; - for (i = 0; i < width - 31; i+= 32) { - const unsigned int j = i >> 1; - vector unsigned char v_yA = vec_ld(i, ysrc); - vector unsigned char v_yB = vec_ld(i + 16, ysrc); - vector unsigned char v_yC = vec_ld(i + 32, ysrc); - vector unsigned char v_y1 = vec_perm(v_yA, v_yB, yperm); - vector unsigned char v_y2 = vec_perm(v_yB, v_yC, yperm); - vector unsigned char v_uA = vec_ld(j, usrc); - vector unsigned char v_uB = vec_ld(j + 16, usrc); - vector unsigned char v_u = vec_perm(v_uA, v_uB, vec_lvsl(j, usrc)); - vector unsigned char v_vA = vec_ld(j, vsrc); - vector unsigned char v_vB = vec_ld(j + 16, vsrc); - vector unsigned char v_v = vec_perm(v_vA, v_vB, vec_lvsl(j, vsrc)); - vector unsigned char v_uv_a = vec_mergeh(v_u, v_v); - vector unsigned char v_uv_b = vec_mergel(v_u, v_v); - vector unsigned char v_uyvy_0 = vec_mergeh(v_uv_a, v_y1); - vector unsigned char v_uyvy_1 = vec_mergel(v_uv_a, v_y1); - vector unsigned char v_uyvy_2 = vec_mergeh(v_uv_b, v_y2); - vector unsigned char v_uyvy_3 = vec_mergel(v_uv_b, v_y2); - vec_st(v_uyvy_0, (i << 1), dst); - vec_st(v_uyvy_1, (i << 1) + 16, dst); - vec_st(v_uyvy_2, (i << 1) + 32, dst); - vec_st(v_uyvy_3, (i << 1) + 48, dst); + /* The following list of supported dstFormat values should + * match what's found in the body of ff_yuv2packedX_altivec() */ + if (!(c->flags & (SWS_BITEXACT | SWS_FULL_CHR_H_INT)) && !c->alpPixBuf && + (c->dstFormat==PIX_FMT_ABGR || c->dstFormat==PIX_FMT_BGRA || + c->dstFormat==PIX_FMT_BGR24 || c->dstFormat==PIX_FMT_RGB24 || + c->dstFormat==PIX_FMT_RGBA || c->dstFormat==PIX_FMT_ARGB)) { + c->yuv2packedX = ff_yuv2packedX_altivec; } - if (i < width) { - const unsigned int j = i >> 1; - vector unsigned char v_y1 = vec_ld(i, ysrc); - vector unsigned char v_u = vec_ld(j, usrc); 
- vector unsigned char v_v = vec_ld(j, vsrc); - vector unsigned char v_uv_a = vec_mergeh(v_u, v_v); - vector unsigned char v_uyvy_0 = vec_mergeh(v_uv_a, v_y1); - vector unsigned char v_uyvy_1 = vec_mergel(v_uv_a, v_y1); - vec_st(v_uyvy_0, (i << 1), dst); - vec_st(v_uyvy_1, (i << 1) + 16, dst); - } - if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) { - usrc += chromStride; - vsrc += chromStride; - } - ysrc += lumStride; - dst += dstStride; - } - return srcSliceH; } diff --git a/libswscale/ppc/yuv2rgb_altivec.c b/libswscale/ppc/yuv2rgb_altivec.c index 2b58eb27c9..e13702b100 100644 --- a/libswscale/ppc/yuv2rgb_altivec.c +++ b/libswscale/ppc/yuv2rgb_altivec.c @@ -94,6 +94,9 @@ adjustment. #include "libswscale/rgb2rgb.h" #include "libswscale/swscale.h" #include "libswscale/swscale_internal.h" +#include "libavutil/cpu.h" +#include "libavutil/pixdesc.h" +#include "yuv2rgb_altivec.h" #undef PROFILE_THE_BEAST #undef INC_SCALING @@ -296,7 +299,7 @@ static int altivec_##name (SwsContext *c, \ vector signed short R1,G1,B1; \ vector unsigned char R,G,B; \ \ - vector unsigned char *y1ivP, *y2ivP, *uivP, *vivP; \ + const vector unsigned char *y1ivP, *y2ivP, *uivP, *vivP; \ vector unsigned char align_perm; \ \ vector signed short \ @@ -333,10 +336,10 @@ static int altivec_##name (SwsContext *c, \ \ for (j=0;j<w/16;j++) { \ \ - y1ivP = (vector unsigned char *)y1i; \ - y2ivP = (vector unsigned char *)y2i; \ - uivP = (vector unsigned char *)ui; \ - vivP = (vector unsigned char *)vi; \ + y1ivP = (const vector unsigned char *)y1i; \ + y2ivP = (const vector unsigned char *)y2i; \ + uivP = (const vector unsigned char *)ui; \ + vivP = (const vector unsigned char *)vi; \ \ align_perm = vec_lvsl (0, y1i); \ y0 = (vector unsigned char) \ @@ -446,159 +449,7 @@ static int altivec_##name (SwsContext *c, \ #define out_bgr24(a,b,c,ptr) vec_mstbgr24(a,b,c,ptr) DEFCSP420_CVT (yuv2_abgr, out_abgr) -#if 1 DEFCSP420_CVT (yuv2_bgra, out_bgra) -#else -static int altivec_yuv2_bgra32 (SwsContext *c, - 
unsigned char **in, int *instrides, - int srcSliceY, int srcSliceH, - unsigned char **oplanes, int *outstrides) -{ - int w = c->srcW; - int h = srcSliceH; - int i,j; - int instrides_scl[3]; - vector unsigned char y0,y1; - - vector signed char u,v; - - vector signed short Y0,Y1,Y2,Y3; - vector signed short U,V; - vector signed short vx,ux,uvx; - vector signed short vx0,ux0,uvx0; - vector signed short vx1,ux1,uvx1; - vector signed short R0,G0,B0; - vector signed short R1,G1,B1; - vector unsigned char R,G,B; - - vector unsigned char *uivP, *vivP; - vector unsigned char align_perm; - - vector signed short - lCY = c->CY, - lOY = c->OY, - lCRV = c->CRV, - lCBU = c->CBU, - lCGU = c->CGU, - lCGV = c->CGV; - - vector unsigned short lCSHIFT = c->CSHIFT; - - ubyte *y1i = in[0]; - ubyte *y2i = in[0]+w; - ubyte *ui = in[1]; - ubyte *vi = in[2]; - - vector unsigned char *oute - = (vector unsigned char *) - (oplanes[0]+srcSliceY*outstrides[0]); - vector unsigned char *outo - = (vector unsigned char *) - (oplanes[0]+srcSliceY*outstrides[0]+outstrides[0]); - - - instrides_scl[0] = instrides[0]; - instrides_scl[1] = instrides[1]-w/2; /* the loop moves ui by w/2 */ - instrides_scl[2] = instrides[2]-w/2; /* the loop moves vi by w/2 */ - - - for (i=0;i<h/2;i++) { - vec_dstst (outo, (0x02000002|(((w*3+32)/32)<<16)), 0); - vec_dstst (oute, (0x02000002|(((w*3+32)/32)<<16)), 1); - - for (j=0;j<w/16;j++) { - - y0 = vec_ldl (0,y1i); - y1 = vec_ldl (0,y2i); - uivP = (vector unsigned char *)ui; - vivP = (vector unsigned char *)vi; - - align_perm = vec_lvsl (0, ui); - u = (vector signed char)vec_perm (uivP[0], uivP[1], align_perm); - - align_perm = vec_lvsl (0, vi); - v = (vector signed char)vec_perm (vivP[0], vivP[1], align_perm); - u = (vector signed char) - vec_sub (u,(vector signed char) - vec_splat((vector signed char){128},0)); - - v = (vector signed char) - vec_sub (v, (vector signed char) - vec_splat((vector signed char){128},0)); - - U = vec_unpackh (u); - V = vec_unpackh (v); - - - Y0 
= vec_unh (y0); - Y1 = vec_unl (y0); - Y2 = vec_unh (y1); - Y3 = vec_unl (y1); - - Y0 = vec_mradds (Y0, lCY, lOY); - Y1 = vec_mradds (Y1, lCY, lOY); - Y2 = vec_mradds (Y2, lCY, lOY); - Y3 = vec_mradds (Y3, lCY, lOY); - - /* ux = (CBU*(u<<CSHIFT)+0x4000)>>15 */ - ux = vec_sl (U, lCSHIFT); - ux = vec_mradds (ux, lCBU, (vector signed short){0}); - ux0 = vec_mergeh (ux,ux); - ux1 = vec_mergel (ux,ux); - - /* vx = (CRV*(v<<CSHIFT)+0x4000)>>15; */ - vx = vec_sl (V, lCSHIFT); - vx = vec_mradds (vx, lCRV, (vector signed short){0}); - vx0 = vec_mergeh (vx,vx); - vx1 = vec_mergel (vx,vx); - /* uvx = ((CGU*u) + (CGV*v))>>15 */ - uvx = vec_mradds (U, lCGU, (vector signed short){0}); - uvx = vec_mradds (V, lCGV, uvx); - uvx0 = vec_mergeh (uvx,uvx); - uvx1 = vec_mergel (uvx,uvx); - R0 = vec_add (Y0,vx0); - G0 = vec_add (Y0,uvx0); - B0 = vec_add (Y0,ux0); - R1 = vec_add (Y1,vx1); - G1 = vec_add (Y1,uvx1); - B1 = vec_add (Y1,ux1); - R = vec_packclp (R0,R1); - G = vec_packclp (G0,G1); - B = vec_packclp (B0,B1); - - out_argb(R,G,B,oute); - R0 = vec_add (Y2,vx0); - G0 = vec_add (Y2,uvx0); - B0 = vec_add (Y2,ux0); - R1 = vec_add (Y3,vx1); - G1 = vec_add (Y3,uvx1); - B1 = vec_add (Y3,ux1); - R = vec_packclp (R0,R1); - G = vec_packclp (G0,G1); - B = vec_packclp (B0,B1); - - out_argb(R,G,B,outo); - y1i += 16; - y2i += 16; - ui += 8; - vi += 8; - - } - - outo += (outstrides[0])>>4; - oute += (outstrides[0])>>4; - - ui += instrides_scl[1]; - vi += instrides_scl[2]; - y1i += instrides_scl[0]; - y2i += instrides_scl[0]; - } - return srcSliceH; -} - -#endif - - DEFCSP420_CVT (yuv2_rgba, out_rgba) DEFCSP420_CVT (yuv2_argb, out_argb) DEFCSP420_CVT (yuv2_rgb24, out_rgb24) @@ -692,7 +543,7 @@ static int altivec_uyvy_rgb32 (SwsContext *c, */ SwsFunc ff_yuv2rgb_init_altivec(SwsContext *c) { - if (!(c->flags & SWS_CPU_CAPS_ALTIVEC)) + if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC)) return NULL; /* @@ -777,10 +628,12 @@ void ff_yuv2rgb_init_tables_altivec(SwsContext *c, const int inv_table[4], int b 
void -ff_yuv2packedX_altivec(SwsContext *c, - const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, - const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, - uint8_t *dest, int dstW, int dstY) +ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter, + const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrUSrc, + const int16_t **chrVSrc, int chrFilterSize, + const int16_t **alpSrc, uint8_t *dest, + int dstW, int dstY) { int i,j; vector signed short X,X0,X1,Y0,U0,V0,Y1,U1,V1,U,V; @@ -791,7 +644,7 @@ ff_yuv2packedX_altivec(SwsContext *c, vector signed short RND = vec_splat_s16(1<<3); vector unsigned short SCL = vec_splat_u16(4); - DECLARE_ALIGNED(16, unsigned long, scratch)[16]; + DECLARE_ALIGNED(16, unsigned int, scratch)[16]; vector signed short *YCoeffs, *CCoeffs; @@ -815,9 +668,9 @@ ff_yuv2packedX_altivec(SwsContext *c, V = RND; /* extract 8 coeffs from U,V */ for (j=0; j<chrFilterSize; j++) { - X = vec_ld (0, &chrSrc[j][i/2]); + X = vec_ld (0, &chrUSrc[j][i/2]); U = vec_mradds (X, CCoeffs[j], U); - X = vec_ld (0, &chrSrc[j][i/2+VOFW]); + X = vec_ld (0, &chrVSrc[j][i/2]); V = vec_mradds (X, CCoeffs[j], V); } @@ -868,7 +721,7 @@ ff_yuv2packedX_altivec(SwsContext *c, static int printed_error_message; if (!printed_error_message) { av_log(c, AV_LOG_ERROR, "altivec_yuv2packedX doesn't support %s output\n", - sws_format_name(c->dstFormat)); + av_get_pix_fmt_name(c->dstFormat)); printed_error_message=1; } return; @@ -893,9 +746,9 @@ ff_yuv2packedX_altivec(SwsContext *c, V = RND; /* extract 8 coeffs from U,V */ for (j=0; j<chrFilterSize; j++) { - X = vec_ld (0, &chrSrc[j][i/2]); + X = vec_ld (0, &chrUSrc[j][i/2]); U = vec_mradds (X, CCoeffs[j], U); - X = vec_ld (0, &chrSrc[j][i/2+VOFW]); + X = vec_ld (0, &chrVSrc[j][i/2]); V = vec_mradds (X, CCoeffs[j], V); } @@ -943,7 +796,7 @@ ff_yuv2packedX_altivec(SwsContext *c, default: /* Unreachable, I think. 
*/ av_log(c, AV_LOG_ERROR, "altivec_yuv2packedX doesn't support %s output\n", - sws_format_name(c->dstFormat)); + av_get_pix_fmt_name(c->dstFormat)); return; } diff --git a/libavcodec/tableprint.c b/libswscale/ppc/yuv2rgb_altivec.h index 52f6ac2a7c..15385b1d3b 100644 --- a/libavcodec/tableprint.c +++ b/libswscale/ppc/yuv2rgb_altivec.h @@ -1,7 +1,8 @@ /* - * Generate a file for hardcoded tables + * AltiVec-enhanced yuv2yuvX * - * Copyright (c) 2009 Reimar Döffinger <Reimar.Doeffinger@gmx.de> + * Copyright (C) 2004 Romain Dolbeau <romain@dolbeau.org> + * based on the equivalent C code in swscale.c * * This file is part of FFmpeg. * @@ -20,22 +21,14 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#include <stdio.h> -#include <inttypes.h> -#include "tableprint.h" +#ifndef PPC_YUV2RGB_ALTIVEC_H +#define PPC_YUV2RGB_ALTIVEC_H 1 -WRITE_1D_FUNC(int8_t, "%3"PRIi8, 15) -WRITE_1D_FUNC(uint8_t, "0x%02"PRIx8, 15) -WRITE_1D_FUNC(uint16_t, "0x%08"PRIx16, 7) -WRITE_1D_FUNC(uint32_t, "0x%08"PRIx32, 7) -WRITE_1D_FUNC(float, "%.18e", 3) +void ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter, + const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrUSrc, + const int16_t **chrVSrc, int chrFilterSize, + const int16_t **alpSrc, uint8_t *dest, + int dstW, int dstY); -WRITE_2D_FUNC(int8_t) -WRITE_2D_FUNC(uint8_t) -WRITE_2D_FUNC(uint32_t) -WRITE_2D_FUNC(float) - -void write_fileheader(void) { - printf("/* This file was generated by libavcodec/tableprint */\n"); - printf("#include <stdint.h>\n"); -} +#endif /* PPC_YUV2RGB_ALTIVEC_H */ diff --git a/libswscale/ppc/yuv2yuv_altivec.c b/libswscale/ppc/yuv2yuv_altivec.c new file mode 100644 index 0000000000..82c265afd2 --- /dev/null +++ b/libswscale/ppc/yuv2yuv_altivec.c @@ -0,0 +1,191 @@ +/* + * AltiVec-enhanced yuv-to-yuv convertion routines. 
+ * + * Copyright (C) 2004 Romain Dolbeau <romain@dolbeau.org> + * based on the equivalent C code in swscale.c + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <inttypes.h> +#include "config.h" +#include "libswscale/swscale.h" +#include "libswscale/swscale_internal.h" +#include "libavutil/cpu.h" + +static int yv12toyuy2_unscaled_altivec(SwsContext *c, const uint8_t* src[], + int srcStride[], int srcSliceY, + int srcSliceH, uint8_t* dstParam[], + int dstStride_a[]) +{ + uint8_t *dst=dstParam[0] + dstStride_a[0]*srcSliceY; + // yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0], srcStride[1], dstStride[0]); + const uint8_t *ysrc = src[0]; + const uint8_t *usrc = src[1]; + const uint8_t *vsrc = src[2]; + const int width = c->srcW; + const int height = srcSliceH; + const int lumStride = srcStride[0]; + const int chromStride = srcStride[1]; + const int dstStride = dstStride_a[0]; + const vector unsigned char yperm = vec_lvsl(0, ysrc); + const int vertLumPerChroma = 2; + register unsigned int y; + + /* This code assumes: + + 1) dst is 16 bytes-aligned + 2) dstStride is a multiple of 16 + 3) width is a multiple of 16 + 4) lum & chrom stride are multiples of 8 + */ + + for (y=0; y<height; y++) { + int i; + for (i = 0; i < 
width - 31; i+= 32) { + const unsigned int j = i >> 1; + vector unsigned char v_yA = vec_ld(i, ysrc); + vector unsigned char v_yB = vec_ld(i + 16, ysrc); + vector unsigned char v_yC = vec_ld(i + 32, ysrc); + vector unsigned char v_y1 = vec_perm(v_yA, v_yB, yperm); + vector unsigned char v_y2 = vec_perm(v_yB, v_yC, yperm); + vector unsigned char v_uA = vec_ld(j, usrc); + vector unsigned char v_uB = vec_ld(j + 16, usrc); + vector unsigned char v_u = vec_perm(v_uA, v_uB, vec_lvsl(j, usrc)); + vector unsigned char v_vA = vec_ld(j, vsrc); + vector unsigned char v_vB = vec_ld(j + 16, vsrc); + vector unsigned char v_v = vec_perm(v_vA, v_vB, vec_lvsl(j, vsrc)); + vector unsigned char v_uv_a = vec_mergeh(v_u, v_v); + vector unsigned char v_uv_b = vec_mergel(v_u, v_v); + vector unsigned char v_yuy2_0 = vec_mergeh(v_y1, v_uv_a); + vector unsigned char v_yuy2_1 = vec_mergel(v_y1, v_uv_a); + vector unsigned char v_yuy2_2 = vec_mergeh(v_y2, v_uv_b); + vector unsigned char v_yuy2_3 = vec_mergel(v_y2, v_uv_b); + vec_st(v_yuy2_0, (i << 1), dst); + vec_st(v_yuy2_1, (i << 1) + 16, dst); + vec_st(v_yuy2_2, (i << 1) + 32, dst); + vec_st(v_yuy2_3, (i << 1) + 48, dst); + } + if (i < width) { + const unsigned int j = i >> 1; + vector unsigned char v_y1 = vec_ld(i, ysrc); + vector unsigned char v_u = vec_ld(j, usrc); + vector unsigned char v_v = vec_ld(j, vsrc); + vector unsigned char v_uv_a = vec_mergeh(v_u, v_v); + vector unsigned char v_yuy2_0 = vec_mergeh(v_y1, v_uv_a); + vector unsigned char v_yuy2_1 = vec_mergel(v_y1, v_uv_a); + vec_st(v_yuy2_0, (i << 1), dst); + vec_st(v_yuy2_1, (i << 1) + 16, dst); + } + if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) { + usrc += chromStride; + vsrc += chromStride; + } + ysrc += lumStride; + dst += dstStride; + } + + return srcSliceH; +} + +static int yv12touyvy_unscaled_altivec(SwsContext *c, const uint8_t* src[], + int srcStride[], int srcSliceY, + int srcSliceH, uint8_t* dstParam[], + int dstStride_a[]) +{ + uint8_t *dst=dstParam[0] + 
dstStride_a[0]*srcSliceY; + // yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0], srcStride[1], dstStride[0]); + const uint8_t *ysrc = src[0]; + const uint8_t *usrc = src[1]; + const uint8_t *vsrc = src[2]; + const int width = c->srcW; + const int height = srcSliceH; + const int lumStride = srcStride[0]; + const int chromStride = srcStride[1]; + const int dstStride = dstStride_a[0]; + const int vertLumPerChroma = 2; + const vector unsigned char yperm = vec_lvsl(0, ysrc); + register unsigned int y; + + /* This code assumes: + + 1) dst is 16 bytes-aligned + 2) dstStride is a multiple of 16 + 3) width is a multiple of 16 + 4) lum & chrom stride are multiples of 8 + */ + + for (y=0; y<height; y++) { + int i; + for (i = 0; i < width - 31; i+= 32) { + const unsigned int j = i >> 1; + vector unsigned char v_yA = vec_ld(i, ysrc); + vector unsigned char v_yB = vec_ld(i + 16, ysrc); + vector unsigned char v_yC = vec_ld(i + 32, ysrc); + vector unsigned char v_y1 = vec_perm(v_yA, v_yB, yperm); + vector unsigned char v_y2 = vec_perm(v_yB, v_yC, yperm); + vector unsigned char v_uA = vec_ld(j, usrc); + vector unsigned char v_uB = vec_ld(j + 16, usrc); + vector unsigned char v_u = vec_perm(v_uA, v_uB, vec_lvsl(j, usrc)); + vector unsigned char v_vA = vec_ld(j, vsrc); + vector unsigned char v_vB = vec_ld(j + 16, vsrc); + vector unsigned char v_v = vec_perm(v_vA, v_vB, vec_lvsl(j, vsrc)); + vector unsigned char v_uv_a = vec_mergeh(v_u, v_v); + vector unsigned char v_uv_b = vec_mergel(v_u, v_v); + vector unsigned char v_uyvy_0 = vec_mergeh(v_uv_a, v_y1); + vector unsigned char v_uyvy_1 = vec_mergel(v_uv_a, v_y1); + vector unsigned char v_uyvy_2 = vec_mergeh(v_uv_b, v_y2); + vector unsigned char v_uyvy_3 = vec_mergel(v_uv_b, v_y2); + vec_st(v_uyvy_0, (i << 1), dst); + vec_st(v_uyvy_1, (i << 1) + 16, dst); + vec_st(v_uyvy_2, (i << 1) + 32, dst); + vec_st(v_uyvy_3, (i << 1) + 48, dst); + } + if (i < width) { + const unsigned int j = i >> 1; + vector unsigned char 
v_y1 = vec_ld(i, ysrc); + vector unsigned char v_u = vec_ld(j, usrc); + vector unsigned char v_v = vec_ld(j, vsrc); + vector unsigned char v_uv_a = vec_mergeh(v_u, v_v); + vector unsigned char v_uyvy_0 = vec_mergeh(v_uv_a, v_y1); + vector unsigned char v_uyvy_1 = vec_mergel(v_uv_a, v_y1); + vec_st(v_uyvy_0, (i << 1), dst); + vec_st(v_uyvy_1, (i << 1) + 16, dst); + } + if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) { + usrc += chromStride; + vsrc += chromStride; + } + ysrc += lumStride; + dst += dstStride; + } + return srcSliceH; +} + +void ff_swscale_get_unscaled_altivec(SwsContext *c) +{ + if ((av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) && !(c->srcW & 15) && + !(c->flags & SWS_BITEXACT) && c->srcFormat == PIX_FMT_YUV420P) { + enum PixelFormat dstFormat = c->dstFormat; + + // unscaled YV12 -> packed YUV, we want speed + if (dstFormat == PIX_FMT_YUYV422) + c->swScale= yv12toyuy2_unscaled_altivec; + else if (dstFormat == PIX_FMT_UYVY422) + c->swScale= yv12touyvy_unscaled_altivec; + } +} diff --git a/libswscale/rgb2rgb.c b/libswscale/rgb2rgb.c index adc5d59c8c..84ef43b774 100644 --- a/libswscale/rgb2rgb.c +++ b/libswscale/rgb2rgb.c @@ -24,115 +24,75 @@ */ #include <inttypes.h> #include "config.h" -#include "libavutil/x86_cpu.h" #include "libavutil/bswap.h" #include "rgb2rgb.h" #include "swscale.h" #include "swscale_internal.h" -void (*rgb24tobgr32)(const uint8_t *src, uint8_t *dst, long src_size); -void (*rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long src_size); -void (*rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long src_size); -void (*rgb32tobgr24)(const uint8_t *src, uint8_t *dst, long src_size); -void (*rgb32to16)(const uint8_t *src, uint8_t *dst, long src_size); -void (*rgb32to15)(const uint8_t *src, uint8_t *dst, long src_size); -void (*rgb15to16)(const uint8_t *src, uint8_t *dst, long src_size); -void (*rgb15tobgr24)(const uint8_t *src, uint8_t *dst, long src_size); -void (*rgb15to32)(const uint8_t *src, uint8_t *dst, long src_size); -void 
(*rgb16to15)(const uint8_t *src, uint8_t *dst, long src_size); -void (*rgb16tobgr24)(const uint8_t *src, uint8_t *dst, long src_size); -void (*rgb16to32)(const uint8_t *src, uint8_t *dst, long src_size); -void (*rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long src_size); -void (*rgb24to16)(const uint8_t *src, uint8_t *dst, long src_size); -void (*rgb24to15)(const uint8_t *src, uint8_t *dst, long src_size); -void (*shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, long src_size); -void (*rgb32tobgr16)(const uint8_t *src, uint8_t *dst, long src_size); -void (*rgb32tobgr15)(const uint8_t *src, uint8_t *dst, long src_size); +void (*rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int src_size); +void (*rgb24tobgr16)(const uint8_t *src, uint8_t *dst, int src_size); +void (*rgb24tobgr15)(const uint8_t *src, uint8_t *dst, int src_size); +void (*rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int src_size); +void (*rgb32to16)(const uint8_t *src, uint8_t *dst, int src_size); +void (*rgb32to15)(const uint8_t *src, uint8_t *dst, int src_size); +void (*rgb15to16)(const uint8_t *src, uint8_t *dst, int src_size); +void (*rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int src_size); +void (*rgb15to32)(const uint8_t *src, uint8_t *dst, int src_size); +void (*rgb16to15)(const uint8_t *src, uint8_t *dst, int src_size); +void (*rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int src_size); +void (*rgb16to32)(const uint8_t *src, uint8_t *dst, int src_size); +void (*rgb24tobgr24)(const uint8_t *src, uint8_t *dst, int src_size); +void (*rgb24to16)(const uint8_t *src, uint8_t *dst, int src_size); +void (*rgb24to15)(const uint8_t *src, uint8_t *dst, int src_size); +void (*shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, int src_size); +void (*rgb32tobgr16)(const uint8_t *src, uint8_t *dst, int src_size); +void (*rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int src_size); void (*yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, - long 
width, long height, - long lumStride, long chromStride, long dstStride); + int width, int height, + int lumStride, int chromStride, int dstStride); void (*yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, - long width, long height, - long lumStride, long chromStride, long dstStride); + int width, int height, + int lumStride, int chromStride, int dstStride); void (*yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, - long width, long height, - long lumStride, long chromStride, long dstStride); + int width, int height, + int lumStride, int chromStride, int dstStride); void (*yuv422ptouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, - long width, long height, - long lumStride, long chromStride, long dstStride); + int width, int height, + int lumStride, int chromStride, int dstStride); void (*yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, - long width, long height, - long lumStride, long chromStride, long srcStride); + int width, int height, + int lumStride, int chromStride, int srcStride); void (*rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, - long width, long height, - long lumStride, long chromStride, long srcStride); -void (*planar2x)(const uint8_t *src, uint8_t *dst, long width, long height, - long srcStride, long dstStride); + int width, int height, + int lumStride, int chromStride, int srcStride); +void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height, + int srcStride, int dstStride); void (*interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dst, - long width, long height, long src1Stride, - long src2Stride, long dstStride); + int width, int height, int src1Stride, + int src2Stride, int dstStride); void (*vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2, uint8_t *dst1, uint8_t *dst2, - long width, long height, - long srcStride1, long srcStride2, - 
long dstStride1, long dstStride2); + int width, int height, + int srcStride1, int srcStride2, + int dstStride1, int dstStride2); void (*yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, uint8_t *dst, - long width, long height, - long srcStride1, long srcStride2, - long srcStride3, long dstStride); + int width, int height, + int srcStride1, int srcStride2, + int srcStride3, int dstStride); void (*uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, - long width, long height, - long lumStride, long chromStride, long srcStride); + int width, int height, + int lumStride, int chromStride, int srcStride); void (*uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, - long width, long height, - long lumStride, long chromStride, long srcStride); + int width, int height, + int lumStride, int chromStride, int srcStride); void (*yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, - long width, long height, - long lumStride, long chromStride, long srcStride); + int width, int height, + int lumStride, int chromStride, int srcStride); void (*yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, - long width, long height, - long lumStride, long chromStride, long srcStride); - - -#if ARCH_X86 -DECLARE_ASM_CONST(8, uint64_t, mmx_ff) = 0x00000000000000FFULL; -DECLARE_ASM_CONST(8, uint64_t, mmx_null) = 0x0000000000000000ULL; -DECLARE_ASM_CONST(8, uint64_t, mmx_one) = 0xFFFFFFFFFFFFFFFFULL; -DECLARE_ASM_CONST(8, uint64_t, mask32b) = 0x000000FF000000FFULL; -DECLARE_ASM_CONST(8, uint64_t, mask32g) = 0x0000FF000000FF00ULL; -DECLARE_ASM_CONST(8, uint64_t, mask32r) = 0x00FF000000FF0000ULL; -DECLARE_ASM_CONST(8, uint64_t, mask32a) = 0xFF000000FF000000ULL; -DECLARE_ASM_CONST(8, uint64_t, mask32) = 0x00FFFFFF00FFFFFFULL; -DECLARE_ASM_CONST(8, uint64_t, mask3216br) = 0x00F800F800F800F8ULL; -DECLARE_ASM_CONST(8, uint64_t, mask3216g) = 0x0000FC000000FC00ULL; 
-DECLARE_ASM_CONST(8, uint64_t, mask3215g) = 0x0000F8000000F800ULL; -DECLARE_ASM_CONST(8, uint64_t, mul3216) = 0x2000000420000004ULL; -DECLARE_ASM_CONST(8, uint64_t, mul3215) = 0x2000000820000008ULL; -DECLARE_ASM_CONST(8, uint64_t, mask24b) = 0x00FF0000FF0000FFULL; -DECLARE_ASM_CONST(8, uint64_t, mask24g) = 0xFF0000FF0000FF00ULL; -DECLARE_ASM_CONST(8, uint64_t, mask24r) = 0x0000FF0000FF0000ULL; -DECLARE_ASM_CONST(8, uint64_t, mask24l) = 0x0000000000FFFFFFULL; -DECLARE_ASM_CONST(8, uint64_t, mask24h) = 0x0000FFFFFF000000ULL; -DECLARE_ASM_CONST(8, uint64_t, mask24hh) = 0xffff000000000000ULL; -DECLARE_ASM_CONST(8, uint64_t, mask24hhh) = 0xffffffff00000000ULL; -DECLARE_ASM_CONST(8, uint64_t, mask24hhhh) = 0xffffffffffff0000ULL; -DECLARE_ASM_CONST(8, uint64_t, mask15b) = 0x001F001F001F001FULL; /* 00000000 00011111 xxB */ -DECLARE_ASM_CONST(8, uint64_t, mask15rg) = 0x7FE07FE07FE07FE0ULL; /* 01111111 11100000 RGx */ -DECLARE_ASM_CONST(8, uint64_t, mask15s) = 0xFFE0FFE0FFE0FFE0ULL; -DECLARE_ASM_CONST(8, uint64_t, mask15g) = 0x03E003E003E003E0ULL; -DECLARE_ASM_CONST(8, uint64_t, mask15r) = 0x7C007C007C007C00ULL; -#define mask16b mask15b -DECLARE_ASM_CONST(8, uint64_t, mask16g) = 0x07E007E007E007E0ULL; -DECLARE_ASM_CONST(8, uint64_t, mask16r) = 0xF800F800F800F800ULL; -DECLARE_ASM_CONST(8, uint64_t, red_16mask) = 0x0000f8000000f800ULL; -DECLARE_ASM_CONST(8, uint64_t, green_16mask) = 0x000007e0000007e0ULL; -DECLARE_ASM_CONST(8, uint64_t, blue_16mask) = 0x0000001f0000001fULL; -DECLARE_ASM_CONST(8, uint64_t, red_15mask) = 0x00007c0000007c00ULL; -DECLARE_ASM_CONST(8, uint64_t, green_15mask) = 0x000003e0000003e0ULL; -DECLARE_ASM_CONST(8, uint64_t, blue_15mask) = 0x0000001f0000001fULL; -#endif /* ARCH_X86 */ + int width, int height, + int lumStride, int chromStride, int srcStride); #define RGB2YUV_SHIFT 8 #define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5)) @@ -145,50 +105,9 @@ DECLARE_ASM_CONST(8, uint64_t, blue_15mask) = 0x0000001f0000001fULL; #define RV ((int)( 
0.439*(1<<RGB2YUV_SHIFT)+0.5)) #define RU ((int)(-0.148*(1<<RGB2YUV_SHIFT)+0.5)) -//Note: We have C, MMX, MMX2, 3DNOW versions, there is no 3DNOW + MMX2 one. //plain C versions -#define COMPILE_TEMPLATE_MMX 0 -#define COMPILE_TEMPLATE_MMX2 0 -#define COMPILE_TEMPLATE_AMD3DNOW 0 -#define COMPILE_TEMPLATE_SSE2 0 -#define RENAME(a) a ## _C -#include "rgb2rgb_template.c" - -#if ARCH_X86 - -//MMX versions -#undef RENAME -#undef COMPILE_TEMPLATE_MMX -#define COMPILE_TEMPLATE_MMX 1 -#define RENAME(a) a ## _MMX -#include "rgb2rgb_template.c" - -//MMX2 versions -#undef RENAME -#undef COMPILE_TEMPLATE_MMX2 -#define COMPILE_TEMPLATE_MMX2 1 -#define RENAME(a) a ## _MMX2 -#include "rgb2rgb_template.c" - -//SSE2 versions -#undef RENAME -#undef COMPILE_TEMPLATE_SSE2 -#define COMPILE_TEMPLATE_SSE2 1 -#define RENAME(a) a ## _SSE2 #include "rgb2rgb_template.c" -//3DNOW versions -#undef RENAME -#undef COMPILE_TEMPLATE_MMX2 -#undef COMPILE_TEMPLATE_SSE2 -#undef COMPILE_TEMPLATE_AMD3DNOW -#define COMPILE_TEMPLATE_MMX2 0 -#define COMPILE_TEMPLATE_SSE2 1 -#define COMPILE_TEMPLATE_AMD3DNOW 1 -#define RENAME(a) a ## _3DNOW -#include "rgb2rgb_template.c" - -#endif //ARCH_X86 || ARCH_X86_64 /* RGB15->RGB16 original by Strepto/Astral @@ -197,20 +116,11 @@ DECLARE_ASM_CONST(8, uint64_t, blue_15mask) = 0x0000001f0000001fULL; 32-bit C version, and and&add trick by Michael Niedermayer */ -void sws_rgb2rgb_init(int flags) +void sws_rgb2rgb_init(void) { -#if HAVE_SSE2 || HAVE_MMX2 || HAVE_AMD3DNOW || HAVE_MMX - if (flags & SWS_CPU_CAPS_SSE2) - rgb2rgb_init_SSE2(); - else if (flags & SWS_CPU_CAPS_MMX2) - rgb2rgb_init_MMX2(); - else if (flags & SWS_CPU_CAPS_3DNOW) - rgb2rgb_init_3DNOW(); - else if (flags & SWS_CPU_CAPS_MMX) - rgb2rgb_init_MMX(); - else -#endif /* HAVE_MMX2 || HAVE_AMD3DNOW || HAVE_MMX */ - rgb2rgb_init_C(); + rgb2rgb_init_c(); + if (HAVE_MMX) + rgb2rgb_init_x86(); } #if LIBSWSCALE_VERSION_MAJOR < 1 @@ -241,10 +151,10 @@ void palette8tobgr16(const uint8_t *src, uint8_t *dst, long 
num_pixels, const ui } #endif -void rgb32to24(const uint8_t *src, uint8_t *dst, long src_size) +void rgb32to24(const uint8_t *src, uint8_t *dst, int src_size) { - long i; - long num_pixels = src_size >> 2; + int i; + int num_pixels = src_size >> 2; for (i=0; i<num_pixels; i++) { #if HAVE_BIGENDIAN /* RGB32 (= A,B,G,R) -> BGR24 (= B,G,R) */ @@ -259,9 +169,9 @@ void rgb32to24(const uint8_t *src, uint8_t *dst, long src_size) } } -void rgb24to32(const uint8_t *src, uint8_t *dst, long src_size) +void rgb24to32(const uint8_t *src, uint8_t *dst, int src_size) { - long i; + int i; for (i=0; 3*i<src_size; i++) { #if HAVE_BIGENDIAN /* RGB24 (= R,G,B) -> BGR32 (= A,R,G,B) */ @@ -278,7 +188,7 @@ void rgb24to32(const uint8_t *src, uint8_t *dst, long src_size) } } -void rgb16tobgr32(const uint8_t *src, uint8_t *dst, long src_size) +void rgb16tobgr32(const uint8_t *src, uint8_t *dst, int src_size) { const uint16_t *end; uint8_t *d = dst; @@ -301,7 +211,7 @@ void rgb16tobgr32(const uint8_t *src, uint8_t *dst, long src_size) } } -void rgb16to24(const uint8_t *src, uint8_t *dst, long src_size) +void rgb16to24(const uint8_t *src, uint8_t *dst, int src_size) { const uint16_t *end; uint8_t *d = dst; @@ -316,10 +226,10 @@ void rgb16to24(const uint8_t *src, uint8_t *dst, long src_size) } } -void rgb16tobgr16(const uint8_t *src, uint8_t *dst, long src_size) +void rgb16tobgr16(const uint8_t *src, uint8_t *dst, int src_size) { - long i; - long num_pixels = src_size >> 1; + int i; + int num_pixels = src_size >> 1; for (i=0; i<num_pixels; i++) { unsigned rgb = ((const uint16_t*)src)[i]; @@ -327,10 +237,10 @@ void rgb16tobgr16(const uint8_t *src, uint8_t *dst, long src_size) } } -void rgb16tobgr15(const uint8_t *src, uint8_t *dst, long src_size) +void rgb16tobgr15(const uint8_t *src, uint8_t *dst, int src_size) { - long i; - long num_pixels = src_size >> 1; + int i; + int num_pixels = src_size >> 1; for (i=0; i<num_pixels; i++) { unsigned rgb = ((const uint16_t*)src)[i]; @@ -338,7 +248,7 @@ 
void rgb16tobgr15(const uint8_t *src, uint8_t *dst, long src_size) } } -void rgb15tobgr32(const uint8_t *src, uint8_t *dst, long src_size) +void rgb15tobgr32(const uint8_t *src, uint8_t *dst, int src_size) { const uint16_t *end; uint8_t *d = dst; @@ -361,7 +271,7 @@ void rgb15tobgr32(const uint8_t *src, uint8_t *dst, long src_size) } } -void rgb15to24(const uint8_t *src, uint8_t *dst, long src_size) +void rgb15to24(const uint8_t *src, uint8_t *dst, int src_size) { const uint16_t *end; uint8_t *d = dst; @@ -376,10 +286,10 @@ void rgb15to24(const uint8_t *src, uint8_t *dst, long src_size) } } -void rgb15tobgr16(const uint8_t *src, uint8_t *dst, long src_size) +void rgb15tobgr16(const uint8_t *src, uint8_t *dst, int src_size) { - long i; - long num_pixels = src_size >> 1; + int i; + int num_pixels = src_size >> 1; for (i=0; i<num_pixels; i++) { unsigned rgb = ((const uint16_t*)src)[i]; @@ -387,10 +297,10 @@ void rgb15tobgr16(const uint8_t *src, uint8_t *dst, long src_size) } } -void rgb15tobgr15(const uint8_t *src, uint8_t *dst, long src_size) +void rgb15tobgr15(const uint8_t *src, uint8_t *dst, int src_size) { - long i; - long num_pixels = src_size >> 1; + int i; + int num_pixels = src_size >> 1; for (i=0; i<num_pixels; i++) { unsigned br; @@ -400,10 +310,10 @@ void rgb15tobgr15(const uint8_t *src, uint8_t *dst, long src_size) } } -void bgr8torgb8(const uint8_t *src, uint8_t *dst, long src_size) +void bgr8torgb8(const uint8_t *src, uint8_t *dst, int src_size) { - long i; - long num_pixels = src_size; + int i; + int num_pixels = src_size; for (i=0; i<num_pixels; i++) { unsigned b,g,r; register uint8_t rgb; @@ -416,9 +326,9 @@ void bgr8torgb8(const uint8_t *src, uint8_t *dst, long src_size) } #define DEFINE_SHUFFLE_BYTES(a, b, c, d) \ -void shuffle_bytes_##a##b##c##d(const uint8_t *src, uint8_t *dst, long src_size) \ +void shuffle_bytes_##a##b##c##d(const uint8_t *src, uint8_t *dst, int src_size) \ { \ - long i; \ + int i; \ \ for (i = 0; i < src_size; i+=4) { \ dst[i 
+ 0] = src[i + a]; \ diff --git a/libswscale/rgb2rgb.h b/libswscale/rgb2rgb.h index 31e21af127..6923dd9608 100644 --- a/libswscale/rgb2rgb.h +++ b/libswscale/rgb2rgb.h @@ -32,41 +32,41 @@ #include "libavutil/avutil.h" /* A full collection of RGB to RGB(BGR) converters */ -extern void (*rgb24tobgr32)(const uint8_t *src, uint8_t *dst, long src_size); -extern void (*rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long src_size); -extern void (*rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long src_size); -extern void (*rgb32tobgr24)(const uint8_t *src, uint8_t *dst, long src_size); -extern void (*rgb32to16) (const uint8_t *src, uint8_t *dst, long src_size); -extern void (*rgb32to15) (const uint8_t *src, uint8_t *dst, long src_size); -extern void (*rgb15to16) (const uint8_t *src, uint8_t *dst, long src_size); -extern void (*rgb15tobgr24)(const uint8_t *src, uint8_t *dst, long src_size); -extern void (*rgb15to32) (const uint8_t *src, uint8_t *dst, long src_size); -extern void (*rgb16to15) (const uint8_t *src, uint8_t *dst, long src_size); -extern void (*rgb16tobgr24)(const uint8_t *src, uint8_t *dst, long src_size); -extern void (*rgb16to32) (const uint8_t *src, uint8_t *dst, long src_size); -extern void (*rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long src_size); -extern void (*rgb24to16) (const uint8_t *src, uint8_t *dst, long src_size); -extern void (*rgb24to15) (const uint8_t *src, uint8_t *dst, long src_size); -extern void (*shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, long src_size); -extern void (*rgb32tobgr16)(const uint8_t *src, uint8_t *dst, long src_size); -extern void (*rgb32tobgr15)(const uint8_t *src, uint8_t *dst, long src_size); - -void rgb24to32 (const uint8_t *src, uint8_t *dst, long src_size); -void rgb32to24 (const uint8_t *src, uint8_t *dst, long src_size); -void rgb16tobgr32(const uint8_t *src, uint8_t *dst, long src_size); -void rgb16to24 (const uint8_t *src, uint8_t *dst, long src_size); -void rgb16tobgr16(const uint8_t *src, 
uint8_t *dst, long src_size); -void rgb16tobgr15(const uint8_t *src, uint8_t *dst, long src_size); -void rgb15tobgr32(const uint8_t *src, uint8_t *dst, long src_size); -void rgb15to24 (const uint8_t *src, uint8_t *dst, long src_size); -void rgb15tobgr16(const uint8_t *src, uint8_t *dst, long src_size); -void rgb15tobgr15(const uint8_t *src, uint8_t *dst, long src_size); -void bgr8torgb8 (const uint8_t *src, uint8_t *dst, long src_size); - -void shuffle_bytes_0321(const uint8_t *src, uint8_t *dst, long src_size); -void shuffle_bytes_1230(const uint8_t *src, uint8_t *dst, long src_size); -void shuffle_bytes_3012(const uint8_t *src, uint8_t *dst, long src_size); -void shuffle_bytes_3210(const uint8_t *src, uint8_t *dst, long src_size); +extern void (*rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int src_size); +extern void (*rgb24tobgr16)(const uint8_t *src, uint8_t *dst, int src_size); +extern void (*rgb24tobgr15)(const uint8_t *src, uint8_t *dst, int src_size); +extern void (*rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int src_size); +extern void (*rgb32to16) (const uint8_t *src, uint8_t *dst, int src_size); +extern void (*rgb32to15) (const uint8_t *src, uint8_t *dst, int src_size); +extern void (*rgb15to16) (const uint8_t *src, uint8_t *dst, int src_size); +extern void (*rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int src_size); +extern void (*rgb15to32) (const uint8_t *src, uint8_t *dst, int src_size); +extern void (*rgb16to15) (const uint8_t *src, uint8_t *dst, int src_size); +extern void (*rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int src_size); +extern void (*rgb16to32) (const uint8_t *src, uint8_t *dst, int src_size); +extern void (*rgb24tobgr24)(const uint8_t *src, uint8_t *dst, int src_size); +extern void (*rgb24to16) (const uint8_t *src, uint8_t *dst, int src_size); +extern void (*rgb24to15) (const uint8_t *src, uint8_t *dst, int src_size); +extern void (*shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, int src_size); +extern void 
(*rgb32tobgr16)(const uint8_t *src, uint8_t *dst, int src_size); +extern void (*rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int src_size); + +void rgb24to32 (const uint8_t *src, uint8_t *dst, int src_size); +void rgb32to24 (const uint8_t *src, uint8_t *dst, int src_size); +void rgb16tobgr32(const uint8_t *src, uint8_t *dst, int src_size); +void rgb16to24 (const uint8_t *src, uint8_t *dst, int src_size); +void rgb16tobgr16(const uint8_t *src, uint8_t *dst, int src_size); +void rgb16tobgr15(const uint8_t *src, uint8_t *dst, int src_size); +void rgb15tobgr32(const uint8_t *src, uint8_t *dst, int src_size); +void rgb15to24 (const uint8_t *src, uint8_t *dst, int src_size); +void rgb15tobgr16(const uint8_t *src, uint8_t *dst, int src_size); +void rgb15tobgr15(const uint8_t *src, uint8_t *dst, int src_size); +void bgr8torgb8 (const uint8_t *src, uint8_t *dst, int src_size); + +void shuffle_bytes_0321(const uint8_t *src, uint8_t *dst, int src_size); +void shuffle_bytes_1230(const uint8_t *src, uint8_t *dst, int src_size); +void shuffle_bytes_3012(const uint8_t *src, uint8_t *dst, int src_size); +void shuffle_bytes_3210(const uint8_t *src, uint8_t *dst, int src_size); #if LIBSWSCALE_VERSION_MAJOR < 1 /* deprecated, use the public versions in swscale.h */ @@ -78,51 +78,48 @@ attribute_deprecated void palette8torgb16(const uint8_t *src, uint8_t *dst, long attribute_deprecated void palette8tobgr16(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette); #endif -/** - * Height should be a multiple of 2 and width should be a multiple of 16. - * (If this is a problem for anyone then tell me, and I will fix it.) - * Chrominance data is only taken from every second line, others are ignored. - * FIXME: Write high quality version. 
- */ -//void uyvytoyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, + +void rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, + uint8_t *vdst, int width, int height, int lumStride, + int chromStride, int srcStride); /** * Height should be a multiple of 2 and width should be a multiple of 16. * (If this is a problem for anyone then tell me, and I will fix it.) */ extern void (*yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, - long width, long height, - long lumStride, long chromStride, long dstStride); + int width, int height, + int lumStride, int chromStride, int dstStride); /** * Width should be a multiple of 16. */ extern void (*yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, - long width, long height, - long lumStride, long chromStride, long dstStride); + int width, int height, + int lumStride, int chromStride, int dstStride); /** * Height should be a multiple of 2 and width should be a multiple of 16. * (If this is a problem for anyone then tell me, and I will fix it.) */ extern void (*yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, - long width, long height, - long lumStride, long chromStride, long srcStride); + int width, int height, + int lumStride, int chromStride, int srcStride); /** * Height should be a multiple of 2 and width should be a multiple of 16. * (If this is a problem for anyone then tell me, and I will fix it.) */ extern void (*yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, - long width, long height, - long lumStride, long chromStride, long dstStride); + int width, int height, + int lumStride, int chromStride, int dstStride); /** * Width should be a multiple of 16. 
*/ extern void (*yuv422ptouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, - long width, long height, - long lumStride, long chromStride, long dstStride); + int width, int height, + int lumStride, int chromStride, int dstStride); /** * Height should be a multiple of 2 and width should be a multiple of 2. @@ -131,41 +128,43 @@ extern void (*yuv422ptouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uin * FIXME: Write high quality version. */ extern void (*rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, - long width, long height, - long lumStride, long chromStride, long srcStride); -extern void (*planar2x)(const uint8_t *src, uint8_t *dst, long width, long height, - long srcStride, long dstStride); + int width, int height, + int lumStride, int chromStride, int srcStride); +extern void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height, + int srcStride, int dstStride); extern void (*interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dst, - long width, long height, long src1Stride, - long src2Stride, long dstStride); + int width, int height, int src1Stride, + int src2Stride, int dstStride); extern void (*vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2, uint8_t *dst1, uint8_t *dst2, - long width, long height, - long srcStride1, long srcStride2, - long dstStride1, long dstStride2); + int width, int height, + int srcStride1, int srcStride2, + int dstStride1, int dstStride2); extern void (*yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, uint8_t *dst, - long width, long height, - long srcStride1, long srcStride2, - long srcStride3, long dstStride); + int width, int height, + int srcStride1, int srcStride2, + int srcStride3, int dstStride); extern void (*uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, - long width, long height, - long lumStride, long chromStride, long srcStride); + int width, int height, + int 
lumStride, int chromStride, int srcStride); extern void (*uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, - long width, long height, - long lumStride, long chromStride, long srcStride); + int width, int height, + int lumStride, int chromStride, int srcStride); extern void (*yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, - long width, long height, - long lumStride, long chromStride, long srcStride); + int width, int height, + int lumStride, int chromStride, int srcStride); extern void (*yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, - long width, long height, - long lumStride, long chromStride, long srcStride); + int width, int height, + int lumStride, int chromStride, int srcStride); + +void sws_rgb2rgb_init(void); -void sws_rgb2rgb_init(int flags); +void rgb2rgb_init_x86(void); #endif /* SWSCALE_RGB2RGB_H */ diff --git a/libswscale/rgb2rgb_template.c b/libswscale/rgb2rgb_template.c index 9af0eaa366..0734e8891b 100644 --- a/libswscale/rgb2rgb_template.c +++ b/libswscale/rgb2rgb_template.c @@ -26,85 +26,13 @@ #include <stddef.h> -#undef PREFETCH -#undef MOVNTQ -#undef EMMS -#undef SFENCE -#undef MMREG_SIZE -#undef PAVGB - -#if COMPILE_TEMPLATE_SSE2 -#define MMREG_SIZE 16 -#else -#define MMREG_SIZE 8 -#endif - -#if COMPILE_TEMPLATE_AMD3DNOW -#define PREFETCH "prefetch" -#define PAVGB "pavgusb" -#elif COMPILE_TEMPLATE_MMX2 -#define PREFETCH "prefetchnta" -#define PAVGB "pavgb" -#else -#define PREFETCH " # nop" -#endif - -#if COMPILE_TEMPLATE_AMD3DNOW -/* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. 
*/ -#define EMMS "femms" -#else -#define EMMS "emms" -#endif - -#if COMPILE_TEMPLATE_MMX2 -#define MOVNTQ "movntq" -#define SFENCE "sfence" -#else -#define MOVNTQ "movq" -#define SFENCE " # nop" -#endif - -static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, long src_size) +static inline void rgb24tobgr32_c(const uint8_t *src, uint8_t *dst, int src_size) { uint8_t *dest = dst; const uint8_t *s = src; const uint8_t *end; -#if COMPILE_TEMPLATE_MMX - const uint8_t *mm_end; -#endif end = s + src_size; -#if COMPILE_TEMPLATE_MMX - __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); - mm_end = end - 23; - __asm__ volatile("movq %0, %%mm7"::"m"(mask32a):"memory"); - while (s < mm_end) { - __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movd %1, %%mm0 \n\t" - "punpckldq 3%1, %%mm0 \n\t" - "movd 6%1, %%mm1 \n\t" - "punpckldq 9%1, %%mm1 \n\t" - "movd 12%1, %%mm2 \n\t" - "punpckldq 15%1, %%mm2 \n\t" - "movd 18%1, %%mm3 \n\t" - "punpckldq 21%1, %%mm3 \n\t" - "por %%mm7, %%mm0 \n\t" - "por %%mm7, %%mm1 \n\t" - "por %%mm7, %%mm2 \n\t" - "por %%mm7, %%mm3 \n\t" - MOVNTQ" %%mm0, %0 \n\t" - MOVNTQ" %%mm1, 8%0 \n\t" - MOVNTQ" %%mm2, 16%0 \n\t" - MOVNTQ" %%mm3, 24%0" - :"=m"(*dest) - :"m"(*s) - :"memory"); - dest += 32; - s += 24; - } - __asm__ volatile(SFENCE:::"memory"); - __asm__ volatile(EMMS:::"memory"); -#endif + while (s < end) { #if HAVE_BIGENDIAN /* RGB24 (= R,G,B) -> RGB32 (= A,B,G,R) */ @@ -122,76 +50,14 @@ static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, long s } } -#define STORE_BGR24_MMX \ - "psrlq $8, %%mm2 \n\t" \ - "psrlq $8, %%mm3 \n\t" \ - "psrlq $8, %%mm6 \n\t" \ - "psrlq $8, %%mm7 \n\t" \ - "pand "MANGLE(mask24l)", %%mm0\n\t" \ - "pand "MANGLE(mask24l)", %%mm1\n\t" \ - "pand "MANGLE(mask24l)", %%mm4\n\t" \ - "pand "MANGLE(mask24l)", %%mm5\n\t" \ - "pand "MANGLE(mask24h)", %%mm2\n\t" \ - "pand "MANGLE(mask24h)", %%mm3\n\t" \ - "pand "MANGLE(mask24h)", %%mm6\n\t" \ - "pand "MANGLE(mask24h)", %%mm7\n\t" \ - "por %%mm2, %%mm0 
\n\t" \ - "por %%mm3, %%mm1 \n\t" \ - "por %%mm6, %%mm4 \n\t" \ - "por %%mm7, %%mm5 \n\t" \ - \ - "movq %%mm1, %%mm2 \n\t" \ - "movq %%mm4, %%mm3 \n\t" \ - "psllq $48, %%mm2 \n\t" \ - "psllq $32, %%mm3 \n\t" \ - "pand "MANGLE(mask24hh)", %%mm2\n\t" \ - "pand "MANGLE(mask24hhh)", %%mm3\n\t" \ - "por %%mm2, %%mm0 \n\t" \ - "psrlq $16, %%mm1 \n\t" \ - "psrlq $32, %%mm4 \n\t" \ - "psllq $16, %%mm5 \n\t" \ - "por %%mm3, %%mm1 \n\t" \ - "pand "MANGLE(mask24hhhh)", %%mm5\n\t" \ - "por %%mm5, %%mm4 \n\t" \ - \ - MOVNTQ" %%mm0, %0 \n\t" \ - MOVNTQ" %%mm1, 8%0 \n\t" \ - MOVNTQ" %%mm4, 16%0" - - -static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, long src_size) +static inline void rgb32tobgr24_c(const uint8_t *src, uint8_t *dst, int src_size) { uint8_t *dest = dst; const uint8_t *s = src; const uint8_t *end; -#if COMPILE_TEMPLATE_MMX - const uint8_t *mm_end; -#endif + end = s + src_size; -#if COMPILE_TEMPLATE_MMX - __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); - mm_end = end - 31; - while (s < mm_end) { - __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movq %1, %%mm0 \n\t" - "movq 8%1, %%mm1 \n\t" - "movq 16%1, %%mm4 \n\t" - "movq 24%1, %%mm5 \n\t" - "movq %%mm0, %%mm2 \n\t" - "movq %%mm1, %%mm3 \n\t" - "movq %%mm4, %%mm6 \n\t" - "movq %%mm5, %%mm7 \n\t" - STORE_BGR24_MMX - :"=m"(*dest) - :"m"(*s) - :"memory"); - dest += 24; - s += 32; - } - __asm__ volatile(SFENCE:::"memory"); - __asm__ volatile(EMMS:::"memory"); -#endif + while (s < end) { #if HAVE_BIGENDIAN /* RGB32 (= A,B,G,R) -> RGB24 (= R,G,B) */ @@ -215,39 +81,13 @@ static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, long s MMX2, 3DNOW optimization by Nick Kurshev 32-bit C version, and and&add trick by Michael Niedermayer */ -static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, long src_size) +static inline void rgb15to16_c(const uint8_t *src, uint8_t *dst, int src_size) { register const uint8_t* s=src; register uint8_t* d=dst; register const uint8_t *end; 
const uint8_t *mm_end; end = s + src_size; -#if COMPILE_TEMPLATE_MMX - __asm__ volatile(PREFETCH" %0"::"m"(*s)); - __asm__ volatile("movq %0, %%mm4"::"m"(mask15s)); - mm_end = end - 15; - while (s<mm_end) { - __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movq %1, %%mm0 \n\t" - "movq 8%1, %%mm2 \n\t" - "movq %%mm0, %%mm1 \n\t" - "movq %%mm2, %%mm3 \n\t" - "pand %%mm4, %%mm0 \n\t" - "pand %%mm4, %%mm2 \n\t" - "paddw %%mm1, %%mm0 \n\t" - "paddw %%mm3, %%mm2 \n\t" - MOVNTQ" %%mm0, %0 \n\t" - MOVNTQ" %%mm2, 8%0" - :"=m"(*d) - :"m"(*s) - ); - d+=16; - s+=16; - } - __asm__ volatile(SFENCE:::"memory"); - __asm__ volatile(EMMS:::"memory"); -#endif mm_end = end - 3; while (s < mm_end) { register unsigned x= *((const uint32_t *)s); @@ -261,44 +101,14 @@ static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, long src_ } } -static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, long src_size) +static inline void rgb16to15_c(const uint8_t *src, uint8_t *dst, int src_size) { register const uint8_t* s=src; register uint8_t* d=dst; register const uint8_t *end; const uint8_t *mm_end; end = s + src_size; -#if COMPILE_TEMPLATE_MMX - __asm__ volatile(PREFETCH" %0"::"m"(*s)); - __asm__ volatile("movq %0, %%mm7"::"m"(mask15rg)); - __asm__ volatile("movq %0, %%mm6"::"m"(mask15b)); - mm_end = end - 15; - while (s<mm_end) { - __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movq %1, %%mm0 \n\t" - "movq 8%1, %%mm2 \n\t" - "movq %%mm0, %%mm1 \n\t" - "movq %%mm2, %%mm3 \n\t" - "psrlq $1, %%mm0 \n\t" - "psrlq $1, %%mm2 \n\t" - "pand %%mm7, %%mm0 \n\t" - "pand %%mm7, %%mm2 \n\t" - "pand %%mm6, %%mm1 \n\t" - "pand %%mm6, %%mm3 \n\t" - "por %%mm1, %%mm0 \n\t" - "por %%mm3, %%mm2 \n\t" - MOVNTQ" %%mm0, %0 \n\t" - MOVNTQ" %%mm2, 8%0" - :"=m"(*d) - :"m"(*s) - ); - d+=16; - s+=16; - } - __asm__ volatile(SFENCE:::"memory"); - __asm__ volatile(EMMS:::"memory"); -#endif + mm_end = end - 3; while (s < mm_end) { register uint32_t x= *((const uint32_t*)s); @@ -312,369 +122,61 @@ 
static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, long src_ } } -static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, long src_size) +static inline void rgb32to16_c(const uint8_t *src, uint8_t *dst, int src_size) { const uint8_t *s = src; const uint8_t *end; -#if COMPILE_TEMPLATE_MMX - const uint8_t *mm_end; -#endif uint16_t *d = (uint16_t *)dst; end = s + src_size; -#if COMPILE_TEMPLATE_MMX - mm_end = end - 15; -#if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster) - __asm__ volatile( - "movq %3, %%mm5 \n\t" - "movq %4, %%mm6 \n\t" - "movq %5, %%mm7 \n\t" - "jmp 2f \n\t" - ".p2align 4 \n\t" - "1: \n\t" - PREFETCH" 32(%1) \n\t" - "movd (%1), %%mm0 \n\t" - "movd 4(%1), %%mm3 \n\t" - "punpckldq 8(%1), %%mm0 \n\t" - "punpckldq 12(%1), %%mm3 \n\t" - "movq %%mm0, %%mm1 \n\t" - "movq %%mm3, %%mm4 \n\t" - "pand %%mm6, %%mm0 \n\t" - "pand %%mm6, %%mm3 \n\t" - "pmaddwd %%mm7, %%mm0 \n\t" - "pmaddwd %%mm7, %%mm3 \n\t" - "pand %%mm5, %%mm1 \n\t" - "pand %%mm5, %%mm4 \n\t" - "por %%mm1, %%mm0 \n\t" - "por %%mm4, %%mm3 \n\t" - "psrld $5, %%mm0 \n\t" - "pslld $11, %%mm3 \n\t" - "por %%mm3, %%mm0 \n\t" - MOVNTQ" %%mm0, (%0) \n\t" - "add $16, %1 \n\t" - "add $8, %0 \n\t" - "2: \n\t" - "cmp %2, %1 \n\t" - " jb 1b \n\t" - : "+r" (d), "+r"(s) - : "r" (mm_end), "m" (mask3216g), "m" (mask3216br), "m" (mul3216) - ); -#else - __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory"); - __asm__ volatile( - "movq %0, %%mm7 \n\t" - "movq %1, %%mm6 \n\t" - ::"m"(red_16mask),"m"(green_16mask)); - while (s < mm_end) { - __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movd %1, %%mm0 \n\t" - "movd 4%1, %%mm3 \n\t" - "punpckldq 8%1, %%mm0 \n\t" - "punpckldq 12%1, %%mm3 \n\t" - "movq %%mm0, %%mm1 \n\t" - "movq %%mm0, %%mm2 \n\t" - "movq %%mm3, %%mm4 \n\t" - "movq %%mm3, %%mm5 \n\t" - "psrlq $3, %%mm0 \n\t" - "psrlq $3, %%mm3 \n\t" - "pand %2, %%mm0 \n\t" - "pand %2, %%mm3 \n\t" - "psrlq 
$5, %%mm1 \n\t" - "psrlq $5, %%mm4 \n\t" - "pand %%mm6, %%mm1 \n\t" - "pand %%mm6, %%mm4 \n\t" - "psrlq $8, %%mm2 \n\t" - "psrlq $8, %%mm5 \n\t" - "pand %%mm7, %%mm2 \n\t" - "pand %%mm7, %%mm5 \n\t" - "por %%mm1, %%mm0 \n\t" - "por %%mm4, %%mm3 \n\t" - "por %%mm2, %%mm0 \n\t" - "por %%mm5, %%mm3 \n\t" - "psllq $16, %%mm3 \n\t" - "por %%mm3, %%mm0 \n\t" - MOVNTQ" %%mm0, %0 \n\t" - :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); - d += 4; - s += 16; - } -#endif - __asm__ volatile(SFENCE:::"memory"); - __asm__ volatile(EMMS:::"memory"); -#endif + while (s < end) { register int rgb = *(const uint32_t*)s; s += 4; *d++ = ((rgb&0xFF)>>3) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>8); } } -static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, long src_size) +static inline void rgb32tobgr16_c(const uint8_t *src, uint8_t *dst, int src_size) { const uint8_t *s = src; const uint8_t *end; -#if COMPILE_TEMPLATE_MMX - const uint8_t *mm_end; -#endif uint16_t *d = (uint16_t *)dst; end = s + src_size; -#if COMPILE_TEMPLATE_MMX - __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory"); - __asm__ volatile( - "movq %0, %%mm7 \n\t" - "movq %1, %%mm6 \n\t" - ::"m"(red_16mask),"m"(green_16mask)); - mm_end = end - 15; - while (s < mm_end) { - __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movd %1, %%mm0 \n\t" - "movd 4%1, %%mm3 \n\t" - "punpckldq 8%1, %%mm0 \n\t" - "punpckldq 12%1, %%mm3 \n\t" - "movq %%mm0, %%mm1 \n\t" - "movq %%mm0, %%mm2 \n\t" - "movq %%mm3, %%mm4 \n\t" - "movq %%mm3, %%mm5 \n\t" - "psllq $8, %%mm0 \n\t" - "psllq $8, %%mm3 \n\t" - "pand %%mm7, %%mm0 \n\t" - "pand %%mm7, %%mm3 \n\t" - "psrlq $5, %%mm1 \n\t" - "psrlq $5, %%mm4 \n\t" - "pand %%mm6, %%mm1 \n\t" - "pand %%mm6, %%mm4 \n\t" - "psrlq $19, %%mm2 \n\t" - "psrlq $19, %%mm5 \n\t" - "pand %2, %%mm2 \n\t" - "pand %2, %%mm5 \n\t" - "por %%mm1, %%mm0 \n\t" - "por %%mm4, %%mm3 \n\t" - "por %%mm2, %%mm0 \n\t" - "por %%mm5, %%mm3 \n\t" - "psllq $16, %%mm3 \n\t" - "por %%mm3, %%mm0 \n\t" - MOVNTQ" %%mm0, %0 \n\t" - 
:"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); - d += 4; - s += 16; - } - __asm__ volatile(SFENCE:::"memory"); - __asm__ volatile(EMMS:::"memory"); -#endif while (s < end) { register int rgb = *(const uint32_t*)s; s += 4; *d++ = ((rgb&0xF8)<<8) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>19); } } -static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, long src_size) +static inline void rgb32to15_c(const uint8_t *src, uint8_t *dst, int src_size) { const uint8_t *s = src; const uint8_t *end; -#if COMPILE_TEMPLATE_MMX - const uint8_t *mm_end; -#endif uint16_t *d = (uint16_t *)dst; end = s + src_size; -#if COMPILE_TEMPLATE_MMX - mm_end = end - 15; -#if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster) - __asm__ volatile( - "movq %3, %%mm5 \n\t" - "movq %4, %%mm6 \n\t" - "movq %5, %%mm7 \n\t" - "jmp 2f \n\t" - ".p2align 4 \n\t" - "1: \n\t" - PREFETCH" 32(%1) \n\t" - "movd (%1), %%mm0 \n\t" - "movd 4(%1), %%mm3 \n\t" - "punpckldq 8(%1), %%mm0 \n\t" - "punpckldq 12(%1), %%mm3 \n\t" - "movq %%mm0, %%mm1 \n\t" - "movq %%mm3, %%mm4 \n\t" - "pand %%mm6, %%mm0 \n\t" - "pand %%mm6, %%mm3 \n\t" - "pmaddwd %%mm7, %%mm0 \n\t" - "pmaddwd %%mm7, %%mm3 \n\t" - "pand %%mm5, %%mm1 \n\t" - "pand %%mm5, %%mm4 \n\t" - "por %%mm1, %%mm0 \n\t" - "por %%mm4, %%mm3 \n\t" - "psrld $6, %%mm0 \n\t" - "pslld $10, %%mm3 \n\t" - "por %%mm3, %%mm0 \n\t" - MOVNTQ" %%mm0, (%0) \n\t" - "add $16, %1 \n\t" - "add $8, %0 \n\t" - "2: \n\t" - "cmp %2, %1 \n\t" - " jb 1b \n\t" - : "+r" (d), "+r"(s) - : "r" (mm_end), "m" (mask3215g), "m" (mask3216br), "m" (mul3215) - ); -#else - __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory"); - __asm__ volatile( - "movq %0, %%mm7 \n\t" - "movq %1, %%mm6 \n\t" - ::"m"(red_15mask),"m"(green_15mask)); - while (s < mm_end) { - __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movd %1, %%mm0 \n\t" - "movd 4%1, %%mm3 \n\t" - "punpckldq 8%1, %%mm0 \n\t" - "punpckldq 12%1, %%mm3 \n\t" - "movq 
%%mm0, %%mm1 \n\t" - "movq %%mm0, %%mm2 \n\t" - "movq %%mm3, %%mm4 \n\t" - "movq %%mm3, %%mm5 \n\t" - "psrlq $3, %%mm0 \n\t" - "psrlq $3, %%mm3 \n\t" - "pand %2, %%mm0 \n\t" - "pand %2, %%mm3 \n\t" - "psrlq $6, %%mm1 \n\t" - "psrlq $6, %%mm4 \n\t" - "pand %%mm6, %%mm1 \n\t" - "pand %%mm6, %%mm4 \n\t" - "psrlq $9, %%mm2 \n\t" - "psrlq $9, %%mm5 \n\t" - "pand %%mm7, %%mm2 \n\t" - "pand %%mm7, %%mm5 \n\t" - "por %%mm1, %%mm0 \n\t" - "por %%mm4, %%mm3 \n\t" - "por %%mm2, %%mm0 \n\t" - "por %%mm5, %%mm3 \n\t" - "psllq $16, %%mm3 \n\t" - "por %%mm3, %%mm0 \n\t" - MOVNTQ" %%mm0, %0 \n\t" - :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); - d += 4; - s += 16; - } -#endif - __asm__ volatile(SFENCE:::"memory"); - __asm__ volatile(EMMS:::"memory"); -#endif while (s < end) { register int rgb = *(const uint32_t*)s; s += 4; *d++ = ((rgb&0xFF)>>3) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>9); } } -static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, long src_size) +static inline void rgb32tobgr15_c(const uint8_t *src, uint8_t *dst, int src_size) { const uint8_t *s = src; const uint8_t *end; -#if COMPILE_TEMPLATE_MMX - const uint8_t *mm_end; -#endif uint16_t *d = (uint16_t *)dst; end = s + src_size; -#if COMPILE_TEMPLATE_MMX - __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory"); - __asm__ volatile( - "movq %0, %%mm7 \n\t" - "movq %1, %%mm6 \n\t" - ::"m"(red_15mask),"m"(green_15mask)); - mm_end = end - 15; - while (s < mm_end) { - __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movd %1, %%mm0 \n\t" - "movd 4%1, %%mm3 \n\t" - "punpckldq 8%1, %%mm0 \n\t" - "punpckldq 12%1, %%mm3 \n\t" - "movq %%mm0, %%mm1 \n\t" - "movq %%mm0, %%mm2 \n\t" - "movq %%mm3, %%mm4 \n\t" - "movq %%mm3, %%mm5 \n\t" - "psllq $7, %%mm0 \n\t" - "psllq $7, %%mm3 \n\t" - "pand %%mm7, %%mm0 \n\t" - "pand %%mm7, %%mm3 \n\t" - "psrlq $6, %%mm1 \n\t" - "psrlq $6, %%mm4 \n\t" - "pand %%mm6, %%mm1 \n\t" - "pand %%mm6, %%mm4 \n\t" - "psrlq $19, %%mm2 \n\t" - "psrlq $19, %%mm5 \n\t" - "pand %2, %%mm2 \n\t" - 
"pand %2, %%mm5 \n\t" - "por %%mm1, %%mm0 \n\t" - "por %%mm4, %%mm3 \n\t" - "por %%mm2, %%mm0 \n\t" - "por %%mm5, %%mm3 \n\t" - "psllq $16, %%mm3 \n\t" - "por %%mm3, %%mm0 \n\t" - MOVNTQ" %%mm0, %0 \n\t" - :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); - d += 4; - s += 16; - } - __asm__ volatile(SFENCE:::"memory"); - __asm__ volatile(EMMS:::"memory"); -#endif while (s < end) { register int rgb = *(const uint32_t*)s; s += 4; *d++ = ((rgb&0xF8)<<7) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>19); } } -static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long src_size) +static inline void rgb24tobgr16_c(const uint8_t *src, uint8_t *dst, int src_size) { const uint8_t *s = src; const uint8_t *end; -#if COMPILE_TEMPLATE_MMX - const uint8_t *mm_end; -#endif uint16_t *d = (uint16_t *)dst; end = s + src_size; -#if COMPILE_TEMPLATE_MMX - __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory"); - __asm__ volatile( - "movq %0, %%mm7 \n\t" - "movq %1, %%mm6 \n\t" - ::"m"(red_16mask),"m"(green_16mask)); - mm_end = end - 11; - while (s < mm_end) { - __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movd %1, %%mm0 \n\t" - "movd 3%1, %%mm3 \n\t" - "punpckldq 6%1, %%mm0 \n\t" - "punpckldq 9%1, %%mm3 \n\t" - "movq %%mm0, %%mm1 \n\t" - "movq %%mm0, %%mm2 \n\t" - "movq %%mm3, %%mm4 \n\t" - "movq %%mm3, %%mm5 \n\t" - "psrlq $3, %%mm0 \n\t" - "psrlq $3, %%mm3 \n\t" - "pand %2, %%mm0 \n\t" - "pand %2, %%mm3 \n\t" - "psrlq $5, %%mm1 \n\t" - "psrlq $5, %%mm4 \n\t" - "pand %%mm6, %%mm1 \n\t" - "pand %%mm6, %%mm4 \n\t" - "psrlq $8, %%mm2 \n\t" - "psrlq $8, %%mm5 \n\t" - "pand %%mm7, %%mm2 \n\t" - "pand %%mm7, %%mm5 \n\t" - "por %%mm1, %%mm0 \n\t" - "por %%mm4, %%mm3 \n\t" - "por %%mm2, %%mm0 \n\t" - "por %%mm5, %%mm3 \n\t" - "psllq $16, %%mm3 \n\t" - "por %%mm3, %%mm0 \n\t" - MOVNTQ" %%mm0, %0 \n\t" - :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); - d += 4; - s += 12; - } - __asm__ volatile(SFENCE:::"memory"); - __asm__ volatile(EMMS:::"memory"); -#endif while (s < end) { const int b = 
*s++; const int g = *s++; @@ -683,59 +185,12 @@ static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long s } } -static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, long src_size) +static inline void rgb24to16_c(const uint8_t *src, uint8_t *dst, int src_size) { const uint8_t *s = src; const uint8_t *end; -#if COMPILE_TEMPLATE_MMX - const uint8_t *mm_end; -#endif uint16_t *d = (uint16_t *)dst; end = s + src_size; -#if COMPILE_TEMPLATE_MMX - __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory"); - __asm__ volatile( - "movq %0, %%mm7 \n\t" - "movq %1, %%mm6 \n\t" - ::"m"(red_16mask),"m"(green_16mask)); - mm_end = end - 15; - while (s < mm_end) { - __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movd %1, %%mm0 \n\t" - "movd 3%1, %%mm3 \n\t" - "punpckldq 6%1, %%mm0 \n\t" - "punpckldq 9%1, %%mm3 \n\t" - "movq %%mm0, %%mm1 \n\t" - "movq %%mm0, %%mm2 \n\t" - "movq %%mm3, %%mm4 \n\t" - "movq %%mm3, %%mm5 \n\t" - "psllq $8, %%mm0 \n\t" - "psllq $8, %%mm3 \n\t" - "pand %%mm7, %%mm0 \n\t" - "pand %%mm7, %%mm3 \n\t" - "psrlq $5, %%mm1 \n\t" - "psrlq $5, %%mm4 \n\t" - "pand %%mm6, %%mm1 \n\t" - "pand %%mm6, %%mm4 \n\t" - "psrlq $19, %%mm2 \n\t" - "psrlq $19, %%mm5 \n\t" - "pand %2, %%mm2 \n\t" - "pand %2, %%mm5 \n\t" - "por %%mm1, %%mm0 \n\t" - "por %%mm4, %%mm3 \n\t" - "por %%mm2, %%mm0 \n\t" - "por %%mm5, %%mm3 \n\t" - "psllq $16, %%mm3 \n\t" - "por %%mm3, %%mm0 \n\t" - MOVNTQ" %%mm0, %0 \n\t" - :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); - d += 4; - s += 12; - } - __asm__ volatile(SFENCE:::"memory"); - __asm__ volatile(EMMS:::"memory"); -#endif while (s < end) { const int r = *s++; const int g = *s++; @@ -744,59 +199,12 @@ static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, long src_ } } -static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long src_size) +static inline void rgb24tobgr15_c(const uint8_t *src, uint8_t *dst, int src_size) { const uint8_t *s = src; const uint8_t *end; -#if 
COMPILE_TEMPLATE_MMX - const uint8_t *mm_end; -#endif uint16_t *d = (uint16_t *)dst; end = s + src_size; -#if COMPILE_TEMPLATE_MMX - __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory"); - __asm__ volatile( - "movq %0, %%mm7 \n\t" - "movq %1, %%mm6 \n\t" - ::"m"(red_15mask),"m"(green_15mask)); - mm_end = end - 11; - while (s < mm_end) { - __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movd %1, %%mm0 \n\t" - "movd 3%1, %%mm3 \n\t" - "punpckldq 6%1, %%mm0 \n\t" - "punpckldq 9%1, %%mm3 \n\t" - "movq %%mm0, %%mm1 \n\t" - "movq %%mm0, %%mm2 \n\t" - "movq %%mm3, %%mm4 \n\t" - "movq %%mm3, %%mm5 \n\t" - "psrlq $3, %%mm0 \n\t" - "psrlq $3, %%mm3 \n\t" - "pand %2, %%mm0 \n\t" - "pand %2, %%mm3 \n\t" - "psrlq $6, %%mm1 \n\t" - "psrlq $6, %%mm4 \n\t" - "pand %%mm6, %%mm1 \n\t" - "pand %%mm6, %%mm4 \n\t" - "psrlq $9, %%mm2 \n\t" - "psrlq $9, %%mm5 \n\t" - "pand %%mm7, %%mm2 \n\t" - "pand %%mm7, %%mm5 \n\t" - "por %%mm1, %%mm0 \n\t" - "por %%mm4, %%mm3 \n\t" - "por %%mm2, %%mm0 \n\t" - "por %%mm5, %%mm3 \n\t" - "psllq $16, %%mm3 \n\t" - "por %%mm3, %%mm0 \n\t" - MOVNTQ" %%mm0, %0 \n\t" - :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); - d += 4; - s += 12; - } - __asm__ volatile(SFENCE:::"memory"); - __asm__ volatile(EMMS:::"memory"); -#endif while (s < end) { const int b = *s++; const int g = *s++; @@ -805,59 +213,12 @@ static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long s } } -static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, long src_size) +static inline void rgb24to15_c(const uint8_t *src, uint8_t *dst, int src_size) { const uint8_t *s = src; const uint8_t *end; -#if COMPILE_TEMPLATE_MMX - const uint8_t *mm_end; -#endif uint16_t *d = (uint16_t *)dst; end = s + src_size; -#if COMPILE_TEMPLATE_MMX - __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory"); - __asm__ volatile( - "movq %0, %%mm7 \n\t" - "movq %1, %%mm6 \n\t" - ::"m"(red_15mask),"m"(green_15mask)); - mm_end = end - 15; - while (s < mm_end) { - __asm__ volatile( - PREFETCH" 
32%1 \n\t" - "movd %1, %%mm0 \n\t" - "movd 3%1, %%mm3 \n\t" - "punpckldq 6%1, %%mm0 \n\t" - "punpckldq 9%1, %%mm3 \n\t" - "movq %%mm0, %%mm1 \n\t" - "movq %%mm0, %%mm2 \n\t" - "movq %%mm3, %%mm4 \n\t" - "movq %%mm3, %%mm5 \n\t" - "psllq $7, %%mm0 \n\t" - "psllq $7, %%mm3 \n\t" - "pand %%mm7, %%mm0 \n\t" - "pand %%mm7, %%mm3 \n\t" - "psrlq $6, %%mm1 \n\t" - "psrlq $6, %%mm4 \n\t" - "pand %%mm6, %%mm1 \n\t" - "pand %%mm6, %%mm4 \n\t" - "psrlq $19, %%mm2 \n\t" - "psrlq $19, %%mm5 \n\t" - "pand %2, %%mm2 \n\t" - "pand %2, %%mm5 \n\t" - "por %%mm1, %%mm0 \n\t" - "por %%mm4, %%mm3 \n\t" - "por %%mm2, %%mm0 \n\t" - "por %%mm5, %%mm3 \n\t" - "psllq $16, %%mm3 \n\t" - "por %%mm3, %%mm0 \n\t" - MOVNTQ" %%mm0, %0 \n\t" - :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); - d += 4; - s += 12; - } - __asm__ volatile(SFENCE:::"memory"); - __asm__ volatile(EMMS:::"memory"); -#endif while (s < end) { const int r = *s++; const int g = *s++; @@ -887,104 +248,12 @@ static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, long src_ | original bits */ -static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, long src_size) +static inline void rgb15tobgr24_c(const uint8_t *src, uint8_t *dst, int src_size) { const uint16_t *end; -#if COMPILE_TEMPLATE_MMX - const uint16_t *mm_end; -#endif uint8_t *d = dst; const uint16_t *s = (const uint16_t*)src; end = s + src_size/2; -#if COMPILE_TEMPLATE_MMX - __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); - mm_end = end - 7; - while (s < mm_end) { - __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movq %1, %%mm0 \n\t" - "movq %1, %%mm1 \n\t" - "movq %1, %%mm2 \n\t" - "pand %2, %%mm0 \n\t" - "pand %3, %%mm1 \n\t" - "pand %4, %%mm2 \n\t" - "psllq $3, %%mm0 \n\t" - "psrlq $2, %%mm1 \n\t" - "psrlq $7, %%mm2 \n\t" - "movq %%mm0, %%mm3 \n\t" - "movq %%mm1, %%mm4 \n\t" - "movq %%mm2, %%mm5 \n\t" - "punpcklwd %5, %%mm0 \n\t" - "punpcklwd %5, %%mm1 \n\t" - "punpcklwd %5, %%mm2 \n\t" - "punpckhwd %5, %%mm3 \n\t" - "punpckhwd %5, %%mm4 
\n\t" - "punpckhwd %5, %%mm5 \n\t" - "psllq $8, %%mm1 \n\t" - "psllq $16, %%mm2 \n\t" - "por %%mm1, %%mm0 \n\t" - "por %%mm2, %%mm0 \n\t" - "psllq $8, %%mm4 \n\t" - "psllq $16, %%mm5 \n\t" - "por %%mm4, %%mm3 \n\t" - "por %%mm5, %%mm3 \n\t" - - "movq %%mm0, %%mm6 \n\t" - "movq %%mm3, %%mm7 \n\t" - - "movq 8%1, %%mm0 \n\t" - "movq 8%1, %%mm1 \n\t" - "movq 8%1, %%mm2 \n\t" - "pand %2, %%mm0 \n\t" - "pand %3, %%mm1 \n\t" - "pand %4, %%mm2 \n\t" - "psllq $3, %%mm0 \n\t" - "psrlq $2, %%mm1 \n\t" - "psrlq $7, %%mm2 \n\t" - "movq %%mm0, %%mm3 \n\t" - "movq %%mm1, %%mm4 \n\t" - "movq %%mm2, %%mm5 \n\t" - "punpcklwd %5, %%mm0 \n\t" - "punpcklwd %5, %%mm1 \n\t" - "punpcklwd %5, %%mm2 \n\t" - "punpckhwd %5, %%mm3 \n\t" - "punpckhwd %5, %%mm4 \n\t" - "punpckhwd %5, %%mm5 \n\t" - "psllq $8, %%mm1 \n\t" - "psllq $16, %%mm2 \n\t" - "por %%mm1, %%mm0 \n\t" - "por %%mm2, %%mm0 \n\t" - "psllq $8, %%mm4 \n\t" - "psllq $16, %%mm5 \n\t" - "por %%mm4, %%mm3 \n\t" - "por %%mm5, %%mm3 \n\t" - - :"=m"(*d) - :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null) - :"memory"); - /* borrowed 32 to 24 */ - __asm__ volatile( - "movq %%mm0, %%mm4 \n\t" - "movq %%mm3, %%mm5 \n\t" - "movq %%mm6, %%mm0 \n\t" - "movq %%mm7, %%mm1 \n\t" - - "movq %%mm4, %%mm6 \n\t" - "movq %%mm5, %%mm7 \n\t" - "movq %%mm0, %%mm2 \n\t" - "movq %%mm1, %%mm3 \n\t" - - STORE_BGR24_MMX - - :"=m"(*d) - :"m"(*s) - :"memory"); - d += 24; - s += 8; - } - __asm__ volatile(SFENCE:::"memory"); - __asm__ volatile(EMMS:::"memory"); -#endif while (s < end) { register uint16_t bgr; bgr = *s++; @@ -994,103 +263,12 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, long s } } -static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, long src_size) +static inline void rgb16tobgr24_c(const uint8_t *src, uint8_t *dst, int src_size) { const uint16_t *end; -#if COMPILE_TEMPLATE_MMX - const uint16_t *mm_end; -#endif uint8_t *d = (uint8_t *)dst; const uint16_t *s = (const uint16_t *)src; end 
= s + src_size/2; -#if COMPILE_TEMPLATE_MMX - __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); - mm_end = end - 7; - while (s < mm_end) { - __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movq %1, %%mm0 \n\t" - "movq %1, %%mm1 \n\t" - "movq %1, %%mm2 \n\t" - "pand %2, %%mm0 \n\t" - "pand %3, %%mm1 \n\t" - "pand %4, %%mm2 \n\t" - "psllq $3, %%mm0 \n\t" - "psrlq $3, %%mm1 \n\t" - "psrlq $8, %%mm2 \n\t" - "movq %%mm0, %%mm3 \n\t" - "movq %%mm1, %%mm4 \n\t" - "movq %%mm2, %%mm5 \n\t" - "punpcklwd %5, %%mm0 \n\t" - "punpcklwd %5, %%mm1 \n\t" - "punpcklwd %5, %%mm2 \n\t" - "punpckhwd %5, %%mm3 \n\t" - "punpckhwd %5, %%mm4 \n\t" - "punpckhwd %5, %%mm5 \n\t" - "psllq $8, %%mm1 \n\t" - "psllq $16, %%mm2 \n\t" - "por %%mm1, %%mm0 \n\t" - "por %%mm2, %%mm0 \n\t" - "psllq $8, %%mm4 \n\t" - "psllq $16, %%mm5 \n\t" - "por %%mm4, %%mm3 \n\t" - "por %%mm5, %%mm3 \n\t" - - "movq %%mm0, %%mm6 \n\t" - "movq %%mm3, %%mm7 \n\t" - - "movq 8%1, %%mm0 \n\t" - "movq 8%1, %%mm1 \n\t" - "movq 8%1, %%mm2 \n\t" - "pand %2, %%mm0 \n\t" - "pand %3, %%mm1 \n\t" - "pand %4, %%mm2 \n\t" - "psllq $3, %%mm0 \n\t" - "psrlq $3, %%mm1 \n\t" - "psrlq $8, %%mm2 \n\t" - "movq %%mm0, %%mm3 \n\t" - "movq %%mm1, %%mm4 \n\t" - "movq %%mm2, %%mm5 \n\t" - "punpcklwd %5, %%mm0 \n\t" - "punpcklwd %5, %%mm1 \n\t" - "punpcklwd %5, %%mm2 \n\t" - "punpckhwd %5, %%mm3 \n\t" - "punpckhwd %5, %%mm4 \n\t" - "punpckhwd %5, %%mm5 \n\t" - "psllq $8, %%mm1 \n\t" - "psllq $16, %%mm2 \n\t" - "por %%mm1, %%mm0 \n\t" - "por %%mm2, %%mm0 \n\t" - "psllq $8, %%mm4 \n\t" - "psllq $16, %%mm5 \n\t" - "por %%mm4, %%mm3 \n\t" - "por %%mm5, %%mm3 \n\t" - :"=m"(*d) - :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null) - :"memory"); - /* borrowed 32 to 24 */ - __asm__ volatile( - "movq %%mm0, %%mm4 \n\t" - "movq %%mm3, %%mm5 \n\t" - "movq %%mm6, %%mm0 \n\t" - "movq %%mm7, %%mm1 \n\t" - - "movq %%mm4, %%mm6 \n\t" - "movq %%mm5, %%mm7 \n\t" - "movq %%mm0, %%mm2 \n\t" - "movq %%mm1, %%mm3 \n\t" - - STORE_BGR24_MMX - - :"=m"(*d) - 
:"m"(*s) - :"memory"); - d += 24; - s += 8; - } - __asm__ volatile(SFENCE:::"memory"); - __asm__ volatile(EMMS:::"memory"); -#endif while (s < end) { register uint16_t bgr; bgr = *s++; @@ -1100,61 +278,12 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, long s } } -/* - * mm0 = 00 B3 00 B2 00 B1 00 B0 - * mm1 = 00 G3 00 G2 00 G1 00 G0 - * mm2 = 00 R3 00 R2 00 R1 00 R0 - * mm6 = FF FF FF FF FF FF FF FF - * mm7 = 00 00 00 00 00 00 00 00 - */ -#define PACK_RGB32 \ - "packuswb %%mm7, %%mm0 \n\t" /* 00 00 00 00 B3 B2 B1 B0 */ \ - "packuswb %%mm7, %%mm1 \n\t" /* 00 00 00 00 G3 G2 G1 G0 */ \ - "packuswb %%mm7, %%mm2 \n\t" /* 00 00 00 00 R3 R2 R1 R0 */ \ - "punpcklbw %%mm1, %%mm0 \n\t" /* G3 B3 G2 B2 G1 B1 G0 B0 */ \ - "punpcklbw %%mm6, %%mm2 \n\t" /* FF R3 FF R2 FF R1 FF R0 */ \ - "movq %%mm0, %%mm3 \n\t" \ - "punpcklwd %%mm2, %%mm0 \n\t" /* FF R1 G1 B1 FF R0 G0 B0 */ \ - "punpckhwd %%mm2, %%mm3 \n\t" /* FF R3 G3 B3 FF R2 G2 B2 */ \ - MOVNTQ" %%mm0, %0 \n\t" \ - MOVNTQ" %%mm3, 8%0 \n\t" \ - -static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, long src_size) +static inline void rgb15to32_c(const uint8_t *src, uint8_t *dst, int src_size) { const uint16_t *end; -#if COMPILE_TEMPLATE_MMX - const uint16_t *mm_end; -#endif uint8_t *d = dst; const uint16_t *s = (const uint16_t *)src; end = s + src_size/2; -#if COMPILE_TEMPLATE_MMX - __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); - __asm__ volatile("pxor %%mm7,%%mm7 \n\t":::"memory"); - __asm__ volatile("pcmpeqd %%mm6,%%mm6 \n\t":::"memory"); - mm_end = end - 3; - while (s < mm_end) { - __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movq %1, %%mm0 \n\t" - "movq %1, %%mm1 \n\t" - "movq %1, %%mm2 \n\t" - "pand %2, %%mm0 \n\t" - "pand %3, %%mm1 \n\t" - "pand %4, %%mm2 \n\t" - "psllq $3, %%mm0 \n\t" - "psrlq $2, %%mm1 \n\t" - "psrlq $7, %%mm2 \n\t" - PACK_RGB32 - :"=m"(*d) - :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r) - :"memory"); - d += 16; - s += 4; - } - __asm__ 
volatile(SFENCE:::"memory"); - __asm__ volatile(EMMS:::"memory"); -#endif while (s < end) { register uint16_t bgr; bgr = *s++; @@ -1172,42 +301,12 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, long src_ } } -static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, long src_size) +static inline void rgb16to32_c(const uint8_t *src, uint8_t *dst, int src_size) { const uint16_t *end; -#if COMPILE_TEMPLATE_MMX - const uint16_t *mm_end; -#endif uint8_t *d = dst; const uint16_t *s = (const uint16_t*)src; end = s + src_size/2; -#if COMPILE_TEMPLATE_MMX - __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); - __asm__ volatile("pxor %%mm7,%%mm7 \n\t":::"memory"); - __asm__ volatile("pcmpeqd %%mm6,%%mm6 \n\t":::"memory"); - mm_end = end - 3; - while (s < mm_end) { - __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movq %1, %%mm0 \n\t" - "movq %1, %%mm1 \n\t" - "movq %1, %%mm2 \n\t" - "pand %2, %%mm0 \n\t" - "pand %3, %%mm1 \n\t" - "pand %4, %%mm2 \n\t" - "psllq $3, %%mm0 \n\t" - "psrlq $3, %%mm1 \n\t" - "psrlq $8, %%mm2 \n\t" - PACK_RGB32 - :"=m"(*d) - :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r) - :"memory"); - d += 16; - s += 4; - } - __asm__ volatile(SFENCE:::"memory"); - __asm__ volatile(EMMS:::"memory"); -#endif while (s < end) { register uint16_t bgr; bgr = *s++; @@ -1225,63 +324,11 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, long src_ } } -static inline void RENAME(shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, long src_size) +static inline void shuffle_bytes_2103_c(const uint8_t *src, uint8_t *dst, int src_size) { - x86_reg idx = 15 - src_size; + int idx = 15 - src_size; const uint8_t *s = src-idx; uint8_t *d = dst-idx; -#if COMPILE_TEMPLATE_MMX - __asm__ volatile( - "test %0, %0 \n\t" - "jns 2f \n\t" - PREFETCH" (%1, %0) \n\t" - "movq %3, %%mm7 \n\t" - "pxor %4, %%mm7 \n\t" - "movq %%mm7, %%mm6 \n\t" - "pxor %5, %%mm7 \n\t" - ".p2align 4 \n\t" - "1: \n\t" - PREFETCH" 32(%1, %0) \n\t" - "movq 
(%1, %0), %%mm0 \n\t" - "movq 8(%1, %0), %%mm1 \n\t" -# if COMPILE_TEMPLATE_MMX2 - "pshufw $177, %%mm0, %%mm3 \n\t" - "pshufw $177, %%mm1, %%mm5 \n\t" - "pand %%mm7, %%mm0 \n\t" - "pand %%mm6, %%mm3 \n\t" - "pand %%mm7, %%mm1 \n\t" - "pand %%mm6, %%mm5 \n\t" - "por %%mm3, %%mm0 \n\t" - "por %%mm5, %%mm1 \n\t" -# else - "movq %%mm0, %%mm2 \n\t" - "movq %%mm1, %%mm4 \n\t" - "pand %%mm7, %%mm0 \n\t" - "pand %%mm6, %%mm2 \n\t" - "pand %%mm7, %%mm1 \n\t" - "pand %%mm6, %%mm4 \n\t" - "movq %%mm2, %%mm3 \n\t" - "movq %%mm4, %%mm5 \n\t" - "pslld $16, %%mm2 \n\t" - "psrld $16, %%mm3 \n\t" - "pslld $16, %%mm4 \n\t" - "psrld $16, %%mm5 \n\t" - "por %%mm2, %%mm0 \n\t" - "por %%mm4, %%mm1 \n\t" - "por %%mm3, %%mm0 \n\t" - "por %%mm5, %%mm1 \n\t" -# endif - MOVNTQ" %%mm0, (%2, %0) \n\t" - MOVNTQ" %%mm1, 8(%2, %0) \n\t" - "add $16, %0 \n\t" - "js 1b \n\t" - SFENCE" \n\t" - EMMS" \n\t" - "2: \n\t" - : "+&r"(idx) - : "r" (s), "r" (d), "m" (mask32b), "m" (mask32r), "m" (mmx_one) - : "memory"); -#endif for (; idx<15; idx+=4) { register int v = *(const uint32_t *)&s[idx], g = v & 0xff00ff00; v &= 0xff00ff; @@ -1289,66 +336,9 @@ static inline void RENAME(shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, } } -static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long src_size) +static inline void rgb24tobgr24_c(const uint8_t *src, uint8_t *dst, int src_size) { unsigned i; -#if COMPILE_TEMPLATE_MMX - x86_reg mmx_size= 23 - src_size; - __asm__ volatile ( - "test %%"REG_a", %%"REG_a" \n\t" - "jns 2f \n\t" - "movq "MANGLE(mask24r)", %%mm5 \n\t" - "movq "MANGLE(mask24g)", %%mm6 \n\t" - "movq "MANGLE(mask24b)", %%mm7 \n\t" - ".p2align 4 \n\t" - "1: \n\t" - PREFETCH" 32(%1, %%"REG_a") \n\t" - "movq (%1, %%"REG_a"), %%mm0 \n\t" // BGR BGR BG - "movq (%1, %%"REG_a"), %%mm1 \n\t" // BGR BGR BG - "movq 2(%1, %%"REG_a"), %%mm2 \n\t" // R BGR BGR B - "psllq $16, %%mm0 \n\t" // 00 BGR BGR - "pand %%mm5, %%mm0 \n\t" - "pand %%mm6, %%mm1 \n\t" - "pand %%mm7, %%mm2 \n\t" - "por 
%%mm0, %%mm1 \n\t" - "por %%mm2, %%mm1 \n\t" - "movq 6(%1, %%"REG_a"), %%mm0 \n\t" // BGR BGR BG - MOVNTQ" %%mm1, (%2, %%"REG_a") \n\t" // RGB RGB RG - "movq 8(%1, %%"REG_a"), %%mm1 \n\t" // R BGR BGR B - "movq 10(%1, %%"REG_a"), %%mm2 \n\t" // GR BGR BGR - "pand %%mm7, %%mm0 \n\t" - "pand %%mm5, %%mm1 \n\t" - "pand %%mm6, %%mm2 \n\t" - "por %%mm0, %%mm1 \n\t" - "por %%mm2, %%mm1 \n\t" - "movq 14(%1, %%"REG_a"), %%mm0 \n\t" // R BGR BGR B - MOVNTQ" %%mm1, 8(%2, %%"REG_a") \n\t" // B RGB RGB R - "movq 16(%1, %%"REG_a"), %%mm1 \n\t" // GR BGR BGR - "movq 18(%1, %%"REG_a"), %%mm2 \n\t" // BGR BGR BG - "pand %%mm6, %%mm0 \n\t" - "pand %%mm7, %%mm1 \n\t" - "pand %%mm5, %%mm2 \n\t" - "por %%mm0, %%mm1 \n\t" - "por %%mm2, %%mm1 \n\t" - MOVNTQ" %%mm1, 16(%2, %%"REG_a") \n\t" - "add $24, %%"REG_a" \n\t" - " js 1b \n\t" - "2: \n\t" - : "+a" (mmx_size) - : "r" (src-mmx_size), "r"(dst-mmx_size) - ); - - __asm__ volatile(SFENCE:::"memory"); - __asm__ volatile(EMMS:::"memory"); - - if (mmx_size==23) return; //finished, was multiple of 8 - - src+= src_size; - dst+= src_size; - src_size= 23-mmx_size; - src-= src_size; - dst-= src_size; -#endif for (i=0; i<src_size; i+=3) { register uint8_t x; x = src[i + 2]; @@ -1358,98 +348,16 @@ static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long s } } -static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, - long width, long height, - long lumStride, long chromStride, long dstStride, long vertLumPerChroma) +static inline void yuvPlanartoyuy2_c(const uint8_t *ysrc, const uint8_t *usrc, + const uint8_t *vsrc, uint8_t *dst, + int width, int height, + int lumStride, int chromStride, + int dstStride, int vertLumPerChroma) { - long y; - const x86_reg chromWidth= width>>1; + int y; + const int chromWidth = width >> 1; for (y=0; y<height; y++) { -#if COMPILE_TEMPLATE_MMX - //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely 
memory-limited anyway) - __asm__ volatile( - "xor %%"REG_a", %%"REG_a" \n\t" - ".p2align 4 \n\t" - "1: \n\t" - PREFETCH" 32(%1, %%"REG_a", 2) \n\t" - PREFETCH" 32(%2, %%"REG_a") \n\t" - PREFETCH" 32(%3, %%"REG_a") \n\t" - "movq (%2, %%"REG_a"), %%mm0 \n\t" // U(0) - "movq %%mm0, %%mm2 \n\t" // U(0) - "movq (%3, %%"REG_a"), %%mm1 \n\t" // V(0) - "punpcklbw %%mm1, %%mm0 \n\t" // UVUV UVUV(0) - "punpckhbw %%mm1, %%mm2 \n\t" // UVUV UVUV(8) - - "movq (%1, %%"REG_a",2), %%mm3 \n\t" // Y(0) - "movq 8(%1, %%"REG_a",2), %%mm5 \n\t" // Y(8) - "movq %%mm3, %%mm4 \n\t" // Y(0) - "movq %%mm5, %%mm6 \n\t" // Y(8) - "punpcklbw %%mm0, %%mm3 \n\t" // YUYV YUYV(0) - "punpckhbw %%mm0, %%mm4 \n\t" // YUYV YUYV(4) - "punpcklbw %%mm2, %%mm5 \n\t" // YUYV YUYV(8) - "punpckhbw %%mm2, %%mm6 \n\t" // YUYV YUYV(12) - - MOVNTQ" %%mm3, (%0, %%"REG_a", 4) \n\t" - MOVNTQ" %%mm4, 8(%0, %%"REG_a", 4) \n\t" - MOVNTQ" %%mm5, 16(%0, %%"REG_a", 4) \n\t" - MOVNTQ" %%mm6, 24(%0, %%"REG_a", 4) \n\t" - - "add $8, %%"REG_a" \n\t" - "cmp %4, %%"REG_a" \n\t" - " jb 1b \n\t" - ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth) - : "%"REG_a - ); -#else - -#if ARCH_ALPHA && HAVE_MVI -#define pl2yuy2(n) \ - y1 = yc[n]; \ - y2 = yc2[n]; \ - u = uc[n]; \ - v = vc[n]; \ - __asm__("unpkbw %1, %0" : "=r"(y1) : "r"(y1)); \ - __asm__("unpkbw %1, %0" : "=r"(y2) : "r"(y2)); \ - __asm__("unpkbl %1, %0" : "=r"(u) : "r"(u)); \ - __asm__("unpkbl %1, %0" : "=r"(v) : "r"(v)); \ - yuv1 = (u << 8) + (v << 24); \ - yuv2 = yuv1 + y2; \ - yuv1 += y1; \ - qdst[n] = yuv1; \ - qdst2[n] = yuv2; - - int i; - uint64_t *qdst = (uint64_t *) dst; - uint64_t *qdst2 = (uint64_t *) (dst + dstStride); - const uint32_t *yc = (uint32_t *) ysrc; - const uint32_t *yc2 = (uint32_t *) (ysrc + lumStride); - const uint16_t *uc = (uint16_t*) usrc, *vc = (uint16_t*) vsrc; - for (i = 0; i < chromWidth; i += 8) { - uint64_t y1, y2, yuv1, yuv2; - uint64_t u, v; - /* Prefetch */ - __asm__("ldq $31,64(%0)" :: "r"(yc)); - __asm__("ldq $31,64(%0)" 
:: "r"(yc2)); - __asm__("ldq $31,64(%0)" :: "r"(uc)); - __asm__("ldq $31,64(%0)" :: "r"(vc)); - - pl2yuy2(0); - pl2yuy2(1); - pl2yuy2(2); - pl2yuy2(3); - - yc += 4; - yc2 += 4; - uc += 4; - vc += 4; - qdst += 4; - qdst2 += 4; - } - y++; - ysrc += lumStride; - dst += dstStride; - -#elif HAVE_FAST_64BIT +#if HAVE_FAST_64BIT int i; uint64_t *ldst = (uint64_t *) dst; const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; @@ -1481,7 +389,6 @@ static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *u vc++; } #endif -#endif if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) { usrc += chromStride; vsrc += chromStride; @@ -1489,70 +396,32 @@ static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *u ysrc += lumStride; dst += dstStride; } -#if COMPILE_TEMPLATE_MMX - __asm__(EMMS" \n\t" - SFENCE" \n\t" - :::"memory"); -#endif } /** * Height should be a multiple of 2 and width should be a multiple of 16. * (If this is a problem for anyone then tell me, and I will fix it.) 
*/ -static inline void RENAME(yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, - long width, long height, - long lumStride, long chromStride, long dstStride) +static inline void yv12toyuy2_c(const uint8_t *ysrc, const uint8_t *usrc, + const uint8_t *vsrc, uint8_t *dst, + int width, int height, + int lumStride, int chromStride, + int dstStride) { //FIXME interpolate chroma - RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2); + yuvPlanartoyuy2_c(ysrc, usrc, vsrc, dst, width, height, lumStride, + chromStride, dstStride, 2); } -static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, - long width, long height, - long lumStride, long chromStride, long dstStride, long vertLumPerChroma) +static inline void yuvPlanartouyvy_c(const uint8_t *ysrc, const uint8_t *usrc, + const uint8_t *vsrc, uint8_t *dst, + int width, int height, + int lumStride, int chromStride, + int dstStride, int vertLumPerChroma) { - long y; - const x86_reg chromWidth= width>>1; + int y; + const int chromWidth = width >> 1; for (y=0; y<height; y++) { -#if COMPILE_TEMPLATE_MMX - //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway) - __asm__ volatile( - "xor %%"REG_a", %%"REG_a" \n\t" - ".p2align 4 \n\t" - "1: \n\t" - PREFETCH" 32(%1, %%"REG_a", 2) \n\t" - PREFETCH" 32(%2, %%"REG_a") \n\t" - PREFETCH" 32(%3, %%"REG_a") \n\t" - "movq (%2, %%"REG_a"), %%mm0 \n\t" // U(0) - "movq %%mm0, %%mm2 \n\t" // U(0) - "movq (%3, %%"REG_a"), %%mm1 \n\t" // V(0) - "punpcklbw %%mm1, %%mm0 \n\t" // UVUV UVUV(0) - "punpckhbw %%mm1, %%mm2 \n\t" // UVUV UVUV(8) - - "movq (%1, %%"REG_a",2), %%mm3 \n\t" // Y(0) - "movq 8(%1, %%"REG_a",2), %%mm5 \n\t" // Y(8) - "movq %%mm0, %%mm4 \n\t" // Y(0) - "movq %%mm2, %%mm6 \n\t" // Y(8) - "punpcklbw %%mm3, %%mm0 \n\t" // YUYV YUYV(0) - "punpckhbw %%mm3, %%mm4 \n\t" // YUYV YUYV(4) - 
"punpcklbw %%mm5, %%mm2 \n\t" // YUYV YUYV(8) - "punpckhbw %%mm5, %%mm6 \n\t" // YUYV YUYV(12) - - MOVNTQ" %%mm0, (%0, %%"REG_a", 4) \n\t" - MOVNTQ" %%mm4, 8(%0, %%"REG_a", 4) \n\t" - MOVNTQ" %%mm2, 16(%0, %%"REG_a", 4) \n\t" - MOVNTQ" %%mm6, 24(%0, %%"REG_a", 4) \n\t" - - "add $8, %%"REG_a" \n\t" - "cmp %4, %%"REG_a" \n\t" - " jb 1b \n\t" - ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth) - : "%"REG_a - ); -#else -//FIXME adapt the Alpha ASM code from yv12->yuy2 - #if HAVE_FAST_64BIT int i; uint64_t *ldst = (uint64_t *) dst; @@ -1585,7 +454,6 @@ static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *u vc++; } #endif -#endif if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) { usrc += chromStride; vsrc += chromStride; @@ -1593,140 +461,63 @@ static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *u ysrc += lumStride; dst += dstStride; } -#if COMPILE_TEMPLATE_MMX - __asm__(EMMS" \n\t" - SFENCE" \n\t" - :::"memory"); -#endif } /** * Height should be a multiple of 2 and width should be a multiple of 16 * (If this is a problem for anyone then tell me, and I will fix it.) */ -static inline void RENAME(yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, - long width, long height, - long lumStride, long chromStride, long dstStride) +static inline void yv12touyvy_c(const uint8_t *ysrc, const uint8_t *usrc, + const uint8_t *vsrc, uint8_t *dst, + int width, int height, + int lumStride, int chromStride, + int dstStride) { //FIXME interpolate chroma - RENAME(yuvPlanartouyvy)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2); + yuvPlanartouyvy_c(ysrc, usrc, vsrc, dst, width, height, lumStride, + chromStride, dstStride, 2); } /** * Width should be a multiple of 16. 
*/ -static inline void RENAME(yuv422ptouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, - long width, long height, - long lumStride, long chromStride, long dstStride) +static inline void yuv422ptouyvy_c(const uint8_t *ysrc, const uint8_t *usrc, + const uint8_t *vsrc, uint8_t *dst, + int width, int height, + int lumStride, int chromStride, + int dstStride) { - RENAME(yuvPlanartouyvy)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 1); + yuvPlanartouyvy_c(ysrc, usrc, vsrc, dst, width, height, lumStride, + chromStride, dstStride, 1); } /** * Width should be a multiple of 16. */ -static inline void RENAME(yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, - long width, long height, - long lumStride, long chromStride, long dstStride) +static inline void yuv422ptoyuy2_c(const uint8_t *ysrc, const uint8_t *usrc, + const uint8_t *vsrc, uint8_t *dst, + int width, int height, + int lumStride, int chromStride, + int dstStride) { - RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 1); + yuvPlanartoyuy2_c(ysrc, usrc, vsrc, dst, width, height, lumStride, + chromStride, dstStride, 1); } /** * Height should be a multiple of 2 and width should be a multiple of 16. * (If this is a problem for anyone then tell me, and I will fix it.) 
*/ -static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, - long width, long height, - long lumStride, long chromStride, long srcStride) -{ - long y; - const x86_reg chromWidth= width>>1; +static inline void yuy2toyv12_c(const uint8_t *src, uint8_t *ydst, + uint8_t *udst, uint8_t *vdst, + int width, int height, + int lumStride, int chromStride, + int srcStride) +{ + int y; + const int chromWidth = width >> 1; for (y=0; y<height; y+=2) { -#if COMPILE_TEMPLATE_MMX - __asm__ volatile( - "xor %%"REG_a", %%"REG_a" \n\t" - "pcmpeqw %%mm7, %%mm7 \n\t" - "psrlw $8, %%mm7 \n\t" // FF,00,FF,00... - ".p2align 4 \n\t" - "1: \n\t" - PREFETCH" 64(%0, %%"REG_a", 4) \n\t" - "movq (%0, %%"REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0) - "movq 8(%0, %%"REG_a", 4), %%mm1 \n\t" // YUYV YUYV(4) - "movq %%mm0, %%mm2 \n\t" // YUYV YUYV(0) - "movq %%mm1, %%mm3 \n\t" // YUYV YUYV(4) - "psrlw $8, %%mm0 \n\t" // U0V0 U0V0(0) - "psrlw $8, %%mm1 \n\t" // U0V0 U0V0(4) - "pand %%mm7, %%mm2 \n\t" // Y0Y0 Y0Y0(0) - "pand %%mm7, %%mm3 \n\t" // Y0Y0 Y0Y0(4) - "packuswb %%mm1, %%mm0 \n\t" // UVUV UVUV(0) - "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(0) - - MOVNTQ" %%mm2, (%1, %%"REG_a", 2) \n\t" - - "movq 16(%0, %%"REG_a", 4), %%mm1 \n\t" // YUYV YUYV(8) - "movq 24(%0, %%"REG_a", 4), %%mm2 \n\t" // YUYV YUYV(12) - "movq %%mm1, %%mm3 \n\t" // YUYV YUYV(8) - "movq %%mm2, %%mm4 \n\t" // YUYV YUYV(12) - "psrlw $8, %%mm1 \n\t" // U0V0 U0V0(8) - "psrlw $8, %%mm2 \n\t" // U0V0 U0V0(12) - "pand %%mm7, %%mm3 \n\t" // Y0Y0 Y0Y0(8) - "pand %%mm7, %%mm4 \n\t" // Y0Y0 Y0Y0(12) - "packuswb %%mm2, %%mm1 \n\t" // UVUV UVUV(8) - "packuswb %%mm4, %%mm3 \n\t" // YYYY YYYY(8) - - MOVNTQ" %%mm3, 8(%1, %%"REG_a", 2) \n\t" - - "movq %%mm0, %%mm2 \n\t" // UVUV UVUV(0) - "movq %%mm1, %%mm3 \n\t" // UVUV UVUV(8) - "psrlw $8, %%mm0 \n\t" // V0V0 V0V0(0) - "psrlw $8, %%mm1 \n\t" // V0V0 V0V0(8) - "pand %%mm7, %%mm2 \n\t" // U0U0 U0U0(0) - "pand %%mm7, %%mm3 \n\t" // U0U0 U0U0(8) - 
"packuswb %%mm1, %%mm0 \n\t" // VVVV VVVV(0) - "packuswb %%mm3, %%mm2 \n\t" // UUUU UUUU(0) - - MOVNTQ" %%mm0, (%3, %%"REG_a") \n\t" - MOVNTQ" %%mm2, (%2, %%"REG_a") \n\t" - - "add $8, %%"REG_a" \n\t" - "cmp %4, %%"REG_a" \n\t" - " jb 1b \n\t" - ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth) - : "memory", "%"REG_a - ); - - ydst += lumStride; - src += srcStride; - - __asm__ volatile( - "xor %%"REG_a", %%"REG_a" \n\t" - ".p2align 4 \n\t" - "1: \n\t" - PREFETCH" 64(%0, %%"REG_a", 4) \n\t" - "movq (%0, %%"REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0) - "movq 8(%0, %%"REG_a", 4), %%mm1 \n\t" // YUYV YUYV(4) - "movq 16(%0, %%"REG_a", 4), %%mm2 \n\t" // YUYV YUYV(8) - "movq 24(%0, %%"REG_a", 4), %%mm3 \n\t" // YUYV YUYV(12) - "pand %%mm7, %%mm0 \n\t" // Y0Y0 Y0Y0(0) - "pand %%mm7, %%mm1 \n\t" // Y0Y0 Y0Y0(4) - "pand %%mm7, %%mm2 \n\t" // Y0Y0 Y0Y0(8) - "pand %%mm7, %%mm3 \n\t" // Y0Y0 Y0Y0(12) - "packuswb %%mm1, %%mm0 \n\t" // YYYY YYYY(0) - "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(8) - - MOVNTQ" %%mm0, (%1, %%"REG_a", 2) \n\t" - MOVNTQ" %%mm2, 8(%1, %%"REG_a", 2) \n\t" - - "add $8, %%"REG_a" \n\t" - "cmp %4, %%"REG_a" \n\t" - " jb 1b \n\t" - - ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth) - : "memory", "%"REG_a - ); -#else - long i; + int i; for (i=0; i<chromWidth; i++) { ydst[2*i+0] = src[4*i+0]; udst[i] = src[4*i+1]; @@ -1740,22 +531,17 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t ydst[2*i+0] = src[4*i+0]; ydst[2*i+1] = src[4*i+2]; } -#endif udst += chromStride; vdst += chromStride; ydst += lumStride; src += srcStride; } -#if COMPILE_TEMPLATE_MMX - __asm__ volatile(EMMS" \n\t" - SFENCE" \n\t" - :::"memory"); -#endif } -static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWidth, long srcHeight, long srcStride, long dstStride) +static inline void planar2x_c(const uint8_t *src, uint8_t *dst, int srcWidth, + int srcHeight, int srcStride, int dstStride) { - long x,y; + int x,y; dst[0]= 
src[0]; @@ -1769,66 +555,10 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWi dst+= dstStride; for (y=1; y<srcHeight; y++) { -#if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW - const x86_reg mmxSize= srcWidth&~15; - __asm__ volatile( - "mov %4, %%"REG_a" \n\t" - "movq "MANGLE(mmx_ff)", %%mm0 \n\t" - "movq (%0, %%"REG_a"), %%mm4 \n\t" - "movq %%mm4, %%mm2 \n\t" - "psllq $8, %%mm4 \n\t" - "pand %%mm0, %%mm2 \n\t" - "por %%mm2, %%mm4 \n\t" - "movq (%1, %%"REG_a"), %%mm5 \n\t" - "movq %%mm5, %%mm3 \n\t" - "psllq $8, %%mm5 \n\t" - "pand %%mm0, %%mm3 \n\t" - "por %%mm3, %%mm5 \n\t" - "1: \n\t" - "movq (%0, %%"REG_a"), %%mm0 \n\t" - "movq (%1, %%"REG_a"), %%mm1 \n\t" - "movq 1(%0, %%"REG_a"), %%mm2 \n\t" - "movq 1(%1, %%"REG_a"), %%mm3 \n\t" - PAVGB" %%mm0, %%mm5 \n\t" - PAVGB" %%mm0, %%mm3 \n\t" - PAVGB" %%mm0, %%mm5 \n\t" - PAVGB" %%mm0, %%mm3 \n\t" - PAVGB" %%mm1, %%mm4 \n\t" - PAVGB" %%mm1, %%mm2 \n\t" - PAVGB" %%mm1, %%mm4 \n\t" - PAVGB" %%mm1, %%mm2 \n\t" - "movq %%mm5, %%mm7 \n\t" - "movq %%mm4, %%mm6 \n\t" - "punpcklbw %%mm3, %%mm5 \n\t" - "punpckhbw %%mm3, %%mm7 \n\t" - "punpcklbw %%mm2, %%mm4 \n\t" - "punpckhbw %%mm2, %%mm6 \n\t" -#if 1 - MOVNTQ" %%mm5, (%2, %%"REG_a", 2) \n\t" - MOVNTQ" %%mm7, 8(%2, %%"REG_a", 2) \n\t" - MOVNTQ" %%mm4, (%3, %%"REG_a", 2) \n\t" - MOVNTQ" %%mm6, 8(%3, %%"REG_a", 2) \n\t" -#else - "movq %%mm5, (%2, %%"REG_a", 2) \n\t" - "movq %%mm7, 8(%2, %%"REG_a", 2) \n\t" - "movq %%mm4, (%3, %%"REG_a", 2) \n\t" - "movq %%mm6, 8(%3, %%"REG_a", 2) \n\t" -#endif - "add $8, %%"REG_a" \n\t" - "movq -1(%0, %%"REG_a"), %%mm4 \n\t" - "movq -1(%1, %%"REG_a"), %%mm5 \n\t" - " js 1b \n\t" - :: "r" (src + mmxSize ), "r" (src + srcStride + mmxSize ), - "r" (dst + mmxSize*2), "r" (dst + dstStride + mmxSize*2), - "g" (-mmxSize) - : "%"REG_a - ); -#else - const x86_reg mmxSize=1; + const int mmxSize = 1; dst[0 ]= (3*src[0] + src[srcStride])>>2; dst[dstStride]= ( src[0] + 3*src[srcStride])>>2; -#endif for (x=mmxSize-1; 
x<srcWidth-1; x++) { dst[2*x +1]= (3*src[x+0] + src[x+srcStride+1])>>2; @@ -1844,7 +574,6 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWi } // last line -#if 1 dst[0]= src[0]; for (x=0; x<srcWidth-1; x++) { @@ -1852,18 +581,6 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWi dst[2*x+2]= ( src[x] + 3*src[x+1])>>2; } dst[2*srcWidth-1]= src[srcWidth-1]; -#else - for (x=0; x<srcWidth; x++) { - dst[2*x+0]= - dst[2*x+1]= src[x]; - } -#endif - -#if COMPILE_TEMPLATE_MMX - __asm__ volatile(EMMS" \n\t" - SFENCE" \n\t" - :::"memory"); -#endif } /** @@ -1872,97 +589,16 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWi * Chrominance data is only taken from every second line, others are ignored. * FIXME: Write HQ version. */ -static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, - long width, long height, - long lumStride, long chromStride, long srcStride) -{ - long y; - const x86_reg chromWidth= width>>1; +static inline void uyvytoyv12_c(const uint8_t *src, uint8_t *ydst, + uint8_t *udst, uint8_t *vdst, + int width, int height, + int lumStride, int chromStride, + int srcStride) +{ + int y; + const int chromWidth = width >> 1; for (y=0; y<height; y+=2) { -#if COMPILE_TEMPLATE_MMX - __asm__ volatile( - "xor %%"REG_a", %%"REG_a" \n\t" - "pcmpeqw %%mm7, %%mm7 \n\t" - "psrlw $8, %%mm7 \n\t" // FF,00,FF,00... 
- ".p2align 4 \n\t" - "1: \n\t" - PREFETCH" 64(%0, %%"REG_a", 4) \n\t" - "movq (%0, %%"REG_a", 4), %%mm0 \n\t" // UYVY UYVY(0) - "movq 8(%0, %%"REG_a", 4), %%mm1 \n\t" // UYVY UYVY(4) - "movq %%mm0, %%mm2 \n\t" // UYVY UYVY(0) - "movq %%mm1, %%mm3 \n\t" // UYVY UYVY(4) - "pand %%mm7, %%mm0 \n\t" // U0V0 U0V0(0) - "pand %%mm7, %%mm1 \n\t" // U0V0 U0V0(4) - "psrlw $8, %%mm2 \n\t" // Y0Y0 Y0Y0(0) - "psrlw $8, %%mm3 \n\t" // Y0Y0 Y0Y0(4) - "packuswb %%mm1, %%mm0 \n\t" // UVUV UVUV(0) - "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(0) - - MOVNTQ" %%mm2, (%1, %%"REG_a", 2) \n\t" - - "movq 16(%0, %%"REG_a", 4), %%mm1 \n\t" // UYVY UYVY(8) - "movq 24(%0, %%"REG_a", 4), %%mm2 \n\t" // UYVY UYVY(12) - "movq %%mm1, %%mm3 \n\t" // UYVY UYVY(8) - "movq %%mm2, %%mm4 \n\t" // UYVY UYVY(12) - "pand %%mm7, %%mm1 \n\t" // U0V0 U0V0(8) - "pand %%mm7, %%mm2 \n\t" // U0V0 U0V0(12) - "psrlw $8, %%mm3 \n\t" // Y0Y0 Y0Y0(8) - "psrlw $8, %%mm4 \n\t" // Y0Y0 Y0Y0(12) - "packuswb %%mm2, %%mm1 \n\t" // UVUV UVUV(8) - "packuswb %%mm4, %%mm3 \n\t" // YYYY YYYY(8) - - MOVNTQ" %%mm3, 8(%1, %%"REG_a", 2) \n\t" - - "movq %%mm0, %%mm2 \n\t" // UVUV UVUV(0) - "movq %%mm1, %%mm3 \n\t" // UVUV UVUV(8) - "psrlw $8, %%mm0 \n\t" // V0V0 V0V0(0) - "psrlw $8, %%mm1 \n\t" // V0V0 V0V0(8) - "pand %%mm7, %%mm2 \n\t" // U0U0 U0U0(0) - "pand %%mm7, %%mm3 \n\t" // U0U0 U0U0(8) - "packuswb %%mm1, %%mm0 \n\t" // VVVV VVVV(0) - "packuswb %%mm3, %%mm2 \n\t" // UUUU UUUU(0) - - MOVNTQ" %%mm0, (%3, %%"REG_a") \n\t" - MOVNTQ" %%mm2, (%2, %%"REG_a") \n\t" - - "add $8, %%"REG_a" \n\t" - "cmp %4, %%"REG_a" \n\t" - " jb 1b \n\t" - ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth) - : "memory", "%"REG_a - ); - - ydst += lumStride; - src += srcStride; - - __asm__ volatile( - "xor %%"REG_a", %%"REG_a" \n\t" - ".p2align 4 \n\t" - "1: \n\t" - PREFETCH" 64(%0, %%"REG_a", 4) \n\t" - "movq (%0, %%"REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0) - "movq 8(%0, %%"REG_a", 4), %%mm1 \n\t" // YUYV YUYV(4) - "movq 16(%0, %%"REG_a", 4), 
%%mm2 \n\t" // YUYV YUYV(8) - "movq 24(%0, %%"REG_a", 4), %%mm3 \n\t" // YUYV YUYV(12) - "psrlw $8, %%mm0 \n\t" // Y0Y0 Y0Y0(0) - "psrlw $8, %%mm1 \n\t" // Y0Y0 Y0Y0(4) - "psrlw $8, %%mm2 \n\t" // Y0Y0 Y0Y0(8) - "psrlw $8, %%mm3 \n\t" // Y0Y0 Y0Y0(12) - "packuswb %%mm1, %%mm0 \n\t" // YYYY YYYY(0) - "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(8) - - MOVNTQ" %%mm0, (%1, %%"REG_a", 2) \n\t" - MOVNTQ" %%mm2, 8(%1, %%"REG_a", 2) \n\t" - - "add $8, %%"REG_a" \n\t" - "cmp %4, %%"REG_a" \n\t" - " jb 1b \n\t" - - ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth) - : "memory", "%"REG_a - ); -#else - long i; + int i; for (i=0; i<chromWidth; i++) { udst[i] = src[4*i+0]; ydst[2*i+0] = src[4*i+1]; @@ -1976,17 +612,11 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t ydst[2*i+0] = src[4*i+1]; ydst[2*i+1] = src[4*i+3]; } -#endif udst += chromStride; vdst += chromStride; ydst += lumStride; src += srcStride; } -#if COMPILE_TEMPLATE_MMX - __asm__ volatile(EMMS" \n\t" - SFENCE" \n\t" - :::"memory"); -#endif } /** @@ -1996,251 +626,15 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t * others are ignored in the C version. * FIXME: Write HQ version. 
*/ -static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, - long width, long height, - long lumStride, long chromStride, long srcStride) +void rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, + uint8_t *vdst, int width, int height, int lumStride, + int chromStride, int srcStride) { - long y; - const x86_reg chromWidth= width>>1; -#if COMPILE_TEMPLATE_MMX - for (y=0; y<height-2; y+=2) { - long i; - for (i=0; i<2; i++) { - __asm__ volatile( - "mov %2, %%"REG_a" \n\t" - "movq "MANGLE(ff_bgr2YCoeff)", %%mm6 \n\t" - "movq "MANGLE(ff_w1111)", %%mm5 \n\t" - "pxor %%mm7, %%mm7 \n\t" - "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t" - ".p2align 4 \n\t" - "1: \n\t" - PREFETCH" 64(%0, %%"REG_d") \n\t" - "movd (%0, %%"REG_d"), %%mm0 \n\t" - "movd 3(%0, %%"REG_d"), %%mm1 \n\t" - "punpcklbw %%mm7, %%mm0 \n\t" - "punpcklbw %%mm7, %%mm1 \n\t" - "movd 6(%0, %%"REG_d"), %%mm2 \n\t" - "movd 9(%0, %%"REG_d"), %%mm3 \n\t" - "punpcklbw %%mm7, %%mm2 \n\t" - "punpcklbw %%mm7, %%mm3 \n\t" - "pmaddwd %%mm6, %%mm0 \n\t" - "pmaddwd %%mm6, %%mm1 \n\t" - "pmaddwd %%mm6, %%mm2 \n\t" - "pmaddwd %%mm6, %%mm3 \n\t" -#ifndef FAST_BGR2YV12 - "psrad $8, %%mm0 \n\t" - "psrad $8, %%mm1 \n\t" - "psrad $8, %%mm2 \n\t" - "psrad $8, %%mm3 \n\t" -#endif - "packssdw %%mm1, %%mm0 \n\t" - "packssdw %%mm3, %%mm2 \n\t" - "pmaddwd %%mm5, %%mm0 \n\t" - "pmaddwd %%mm5, %%mm2 \n\t" - "packssdw %%mm2, %%mm0 \n\t" - "psraw $7, %%mm0 \n\t" - - "movd 12(%0, %%"REG_d"), %%mm4 \n\t" - "movd 15(%0, %%"REG_d"), %%mm1 \n\t" - "punpcklbw %%mm7, %%mm4 \n\t" - "punpcklbw %%mm7, %%mm1 \n\t" - "movd 18(%0, %%"REG_d"), %%mm2 \n\t" - "movd 21(%0, %%"REG_d"), %%mm3 \n\t" - "punpcklbw %%mm7, %%mm2 \n\t" - "punpcklbw %%mm7, %%mm3 \n\t" - "pmaddwd %%mm6, %%mm4 \n\t" - "pmaddwd %%mm6, %%mm1 \n\t" - "pmaddwd %%mm6, %%mm2 \n\t" - "pmaddwd %%mm6, %%mm3 \n\t" -#ifndef FAST_BGR2YV12 - "psrad $8, %%mm4 \n\t" - "psrad $8, %%mm1 \n\t" - "psrad $8, %%mm2 \n\t" - "psrad $8, %%mm3 
\n\t" -#endif - "packssdw %%mm1, %%mm4 \n\t" - "packssdw %%mm3, %%mm2 \n\t" - "pmaddwd %%mm5, %%mm4 \n\t" - "pmaddwd %%mm5, %%mm2 \n\t" - "add $24, %%"REG_d" \n\t" - "packssdw %%mm2, %%mm4 \n\t" - "psraw $7, %%mm4 \n\t" - - "packuswb %%mm4, %%mm0 \n\t" - "paddusb "MANGLE(ff_bgr2YOffset)", %%mm0 \n\t" - - MOVNTQ" %%mm0, (%1, %%"REG_a") \n\t" - "add $8, %%"REG_a" \n\t" - " js 1b \n\t" - : : "r" (src+width*3), "r" (ydst+width), "g" ((x86_reg)-width) - : "%"REG_a, "%"REG_d - ); - ydst += lumStride; - src += srcStride; - } - src -= srcStride*2; - __asm__ volatile( - "mov %4, %%"REG_a" \n\t" - "movq "MANGLE(ff_w1111)", %%mm5 \n\t" - "movq "MANGLE(ff_bgr2UCoeff)", %%mm6 \n\t" - "pxor %%mm7, %%mm7 \n\t" - "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t" - "add %%"REG_d", %%"REG_d" \n\t" - ".p2align 4 \n\t" - "1: \n\t" - PREFETCH" 64(%0, %%"REG_d") \n\t" - PREFETCH" 64(%1, %%"REG_d") \n\t" -#if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW - "movq (%0, %%"REG_d"), %%mm0 \n\t" - "movq (%1, %%"REG_d"), %%mm1 \n\t" - "movq 6(%0, %%"REG_d"), %%mm2 \n\t" - "movq 6(%1, %%"REG_d"), %%mm3 \n\t" - PAVGB" %%mm1, %%mm0 \n\t" - PAVGB" %%mm3, %%mm2 \n\t" - "movq %%mm0, %%mm1 \n\t" - "movq %%mm2, %%mm3 \n\t" - "psrlq $24, %%mm0 \n\t" - "psrlq $24, %%mm2 \n\t" - PAVGB" %%mm1, %%mm0 \n\t" - PAVGB" %%mm3, %%mm2 \n\t" - "punpcklbw %%mm7, %%mm0 \n\t" - "punpcklbw %%mm7, %%mm2 \n\t" -#else - "movd (%0, %%"REG_d"), %%mm0 \n\t" - "movd (%1, %%"REG_d"), %%mm1 \n\t" - "movd 3(%0, %%"REG_d"), %%mm2 \n\t" - "movd 3(%1, %%"REG_d"), %%mm3 \n\t" - "punpcklbw %%mm7, %%mm0 \n\t" - "punpcklbw %%mm7, %%mm1 \n\t" - "punpcklbw %%mm7, %%mm2 \n\t" - "punpcklbw %%mm7, %%mm3 \n\t" - "paddw %%mm1, %%mm0 \n\t" - "paddw %%mm3, %%mm2 \n\t" - "paddw %%mm2, %%mm0 \n\t" - "movd 6(%0, %%"REG_d"), %%mm4 \n\t" - "movd 6(%1, %%"REG_d"), %%mm1 \n\t" - "movd 9(%0, %%"REG_d"), %%mm2 \n\t" - "movd 9(%1, %%"REG_d"), %%mm3 \n\t" - "punpcklbw %%mm7, %%mm4 \n\t" - "punpcklbw %%mm7, %%mm1 \n\t" - "punpcklbw %%mm7, %%mm2 \n\t" - 
"punpcklbw %%mm7, %%mm3 \n\t" - "paddw %%mm1, %%mm4 \n\t" - "paddw %%mm3, %%mm2 \n\t" - "paddw %%mm4, %%mm2 \n\t" - "psrlw $2, %%mm0 \n\t" - "psrlw $2, %%mm2 \n\t" -#endif - "movq "MANGLE(ff_bgr2VCoeff)", %%mm1 \n\t" - "movq "MANGLE(ff_bgr2VCoeff)", %%mm3 \n\t" - - "pmaddwd %%mm0, %%mm1 \n\t" - "pmaddwd %%mm2, %%mm3 \n\t" - "pmaddwd %%mm6, %%mm0 \n\t" - "pmaddwd %%mm6, %%mm2 \n\t" -#ifndef FAST_BGR2YV12 - "psrad $8, %%mm0 \n\t" - "psrad $8, %%mm1 \n\t" - "psrad $8, %%mm2 \n\t" - "psrad $8, %%mm3 \n\t" -#endif - "packssdw %%mm2, %%mm0 \n\t" - "packssdw %%mm3, %%mm1 \n\t" - "pmaddwd %%mm5, %%mm0 \n\t" - "pmaddwd %%mm5, %%mm1 \n\t" - "packssdw %%mm1, %%mm0 \n\t" // V1 V0 U1 U0 - "psraw $7, %%mm0 \n\t" - -#if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW - "movq 12(%0, %%"REG_d"), %%mm4 \n\t" - "movq 12(%1, %%"REG_d"), %%mm1 \n\t" - "movq 18(%0, %%"REG_d"), %%mm2 \n\t" - "movq 18(%1, %%"REG_d"), %%mm3 \n\t" - PAVGB" %%mm1, %%mm4 \n\t" - PAVGB" %%mm3, %%mm2 \n\t" - "movq %%mm4, %%mm1 \n\t" - "movq %%mm2, %%mm3 \n\t" - "psrlq $24, %%mm4 \n\t" - "psrlq $24, %%mm2 \n\t" - PAVGB" %%mm1, %%mm4 \n\t" - PAVGB" %%mm3, %%mm2 \n\t" - "punpcklbw %%mm7, %%mm4 \n\t" - "punpcklbw %%mm7, %%mm2 \n\t" -#else - "movd 12(%0, %%"REG_d"), %%mm4 \n\t" - "movd 12(%1, %%"REG_d"), %%mm1 \n\t" - "movd 15(%0, %%"REG_d"), %%mm2 \n\t" - "movd 15(%1, %%"REG_d"), %%mm3 \n\t" - "punpcklbw %%mm7, %%mm4 \n\t" - "punpcklbw %%mm7, %%mm1 \n\t" - "punpcklbw %%mm7, %%mm2 \n\t" - "punpcklbw %%mm7, %%mm3 \n\t" - "paddw %%mm1, %%mm4 \n\t" - "paddw %%mm3, %%mm2 \n\t" - "paddw %%mm2, %%mm4 \n\t" - "movd 18(%0, %%"REG_d"), %%mm5 \n\t" - "movd 18(%1, %%"REG_d"), %%mm1 \n\t" - "movd 21(%0, %%"REG_d"), %%mm2 \n\t" - "movd 21(%1, %%"REG_d"), %%mm3 \n\t" - "punpcklbw %%mm7, %%mm5 \n\t" - "punpcklbw %%mm7, %%mm1 \n\t" - "punpcklbw %%mm7, %%mm2 \n\t" - "punpcklbw %%mm7, %%mm3 \n\t" - "paddw %%mm1, %%mm5 \n\t" - "paddw %%mm3, %%mm2 \n\t" - "paddw %%mm5, %%mm2 \n\t" - "movq "MANGLE(ff_w1111)", %%mm5 \n\t" - "psrlw $2, 
%%mm4 \n\t" - "psrlw $2, %%mm2 \n\t" -#endif - "movq "MANGLE(ff_bgr2VCoeff)", %%mm1 \n\t" - "movq "MANGLE(ff_bgr2VCoeff)", %%mm3 \n\t" - - "pmaddwd %%mm4, %%mm1 \n\t" - "pmaddwd %%mm2, %%mm3 \n\t" - "pmaddwd %%mm6, %%mm4 \n\t" - "pmaddwd %%mm6, %%mm2 \n\t" -#ifndef FAST_BGR2YV12 - "psrad $8, %%mm4 \n\t" - "psrad $8, %%mm1 \n\t" - "psrad $8, %%mm2 \n\t" - "psrad $8, %%mm3 \n\t" -#endif - "packssdw %%mm2, %%mm4 \n\t" - "packssdw %%mm3, %%mm1 \n\t" - "pmaddwd %%mm5, %%mm4 \n\t" - "pmaddwd %%mm5, %%mm1 \n\t" - "add $24, %%"REG_d" \n\t" - "packssdw %%mm1, %%mm4 \n\t" // V3 V2 U3 U2 - "psraw $7, %%mm4 \n\t" - - "movq %%mm0, %%mm1 \n\t" - "punpckldq %%mm4, %%mm0 \n\t" - "punpckhdq %%mm4, %%mm1 \n\t" - "packsswb %%mm1, %%mm0 \n\t" - "paddb "MANGLE(ff_bgr2UVOffset)", %%mm0 \n\t" - "movd %%mm0, (%2, %%"REG_a") \n\t" - "punpckhdq %%mm0, %%mm0 \n\t" - "movd %%mm0, (%3, %%"REG_a") \n\t" - "add $4, %%"REG_a" \n\t" - " js 1b \n\t" - : : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6), "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" (-chromWidth) - : "%"REG_a, "%"REG_d - ); - - udst += chromStride; - vdst += chromStride; - src += srcStride*2; - } - - __asm__ volatile(EMMS" \n\t" - SFENCE" \n\t" - :::"memory"); -#else + int y; + const int chromWidth = width >> 1; y=0; -#endif for (; y<height; y+=2) { - long i; + int i; for (i=0; i<chromWidth; i++) { unsigned int b = src[6*i+0]; unsigned int g = src[6*i+1]; @@ -2290,195 +684,56 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ } } -static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dest, - long width, long height, long src1Stride, - long src2Stride, long dstStride) +static void interleaveBytes_c(const uint8_t *src1, const uint8_t *src2, + uint8_t *dest, int width, + int height, int src1Stride, + int src2Stride, int dstStride) { - long h; + int h; for (h=0; h < height; h++) { - long w; - -#if COMPILE_TEMPLATE_MMX -#if COMPILE_TEMPLATE_SSE2 - __asm__( - 
"xor %%"REG_a", %%"REG_a" \n\t" - "1: \n\t" - PREFETCH" 64(%1, %%"REG_a") \n\t" - PREFETCH" 64(%2, %%"REG_a") \n\t" - "movdqa (%1, %%"REG_a"), %%xmm0 \n\t" - "movdqa (%1, %%"REG_a"), %%xmm1 \n\t" - "movdqa (%2, %%"REG_a"), %%xmm2 \n\t" - "punpcklbw %%xmm2, %%xmm0 \n\t" - "punpckhbw %%xmm2, %%xmm1 \n\t" - "movntdq %%xmm0, (%0, %%"REG_a", 2) \n\t" - "movntdq %%xmm1, 16(%0, %%"REG_a", 2) \n\t" - "add $16, %%"REG_a" \n\t" - "cmp %3, %%"REG_a" \n\t" - " jb 1b \n\t" - ::"r"(dest), "r"(src1), "r"(src2), "r" ((x86_reg)width-15) - : "memory", "%"REG_a"" - ); -#else - __asm__( - "xor %%"REG_a", %%"REG_a" \n\t" - "1: \n\t" - PREFETCH" 64(%1, %%"REG_a") \n\t" - PREFETCH" 64(%2, %%"REG_a") \n\t" - "movq (%1, %%"REG_a"), %%mm0 \n\t" - "movq 8(%1, %%"REG_a"), %%mm2 \n\t" - "movq %%mm0, %%mm1 \n\t" - "movq %%mm2, %%mm3 \n\t" - "movq (%2, %%"REG_a"), %%mm4 \n\t" - "movq 8(%2, %%"REG_a"), %%mm5 \n\t" - "punpcklbw %%mm4, %%mm0 \n\t" - "punpckhbw %%mm4, %%mm1 \n\t" - "punpcklbw %%mm5, %%mm2 \n\t" - "punpckhbw %%mm5, %%mm3 \n\t" - MOVNTQ" %%mm0, (%0, %%"REG_a", 2) \n\t" - MOVNTQ" %%mm1, 8(%0, %%"REG_a", 2) \n\t" - MOVNTQ" %%mm2, 16(%0, %%"REG_a", 2) \n\t" - MOVNTQ" %%mm3, 24(%0, %%"REG_a", 2) \n\t" - "add $16, %%"REG_a" \n\t" - "cmp %3, %%"REG_a" \n\t" - " jb 1b \n\t" - ::"r"(dest), "r"(src1), "r"(src2), "r" ((x86_reg)width-15) - : "memory", "%"REG_a - ); -#endif - for (w= (width&(~15)); w < width; w++) { - dest[2*w+0] = src1[w]; - dest[2*w+1] = src2[w]; - } -#else + int w; for (w=0; w < width; w++) { dest[2*w+0] = src1[w]; dest[2*w+1] = src2[w]; } -#endif dest += dstStride; src1 += src1Stride; src2 += src2Stride; } -#if COMPILE_TEMPLATE_MMX - __asm__( - EMMS" \n\t" - SFENCE" \n\t" - ::: "memory" - ); -#endif } -static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2, - uint8_t *dst1, uint8_t *dst2, - long width, long height, - long srcStride1, long srcStride2, - long dstStride1, long dstStride2) +static inline void vu9_to_vu12_c(const uint8_t *src1, const 
uint8_t *src2, + uint8_t *dst1, uint8_t *dst2, + int width, int height, + int srcStride1, int srcStride2, + int dstStride1, int dstStride2) { - x86_reg y; - long x,w,h; + int y; + int x,w,h; w=width/2; h=height/2; -#if COMPILE_TEMPLATE_MMX - __asm__ volatile( - PREFETCH" %0 \n\t" - PREFETCH" %1 \n\t" - ::"m"(*(src1+srcStride1)),"m"(*(src2+srcStride2)):"memory"); -#endif for (y=0;y<h;y++) { const uint8_t* s1=src1+srcStride1*(y>>1); uint8_t* d=dst1+dstStride1*y; x=0; -#if COMPILE_TEMPLATE_MMX - for (;x<w-31;x+=32) { - __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movq %1, %%mm0 \n\t" - "movq 8%1, %%mm2 \n\t" - "movq 16%1, %%mm4 \n\t" - "movq 24%1, %%mm6 \n\t" - "movq %%mm0, %%mm1 \n\t" - "movq %%mm2, %%mm3 \n\t" - "movq %%mm4, %%mm5 \n\t" - "movq %%mm6, %%mm7 \n\t" - "punpcklbw %%mm0, %%mm0 \n\t" - "punpckhbw %%mm1, %%mm1 \n\t" - "punpcklbw %%mm2, %%mm2 \n\t" - "punpckhbw %%mm3, %%mm3 \n\t" - "punpcklbw %%mm4, %%mm4 \n\t" - "punpckhbw %%mm5, %%mm5 \n\t" - "punpcklbw %%mm6, %%mm6 \n\t" - "punpckhbw %%mm7, %%mm7 \n\t" - MOVNTQ" %%mm0, %0 \n\t" - MOVNTQ" %%mm1, 8%0 \n\t" - MOVNTQ" %%mm2, 16%0 \n\t" - MOVNTQ" %%mm3, 24%0 \n\t" - MOVNTQ" %%mm4, 32%0 \n\t" - MOVNTQ" %%mm5, 40%0 \n\t" - MOVNTQ" %%mm6, 48%0 \n\t" - MOVNTQ" %%mm7, 56%0" - :"=m"(d[2*x]) - :"m"(s1[x]) - :"memory"); - } -#endif for (;x<w;x++) d[2*x]=d[2*x+1]=s1[x]; } for (y=0;y<h;y++) { const uint8_t* s2=src2+srcStride2*(y>>1); uint8_t* d=dst2+dstStride2*y; x=0; -#if COMPILE_TEMPLATE_MMX - for (;x<w-31;x+=32) { - __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movq %1, %%mm0 \n\t" - "movq 8%1, %%mm2 \n\t" - "movq 16%1, %%mm4 \n\t" - "movq 24%1, %%mm6 \n\t" - "movq %%mm0, %%mm1 \n\t" - "movq %%mm2, %%mm3 \n\t" - "movq %%mm4, %%mm5 \n\t" - "movq %%mm6, %%mm7 \n\t" - "punpcklbw %%mm0, %%mm0 \n\t" - "punpckhbw %%mm1, %%mm1 \n\t" - "punpcklbw %%mm2, %%mm2 \n\t" - "punpckhbw %%mm3, %%mm3 \n\t" - "punpcklbw %%mm4, %%mm4 \n\t" - "punpckhbw %%mm5, %%mm5 \n\t" - "punpcklbw %%mm6, %%mm6 \n\t" - "punpckhbw %%mm7, %%mm7 \n\t" - 
MOVNTQ" %%mm0, %0 \n\t" - MOVNTQ" %%mm1, 8%0 \n\t" - MOVNTQ" %%mm2, 16%0 \n\t" - MOVNTQ" %%mm3, 24%0 \n\t" - MOVNTQ" %%mm4, 32%0 \n\t" - MOVNTQ" %%mm5, 40%0 \n\t" - MOVNTQ" %%mm6, 48%0 \n\t" - MOVNTQ" %%mm7, 56%0" - :"=m"(d[2*x]) - :"m"(s2[x]) - :"memory"); - } -#endif for (;x<w;x++) d[2*x]=d[2*x+1]=s2[x]; } -#if COMPILE_TEMPLATE_MMX - __asm__( - EMMS" \n\t" - SFENCE" \n\t" - ::: "memory" - ); -#endif } -static inline void RENAME(yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, - uint8_t *dst, - long width, long height, - long srcStride1, long srcStride2, - long srcStride3, long dstStride) +static inline void yvu9_to_yuy2_c(const uint8_t *src1, const uint8_t *src2, + const uint8_t *src3, uint8_t *dst, + int width, int height, + int srcStride1, int srcStride2, + int srcStride3, int dstStride) { - x86_reg x; - long y,w,h; + int x; + int y,w,h; w=width/2; h=height; for (y=0;y<h;y++) { const uint8_t* yp=src1+srcStride1*y; @@ -2486,62 +741,8 @@ static inline void RENAME(yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2 const uint8_t* vp=src3+srcStride3*(y>>2); uint8_t* d=dst+dstStride*y; x=0; -#if COMPILE_TEMPLATE_MMX - for (;x<w-7;x+=8) { - __asm__ volatile( - PREFETCH" 32(%1, %0) \n\t" - PREFETCH" 32(%2, %0) \n\t" - PREFETCH" 32(%3, %0) \n\t" - "movq (%1, %0, 4), %%mm0 \n\t" /* Y0Y1Y2Y3Y4Y5Y6Y7 */ - "movq (%2, %0), %%mm1 \n\t" /* U0U1U2U3U4U5U6U7 */ - "movq (%3, %0), %%mm2 \n\t" /* V0V1V2V3V4V5V6V7 */ - "movq %%mm0, %%mm3 \n\t" /* Y0Y1Y2Y3Y4Y5Y6Y7 */ - "movq %%mm1, %%mm4 \n\t" /* U0U1U2U3U4U5U6U7 */ - "movq %%mm2, %%mm5 \n\t" /* V0V1V2V3V4V5V6V7 */ - "punpcklbw %%mm1, %%mm1 \n\t" /* U0U0 U1U1 U2U2 U3U3 */ - "punpcklbw %%mm2, %%mm2 \n\t" /* V0V0 V1V1 V2V2 V3V3 */ - "punpckhbw %%mm4, %%mm4 \n\t" /* U4U4 U5U5 U6U6 U7U7 */ - "punpckhbw %%mm5, %%mm5 \n\t" /* V4V4 V5V5 V6V6 V7V7 */ - - "movq %%mm1, %%mm6 \n\t" - "punpcklbw %%mm2, %%mm1 \n\t" /* U0V0 U0V0 U1V1 U1V1*/ - "punpcklbw %%mm1, %%mm0 \n\t" /* Y0U0 Y1V0 Y2U0 Y3V0*/ - "punpckhbw %%mm1, 
%%mm3 \n\t" /* Y4U1 Y5V1 Y6U1 Y7V1*/ - MOVNTQ" %%mm0, (%4, %0, 8) \n\t" - MOVNTQ" %%mm3, 8(%4, %0, 8) \n\t" - - "punpckhbw %%mm2, %%mm6 \n\t" /* U2V2 U2V2 U3V3 U3V3*/ - "movq 8(%1, %0, 4), %%mm0 \n\t" - "movq %%mm0, %%mm3 \n\t" - "punpcklbw %%mm6, %%mm0 \n\t" /* Y U2 Y V2 Y U2 Y V2*/ - "punpckhbw %%mm6, %%mm3 \n\t" /* Y U3 Y V3 Y U3 Y V3*/ - MOVNTQ" %%mm0, 16(%4, %0, 8) \n\t" - MOVNTQ" %%mm3, 24(%4, %0, 8) \n\t" - - "movq %%mm4, %%mm6 \n\t" - "movq 16(%1, %0, 4), %%mm0 \n\t" - "movq %%mm0, %%mm3 \n\t" - "punpcklbw %%mm5, %%mm4 \n\t" - "punpcklbw %%mm4, %%mm0 \n\t" /* Y U4 Y V4 Y U4 Y V4*/ - "punpckhbw %%mm4, %%mm3 \n\t" /* Y U5 Y V5 Y U5 Y V5*/ - MOVNTQ" %%mm0, 32(%4, %0, 8) \n\t" - MOVNTQ" %%mm3, 40(%4, %0, 8) \n\t" - - "punpckhbw %%mm5, %%mm6 \n\t" - "movq 24(%1, %0, 4), %%mm0 \n\t" - "movq %%mm0, %%mm3 \n\t" - "punpcklbw %%mm6, %%mm0 \n\t" /* Y U6 Y V6 Y U6 Y V6*/ - "punpckhbw %%mm6, %%mm3 \n\t" /* Y U7 Y V7 Y U7 Y V7*/ - MOVNTQ" %%mm0, 48(%4, %0, 8) \n\t" - MOVNTQ" %%mm3, 56(%4, %0, 8) \n\t" - - : "+r" (x) - : "r"(yp), "r" (up), "r"(vp), "r"(d) - :"memory"); - } -#endif for (; x<w; x++) { - const long x2 = x<<2; + const int x2 = x<<2; d[8*x+0] = yp[x2]; d[8*x+1] = up[x]; d[8*x+2] = yp[x2+1]; @@ -2552,95 +753,27 @@ static inline void RENAME(yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2 d[8*x+7] = vp[x]; } } -#if COMPILE_TEMPLATE_MMX - __asm__( - EMMS" \n\t" - SFENCE" \n\t" - ::: "memory" - ); -#endif } -static void RENAME(extract_even)(const uint8_t *src, uint8_t *dst, x86_reg count) +static void extract_even_c(const uint8_t *src, uint8_t *dst, int count) { dst += count; src += 2*count; count= - count; -#if COMPILE_TEMPLATE_MMX - if(count <= -16) { - count += 15; - __asm__ volatile( - "pcmpeqw %%mm7, %%mm7 \n\t" - "psrlw $8, %%mm7 \n\t" - "1: \n\t" - "movq -30(%1, %0, 2), %%mm0 \n\t" - "movq -22(%1, %0, 2), %%mm1 \n\t" - "movq -14(%1, %0, 2), %%mm2 \n\t" - "movq -6(%1, %0, 2), %%mm3 \n\t" - "pand %%mm7, %%mm0 \n\t" - "pand %%mm7, %%mm1 \n\t" - "pand 
%%mm7, %%mm2 \n\t" - "pand %%mm7, %%mm3 \n\t" - "packuswb %%mm1, %%mm0 \n\t" - "packuswb %%mm3, %%mm2 \n\t" - MOVNTQ" %%mm0,-15(%2, %0) \n\t" - MOVNTQ" %%mm2,- 7(%2, %0) \n\t" - "add $16, %0 \n\t" - " js 1b \n\t" - : "+r"(count) - : "r"(src), "r"(dst) - ); - count -= 15; - } -#endif while(count<0) { dst[count]= src[2*count]; count++; } } -static void RENAME(extract_even2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count) +static void extract_even2_c(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, + int count) { dst0+= count; dst1+= count; src += 4*count; count= - count; -#if COMPILE_TEMPLATE_MMX - if(count <= -8) { - count += 7; - __asm__ volatile( - "pcmpeqw %%mm7, %%mm7 \n\t" - "psrlw $8, %%mm7 \n\t" - "1: \n\t" - "movq -28(%1, %0, 4), %%mm0 \n\t" - "movq -20(%1, %0, 4), %%mm1 \n\t" - "movq -12(%1, %0, 4), %%mm2 \n\t" - "movq -4(%1, %0, 4), %%mm3 \n\t" - "pand %%mm7, %%mm0 \n\t" - "pand %%mm7, %%mm1 \n\t" - "pand %%mm7, %%mm2 \n\t" - "pand %%mm7, %%mm3 \n\t" - "packuswb %%mm1, %%mm0 \n\t" - "packuswb %%mm3, %%mm2 \n\t" - "movq %%mm0, %%mm1 \n\t" - "movq %%mm2, %%mm3 \n\t" - "psrlw $8, %%mm0 \n\t" - "psrlw $8, %%mm2 \n\t" - "pand %%mm7, %%mm1 \n\t" - "pand %%mm7, %%mm3 \n\t" - "packuswb %%mm2, %%mm0 \n\t" - "packuswb %%mm3, %%mm1 \n\t" - MOVNTQ" %%mm0,- 7(%3, %0) \n\t" - MOVNTQ" %%mm1,- 7(%2, %0) \n\t" - "add $8, %0 \n\t" - " js 1b \n\t" - : "+r"(count) - : "r"(src), "r"(dst0), "r"(dst1) - ); - count -= 7; - } -#endif while(count<0) { dst0[count]= src[4*count+0]; dst1[count]= src[4*count+2]; @@ -2648,52 +781,14 @@ static void RENAME(extract_even2)(const uint8_t *src, uint8_t *dst0, uint8_t *ds } } -static void RENAME(extract_even2avg)(const uint8_t *src0, const uint8_t *src1, uint8_t *dst0, uint8_t *dst1, x86_reg count) +static void extract_even2avg_c(const uint8_t *src0, const uint8_t *src1, + uint8_t *dst0, uint8_t *dst1, int count) { dst0 += count; dst1 += count; src0 += 4*count; src1 += 4*count; count= - count; -#ifdef PAVGB - if(count <= -8) 
{ - count += 7; - __asm__ volatile( - "pcmpeqw %%mm7, %%mm7 \n\t" - "psrlw $8, %%mm7 \n\t" - "1: \n\t" - "movq -28(%1, %0, 4), %%mm0 \n\t" - "movq -20(%1, %0, 4), %%mm1 \n\t" - "movq -12(%1, %0, 4), %%mm2 \n\t" - "movq -4(%1, %0, 4), %%mm3 \n\t" - PAVGB" -28(%2, %0, 4), %%mm0 \n\t" - PAVGB" -20(%2, %0, 4), %%mm1 \n\t" - PAVGB" -12(%2, %0, 4), %%mm2 \n\t" - PAVGB" - 4(%2, %0, 4), %%mm3 \n\t" - "pand %%mm7, %%mm0 \n\t" - "pand %%mm7, %%mm1 \n\t" - "pand %%mm7, %%mm2 \n\t" - "pand %%mm7, %%mm3 \n\t" - "packuswb %%mm1, %%mm0 \n\t" - "packuswb %%mm3, %%mm2 \n\t" - "movq %%mm0, %%mm1 \n\t" - "movq %%mm2, %%mm3 \n\t" - "psrlw $8, %%mm0 \n\t" - "psrlw $8, %%mm2 \n\t" - "pand %%mm7, %%mm1 \n\t" - "pand %%mm7, %%mm3 \n\t" - "packuswb %%mm2, %%mm0 \n\t" - "packuswb %%mm3, %%mm1 \n\t" - MOVNTQ" %%mm0,- 7(%4, %0) \n\t" - MOVNTQ" %%mm1,- 7(%3, %0) \n\t" - "add $8, %0 \n\t" - " js 1b \n\t" - : "+r"(count) - : "r"(src0), "r"(src1), "r"(dst0), "r"(dst1) - ); - count -= 7; - } -#endif while(count<0) { dst0[count]= (src0[4*count+0]+src1[4*count+0])>>1; dst1[count]= (src0[4*count+2]+src1[4*count+2])>>1; @@ -2701,47 +796,13 @@ static void RENAME(extract_even2avg)(const uint8_t *src0, const uint8_t *src1, u } } -static void RENAME(extract_odd2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count) +static void extract_odd2_c(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, + int count) { dst0+= count; dst1+= count; src += 4*count; count= - count; -#if COMPILE_TEMPLATE_MMX - if(count <= -8) { - count += 7; - __asm__ volatile( - "pcmpeqw %%mm7, %%mm7 \n\t" - "psrlw $8, %%mm7 \n\t" - "1: \n\t" - "movq -28(%1, %0, 4), %%mm0 \n\t" - "movq -20(%1, %0, 4), %%mm1 \n\t" - "movq -12(%1, %0, 4), %%mm2 \n\t" - "movq -4(%1, %0, 4), %%mm3 \n\t" - "psrlw $8, %%mm0 \n\t" - "psrlw $8, %%mm1 \n\t" - "psrlw $8, %%mm2 \n\t" - "psrlw $8, %%mm3 \n\t" - "packuswb %%mm1, %%mm0 \n\t" - "packuswb %%mm3, %%mm2 \n\t" - "movq %%mm0, %%mm1 \n\t" - "movq %%mm2, %%mm3 \n\t" - "psrlw $8, %%mm0 \n\t" - 
"psrlw $8, %%mm2 \n\t" - "pand %%mm7, %%mm1 \n\t" - "pand %%mm7, %%mm3 \n\t" - "packuswb %%mm2, %%mm0 \n\t" - "packuswb %%mm3, %%mm1 \n\t" - MOVNTQ" %%mm0,- 7(%3, %0) \n\t" - MOVNTQ" %%mm1,- 7(%2, %0) \n\t" - "add $8, %0 \n\t" - " js 1b \n\t" - : "+r"(count) - : "r"(src), "r"(dst0), "r"(dst1) - ); - count -= 7; - } -#endif src++; while(count<0) { dst0[count]= src[4*count+0]; @@ -2750,52 +811,14 @@ static void RENAME(extract_odd2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst } } -static void RENAME(extract_odd2avg)(const uint8_t *src0, const uint8_t *src1, uint8_t *dst0, uint8_t *dst1, x86_reg count) +static void extract_odd2avg_c(const uint8_t *src0, const uint8_t *src1, + uint8_t *dst0, uint8_t *dst1, int count) { dst0 += count; dst1 += count; src0 += 4*count; src1 += 4*count; count= - count; -#ifdef PAVGB - if(count <= -8) { - count += 7; - __asm__ volatile( - "pcmpeqw %%mm7, %%mm7 \n\t" - "psrlw $8, %%mm7 \n\t" - "1: \n\t" - "movq -28(%1, %0, 4), %%mm0 \n\t" - "movq -20(%1, %0, 4), %%mm1 \n\t" - "movq -12(%1, %0, 4), %%mm2 \n\t" - "movq -4(%1, %0, 4), %%mm3 \n\t" - PAVGB" -28(%2, %0, 4), %%mm0 \n\t" - PAVGB" -20(%2, %0, 4), %%mm1 \n\t" - PAVGB" -12(%2, %0, 4), %%mm2 \n\t" - PAVGB" - 4(%2, %0, 4), %%mm3 \n\t" - "psrlw $8, %%mm0 \n\t" - "psrlw $8, %%mm1 \n\t" - "psrlw $8, %%mm2 \n\t" - "psrlw $8, %%mm3 \n\t" - "packuswb %%mm1, %%mm0 \n\t" - "packuswb %%mm3, %%mm2 \n\t" - "movq %%mm0, %%mm1 \n\t" - "movq %%mm2, %%mm3 \n\t" - "psrlw $8, %%mm0 \n\t" - "psrlw $8, %%mm2 \n\t" - "pand %%mm7, %%mm1 \n\t" - "pand %%mm7, %%mm3 \n\t" - "packuswb %%mm2, %%mm0 \n\t" - "packuswb %%mm3, %%mm1 \n\t" - MOVNTQ" %%mm0,- 7(%4, %0) \n\t" - MOVNTQ" %%mm1,- 7(%3, %0) \n\t" - "add $8, %0 \n\t" - " js 1b \n\t" - : "+r"(count) - : "r"(src0), "r"(src1), "r"(dst0), "r"(dst1) - ); - count -= 7; - } -#endif src0++; src1++; while(count<0) { @@ -2805,17 +828,17 @@ static void RENAME(extract_odd2avg)(const uint8_t *src0, const uint8_t *src1, ui } } -static void RENAME(yuyvtoyuv420)(uint8_t 
*ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, - long width, long height, - long lumStride, long chromStride, long srcStride) +static void yuyvtoyuv420_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, + const uint8_t *src, int width, int height, + int lumStride, int chromStride, int srcStride) { - long y; - const long chromWidth= -((-width)>>1); + int y; + const int chromWidth= -((-width)>>1); for (y=0; y<height; y++) { - RENAME(extract_even)(src, ydst, width); + extract_even_c(src, ydst, width); if(y&1) { - RENAME(extract_odd2avg)(src-srcStride, src, udst, vdst, chromWidth); + extract_odd2avg_c(src - srcStride, src, udst, vdst, chromWidth); udst+= chromStride; vdst+= chromStride; } @@ -2823,51 +846,37 @@ static void RENAME(yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co src += srcStride; ydst+= lumStride; } -#if COMPILE_TEMPLATE_MMX - __asm__( - EMMS" \n\t" - SFENCE" \n\t" - ::: "memory" - ); -#endif } -static void RENAME(yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, - long width, long height, - long lumStride, long chromStride, long srcStride) +static void yuyvtoyuv422_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, + const uint8_t *src, int width, int height, + int lumStride, int chromStride, int srcStride) { - long y; - const long chromWidth= -((-width)>>1); + int y; + const int chromWidth= -((-width)>>1); for (y=0; y<height; y++) { - RENAME(extract_even)(src, ydst, width); - RENAME(extract_odd2)(src, udst, vdst, chromWidth); + extract_even_c(src, ydst, width); + extract_odd2_c(src, udst, vdst, chromWidth); src += srcStride; ydst+= lumStride; udst+= chromStride; vdst+= chromStride; } -#if COMPILE_TEMPLATE_MMX - __asm__( - EMMS" \n\t" - SFENCE" \n\t" - ::: "memory" - ); -#endif } -static void RENAME(uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, - long width, long height, - long lumStride, long chromStride, long srcStride) +static void uyvytoyuv420_c(uint8_t *ydst, uint8_t 
*udst, uint8_t *vdst, + const uint8_t *src, int width, int height, + int lumStride, int chromStride, int srcStride) { - long y; - const long chromWidth= -((-width)>>1); + int y; + const int chromWidth= -((-width)>>1); for (y=0; y<height; y++) { - RENAME(extract_even)(src+1, ydst, width); + extract_even_c(src + 1, ydst, width); if(y&1) { - RENAME(extract_even2avg)(src-srcStride, src, udst, vdst, chromWidth); + extract_even2avg_c(src - srcStride, src, udst, vdst, chromWidth); udst+= chromStride; vdst+= chromStride; } @@ -2875,73 +884,59 @@ static void RENAME(uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co src += srcStride; ydst+= lumStride; } -#if COMPILE_TEMPLATE_MMX - __asm__( - EMMS" \n\t" - SFENCE" \n\t" - ::: "memory" - ); -#endif } -static void RENAME(uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, - long width, long height, - long lumStride, long chromStride, long srcStride) +static void uyvytoyuv422_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, + const uint8_t *src, int width, int height, + int lumStride, int chromStride, int srcStride) { - long y; - const long chromWidth= -((-width)>>1); + int y; + const int chromWidth= -((-width)>>1); for (y=0; y<height; y++) { - RENAME(extract_even)(src+1, ydst, width); - RENAME(extract_even2)(src, udst, vdst, chromWidth); + extract_even_c(src + 1, ydst, width); + extract_even2_c(src, udst, vdst, chromWidth); src += srcStride; ydst+= lumStride; udst+= chromStride; vdst+= chromStride; } -#if COMPILE_TEMPLATE_MMX - __asm__( - EMMS" \n\t" - SFENCE" \n\t" - ::: "memory" - ); -#endif } -static inline void RENAME(rgb2rgb_init)(void) -{ - rgb15to16 = RENAME(rgb15to16); - rgb15tobgr24 = RENAME(rgb15tobgr24); - rgb15to32 = RENAME(rgb15to32); - rgb16tobgr24 = RENAME(rgb16tobgr24); - rgb16to32 = RENAME(rgb16to32); - rgb16to15 = RENAME(rgb16to15); - rgb24tobgr16 = RENAME(rgb24tobgr16); - rgb24tobgr15 = RENAME(rgb24tobgr15); - rgb24tobgr32 = RENAME(rgb24tobgr32); - rgb32to16 = 
RENAME(rgb32to16); - rgb32to15 = RENAME(rgb32to15); - rgb32tobgr24 = RENAME(rgb32tobgr24); - rgb24to15 = RENAME(rgb24to15); - rgb24to16 = RENAME(rgb24to16); - rgb24tobgr24 = RENAME(rgb24tobgr24); - shuffle_bytes_2103 = RENAME(shuffle_bytes_2103); - rgb32tobgr16 = RENAME(rgb32tobgr16); - rgb32tobgr15 = RENAME(rgb32tobgr15); - yv12toyuy2 = RENAME(yv12toyuy2); - yv12touyvy = RENAME(yv12touyvy); - yuv422ptoyuy2 = RENAME(yuv422ptoyuy2); - yuv422ptouyvy = RENAME(yuv422ptouyvy); - yuy2toyv12 = RENAME(yuy2toyv12); - planar2x = RENAME(planar2x); - rgb24toyv12 = RENAME(rgb24toyv12); - interleaveBytes = RENAME(interleaveBytes); - vu9_to_vu12 = RENAME(vu9_to_vu12); - yvu9_to_yuy2 = RENAME(yvu9_to_yuy2); - - uyvytoyuv420 = RENAME(uyvytoyuv420); - uyvytoyuv422 = RENAME(uyvytoyuv422); - yuyvtoyuv420 = RENAME(yuyvtoyuv420); - yuyvtoyuv422 = RENAME(yuyvtoyuv422); +static inline void rgb2rgb_init_c(void) +{ + rgb15to16 = rgb15to16_c; + rgb15tobgr24 = rgb15tobgr24_c; + rgb15to32 = rgb15to32_c; + rgb16tobgr24 = rgb16tobgr24_c; + rgb16to32 = rgb16to32_c; + rgb16to15 = rgb16to15_c; + rgb24tobgr16 = rgb24tobgr16_c; + rgb24tobgr15 = rgb24tobgr15_c; + rgb24tobgr32 = rgb24tobgr32_c; + rgb32to16 = rgb32to16_c; + rgb32to15 = rgb32to15_c; + rgb32tobgr24 = rgb32tobgr24_c; + rgb24to15 = rgb24to15_c; + rgb24to16 = rgb24to16_c; + rgb24tobgr24 = rgb24tobgr24_c; + shuffle_bytes_2103 = shuffle_bytes_2103_c; + rgb32tobgr16 = rgb32tobgr16_c; + rgb32tobgr15 = rgb32tobgr15_c; + yv12toyuy2 = yv12toyuy2_c; + yv12touyvy = yv12touyvy_c; + yuv422ptoyuy2 = yuv422ptoyuy2_c; + yuv422ptouyvy = yuv422ptouyvy_c; + yuy2toyv12 = yuy2toyv12_c; + planar2x = planar2x_c; + rgb24toyv12 = rgb24toyv12_c; + interleaveBytes = interleaveBytes_c; + vu9_to_vu12 = vu9_to_vu12_c; + yvu9_to_yuy2 = yvu9_to_yuy2_c; + + uyvytoyuv420 = uyvytoyuv420_c; + uyvytoyuv422 = uyvytoyuv422_c; + yuyvtoyuv420 = yuyvtoyuv420_c; + yuyvtoyuv422 = yuyvtoyuv422_c; } diff --git a/libswscale/swscale-test.c b/libswscale/swscale-test.c index 
7f171ea725..888cbab26a 100644 --- a/libswscale/swscale-test.c +++ b/libswscale/swscale-test.c @@ -58,15 +58,11 @@ static uint64_t getSSD(uint8_t *src1, uint8_t *src2, int stride1, int stride2, i int x,y; uint64_t ssd=0; -//printf("%d %d\n", w, h); - for (y=0; y<h; y++) { for (x=0; x<w; x++) { int d= src1[x + y*stride1] - src2[x + y*stride2]; ssd+= d*d; -//printf("%d", abs(src1[x + y*stride1] - src2[x + y*stride2])/26 ); } -//printf("\n"); } return ssd; } @@ -162,8 +158,6 @@ static int doTest(uint8_t *ref[4], int refStride[4], int w, int h, goto end; } -// printf("test %X %X %X -> %X %X %X\n", (int)ref[0], (int)ref[1], (int)ref[2], -// (int)src[0], (int)src[1], (int)src[2]); printf(" %s %dx%d -> %s %3dx%3d flags=%2d", av_pix_fmt_descriptors[srcFormat].name, srcW, srcH, diff --git a/libswscale/swscale.c b/libswscale/swscale.c index d53af2771d..4318e0bf15 100644 --- a/libswscale/swscale.c +++ b/libswscale/swscale.c @@ -60,29 +60,14 @@ untested special converters #include "swscale.h" #include "swscale_internal.h" #include "rgb2rgb.h" +#include "libavutil/avassert.h" #include "libavutil/intreadwrite.h" -#include "libavutil/x86_cpu.h" +#include "libavutil/cpu.h" #include "libavutil/avutil.h" #include "libavutil/mathematics.h" #include "libavutil/bswap.h" #include "libavutil/pixdesc.h" -#undef MOVNTQ -#undef PAVGB - -//#undef HAVE_MMX2 -//#define HAVE_AMD3DNOW -//#undef HAVE_MMX -//#undef ARCH_X86 -#define DITHER1XBPP - -#define isPacked(x) ( \ - (x)==PIX_FMT_PAL8 \ - || (x)==PIX_FMT_YUYV422 \ - || (x)==PIX_FMT_UYVY422 \ - || (x)==PIX_FMT_GRAY8A \ - || isAnyRGB(x) \ - ) #define RGB2YUV_SHIFT 15 #define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5)) @@ -121,63 +106,6 @@ add BGR4 output support write special BGR->BGR scaler */ -#if ARCH_X86 -DECLARE_ASM_CONST(8, uint64_t, bF8)= 0xF8F8F8F8F8F8F8F8LL; -DECLARE_ASM_CONST(8, uint64_t, bFC)= 0xFCFCFCFCFCFCFCFCLL; -DECLARE_ASM_CONST(8, uint64_t, w10)= 0x0010001000100010LL; -DECLARE_ASM_CONST(8, uint64_t, w02)= 
0x0002000200020002LL; -DECLARE_ASM_CONST(8, uint64_t, bm00001111)=0x00000000FFFFFFFFLL; -DECLARE_ASM_CONST(8, uint64_t, bm00000111)=0x0000000000FFFFFFLL; -DECLARE_ASM_CONST(8, uint64_t, bm11111000)=0xFFFFFFFFFF000000LL; -DECLARE_ASM_CONST(8, uint64_t, bm01010101)=0x00FF00FF00FF00FFLL; - -const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = { - 0x0103010301030103LL, - 0x0200020002000200LL,}; - -const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = { - 0x0602060206020602LL, - 0x0004000400040004LL,}; - -DECLARE_ASM_CONST(8, uint64_t, b16Mask)= 0x001F001F001F001FLL; -DECLARE_ASM_CONST(8, uint64_t, g16Mask)= 0x07E007E007E007E0LL; -DECLARE_ASM_CONST(8, uint64_t, r16Mask)= 0xF800F800F800F800LL; -DECLARE_ASM_CONST(8, uint64_t, b15Mask)= 0x001F001F001F001FLL; -DECLARE_ASM_CONST(8, uint64_t, g15Mask)= 0x03E003E003E003E0LL; -DECLARE_ASM_CONST(8, uint64_t, r15Mask)= 0x7C007C007C007C00LL; - -DECLARE_ALIGNED(8, const uint64_t, ff_M24A) = 0x00FF0000FF0000FFLL; -DECLARE_ALIGNED(8, const uint64_t, ff_M24B) = 0xFF0000FF0000FF00LL; -DECLARE_ALIGNED(8, const uint64_t, ff_M24C) = 0x0000FF0000FF0000LL; - -#ifdef FAST_BGR2YV12 -DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff) = 0x000000210041000DULL; -DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff) = 0x0000FFEEFFDC0038ULL; -DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff) = 0x00000038FFD2FFF8ULL; -#else -DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff) = 0x000020E540830C8BULL; -DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff) = 0x0000ED0FDAC23831ULL; -DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff) = 0x00003831D0E6F6EAULL; -#endif /* FAST_BGR2YV12 */ -DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YOffset) = 0x1010101010101010ULL; -DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL; -DECLARE_ALIGNED(8, const uint64_t, ff_w1111) = 0x0001000100010001ULL; - -DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toY1Coeff) = 0x0C88000040870C88ULL; -DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toY2Coeff) = 0x20DE4087000020DEULL; 
-DECLARE_ASM_CONST(8, uint64_t, ff_rgb24toY1Coeff) = 0x20DE0000408720DEULL; -DECLARE_ASM_CONST(8, uint64_t, ff_rgb24toY2Coeff) = 0x0C88408700000C88ULL; -DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toYOffset) = 0x0008400000084000ULL; - -DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUV)[2][4] = { - {0x38380000DAC83838ULL, 0xECFFDAC80000ECFFULL, 0xF6E40000D0E3F6E4ULL, 0x3838D0E300003838ULL}, - {0xECFF0000DAC8ECFFULL, 0x3838DAC800003838ULL, 0x38380000D0E33838ULL, 0xF6E4D0E30000F6E4ULL}, -}; - -DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUVOffset)= 0x0040400000404000ULL; - -#endif /* ARCH_X86 */ - DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={ { 1, 3, 1, 3, 1, 3, 1, 3, }, { 2, 0, 2, 0, 2, 0, 2, 0, }, @@ -341,7 +269,9 @@ DECLARE_ALIGNED(8, const uint8_t, dithers)[8][8][8]={ { 112, 16,104, 8,118, 22,110, 14,}, }}; -uint16_t dither_scale[15][16]={ +static const uint8_t flat64[8]={64,64,64,64,64,64,64,64}; + +const uint16_t dither_scale[15][16]={ { 2, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,}, { 2, 3, 7, 7, 13, 13, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,}, { 3, 3, 4, 15, 15, 29, 57, 57, 57, 113, 113, 113, 113, 113, 113, 113,}, @@ -359,10 +289,14 @@ uint16_t dither_scale[15][16]={ { 3, 5, 7, 9, 11, 12, 14, 15, 15, 15, 15, 15, 15, 15, 16,65535,}, }; -static av_always_inline void yuv2yuvX16inC_template(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, - const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, - const int16_t **alpSrc, uint16_t *dest, uint16_t *uDest, uint16_t *vDest, uint16_t *aDest, - int dstW, int chrDstW, int big_endian, int output_bits) +static av_always_inline void +yuv2yuvX16_c_template(const int16_t *lumFilter, const int16_t **lumSrc, + int lumFilterSize, const int16_t *chrFilter, + const int16_t **chrUSrc, const int16_t **chrVSrc, + int chrFilterSize, const int16_t **alpSrc, + uint16_t *dest, uint16_t *uDest, uint16_t *vDest, + uint16_t *aDest, int dstW, int chrDstW, + int big_endian, int output_bits) { //FIXME 
Optimize (just quickly written not optimized..) int i; @@ -383,7 +317,7 @@ static av_always_inline void yuv2yuvX16inC_template(const int16_t *lumFilter, co } \ } for (i = 0; i < dstW; i++) { - int val = 1 << 10; + int val = 1 << (26-output_bits); int j; for (j = 0; j < lumFilterSize; j++) @@ -394,13 +328,13 @@ static av_always_inline void yuv2yuvX16inC_template(const int16_t *lumFilter, co if (uDest) { for (i = 0; i < chrDstW; i++) { - int u = 1 << 10; - int v = 1 << 10; + int u = 1 << (26-output_bits); + int v = 1 << (26-output_bits); int j; for (j = 0; j < chrFilterSize; j++) { - u += chrSrc[j][i ] * chrFilter[j]; - v += chrSrc[j][i + VOFW] * chrFilter[j]; + u += chrUSrc[j][i] * chrFilter[j]; + v += chrVSrc[j][i] * chrFilter[j]; } output_pixel(&uDest[i], u); @@ -410,7 +344,7 @@ static av_always_inline void yuv2yuvX16inC_template(const int16_t *lumFilter, co if (CONFIG_SWSCALE_ALPHA && aDest) { for (i = 0; i < dstW; i++) { - int val = 1 << 10; + int val = 1 << (26-output_bits); int j; for (j = 0; j < lumFilterSize; j++) @@ -419,89 +353,46 @@ static av_always_inline void yuv2yuvX16inC_template(const int16_t *lumFilter, co output_pixel(&aDest[i], val); } } +#undef output_pixel } -static av_always_inline void yuv2yuvXNinC_template(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, - const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, - const int16_t **alpSrc, uint16_t *dest, uint16_t *uDest, uint16_t *vDest, uint16_t *aDest, - int dstW, int chrDstW, int big_endian, int depth) -{ - //FIXME Optimize (just quickly written not optimized..) 
- int i; - - for (i = 0; i < dstW; i++) { - int val = 1 << (26-depth); - int j; - - for (j = 0; j < lumFilterSize; j++) - val += lumSrc[j][i] * lumFilter[j]; - - if (big_endian) { - AV_WB16(&dest[i], av_clip(val >> (27-depth), 0, (1<<depth)-1)); - } else { - AV_WL16(&dest[i], av_clip(val >> (27-depth), 0, (1<<depth)-1)); - } - } - - if (uDest) { - for (i = 0; i < chrDstW; i++) { - int u = 1 << (26-depth); - int v = 1 << (26-depth); - int j; - - for (j = 0; j < chrFilterSize; j++) { - u += chrSrc[j][i ] * chrFilter[j]; - v += chrSrc[j][i + VOFW] * chrFilter[j]; - } - - if (big_endian) { - AV_WB16(&uDest[i], av_clip(u >> (27-depth), 0, (1<<depth)-1)); - AV_WB16(&vDest[i], av_clip(v >> (27-depth), 0, (1<<depth)-1)); - } else { - AV_WL16(&uDest[i], av_clip(u >> (27-depth), 0, (1<<depth)-1)); - AV_WL16(&vDest[i], av_clip(v >> (27-depth), 0, (1<<depth)-1)); - } - } - } -} - -static inline void yuv2yuvX16inC(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, - const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, - const int16_t **alpSrc, uint16_t *dest, uint16_t *uDest, uint16_t *vDest, uint16_t *aDest, int dstW, int chrDstW, - enum PixelFormat dstFormat) -{ - if (isNBPS(dstFormat)) { - const int depth = av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1+1; - yuv2yuvXNinC_template(lumFilter, lumSrc, lumFilterSize, - chrFilter, chrSrc, chrFilterSize, - alpSrc, - dest, uDest, vDest, aDest, - dstW, chrDstW, isBE(dstFormat), depth); - } else { - if (isBE(dstFormat)) { - yuv2yuvX16inC_template(lumFilter, lumSrc, lumFilterSize, - chrFilter, chrSrc, chrFilterSize, - alpSrc, - dest, uDest, vDest, aDest, - dstW, chrDstW, 1, 16); - } else { - yuv2yuvX16inC_template(lumFilter, lumSrc, lumFilterSize, - chrFilter, chrSrc, chrFilterSize, - alpSrc, - dest, uDest, vDest, aDest, - dstW, chrDstW, 0, 16); - } - } +#define yuv2NBPS(bits, BE_LE, is_be) \ +static void yuv2yuvX ## bits ## BE_LE ## _c(SwsContext *c, const int16_t *lumFilter, \ + const 
int16_t **lumSrc, int lumFilterSize, \ + const int16_t *chrFilter, const int16_t **chrUSrc, \ + const int16_t **chrVSrc, \ + int chrFilterSize, const int16_t **alpSrc, \ + uint8_t *_dest, uint8_t *_uDest, uint8_t *_vDest, \ + uint8_t *_aDest, int dstW, int chrDstW) \ +{ \ + uint16_t *dest = (uint16_t *) _dest, *uDest = (uint16_t *) _uDest, \ + *vDest = (uint16_t *) _vDest, *aDest = (uint16_t *) _aDest; \ + yuv2yuvX16_c_template(lumFilter, lumSrc, lumFilterSize, \ + chrFilter, chrUSrc, chrVSrc, chrFilterSize, \ + alpSrc, \ + dest, uDest, vDest, aDest, \ + dstW, chrDstW, is_be, bits); \ } - -static inline void yuv2yuvXinC(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, - const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, - const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, int dstW, int chrDstW) +yuv2NBPS( 9, BE, 1); +yuv2NBPS( 9, LE, 0); +yuv2NBPS(10, BE, 1); +yuv2NBPS(10, LE, 0); +yuv2NBPS(16, BE, 1); +yuv2NBPS(16, LE, 0); + +static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter, + const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrUSrc, + const int16_t **chrVSrc, + int chrFilterSize, const int16_t **alpSrc, + uint8_t *dest, uint8_t *uDest, uint8_t *vDest, + uint8_t *aDest, int dstW, int chrDstW, + const uint8_t *lumDither, const uint8_t *chrDither) { //FIXME Optimize (just quickly written not optimized..) 
int i; for (i=0; i<dstW; i++) { - int val=1<<18; + int val = lumDither[i&7] << 12; int j; for (j=0; j<lumFilterSize; j++) val += lumSrc[j][i] * lumFilter[j]; @@ -511,12 +402,12 @@ static inline void yuv2yuvXinC(const int16_t *lumFilter, const int16_t **lumSrc, if (uDest) for (i=0; i<chrDstW; i++) { - int u=1<<18; - int v=1<<18; + int u = chrDither[i&7] << 12; + int v = chrDither[(i+3)&7] << 12; int j; for (j=0; j<chrFilterSize; j++) { - u += chrSrc[j][i] * chrFilter[j]; - v += chrSrc[j][i + VOFW] * chrFilter[j]; + u += chrUSrc[j][i] * chrFilter[j]; + v += chrVSrc[j][i] * chrFilter[j]; } uDest[i]= av_clip_uint8(u>>19); @@ -525,24 +416,58 @@ static inline void yuv2yuvXinC(const int16_t *lumFilter, const int16_t **lumSrc, if (CONFIG_SWSCALE_ALPHA && aDest) for (i=0; i<dstW; i++) { - int val=1<<18; + int val = lumDither[i&7] << 12; int j; for (j=0; j<lumFilterSize; j++) val += alpSrc[j][i] * lumFilter[j]; aDest[i]= av_clip_uint8(val>>19); } +} + +static void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc, + const int16_t *chrUSrc, const int16_t *chrVSrc, + const int16_t *alpSrc, + uint8_t *dest, uint8_t *uDest, uint8_t *vDest, + uint8_t *aDest, int dstW, int chrDstW, const uint8_t *lumDither, const uint8_t *chrDither) +{ + int i; + + for (i=0; i<dstW; i++) { + int val= (lumSrc[i]+lumDither[i&7])>>7; + dest[i]= av_clip_uint8(val); + } + if (uDest) + for (i=0; i<chrDstW; i++) { + int u=(chrUSrc[i]+chrDither[i&7])>>7; + int v=(chrVSrc[i]+chrDither[(i+3)&7])>>7; + uDest[i]= av_clip_uint8(u); + vDest[i]= av_clip_uint8(v); + } + + if (CONFIG_SWSCALE_ALPHA && aDest) + for (i=0; i<dstW; i++) { + int val= (alpSrc[i]+lumDither[i&7])>>7; + aDest[i]= av_clip_uint8(val); + } } -static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, - const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, - uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat) +static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter, 
+ const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrUSrc, + const int16_t **chrVSrc, int chrFilterSize, + const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest, + uint8_t *vDest, uint8_t *aDest, + int dstW, int chrDstW, + const uint8_t *lumDither, const uint8_t *chrDither) { + enum PixelFormat dstFormat = c->dstFormat; + //FIXME Optimize (just quickly written not optimized..) int i; for (i=0; i<dstW; i++) { - int val=1<<18; + int val = lumDither[i&7]<<12; int j; for (j=0; j<lumFilterSize; j++) val += lumSrc[j][i] * lumFilter[j]; @@ -555,12 +480,12 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc if (dstFormat == PIX_FMT_NV12) for (i=0; i<chrDstW; i++) { - int u=1<<18; - int v=1<<18; + int u = chrDither[i&7]<<12; + int v = chrDither[(i+3)&7]<<12; int j; for (j=0; j<chrFilterSize; j++) { - u += chrSrc[j][i] * chrFilter[j]; - v += chrSrc[j][i + VOFW] * chrFilter[j]; + u += chrUSrc[j][i] * chrFilter[j]; + v += chrVSrc[j][i] * chrFilter[j]; } uDest[2*i]= av_clip_uint8(u>>19); @@ -568,12 +493,12 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc } else for (i=0; i<chrDstW; i++) { - int u=1<<18; - int v=1<<18; + int u = chrDither[i&7]<<12; + int v = chrDither[(i+3)&7]<<12; int j; for (j=0; j<chrFilterSize; j++) { - u += chrSrc[j][i] * chrFilter[j]; - v += chrSrc[j][i + VOFW] * chrFilter[j]; + u += chrUSrc[j][i] * chrFilter[j]; + v += chrVSrc[j][i] * chrFilter[j]; } uDest[2*i]= av_clip_uint8(v>>19); @@ -581,7 +506,484 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc } } -#define YSCALE_YUV_2_PACKEDX_NOCLIP_C(type,alpha) \ +#define output_pixel(pos, val) \ + if (target == PIX_FMT_GRAY16BE) { \ + AV_WB16(pos, val); \ + } else { \ + AV_WL16(pos, val); \ + } + +static av_always_inline void +yuv2gray16_X_c_template(SwsContext *c, const int16_t *lumFilter, + const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const 
int16_t **chrUSrc, + const int16_t **chrVSrc, int chrFilterSize, + const int16_t **alpSrc, uint8_t *dest, int dstW, + int y, enum PixelFormat target) +{ + int i; + + for (i = 0; i < (dstW >> 1); i++) { + int j; + int Y1 = 1 << 18; + int Y2 = 1 << 18; + const int i2 = 2 * i; + + for (j = 0; j < lumFilterSize; j++) { + Y1 += lumSrc[j][i2] * lumFilter[j]; + Y2 += lumSrc[j][i2+1] * lumFilter[j]; + } + Y1 >>= 11; + Y2 >>= 11; + if ((Y1 | Y2) & 0x10000) { + Y1 = av_clip_uint16(Y1); + Y2 = av_clip_uint16(Y2); + } + output_pixel(&dest[2 * i2 + 0], Y1); + output_pixel(&dest[2 * i2 + 2], Y2); + } +} + +static av_always_inline void +yuv2gray16_2_c_template(SwsContext *c, const uint16_t *buf0, + const uint16_t *buf1, const uint16_t *ubuf0, + const uint16_t *ubuf1, const uint16_t *vbuf0, + const uint16_t *vbuf1, const uint16_t *abuf0, + const uint16_t *abuf1, uint8_t *dest, int dstW, + int yalpha, int uvalpha, int y, + enum PixelFormat target) +{ + int yalpha1 = 4095 - yalpha; \ + int i; + + for (i = 0; i < (dstW >> 1); i++) { + const int i2 = 2 * i; + int Y1 = (buf0[i2 ] * yalpha1 + buf1[i2 ] * yalpha) >> 11; + int Y2 = (buf0[i2+1] * yalpha1 + buf1[i2+1] * yalpha) >> 11; + + output_pixel(&dest[2 * i2 + 0], Y1); + output_pixel(&dest[2 * i2 + 2], Y2); + } +} + +static av_always_inline void +yuv2gray16_1_c_template(SwsContext *c, const uint16_t *buf0, + const uint16_t *ubuf0, const uint16_t *ubuf1, + const uint16_t *vbuf0, const uint16_t *vbuf1, + const uint16_t *abuf0, uint8_t *dest, int dstW, + int uvalpha, enum PixelFormat dstFormat, + int flags, int y, enum PixelFormat target) +{ + int i; + + for (i = 0; i < (dstW >> 1); i++) { + const int i2 = 2 * i; + int Y1 = buf0[i2 ] << 1; + int Y2 = buf0[i2+1] << 1; + + output_pixel(&dest[2 * i2 + 0], Y1); + output_pixel(&dest[2 * i2 + 2], Y2); + } +} + +#undef output_pixel + +#define YUV2PACKEDWRAPPER(name, base, ext, fmt) \ +static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \ + const int16_t **lumSrc, int 
lumFilterSize, \ + const int16_t *chrFilter, const int16_t **chrUSrc, \ + const int16_t **chrVSrc, int chrFilterSize, \ + const int16_t **alpSrc, uint8_t *dest, int dstW, \ + int y) \ +{ \ + name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \ + chrFilter, chrUSrc, chrVSrc, chrFilterSize, \ + alpSrc, dest, dstW, y, fmt); \ +} \ + \ +static void name ## ext ## _2_c(SwsContext *c, const uint16_t *buf0, \ + const uint16_t *buf1, const uint16_t *ubuf0, \ + const uint16_t *ubuf1, const uint16_t *vbuf0, \ + const uint16_t *vbuf1, const uint16_t *abuf0, \ + const uint16_t *abuf1, uint8_t *dest, int dstW, \ + int yalpha, int uvalpha, int y) \ +{ \ + name ## base ## _2_c_template(c, buf0, buf1, ubuf0, ubuf1, \ + vbuf0, vbuf1, abuf0, abuf1, \ + dest, dstW, yalpha, uvalpha, y, fmt); \ +} \ + \ +static void name ## ext ## _1_c(SwsContext *c, const uint16_t *buf0, \ + const uint16_t *ubuf0, const uint16_t *ubuf1, \ + const uint16_t *vbuf0, const uint16_t *vbuf1, \ + const uint16_t *abuf0, uint8_t *dest, int dstW, \ + int uvalpha, enum PixelFormat dstFormat, \ + int flags, int y) \ +{ \ + name ## base ## _1_c_template(c, buf0, ubuf0, ubuf1, vbuf0, \ + vbuf1, abuf0, dest, dstW, uvalpha, \ + dstFormat, flags, y, fmt); \ +} + +YUV2PACKEDWRAPPER(yuv2gray16,, LE, PIX_FMT_GRAY16LE); +YUV2PACKEDWRAPPER(yuv2gray16,, BE, PIX_FMT_GRAY16BE); + +#define output_pixel(pos, acc) \ + if (target == PIX_FMT_MONOBLACK) { \ + pos = acc; \ + } else { \ + pos = ~acc; \ + } + +static av_always_inline void +yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter, + const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrUSrc, + const int16_t **chrVSrc, int chrFilterSize, + const int16_t **alpSrc, uint8_t *dest, int dstW, + int y, enum PixelFormat target) +{ + const uint8_t * const d128=dither_8x8_220[y&7]; + uint8_t *g = c->table_gU[128] + c->table_gV[128]; + int i; + int acc = 0; + + for (i = 0; i < dstW - 1; i += 2) { + int j; + int Y1 = 1 << 
18; + int Y2 = 1 << 18; + + for (j = 0; j < lumFilterSize; j++) { + Y1 += lumSrc[j][i] * lumFilter[j]; + Y2 += lumSrc[j][i+1] * lumFilter[j]; + } + Y1 >>= 19; + Y2 >>= 19; + if ((Y1 | Y2) & 0x100) { + Y1 = av_clip_uint8(Y1); + Y2 = av_clip_uint8(Y2); + } + acc += acc + g[Y1 + d128[(i + 0) & 7]]; + acc += acc + g[Y2 + d128[(i + 1) & 7]]; + if ((i & 7) == 6) { + output_pixel(*dest++, acc); + } + } +} + +static av_always_inline void +yuv2mono_2_c_template(SwsContext *c, const uint16_t *buf0, + const uint16_t *buf1, const uint16_t *ubuf0, + const uint16_t *ubuf1, const uint16_t *vbuf0, + const uint16_t *vbuf1, const uint16_t *abuf0, + const uint16_t *abuf1, uint8_t *dest, int dstW, + int yalpha, int uvalpha, int y, + enum PixelFormat target) +{ + const uint8_t * const d128 = dither_8x8_220[y & 7]; + uint8_t *g = c->table_gU[128] + c->table_gV[128]; + int yalpha1 = 4095 - yalpha; + int i; + + for (i = 0; i < dstW - 7; i += 8) { + int acc = g[((buf0[i ] * yalpha1 + buf1[i ] * yalpha) >> 19) + d128[0]]; + acc += acc + g[((buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19) + d128[1]]; + acc += acc + g[((buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19) + d128[2]]; + acc += acc + g[((buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19) + d128[3]]; + acc += acc + g[((buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19) + d128[4]]; + acc += acc + g[((buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19) + d128[5]]; + acc += acc + g[((buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19) + d128[6]]; + acc += acc + g[((buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19) + d128[7]]; + output_pixel(*dest++, acc); + } +} + +static av_always_inline void +yuv2mono_1_c_template(SwsContext *c, const uint16_t *buf0, + const uint16_t *ubuf0, const uint16_t *ubuf1, + const uint16_t *vbuf0, const uint16_t *vbuf1, + const uint16_t *abuf0, uint8_t *dest, int dstW, + int uvalpha, enum PixelFormat dstFormat, + int flags, int y, enum PixelFormat target) +{ + const uint8_t * const d128 = 
dither_8x8_220[y & 7]; + uint8_t *g = c->table_gU[128] + c->table_gV[128]; + int i; + + for (i = 0; i < dstW - 7; i += 8) { + int acc = g[(buf0[i ] >> 7) + d128[0]]; + acc += acc + g[(buf0[i + 1] >> 7) + d128[1]]; + acc += acc + g[(buf0[i + 2] >> 7) + d128[2]]; + acc += acc + g[(buf0[i + 3] >> 7) + d128[3]]; + acc += acc + g[(buf0[i + 4] >> 7) + d128[4]]; + acc += acc + g[(buf0[i + 5] >> 7) + d128[5]]; + acc += acc + g[(buf0[i + 6] >> 7) + d128[6]]; + acc += acc + g[(buf0[i + 7] >> 7) + d128[7]]; + output_pixel(*dest++, acc); + } +} + +#undef output_pixel + +YUV2PACKEDWRAPPER(yuv2mono,, white, PIX_FMT_MONOWHITE); +YUV2PACKEDWRAPPER(yuv2mono,, black, PIX_FMT_MONOBLACK); + +#define output_pixels(pos, Y1, U, Y2, V) \ + if (target == PIX_FMT_YUYV422) { \ + dest[pos + 0] = Y1; \ + dest[pos + 1] = U; \ + dest[pos + 2] = Y2; \ + dest[pos + 3] = V; \ + } else { \ + dest[pos + 0] = U; \ + dest[pos + 1] = Y1; \ + dest[pos + 2] = V; \ + dest[pos + 3] = Y2; \ + } + +static av_always_inline void +yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter, + const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrUSrc, + const int16_t **chrVSrc, int chrFilterSize, + const int16_t **alpSrc, uint8_t *dest, int dstW, + int y, enum PixelFormat target) +{ + int i; + + for (i = 0; i < (dstW >> 1); i++) { + int j; + int Y1 = 1 << 18; + int Y2 = 1 << 18; + int U = 1 << 18; + int V = 1 << 18; + + for (j = 0; j < lumFilterSize; j++) { + Y1 += lumSrc[j][i * 2] * lumFilter[j]; + Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j]; + } + for (j = 0; j < chrFilterSize; j++) { + U += chrUSrc[j][i] * chrFilter[j]; + V += chrVSrc[j][i] * chrFilter[j]; + } + Y1 >>= 19; + Y2 >>= 19; + U >>= 19; + V >>= 19; + if ((Y1 | Y2 | U | V) & 0x100) { + Y1 = av_clip_uint8(Y1); + Y2 = av_clip_uint8(Y2); + U = av_clip_uint8(U); + V = av_clip_uint8(V); + } + output_pixels(4*i, Y1, U, Y2, V); + } +} + +static av_always_inline void +yuv2422_2_c_template(SwsContext *c, const uint16_t 
*buf0, + const uint16_t *buf1, const uint16_t *ubuf0, + const uint16_t *ubuf1, const uint16_t *vbuf0, + const uint16_t *vbuf1, const uint16_t *abuf0, + const uint16_t *abuf1, uint8_t *dest, int dstW, + int yalpha, int uvalpha, int y, + enum PixelFormat target) +{ + int yalpha1 = 4095 - yalpha; + int uvalpha1 = 4095 - uvalpha; + int i; + + for (i = 0; i < (dstW >> 1); i++) { + int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19; + int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19; + int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19; + int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19; + + output_pixels(i * 4, Y1, U, Y2, V); + } +} + +static av_always_inline void +yuv2422_1_c_template(SwsContext *c, const uint16_t *buf0, + const uint16_t *ubuf0, const uint16_t *ubuf1, + const uint16_t *vbuf0, const uint16_t *vbuf1, + const uint16_t *abuf0, uint8_t *dest, int dstW, + int uvalpha, enum PixelFormat dstFormat, + int flags, int y, enum PixelFormat target) +{ + int i; + + if (uvalpha < 2048) { + for (i = 0; i < (dstW >> 1); i++) { + int Y1 = buf0[i * 2] >> 7; + int Y2 = buf0[i * 2 + 1] >> 7; + int U = ubuf1[i] >> 7; + int V = vbuf1[i] >> 7; + + output_pixels(i * 4, Y1, U, Y2, V); + } + } else { + for (i = 0; i < (dstW >> 1); i++) { + int Y1 = buf0[i * 2] >> 7; + int Y2 = buf0[i * 2 + 1] >> 7; + int U = (ubuf0[i] + ubuf1[i]) >> 8; + int V = (vbuf0[i] + vbuf1[i]) >> 8; + + output_pixels(i * 4, Y1, U, Y2, V); + } + } +} + +#undef output_pixels + +YUV2PACKEDWRAPPER(yuv2, 422, yuyv422, PIX_FMT_YUYV422); +YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, PIX_FMT_UYVY422); + +#define r_b ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? r : b) +#define b_r ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? 
b : r) + +static av_always_inline void +yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter, + const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrUSrc, + const int16_t **chrVSrc, int chrFilterSize, + const int16_t **alpSrc, uint8_t *dest, int dstW, + int y, enum PixelFormat target) +{ + int i; + + for (i = 0; i < (dstW >> 1); i++) { + int j; + int Y1 = 1 << 18; + int Y2 = 1 << 18; + int U = 1 << 18; + int V = 1 << 18; + const uint8_t *r, *g, *b; + + for (j = 0; j < lumFilterSize; j++) { + Y1 += lumSrc[j][i * 2] * lumFilter[j]; + Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j]; + } + for (j = 0; j < chrFilterSize; j++) { + U += chrUSrc[j][i] * chrFilter[j]; + V += chrVSrc[j][i] * chrFilter[j]; + } + Y1 >>= 19; + Y2 >>= 19; + U >>= 19; + V >>= 19; + if ((Y1 | Y2 | U | V) & 0x100) { + Y1 = av_clip_uint8(Y1); + Y2 = av_clip_uint8(Y2); + U = av_clip_uint8(U); + V = av_clip_uint8(V); + } + + /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/ + r = (const uint8_t *) c->table_rV[V]; + g = (const uint8_t *)(c->table_gU[U] + c->table_gV[V]); + b = (const uint8_t *) c->table_bU[U]; + + dest[ 0] = dest[ 1] = r_b[Y1]; + dest[ 2] = dest[ 3] = g[Y1]; + dest[ 4] = dest[ 5] = b_r[Y1]; + dest[ 6] = dest[ 7] = r_b[Y2]; + dest[ 8] = dest[ 9] = g[Y2]; + dest[10] = dest[11] = b_r[Y2]; + dest += 12; + } +} + +static av_always_inline void +yuv2rgb48_2_c_template(SwsContext *c, const uint16_t *buf0, + const uint16_t *buf1, const uint16_t *ubuf0, + const uint16_t *ubuf1, const uint16_t *vbuf0, + const uint16_t *vbuf1, const uint16_t *abuf0, + const uint16_t *abuf1, uint8_t *dest, int dstW, + int yalpha, int uvalpha, int y, + enum PixelFormat target) +{ + int yalpha1 = 4095 - yalpha; + int uvalpha1 = 4095 - uvalpha; + int i; + + for (i = 0; i < (dstW >> 1); i++) { + int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19; + int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19; + int U = (ubuf0[i] * 
uvalpha1 + ubuf1[i] * uvalpha) >> 19; + int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19; + const uint8_t *r = (const uint8_t *) c->table_rV[V], + *g = (const uint8_t *)(c->table_gU[U] + c->table_gV[V]), + *b = (const uint8_t *) c->table_bU[U]; + + dest[ 0] = dest[ 1] = r_b[Y1]; + dest[ 2] = dest[ 3] = g[Y1]; + dest[ 4] = dest[ 5] = b_r[Y1]; + dest[ 6] = dest[ 7] = r_b[Y2]; + dest[ 8] = dest[ 9] = g[Y2]; + dest[10] = dest[11] = b_r[Y2]; + dest += 12; + } +} + +static av_always_inline void +yuv2rgb48_1_c_template(SwsContext *c, const uint16_t *buf0, + const uint16_t *ubuf0, const uint16_t *ubuf1, + const uint16_t *vbuf0, const uint16_t *vbuf1, + const uint16_t *abuf0, uint8_t *dest, int dstW, + int uvalpha, enum PixelFormat dstFormat, + int flags, int y, enum PixelFormat target) +{ + int i; + + if (uvalpha < 2048) { + for (i = 0; i < (dstW >> 1); i++) { + int Y1 = buf0[i * 2] >> 7; + int Y2 = buf0[i * 2 + 1] >> 7; + int U = ubuf1[i] >> 7; + int V = vbuf1[i] >> 7; + const uint8_t *r = (const uint8_t *) c->table_rV[V], + *g = (const uint8_t *)(c->table_gU[U] + c->table_gV[V]), + *b = (const uint8_t *) c->table_bU[U]; + + dest[ 0] = dest[ 1] = r_b[Y1]; + dest[ 2] = dest[ 3] = g[Y1]; + dest[ 4] = dest[ 5] = b_r[Y1]; + dest[ 6] = dest[ 7] = r_b[Y2]; + dest[ 8] = dest[ 9] = g[Y2]; + dest[10] = dest[11] = b_r[Y2]; + dest += 12; + } + } else { + for (i = 0; i < (dstW >> 1); i++) { + int Y1 = buf0[i * 2] >> 7; + int Y2 = buf0[i * 2 + 1] >> 7; + int U = (ubuf0[i] + ubuf1[i]) >> 8; + int V = (vbuf0[i] + vbuf1[i]) >> 8; + const uint8_t *r = (const uint8_t *) c->table_rV[V], + *g = (const uint8_t *)(c->table_gU[U] + c->table_gV[V]), + *b = (const uint8_t *) c->table_bU[U]; + + dest[ 0] = dest[ 1] = r_b[Y1]; + dest[ 2] = dest[ 3] = g[Y1]; + dest[ 4] = dest[ 5] = b_r[Y1]; + dest[ 6] = dest[ 7] = r_b[Y2]; + dest[ 8] = dest[ 9] = g[Y2]; + dest[10] = dest[11] = b_r[Y2]; + dest += 12; + } + } +} + +#undef r_b +#undef b_r + +YUV2PACKEDWRAPPER(yuv2, rgb48, rgb48be, 
PIX_FMT_RGB48BE); +//YUV2PACKEDWRAPPER(yuv2, rgb48, rgb48le, PIX_FMT_RGB48LE); +YUV2PACKEDWRAPPER(yuv2, rgb48, bgr48be, PIX_FMT_BGR48BE); +//YUV2PACKEDWRAPPER(yuv2, rgb48, bgr48le, PIX_FMT_BGR48LE); + +#define YSCALE_YUV_2_RGBX_C(type,alpha) \ for (i=0; i<(dstW>>1); i++) {\ int j;\ int Y1 = 1<<18;\ @@ -597,13 +999,19 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc Y2 += lumSrc[j][i2+1] * lumFilter[j];\ }\ for (j=0; j<chrFilterSize; j++) {\ - U += chrSrc[j][i] * chrFilter[j];\ - V += chrSrc[j][i+VOFW] * chrFilter[j];\ + U += chrUSrc[j][i] * chrFilter[j];\ + V += chrVSrc[j][i] * chrFilter[j];\ }\ Y1>>=19;\ Y2>>=19;\ U >>=19;\ V >>=19;\ + if ((Y1|Y2|U|V)&0x100) {\ + Y1 = av_clip_uint8(Y1); \ + Y2 = av_clip_uint8(Y2); \ + U = av_clip_uint8(U); \ + V = av_clip_uint8(V); \ + }\ if (alpha) {\ A1 = 1<<18;\ A2 = 1<<18;\ @@ -613,31 +1021,22 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc }\ A1>>=19;\ A2>>=19;\ - } - -#define YSCALE_YUV_2_PACKEDX_C(type,alpha) \ - YSCALE_YUV_2_PACKEDX_NOCLIP_C(type,alpha)\ - if ((Y1|Y2|U|V)&256) {\ - if (Y1>255) Y1=255; \ - else if (Y1<0)Y1=0; \ - if (Y2>255) Y2=255; \ - else if (Y2<0)Y2=0; \ - if (U>255) U=255; \ - else if (U<0) U=0; \ - if (V>255) V=255; \ - else if (V<0) V=0; \ + if ((A1|A2)&0x100) {\ + A1 = av_clip_uint8(A1); \ + A2 = av_clip_uint8(A2); \ + }\ }\ - if (alpha && ((A1|A2)&256)) {\ - A1=av_clip_uint8(A1);\ - A2=av_clip_uint8(A2);\ - } + /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/\ + r = (type *)c->table_rV[V]; \ + g = (type *)(c->table_gU[U] + c->table_gV[V]); \ + b = (type *)c->table_bU[U]; -#define YSCALE_YUV_2_PACKEDX_FULL_C(rnd,alpha) \ +#define YSCALE_YUV_2_RGBX_FULL_C(rnd,alpha) \ for (i=0; i<dstW; i++) {\ int j;\ - int Y = 0;\ - int U = -128<<19;\ - int V = -128<<19;\ + int Y = 1<<9;\ + int U = (1<<9)-(128<<19);\ + int V = (1<<9)-(128<<19);\ int av_unused A;\ int R,G,B;\ \ @@ -645,23 +1044,20 @@ static inline void 
yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc Y += lumSrc[j][i ] * lumFilter[j];\ }\ for (j=0; j<chrFilterSize; j++) {\ - U += chrSrc[j][i ] * chrFilter[j];\ - V += chrSrc[j][i+VOFW] * chrFilter[j];\ + U += chrUSrc[j][i] * chrFilter[j];\ + V += chrVSrc[j][i] * chrFilter[j];\ }\ Y >>=10;\ U >>=10;\ V >>=10;\ if (alpha) {\ - A = rnd;\ + A = rnd>>3;\ for (j=0; j<lumFilterSize; j++)\ A += alpSrc[j][i ] * lumFilter[j];\ A >>=19;\ - if (A&256)\ + if (A&0x100)\ A = av_clip_uint8(A);\ - } - -#define YSCALE_YUV_2_RGBX_FULL_C(rnd,alpha) \ - YSCALE_YUV_2_PACKEDX_FULL_C(rnd>>3,alpha)\ + }\ Y-= c->yuv2rgb_y_offset;\ Y*= c->yuv2rgb_y_coeff;\ Y+= rnd;\ @@ -669,193 +1065,64 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc G= Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;\ B= Y + U*c->yuv2rgb_u2b_coeff;\ if ((R|G|B)&(0xC0000000)) {\ - if (R>=(256<<22)) R=(256<<22)-1; \ - else if (R<0)R=0; \ - if (G>=(256<<22)) G=(256<<22)-1; \ - else if (G<0)G=0; \ - if (B>=(256<<22)) B=(256<<22)-1; \ - else if (B<0)B=0; \ + R = av_clip_uintp2(R, 30); \ + G = av_clip_uintp2(G, 30); \ + B = av_clip_uintp2(B, 30); \ } -#define YSCALE_YUV_2_GRAY16_C \ - for (i=0; i<(dstW>>1); i++) {\ - int j;\ - int Y1 = 1<<18;\ - int Y2 = 1<<18;\ - int U = 1<<18;\ - int V = 1<<18;\ - \ - const int i2= 2*i;\ - \ - for (j=0; j<lumFilterSize; j++) {\ - Y1 += lumSrc[j][i2] * lumFilter[j];\ - Y2 += lumSrc[j][i2+1] * lumFilter[j];\ - }\ - Y1>>=11;\ - Y2>>=11;\ - if ((Y1|Y2|U|V)&65536) {\ - if (Y1>65535) Y1=65535; \ - else if (Y1<0)Y1=0; \ - if (Y2>65535) Y2=65535; \ - else if (Y2<0)Y2=0; \ - } - -#define YSCALE_YUV_2_RGBX_C(type,alpha) \ - YSCALE_YUV_2_PACKEDX_C(type,alpha) /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/\ - r = (type *)c->table_rV[V]; \ - g = (type *)(c->table_gU[U] + c->table_gV[V]); \ - b = (type *)c->table_bU[U]; - -#define YSCALE_YUV_2_PACKED2_C(type,alpha) \ +#define YSCALE_YUV_2_RGB2_C(type,alpha) \ for (i=0; 
i<(dstW>>1); i++) { \ const int i2= 2*i; \ int Y1= (buf0[i2 ]*yalpha1+buf1[i2 ]*yalpha)>>19; \ int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>19; \ - int U= (uvbuf0[i ]*uvalpha1+uvbuf1[i ]*uvalpha)>>19; \ - int V= (uvbuf0[i+VOFW]*uvalpha1+uvbuf1[i+VOFW]*uvalpha)>>19; \ + int U= (ubuf0[i]*uvalpha1+ubuf1[i]*uvalpha)>>19; \ + int V= (vbuf0[i]*uvalpha1+vbuf1[i]*uvalpha)>>19; \ type av_unused *r, *b, *g; \ int av_unused A1, A2; \ if (alpha) {\ A1= (abuf0[i2 ]*yalpha1+abuf1[i2 ]*yalpha)>>19; \ A2= (abuf0[i2+1]*yalpha1+abuf1[i2+1]*yalpha)>>19; \ - } - -#define YSCALE_YUV_2_GRAY16_2_C \ - for (i=0; i<(dstW>>1); i++) { \ - const int i2= 2*i; \ - int Y1= (buf0[i2 ]*yalpha1+buf1[i2 ]*yalpha)>>11; \ - int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>11; - -#define YSCALE_YUV_2_RGB2_C(type,alpha) \ - YSCALE_YUV_2_PACKED2_C(type,alpha)\ + }\ r = (type *)c->table_rV[V];\ g = (type *)(c->table_gU[U] + c->table_gV[V]);\ b = (type *)c->table_bU[U]; -#define YSCALE_YUV_2_PACKED1_C(type,alpha) \ +#define YSCALE_YUV_2_RGB1_C(type,alpha) \ for (i=0; i<(dstW>>1); i++) {\ const int i2= 2*i;\ int Y1= buf0[i2 ]>>7;\ int Y2= buf0[i2+1]>>7;\ - int U= (uvbuf1[i ])>>7;\ - int V= (uvbuf1[i+VOFW])>>7;\ + int U= (ubuf1[i])>>7;\ + int V= (vbuf1[i])>>7;\ type av_unused *r, *b, *g;\ int av_unused A1, A2;\ if (alpha) {\ A1= abuf0[i2 ]>>7;\ A2= abuf0[i2+1]>>7;\ - } - -#define YSCALE_YUV_2_GRAY16_1_C \ - for (i=0; i<(dstW>>1); i++) {\ - const int i2= 2*i;\ - int Y1= buf0[i2 ]<<1;\ - int Y2= buf0[i2+1]<<1; - -#define YSCALE_YUV_2_RGB1_C(type,alpha) \ - YSCALE_YUV_2_PACKED1_C(type,alpha)\ + }\ r = (type *)c->table_rV[V];\ g = (type *)(c->table_gU[U] + c->table_gV[V]);\ b = (type *)c->table_bU[U]; -#define YSCALE_YUV_2_PACKED1B_C(type,alpha) \ +#define YSCALE_YUV_2_RGB1B_C(type,alpha) \ for (i=0; i<(dstW>>1); i++) {\ const int i2= 2*i;\ int Y1= buf0[i2 ]>>7;\ int Y2= buf0[i2+1]>>7;\ - int U= (uvbuf0[i ] + uvbuf1[i ])>>8;\ - int V= (uvbuf0[i+VOFW] + uvbuf1[i+VOFW])>>8;\ + int U= (ubuf0[i] + ubuf1[i])>>8;\ + 
int V= (vbuf0[i] + vbuf1[i])>>8;\ type av_unused *r, *b, *g;\ int av_unused A1, A2;\ if (alpha) {\ A1= abuf0[i2 ]>>7;\ A2= abuf0[i2+1]>>7;\ - } - -#define YSCALE_YUV_2_RGB1B_C(type,alpha) \ - YSCALE_YUV_2_PACKED1B_C(type,alpha)\ + }\ r = (type *)c->table_rV[V];\ g = (type *)(c->table_gU[U] + c->table_gV[V]);\ b = (type *)c->table_bU[U]; -#define YSCALE_YUV_2_MONO2_C \ - const uint8_t * const d128=dither_8x8_220[y&7];\ - uint8_t *g= c->table_gU[128] + c->table_gV[128];\ - for (i=0; i<dstW-7; i+=8) {\ - int acc;\ - acc = g[((buf0[i ]*yalpha1+buf1[i ]*yalpha)>>19) + d128[0]];\ - acc+= acc + g[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19) + d128[1]];\ - acc+= acc + g[((buf0[i+2]*yalpha1+buf1[i+2]*yalpha)>>19) + d128[2]];\ - acc+= acc + g[((buf0[i+3]*yalpha1+buf1[i+3]*yalpha)>>19) + d128[3]];\ - acc+= acc + g[((buf0[i+4]*yalpha1+buf1[i+4]*yalpha)>>19) + d128[4]];\ - acc+= acc + g[((buf0[i+5]*yalpha1+buf1[i+5]*yalpha)>>19) + d128[5]];\ - acc+= acc + g[((buf0[i+6]*yalpha1+buf1[i+6]*yalpha)>>19) + d128[6]];\ - acc+= acc + g[((buf0[i+7]*yalpha1+buf1[i+7]*yalpha)>>19) + d128[7]];\ - ((uint8_t*)dest)[0]= c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\ - dest++;\ - } - -#define YSCALE_YUV_2_MONOX_C \ - const uint8_t * const d128=dither_8x8_220[y&7];\ - uint8_t *g= c->table_gU[128] + c->table_gV[128];\ - int acc=0;\ - for (i=0; i<dstW-1; i+=2) {\ - int j;\ - int Y1=1<<18;\ - int Y2=1<<18;\ -\ - for (j=0; j<lumFilterSize; j++) {\ - Y1 += lumSrc[j][i] * lumFilter[j];\ - Y2 += lumSrc[j][i+1] * lumFilter[j];\ - }\ - Y1>>=19;\ - Y2>>=19;\ - if ((Y1|Y2)&256) {\ - if (Y1>255) Y1=255;\ - else if (Y1<0)Y1=0;\ - if (Y2>255) Y2=255;\ - else if (Y2<0)Y2=0;\ - }\ - acc+= acc + g[Y1+d128[(i+0)&7]];\ - acc+= acc + g[Y2+d128[(i+1)&7]];\ - if ((i&7)==6) {\ - ((uint8_t*)dest)[0]= c->dstFormat == PIX_FMT_MONOBLACK ? 
acc : ~acc;\ - dest++;\ - }\ - } - -#define YSCALE_YUV_2_ANYRGB_C(func, func2, func_g16, func_monoblack)\ +#define YSCALE_YUV_2_ANYRGB_C(func)\ switch(c->dstFormat) {\ - case PIX_FMT_RGB48BE:\ - case PIX_FMT_RGB48LE:\ - func(uint8_t,0)\ - ((uint8_t*)dest)[ 0]= r[Y1];\ - ((uint8_t*)dest)[ 1]= r[Y1];\ - ((uint8_t*)dest)[ 2]= g[Y1];\ - ((uint8_t*)dest)[ 3]= g[Y1];\ - ((uint8_t*)dest)[ 4]= b[Y1];\ - ((uint8_t*)dest)[ 5]= b[Y1];\ - ((uint8_t*)dest)[ 6]= r[Y2];\ - ((uint8_t*)dest)[ 7]= r[Y2];\ - ((uint8_t*)dest)[ 8]= g[Y2];\ - ((uint8_t*)dest)[ 9]= g[Y2];\ - ((uint8_t*)dest)[10]= b[Y2];\ - ((uint8_t*)dest)[11]= b[Y2];\ - dest+=12;\ - }\ - break;\ - case PIX_FMT_BGR48BE:\ - case PIX_FMT_BGR48LE:\ - func(uint8_t,0)\ - ((uint8_t*)dest)[ 0] = ((uint8_t*)dest)[ 1] = b[Y1];\ - ((uint8_t*)dest)[ 2] = ((uint8_t*)dest)[ 3] = g[Y1];\ - ((uint8_t*)dest)[ 4] = ((uint8_t*)dest)[ 5] = r[Y1];\ - ((uint8_t*)dest)[ 6] = ((uint8_t*)dest)[ 7] = b[Y2];\ - ((uint8_t*)dest)[ 8] = ((uint8_t*)dest)[ 9] = g[Y2];\ - ((uint8_t*)dest)[10] = ((uint8_t*)dest)[11] = r[Y2];\ - dest+=12;\ - }\ - break;\ case PIX_FMT_RGBA:\ case PIX_FMT_BGRA:\ if (CONFIG_SMALL) {\ @@ -922,10 +1189,8 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc dest+=6;\ }\ break;\ - case PIX_FMT_RGB565BE:\ - case PIX_FMT_RGB565LE:\ - case PIX_FMT_BGR565BE:\ - case PIX_FMT_BGR565LE:\ + case PIX_FMT_RGB565:\ + case PIX_FMT_BGR565:\ {\ const int dr1= dither_2x2_8[y&1 ][0];\ const int dg1= dither_2x2_4[y&1 ][0];\ @@ -939,10 +1204,8 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc }\ }\ break;\ - case PIX_FMT_RGB555BE:\ - case PIX_FMT_RGB555LE:\ - case PIX_FMT_BGR555BE:\ - case PIX_FMT_BGR555LE:\ + case PIX_FMT_RGB555:\ + case PIX_FMT_BGR555:\ {\ const int dr1= dither_2x2_8[y&1 ][0];\ const int dg1= dither_2x2_8[y&1 ][1];\ @@ -956,10 +1219,8 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc }\ }\ break;\ - case PIX_FMT_RGB444BE:\ - case 
PIX_FMT_RGB444LE:\ - case PIX_FMT_BGR444BE:\ - case PIX_FMT_BGR444LE:\ + case PIX_FMT_RGB444:\ + case PIX_FMT_BGR444:\ {\ const int dr1= dither_4x4_16[y&3 ][0];\ const int dg1= dither_4x4_16[y&3 ][1];\ @@ -1006,57 +1267,23 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc }\ }\ break;\ - case PIX_FMT_MONOBLACK:\ - case PIX_FMT_MONOWHITE:\ - {\ - func_monoblack\ - }\ - break;\ - case PIX_FMT_YUYV422:\ - func2\ - ((uint8_t*)dest)[2*i2+0]= Y1;\ - ((uint8_t*)dest)[2*i2+1]= U;\ - ((uint8_t*)dest)[2*i2+2]= Y2;\ - ((uint8_t*)dest)[2*i2+3]= V;\ - } \ - break;\ - case PIX_FMT_UYVY422:\ - func2\ - ((uint8_t*)dest)[2*i2+0]= U;\ - ((uint8_t*)dest)[2*i2+1]= Y1;\ - ((uint8_t*)dest)[2*i2+2]= V;\ - ((uint8_t*)dest)[2*i2+3]= Y2;\ - } \ - break;\ - case PIX_FMT_GRAY16BE:\ - func_g16\ - ((uint8_t*)dest)[2*i2+0]= Y1>>8;\ - ((uint8_t*)dest)[2*i2+1]= Y1;\ - ((uint8_t*)dest)[2*i2+2]= Y2>>8;\ - ((uint8_t*)dest)[2*i2+3]= Y2;\ - } \ - break;\ - case PIX_FMT_GRAY16LE:\ - func_g16\ - ((uint8_t*)dest)[2*i2+0]= Y1;\ - ((uint8_t*)dest)[2*i2+1]= Y1>>8;\ - ((uint8_t*)dest)[2*i2+2]= Y2;\ - ((uint8_t*)dest)[2*i2+3]= Y2>>8;\ - } \ - break;\ } -static inline void yuv2packedXinC(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, - const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, - const int16_t **alpSrc, uint8_t *dest, int dstW, int y) +static void yuv2packedX_c(SwsContext *c, const int16_t *lumFilter, + const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrUSrc, + const int16_t **chrVSrc, int chrFilterSize, + const int16_t **alpSrc, uint8_t *dest, int dstW, int y) { int i; - YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGBX_C, YSCALE_YUV_2_PACKEDX_C(void,0), YSCALE_YUV_2_GRAY16_C, YSCALE_YUV_2_MONOX_C) + YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGBX_C) } -static inline void yuv2rgbXinC_full(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, - const int16_t 
*chrFilter, const int16_t **chrSrc, int chrFilterSize, - const int16_t **alpSrc, uint8_t *dest, int dstW, int y) +static void yuv2rgbX_c_full(SwsContext *c, const int16_t *lumFilter, + const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrUSrc, + const int16_t **chrVSrc, int chrFilterSize, + const int16_t **alpSrc, uint8_t *dest, int dstW, int y) { int i; int step= c->dstFormatBpp/8; @@ -1138,7 +1365,45 @@ static inline void yuv2rgbXinC_full(SwsContext *c, const int16_t *lumFilter, con } } -static void fillPlane(uint8_t* plane, int stride, int width, int height, int y, uint8_t val) +/** + * vertical bilinear scale YV12 to RGB + */ +static void yuv2packed2_c(SwsContext *c, const uint16_t *buf0, + const uint16_t *buf1, const uint16_t *ubuf0, + const uint16_t *ubuf1, const uint16_t *vbuf0, + const uint16_t *vbuf1, const uint16_t *abuf0, + const uint16_t *abuf1, uint8_t *dest, int dstW, + int yalpha, int uvalpha, int y) +{ + int yalpha1=4095- yalpha; + int uvalpha1=4095-uvalpha; + int i; + + YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C) +} + +/** + * YV12 to RGB without scaling or interpolating + */ +static void yuv2packed1_c(SwsContext *c, const uint16_t *buf0, + const uint16_t *ubuf0, const uint16_t *ubuf1, + const uint16_t *vbuf0, const uint16_t *vbuf1, + const uint16_t *abuf0, uint8_t *dest, int dstW, + int uvalpha, enum PixelFormat dstFormat, + int flags, int y) +{ + int i; + + if (uvalpha < 2048) { + YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C) + } else { + YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C) + } +} + +static av_always_inline void fillPlane(uint8_t* plane, int stride, + int width, int height, + int y, uint8_t val) { int i; uint8_t *ptr = plane + stride*y; @@ -1148,1195 +1413,1186 @@ static void fillPlane(uint8_t* plane, int stride, int width, int height, int y, } } -static inline void rgb48ToY(uint8_t *dst, const uint8_t *src, long width, - uint32_t *unused) +#define input_pixel(pos) (isBE(origin) ? 
AV_RB16(pos) : AV_RL16(pos)) + +#define r ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? b_r : r_b) +#define b ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? r_b : b_r) + +static av_always_inline void +rgb48ToY_c_template(int16_t *dst, const uint16_t *src, int width, + enum PixelFormat origin) { int i; for (i = 0; i < width; i++) { - int r = src[i*6+0]; - int g = src[i*6+2]; - int b = src[i*6+4]; + int r_b = input_pixel(&src[i*3+0]); + int g = input_pixel(&src[i*3+1]); + int b_r = input_pixel(&src[i*3+2]); - dst[i] = (RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; + dst[i] = (RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1+8)) + (1<<(RGB2YUV_SHIFT-7+8))) >> (RGB2YUV_SHIFT-6+8); } } -static inline void rgb48ToUV(uint8_t *dstU, uint8_t *dstV, - const uint8_t *src1, const uint8_t *src2, - long width, uint32_t *unused) +static av_always_inline void +rgb48ToUV_c_template(int16_t *dstU, int16_t *dstV, + const uint16_t *src1, const uint16_t *src2, + int width, enum PixelFormat origin) { int i; assert(src1==src2); for (i = 0; i < width; i++) { - int r = src1[6*i + 0]; - int g = src1[6*i + 2]; - int b = src1[6*i + 4]; + int r_b = input_pixel(&src1[i*3+0]); + int g = input_pixel(&src1[i*3+1]); + int b_r = input_pixel(&src1[i*3+2]); - dstU[i] = (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; - dstV[i] = (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; + dstU[i] = (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1+8)) + (1<<(RGB2YUV_SHIFT-7+8))) >> (RGB2YUV_SHIFT-6+8); + dstV[i] = (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1+8)) + (1<<(RGB2YUV_SHIFT-7+8))) >> (RGB2YUV_SHIFT-6+8); } } -static inline void rgb48ToUV_half(uint8_t *dstU, uint8_t *dstV, - const uint8_t *src1, const uint8_t *src2, - long width, uint32_t *unused) +static av_always_inline void +rgb48ToUV_half_c_template(int16_t *dstU, int16_t *dstV, + const uint16_t *src1, const uint16_t *src2, + int width, enum PixelFormat origin) { int i; 
assert(src1==src2); for (i = 0; i < width; i++) { - int r= src1[12*i + 0] + src1[12*i + 6]; - int g= src1[12*i + 2] + src1[12*i + 8]; - int b= src1[12*i + 4] + src1[12*i + 10]; + int r_b = (input_pixel(&src1[6*i + 0])) + (input_pixel(&src1[6*i + 3])); + int g = (input_pixel(&src1[6*i + 1])) + (input_pixel(&src1[6*i + 4])); + int b_r = (input_pixel(&src1[6*i + 2])) + (input_pixel(&src1[6*i + 5])); - dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1); - dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1); + dstU[i]= (RU*r + GU*g + BU*b + (256U<<(RGB2YUV_SHIFT+8)) + (1<<(RGB2YUV_SHIFT-6+8))) >> (RGB2YUV_SHIFT-5+8); + dstV[i]= (RV*r + GV*g + BV*b + (256U<<(RGB2YUV_SHIFT+8)) + (1<<(RGB2YUV_SHIFT-6+8))) >> (RGB2YUV_SHIFT-5+8); } } -static inline void bgr48ToY(uint8_t *dst, const uint8_t *src, long width, - uint32_t *unused) +#undef r +#undef b +#undef input_pixel + +#define rgb48funcs(pattern, BE_LE, origin) \ +static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *dst, const uint8_t *src, \ + int width, uint32_t *unused) \ +{ \ + rgb48ToY_c_template(dst, src, width, origin); \ +} \ + \ +static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \ + const uint8_t *src1, const uint8_t *src2, \ + int width, uint32_t *unused) \ +{ \ + rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin); \ +} \ + \ +static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \ + const uint8_t *src1, const uint8_t *src2, \ + int width, uint32_t *unused) \ +{ \ + rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \ +} + +rgb48funcs(rgb, LE, PIX_FMT_RGB48LE); +rgb48funcs(rgb, BE, PIX_FMT_RGB48BE); +rgb48funcs(bgr, LE, PIX_FMT_BGR48LE); +rgb48funcs(bgr, BE, PIX_FMT_BGR48BE); + +#define input_pixel(i) ((origin == PIX_FMT_RGBA || origin == PIX_FMT_BGRA || \ + origin == PIX_FMT_ARGB || origin == PIX_FMT_ABGR) ? AV_RN32A(&src[(i)*4]) : \ + (isBE(origin) ? 
AV_RB16(&src[(i)*2]) : AV_RL16(&src[(i)*2]))) + +static av_always_inline void +rgb16_32ToY_c_template(int16_t *dst, const uint8_t *src, + int width, enum PixelFormat origin, + int shr, int shg, int shb, int shp, + int maskr, int maskg, int maskb, + int rsh, int gsh, int bsh, int S) { + const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh, + rnd = (32<<((S)-1)) + (1<<(S-7)); int i; + for (i = 0; i < width; i++) { - int b = src[i*6+0]; - int g = src[i*6+2]; - int r = src[i*6+4]; + int px = input_pixel(i) >> shp; + int b = (px & maskb) >> shb; + int g = (px & maskg) >> shg; + int r = (px & maskr) >> shr; - dst[i] = (RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; + dst[i] = (ry * r + gy * g + by * b + rnd) >> ((S)-6); } } -static inline void bgr48ToUV(uint8_t *dstU, uint8_t *dstV, - const uint8_t *src1, const uint8_t *src2, - long width, uint32_t *unused) +static av_always_inline void +rgb16_32ToUV_c_template(int16_t *dstU, int16_t *dstV, + const uint8_t *src, int width, + enum PixelFormat origin, + int shr, int shg, int shb, int shp, + int maskr, int maskg, int maskb, + int rsh, int gsh, int bsh, int S) { + const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh, + rv = RV << rsh, gv = GV << gsh, bv = BV << bsh, + rnd = (256<<((S)-1)) + (1<<(S-7)); int i; + for (i = 0; i < width; i++) { - int b = src1[6*i + 0]; - int g = src1[6*i + 2]; - int r = src1[6*i + 4]; + int px = input_pixel(i) >> shp; + int b = (px & maskb) >> shb; + int g = (px & maskg) >> shg; + int r = (px & maskr) >> shr; - dstU[i] = (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; - dstV[i] = (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; + dstU[i] = (ru * r + gu * g + bu * b + rnd) >> ((S)-6); + dstV[i] = (rv * r + gv * g + bv * b + rnd) >> ((S)-6); } } -static inline void bgr48ToUV_half(uint8_t *dstU, uint8_t *dstV, - const uint8_t *src1, const uint8_t *src2, - long width, uint32_t *unused) +static av_always_inline void 
+rgb16_32ToUV_half_c_template(int16_t *dstU, int16_t *dstV, + const uint8_t *src, int width, + enum PixelFormat origin, + int shr, int shg, int shb, int shp, + int maskr, int maskg, int maskb, + int rsh, int gsh, int bsh, int S) { + const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh, + rv = RV << rsh, gv = GV << gsh, bv = BV << bsh, + rnd = (256U<<(S)) + (1<<(S-6)), maskgx = ~(maskr | maskb); int i; + + maskr |= maskr << 1; maskb |= maskb << 1; maskg |= maskg << 1; for (i = 0; i < width; i++) { - int b= src1[12*i + 0] + src1[12*i + 6]; - int g= src1[12*i + 2] + src1[12*i + 8]; - int r= src1[12*i + 4] + src1[12*i + 10]; + int px0 = input_pixel(2 * i + 0) >> shp; + int px1 = input_pixel(2 * i + 1) >> shp; + int b, r, g = (px0 & maskgx) + (px1 & maskgx); + int rb = px0 + px1 - g; + + b = (rb & maskb) >> shb; + if (shp || origin == PIX_FMT_BGR565LE || origin == PIX_FMT_BGR565BE || + origin == PIX_FMT_RGB565LE || origin == PIX_FMT_RGB565BE) { + g >>= shg; + } else { + g = (g & maskg) >> shg; + } + r = (rb & maskr) >> shr; - dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1); - dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1); + dstU[i] = (ru * r + gu * g + bu * b + (unsigned)rnd) >> ((S)-6+1); + dstV[i] = (rv * r + gv * g + bv * b + (unsigned)rnd) >> ((S)-6+1); } } -#define BGR2Y(type, name, shr, shg, shb, maskr, maskg, maskb, RY, GY, BY, S)\ -static inline void name(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)\ -{\ - int i;\ - for (i=0; i<width; i++) {\ - int b= (((const type*)src)[i]>>shb)&maskb;\ - int g= (((const type*)src)[i]>>shg)&maskg;\ - int r= (((const type*)src)[i]>>shr)&maskr;\ -\ - dst[i]= (((RY)*r + (GY)*g + (BY)*b + (33<<((S)-1)))>>(S));\ - }\ +#undef input_pixel + +#define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \ + maskg, maskb, rsh, gsh, bsh, S) \ +static void name ## ToY_c(uint8_t *dst, const uint8_t *src, \ + int width, uint32_t *unused) \ +{ \ + 
rgb16_32ToY_c_template(dst, src, width, fmt, shr, shg, shb, shp, \ + maskr, maskg, maskb, rsh, gsh, bsh, S); \ +} \ + \ +static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \ + const uint8_t *src, const uint8_t *dummy, \ + int width, uint32_t *unused) \ +{ \ + rgb16_32ToUV_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \ + maskr, maskg, maskb, rsh, gsh, bsh, S); \ +} \ + \ +static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \ + const uint8_t *src, const uint8_t *dummy, \ + int width, uint32_t *unused) \ +{ \ + rgb16_32ToUV_half_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \ + maskr, maskg, maskb, rsh, gsh, bsh, S); \ } -BGR2Y(uint32_t, bgr32ToY,16, 0, 0, 0x00FF, 0xFF00, 0x00FF, RY<< 8, GY , BY<< 8, RGB2YUV_SHIFT+8) -BGR2Y(uint32_t,bgr321ToY,16,16, 0, 0xFF00, 0x00FF, 0xFF00, RY , GY<<8, BY , RGB2YUV_SHIFT+8) -BGR2Y(uint32_t, rgb32ToY, 0, 0,16, 0x00FF, 0xFF00, 0x00FF, RY<< 8, GY , BY<< 8, RGB2YUV_SHIFT+8) -BGR2Y(uint32_t,rgb321ToY, 0,16,16, 0xFF00, 0x00FF, 0xFF00, RY , GY<<8, BY , RGB2YUV_SHIFT+8) -BGR2Y(uint16_t, bgr16ToY, 0, 0, 0, 0x001F, 0x07E0, 0xF800, RY<<11, GY<<5, BY , RGB2YUV_SHIFT+8) -BGR2Y(uint16_t, bgr15ToY, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, RY<<10, GY<<5, BY , RGB2YUV_SHIFT+7) -BGR2Y(uint16_t, rgb16ToY, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RY , GY<<5, BY<<11, RGB2YUV_SHIFT+8) -BGR2Y(uint16_t, rgb15ToY, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RY , GY<<5, BY<<10, RGB2YUV_SHIFT+7) +rgb16_32_wrapper(PIX_FMT_BGR32, bgr32, 16, 0, 0, 0, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8); +rgb16_32_wrapper(PIX_FMT_BGR32_1, bgr321, 16, 0, 0, 8, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8); +rgb16_32_wrapper(PIX_FMT_RGB32, rgb32, 0, 0, 16, 0, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8); +rgb16_32_wrapper(PIX_FMT_RGB32_1, rgb321, 0, 0, 16, 8, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8); +rgb16_32_wrapper(PIX_FMT_BGR565LE, bgr16le, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8); 
+rgb16_32_wrapper(PIX_FMT_BGR555LE, bgr15le, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7); +rgb16_32_wrapper(PIX_FMT_RGB565LE, rgb16le, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8); +rgb16_32_wrapper(PIX_FMT_RGB555LE, rgb15le, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7); +rgb16_32_wrapper(PIX_FMT_BGR565BE, bgr16be, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8); +rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7); +rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8); +rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7); + +static void abgrToA_c(int16_t *dst, const uint8_t *src, int width, uint32_t *unused) +{ + int i; + for (i=0; i<width; i++) { + dst[i]= src[4*i]<<6; + } +} -static inline void abgrToA(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused) +static void rgbaToA_c(int16_t *dst, const uint8_t *src, int width, uint32_t *unused) { int i; for (i=0; i<width; i++) { - dst[i]= src[4*i]; - } -} - -#define BGR2UV(type, name, shr, shg, shb, shp, maskr, maskg, maskb, RU, GU, BU, RV, GV, BV, S) \ -static inline void name(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, const uint8_t *dummy, long width, uint32_t *unused)\ -{\ - int i;\ - for (i=0; i<width; i++) {\ - int b= ((((const type*)src)[i]>>shp)&maskb)>>shb;\ - int g= ((((const type*)src)[i]>>shp)&maskg)>>shg;\ - int r= ((((const type*)src)[i]>>shp)&maskr)>>shr;\ -\ - dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (257<<((S)-1)))>>(S);\ - dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<((S)-1)))>>(S);\ - }\ -}\ -static inline void name ## _half(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, const uint8_t *dummy, long width, uint32_t *unused)\ -{\ - int i;\ - for (i=0; i<width; i++) {\ - int pix0= ((const type*)src)[2*i+0]>>shp;\ - int pix1= ((const type*)src)[2*i+1]>>shp;\ - 
int g= (pix0&~(maskr|maskb))+(pix1&~(maskr|maskb));\ - int b= ((pix0+pix1-g)&(maskb|(2*maskb)))>>shb;\ - int r= ((pix0+pix1-g)&(maskr|(2*maskr)))>>shr;\ - g&= maskg|(2*maskg);\ -\ - g>>=shg;\ -\ - dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (257<<(S)))>>((S)+1);\ - dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<(S)))>>((S)+1);\ - }\ -} - -BGR2UV(uint32_t, bgr32ToUV,16, 0, 0, 0, 0xFF0000, 0xFF00, 0x00FF, RU<< 8, GU , BU<< 8, RV<< 8, GV , BV<< 8, RGB2YUV_SHIFT+8) -BGR2UV(uint32_t,bgr321ToUV,16, 0, 0, 8, 0xFF0000, 0xFF00, 0x00FF, RU<< 8, GU , BU<< 8, RV<< 8, GV , BV<< 8, RGB2YUV_SHIFT+8) -BGR2UV(uint32_t, rgb32ToUV, 0, 0,16, 0, 0x00FF, 0xFF00, 0xFF0000, RU<< 8, GU , BU<< 8, RV<< 8, GV , BV<< 8, RGB2YUV_SHIFT+8) -BGR2UV(uint32_t,rgb321ToUV, 0, 0,16, 8, 0x00FF, 0xFF00, 0xFF0000, RU<< 8, GU , BU<< 8, RV<< 8, GV , BV<< 8, RGB2YUV_SHIFT+8) -BGR2UV(uint16_t, bgr16ToUV, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, RU<<11, GU<<5, BU , RV<<11, GV<<5, BV , RGB2YUV_SHIFT+8) -BGR2UV(uint16_t, bgr15ToUV, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, RU<<10, GU<<5, BU , RV<<10, GV<<5, BV , RGB2YUV_SHIFT+7) -BGR2UV(uint16_t, rgb16ToUV, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RU , GU<<5, BU<<11, RV , GV<<5, BV<<11, RGB2YUV_SHIFT+8) -BGR2UV(uint16_t, rgb15ToUV, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RU , GU<<5, BU<<10, RV , GV<<5, BV<<10, RGB2YUV_SHIFT+7) - -static inline void palToA(uint8_t *dst, const uint8_t *src, long width, uint32_t *pal) + dst[i]= src[4*i+3]<<6; + } +} + +static void palToA_c(int16_t *dst, const uint8_t *src, int width, uint32_t *pal) { int i; for (i=0; i<width; i++) { int d= src[i]; - dst[i]= pal[d] >> 24; + dst[i]= (pal[d] >> 24)<<6; } } -static inline void palToY(uint8_t *dst, const uint8_t *src, long width, uint32_t *pal) +static void palToY_c(int16_t *dst, const uint8_t *src, long width, uint32_t *pal) { int i; for (i=0; i<width; i++) { int d= src[i]; - dst[i]= pal[d] & 0xFF; + dst[i]= (pal[d] & 0xFF)<<6; } } -static inline void palToUV(uint8_t *dstU, uint8_t *dstV, +static void 
palToUV_c(uint16_t *dstU, int16_t *dstV, const uint8_t *src1, const uint8_t *src2, - long width, uint32_t *pal) + int width, uint32_t *pal) { int i; assert(src1 == src2); for (i=0; i<width; i++) { int p= pal[src1[i]]; - dstU[i]= p>>8; - dstV[i]= p>>16; + dstU[i]= (uint8_t)(p>> 8)<<6; + dstV[i]= (uint8_t)(p>>16)<<6; } } -static inline void monowhite2Y(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused) +static void monowhite2Y_c(int16_t *dst, const uint8_t *src, int width, uint32_t *unused) { int i, j; for (i=0; i<width/8; i++) { int d= ~src[i]; for(j=0; j<8; j++) - dst[8*i+j]= ((d>>(7-j))&1)*255; + dst[8*i+j]= ((d>>(7-j))&1)*16383; } } -static inline void monoblack2Y(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused) +static void monoblack2Y_c(int16_t *dst, const uint8_t *src, int width, uint32_t *unused) { int i, j; for (i=0; i<width/8; i++) { int d= src[i]; for(j=0; j<8; j++) - dst[8*i+j]= ((d>>(7-j))&1)*255; - } -} - -//Note: we have C, MMX, MMX2, 3DNOW versions, there is no 3DNOW+MMX2 one -//Plain C versions -#if CONFIG_RUNTIME_CPUDETECT -# define COMPILE_C 1 -# if ARCH_X86 -# define COMPILE_MMX 1 -# define COMPILE_MMX2 1 -# define COMPILE_3DNOW 1 -# elif ARCH_PPC -# define COMPILE_ALTIVEC HAVE_ALTIVEC -# endif -#else /* CONFIG_RUNTIME_CPUDETECT */ -# if ARCH_X86 -# if HAVE_MMX2 -# define COMPILE_MMX2 1 -# elif HAVE_AMD3DNOW -# define COMPILE_3DNOW 1 -# elif HAVE_MMX -# define COMPILE_MMX 1 -# else -# define COMPILE_C 1 -# endif -# elif ARCH_PPC && HAVE_ALTIVEC -# define COMPILE_ALTIVEC 1 -# else -# define COMPILE_C 1 -# endif -#endif - -#ifndef COMPILE_C -# define COMPILE_C 0 -#endif -#ifndef COMPILE_MMX -# define COMPILE_MMX 0 -#endif -#ifndef COMPILE_MMX2 -# define COMPILE_MMX2 0 -#endif -#ifndef COMPILE_3DNOW -# define COMPILE_3DNOW 0 -#endif -#ifndef COMPILE_ALTIVEC -# define COMPILE_ALTIVEC 0 -#endif - -#define COMPILE_TEMPLATE_MMX 0 -#define COMPILE_TEMPLATE_MMX2 0 -#define COMPILE_TEMPLATE_AMD3DNOW 0 -#define 
COMPILE_TEMPLATE_ALTIVEC 0 - -#if COMPILE_C -#define RENAME(a) a ## _C -#include "swscale_template.c" -#endif - -#if COMPILE_ALTIVEC -#undef RENAME -#undef COMPILE_TEMPLATE_ALTIVEC -#define COMPILE_TEMPLATE_ALTIVEC 1 -#define RENAME(a) a ## _altivec -#include "swscale_template.c" -#endif - -#if ARCH_X86 - -//MMX versions -#if COMPILE_MMX -#undef RENAME -#undef COMPILE_TEMPLATE_MMX -#undef COMPILE_TEMPLATE_MMX2 -#undef COMPILE_TEMPLATE_AMD3DNOW -#define COMPILE_TEMPLATE_MMX 1 -#define COMPILE_TEMPLATE_MMX2 0 -#define COMPILE_TEMPLATE_AMD3DNOW 0 -#define RENAME(a) a ## _MMX -#include "swscale_template.c" -#endif - -//MMX2 versions -#if COMPILE_MMX2 -#undef RENAME -#undef COMPILE_TEMPLATE_MMX -#undef COMPILE_TEMPLATE_MMX2 -#undef COMPILE_TEMPLATE_AMD3DNOW -#define COMPILE_TEMPLATE_MMX 1 -#define COMPILE_TEMPLATE_MMX2 1 -#define COMPILE_TEMPLATE_AMD3DNOW 0 -#define RENAME(a) a ## _MMX2 -#include "swscale_template.c" -#endif - -//3DNOW versions -#if COMPILE_3DNOW -#undef RENAME -#undef COMPILE_TEMPLATE_MMX -#undef COMPILE_TEMPLATE_MMX2 -#undef COMPILE_TEMPLATE_AMD3DNOW -#define COMPILE_TEMPLATE_MMX 1 -#define COMPILE_TEMPLATE_MMX2 0 -#define COMPILE_TEMPLATE_AMD3DNOW 1 -#define RENAME(a) a ## _3DNow -#include "swscale_template.c" -#endif + dst[8*i+j]= ((d>>(7-j))&1)*16383; + } +} -#endif //ARCH_X86 +//FIXME yuy2* can read up to 7 samples too much -SwsFunc ff_getSwsFunc(SwsContext *c) +static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width, + uint32_t *unused) { -#if CONFIG_RUNTIME_CPUDETECT - int flags = c->flags; - -#if ARCH_X86 - // ordered per speed fastest first - if (flags & SWS_CPU_CAPS_MMX2) { - sws_init_swScale_MMX2(c); - return swScale_MMX2; - } else if (flags & SWS_CPU_CAPS_3DNOW) { - sws_init_swScale_3DNow(c); - return swScale_3DNow; - } else if (flags & SWS_CPU_CAPS_MMX) { - sws_init_swScale_MMX(c); - return swScale_MMX; - } else { - sws_init_swScale_C(c); - return swScale_C; - } + int i; + for (i=0; i<width; i++) + dst[i]= src[2*i]; +} -#else 
-#if COMPILE_ALTIVEC - if (flags & SWS_CPU_CAPS_ALTIVEC) { - sws_init_swScale_altivec(c); - return swScale_altivec; - } else { - sws_init_swScale_C(c); - return swScale_C; +static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, + const uint8_t *src2, int width, uint32_t *unused) +{ + int i; + for (i=0; i<width; i++) { + dstU[i]= src1[4*i + 1]; + dstV[i]= src1[4*i + 3]; } -#endif - sws_init_swScale_C(c); - return swScale_C; -#endif /* ARCH_X86 */ -#else //CONFIG_RUNTIME_CPUDETECT -#if COMPILE_TEMPLATE_MMX2 - sws_init_swScale_MMX2(c); - return swScale_MMX2; -#elif COMPILE_TEMPLATE_AMD3DNOW - sws_init_swScale_3DNow(c); - return swScale_3DNow; -#elif COMPILE_TEMPLATE_MMX - sws_init_swScale_MMX(c); - return swScale_MMX; -#elif COMPILE_TEMPLATE_ALTIVEC - sws_init_swScale_altivec(c); - return swScale_altivec; -#else - sws_init_swScale_C(c); - return swScale_C; -#endif -#endif //!CONFIG_RUNTIME_CPUDETECT + assert(src1 == src2); } -static void copyPlane(const uint8_t *src, int srcStride, - int srcSliceY, int srcSliceH, int width, - uint8_t *dst, int dstStride) +static void LEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, + const uint8_t *src2, int width, uint32_t *unused) { - dst += dstStride * srcSliceY; - if (dstStride == srcStride && srcStride > 0) { - memcpy(dst, src, srcSliceH * dstStride); - } else { - int i; - for (i=0; i<srcSliceH; i++) { - memcpy(dst, src, width); - src += srcStride; - dst += dstStride; - } + int i; + for (i=0; i<width; i++) { + dstU[i]= src1[2*i + 1]; + dstV[i]= src2[2*i + 1]; } } -static int planarToNv12Wrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY, - int srcSliceH, uint8_t* dstParam[], int dstStride[]) +/* This is almost identical to the previous, end exists only because + * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. 
*/ +static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width, + uint32_t *unused) { - uint8_t *dst = dstParam[1] + dstStride[1]*srcSliceY/2; - - copyPlane(src[0], srcStride[0], srcSliceY, srcSliceH, c->srcW, - dstParam[0], dstStride[0]); - - if (c->dstFormat == PIX_FMT_NV12) - interleaveBytes(src[1], src[2], dst, c->srcW/2, srcSliceH/2, srcStride[1], srcStride[2], dstStride[0]); - else - interleaveBytes(src[2], src[1], dst, c->srcW/2, srcSliceH/2, srcStride[2], srcStride[1], dstStride[0]); - - return srcSliceH; + int i; + for (i=0; i<width; i++) + dst[i]= src[2*i+1]; } -static int planarToYuy2Wrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY, - int srcSliceH, uint8_t* dstParam[], int dstStride[]) +static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, + const uint8_t *src2, int width, uint32_t *unused) { - uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY; - - yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0], srcStride[1], dstStride[0]); - - return srcSliceH; + int i; + for (i=0; i<width; i++) { + dstU[i]= src1[4*i + 0]; + dstV[i]= src1[4*i + 2]; + } + assert(src1 == src2); } -static int planarToUyvyWrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY, - int srcSliceH, uint8_t* dstParam[], int dstStride[]) +static void BEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, + const uint8_t *src2, int width, uint32_t *unused) { - uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY; - - yv12touyvy(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0], srcStride[1], dstStride[0]); - - return srcSliceH; + int i; + for (i=0; i<width; i++) { + dstU[i]= src1[2*i]; + dstV[i]= src2[2*i]; + } } -static int yuv422pToYuy2Wrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY, - int srcSliceH, uint8_t* dstParam[], int dstStride[]) +static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2, + const uint8_t *src, int width) { - uint8_t 
*dst=dstParam[0] + dstStride[0]*srcSliceY; - - yuv422ptoyuy2(src[0],src[1],src[2],dst,c->srcW,srcSliceH,srcStride[0],srcStride[1],dstStride[0]); - - return srcSliceH; + int i; + for (i = 0; i < width; i++) { + dst1[i] = src[2*i+0]; + dst2[i] = src[2*i+1]; + } } -static int yuv422pToUyvyWrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY, - int srcSliceH, uint8_t* dstParam[], int dstStride[]) +static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV, + const uint8_t *src1, const uint8_t *src2, + int width, uint32_t *unused) { - uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY; - - yuv422ptouyvy(src[0],src[1],src[2],dst,c->srcW,srcSliceH,srcStride[0],srcStride[1],dstStride[0]); - - return srcSliceH; + nvXXtoUV_c(dstU, dstV, src1, width); } -static int yuyvToYuv420Wrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY, - int srcSliceH, uint8_t* dstParam[], int dstStride[]) +static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV, + const uint8_t *src1, const uint8_t *src2, + int width, uint32_t *unused) { - uint8_t *ydst=dstParam[0] + dstStride[0]*srcSliceY; - uint8_t *udst=dstParam[1] + dstStride[1]*srcSliceY/2; - uint8_t *vdst=dstParam[2] + dstStride[2]*srcSliceY/2; + nvXXtoUV_c(dstV, dstU, src1, width); +} - yuyvtoyuv420(ydst, udst, vdst, src[0], c->srcW, srcSliceH, dstStride[0], dstStride[1], srcStride[0]); +#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos)) - if (dstParam[3]) - fillPlane(dstParam[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255); +// FIXME Maybe dither instead. 
+static av_always_inline void +yuv9_OR_10ToUV_c_template(uint8_t *dstU, uint8_t *dstV, + const uint8_t *_srcU, const uint8_t *_srcV, + int width, enum PixelFormat origin, int depth) +{ + int i; + const uint16_t *srcU = (const uint16_t *) _srcU; + const uint16_t *srcV = (const uint16_t *) _srcV; - return srcSliceH; + for (i = 0; i < width; i++) { + dstU[i] = input_pixel(&srcU[i]) >> (depth - 8); + dstV[i] = input_pixel(&srcV[i]) >> (depth - 8); + } } -static int yuyvToYuv422Wrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY, - int srcSliceH, uint8_t* dstParam[], int dstStride[]) +static av_always_inline void +yuv9_or_10ToY_c_template(uint8_t *dstY, const uint8_t *_srcY, + int width, enum PixelFormat origin, int depth) { - uint8_t *ydst=dstParam[0] + dstStride[0]*srcSliceY; - uint8_t *udst=dstParam[1] + dstStride[1]*srcSliceY; - uint8_t *vdst=dstParam[2] + dstStride[2]*srcSliceY; - - yuyvtoyuv422(ydst, udst, vdst, src[0], c->srcW, srcSliceH, dstStride[0], dstStride[1], srcStride[0]); + int i; + const uint16_t *srcY = (const uint16_t*)_srcY; - return srcSliceH; + for (i = 0; i < width; i++) + dstY[i] = input_pixel(&srcY[i]) >> (depth - 8); } -static int uyvyToYuv420Wrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY, - int srcSliceH, uint8_t* dstParam[], int dstStride[]) -{ - uint8_t *ydst=dstParam[0] + dstStride[0]*srcSliceY; - uint8_t *udst=dstParam[1] + dstStride[1]*srcSliceY/2; - uint8_t *vdst=dstParam[2] + dstStride[2]*srcSliceY/2; +#undef input_pixel + +#define YUV_NBPS(depth, BE_LE, origin) \ +static void BE_LE ## depth ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \ + const uint8_t *srcU, const uint8_t *srcV, \ + int width, uint32_t *unused) \ +{ \ + yuv9_OR_10ToUV_c_template(dstU, dstV, srcU, srcV, width, origin, depth); \ +} \ +static void BE_LE ## depth ## ToY_c(uint8_t *dstY, const uint8_t *srcY, \ + int width, uint32_t *unused) \ +{ \ + yuv9_or_10ToY_c_template(dstY, srcY, width, origin, depth); \ +} - 
uyvytoyuv420(ydst, udst, vdst, src[0], c->srcW, srcSliceH, dstStride[0], dstStride[1], srcStride[0]); +YUV_NBPS( 9, LE, PIX_FMT_YUV420P9LE); +YUV_NBPS( 9, BE, PIX_FMT_YUV420P9BE); +YUV_NBPS(10, LE, PIX_FMT_YUV420P10LE); +YUV_NBPS(10, BE, PIX_FMT_YUV420P10BE); - if (dstParam[3]) - fillPlane(dstParam[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255); +static void bgr24ToY_c(int16_t *dst, const uint8_t *src, + int width, uint32_t *unused) +{ + int i; + for (i=0; i<width; i++) { + int b= src[i*3+0]; + int g= src[i*3+1]; + int r= src[i*3+2]; - return srcSliceH; + dst[i]= ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6)); + } } -static int uyvyToYuv422Wrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY, - int srcSliceH, uint8_t* dstParam[], int dstStride[]) +static void bgr24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1, + const uint8_t *src2, int width, uint32_t *unused) { - uint8_t *ydst=dstParam[0] + dstStride[0]*srcSliceY; - uint8_t *udst=dstParam[1] + dstStride[1]*srcSliceY; - uint8_t *vdst=dstParam[2] + dstStride[2]*srcSliceY; - - uyvytoyuv422(ydst, udst, vdst, src[0], c->srcW, srcSliceH, dstStride[0], dstStride[1], srcStride[0]); + int i; + for (i=0; i<width; i++) { + int b= src1[3*i + 0]; + int g= src1[3*i + 1]; + int r= src1[3*i + 2]; - return srcSliceH; + dstU[i]= (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6); + dstV[i]= (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6); + } + assert(src1 == src2); } -static void gray8aToPacked32(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette) +static void bgr24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1, + const uint8_t *src2, int width, uint32_t *unused) { - long i; - for (i=0; i<num_pixels; i++) - ((uint32_t *) dst)[i] = ((const uint32_t *)palette)[src[i<<1]] | (src[(i<<1)+1] << 24); + int i; + for (i=0; i<width; 
i++) { + int b= src1[6*i + 0] + src1[6*i + 3]; + int g= src1[6*i + 1] + src1[6*i + 4]; + int r= src1[6*i + 2] + src1[6*i + 5]; + + dstU[i]= (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5); + dstV[i]= (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5); + } + assert(src1 == src2); } -static void gray8aToPacked32_1(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette) +static void rgb24ToY_c(int16_t *dst, const uint8_t *src, int width, + uint32_t *unused) { - long i; + int i; + for (i=0; i<width; i++) { + int r= src[i*3+0]; + int g= src[i*3+1]; + int b= src[i*3+2]; - for (i=0; i<num_pixels; i++) - ((uint32_t *) dst)[i] = ((const uint32_t *)palette)[src[i<<1]] | src[(i<<1)+1]; + dst[i]= ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6)); + } } -static void gray8aToPacked24(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette) +static void rgb24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1, + const uint8_t *src2, int width, uint32_t *unused) { - long i; + int i; + assert(src1==src2); + for (i=0; i<width; i++) { + int r= src1[3*i + 0]; + int g= src1[3*i + 1]; + int b= src1[3*i + 2]; - for (i=0; i<num_pixels; i++) { - //FIXME slow? 
- dst[0]= palette[src[i<<1]*4+0]; - dst[1]= palette[src[i<<1]*4+1]; - dst[2]= palette[src[i<<1]*4+2]; - dst+= 3; + dstU[i]= (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6); + dstV[i]= (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6); } } -static int palToRgbWrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY, - int srcSliceH, uint8_t* dst[], int dstStride[]) +static void rgb24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1, + const uint8_t *src2, int width, uint32_t *unused) { - const enum PixelFormat srcFormat= c->srcFormat; - const enum PixelFormat dstFormat= c->dstFormat; - void (*conv)(const uint8_t *src, uint8_t *dst, long num_pixels, - const uint8_t *palette)=NULL; int i; - uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY; - const uint8_t *srcPtr= src[0]; + assert(src1==src2); + for (i=0; i<width; i++) { + int r= src1[6*i + 0] + src1[6*i + 3]; + int g= src1[6*i + 1] + src1[6*i + 4]; + int b= src1[6*i + 2] + src1[6*i + 5]; - if (srcFormat == PIX_FMT_GRAY8A) { - switch (dstFormat) { - case PIX_FMT_RGB32 : conv = gray8aToPacked32; break; - case PIX_FMT_BGR32 : conv = gray8aToPacked32; break; - case PIX_FMT_BGR32_1: conv = gray8aToPacked32_1; break; - case PIX_FMT_RGB32_1: conv = gray8aToPacked32_1; break; - case PIX_FMT_RGB24 : conv = gray8aToPacked24; break; - case PIX_FMT_BGR24 : conv = gray8aToPacked24; break; - } - } else if (usePal(srcFormat)) { - switch (dstFormat) { - case PIX_FMT_RGB32 : conv = sws_convertPalette8ToPacked32; break; - case PIX_FMT_BGR32 : conv = sws_convertPalette8ToPacked32; break; - case PIX_FMT_BGR32_1: conv = sws_convertPalette8ToPacked32; break; - case PIX_FMT_RGB32_1: conv = sws_convertPalette8ToPacked32; break; - case PIX_FMT_RGB24 : conv = sws_convertPalette8ToPacked24; break; - case PIX_FMT_BGR24 : conv = sws_convertPalette8ToPacked24; break; - } + dstU[i]= (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + 
(1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5); + dstV[i]= (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5); } +} - if (!conv) - av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n", - sws_format_name(srcFormat), sws_format_name(dstFormat)); - else { - for (i=0; i<srcSliceH; i++) { - conv(srcPtr, dstPtr, c->srcW, (uint8_t *) c->pal_rgb); - srcPtr+= srcStride[0]; - dstPtr+= dstStride[0]; + +// bilinear / bicubic scaling +static void hScale_c(int16_t *dst, int dstW, const uint8_t *src, + int srcW, int xInc, + const int16_t *filter, const int16_t *filterPos, + int filterSize) +{ + int i; + for (i=0; i<dstW; i++) { + int j; + int srcPos= filterPos[i]; + int val=0; + for (j=0; j<filterSize; j++) { + val += ((int)src[srcPos + j])*filter[filterSize*i + j]; } + //filter += hFilterSize; + dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ... + //dst[i] = val>>7; } - - return srcSliceH; } -#define isRGBA32(x) ( \ - (x) == PIX_FMT_ARGB \ - || (x) == PIX_FMT_RGBA \ - || (x) == PIX_FMT_BGRA \ - || (x) == PIX_FMT_ABGR \ - ) - -/* {RGB,BGR}{15,16,24,32,32_1} -> {RGB,BGR}{15,16,24,32} */ -static int rgbToRgbWrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY, - int srcSliceH, uint8_t* dst[], int dstStride[]) +static inline void hScale16_c(int16_t *dst, int dstW, const uint16_t *src, int srcW, int xInc, + const int16_t *filter, const int16_t *filterPos, long filterSize, int shift) { - const enum PixelFormat srcFormat= c->srcFormat; - const enum PixelFormat dstFormat= c->dstFormat; - const int srcBpp= (c->srcFormatBpp + 7) >> 3; - const int dstBpp= (c->dstFormatBpp + 7) >> 3; - const int srcId= c->srcFormatBpp >> 2; /* 1:0, 4:1, 8:2, 15:3, 16:4, 24:6, 32:8 */ - const int dstId= c->dstFormatBpp >> 2; - void (*conv)(const uint8_t *src, uint8_t *dst, long src_size)=NULL; - -#define CONV_IS(src, dst) (srcFormat == PIX_FMT_##src && dstFormat == PIX_FMT_##dst) - - if (isRGBA32(srcFormat) && 
isRGBA32(dstFormat)) { - if ( CONV_IS(ABGR, RGBA) - || CONV_IS(ARGB, BGRA) - || CONV_IS(BGRA, ARGB) - || CONV_IS(RGBA, ABGR)) conv = shuffle_bytes_3210; - else if (CONV_IS(ABGR, ARGB) - || CONV_IS(ARGB, ABGR)) conv = shuffle_bytes_0321; - else if (CONV_IS(ABGR, BGRA) - || CONV_IS(ARGB, RGBA)) conv = shuffle_bytes_1230; - else if (CONV_IS(BGRA, RGBA) - || CONV_IS(RGBA, BGRA)) conv = shuffle_bytes_2103; - else if (CONV_IS(BGRA, ABGR) - || CONV_IS(RGBA, ARGB)) conv = shuffle_bytes_3012; - } else - /* BGR -> BGR */ - if ( (isBGRinInt(srcFormat) && isBGRinInt(dstFormat)) - || (isRGBinInt(srcFormat) && isRGBinInt(dstFormat))) { - switch(srcId | (dstId<<4)) { - case 0x34: conv= rgb16to15; break; - case 0x36: conv= rgb24to15; break; - case 0x38: conv= rgb32to15; break; - case 0x43: conv= rgb15to16; break; - case 0x46: conv= rgb24to16; break; - case 0x48: conv= rgb32to16; break; - case 0x63: conv= rgb15to24; break; - case 0x64: conv= rgb16to24; break; - case 0x68: conv= rgb32to24; break; - case 0x83: conv= rgb15to32; break; - case 0x84: conv= rgb16to32; break; - case 0x86: conv= rgb24to32; break; - } - } else if ( (isBGRinInt(srcFormat) && isRGBinInt(dstFormat)) - || (isRGBinInt(srcFormat) && isBGRinInt(dstFormat))) { - switch(srcId | (dstId<<4)) { - case 0x33: conv= rgb15tobgr15; break; - case 0x34: conv= rgb16tobgr15; break; - case 0x36: conv= rgb24tobgr15; break; - case 0x38: conv= rgb32tobgr15; break; - case 0x43: conv= rgb15tobgr16; break; - case 0x44: conv= rgb16tobgr16; break; - case 0x46: conv= rgb24tobgr16; break; - case 0x48: conv= rgb32tobgr16; break; - case 0x63: conv= rgb15tobgr24; break; - case 0x64: conv= rgb16tobgr24; break; - case 0x66: conv= rgb24tobgr24; break; - case 0x68: conv= rgb32tobgr24; break; - case 0x83: conv= rgb15tobgr32; break; - case 0x84: conv= rgb16tobgr32; break; - case 0x86: conv= rgb24tobgr32; break; + int i, j; + + for (i=0; i<dstW; i++) { + int srcPos= filterPos[i]; + int val=0; + for (j=0; j<filterSize; j++) { + val += 
((int)src[srcPos + j])*filter[filterSize*i + j]; } + dst[i] = FFMIN(val>>shift, (1<<15)-1); // the cubic equation does overflow ... } +} - if (!conv) { - av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n", - sws_format_name(srcFormat), sws_format_name(dstFormat)); - } else { - const uint8_t *srcPtr= src[0]; - uint8_t *dstPtr= dst[0]; - if ((srcFormat == PIX_FMT_RGB32_1 || srcFormat == PIX_FMT_BGR32_1) && !isRGBA32(dstFormat)) - srcPtr += ALT32_CORR; - - if ((dstFormat == PIX_FMT_RGB32_1 || dstFormat == PIX_FMT_BGR32_1) && !isRGBA32(srcFormat)) - dstPtr += ALT32_CORR; - - if (dstStride[0]*srcBpp == srcStride[0]*dstBpp && srcStride[0] > 0 && !(srcStride[0]%srcBpp)) - conv(srcPtr, dstPtr + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]); - else { - int i; - dstPtr += dstStride[0]*srcSliceY; - - for (i=0; i<srcSliceH; i++) { - conv(srcPtr, dstPtr, c->srcW*srcBpp); - srcPtr+= srcStride[0]; - dstPtr+= dstStride[0]; - } +static inline void hScale16X_c(int16_t *dst, int dstW, const uint16_t *src, int srcW, int xInc, + const int16_t *filter, const int16_t *filterPos, long filterSize, int shift) +{ + int i, j; + for (i=0; i<dstW; i++) { + int srcPos= filterPos[i]; + int val=0; + for (j=0; j<filterSize; j++) { + val += ((int)av_bswap16(src[srcPos + j]))*filter[filterSize*i + j]; } + dst[i] = FFMIN(val>>shift, (1<<15)-1); // the cubic equation does overflow ... 
} - return srcSliceH; } -static int bgr24ToYv12Wrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY, - int srcSliceH, uint8_t* dst[], int dstStride[]) +//FIXME all pal and rgb srcFormats could do this convertion as well +//FIXME all scalers more complex than bilinear could do half of this transform +static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width) { - rgb24toyv12( - src[0], - dst[0]+ srcSliceY *dstStride[0], - dst[1]+(srcSliceY>>1)*dstStride[1], - dst[2]+(srcSliceY>>1)*dstStride[2], - c->srcW, srcSliceH, - dstStride[0], dstStride[1], srcStride[0]); - if (dst[3]) - fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255); - return srcSliceH; + int i; + for (i = 0; i < width; i++) { + dstU[i] = (FFMIN(dstU[i],30775)*4663 - 9289992)>>12; //-264 + dstV[i] = (FFMIN(dstV[i],30775)*4663 - 9289992)>>12; //-264 + } } - -static int yvu9ToYv12Wrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY, - int srcSliceH, uint8_t* dst[], int dstStride[]) +static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width) +{ + int i; + for (i = 0; i < width; i++) { + dstU[i] = (dstU[i]*1799 + 4081085)>>11; //1469 + dstV[i] = (dstV[i]*1799 + 4081085)>>11; //1469 + } +} +static void lumRangeToJpeg_c(int16_t *dst, int width) +{ + int i; + for (i = 0; i < width; i++) + dst[i] = (FFMIN(dst[i],30189)*19077 - 39057361)>>14; +} +static void lumRangeFromJpeg_c(int16_t *dst, int width) { - copyPlane(src[0], srcStride[0], srcSliceY, srcSliceH, c->srcW, - dst[0], dstStride[0]); + int i; + for (i = 0; i < width; i++) + dst[i] = (dst[i]*14071 + 33561947)>>14; +} - planar2x(src[1], dst[1] + dstStride[1]*(srcSliceY >> 1), c->chrSrcW, - srcSliceH >> 2, srcStride[1], dstStride[1]); - planar2x(src[2], dst[2] + dstStride[2]*(srcSliceY >> 1), c->chrSrcW, - srcSliceH >> 2, srcStride[2], dstStride[2]); - if (dst[3]) - fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255); - return srcSliceH; +static void 
hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth, + const uint8_t *src, int srcW, int xInc) +{ + int i; + unsigned int xpos=0; + for (i=0;i<dstWidth;i++) { + register unsigned int xx=xpos>>16; + register unsigned int xalpha=(xpos&0xFFFF)>>9; + dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha; + xpos+=xInc; + } + for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) + dst[i] = src[srcW-1]*128; } -/* unscaled copy like stuff (assumes nearly identical formats) */ -static int packedCopyWrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY, - int srcSliceH, uint8_t* dst[], int dstStride[]) +// *** horizontal scale Y line to temp buffer +static av_always_inline void hyscale(SwsContext *c, uint16_t *dst, int dstWidth, + const uint8_t *src, int srcW, int xInc, + const int16_t *hLumFilter, + const int16_t *hLumFilterPos, int hLumFilterSize, + uint8_t *formatConvBuffer, + uint32_t *pal, int isAlpha) { - if (dstStride[0]==srcStride[0] && srcStride[0] > 0) - memcpy(dst[0] + dstStride[0]*srcSliceY, src[0], srcSliceH*dstStride[0]); - else { - int i; - const uint8_t *srcPtr= src[0]; - uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY; - int length=0; + void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12; + void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange; - /* universal length finder */ - while(length+c->srcW <= FFABS(dstStride[0]) - && length+c->srcW <= FFABS(srcStride[0])) length+= c->srcW; - assert(length!=0); + if (toYV12) { + toYV12(formatConvBuffer, src, srcW, pal); + src= formatConvBuffer; + } - for (i=0; i<srcSliceH; i++) { - memcpy(dstPtr, srcPtr, length); - srcPtr+= srcStride[0]; - dstPtr+= dstStride[0]; - } + if (c->hScale16) { + int shift= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 
13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1; + c->hScale16(dst, dstWidth, (const uint16_t*)src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize, shift); + } else if (!c->hyscale_fast) { + c->hScale(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize); + } else { // fast bilinear upscale / crap downscale + c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc); } - return srcSliceH; -} - -#define DITHER_COPY(dst, dstStride, src, srcStride, bswap, dbswap)\ - uint16_t scale= dither_scale[dst_depth-1][src_depth-1];\ - int shift= src_depth-dst_depth + dither_scale[src_depth-2][dst_depth-1];\ - for (i = 0; i < height; i++) {\ - uint8_t *dither= dithers[src_depth-9][i&7];\ - for (j = 0; j < length-7; j+=8){\ - dst[j+0] = dbswap((bswap(src[j+0]) + dither[0])*scale>>shift);\ - dst[j+1] = dbswap((bswap(src[j+1]) + dither[1])*scale>>shift);\ - dst[j+2] = dbswap((bswap(src[j+2]) + dither[2])*scale>>shift);\ - dst[j+3] = dbswap((bswap(src[j+3]) + dither[3])*scale>>shift);\ - dst[j+4] = dbswap((bswap(src[j+4]) + dither[4])*scale>>shift);\ - dst[j+5] = dbswap((bswap(src[j+5]) + dither[5])*scale>>shift);\ - dst[j+6] = dbswap((bswap(src[j+6]) + dither[6])*scale>>shift);\ - dst[j+7] = dbswap((bswap(src[j+7]) + dither[7])*scale>>shift);\ - }\ - for (; j < length; j++)\ - dst[j] = dbswap((bswap(src[j]) + dither[j&7])*scale>>shift);\ - dst += dstStride;\ - src += srcStride;\ - } - - -static int planarCopyWrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY, - int srcSliceH, uint8_t* dst[], int dstStride[]) -{ - int plane, i, j; - for (plane=0; plane<4; plane++) { - int length= (plane==0 || plane==3) ? c->srcW : -((-c->srcW )>>c->chrDstHSubSample); - int y= (plane==0 || plane==3) ? srcSliceY: -((-srcSliceY)>>c->chrDstVSubSample); - int height= (plane==0 || plane==3) ? 
srcSliceH: -((-srcSliceH)>>c->chrDstVSubSample); - const uint8_t *srcPtr= src[plane]; - uint8_t *dstPtr= dst[plane] + dstStride[plane]*y; - - if (!dst[plane]) continue; - // ignore palette for GRAY8 - if (plane == 1 && !dst[2]) continue; - if (!src[plane] || (plane == 1 && !src[2])) { - if(is16BPS(c->dstFormat)) - length*=2; - fillPlane(dst[plane], dstStride[plane], length, height, y, (plane==3) ? 255 : 128); - } else { - if(isNBPS(c->srcFormat) || isNBPS(c->dstFormat) - || (is16BPS(c->srcFormat) != is16BPS(c->dstFormat)) - ) { - const int src_depth = av_pix_fmt_descriptors[c->srcFormat].comp[plane].depth_minus1+1; - const int dst_depth = av_pix_fmt_descriptors[c->dstFormat].comp[plane].depth_minus1+1; - const uint16_t *srcPtr2 = (const uint16_t*)srcPtr; - uint16_t *dstPtr2 = (uint16_t*)dstPtr; - - if (dst_depth == 8) { - if(isBE(c->srcFormat) == HAVE_BIGENDIAN){ - DITHER_COPY(dstPtr, dstStride[plane], srcPtr2, srcStride[plane]/2, , ) - } else { - DITHER_COPY(dstPtr, dstStride[plane], srcPtr2, srcStride[plane]/2, av_bswap16, ) - } - } else if (src_depth == 8) { - for (i = 0; i < height; i++) { - if(isBE(c->dstFormat)){ - for (j = 0; j < length; j++) - AV_WB16(&dstPtr2[j], (srcPtr[j]<<(dst_depth-8)) | - (srcPtr[j]>>(2*8-dst_depth))); - } else { - for (j = 0; j < length; j++) - AV_WL16(&dstPtr2[j], (srcPtr[j]<<(dst_depth-8)) | - (srcPtr[j]>>(2*8-dst_depth))); - } - dstPtr2 += dstStride[plane]/2; - srcPtr += srcStride[plane]; - } - } else if (src_depth <= dst_depth) { - for (i = 0; i < height; i++) { -#define COPY_UP(r,w) \ - for (j = 0; j < length; j++){ \ - unsigned int v= r(&srcPtr2[j]);\ - w(&dstPtr2[j], (v<<(dst_depth-src_depth)) | \ - (v>>(2*src_depth-dst_depth)));\ - } - if(isBE(c->srcFormat)){ - if(isBE(c->dstFormat)){ - COPY_UP(AV_RB16, AV_WB16) - } else { - COPY_UP(AV_RB16, AV_WL16) - } - } else { - if(isBE(c->dstFormat)){ - COPY_UP(AV_RL16, AV_WB16) - } else { - COPY_UP(AV_RL16, AV_WL16) - } - } - dstPtr2 += dstStride[plane]/2; - srcPtr2 += 
srcStride[plane]/2; - } - } else { - if(isBE(c->srcFormat) == HAVE_BIGENDIAN){ - if(isBE(c->dstFormat) == HAVE_BIGENDIAN){ - DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, , ) - } else { - DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, , av_bswap16) - } - }else{ - if(isBE(c->dstFormat) == HAVE_BIGENDIAN){ - DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, av_bswap16, ) - } else { - DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, av_bswap16, av_bswap16) - } - } - } - } else if(is16BPS(c->srcFormat) && is16BPS(c->dstFormat) - && isBE(c->srcFormat) != isBE(c->dstFormat)) { - - for (i=0; i<height; i++) { - for (j=0; j<length; j++) - ((uint16_t*)dstPtr)[j] = av_bswap16(((const uint16_t*)srcPtr)[j]); - srcPtr+= srcStride[plane]; - dstPtr+= dstStride[plane]; - } - } else if (dstStride[plane] == srcStride[plane] && - srcStride[plane] > 0 && srcStride[plane] == length) { - memcpy(dst[plane] + dstStride[plane]*y, src[plane], - height*dstStride[plane]); - } else { - if(is16BPS(c->srcFormat) && is16BPS(c->dstFormat)) - length*=2; - for (i=0; i<height; i++) { - memcpy(dstPtr, srcPtr, length); - srcPtr+= srcStride[plane]; - dstPtr+= dstStride[plane]; - } - } - } + + if (convertRange) + convertRange(dst, dstWidth); +} + +static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2, + int dstWidth, const uint8_t *src1, + const uint8_t *src2, int srcW, int xInc) +{ + int i; + unsigned int xpos=0; + for (i=0;i<dstWidth;i++) { + register unsigned int xx=xpos>>16; + register unsigned int xalpha=(xpos&0xFFFF)>>9; + dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha); + dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha); + xpos+=xInc; + } + for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) { + dst1[i] = src1[srcW-1]*128; + dst2[i] = src2[srcW-1]*128; } - return srcSliceH; } -int ff_hardcodedcpuflags(void) +static av_always_inline void hcscale(SwsContext *c, uint16_t *dst1, uint16_t *dst2, int 
dstWidth, + const uint8_t *src1, const uint8_t *src2, + int srcW, int xInc, const int16_t *hChrFilter, + const int16_t *hChrFilterPos, int hChrFilterSize, + uint8_t *formatConvBuffer, uint32_t *pal) { - int flags = 0; -#if COMPILE_TEMPLATE_MMX2 - flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2; -#elif COMPILE_TEMPLATE_AMD3DNOW - flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_3DNOW; -#elif COMPILE_TEMPLATE_MMX - flags |= SWS_CPU_CAPS_MMX; -#elif COMPILE_TEMPLATE_ALTIVEC - flags |= SWS_CPU_CAPS_ALTIVEC; -#elif ARCH_BFIN - flags |= SWS_CPU_CAPS_BFIN; -#endif - return flags; -} - -void ff_get_unscaled_swscale(SwsContext *c) -{ - const enum PixelFormat srcFormat = c->srcFormat; - const enum PixelFormat dstFormat = c->dstFormat; - const int flags = c->flags; - const int dstH = c->dstH; - int needsDither; - - needsDither= isAnyRGB(dstFormat) - && c->dstFormatBpp < 24 - && (c->dstFormatBpp < c->srcFormatBpp || (!isAnyRGB(srcFormat))); - - /* yv12_to_nv12 */ - if ((srcFormat == PIX_FMT_YUV420P || srcFormat == PIX_FMT_YUVA420P) && (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21)) { - c->swScale= planarToNv12Wrapper; - } - /* yuv2bgr */ - if ((srcFormat==PIX_FMT_YUV420P || srcFormat==PIX_FMT_YUV422P || srcFormat==PIX_FMT_YUVA420P) && isAnyRGB(dstFormat) - && !(flags & SWS_ACCURATE_RND) && !(dstH&1)) { - c->swScale= ff_yuv2rgb_get_func_ptr(c); - } - - if (srcFormat==PIX_FMT_YUV410P && (dstFormat==PIX_FMT_YUV420P || dstFormat==PIX_FMT_YUVA420P) && !(flags & SWS_BITEXACT)) { - c->swScale= yvu9ToYv12Wrapper; - } - - /* bgr24toYV12 */ - if (srcFormat==PIX_FMT_BGR24 && (dstFormat==PIX_FMT_YUV420P || dstFormat==PIX_FMT_YUVA420P) && !(flags & SWS_ACCURATE_RND)) - c->swScale= bgr24ToYv12Wrapper; - - /* RGB/BGR -> RGB/BGR (no dither needed forms) */ - if ( isAnyRGB(srcFormat) - && isAnyRGB(dstFormat) - && srcFormat != PIX_FMT_BGR8 && dstFormat != PIX_FMT_BGR8 - && srcFormat != PIX_FMT_RGB8 && dstFormat != PIX_FMT_RGB8 - && srcFormat != PIX_FMT_BGR4 && dstFormat != PIX_FMT_BGR4 - && 
srcFormat != PIX_FMT_RGB4 && dstFormat != PIX_FMT_RGB4 - && srcFormat != PIX_FMT_BGR4_BYTE && dstFormat != PIX_FMT_BGR4_BYTE - && srcFormat != PIX_FMT_RGB4_BYTE && dstFormat != PIX_FMT_RGB4_BYTE - && srcFormat != PIX_FMT_MONOBLACK && dstFormat != PIX_FMT_MONOBLACK - && srcFormat != PIX_FMT_MONOWHITE && dstFormat != PIX_FMT_MONOWHITE - && srcFormat != PIX_FMT_RGB48LE && dstFormat != PIX_FMT_RGB48LE - && srcFormat != PIX_FMT_RGB48BE && dstFormat != PIX_FMT_RGB48BE - && srcFormat != PIX_FMT_BGR48LE && dstFormat != PIX_FMT_BGR48LE - && srcFormat != PIX_FMT_BGR48BE && dstFormat != PIX_FMT_BGR48BE - && (!needsDither || (c->flags&(SWS_FAST_BILINEAR|SWS_POINT)))) - c->swScale= rgbToRgbWrapper; - - if ((usePal(srcFormat) && ( - dstFormat == PIX_FMT_RGB32 || - dstFormat == PIX_FMT_RGB32_1 || - dstFormat == PIX_FMT_RGB24 || - dstFormat == PIX_FMT_BGR32 || - dstFormat == PIX_FMT_BGR32_1 || - dstFormat == PIX_FMT_BGR24))) - c->swScale= palToRgbWrapper; - - if (srcFormat == PIX_FMT_YUV422P) { - if (dstFormat == PIX_FMT_YUYV422) - c->swScale= yuv422pToYuy2Wrapper; - else if (dstFormat == PIX_FMT_UYVY422) - c->swScale= yuv422pToUyvyWrapper; - } - - /* LQ converters if -sws 0 or -sws 4*/ - if (c->flags&(SWS_FAST_BILINEAR|SWS_POINT)) { - /* yv12_to_yuy2 */ - if (srcFormat == PIX_FMT_YUV420P || srcFormat == PIX_FMT_YUVA420P) { - if (dstFormat == PIX_FMT_YUYV422) - c->swScale= planarToYuy2Wrapper; - else if (dstFormat == PIX_FMT_UYVY422) - c->swScale= planarToUyvyWrapper; - } + if (c->chrToYV12) { + uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW*2+78, 16); + c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal); + src1= formatConvBuffer; + src2= buf2; } - if(srcFormat == PIX_FMT_YUYV422 && (dstFormat == PIX_FMT_YUV420P || dstFormat == PIX_FMT_YUVA420P)) - c->swScale= yuyvToYuv420Wrapper; - if(srcFormat == PIX_FMT_UYVY422 && (dstFormat == PIX_FMT_YUV420P || dstFormat == PIX_FMT_YUVA420P)) - c->swScale= uyvyToYuv420Wrapper; - if(srcFormat == PIX_FMT_YUYV422 && dstFormat == 
PIX_FMT_YUV422P) - c->swScale= yuyvToYuv422Wrapper; - if(srcFormat == PIX_FMT_UYVY422 && dstFormat == PIX_FMT_YUV422P) - c->swScale= uyvyToYuv422Wrapper; - -#if COMPILE_ALTIVEC - if ((c->flags & SWS_CPU_CAPS_ALTIVEC) && - !(c->flags & SWS_BITEXACT) && - srcFormat == PIX_FMT_YUV420P) { - // unscaled YV12 -> packed YUV, we want speed - if (dstFormat == PIX_FMT_YUYV422) - c->swScale= yv12toyuy2_unscaled_altivec; - else if (dstFormat == PIX_FMT_UYVY422) - c->swScale= yv12touyvy_unscaled_altivec; + + if (c->hScale16) { + int shift= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1; + c->hScale16(dst1, dstWidth, (const uint16_t*)src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, shift); + c->hScale16(dst2, dstWidth, (const uint16_t*)src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, shift); + } else if (!c->hcscale_fast) { + c->hScale(dst1, dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize); + c->hScale(dst2, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize); + } else { // fast bilinear upscale / crap downscale + c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc); } -#endif - /* simple copy */ - if ( srcFormat == dstFormat - || (srcFormat == PIX_FMT_YUVA420P && dstFormat == PIX_FMT_YUV420P) - || (srcFormat == PIX_FMT_YUV420P && dstFormat == PIX_FMT_YUVA420P) - || (isPlanarYUV(srcFormat) && isGray(dstFormat)) - || (isPlanarYUV(dstFormat) && isGray(srcFormat)) - || (isGray(dstFormat) && isGray(srcFormat)) - || (isPlanarYUV(srcFormat) && isPlanarYUV(dstFormat) - && c->chrDstHSubSample == c->chrSrcHSubSample - && c->chrDstVSubSample == c->chrSrcVSubSample - && dstFormat != PIX_FMT_NV12 && dstFormat != PIX_FMT_NV21 - && srcFormat != PIX_FMT_NV12 && srcFormat != PIX_FMT_NV21)) - { - if (isPacked(c->srcFormat)) - c->swScale= packedCopyWrapper; - else /* Planar YUV or gray */ - c->swScale= planarCopyWrapper; - } -#if ARCH_BFIN - if (flags 
& SWS_CPU_CAPS_BFIN) - ff_bfin_get_unscaled_swscale (c); -#endif + if (c->chrConvertRange) + c->chrConvertRange(dst1, dst2, dstWidth); } -static void reset_ptr(const uint8_t* src[], int format) +static av_always_inline void +find_c_packed_planar_out_funcs(SwsContext *c, + yuv2planar1_fn *yuv2yuv1, yuv2planarX_fn *yuv2yuvX, + yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2, + yuv2packedX_fn *yuv2packedX) { - if(!isALPHA(format)) - src[3]=NULL; - if(!isPlanarYUV(format)) { - src[3]=src[2]=NULL; - - if (!usePal(format)) - src[1]= NULL; + enum PixelFormat dstFormat = c->dstFormat; + + if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) { + *yuv2yuvX = yuv2nv12X_c; + } else if (is16BPS(dstFormat)) { + *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX16BE_c : yuv2yuvX16LE_c; + } else if (is9_OR_10BPS(dstFormat)) { + if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) { + *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX9BE_c : yuv2yuvX9LE_c; + } else { + *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX10BE_c : yuv2yuvX10LE_c; + } + } else { + *yuv2yuv1 = yuv2yuv1_c; + *yuv2yuvX = yuv2yuvX_c; + } + if(c->flags & SWS_FULL_CHR_H_INT) { + *yuv2packedX = yuv2rgbX_c_full; + } else { + switch (dstFormat) { + case PIX_FMT_GRAY16BE: + *yuv2packed1 = yuv2gray16BE_1_c; + *yuv2packed2 = yuv2gray16BE_2_c; + *yuv2packedX = yuv2gray16BE_X_c; + break; + case PIX_FMT_GRAY16LE: + *yuv2packed1 = yuv2gray16LE_1_c; + *yuv2packed2 = yuv2gray16LE_2_c; + *yuv2packedX = yuv2gray16LE_X_c; + break; + case PIX_FMT_MONOWHITE: + *yuv2packed1 = yuv2monowhite_1_c; + *yuv2packed2 = yuv2monowhite_2_c; + *yuv2packedX = yuv2monowhite_X_c; + break; + case PIX_FMT_MONOBLACK: + *yuv2packed1 = yuv2monoblack_1_c; + *yuv2packed2 = yuv2monoblack_2_c; + *yuv2packedX = yuv2monoblack_X_c; + break; + case PIX_FMT_YUYV422: + *yuv2packed1 = yuv2yuyv422_1_c; + *yuv2packed2 = yuv2yuyv422_2_c; + *yuv2packedX = yuv2yuyv422_X_c; + break; + case PIX_FMT_UYVY422: + *yuv2packed1 = yuv2uyvy422_1_c; + *yuv2packed2 = 
yuv2uyvy422_2_c; + *yuv2packedX = yuv2uyvy422_X_c; + break; + case PIX_FMT_RGB48LE: + //*yuv2packed1 = yuv2rgb48le_1_c; + //*yuv2packed2 = yuv2rgb48le_2_c; + //*yuv2packedX = yuv2rgb48le_X_c; + //break; + case PIX_FMT_RGB48BE: + *yuv2packed1 = yuv2rgb48be_1_c; + *yuv2packed2 = yuv2rgb48be_2_c; + *yuv2packedX = yuv2rgb48be_X_c; + break; + case PIX_FMT_BGR48LE: + //*yuv2packed1 = yuv2bgr48le_1_c; + //*yuv2packed2 = yuv2bgr48le_2_c; + //*yuv2packedX = yuv2bgr48le_X_c; + //break; + case PIX_FMT_BGR48BE: + *yuv2packed1 = yuv2bgr48be_1_c; + *yuv2packed2 = yuv2bgr48be_2_c; + *yuv2packedX = yuv2bgr48be_X_c; + break; + default: + *yuv2packed1 = yuv2packed1_c; + *yuv2packed2 = yuv2packed2_c; + *yuv2packedX = yuv2packedX_c; + break; + } } } -static int check_image_pointers(uint8_t *data[4], enum PixelFormat pix_fmt, - const int linesizes[4]) +#define DEBUG_SWSCALE_BUFFERS 0 +#define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__) + +static int swScale(SwsContext *c, const uint8_t* src[], + int srcStride[], int srcSliceY, + int srcSliceH, uint8_t* dst[], int dstStride[]) { - const AVPixFmtDescriptor *desc = &av_pix_fmt_descriptors[pix_fmt]; - int i; + /* load a few things into local vars to make the code more readable? 
and faster */ + const int srcW= c->srcW; + const int dstW= c->dstW; + const int dstH= c->dstH; + const int chrDstW= c->chrDstW; + const int chrSrcW= c->chrSrcW; + const int lumXInc= c->lumXInc; + const int chrXInc= c->chrXInc; + const enum PixelFormat dstFormat= c->dstFormat; + const int flags= c->flags; + int16_t *vLumFilterPos= c->vLumFilterPos; + int16_t *vChrFilterPos= c->vChrFilterPos; + int16_t *hLumFilterPos= c->hLumFilterPos; + int16_t *hChrFilterPos= c->hChrFilterPos; + int16_t *vLumFilter= c->vLumFilter; + int16_t *vChrFilter= c->vChrFilter; + int16_t *hLumFilter= c->hLumFilter; + int16_t *hChrFilter= c->hChrFilter; + int32_t *lumMmxFilter= c->lumMmxFilter; + int32_t *chrMmxFilter= c->chrMmxFilter; + int32_t av_unused *alpMmxFilter= c->alpMmxFilter; + const int vLumFilterSize= c->vLumFilterSize; + const int vChrFilterSize= c->vChrFilterSize; + const int hLumFilterSize= c->hLumFilterSize; + const int hChrFilterSize= c->hChrFilterSize; + int16_t **lumPixBuf= c->lumPixBuf; + int16_t **chrUPixBuf= c->chrUPixBuf; + int16_t **chrVPixBuf= c->chrVPixBuf; + int16_t **alpPixBuf= c->alpPixBuf; + const int vLumBufSize= c->vLumBufSize; + const int vChrBufSize= c->vChrBufSize; + uint8_t *formatConvBuffer= c->formatConvBuffer; + const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample; + const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample); + int lastDstY; + uint32_t *pal=c->pal_yuv; + int should_dither= isNBPS(c->srcFormat) || is16BPS(c->srcFormat); + yuv2planar1_fn yuv2yuv1 = c->yuv2yuv1; + yuv2planarX_fn yuv2yuvX = c->yuv2yuvX; + yuv2packed1_fn yuv2packed1 = c->yuv2packed1; + yuv2packed2_fn yuv2packed2 = c->yuv2packed2; + yuv2packedX_fn yuv2packedX = c->yuv2packedX; + + /* vars which will change and which we need to store back in the context */ + int dstY= c->dstY; + int lumBufIndex= c->lumBufIndex; + int chrBufIndex= c->chrBufIndex; + int lastInLumBuf= c->lastInLumBuf; + int lastInChrBuf= c->lastInChrBuf; + + if (isPacked(c->srcFormat)) { + src[0]= + 
src[1]= + src[2]= + src[3]= src[0]; + srcStride[0]= + srcStride[1]= + srcStride[2]= + srcStride[3]= srcStride[0]; + } + srcStride[1]<<= c->vChrDrop; + srcStride[2]<<= c->vChrDrop; + + DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n", + src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3], + dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]); + DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n", + srcSliceY, srcSliceH, dstY, dstH); + DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n", + vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize); + + if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) { + static int warnedAlready=0; //FIXME move this into the context perhaps + if (flags & SWS_PRINT_INFO && !warnedAlready) { + av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n" + " ->cannot do aligned memory accesses anymore\n"); + warnedAlready=1; + } + } - for (i = 0; i < 4; i++) { - int plane = desc->comp[i].plane; - if (!data[plane] || !linesizes[plane]) - return 0; + /* Note the user might start scaling the picture in the middle so this + will not get executed. This is not really intended but works + currently, so people might do it. */ + if (srcSliceY ==0) { + lumBufIndex=-1; + chrBufIndex=-1; + dstY=0; + lastInLumBuf= -1; + lastInChrBuf= -1; } - return 1; -} + lastDstY= dstY; + + for (;dstY < dstH; dstY++) { + unsigned char *dest =dst[0]+dstStride[0]*dstY; + const int chrDstY= dstY>>c->chrDstVSubSample; + unsigned char *uDest=dst[1]+dstStride[1]*chrDstY; + unsigned char *vDest=dst[2]+dstStride[2]*chrDstY; + unsigned char *aDest=(CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3]+dstStride[3]*dstY : NULL; + const uint8_t *lumDither= should_dither ? dithers[7][dstY &7] : flat64; + const uint8_t *chrDither= should_dither ? 
dithers[7][chrDstY&7] : flat64; + + const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input + const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)]; + const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input + int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input + int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input + int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input + int enough_lines; + + //handle holes (FAST_BILINEAR & weird filters) + if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1; + if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1; + assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1); + assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1); + + DEBUG_BUFFERS("dstY: %d\n", dstY); + DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n", + firstLumSrcY, lastLumSrcY, lastInLumBuf); + DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n", + firstChrSrcY, lastChrSrcY, lastInChrBuf); + + // Do we have enough lines in this slice to output the dstY line + enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample); + + if (!enough_lines) { + lastLumSrcY = srcSliceY + srcSliceH - 1; + lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1; + DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n", + lastLumSrcY, lastChrSrcY); + } -/** - * swscale wrapper, so we don't need to export the SwsContext. - * Assumes planar YUV to be in YUV order instead of YVU. 
- */ -int sws_scale(SwsContext *c, const uint8_t* const src[], const int srcStride[], int srcSliceY, - int srcSliceH, uint8_t* const dst[], const int dstStride[]) -{ - int i; - const uint8_t* src2[4]= {src[0], src[1], src[2], src[3]}; - uint8_t* dst2[4]= {dst[0], dst[1], dst[2], dst[3]}; - - // do not mess up sliceDir if we have a "trailing" 0-size slice - if (srcSliceH == 0) - return 0; - - if (!check_image_pointers(src, c->srcFormat, srcStride)) { - av_log(c, AV_LOG_ERROR, "bad src image pointers\n"); - return 0; - } - if (!check_image_pointers(dst, c->dstFormat, dstStride)) { - av_log(c, AV_LOG_ERROR, "bad dst image pointers\n"); - return 0; - } - - if (c->sliceDir == 0 && srcSliceY != 0 && srcSliceY + srcSliceH != c->srcH) { - av_log(c, AV_LOG_ERROR, "Slices start in the middle!\n"); - return 0; - } - if (c->sliceDir == 0) { - if (srcSliceY == 0) c->sliceDir = 1; else c->sliceDir = -1; - } - - if (usePal(c->srcFormat)) { - for (i=0; i<256; i++) { - int p, r, g, b, y, u, v, a = 0xff; - if(c->srcFormat == PIX_FMT_PAL8) { - p=((const uint32_t*)(src[1]))[i]; - a= (p>>24)&0xFF; - r= (p>>16)&0xFF; - g= (p>> 8)&0xFF; - b= p &0xFF; - } else if(c->srcFormat == PIX_FMT_RGB8) { - r= (i>>5 )*36; - g= ((i>>2)&7)*36; - b= (i&3 )*85; - } else if(c->srcFormat == PIX_FMT_BGR8) { - b= (i>>6 )*85; - g= ((i>>3)&7)*36; - r= (i&7 )*36; - } else if(c->srcFormat == PIX_FMT_RGB4_BYTE) { - r= (i>>3 )*255; - g= ((i>>1)&3)*85; - b= (i&1 )*255; - } else if(c->srcFormat == PIX_FMT_GRAY8 || c->srcFormat == PIX_FMT_GRAY8A) { - r = g = b = i; - } else { - assert(c->srcFormat == PIX_FMT_BGR4_BYTE); - b= (i>>3 )*255; - g= ((i>>1)&3)*85; - r= (i&1 )*255; - } - y= av_clip_uint8((RY*r + GY*g + BY*b + ( 33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT); - u= av_clip_uint8((RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT); - v= av_clip_uint8((RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT); - c->pal_yuv[i]= y + (u<<8) + (v<<16) + (a<<24); - - switch(c->dstFormat) { - case 
PIX_FMT_BGR32: -#if !HAVE_BIGENDIAN - case PIX_FMT_RGB24: -#endif - c->pal_rgb[i]= r + (g<<8) + (b<<16) + (a<<24); - break; - case PIX_FMT_BGR32_1: -#if HAVE_BIGENDIAN - case PIX_FMT_BGR24: -#endif - c->pal_rgb[i]= a + (r<<8) + (g<<16) + (b<<24); - break; - case PIX_FMT_RGB32_1: -#if HAVE_BIGENDIAN - case PIX_FMT_RGB24: -#endif - c->pal_rgb[i]= a + (b<<8) + (g<<16) + (r<<24); - break; - case PIX_FMT_RGB32: -#if !HAVE_BIGENDIAN - case PIX_FMT_BGR24: + //Do horizontal scaling + while(lastInLumBuf < lastLumSrcY) { + const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0]; + const uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3]; + lumBufIndex++; + assert(lumBufIndex < 2*vLumBufSize); + assert(lastInLumBuf + 1 - srcSliceY < srcSliceH); + assert(lastInLumBuf + 1 - srcSliceY >= 0); + hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc, + hLumFilter, hLumFilterPos, hLumFilterSize, + formatConvBuffer, + pal, 0); + if (CONFIG_SWSCALE_ALPHA && alpPixBuf) + hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW, + lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize, + formatConvBuffer, + pal, 1); + lastInLumBuf++; + DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n", + lumBufIndex, lastInLumBuf); + } + while(lastInChrBuf < lastChrSrcY) { + const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1]; + const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2]; + chrBufIndex++; + assert(chrBufIndex < 2*vChrBufSize); + assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH)); + assert(lastInChrBuf + 1 - chrSrcSliceY >= 0); + //FIXME replace parameters through context struct (some at least) + + if (c->needs_hcscale) + hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex], + chrDstW, src1, src2, chrSrcW, chrXInc, + hChrFilter, hChrFilterPos, hChrFilterSize, + formatConvBuffer, pal); + lastInChrBuf++; + DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n", + chrBufIndex, lastInChrBuf); + } + 
//wrap buf index around to stay inside the ring buffer + if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize; + if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize; + if (!enough_lines) + break; //we can't output a dstY line so let's try with the next slice + +#if HAVE_MMX + updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf); #endif - default: - c->pal_rgb[i]= b + (g<<8) + (r<<16) + (a<<24); + if (dstY >= dstH-2) { + // hmm looks like we can't use MMX here without overwriting this array's tail + find_c_packed_planar_out_funcs(c, &yuv2yuv1, &yuv2yuvX, + &yuv2packed1, &yuv2packed2, + &yuv2packedX); + } + + { + const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; + const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; + const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; + const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL; + + if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like + const int chrSkipMask= (1<<c->chrDstVSubSample)-1; + if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi + if (c->yuv2yuv1 && vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12 + const int16_t *lumBuf = lumSrcPtr[0]; + const int16_t *chrUBuf= chrUSrcPtr[0]; + const int16_t *chrVBuf= chrVSrcPtr[0]; + const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? 
alpSrcPtr[0] : NULL; + yuv2yuv1(c, lumBuf, chrUBuf, chrVBuf, alpBuf, dest, + uDest, vDest, aDest, dstW, chrDstW, lumDither, chrDither); + } else { //General YV12 + yuv2yuvX(c, + vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, + vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, + chrVSrcPtr, vChrFilterSize, + alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW, lumDither, chrDither); + } + } else { + assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2); + assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2); + if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB + int chrAlpha= vChrFilter[2*dstY+1]; + yuv2packed1(c, *lumSrcPtr, *chrUSrcPtr, *(chrUSrcPtr+1), + *chrVSrcPtr, *(chrVSrcPtr+1), + alpPixBuf ? *alpSrcPtr : NULL, + dest, dstW, chrAlpha, dstFormat, flags, dstY); + } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB + int lumAlpha= vLumFilter[2*dstY+1]; + int chrAlpha= vChrFilter[2*dstY+1]; + lumMmxFilter[2]= + lumMmxFilter[3]= vLumFilter[2*dstY ]*0x10001; + chrMmxFilter[2]= + chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001; + yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrUSrcPtr, *(chrUSrcPtr+1), + *chrVSrcPtr, *(chrVSrcPtr+1), + alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? 
*(alpSrcPtr+1) : NULL, + dest, dstW, lumAlpha, chrAlpha, dstY); + } else { //general RGB + yuv2packedX(c, + vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, + vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize, + alpSrcPtr, dest, dstW, dstY); + } } } } - // copy strides, so they can safely be modified - if (c->sliceDir == 1) { - // slices go from top to bottom - int srcStride2[4]= {srcStride[0], srcStride[1], srcStride[2], srcStride[3]}; - int dstStride2[4]= {dstStride[0], dstStride[1], dstStride[2], dstStride[3]}; + if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf) + fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255); - reset_ptr(src2, c->srcFormat); - reset_ptr((const uint8_t**)dst2, c->dstFormat); +#if HAVE_MMX2 + if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2) + __asm__ volatile("sfence":::"memory"); +#endif + emms_c(); - /* reset slice direction at end of frame */ - if (srcSliceY + srcSliceH == c->srcH) - c->sliceDir = 0; + /* store changed local vars back in the context */ + c->dstY= dstY; + c->lumBufIndex= lumBufIndex; + c->chrBufIndex= chrBufIndex; + c->lastInLumBuf= lastInLumBuf; + c->lastInChrBuf= lastInChrBuf; - return c->swScale(c, src2, srcStride2, srcSliceY, srcSliceH, dst2, dstStride2); - } else { - // slices go from bottom to top => we flip the image internally - int srcStride2[4]= {-srcStride[0], -srcStride[1], -srcStride[2], -srcStride[3]}; - int dstStride2[4]= {-dstStride[0], -dstStride[1], -dstStride[2], -dstStride[3]}; + return dstY - lastDstY; +} - src2[0] += (srcSliceH-1)*srcStride[0]; - if (!usePal(c->srcFormat)) - src2[1] += ((srcSliceH>>c->chrSrcVSubSample)-1)*srcStride[1]; - src2[2] += ((srcSliceH>>c->chrSrcVSubSample)-1)*srcStride[2]; - src2[3] += (srcSliceH-1)*srcStride[3]; - dst2[0] += ( c->dstH -1)*dstStride[0]; - dst2[1] += ((c->dstH>>c->chrDstVSubSample)-1)*dstStride[1]; - dst2[2] += ((c->dstH>>c->chrDstVSubSample)-1)*dstStride[2]; - dst2[3] += ( c->dstH -1)*dstStride[3]; +static av_cold 
void sws_init_swScale_c(SwsContext *c) +{ + enum PixelFormat srcFormat = c->srcFormat; - reset_ptr(src2, c->srcFormat); - reset_ptr((const uint8_t**)dst2, c->dstFormat); + find_c_packed_planar_out_funcs(c, &c->yuv2yuv1, &c->yuv2yuvX, + &c->yuv2packed1, &c->yuv2packed2, + &c->yuv2packedX); - /* reset slice direction at end of frame */ - if (!srcSliceY) - c->sliceDir = 0; + c->hScale = hScale_c; - return c->swScale(c, src2, srcStride2, c->srcH-srcSliceY-srcSliceH, srcSliceH, dst2, dstStride2); + if (c->flags & SWS_FAST_BILINEAR) { + c->hyscale_fast = hyscale_fast_c; + c->hcscale_fast = hcscale_fast_c; } -} -#if LIBSWSCALE_VERSION_MAJOR < 1 -int sws_scale_ordered(SwsContext *c, const uint8_t* const src[], int srcStride[], int srcSliceY, - int srcSliceH, uint8_t* dst[], int dstStride[]) -{ - return sws_scale(c, src, srcStride, srcSliceY, srcSliceH, dst, dstStride); -} -#endif + c->chrToYV12 = NULL; + switch(srcFormat) { + case PIX_FMT_YUYV422 : c->chrToYV12 = yuy2ToUV_c; break; + case PIX_FMT_UYVY422 : c->chrToYV12 = uyvyToUV_c; break; + case PIX_FMT_NV12 : c->chrToYV12 = nv12ToUV_c; break; + case PIX_FMT_NV21 : c->chrToYV12 = nv21ToUV_c; break; + case PIX_FMT_RGB8 : + case PIX_FMT_BGR8 : + case PIX_FMT_PAL8 : + case PIX_FMT_BGR4_BYTE: + case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break; + case PIX_FMT_GRAY16BE : + case PIX_FMT_YUV444P9BE: + case PIX_FMT_YUV420P9BE: + case PIX_FMT_YUV444P10BE: + case PIX_FMT_YUV422P10BE: + case PIX_FMT_YUV420P10BE: + case PIX_FMT_YUV420P16BE: + case PIX_FMT_YUV422P16BE: + case PIX_FMT_YUV444P16BE: c->hScale16= HAVE_BIGENDIAN ? hScale16_c : hScale16X_c; break; + case PIX_FMT_GRAY16LE : + case PIX_FMT_YUV444P9LE: + case PIX_FMT_YUV420P9LE: + case PIX_FMT_YUV422P10LE: + case PIX_FMT_YUV420P10LE: + case PIX_FMT_YUV444P10LE: + case PIX_FMT_YUV420P16LE: + case PIX_FMT_YUV422P16LE: + case PIX_FMT_YUV444P16LE: c->hScale16= HAVE_BIGENDIAN ? 
hScale16X_c : hScale16_c; break; + } + if (c->chrSrcHSubSample) { + switch(srcFormat) { + case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_half_c; break; + case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_half_c; break; + case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_half_c; break; + case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_half_c; break; + case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_half_c; break; + case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_half_c; break; + case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_half_c; break; + case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_half_c; break; + case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_half_c; break; + case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_half_c; break; + case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_half_c; break; + case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half_c; break; + case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_half_c; break; + case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break; + case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_half_c; break; + case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_half_c; break; + case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_half_c; break; + case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_half_c; break; + } + } else { + switch(srcFormat) { + case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_c; break; + case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_c; break; + case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_c; break; + case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_c; break; + case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_c; break; + case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_c; break; + case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_c; break; + case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_c; break; + case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_c; break; + case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_c; break; + case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_c; break; + case 
PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_c; break; + case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_c; break; + case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break; + case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_c; break; + case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_c; break; + case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_c; break; + case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_c; break; + } + } -/* Convert the palette to the same packed 32-bit format as the palette */ -void sws_convertPalette8ToPacked32(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette) -{ - long i; + c->lumToYV12 = NULL; + c->alpToYV12 = NULL; + switch (srcFormat) { + case PIX_FMT_YUYV422 : + case PIX_FMT_GRAY8A : + c->lumToYV12 = yuy2ToY_c; break; + case PIX_FMT_UYVY422 : + c->lumToYV12 = uyvyToY_c; break; + case PIX_FMT_BGR24 : c->lumToYV12 = bgr24ToY_c; break; + case PIX_FMT_BGR565LE : c->lumToYV12 = bgr16leToY_c; break; + case PIX_FMT_BGR565BE : c->lumToYV12 = bgr16beToY_c; break; + case PIX_FMT_BGR555LE : c->lumToYV12 = bgr15leToY_c; break; + case PIX_FMT_BGR555BE : c->lumToYV12 = bgr15beToY_c; break; + case PIX_FMT_RGB24 : c->lumToYV12 = rgb24ToY_c; break; + case PIX_FMT_RGB565LE : c->lumToYV12 = rgb16leToY_c; break; + case PIX_FMT_RGB565BE : c->lumToYV12 = rgb16beToY_c; break; + case PIX_FMT_RGB555LE : c->lumToYV12 = rgb15leToY_c; break; + case PIX_FMT_RGB555BE : c->lumToYV12 = rgb15beToY_c; break; + case PIX_FMT_RGB8 : + case PIX_FMT_BGR8 : + case PIX_FMT_PAL8 : + case PIX_FMT_BGR4_BYTE: + case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY_c; break; + case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break; + case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break; + case PIX_FMT_RGB32 : c->lumToYV12 = bgr32ToY_c; break; + case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break; + case PIX_FMT_BGR32 : c->lumToYV12 = rgb32ToY_c; break; + case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break; + case PIX_FMT_RGB48BE: c->lumToYV12 = 
rgb48BEToY_c; break; + case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY_c; break; + case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY_c; break; + case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break; + } + if (c->alpPixBuf) { + switch (srcFormat) { + case PIX_FMT_BGRA: + case PIX_FMT_RGBA: c->alpToYV12 = rgbaToA_c; break; + case PIX_FMT_ABGR: + case PIX_FMT_ARGB: c->alpToYV12 = abgrToA_c; break; + case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break; + case PIX_FMT_PAL8 : c->alpToYV12 = palToA_c; break; + } + } + + if(isAnyRGB(c->srcFormat) || c->srcFormat == PIX_FMT_PAL8) + c->hScale16= hScale16_c; - for (i=0; i<num_pixels; i++) - ((uint32_t *) dst)[i] = ((const uint32_t *) palette)[src[i]]; + if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) { + if (c->srcRange) { + c->lumConvertRange = lumRangeFromJpeg_c; + c->chrConvertRange = chrRangeFromJpeg_c; + } else { + c->lumConvertRange = lumRangeToJpeg_c; + c->chrConvertRange = chrRangeToJpeg_c; + } + } + + if (!(isGray(srcFormat) || isGray(c->dstFormat) || + srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE)) + c->needs_hcscale = 1; } -/* Palette format: ABCD -> dst format: ABC */ -void sws_convertPalette8ToPacked24(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette) +SwsFunc ff_getSwsFunc(SwsContext *c) { - long i; + sws_init_swScale_c(c); - for (i=0; i<num_pixels; i++) { - //FIXME slow? 
- dst[0]= palette[src[i]*4+0]; - dst[1]= palette[src[i]*4+1]; - dst[2]= palette[src[i]*4+2]; - dst+= 3; - } + if (HAVE_MMX) + ff_sws_init_swScale_mmx(c); + if (HAVE_ALTIVEC) + ff_sws_init_swScale_altivec(c); + + return swScale; } diff --git a/libswscale/swscale.h b/libswscale/swscale.h index 406eec47f2..e798773158 100644 --- a/libswscale/swscale.h +++ b/libswscale/swscale.h @@ -31,7 +31,7 @@ #define LIBSWSCALE_VERSION_MAJOR 0 #define LIBSWSCALE_VERSION_MINOR 14 -#define LIBSWSCALE_VERSION_MICRO 0 +#define LIBSWSCALE_VERSION_MICRO 1 #define LIBSWSCALE_VERSION_INT AV_VERSION_INT(LIBSWSCALE_VERSION_MAJOR, \ LIBSWSCALE_VERSION_MINOR, \ @@ -50,6 +50,12 @@ #ifndef FF_API_SWS_GETCONTEXT #define FF_API_SWS_GETCONTEXT (LIBSWSCALE_VERSION_MAJOR < 2) #endif +#ifndef FF_API_SWS_CPU_CAPS +#define FF_API_SWS_CPU_CAPS (LIBSWSCALE_VERSION_MAJOR < 2) +#endif +#ifndef FF_API_SWS_FORMAT_NAME +#define FF_API_SWS_FORMAT_NAME (LIBSWSCALE_VERSION_MAJOR < 2) +#endif /** * Returns the LIBSWSCALE_VERSION_INT constant. @@ -95,12 +101,18 @@ const char *swscale_license(void); #define SWS_ACCURATE_RND 0x40000 #define SWS_BITEXACT 0x80000 +#if FF_API_SWS_CPU_CAPS +/** + * CPU caps are autodetected now, those flags + * are only provided for API compatibility. + */ #define SWS_CPU_CAPS_MMX 0x80000000 #define SWS_CPU_CAPS_MMX2 0x20000000 #define SWS_CPU_CAPS_3DNOW 0x40000000 #define SWS_CPU_CAPS_ALTIVEC 0x10000000 #define SWS_CPU_CAPS_BFIN 0x01000000 #define SWS_CPU_CAPS_SSE2 0x02000000 +#endif #define SWS_MAX_REDUCE_CUTOFF 0.002 @@ -187,6 +199,7 @@ void sws_freeContext(struct SwsContext *swsContext); * @return a pointer to an allocated context, or NULL in case of error * @note this function is to be removed after a saner alternative is * written + * @deprecated Use sws_getCachedContext() instead. 
*/ struct SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int dstW, int dstH, enum PixelFormat dstFormat, @@ -341,7 +354,7 @@ struct SwsContext *sws_getCachedContext(struct SwsContext *context, * @param num_pixels number of pixels to convert * @param palette array with [256] entries, which must match color arrangement (RGB or BGR) of src */ -void sws_convertPalette8ToPacked32(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette); +void sws_convertPalette8ToPacked32(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette); /** * Converts an 8bit paletted frame into a frame with a color depth of 24 bits. @@ -353,7 +366,7 @@ void sws_convertPalette8ToPacked32(const uint8_t *src, uint8_t *dst, long num_pi * @param num_pixels number of pixels to convert * @param palette array with [256] entries, which must match color arrangement (RGB or BGR) of src */ -void sws_convertPalette8ToPacked24(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette); +void sws_convertPalette8ToPacked24(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette); #endif /* SWSCALE_SWSCALE_H */ diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h index 03c5bf9736..c0f8e64d70 100644 --- a/libswscale/swscale_internal.h +++ b/libswscale/swscale_internal.h @@ -35,9 +35,7 @@ #define MAX_FILTER_SIZE 256 -#define VOFW 21504 - -#define VOF (VOFW*2) +#define DITHER1XBPP #if HAVE_BIGENDIAN #define ALT32_CORR (-1) @@ -61,6 +59,41 @@ typedef int (*SwsFunc)(struct SwsContext *context, const uint8_t* src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[]); +typedef void (*yuv2planar1_fn) (struct SwsContext *c, + const int16_t *lumSrc, const int16_t *chrUSrc, + const int16_t *chrVSrc, const int16_t *alpSrc, + uint8_t *dest, + uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, + int dstW, int chrDstW, const uint8_t *lumDither, const uint8_t *chrDither); +typedef 
void (*yuv2planarX_fn) (struct SwsContext *c, + const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrUSrc, + const int16_t **chrVSrc, int chrFilterSize, + const int16_t **alpSrc, + uint8_t *dest, + uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, + int dstW, int chrDstW, const uint8_t *lumDither, const uint8_t *chrDither); +typedef void (*yuv2packed1_fn) (struct SwsContext *c, + const uint16_t *buf0, + const uint16_t *ubuf0, const uint16_t *ubuf1, + const uint16_t *vbuf0, const uint16_t *vbuf1, + const uint16_t *abuf0, + uint8_t *dest, + int dstW, int uvalpha, int dstFormat, int flags, int y); +typedef void (*yuv2packed2_fn) (struct SwsContext *c, + const uint16_t *buf0, const uint16_t *buf1, + const uint16_t *ubuf0, const uint16_t *ubuf1, + const uint16_t *vbuf0, const uint16_t *vbuf1, + const uint16_t *abuf0, const uint16_t *abuf1, + uint8_t *dest, + int dstW, int yalpha, int uvalpha, int y); +typedef void (*yuv2packedX_fn) (struct SwsContext *c, + const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrUSrc, + const int16_t **chrVSrc, int chrFilterSize, + const int16_t **alpSrc, uint8_t *dest, + int dstW, int dstY); + /* This struct should be aligned on at least a 32-byte boundary. */ typedef struct SwsContext { /** @@ -108,7 +141,8 @@ typedef struct SwsContext { */ //@{ int16_t **lumPixBuf; ///< Ring buffer for scaled horizontal luma plane lines to be fed to the vertical scaler. - int16_t **chrPixBuf; ///< Ring buffer for scaled horizontal chroma plane lines to be fed to the vertical scaler. + int16_t **chrUPixBuf; ///< Ring buffer for scaled horizontal chroma plane lines to be fed to the vertical scaler. + int16_t **chrVPixBuf; ///< Ring buffer for scaled horizontal chroma plane lines to be fed to the vertical scaler. int16_t **alpPixBuf; ///< Ring buffer for scaled horizontal alpha plane lines to be fed to the vertical scaler. 
int vLumBufSize; ///< Number of vertical luma/alpha lines allocated in the ring buffer. int vChrBufSize; ///< Number of vertical chroma lines allocated in the ring buffer. @@ -118,7 +152,7 @@ typedef struct SwsContext { int chrBufIndex; ///< Index in ring buffer of the last scaled horizontal chroma line from source. //@} - uint8_t formatConvBuffer[VOF]; //FIXME dynamic allocation, but we have to change a lot of code for this to be useful + uint8_t *formatConvBuffer; /** * @name Horizontal and vertical filters. @@ -196,6 +230,10 @@ typedef struct SwsContext { #define V_TEMP "11*8+4*4*256*2+32" #define Y_TEMP "11*8+4*4*256*2+40" #define ALP_MMX_FILTER_OFFSET "11*8+4*4*256*2+48" +#define UV_OFF "11*8+4*4*256*3+48" +#define UV_OFFx2 "11*8+4*4*256*3+56" +#define DITHER16 "11*8+4*4*256*3+64" +#define DITHER32 "11*8+4*4*256*3+64+16" DECLARE_ALIGNED(8, uint64_t, redDither); DECLARE_ALIGNED(8, uint64_t, greenDither); @@ -218,6 +256,10 @@ typedef struct SwsContext { DECLARE_ALIGNED(8, uint64_t, v_temp); DECLARE_ALIGNED(8, uint64_t, y_temp); int32_t alpMmxFilter[4*MAX_FILTER_SIZE]; + DECLARE_ALIGNED(8, ptrdiff_t, uv_off); ///< offset (in pixels) between u and v planes + DECLARE_ALIGNED(8, ptrdiff_t, uv_offx2); ///< offset (in bytes) between u and v planes + uint16_t dither16[8]; + uint32_t dither32[8]; #if HAVE_ALTIVEC vector signed short CY; @@ -249,66 +291,37 @@ typedef struct SwsContext { #endif /* function pointers for swScale() */ - void (*yuv2nv12X )(struct SwsContext *c, - const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, - const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, - uint8_t *dest, uint8_t *uDest, - int dstW, int chrDstW, int dstFormat); - void (*yuv2yuv1 )(struct SwsContext *c, - const int16_t *lumSrc, const int16_t *chrSrc, const int16_t *alpSrc, - uint8_t *dest, - uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, - long dstW, long chrDstW); - void (*yuv2yuvX )(struct SwsContext *c, - const int16_t *lumFilter, const int16_t 
**lumSrc, int lumFilterSize, - const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, - const int16_t **alpSrc, - uint8_t *dest, - uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, - long dstW, long chrDstW); - void (*yuv2packed1)(struct SwsContext *c, - const uint16_t *buf0, - const uint16_t *uvbuf0, const uint16_t *uvbuf1, - const uint16_t *abuf0, - uint8_t *dest, - int dstW, int uvalpha, int dstFormat, int flags, int y); - void (*yuv2packed2)(struct SwsContext *c, - const uint16_t *buf0, const uint16_t *buf1, - const uint16_t *uvbuf0, const uint16_t *uvbuf1, - const uint16_t *abuf0, const uint16_t *abuf1, - uint8_t *dest, - int dstW, int yalpha, int uvalpha, int y); - void (*yuv2packedX)(struct SwsContext *c, - const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, - const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, - const int16_t **alpSrc, uint8_t *dest, - long dstW, long dstY); + yuv2planar1_fn yuv2yuv1; + yuv2planarX_fn yuv2yuvX; + yuv2packed1_fn yuv2packed1; + yuv2packed2_fn yuv2packed2; + yuv2packedX_fn yuv2packedX; void (*lumToYV12)(uint8_t *dst, const uint8_t *src, - long width, uint32_t *pal); ///< Unscaled conversion of luma plane to YV12 for horizontal scaler. + int width, uint32_t *pal); ///< Unscaled conversion of luma plane to YV12 for horizontal scaler. void (*alpToYV12)(uint8_t *dst, const uint8_t *src, - long width, uint32_t *pal); ///< Unscaled conversion of alpha plane to YV12 for horizontal scaler. + int width, uint32_t *pal); ///< Unscaled conversion of alpha plane to YV12 for horizontal scaler. void (*chrToYV12)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, - long width, uint32_t *pal); ///< Unscaled conversion of chroma planes to YV12 for horizontal scaler. + int width, uint32_t *pal); ///< Unscaled conversion of chroma planes to YV12 for horizontal scaler. 
void (*hyscale_fast)(struct SwsContext *c, - int16_t *dst, long dstWidth, + int16_t *dst, int dstWidth, const uint8_t *src, int srcW, int xInc); void (*hcscale_fast)(struct SwsContext *c, - int16_t *dst, long dstWidth, + int16_t *dst1, int16_t *dst2, int dstWidth, const uint8_t *src1, const uint8_t *src2, int srcW, int xInc); void (*hScale)(int16_t *dst, int dstW, const uint8_t *src, int srcW, int xInc, const int16_t *filter, const int16_t *filterPos, - long filterSize); + int filterSize); - void (*lumConvertRange)(int16_t *dst, int width); ///< Color range conversion function for luma plane if needed. - void (*chrConvertRange)(int16_t *dst, int width); ///< Color range conversion function for chroma planes if needed. + void (*hScale16)(int16_t *dst, int dstW, const uint16_t *src, int srcW, + int xInc, const int16_t *filter, const int16_t *filterPos, + long filterSize, int shift); - int lumSrcOffset; ///< Offset given to luma src pointers passed to horizontal input functions. - int chrSrcOffset; ///< Offset given to chroma src pointers passed to horizontal input functions. - int alpSrcOffset; ///< Offset given to alpha src pointers passed to horizontal input functions. + void (*lumConvertRange)(int16_t *dst, int width); ///< Color range conversion function for luma plane if needed. + void (*chrConvertRange)(int16_t *dst1, int16_t *dst2, int width); ///< Color range conversion function for chroma planes if needed. int needs_hcscale; ///< Set if there are chroma planes to be converted. 
@@ -322,18 +335,23 @@ int ff_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4], void ff_yuv2rgb_init_tables_altivec(SwsContext *c, const int inv_table[4], int brightness, int contrast, int saturation); +void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufIndex, + int lastInLumBuf, int lastInChrBuf); + SwsFunc ff_yuv2rgb_init_mmx(SwsContext *c); SwsFunc ff_yuv2rgb_init_vis(SwsContext *c); SwsFunc ff_yuv2rgb_init_mlib(SwsContext *c); SwsFunc ff_yuv2rgb_init_altivec(SwsContext *c); SwsFunc ff_yuv2rgb_get_func_ptr_bfin(SwsContext *c); void ff_bfin_get_unscaled_swscale(SwsContext *c); -void ff_yuv2packedX_altivec(SwsContext *c, - const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, - const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, - uint8_t *dest, int dstW, int dstY); +#if FF_API_SWS_FORMAT_NAME +/** + * @deprecated Use av_get_pix_fmt_name() instead. + */ +attribute_deprecated const char *sws_format_name(enum PixelFormat format); +#endif //FIXME replace this with something faster #define is16BPS(x) ( \ @@ -353,6 +371,12 @@ const char *sws_format_name(enum PixelFormat format); #define isNBPS(x) ( \ (x)==PIX_FMT_YUV420P9LE \ || (x)==PIX_FMT_YUV420P9BE \ + || (x)==PIX_FMT_YUV444P9BE \ + || (x)==PIX_FMT_YUV444P9LE \ + || (x)==PIX_FMT_YUV422P10BE \ + || (x)==PIX_FMT_YUV422P10LE \ + || (x)==PIX_FMT_YUV444P10BE \ + || (x)==PIX_FMT_YUV444P10LE \ || (x)==PIX_FMT_YUV420P10LE \ || (x)==PIX_FMT_YUV420P10BE \ || (x)==PIX_FMT_YUV422P10LE \ @@ -374,13 +398,19 @@ const char *sws_format_name(enum PixelFormat format); #define isPlanarYUV(x) ( \ isPlanar8YUV(x) \ || (x)==PIX_FMT_YUV420P9LE \ + || (x)==PIX_FMT_YUV444P9LE \ || (x)==PIX_FMT_YUV420P10LE \ + || (x)==PIX_FMT_YUV422P10LE \ + || (x)==PIX_FMT_YUV444P10LE \ || (x)==PIX_FMT_YUV420P16LE \ || (x)==PIX_FMT_YUV422P10LE \ || (x)==PIX_FMT_YUV422P16LE \ || (x)==PIX_FMT_YUV444P16LE \ || (x)==PIX_FMT_YUV420P9BE \ + || (x)==PIX_FMT_YUV444P9BE \ || 
(x)==PIX_FMT_YUV420P10BE \ + || (x)==PIX_FMT_YUV422P10BE \ + || (x)==PIX_FMT_YUV444P10BE \ || (x)==PIX_FMT_YUV420P16BE \ || (x)==PIX_FMT_YUV422P10BE \ || (x)==PIX_FMT_YUV422P16BE \ @@ -464,10 +494,20 @@ const char *sws_format_name(enum PixelFormat format); || (x)==PIX_FMT_GRAY8A \ || (x)==PIX_FMT_YUVA420P \ ) +#define isPacked(x) ( \ + (x)==PIX_FMT_PAL8 \ + || (x)==PIX_FMT_YUYV422 \ + || (x)==PIX_FMT_UYVY422 \ + || (x)==PIX_FMT_Y400A \ + || isAnyRGB(x) \ + ) #define usePal(x) ((av_pix_fmt_descriptors[x].flags & PIX_FMT_PAL) || (x) == PIX_FMT_GRAY8A) extern const uint64_t ff_dither4[2]; extern const uint64_t ff_dither8[2]; +extern const uint8_t dithers[8][8][8]; +extern const uint16_t dither_scale[15][16]; + extern const AVClass sws_context_class; @@ -477,10 +517,7 @@ extern const AVClass sws_context_class; */ void ff_get_unscaled_swscale(SwsContext *c); -/** - * Returns the SWS_CPU_CAPS for the optimized code compiled into swscale. - */ -int ff_hardcodedcpuflags(void); +void ff_swscale_get_unscaled_altivec(SwsContext *c); /** * Returns function pointer to fastest main scaler path function depending @@ -488,4 +525,7 @@ int ff_hardcodedcpuflags(void); */ SwsFunc ff_getSwsFunc(SwsContext *c); +void ff_sws_init_swScale_altivec(SwsContext *c); +void ff_sws_init_swScale_mmx(SwsContext *c); + #endif /* SWSCALE_SWSCALE_INTERNAL_H */ diff --git a/libswscale/swscale_template.c b/libswscale/swscale_template.c index e53cfc0752..9ae9fc771c 100644 --- a/libswscale/swscale_template.c +++ b/libswscale/swscale_template.c @@ -18,999 +18,55 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#undef REAL_MOVNTQ -#undef MOVNTQ -#undef PAVGB -#undef PREFETCH - -#if COMPILE_TEMPLATE_AMD3DNOW -#define PREFETCH "prefetch" -#elif COMPILE_TEMPLATE_MMX2 -#define PREFETCH "prefetchnta" -#else -#define PREFETCH " # nop" -#endif - -#if COMPILE_TEMPLATE_MMX2 -#define PAVGB(a,b) "pavgb " #a ", " #b " \n\t" -#elif COMPILE_TEMPLATE_AMD3DNOW -#define PAVGB(a,b) 
"pavgusb " #a ", " #b " \n\t" -#endif - -#if COMPILE_TEMPLATE_MMX2 -#define REAL_MOVNTQ(a,b) "movntq " #a ", " #b " \n\t" -#else -#define REAL_MOVNTQ(a,b) "movq " #a ", " #b " \n\t" -#endif -#define MOVNTQ(a,b) REAL_MOVNTQ(a,b) - -#if COMPILE_TEMPLATE_ALTIVEC -#include "ppc/swscale_altivec_template.c" -#endif - -#define YSCALEYUV2YV12X(x, offset, dest, width) \ - __asm__ volatile(\ - "xor %%"REG_a", %%"REG_a" \n\t"\ - "movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\ - "movq %%mm3, %%mm4 \n\t"\ - "lea " offset "(%0), %%"REG_d" \n\t"\ - "mov (%%"REG_d"), %%"REG_S" \n\t"\ - ".p2align 4 \n\t" /* FIXME Unroll? */\ - "1: \n\t"\ - "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\ - "movq " x "(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* srcData */\ - "movq 8+" x "(%%"REG_S", %%"REG_a", 2), %%mm5 \n\t" /* srcData */\ - "add $16, %%"REG_d" \n\t"\ - "mov (%%"REG_d"), %%"REG_S" \n\t"\ - "test %%"REG_S", %%"REG_S" \n\t"\ - "pmulhw %%mm0, %%mm2 \n\t"\ - "pmulhw %%mm0, %%mm5 \n\t"\ - "paddw %%mm2, %%mm3 \n\t"\ - "paddw %%mm5, %%mm4 \n\t"\ - " jnz 1b \n\t"\ - "psraw $3, %%mm3 \n\t"\ - "psraw $3, %%mm4 \n\t"\ - "packuswb %%mm4, %%mm3 \n\t"\ - MOVNTQ(%%mm3, (%1, %%REGa))\ - "add $8, %%"REG_a" \n\t"\ - "cmp %2, %%"REG_a" \n\t"\ - "movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\ - "movq %%mm3, %%mm4 \n\t"\ - "lea " offset "(%0), %%"REG_d" \n\t"\ - "mov (%%"REG_d"), %%"REG_S" \n\t"\ - "jb 1b \n\t"\ - :: "r" (&c->redDither),\ - "r" (dest), "g" ((x86_reg)width)\ - : "%"REG_a, "%"REG_d, "%"REG_S\ - ); - -#define YSCALEYUV2YV12X_ACCURATE(x, offset, dest, width) \ - __asm__ volatile(\ - "lea " offset "(%0), %%"REG_d" \n\t"\ - "xor %%"REG_a", %%"REG_a" \n\t"\ - "pxor %%mm4, %%mm4 \n\t"\ - "pxor %%mm5, %%mm5 \n\t"\ - "pxor %%mm6, %%mm6 \n\t"\ - "pxor %%mm7, %%mm7 \n\t"\ - "mov (%%"REG_d"), %%"REG_S" \n\t"\ - ".p2align 4 \n\t"\ - "1: \n\t"\ - "movq " x "(%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" /* srcData */\ - "movq 8+" x "(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* srcData */\ - "mov 
"STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\ - "movq " x "(%%"REG_S", %%"REG_a", 2), %%mm1 \n\t" /* srcData */\ - "movq %%mm0, %%mm3 \n\t"\ - "punpcklwd %%mm1, %%mm0 \n\t"\ - "punpckhwd %%mm1, %%mm3 \n\t"\ - "movq "STR(APCK_COEF)"(%%"REG_d"), %%mm1 \n\t" /* filterCoeff */\ - "pmaddwd %%mm1, %%mm0 \n\t"\ - "pmaddwd %%mm1, %%mm3 \n\t"\ - "paddd %%mm0, %%mm4 \n\t"\ - "paddd %%mm3, %%mm5 \n\t"\ - "movq 8+" x "(%%"REG_S", %%"REG_a", 2), %%mm3 \n\t" /* srcData */\ - "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\ - "add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\ - "test %%"REG_S", %%"REG_S" \n\t"\ - "movq %%mm2, %%mm0 \n\t"\ - "punpcklwd %%mm3, %%mm2 \n\t"\ - "punpckhwd %%mm3, %%mm0 \n\t"\ - "pmaddwd %%mm1, %%mm2 \n\t"\ - "pmaddwd %%mm1, %%mm0 \n\t"\ - "paddd %%mm2, %%mm6 \n\t"\ - "paddd %%mm0, %%mm7 \n\t"\ - " jnz 1b \n\t"\ - "psrad $16, %%mm4 \n\t"\ - "psrad $16, %%mm5 \n\t"\ - "psrad $16, %%mm6 \n\t"\ - "psrad $16, %%mm7 \n\t"\ - "movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\ - "packssdw %%mm5, %%mm4 \n\t"\ - "packssdw %%mm7, %%mm6 \n\t"\ - "paddw %%mm0, %%mm4 \n\t"\ - "paddw %%mm0, %%mm6 \n\t"\ - "psraw $3, %%mm4 \n\t"\ - "psraw $3, %%mm6 \n\t"\ - "packuswb %%mm6, %%mm4 \n\t"\ - MOVNTQ(%%mm4, (%1, %%REGa))\ - "add $8, %%"REG_a" \n\t"\ - "cmp %2, %%"REG_a" \n\t"\ - "lea " offset "(%0), %%"REG_d" \n\t"\ - "pxor %%mm4, %%mm4 \n\t"\ - "pxor %%mm5, %%mm5 \n\t"\ - "pxor %%mm6, %%mm6 \n\t"\ - "pxor %%mm7, %%mm7 \n\t"\ - "mov (%%"REG_d"), %%"REG_S" \n\t"\ - "jb 1b \n\t"\ - :: "r" (&c->redDither),\ - "r" (dest), "g" ((x86_reg)width)\ - : "%"REG_a, "%"REG_d, "%"REG_S\ - ); - -#define YSCALEYUV2YV121 \ - "mov %2, %%"REG_a" \n\t"\ - ".p2align 4 \n\t" /* FIXME Unroll? 
*/\ - "1: \n\t"\ - "movq (%0, %%"REG_a", 2), %%mm0 \n\t"\ - "movq 8(%0, %%"REG_a", 2), %%mm1 \n\t"\ - "psraw $7, %%mm0 \n\t"\ - "psraw $7, %%mm1 \n\t"\ - "packuswb %%mm1, %%mm0 \n\t"\ - MOVNTQ(%%mm0, (%1, %%REGa))\ - "add $8, %%"REG_a" \n\t"\ - "jnc 1b \n\t" - -#define YSCALEYUV2YV121_ACCURATE \ - "mov %2, %%"REG_a" \n\t"\ - "pcmpeqw %%mm7, %%mm7 \n\t"\ - "psrlw $15, %%mm7 \n\t"\ - "psllw $6, %%mm7 \n\t"\ - ".p2align 4 \n\t" /* FIXME Unroll? */\ - "1: \n\t"\ - "movq (%0, %%"REG_a", 2), %%mm0 \n\t"\ - "movq 8(%0, %%"REG_a", 2), %%mm1 \n\t"\ - "paddsw %%mm7, %%mm0 \n\t"\ - "paddsw %%mm7, %%mm1 \n\t"\ - "psraw $7, %%mm0 \n\t"\ - "psraw $7, %%mm1 \n\t"\ - "packuswb %%mm1, %%mm0 \n\t"\ - MOVNTQ(%%mm0, (%1, %%REGa))\ - "add $8, %%"REG_a" \n\t"\ - "jnc 1b \n\t" - -/* - :: "m" (-lumFilterSize), "m" (-chrFilterSize), - "m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4), - "r" (dest), "m" (dstW_reg), - "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize) - : "%eax", "%ebx", "%ecx", "%edx", "%esi" -*/ -#define YSCALEYUV2PACKEDX_UV \ - __asm__ volatile(\ - "xor %%"REG_a", %%"REG_a" \n\t"\ - ".p2align 4 \n\t"\ - "nop \n\t"\ - "1: \n\t"\ - "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\ - "mov (%%"REG_d"), %%"REG_S" \n\t"\ - "movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\ - "movq %%mm3, %%mm4 \n\t"\ - ".p2align 4 \n\t"\ - "2: \n\t"\ - "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\ - "movq (%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* UsrcData */\ - "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm5 \n\t" /* VsrcData */\ - "add $16, %%"REG_d" \n\t"\ - "mov (%%"REG_d"), %%"REG_S" \n\t"\ - "pmulhw %%mm0, %%mm2 \n\t"\ - "pmulhw %%mm0, %%mm5 \n\t"\ - "paddw %%mm2, %%mm3 \n\t"\ - "paddw %%mm5, %%mm4 \n\t"\ - "test %%"REG_S", %%"REG_S" \n\t"\ - " jnz 2b \n\t"\ - -#define YSCALEYUV2PACKEDX_YA(offset,coeff,src1,src2,dst1,dst2) \ - "lea "offset"(%0), %%"REG_d" \n\t"\ - "mov (%%"REG_d"), %%"REG_S" \n\t"\ - "movq "VROUNDER_OFFSET"(%0), "#dst1" \n\t"\ - "movq "#dst1", 
"#dst2" \n\t"\ - ".p2align 4 \n\t"\ - "2: \n\t"\ - "movq 8(%%"REG_d"), "#coeff" \n\t" /* filterCoeff */\ - "movq (%%"REG_S", %%"REG_a", 2), "#src1" \n\t" /* Y1srcData */\ - "movq 8(%%"REG_S", %%"REG_a", 2), "#src2" \n\t" /* Y2srcData */\ - "add $16, %%"REG_d" \n\t"\ - "mov (%%"REG_d"), %%"REG_S" \n\t"\ - "pmulhw "#coeff", "#src1" \n\t"\ - "pmulhw "#coeff", "#src2" \n\t"\ - "paddw "#src1", "#dst1" \n\t"\ - "paddw "#src2", "#dst2" \n\t"\ - "test %%"REG_S", %%"REG_S" \n\t"\ - " jnz 2b \n\t"\ - -#define YSCALEYUV2PACKEDX \ - YSCALEYUV2PACKEDX_UV \ - YSCALEYUV2PACKEDX_YA(LUM_MMX_FILTER_OFFSET,%%mm0,%%mm2,%%mm5,%%mm1,%%mm7) \ - -#define YSCALEYUV2PACKEDX_END \ - :: "r" (&c->redDither), \ - "m" (dummy), "m" (dummy), "m" (dummy),\ - "r" (dest), "m" (dstW_reg) \ - : "%"REG_a, "%"REG_d, "%"REG_S \ - ); - -#define YSCALEYUV2PACKEDX_ACCURATE_UV \ - __asm__ volatile(\ - "xor %%"REG_a", %%"REG_a" \n\t"\ - ".p2align 4 \n\t"\ - "nop \n\t"\ - "1: \n\t"\ - "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\ - "mov (%%"REG_d"), %%"REG_S" \n\t"\ - "pxor %%mm4, %%mm4 \n\t"\ - "pxor %%mm5, %%mm5 \n\t"\ - "pxor %%mm6, %%mm6 \n\t"\ - "pxor %%mm7, %%mm7 \n\t"\ - ".p2align 4 \n\t"\ - "2: \n\t"\ - "movq (%%"REG_S", %%"REG_a"), %%mm0 \n\t" /* UsrcData */\ - "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* VsrcData */\ - "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\ - "movq (%%"REG_S", %%"REG_a"), %%mm1 \n\t" /* UsrcData */\ - "movq %%mm0, %%mm3 \n\t"\ - "punpcklwd %%mm1, %%mm0 \n\t"\ - "punpckhwd %%mm1, %%mm3 \n\t"\ - "movq "STR(APCK_COEF)"(%%"REG_d"),%%mm1 \n\t" /* filterCoeff */\ - "pmaddwd %%mm1, %%mm0 \n\t"\ - "pmaddwd %%mm1, %%mm3 \n\t"\ - "paddd %%mm0, %%mm4 \n\t"\ - "paddd %%mm3, %%mm5 \n\t"\ - "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm3 \n\t" /* VsrcData */\ - "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\ - "add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\ - "test %%"REG_S", %%"REG_S" \n\t"\ - "movq %%mm2, %%mm0 \n\t"\ - "punpcklwd %%mm3, %%mm2 \n\t"\ - 
"punpckhwd %%mm3, %%mm0 \n\t"\ - "pmaddwd %%mm1, %%mm2 \n\t"\ - "pmaddwd %%mm1, %%mm0 \n\t"\ - "paddd %%mm2, %%mm6 \n\t"\ - "paddd %%mm0, %%mm7 \n\t"\ - " jnz 2b \n\t"\ - "psrad $16, %%mm4 \n\t"\ - "psrad $16, %%mm5 \n\t"\ - "psrad $16, %%mm6 \n\t"\ - "psrad $16, %%mm7 \n\t"\ - "movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\ - "packssdw %%mm5, %%mm4 \n\t"\ - "packssdw %%mm7, %%mm6 \n\t"\ - "paddw %%mm0, %%mm4 \n\t"\ - "paddw %%mm0, %%mm6 \n\t"\ - "movq %%mm4, "U_TEMP"(%0) \n\t"\ - "movq %%mm6, "V_TEMP"(%0) \n\t"\ - -#define YSCALEYUV2PACKEDX_ACCURATE_YA(offset) \ - "lea "offset"(%0), %%"REG_d" \n\t"\ - "mov (%%"REG_d"), %%"REG_S" \n\t"\ - "pxor %%mm1, %%mm1 \n\t"\ - "pxor %%mm5, %%mm5 \n\t"\ - "pxor %%mm7, %%mm7 \n\t"\ - "pxor %%mm6, %%mm6 \n\t"\ - ".p2align 4 \n\t"\ - "2: \n\t"\ - "movq (%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" /* Y1srcData */\ - "movq 8(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* Y2srcData */\ - "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\ - "movq (%%"REG_S", %%"REG_a", 2), %%mm4 \n\t" /* Y1srcData */\ - "movq %%mm0, %%mm3 \n\t"\ - "punpcklwd %%mm4, %%mm0 \n\t"\ - "punpckhwd %%mm4, %%mm3 \n\t"\ - "movq "STR(APCK_COEF)"(%%"REG_d"), %%mm4 \n\t" /* filterCoeff */\ - "pmaddwd %%mm4, %%mm0 \n\t"\ - "pmaddwd %%mm4, %%mm3 \n\t"\ - "paddd %%mm0, %%mm1 \n\t"\ - "paddd %%mm3, %%mm5 \n\t"\ - "movq 8(%%"REG_S", %%"REG_a", 2), %%mm3 \n\t" /* Y2srcData */\ - "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\ - "add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\ - "test %%"REG_S", %%"REG_S" \n\t"\ - "movq %%mm2, %%mm0 \n\t"\ - "punpcklwd %%mm3, %%mm2 \n\t"\ - "punpckhwd %%mm3, %%mm0 \n\t"\ - "pmaddwd %%mm4, %%mm2 \n\t"\ - "pmaddwd %%mm4, %%mm0 \n\t"\ - "paddd %%mm2, %%mm7 \n\t"\ - "paddd %%mm0, %%mm6 \n\t"\ - " jnz 2b \n\t"\ - "psrad $16, %%mm1 \n\t"\ - "psrad $16, %%mm5 \n\t"\ - "psrad $16, %%mm7 \n\t"\ - "psrad $16, %%mm6 \n\t"\ - "movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\ - "packssdw %%mm5, %%mm1 \n\t"\ - "packssdw %%mm6, %%mm7 \n\t"\ - "paddw %%mm0, %%mm1 \n\t"\ - "paddw 
%%mm0, %%mm7 \n\t"\ - "movq "U_TEMP"(%0), %%mm3 \n\t"\ - "movq "V_TEMP"(%0), %%mm4 \n\t"\ - -#define YSCALEYUV2PACKEDX_ACCURATE \ - YSCALEYUV2PACKEDX_ACCURATE_UV \ - YSCALEYUV2PACKEDX_ACCURATE_YA(LUM_MMX_FILTER_OFFSET) - -#define YSCALEYUV2RGBX \ - "psubw "U_OFFSET"(%0), %%mm3 \n\t" /* (U-128)8*/\ - "psubw "V_OFFSET"(%0), %%mm4 \n\t" /* (V-128)8*/\ - "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ - "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\ - "pmulhw "UG_COEFF"(%0), %%mm3 \n\t"\ - "pmulhw "VG_COEFF"(%0), %%mm4 \n\t"\ - /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\ - "pmulhw "UB_COEFF"(%0), %%mm2 \n\t"\ - "pmulhw "VR_COEFF"(%0), %%mm5 \n\t"\ - "psubw "Y_OFFSET"(%0), %%mm1 \n\t" /* 8(Y-16)*/\ - "psubw "Y_OFFSET"(%0), %%mm7 \n\t" /* 8(Y-16)*/\ - "pmulhw "Y_COEFF"(%0), %%mm1 \n\t"\ - "pmulhw "Y_COEFF"(%0), %%mm7 \n\t"\ - /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\ - "paddw %%mm3, %%mm4 \n\t"\ - "movq %%mm2, %%mm0 \n\t"\ - "movq %%mm5, %%mm6 \n\t"\ - "movq %%mm4, %%mm3 \n\t"\ - "punpcklwd %%mm2, %%mm2 \n\t"\ - "punpcklwd %%mm5, %%mm5 \n\t"\ - "punpcklwd %%mm4, %%mm4 \n\t"\ - "paddw %%mm1, %%mm2 \n\t"\ - "paddw %%mm1, %%mm5 \n\t"\ - "paddw %%mm1, %%mm4 \n\t"\ - "punpckhwd %%mm0, %%mm0 \n\t"\ - "punpckhwd %%mm6, %%mm6 \n\t"\ - "punpckhwd %%mm3, %%mm3 \n\t"\ - "paddw %%mm7, %%mm0 \n\t"\ - "paddw %%mm7, %%mm6 \n\t"\ - "paddw %%mm7, %%mm3 \n\t"\ - /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\ - "packuswb %%mm0, %%mm2 \n\t"\ - "packuswb %%mm6, %%mm5 \n\t"\ - "packuswb %%mm3, %%mm4 \n\t"\ - -#define REAL_YSCALEYUV2PACKED(index, c) \ - "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\ - "movq "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm1 \n\t"\ - "psraw $3, %%mm0 \n\t"\ - "psraw $3, %%mm1 \n\t"\ - "movq %%mm0, "CHR_MMX_FILTER_OFFSET"+8("#c") \n\t"\ - "movq %%mm1, "LUM_MMX_FILTER_OFFSET"+8("#c") \n\t"\ - "xor "#index", "#index" \n\t"\ - ".p2align 4 \n\t"\ - "1: \n\t"\ - "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ - "movq (%3, "#index"), %%mm3 \n\t" /* 
uvbuf1[eax]*/\ - "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ - "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ - "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\ - "psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\ - "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\ - "pmulhw %%mm0, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\ - "pmulhw %%mm0, %%mm5 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\ - "psraw $7, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\ - "psraw $7, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\ - "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\ - "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\ - "movq (%0, "#index", 2), %%mm0 \n\t" /*buf0[eax]*/\ - "movq (%1, "#index", 2), %%mm1 \n\t" /*buf1[eax]*/\ - "movq 8(%0, "#index", 2), %%mm6 \n\t" /*buf0[eax]*/\ - "movq 8(%1, "#index", 2), %%mm7 \n\t" /*buf1[eax]*/\ - "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\ - "psubw %%mm7, %%mm6 \n\t" /* buf0[eax] - buf1[eax]*/\ - "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\ - "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\ - "psraw $7, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ - "psraw $7, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ - "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\ - "paddw %%mm6, %%mm7 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\ - -#define YSCALEYUV2PACKED(index, c) REAL_YSCALEYUV2PACKED(index, c) - -#define REAL_YSCALEYUV2RGB_UV(index, c) \ - "xor "#index", "#index" \n\t"\ - ".p2align 4 \n\t"\ - "1: \n\t"\ - "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ - "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ - "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ - "movq 
"AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ - "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\ - "psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\ - "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\ - "pmulhw %%mm0, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\ - "pmulhw %%mm0, %%mm5 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\ - "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\ - "psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\ - "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\ - "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\ - "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\ - "psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8*/\ - "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ - "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\ - "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\ - "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\ - /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\ - -#define REAL_YSCALEYUV2RGB_YA(index, c, b1, b2) \ - "movq ("#b1", "#index", 2), %%mm0 \n\t" /*buf0[eax]*/\ - "movq ("#b2", "#index", 2), %%mm1 \n\t" /*buf1[eax]*/\ - "movq 8("#b1", "#index", 2), %%mm6 \n\t" /*buf0[eax]*/\ - "movq 8("#b2", "#index", 2), %%mm7 \n\t" /*buf1[eax]*/\ - "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\ - "psubw %%mm7, %%mm6 \n\t" /* buf0[eax] - buf1[eax]*/\ - "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\ - "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\ - "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ - "psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ - "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\ - "paddw %%mm6, %%mm7 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\ - -#define REAL_YSCALEYUV2RGB_COEFF(c) \ - "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\ - "pmulhw 
"VR_COEFF"("#c"), %%mm5 \n\t"\ - "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16)*/\ - "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16)*/\ - "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\ - "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\ - /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\ - "paddw %%mm3, %%mm4 \n\t"\ - "movq %%mm2, %%mm0 \n\t"\ - "movq %%mm5, %%mm6 \n\t"\ - "movq %%mm4, %%mm3 \n\t"\ - "punpcklwd %%mm2, %%mm2 \n\t"\ - "punpcklwd %%mm5, %%mm5 \n\t"\ - "punpcklwd %%mm4, %%mm4 \n\t"\ - "paddw %%mm1, %%mm2 \n\t"\ - "paddw %%mm1, %%mm5 \n\t"\ - "paddw %%mm1, %%mm4 \n\t"\ - "punpckhwd %%mm0, %%mm0 \n\t"\ - "punpckhwd %%mm6, %%mm6 \n\t"\ - "punpckhwd %%mm3, %%mm3 \n\t"\ - "paddw %%mm7, %%mm0 \n\t"\ - "paddw %%mm7, %%mm6 \n\t"\ - "paddw %%mm7, %%mm3 \n\t"\ - /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\ - "packuswb %%mm0, %%mm2 \n\t"\ - "packuswb %%mm6, %%mm5 \n\t"\ - "packuswb %%mm3, %%mm4 \n\t"\ - -#define YSCALEYUV2RGB_YA(index, c, b1, b2) REAL_YSCALEYUV2RGB_YA(index, c, b1, b2) - -#define YSCALEYUV2RGB(index, c) \ - REAL_YSCALEYUV2RGB_UV(index, c) \ - REAL_YSCALEYUV2RGB_YA(index, c, %0, %1) \ - REAL_YSCALEYUV2RGB_COEFF(c) - -#define REAL_YSCALEYUV2PACKED1(index, c) \ - "xor "#index", "#index" \n\t"\ - ".p2align 4 \n\t"\ - "1: \n\t"\ - "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\ - "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\ - "psraw $7, %%mm3 \n\t" \ - "psraw $7, %%mm4 \n\t" \ - "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\ - "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax]*/\ - "psraw $7, %%mm1 \n\t" \ - "psraw $7, %%mm7 \n\t" \ - -#define YSCALEYUV2PACKED1(index, c) REAL_YSCALEYUV2PACKED1(index, c) - -#define REAL_YSCALEYUV2RGB1(index, c) \ - "xor "#index", "#index" \n\t"\ - ".p2align 4 \n\t"\ - "1: \n\t"\ - "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\ - "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\ - "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\ - "psraw $4, %%mm4 
\n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\ - "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\ - "psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8*/\ - "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ - "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\ - "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\ - "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\ - /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\ - "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\ - "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax]*/\ - "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ - "psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ - "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\ - "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\ - "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16)*/\ - "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16)*/\ - "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\ - "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\ - /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\ - "paddw %%mm3, %%mm4 \n\t"\ - "movq %%mm2, %%mm0 \n\t"\ - "movq %%mm5, %%mm6 \n\t"\ - "movq %%mm4, %%mm3 \n\t"\ - "punpcklwd %%mm2, %%mm2 \n\t"\ - "punpcklwd %%mm5, %%mm5 \n\t"\ - "punpcklwd %%mm4, %%mm4 \n\t"\ - "paddw %%mm1, %%mm2 \n\t"\ - "paddw %%mm1, %%mm5 \n\t"\ - "paddw %%mm1, %%mm4 \n\t"\ - "punpckhwd %%mm0, %%mm0 \n\t"\ - "punpckhwd %%mm6, %%mm6 \n\t"\ - "punpckhwd %%mm3, %%mm3 \n\t"\ - "paddw %%mm7, %%mm0 \n\t"\ - "paddw %%mm7, %%mm6 \n\t"\ - "paddw %%mm7, %%mm3 \n\t"\ - /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\ - "packuswb %%mm0, %%mm2 \n\t"\ - "packuswb %%mm6, %%mm5 \n\t"\ - "packuswb %%mm3, %%mm4 \n\t"\ - -#define YSCALEYUV2RGB1(index, c) REAL_YSCALEYUV2RGB1(index, c) - -#define REAL_YSCALEYUV2PACKED1b(index, c) \ - "xor "#index", "#index" \n\t"\ - ".p2align 4 \n\t"\ - "1: \n\t"\ - "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ - "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ - "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ - "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4 \n\t" /* 
uvbuf1[eax+2048]*/\ - "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\ - "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\ - "psrlw $8, %%mm3 \n\t" \ - "psrlw $8, %%mm4 \n\t" \ - "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\ - "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax]*/\ - "psraw $7, %%mm1 \n\t" \ - "psraw $7, %%mm7 \n\t" -#define YSCALEYUV2PACKED1b(index, c) REAL_YSCALEYUV2PACKED1b(index, c) - -// do vertical chrominance interpolation -#define REAL_YSCALEYUV2RGB1b(index, c) \ - "xor "#index", "#index" \n\t"\ - ".p2align 4 \n\t"\ - "1: \n\t"\ - "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ - "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ - "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ - "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ - "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\ - "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\ - "psrlw $5, %%mm3 \n\t" /*FIXME might overflow*/\ - "psrlw $5, %%mm4 \n\t" /*FIXME might overflow*/\ - "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\ - "psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8*/\ - "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ - "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\ - "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\ - "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\ - /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\ - "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\ - "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax]*/\ - "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ - "psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ - "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\ - "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\ - "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16)*/\ - "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16)*/\ - "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\ - "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\ - /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\ - "paddw %%mm3, %%mm4 \n\t"\ - "movq 
%%mm2, %%mm0 \n\t"\ - "movq %%mm5, %%mm6 \n\t"\ - "movq %%mm4, %%mm3 \n\t"\ - "punpcklwd %%mm2, %%mm2 \n\t"\ - "punpcklwd %%mm5, %%mm5 \n\t"\ - "punpcklwd %%mm4, %%mm4 \n\t"\ - "paddw %%mm1, %%mm2 \n\t"\ - "paddw %%mm1, %%mm5 \n\t"\ - "paddw %%mm1, %%mm4 \n\t"\ - "punpckhwd %%mm0, %%mm0 \n\t"\ - "punpckhwd %%mm6, %%mm6 \n\t"\ - "punpckhwd %%mm3, %%mm3 \n\t"\ - "paddw %%mm7, %%mm0 \n\t"\ - "paddw %%mm7, %%mm6 \n\t"\ - "paddw %%mm7, %%mm3 \n\t"\ - /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\ - "packuswb %%mm0, %%mm2 \n\t"\ - "packuswb %%mm6, %%mm5 \n\t"\ - "packuswb %%mm3, %%mm4 \n\t"\ - -#define YSCALEYUV2RGB1b(index, c) REAL_YSCALEYUV2RGB1b(index, c) - -#define REAL_YSCALEYUV2RGB1_ALPHA(index) \ - "movq (%1, "#index", 2), %%mm7 \n\t" /* abuf0[index ] */\ - "movq 8(%1, "#index", 2), %%mm1 \n\t" /* abuf0[index+4] */\ - "psraw $7, %%mm7 \n\t" /* abuf0[index ] >>7 */\ - "psraw $7, %%mm1 \n\t" /* abuf0[index+4] >>7 */\ - "packuswb %%mm1, %%mm7 \n\t" -#define YSCALEYUV2RGB1_ALPHA(index) REAL_YSCALEYUV2RGB1_ALPHA(index) - -#define REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) \ - "movq "#b", "#q2" \n\t" /* B */\ - "movq "#r", "#t" \n\t" /* R */\ - "punpcklbw "#g", "#b" \n\t" /* GBGBGBGB 0 */\ - "punpcklbw "#a", "#r" \n\t" /* ARARARAR 0 */\ - "punpckhbw "#g", "#q2" \n\t" /* GBGBGBGB 2 */\ - "punpckhbw "#a", "#t" \n\t" /* ARARARAR 2 */\ - "movq "#b", "#q0" \n\t" /* GBGBGBGB 0 */\ - "movq "#q2", "#q3" \n\t" /* GBGBGBGB 2 */\ - "punpcklwd "#r", "#q0" \n\t" /* ARGBARGB 0 */\ - "punpckhwd "#r", "#b" \n\t" /* ARGBARGB 1 */\ - "punpcklwd "#t", "#q2" \n\t" /* ARGBARGB 2 */\ - "punpckhwd "#t", "#q3" \n\t" /* ARGBARGB 3 */\ -\ - MOVNTQ( q0, (dst, index, 4))\ - MOVNTQ( b, 8(dst, index, 4))\ - MOVNTQ( q2, 16(dst, index, 4))\ - MOVNTQ( q3, 24(dst, index, 4))\ -\ - "add $8, "#index" \n\t"\ - "cmp "#dstw", "#index" \n\t"\ - " jb 1b \n\t" -#define WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) - 
-#define REAL_WRITERGB16(dst, dstw, index) \ - "pand "MANGLE(bF8)", %%mm2 \n\t" /* B */\ - "pand "MANGLE(bFC)", %%mm4 \n\t" /* G */\ - "pand "MANGLE(bF8)", %%mm5 \n\t" /* R */\ - "psrlq $3, %%mm2 \n\t"\ -\ - "movq %%mm2, %%mm1 \n\t"\ - "movq %%mm4, %%mm3 \n\t"\ -\ - "punpcklbw %%mm7, %%mm3 \n\t"\ - "punpcklbw %%mm5, %%mm2 \n\t"\ - "punpckhbw %%mm7, %%mm4 \n\t"\ - "punpckhbw %%mm5, %%mm1 \n\t"\ -\ - "psllq $3, %%mm3 \n\t"\ - "psllq $3, %%mm4 \n\t"\ -\ - "por %%mm3, %%mm2 \n\t"\ - "por %%mm4, %%mm1 \n\t"\ -\ - MOVNTQ(%%mm2, (dst, index, 2))\ - MOVNTQ(%%mm1, 8(dst, index, 2))\ -\ - "add $8, "#index" \n\t"\ - "cmp "#dstw", "#index" \n\t"\ - " jb 1b \n\t" -#define WRITERGB16(dst, dstw, index) REAL_WRITERGB16(dst, dstw, index) - -#define REAL_WRITERGB15(dst, dstw, index) \ - "pand "MANGLE(bF8)", %%mm2 \n\t" /* B */\ - "pand "MANGLE(bF8)", %%mm4 \n\t" /* G */\ - "pand "MANGLE(bF8)", %%mm5 \n\t" /* R */\ - "psrlq $3, %%mm2 \n\t"\ - "psrlq $1, %%mm5 \n\t"\ -\ - "movq %%mm2, %%mm1 \n\t"\ - "movq %%mm4, %%mm3 \n\t"\ -\ - "punpcklbw %%mm7, %%mm3 \n\t"\ - "punpcklbw %%mm5, %%mm2 \n\t"\ - "punpckhbw %%mm7, %%mm4 \n\t"\ - "punpckhbw %%mm5, %%mm1 \n\t"\ -\ - "psllq $2, %%mm3 \n\t"\ - "psllq $2, %%mm4 \n\t"\ -\ - "por %%mm3, %%mm2 \n\t"\ - "por %%mm4, %%mm1 \n\t"\ -\ - MOVNTQ(%%mm2, (dst, index, 2))\ - MOVNTQ(%%mm1, 8(dst, index, 2))\ -\ - "add $8, "#index" \n\t"\ - "cmp "#dstw", "#index" \n\t"\ - " jb 1b \n\t" -#define WRITERGB15(dst, dstw, index) REAL_WRITERGB15(dst, dstw, index) - -#define WRITEBGR24OLD(dst, dstw, index) \ - /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\ - "movq %%mm2, %%mm1 \n\t" /* B */\ - "movq %%mm5, %%mm6 \n\t" /* R */\ - "punpcklbw %%mm4, %%mm2 \n\t" /* GBGBGBGB 0 */\ - "punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R 0 */\ - "punpckhbw %%mm4, %%mm1 \n\t" /* GBGBGBGB 2 */\ - "punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R 2 */\ - "movq %%mm2, %%mm0 \n\t" /* GBGBGBGB 0 */\ - "movq %%mm1, %%mm3 \n\t" /* GBGBGBGB 2 */\ - "punpcklwd %%mm5, %%mm0 \n\t" /* 0RGB0RGB 0 */\ - 
"punpckhwd %%mm5, %%mm2 \n\t" /* 0RGB0RGB 1 */\ - "punpcklwd %%mm6, %%mm1 \n\t" /* 0RGB0RGB 2 */\ - "punpckhwd %%mm6, %%mm3 \n\t" /* 0RGB0RGB 3 */\ -\ - "movq %%mm0, %%mm4 \n\t" /* 0RGB0RGB 0 */\ - "psrlq $8, %%mm0 \n\t" /* 00RGB0RG 0 */\ - "pand "MANGLE(bm00000111)", %%mm4 \n\t" /* 00000RGB 0 */\ - "pand "MANGLE(bm11111000)", %%mm0 \n\t" /* 00RGB000 0.5 */\ - "por %%mm4, %%mm0 \n\t" /* 00RGBRGB 0 */\ - "movq %%mm2, %%mm4 \n\t" /* 0RGB0RGB 1 */\ - "psllq $48, %%mm2 \n\t" /* GB000000 1 */\ - "por %%mm2, %%mm0 \n\t" /* GBRGBRGB 0 */\ -\ - "movq %%mm4, %%mm2 \n\t" /* 0RGB0RGB 1 */\ - "psrld $16, %%mm4 \n\t" /* 000R000R 1 */\ - "psrlq $24, %%mm2 \n\t" /* 0000RGB0 1.5 */\ - "por %%mm4, %%mm2 \n\t" /* 000RRGBR 1 */\ - "pand "MANGLE(bm00001111)", %%mm2 \n\t" /* 0000RGBR 1 */\ - "movq %%mm1, %%mm4 \n\t" /* 0RGB0RGB 2 */\ - "psrlq $8, %%mm1 \n\t" /* 00RGB0RG 2 */\ - "pand "MANGLE(bm00000111)", %%mm4 \n\t" /* 00000RGB 2 */\ - "pand "MANGLE(bm11111000)", %%mm1 \n\t" /* 00RGB000 2.5 */\ - "por %%mm4, %%mm1 \n\t" /* 00RGBRGB 2 */\ - "movq %%mm1, %%mm4 \n\t" /* 00RGBRGB 2 */\ - "psllq $32, %%mm1 \n\t" /* BRGB0000 2 */\ - "por %%mm1, %%mm2 \n\t" /* BRGBRGBR 1 */\ -\ - "psrlq $32, %%mm4 \n\t" /* 000000RG 2.5 */\ - "movq %%mm3, %%mm5 \n\t" /* 0RGB0RGB 3 */\ - "psrlq $8, %%mm3 \n\t" /* 00RGB0RG 3 */\ - "pand "MANGLE(bm00000111)", %%mm5 \n\t" /* 00000RGB 3 */\ - "pand "MANGLE(bm11111000)", %%mm3 \n\t" /* 00RGB000 3.5 */\ - "por %%mm5, %%mm3 \n\t" /* 00RGBRGB 3 */\ - "psllq $16, %%mm3 \n\t" /* RGBRGB00 3 */\ - "por %%mm4, %%mm3 \n\t" /* RGBRGBRG 2.5 */\ -\ - MOVNTQ(%%mm0, (dst))\ - MOVNTQ(%%mm2, 8(dst))\ - MOVNTQ(%%mm3, 16(dst))\ - "add $24, "#dst" \n\t"\ -\ - "add $8, "#index" \n\t"\ - "cmp "#dstw", "#index" \n\t"\ - " jb 1b \n\t" - -#define WRITEBGR24MMX(dst, dstw, index) \ - /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\ - "movq %%mm2, %%mm1 \n\t" /* B */\ - "movq %%mm5, %%mm6 \n\t" /* R */\ - "punpcklbw %%mm4, %%mm2 \n\t" /* GBGBGBGB 0 */\ - "punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R 0 
*/\ - "punpckhbw %%mm4, %%mm1 \n\t" /* GBGBGBGB 2 */\ - "punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R 2 */\ - "movq %%mm2, %%mm0 \n\t" /* GBGBGBGB 0 */\ - "movq %%mm1, %%mm3 \n\t" /* GBGBGBGB 2 */\ - "punpcklwd %%mm5, %%mm0 \n\t" /* 0RGB0RGB 0 */\ - "punpckhwd %%mm5, %%mm2 \n\t" /* 0RGB0RGB 1 */\ - "punpcklwd %%mm6, %%mm1 \n\t" /* 0RGB0RGB 2 */\ - "punpckhwd %%mm6, %%mm3 \n\t" /* 0RGB0RGB 3 */\ -\ - "movq %%mm0, %%mm4 \n\t" /* 0RGB0RGB 0 */\ - "movq %%mm2, %%mm6 \n\t" /* 0RGB0RGB 1 */\ - "movq %%mm1, %%mm5 \n\t" /* 0RGB0RGB 2 */\ - "movq %%mm3, %%mm7 \n\t" /* 0RGB0RGB 3 */\ -\ - "psllq $40, %%mm0 \n\t" /* RGB00000 0 */\ - "psllq $40, %%mm2 \n\t" /* RGB00000 1 */\ - "psllq $40, %%mm1 \n\t" /* RGB00000 2 */\ - "psllq $40, %%mm3 \n\t" /* RGB00000 3 */\ -\ - "punpckhdq %%mm4, %%mm0 \n\t" /* 0RGBRGB0 0 */\ - "punpckhdq %%mm6, %%mm2 \n\t" /* 0RGBRGB0 1 */\ - "punpckhdq %%mm5, %%mm1 \n\t" /* 0RGBRGB0 2 */\ - "punpckhdq %%mm7, %%mm3 \n\t" /* 0RGBRGB0 3 */\ -\ - "psrlq $8, %%mm0 \n\t" /* 00RGBRGB 0 */\ - "movq %%mm2, %%mm6 \n\t" /* 0RGBRGB0 1 */\ - "psllq $40, %%mm2 \n\t" /* GB000000 1 */\ - "por %%mm2, %%mm0 \n\t" /* GBRGBRGB 0 */\ - MOVNTQ(%%mm0, (dst))\ -\ - "psrlq $24, %%mm6 \n\t" /* 0000RGBR 1 */\ - "movq %%mm1, %%mm5 \n\t" /* 0RGBRGB0 2 */\ - "psllq $24, %%mm1 \n\t" /* BRGB0000 2 */\ - "por %%mm1, %%mm6 \n\t" /* BRGBRGBR 1 */\ - MOVNTQ(%%mm6, 8(dst))\ -\ - "psrlq $40, %%mm5 \n\t" /* 000000RG 2 */\ - "psllq $8, %%mm3 \n\t" /* RGBRGB00 3 */\ - "por %%mm3, %%mm5 \n\t" /* RGBRGBRG 2 */\ - MOVNTQ(%%mm5, 16(dst))\ -\ - "add $24, "#dst" \n\t"\ -\ - "add $8, "#index" \n\t"\ - "cmp "#dstw", "#index" \n\t"\ - " jb 1b \n\t" - -#define WRITEBGR24MMX2(dst, dstw, index) \ - /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\ - "movq "MANGLE(ff_M24A)", %%mm0 \n\t"\ - "movq "MANGLE(ff_M24C)", %%mm7 \n\t"\ - "pshufw $0x50, %%mm2, %%mm1 \n\t" /* B3 B2 B3 B2 B1 B0 B1 B0 */\ - "pshufw $0x50, %%mm4, %%mm3 \n\t" /* G3 G2 G3 G2 G1 G0 G1 G0 */\ - "pshufw $0x00, %%mm5, %%mm6 \n\t" /* R1 R0 R1 R0 R1 R0 R1 R0 
*/\ -\ - "pand %%mm0, %%mm1 \n\t" /* B2 B1 B0 */\ - "pand %%mm0, %%mm3 \n\t" /* G2 G1 G0 */\ - "pand %%mm7, %%mm6 \n\t" /* R1 R0 */\ -\ - "psllq $8, %%mm3 \n\t" /* G2 G1 G0 */\ - "por %%mm1, %%mm6 \n\t"\ - "por %%mm3, %%mm6 \n\t"\ - MOVNTQ(%%mm6, (dst))\ -\ - "psrlq $8, %%mm4 \n\t" /* 00 G7 G6 G5 G4 G3 G2 G1 */\ - "pshufw $0xA5, %%mm2, %%mm1 \n\t" /* B5 B4 B5 B4 B3 B2 B3 B2 */\ - "pshufw $0x55, %%mm4, %%mm3 \n\t" /* G4 G3 G4 G3 G4 G3 G4 G3 */\ - "pshufw $0xA5, %%mm5, %%mm6 \n\t" /* R5 R4 R5 R4 R3 R2 R3 R2 */\ -\ - "pand "MANGLE(ff_M24B)", %%mm1 \n\t" /* B5 B4 B3 */\ - "pand %%mm7, %%mm3 \n\t" /* G4 G3 */\ - "pand %%mm0, %%mm6 \n\t" /* R4 R3 R2 */\ -\ - "por %%mm1, %%mm3 \n\t" /* B5 G4 B4 G3 B3 */\ - "por %%mm3, %%mm6 \n\t"\ - MOVNTQ(%%mm6, 8(dst))\ -\ - "pshufw $0xFF, %%mm2, %%mm1 \n\t" /* B7 B6 B7 B6 B7 B6 B6 B7 */\ - "pshufw $0xFA, %%mm4, %%mm3 \n\t" /* 00 G7 00 G7 G6 G5 G6 G5 */\ - "pshufw $0xFA, %%mm5, %%mm6 \n\t" /* R7 R6 R7 R6 R5 R4 R5 R4 */\ -\ - "pand %%mm7, %%mm1 \n\t" /* B7 B6 */\ - "pand %%mm0, %%mm3 \n\t" /* G7 G6 G5 */\ - "pand "MANGLE(ff_M24B)", %%mm6 \n\t" /* R7 R6 R5 */\ -\ - "por %%mm1, %%mm3 \n\t"\ - "por %%mm3, %%mm6 \n\t"\ - MOVNTQ(%%mm6, 16(dst))\ -\ - "add $24, "#dst" \n\t"\ -\ - "add $8, "#index" \n\t"\ - "cmp "#dstw", "#index" \n\t"\ - " jb 1b \n\t" - -#if COMPILE_TEMPLATE_MMX2 -#undef WRITEBGR24 -#define WRITEBGR24(dst, dstw, index) WRITEBGR24MMX2(dst, dstw, index) -#else -#undef WRITEBGR24 -#define WRITEBGR24(dst, dstw, index) WRITEBGR24MMX(dst, dstw, index) -#endif - -#define REAL_WRITEYUY2(dst, dstw, index) \ - "packuswb %%mm3, %%mm3 \n\t"\ - "packuswb %%mm4, %%mm4 \n\t"\ - "packuswb %%mm7, %%mm1 \n\t"\ - "punpcklbw %%mm4, %%mm3 \n\t"\ - "movq %%mm1, %%mm7 \n\t"\ - "punpcklbw %%mm3, %%mm1 \n\t"\ - "punpckhbw %%mm3, %%mm7 \n\t"\ -\ - MOVNTQ(%%mm1, (dst, index, 2))\ - MOVNTQ(%%mm7, 8(dst, index, 2))\ -\ - "add $8, "#index" \n\t"\ - "cmp "#dstw", "#index" \n\t"\ - " jb 1b \n\t" -#define WRITEYUY2(dst, dstw, index) REAL_WRITEYUY2(dst, dstw, 
index) - - -static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, - const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, const int16_t **alpSrc, - uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW) +static inline void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter, + const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrUSrc, + const int16_t **chrVSrc, + int chrFilterSize, const int16_t **alpSrc, + uint8_t *dest, uint8_t *uDest, uint8_t *vDest, + uint8_t *aDest, int dstW, int chrDstW, const uint8_t *lumDither, const uint8_t *chrDither) { -#if COMPILE_TEMPLATE_MMX - if(!(c->flags & SWS_BITEXACT)) { - if (c->flags & SWS_ACCURATE_RND) { - if (uDest) { - YSCALEYUV2YV12X_ACCURATE( "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW) - YSCALEYUV2YV12X_ACCURATE(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW) - } - if (CONFIG_SWSCALE_ALPHA && aDest) { - YSCALEYUV2YV12X_ACCURATE( "0", ALP_MMX_FILTER_OFFSET, aDest, dstW) - } - - YSCALEYUV2YV12X_ACCURATE("0", LUM_MMX_FILTER_OFFSET, dest, dstW) - } else { - if (uDest) { - YSCALEYUV2YV12X( "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW) - YSCALEYUV2YV12X(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW) - } - if (CONFIG_SWSCALE_ALPHA && aDest) { - YSCALEYUV2YV12X( "0", ALP_MMX_FILTER_OFFSET, aDest, dstW) - } - - YSCALEYUV2YV12X("0", LUM_MMX_FILTER_OFFSET, dest, dstW) - } - return; - } -#endif -#if COMPILE_TEMPLATE_ALTIVEC - yuv2yuvX_altivec_real(lumFilter, lumSrc, lumFilterSize, - chrFilter, chrSrc, chrFilterSize, - dest, uDest, vDest, dstW, chrDstW); -#else //COMPILE_TEMPLATE_ALTIVEC yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize, - chrFilter, chrSrc, chrFilterSize, - alpSrc, dest, uDest, vDest, aDest, dstW, chrDstW); -#endif //!COMPILE_TEMPLATE_ALTIVEC + chrFilter, chrUSrc, chrVSrc, chrFilterSize, + alpSrc, dest, uDest, vDest, aDest, dstW, chrDstW, lumDither, 
chrDither); } -static inline void RENAME(yuv2nv12X)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, - const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, - uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, enum PixelFormat dstFormat) +static inline void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter, + const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrUSrc, + const int16_t **chrVSrc, + int chrFilterSize, uint8_t *dest, uint8_t *uDest, + int dstW, int chrDstW, enum PixelFormat dstFormat, const uint8_t *dither, const uint8_t *chrDither) { yuv2nv12XinC(lumFilter, lumSrc, lumFilterSize, - chrFilter, chrSrc, chrFilterSize, - dest, uDest, dstW, chrDstW, dstFormat); + chrFilter, chrUSrc, chrVSrc, chrFilterSize, + dest, uDest, dstW, chrDstW, dstFormat, dither, chrDither); } -static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, const int16_t *chrSrc, const int16_t *alpSrc, - uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW) +static inline void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc, + const int16_t *chrUSrc, const int16_t *chrVSrc, + const int16_t *alpSrc, + uint8_t *dest, uint8_t *uDest, uint8_t *vDest, + uint8_t *aDest, int dstW, int chrDstW, const uint8_t *lumDither, const uint8_t *chrDither) { int i; -#if COMPILE_TEMPLATE_MMX - if(!(c->flags & SWS_BITEXACT)) { - long p= 4; - const int16_t *src[4]= {alpSrc + dstW, lumSrc + dstW, chrSrc + chrDstW, chrSrc + VOFW + chrDstW}; - uint8_t *dst[4]= {aDest, dest, uDest, vDest}; - x86_reg counter[4]= {dstW, dstW, chrDstW, chrDstW}; - if (c->flags & SWS_ACCURATE_RND) { - while(p--) { - if (dst[p]) { - __asm__ volatile( - YSCALEYUV2YV121_ACCURATE - :: "r" (src[p]), "r" (dst[p] + counter[p]), - "g" (-counter[p]) - : "%"REG_a - ); - } - } - } else { - while(p--) { - if (dst[p]) { - __asm__ volatile( - YSCALEYUV2YV121 - :: "r" (src[p]), "r" (dst[p] + counter[p]), - "g" 
(-counter[p]) - : "%"REG_a - ); - } - } - } - return; - } -#endif for (i=0; i<dstW; i++) { - int val= (lumSrc[i]+64)>>7; - - if (val&256) { - if (val<0) val=0; - else val=255; - } - - dest[i]= val; + int val= (lumSrc[i]+lumDither[i&7])>>7; + dest[i]= av_clip_uint8(val); } if (uDest) for (i=0; i<chrDstW; i++) { - int u=(chrSrc[i ]+64)>>7; - int v=(chrSrc[i + VOFW]+64)>>7; - - if ((u|v)&256) { - if (u<0) u=0; - else if (u>255) u=255; - if (v<0) v=0; - else if (v>255) v=255; - } - - uDest[i]= u; - vDest[i]= v; + int u=(chrUSrc[i]+chrDither[i&7])>>7; + int v=(chrVSrc[i]+chrDither[(i+3)&7])>>7; + uDest[i]= av_clip_uint8(u); + vDest[i]= av_clip_uint8(v); } if (CONFIG_SWSCALE_ALPHA && aDest) for (i=0; i<dstW; i++) { - int val= (alpSrc[i]+64)>>7; + int val= (alpSrc[i]+lumDither[i&7])>>7; aDest[i]= av_clip_uint8(val); } } @@ -1019,343 +75,44 @@ static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, const /** * vertical scale YV12 to RGB */ -static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, - const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, - const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY) +static inline void yuv2packedX_c(SwsContext *c, const int16_t *lumFilter, + const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrUSrc, + const int16_t **chrVSrc, + int chrFilterSize, const int16_t **alpSrc, + uint8_t *dest, int dstW, int dstY) { -#if COMPILE_TEMPLATE_MMX - x86_reg dummy=0; - x86_reg dstW_reg = dstW; - if(!(c->flags & SWS_BITEXACT)) { - if (c->flags & SWS_ACCURATE_RND) { - switch(c->dstFormat) { - case PIX_FMT_RGB32: - if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) { - YSCALEYUV2PACKEDX_ACCURATE - YSCALEYUV2RGBX - "movq %%mm2, "U_TEMP"(%0) \n\t" - "movq %%mm4, "V_TEMP"(%0) \n\t" - "movq %%mm5, "Y_TEMP"(%0) \n\t" - YSCALEYUV2PACKEDX_ACCURATE_YA(ALP_MMX_FILTER_OFFSET) - "movq "Y_TEMP"(%0), %%mm5 \n\t" - "psraw $3, %%mm1 
\n\t" - "psraw $3, %%mm7 \n\t" - "packuswb %%mm7, %%mm1 \n\t" - WRITEBGR32(%4, %5, %%REGa, %%mm3, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm2, %%mm6) - - YSCALEYUV2PACKEDX_END - } else { - YSCALEYUV2PACKEDX_ACCURATE - YSCALEYUV2RGBX - "pcmpeqd %%mm7, %%mm7 \n\t" - WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) - - YSCALEYUV2PACKEDX_END - } - return; - case PIX_FMT_BGR24: - YSCALEYUV2PACKEDX_ACCURATE - YSCALEYUV2RGBX - "pxor %%mm7, %%mm7 \n\t" - "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"\n\t" //FIXME optimize - "add %4, %%"REG_c" \n\t" - WRITEBGR24(%%REGc, %5, %%REGa) - - - :: "r" (&c->redDither), - "m" (dummy), "m" (dummy), "m" (dummy), - "r" (dest), "m" (dstW_reg) - : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S - ); - return; - case PIX_FMT_RGB555: - YSCALEYUV2PACKEDX_ACCURATE - YSCALEYUV2RGBX - "pxor %%mm7, %%mm7 \n\t" - /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ -#ifdef DITHER1XBPP - "paddusb "BLUE_DITHER"(%0), %%mm2\n\t" - "paddusb "GREEN_DITHER"(%0), %%mm4\n\t" - "paddusb "RED_DITHER"(%0), %%mm5\n\t" -#endif - - WRITERGB15(%4, %5, %%REGa) - YSCALEYUV2PACKEDX_END - return; - case PIX_FMT_RGB565: - YSCALEYUV2PACKEDX_ACCURATE - YSCALEYUV2RGBX - "pxor %%mm7, %%mm7 \n\t" - /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ -#ifdef DITHER1XBPP - "paddusb "BLUE_DITHER"(%0), %%mm2\n\t" - "paddusb "GREEN_DITHER"(%0), %%mm4\n\t" - "paddusb "RED_DITHER"(%0), %%mm5\n\t" -#endif - - WRITERGB16(%4, %5, %%REGa) - YSCALEYUV2PACKEDX_END - return; - case PIX_FMT_YUYV422: - YSCALEYUV2PACKEDX_ACCURATE - /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ - - "psraw $3, %%mm3 \n\t" - "psraw $3, %%mm4 \n\t" - "psraw $3, %%mm1 \n\t" - "psraw $3, %%mm7 \n\t" - WRITEYUY2(%4, %5, %%REGa) - YSCALEYUV2PACKEDX_END - return; - } - } else { - switch(c->dstFormat) { - case PIX_FMT_RGB32: - if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) { - YSCALEYUV2PACKEDX - YSCALEYUV2RGBX - YSCALEYUV2PACKEDX_YA(ALP_MMX_FILTER_OFFSET, %%mm0, %%mm3, %%mm6, %%mm1, %%mm7) - "psraw $3, %%mm1 \n\t" - "psraw $3, %%mm7 
\n\t" - "packuswb %%mm7, %%mm1 \n\t" - WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6) - YSCALEYUV2PACKEDX_END - } else { - YSCALEYUV2PACKEDX - YSCALEYUV2RGBX - "pcmpeqd %%mm7, %%mm7 \n\t" - WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) - YSCALEYUV2PACKEDX_END - } - return; - case PIX_FMT_BGR24: - YSCALEYUV2PACKEDX - YSCALEYUV2RGBX - "pxor %%mm7, %%mm7 \n\t" - "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c" \n\t" //FIXME optimize - "add %4, %%"REG_c" \n\t" - WRITEBGR24(%%REGc, %5, %%REGa) - - :: "r" (&c->redDither), - "m" (dummy), "m" (dummy), "m" (dummy), - "r" (dest), "m" (dstW_reg) - : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S - ); - return; - case PIX_FMT_RGB555: - YSCALEYUV2PACKEDX - YSCALEYUV2RGBX - "pxor %%mm7, %%mm7 \n\t" - /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ -#ifdef DITHER1XBPP - "paddusb "BLUE_DITHER"(%0), %%mm2 \n\t" - "paddusb "GREEN_DITHER"(%0), %%mm4 \n\t" - "paddusb "RED_DITHER"(%0), %%mm5 \n\t" -#endif - - WRITERGB15(%4, %5, %%REGa) - YSCALEYUV2PACKEDX_END - return; - case PIX_FMT_RGB565: - YSCALEYUV2PACKEDX - YSCALEYUV2RGBX - "pxor %%mm7, %%mm7 \n\t" - /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ -#ifdef DITHER1XBPP - "paddusb "BLUE_DITHER"(%0), %%mm2 \n\t" - "paddusb "GREEN_DITHER"(%0), %%mm4 \n\t" - "paddusb "RED_DITHER"(%0), %%mm5 \n\t" -#endif - - WRITERGB16(%4, %5, %%REGa) - YSCALEYUV2PACKEDX_END - return; - case PIX_FMT_YUYV422: - YSCALEYUV2PACKEDX - /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ - - "psraw $3, %%mm3 \n\t" - "psraw $3, %%mm4 \n\t" - "psraw $3, %%mm1 \n\t" - "psraw $3, %%mm7 \n\t" - WRITEYUY2(%4, %5, %%REGa) - YSCALEYUV2PACKEDX_END - return; - } - } - } -#endif /* COMPILE_TEMPLATE_MMX */ -#if COMPILE_TEMPLATE_ALTIVEC - /* The following list of supported dstFormat values should - match what's found in the body of ff_yuv2packedX_altivec() */ - if (!(c->flags & SWS_BITEXACT) && !c->alpPixBuf && - (c->dstFormat==PIX_FMT_ABGR || c->dstFormat==PIX_FMT_BGRA || - 
c->dstFormat==PIX_FMT_BGR24 || c->dstFormat==PIX_FMT_RGB24 || - c->dstFormat==PIX_FMT_RGBA || c->dstFormat==PIX_FMT_ARGB)) - ff_yuv2packedX_altivec(c, lumFilter, lumSrc, lumFilterSize, - chrFilter, chrSrc, chrFilterSize, - dest, dstW, dstY); - else -#endif yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize, - chrFilter, chrSrc, chrFilterSize, + chrFilter, chrUSrc, chrVSrc, chrFilterSize, alpSrc, dest, dstW, dstY); } /** * vertical bilinear scale YV12 to RGB */ -static inline void RENAME(yuv2packed2)(SwsContext *c, const uint16_t *buf0, const uint16_t *buf1, const uint16_t *uvbuf0, const uint16_t *uvbuf1, - const uint16_t *abuf0, const uint16_t *abuf1, uint8_t *dest, int dstW, int yalpha, int uvalpha, int y) +static inline void yuv2packed2_c(SwsContext *c, const uint16_t *buf0, + const uint16_t *buf1, const uint16_t *ubuf0, + const uint16_t *ubuf1, const uint16_t *vbuf0, + const uint16_t *vbuf1, const uint16_t *abuf0, + const uint16_t *abuf1, uint8_t *dest, int dstW, + int yalpha, int uvalpha, int y) { int yalpha1=4095- yalpha; int uvalpha1=4095-uvalpha; int i; -#if COMPILE_TEMPLATE_MMX - if(!(c->flags & SWS_BITEXACT)) { - switch(c->dstFormat) { - //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :( - case PIX_FMT_RGB32: - if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) { -#if ARCH_X86_64 - __asm__ volatile( - YSCALEYUV2RGB(%%r8, %5) - YSCALEYUV2RGB_YA(%%r8, %5, %6, %7) - "psraw $3, %%mm1 \n\t" /* abuf0[eax] - abuf1[eax] >>7*/ - "psraw $3, %%mm7 \n\t" /* abuf0[eax] - abuf1[eax] >>7*/ - "packuswb %%mm7, %%mm1 \n\t" - WRITEBGR32(%4, 8280(%5), %%r8, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6) - - :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "r" (dest), - "a" (&c->redDither) - ,"r" (abuf0), "r" (abuf1) - : "%r8" - ); -#else - c->u_temp=(intptr_t)abuf0; - c->v_temp=(intptr_t)abuf1; - __asm__ volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB(%%REGBP, %5) - "push %0 
\n\t" - "push %1 \n\t" - "mov "U_TEMP"(%5), %0 \n\t" - "mov "V_TEMP"(%5), %1 \n\t" - YSCALEYUV2RGB_YA(%%REGBP, %5, %0, %1) - "psraw $3, %%mm1 \n\t" /* abuf0[eax] - abuf1[eax] >>7*/ - "psraw $3, %%mm7 \n\t" /* abuf0[eax] - abuf1[eax] >>7*/ - "packuswb %%mm7, %%mm1 \n\t" - "pop %1 \n\t" - "pop %0 \n\t" - WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" - - :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), - "a" (&c->redDither) - ); -#endif - } else { - __asm__ volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB(%%REGBP, %5) - "pcmpeqd %%mm7, %%mm7 \n\t" - WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" - - :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), - "a" (&c->redDither) - ); - } - return; - case PIX_FMT_BGR24: - __asm__ volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB(%%REGBP, %5) - "pxor %%mm7, %%mm7 \n\t" - WRITEBGR24(%%REGb, 8280(%5), %%REGBP) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" - :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), - "a" (&c->redDither) - ); - return; - case PIX_FMT_RGB555: - __asm__ volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB(%%REGBP, %5) - "pxor %%mm7, %%mm7 \n\t" - /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ -#ifdef DITHER1XBPP - "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t" - "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t" - "paddusb "RED_DITHER"(%5), %%mm5 \n\t" -#endif - - WRITERGB15(%%REGb, 8280(%5), %%REGBP) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" - - :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), - "a" 
(&c->redDither) - ); - return; - case PIX_FMT_RGB565: - __asm__ volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB(%%REGBP, %5) - "pxor %%mm7, %%mm7 \n\t" - /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ -#ifdef DITHER1XBPP - "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t" - "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t" - "paddusb "RED_DITHER"(%5), %%mm5 \n\t" -#endif - - WRITERGB16(%%REGb, 8280(%5), %%REGBP) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" - :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), - "a" (&c->redDither) - ); - return; - case PIX_FMT_YUYV422: - __asm__ volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2PACKED(%%REGBP, %5) - WRITEYUY2(%%REGb, 8280(%5), %%REGBP) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" - :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), - "a" (&c->redDither) - ); - return; - default: break; - } - } -#endif //COMPILE_TEMPLATE_MMX YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C(void,0), YSCALE_YUV_2_GRAY16_2_C, YSCALE_YUV_2_MONO2_C) } /** * YV12 to RGB without scaling or interpolating */ -static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, const uint16_t *uvbuf0, const uint16_t *uvbuf1, - const uint16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, enum PixelFormat dstFormat, int flags, int y) +static inline void yuv2packed1_c(SwsContext *c, const uint16_t *buf0, + const uint16_t *ubuf0, const uint16_t *ubuf1, + const uint16_t *vbuf0, const uint16_t *vbuf1, + const uint16_t *abuf0, uint8_t *dest, int dstW, + int uvalpha, enum PixelFormat dstFormat, + int flags, int y) { const int yalpha1=0; int i; @@ -1363,228 +120,6 @@ static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, cons const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1 const int yalpha= 4096; //FIXME ... 
- if (flags&SWS_FULL_CHR_H_INT) { - c->yuv2packed2(c, buf0, buf0, uvbuf0, uvbuf1, abuf0, abuf0, dest, dstW, 0, uvalpha, y); - return; - } - -#if COMPILE_TEMPLATE_MMX - if(!(flags & SWS_BITEXACT)) { - if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster - switch(dstFormat) { - case PIX_FMT_RGB32: - if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) { - __asm__ volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB1(%%REGBP, %5) - YSCALEYUV2RGB1_ALPHA(%%REGBP) - WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" - - :: "c" (buf0), "d" (abuf0), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), - "a" (&c->redDither) - ); - } else { - __asm__ volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB1(%%REGBP, %5) - "pcmpeqd %%mm7, %%mm7 \n\t" - WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" - - :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), - "a" (&c->redDither) - ); - } - return; - case PIX_FMT_BGR24: - __asm__ volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB1(%%REGBP, %5) - "pxor %%mm7, %%mm7 \n\t" - WRITEBGR24(%%REGb, 8280(%5), %%REGBP) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" - - :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), - "a" (&c->redDither) - ); - return; - case PIX_FMT_RGB555: - __asm__ volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB1(%%REGBP, %5) - "pxor %%mm7, %%mm7 \n\t" - /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ -#ifdef DITHER1XBPP - "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t" - 
"paddusb "GREEN_DITHER"(%5), %%mm4 \n\t" - "paddusb "RED_DITHER"(%5), %%mm5 \n\t" -#endif - WRITERGB15(%%REGb, 8280(%5), %%REGBP) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" - - :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), - "a" (&c->redDither) - ); - return; - case PIX_FMT_RGB565: - __asm__ volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB1(%%REGBP, %5) - "pxor %%mm7, %%mm7 \n\t" - /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ -#ifdef DITHER1XBPP - "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t" - "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t" - "paddusb "RED_DITHER"(%5), %%mm5 \n\t" -#endif - - WRITERGB16(%%REGb, 8280(%5), %%REGBP) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" - - :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), - "a" (&c->redDither) - ); - return; - case PIX_FMT_YUYV422: - __asm__ volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2PACKED1(%%REGBP, %5) - WRITEYUY2(%%REGb, 8280(%5), %%REGBP) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" - - :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), - "a" (&c->redDither) - ); - return; - } - } else { - switch(dstFormat) { - case PIX_FMT_RGB32: - if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) { - __asm__ volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB1b(%%REGBP, %5) - YSCALEYUV2RGB1_ALPHA(%%REGBP) - WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" - - :: "c" (buf0), "d" (abuf0), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), - "a" (&c->redDither) - ); - } else { - __asm__ volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB1b(%%REGBP, %5) - "pcmpeqd 
%%mm7, %%mm7 \n\t" - WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" - - :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), - "a" (&c->redDither) - ); - } - return; - case PIX_FMT_BGR24: - __asm__ volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB1b(%%REGBP, %5) - "pxor %%mm7, %%mm7 \n\t" - WRITEBGR24(%%REGb, 8280(%5), %%REGBP) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" - - :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), - "a" (&c->redDither) - ); - return; - case PIX_FMT_RGB555: - __asm__ volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB1b(%%REGBP, %5) - "pxor %%mm7, %%mm7 \n\t" - /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ -#ifdef DITHER1XBPP - "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t" - "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t" - "paddusb "RED_DITHER"(%5), %%mm5 \n\t" -#endif - WRITERGB15(%%REGb, 8280(%5), %%REGBP) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" - - :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), - "a" (&c->redDither) - ); - return; - case PIX_FMT_RGB565: - __asm__ volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB1b(%%REGBP, %5) - "pxor %%mm7, %%mm7 \n\t" - /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ -#ifdef DITHER1XBPP - "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t" - "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t" - "paddusb "RED_DITHER"(%5), %%mm5 \n\t" -#endif - - WRITERGB16(%%REGb, 8280(%5), %%REGBP) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" - - :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), - "a" (&c->redDither) - ); - return; - case PIX_FMT_YUYV422: - __asm__ volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, 
%%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2PACKED1b(%%REGBP, %5) - WRITEYUY2(%%REGb, 8280(%5), %%REGBP) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" - - :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), - "a" (&c->redDither) - ); - return; - } - } - } -#endif /* COMPILE_TEMPLATE_MMX */ if (uvalpha < 2048) { YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C) } else { @@ -1594,89 +129,28 @@ static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, cons //FIXME yuy2* can read up to 7 samples too much -static inline void RENAME(yuy2ToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused) +static inline void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width, + uint32_t *unused) { -#if COMPILE_TEMPLATE_MMX - __asm__ volatile( - "movq "MANGLE(bm01010101)", %%mm2 \n\t" - "mov %0, %%"REG_a" \n\t" - "1: \n\t" - "movq (%1, %%"REG_a",2), %%mm0 \n\t" - "movq 8(%1, %%"REG_a",2), %%mm1 \n\t" - "pand %%mm2, %%mm0 \n\t" - "pand %%mm2, %%mm1 \n\t" - "packuswb %%mm1, %%mm0 \n\t" - "movq %%mm0, (%2, %%"REG_a") \n\t" - "add $8, %%"REG_a" \n\t" - " js 1b \n\t" - : : "g" ((x86_reg)-width), "r" (src+width*2), "r" (dst+width) - : "%"REG_a - ); -#else int i; for (i=0; i<width; i++) dst[i]= src[2*i]; -#endif } -static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused) +static inline void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, + const uint8_t *src2, int width, uint32_t *unused) { -#if COMPILE_TEMPLATE_MMX - __asm__ volatile( - "movq "MANGLE(bm01010101)", %%mm4 \n\t" - "mov %0, %%"REG_a" \n\t" - "1: \n\t" - "movq (%1, %%"REG_a",4), %%mm0 \n\t" - "movq 8(%1, %%"REG_a",4), %%mm1 \n\t" - "psrlw $8, %%mm0 \n\t" - "psrlw $8, %%mm1 \n\t" - "packuswb %%mm1, %%mm0 \n\t" - "movq %%mm0, %%mm1 \n\t" - "psrlw $8, %%mm0 \n\t" - "pand %%mm4, %%mm1 \n\t" - 
"packuswb %%mm0, %%mm0 \n\t" - "packuswb %%mm1, %%mm1 \n\t" - "movd %%mm0, (%3, %%"REG_a") \n\t" - "movd %%mm1, (%2, %%"REG_a") \n\t" - "add $4, %%"REG_a" \n\t" - " js 1b \n\t" - : : "g" ((x86_reg)-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width) - : "%"REG_a - ); -#else int i; for (i=0; i<width; i++) { dstU[i]= src1[4*i + 1]; dstV[i]= src1[4*i + 3]; } -#endif assert(src1 == src2); } -static inline void RENAME(LEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused) +static inline void LEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, + const uint8_t *src2, int width, uint32_t *unused) { -#if COMPILE_TEMPLATE_MMX - __asm__ volatile( - "mov %0, %%"REG_a" \n\t" - "1: \n\t" - "movq (%1, %%"REG_a",2), %%mm0 \n\t" - "movq 8(%1, %%"REG_a",2), %%mm1 \n\t" - "movq (%2, %%"REG_a",2), %%mm2 \n\t" - "movq 8(%2, %%"REG_a",2), %%mm3 \n\t" - "psrlw $8, %%mm0 \n\t" - "psrlw $8, %%mm1 \n\t" - "psrlw $8, %%mm2 \n\t" - "psrlw $8, %%mm3 \n\t" - "packuswb %%mm1, %%mm0 \n\t" - "packuswb %%mm3, %%mm2 \n\t" - "movq %%mm0, (%3, %%"REG_a") \n\t" - "movq %%mm2, (%4, %%"REG_a") \n\t" - "add $8, %%"REG_a" \n\t" - " js 1b \n\t" - : : "g" ((x86_reg)-width), "r" (src1+width*2), "r" (src2+width*2), "r" (dstU+width), "r" (dstV+width) - : "%"REG_a - ); -#else int i; // FIXME I don't think this code is right for YUV444/422, since then h is not subsampled so // we need to skip each second pixel. Same for BEToUV. @@ -1684,148 +158,47 @@ static inline void RENAME(LEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *s dstU[i]= src1[2*i + 1]; dstV[i]= src2[2*i + 1]; } -#endif } /* This is almost identical to the previous, end exists only because * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. 
*/ -static inline void RENAME(uyvyToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused) +static inline void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width, + uint32_t *unused) { -#if COMPILE_TEMPLATE_MMX - __asm__ volatile( - "mov %0, %%"REG_a" \n\t" - "1: \n\t" - "movq (%1, %%"REG_a",2), %%mm0 \n\t" - "movq 8(%1, %%"REG_a",2), %%mm1 \n\t" - "psrlw $8, %%mm0 \n\t" - "psrlw $8, %%mm1 \n\t" - "packuswb %%mm1, %%mm0 \n\t" - "movq %%mm0, (%2, %%"REG_a") \n\t" - "add $8, %%"REG_a" \n\t" - " js 1b \n\t" - : : "g" ((x86_reg)-width), "r" (src+width*2), "r" (dst+width) - : "%"REG_a - ); -#else int i; for (i=0; i<width; i++) dst[i]= src[2*i+1]; -#endif } -static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused) +static inline void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, + const uint8_t *src2, int width, uint32_t *unused) { -#if COMPILE_TEMPLATE_MMX - __asm__ volatile( - "movq "MANGLE(bm01010101)", %%mm4 \n\t" - "mov %0, %%"REG_a" \n\t" - "1: \n\t" - "movq (%1, %%"REG_a",4), %%mm0 \n\t" - "movq 8(%1, %%"REG_a",4), %%mm1 \n\t" - "pand %%mm4, %%mm0 \n\t" - "pand %%mm4, %%mm1 \n\t" - "packuswb %%mm1, %%mm0 \n\t" - "movq %%mm0, %%mm1 \n\t" - "psrlw $8, %%mm0 \n\t" - "pand %%mm4, %%mm1 \n\t" - "packuswb %%mm0, %%mm0 \n\t" - "packuswb %%mm1, %%mm1 \n\t" - "movd %%mm0, (%3, %%"REG_a") \n\t" - "movd %%mm1, (%2, %%"REG_a") \n\t" - "add $4, %%"REG_a" \n\t" - " js 1b \n\t" - : : "g" ((x86_reg)-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width) - : "%"REG_a - ); -#else int i; for (i=0; i<width; i++) { dstU[i]= src1[4*i + 0]; dstV[i]= src1[4*i + 2]; } -#endif assert(src1 == src2); } -static inline void RENAME(BEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused) +static inline void BEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, + const uint8_t *src2, int width, uint32_t *unused) { -#if 
COMPILE_TEMPLATE_MMX - __asm__ volatile( - "movq "MANGLE(bm01010101)", %%mm4 \n\t" - "mov %0, %%"REG_a" \n\t" - "1: \n\t" - "movq (%1, %%"REG_a",2), %%mm0 \n\t" - "movq 8(%1, %%"REG_a",2), %%mm1 \n\t" - "movq (%2, %%"REG_a",2), %%mm2 \n\t" - "movq 8(%2, %%"REG_a",2), %%mm3 \n\t" - "pand %%mm4, %%mm0 \n\t" - "pand %%mm4, %%mm1 \n\t" - "pand %%mm4, %%mm2 \n\t" - "pand %%mm4, %%mm3 \n\t" - "packuswb %%mm1, %%mm0 \n\t" - "packuswb %%mm3, %%mm2 \n\t" - "movq %%mm0, (%3, %%"REG_a") \n\t" - "movq %%mm2, (%4, %%"REG_a") \n\t" - "add $8, %%"REG_a" \n\t" - " js 1b \n\t" - : : "g" ((x86_reg)-width), "r" (src1+width*2), "r" (src2+width*2), "r" (dstU+width), "r" (dstV+width) - : "%"REG_a - ); -#else int i; for (i=0; i<width; i++) { dstU[i]= src1[2*i]; dstV[i]= src2[2*i]; } -#endif } -static inline void RENAME(nvXXtoUV)(uint8_t *dst1, uint8_t *dst2, - const uint8_t *src, long width) +static inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2, + const uint8_t *src, int width) { -#if COMPILE_TEMPLATE_MMX - __asm__ volatile( - "movq "MANGLE(bm01010101)", %%mm4 \n\t" - "mov %0, %%"REG_a" \n\t" - "1: \n\t" - "movq (%1, %%"REG_a",2), %%mm0 \n\t" - "movq 8(%1, %%"REG_a",2), %%mm1 \n\t" - "movq %%mm0, %%mm2 \n\t" - "movq %%mm1, %%mm3 \n\t" - "pand %%mm4, %%mm0 \n\t" - "pand %%mm4, %%mm1 \n\t" - "psrlw $8, %%mm2 \n\t" - "psrlw $8, %%mm3 \n\t" - "packuswb %%mm1, %%mm0 \n\t" - "packuswb %%mm3, %%mm2 \n\t" - "movq %%mm0, (%2, %%"REG_a") \n\t" - "movq %%mm2, (%3, %%"REG_a") \n\t" - "add $8, %%"REG_a" \n\t" - " js 1b \n\t" - : : "g" ((x86_reg)-width), "r" (src+width*2), "r" (dst1+width), "r" (dst2+width) - : "%"REG_a - ); -#else int i; for (i = 0; i < width; i++) { dst1[i] = src[2*i+0]; dst2[i] = src[2*i+1]; } -#endif -} - -static inline void RENAME(nv12ToUV)(uint8_t *dstU, uint8_t *dstV, - const uint8_t *src1, const uint8_t *src2, - long width, uint32_t *unused) -{ - RENAME(nvXXtoUV)(dstU, dstV, src1, width); -} - -static inline void RENAME(nv21ToUV)(uint8_t *dstU, uint8_t *dstV, - const 
uint8_t *src1, const uint8_t *src2, - long width, uint32_t *unused) -{ - RENAME(nvXXtoUV)(dstV, dstU, src1, width); } // FIXME Maybe dither instead. @@ -1833,7 +206,7 @@ static inline void RENAME(nv21ToUV)(uint8_t *dstU, uint8_t *dstV, #define YUV_NBPS(depth, endianness, rfunc) \ static inline void endianness ## depth ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \ const uint16_t *srcU, const uint16_t *srcV, \ - long width, uint32_t *unused) \ + int width, uint32_t *unused) \ { \ int i; \ for (i = 0; i < width; i++) { \ @@ -1842,7 +215,7 @@ static inline void endianness ## depth ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \ } \ } \ \ -static inline void endianness ## depth ## ToY_c(uint8_t *dstY, const uint16_t *srcY, long width, uint32_t *unused) \ +static inline void endianness ## depth ## ToY_c(uint8_t *dstY, const uint16_t *srcY, int width, uint32_t *unused) \ { \ int i; \ for (i = 0; i < width; i++) \ @@ -1855,736 +228,51 @@ YUV_NBPS(10, LE, AV_RL16) YUV_NBPS(10, BE, AV_RB16) #endif // YUV_NBPS -#if COMPILE_TEMPLATE_MMX -static inline void RENAME(bgr24ToY_mmx)(uint8_t *dst, const uint8_t *src, long width, enum PixelFormat srcFormat) -{ - - if(srcFormat == PIX_FMT_BGR24) { - __asm__ volatile( - "movq "MANGLE(ff_bgr24toY1Coeff)", %%mm5 \n\t" - "movq "MANGLE(ff_bgr24toY2Coeff)", %%mm6 \n\t" - : - ); - } else { - __asm__ volatile( - "movq "MANGLE(ff_rgb24toY1Coeff)", %%mm5 \n\t" - "movq "MANGLE(ff_rgb24toY2Coeff)", %%mm6 \n\t" - : - ); - } - - __asm__ volatile( - "movq "MANGLE(ff_bgr24toYOffset)", %%mm4 \n\t" - "mov %2, %%"REG_a" \n\t" - "pxor %%mm7, %%mm7 \n\t" - "1: \n\t" - PREFETCH" 64(%0) \n\t" - "movd (%0), %%mm0 \n\t" - "movd 2(%0), %%mm1 \n\t" - "movd 6(%0), %%mm2 \n\t" - "movd 8(%0), %%mm3 \n\t" - "add $12, %0 \n\t" - "punpcklbw %%mm7, %%mm0 \n\t" - "punpcklbw %%mm7, %%mm1 \n\t" - "punpcklbw %%mm7, %%mm2 \n\t" - "punpcklbw %%mm7, %%mm3 \n\t" - "pmaddwd %%mm5, %%mm0 \n\t" - "pmaddwd %%mm6, %%mm1 \n\t" - "pmaddwd %%mm5, %%mm2 \n\t" - "pmaddwd %%mm6, %%mm3 \n\t" - 
"paddd %%mm1, %%mm0 \n\t" - "paddd %%mm3, %%mm2 \n\t" - "paddd %%mm4, %%mm0 \n\t" - "paddd %%mm4, %%mm2 \n\t" - "psrad $15, %%mm0 \n\t" - "psrad $15, %%mm2 \n\t" - "packssdw %%mm2, %%mm0 \n\t" - "packuswb %%mm0, %%mm0 \n\t" - "movd %%mm0, (%1, %%"REG_a") \n\t" - "add $4, %%"REG_a" \n\t" - " js 1b \n\t" - : "+r" (src) - : "r" (dst+width), "g" ((x86_reg)-width) - : "%"REG_a - ); -} - -static inline void RENAME(bgr24ToUV_mmx)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, long width, enum PixelFormat srcFormat) -{ - __asm__ volatile( - "movq 24(%4), %%mm6 \n\t" - "mov %3, %%"REG_a" \n\t" - "pxor %%mm7, %%mm7 \n\t" - "1: \n\t" - PREFETCH" 64(%0) \n\t" - "movd (%0), %%mm0 \n\t" - "movd 2(%0), %%mm1 \n\t" - "punpcklbw %%mm7, %%mm0 \n\t" - "punpcklbw %%mm7, %%mm1 \n\t" - "movq %%mm0, %%mm2 \n\t" - "movq %%mm1, %%mm3 \n\t" - "pmaddwd (%4), %%mm0 \n\t" - "pmaddwd 8(%4), %%mm1 \n\t" - "pmaddwd 16(%4), %%mm2 \n\t" - "pmaddwd %%mm6, %%mm3 \n\t" - "paddd %%mm1, %%mm0 \n\t" - "paddd %%mm3, %%mm2 \n\t" - - "movd 6(%0), %%mm1 \n\t" - "movd 8(%0), %%mm3 \n\t" - "add $12, %0 \n\t" - "punpcklbw %%mm7, %%mm1 \n\t" - "punpcklbw %%mm7, %%mm3 \n\t" - "movq %%mm1, %%mm4 \n\t" - "movq %%mm3, %%mm5 \n\t" - "pmaddwd (%4), %%mm1 \n\t" - "pmaddwd 8(%4), %%mm3 \n\t" - "pmaddwd 16(%4), %%mm4 \n\t" - "pmaddwd %%mm6, %%mm5 \n\t" - "paddd %%mm3, %%mm1 \n\t" - "paddd %%mm5, %%mm4 \n\t" - - "movq "MANGLE(ff_bgr24toUVOffset)", %%mm3 \n\t" - "paddd %%mm3, %%mm0 \n\t" - "paddd %%mm3, %%mm2 \n\t" - "paddd %%mm3, %%mm1 \n\t" - "paddd %%mm3, %%mm4 \n\t" - "psrad $15, %%mm0 \n\t" - "psrad $15, %%mm2 \n\t" - "psrad $15, %%mm1 \n\t" - "psrad $15, %%mm4 \n\t" - "packssdw %%mm1, %%mm0 \n\t" - "packssdw %%mm4, %%mm2 \n\t" - "packuswb %%mm0, %%mm0 \n\t" - "packuswb %%mm2, %%mm2 \n\t" - "movd %%mm0, (%1, %%"REG_a") \n\t" - "movd %%mm2, (%2, %%"REG_a") \n\t" - "add $4, %%"REG_a" \n\t" - " js 1b \n\t" - : "+r" (src) - : "r" (dstU+width), "r" (dstV+width), "g" ((x86_reg)-width), "r"(ff_bgr24toUV[srcFormat == 
PIX_FMT_RGB24]) - : "%"REG_a - ); -} -#endif - -static inline void RENAME(bgr24ToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused) -{ -#if COMPILE_TEMPLATE_MMX - RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_BGR24); -#else - int i; - for (i=0; i<width; i++) { - int b= src[i*3+0]; - int g= src[i*3+1]; - int r= src[i*3+2]; - - dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT); - } -#endif /* COMPILE_TEMPLATE_MMX */ -} - -static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused) -{ -#if COMPILE_TEMPLATE_MMX - RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_BGR24); -#else - int i; - for (i=0; i<width; i++) { - int b= src1[3*i + 0]; - int g= src1[3*i + 1]; - int r= src1[3*i + 2]; - - dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT; - dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT; - } -#endif /* COMPILE_TEMPLATE_MMX */ - assert(src1 == src2); -} - -static inline void RENAME(bgr24ToUV_half)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused) -{ - int i; - for (i=0; i<width; i++) { - int b= src1[6*i + 0] + src1[6*i + 3]; - int g= src1[6*i + 1] + src1[6*i + 4]; - int r= src1[6*i + 2] + src1[6*i + 5]; - - dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1); - dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1); - } - assert(src1 == src2); -} - -static inline void RENAME(rgb24ToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused) +static inline void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV, + const uint8_t *src1, const uint8_t *src2, + int width, uint32_t *unused) { -#if COMPILE_TEMPLATE_MMX - RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_RGB24); -#else - int i; - for (i=0; i<width; i++) { - int r= src[i*3+0]; - int g= src[i*3+1]; - int b= src[i*3+2]; - - dst[i]= ((RY*r + GY*g + BY*b + 
(33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT); - } -#endif + nvXXtoUV_c(dstU, dstV, src1, width); } -static inline void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused) +static inline void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV, + const uint8_t *src1, const uint8_t *src2, + int width, uint32_t *unused) { -#if COMPILE_TEMPLATE_MMX - assert(src1==src2); - RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_RGB24); -#else - int i; - assert(src1==src2); - for (i=0; i<width; i++) { - int r= src1[3*i + 0]; - int g= src1[3*i + 1]; - int b= src1[3*i + 2]; - - dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT; - dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT; - } -#endif + nvXXtoUV_c(dstV, dstU, src1, width); } -static inline void RENAME(rgb24ToUV_half)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused) -{ - int i; - assert(src1==src2); - for (i=0; i<width; i++) { - int r= src1[6*i + 0] + src1[6*i + 3]; - int g= src1[6*i + 1] + src1[6*i + 4]; - int b= src1[6*i + 2] + src1[6*i + 5]; - - dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1); - dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1); - } -} - - // bilinear / bicubic scaling -static inline void RENAME(hScale)(int16_t *dst, int dstW, const uint8_t *src, int srcW, int xInc, - const int16_t *filter, const int16_t *filterPos, long filterSize) +static inline void hScale_c(int16_t *dst, int dstW, const uint8_t *src, + int srcW, int xInc, + const int16_t *filter, const int16_t *filterPos, + int filterSize) { -#if COMPILE_TEMPLATE_MMX - assert(filterSize % 4 == 0 && filterSize>0); - if (filterSize==4) { // Always true for upscaling, sometimes for down, too. 
- x86_reg counter= -2*dstW; - filter-= counter*2; - filterPos-= counter/2; - dst-= counter/2; - __asm__ volatile( -#if defined(PIC) - "push %%"REG_b" \n\t" -#endif - "pxor %%mm7, %%mm7 \n\t" - "push %%"REG_BP" \n\t" // we use 7 regs here ... - "mov %%"REG_a", %%"REG_BP" \n\t" - ".p2align 4 \n\t" - "1: \n\t" - "movzwl (%2, %%"REG_BP"), %%eax \n\t" - "movzwl 2(%2, %%"REG_BP"), %%ebx \n\t" - "movq (%1, %%"REG_BP", 4), %%mm1 \n\t" - "movq 8(%1, %%"REG_BP", 4), %%mm3 \n\t" - "movd (%3, %%"REG_a"), %%mm0 \n\t" - "movd (%3, %%"REG_b"), %%mm2 \n\t" - "punpcklbw %%mm7, %%mm0 \n\t" - "punpcklbw %%mm7, %%mm2 \n\t" - "pmaddwd %%mm1, %%mm0 \n\t" - "pmaddwd %%mm2, %%mm3 \n\t" - "movq %%mm0, %%mm4 \n\t" - "punpckldq %%mm3, %%mm0 \n\t" - "punpckhdq %%mm3, %%mm4 \n\t" - "paddd %%mm4, %%mm0 \n\t" - "psrad $7, %%mm0 \n\t" - "packssdw %%mm0, %%mm0 \n\t" - "movd %%mm0, (%4, %%"REG_BP") \n\t" - "add $4, %%"REG_BP" \n\t" - " jnc 1b \n\t" - - "pop %%"REG_BP" \n\t" -#if defined(PIC) - "pop %%"REG_b" \n\t" -#endif - : "+a" (counter) - : "c" (filter), "d" (filterPos), "S" (src), "D" (dst) -#if !defined(PIC) - : "%"REG_b -#endif - ); - } else if (filterSize==8) { - x86_reg counter= -2*dstW; - filter-= counter*4; - filterPos-= counter/2; - dst-= counter/2; - __asm__ volatile( -#if defined(PIC) - "push %%"REG_b" \n\t" -#endif - "pxor %%mm7, %%mm7 \n\t" - "push %%"REG_BP" \n\t" // we use 7 regs here ... 
- "mov %%"REG_a", %%"REG_BP" \n\t" - ".p2align 4 \n\t" - "1: \n\t" - "movzwl (%2, %%"REG_BP"), %%eax \n\t" - "movzwl 2(%2, %%"REG_BP"), %%ebx \n\t" - "movq (%1, %%"REG_BP", 8), %%mm1 \n\t" - "movq 16(%1, %%"REG_BP", 8), %%mm3 \n\t" - "movd (%3, %%"REG_a"), %%mm0 \n\t" - "movd (%3, %%"REG_b"), %%mm2 \n\t" - "punpcklbw %%mm7, %%mm0 \n\t" - "punpcklbw %%mm7, %%mm2 \n\t" - "pmaddwd %%mm1, %%mm0 \n\t" - "pmaddwd %%mm2, %%mm3 \n\t" - - "movq 8(%1, %%"REG_BP", 8), %%mm1 \n\t" - "movq 24(%1, %%"REG_BP", 8), %%mm5 \n\t" - "movd 4(%3, %%"REG_a"), %%mm4 \n\t" - "movd 4(%3, %%"REG_b"), %%mm2 \n\t" - "punpcklbw %%mm7, %%mm4 \n\t" - "punpcklbw %%mm7, %%mm2 \n\t" - "pmaddwd %%mm1, %%mm4 \n\t" - "pmaddwd %%mm2, %%mm5 \n\t" - "paddd %%mm4, %%mm0 \n\t" - "paddd %%mm5, %%mm3 \n\t" - "movq %%mm0, %%mm4 \n\t" - "punpckldq %%mm3, %%mm0 \n\t" - "punpckhdq %%mm3, %%mm4 \n\t" - "paddd %%mm4, %%mm0 \n\t" - "psrad $7, %%mm0 \n\t" - "packssdw %%mm0, %%mm0 \n\t" - "movd %%mm0, (%4, %%"REG_BP") \n\t" - "add $4, %%"REG_BP" \n\t" - " jnc 1b \n\t" - - "pop %%"REG_BP" \n\t" -#if defined(PIC) - "pop %%"REG_b" \n\t" -#endif - : "+a" (counter) - : "c" (filter), "d" (filterPos), "S" (src), "D" (dst) -#if !defined(PIC) - : "%"REG_b -#endif - ); - } else { - const uint8_t *offset = src+filterSize; - x86_reg counter= -2*dstW; - //filter-= counter*filterSize/2; - filterPos-= counter/2; - dst-= counter/2; - __asm__ volatile( - "pxor %%mm7, %%mm7 \n\t" - ".p2align 4 \n\t" - "1: \n\t" - "mov %2, %%"REG_c" \n\t" - "movzwl (%%"REG_c", %0), %%eax \n\t" - "movzwl 2(%%"REG_c", %0), %%edx \n\t" - "mov %5, %%"REG_c" \n\t" - "pxor %%mm4, %%mm4 \n\t" - "pxor %%mm5, %%mm5 \n\t" - "2: \n\t" - "movq (%1), %%mm1 \n\t" - "movq (%1, %6), %%mm3 \n\t" - "movd (%%"REG_c", %%"REG_a"), %%mm0 \n\t" - "movd (%%"REG_c", %%"REG_d"), %%mm2 \n\t" - "punpcklbw %%mm7, %%mm0 \n\t" - "punpcklbw %%mm7, %%mm2 \n\t" - "pmaddwd %%mm1, %%mm0 \n\t" - "pmaddwd %%mm2, %%mm3 \n\t" - "paddd %%mm3, %%mm5 \n\t" - "paddd %%mm0, %%mm4 \n\t" - "add $8, 
%1 \n\t" - "add $4, %%"REG_c" \n\t" - "cmp %4, %%"REG_c" \n\t" - " jb 2b \n\t" - "add %6, %1 \n\t" - "movq %%mm4, %%mm0 \n\t" - "punpckldq %%mm5, %%mm4 \n\t" - "punpckhdq %%mm5, %%mm0 \n\t" - "paddd %%mm0, %%mm4 \n\t" - "psrad $7, %%mm4 \n\t" - "packssdw %%mm4, %%mm4 \n\t" - "mov %3, %%"REG_a" \n\t" - "movd %%mm4, (%%"REG_a", %0) \n\t" - "add $4, %0 \n\t" - " jnc 1b \n\t" - - : "+r" (counter), "+r" (filter) - : "m" (filterPos), "m" (dst), "m"(offset), - "m" (src), "r" ((x86_reg)filterSize*2) - : "%"REG_a, "%"REG_c, "%"REG_d - ); - } -#else -#if COMPILE_TEMPLATE_ALTIVEC - hScale_altivec_real(dst, dstW, src, srcW, xInc, filter, filterPos, filterSize); -#else int i; for (i=0; i<dstW; i++) { int j; int srcPos= filterPos[i]; int val=0; - //printf("filterPos: %d\n", filterPos[i]); for (j=0; j<filterSize; j++) { - //printf("filter: %d, src: %d\n", filter[i], src[srcPos + j]); val += ((int)src[srcPos + j])*filter[filterSize*i + j]; } //filter += hFilterSize; dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ... 
//dst[i] = val>>7; } -#endif /* COMPILE_TEMPLATE_ALTIVEC */ -#endif /* COMPILE_MMX */ } -//FIXME all pal and rgb srcFormats could do this convertion as well -//FIXME all scalers more complex than bilinear could do half of this transform -static void RENAME(chrRangeToJpeg)(int16_t *dst, int width) -{ - int i; - for (i = 0; i < width; i++) { - dst[i ] = (FFMIN(dst[i ],30775)*4663 - 9289992)>>12; //-264 - dst[i+VOFW] = (FFMIN(dst[i+VOFW],30775)*4663 - 9289992)>>12; //-264 - } -} -static void RENAME(chrRangeFromJpeg)(int16_t *dst, int width) -{ - int i; - for (i = 0; i < width; i++) { - dst[i ] = (dst[i ]*1799 + 4081085)>>11; //1469 - dst[i+VOFW] = (dst[i+VOFW]*1799 + 4081085)>>11; //1469 - } -} -static void RENAME(lumRangeToJpeg)(int16_t *dst, int width) -{ - int i; - for (i = 0; i < width; i++) - dst[i] = (FFMIN(dst[i],30189)*19077 - 39057361)>>14; -} -static void RENAME(lumRangeFromJpeg)(int16_t *dst, int width) -{ - int i; - for (i = 0; i < width; i++) - dst[i] = (dst[i]*14071 + 33561947)>>14; -} - -#define FAST_BILINEAR_X86 \ - "subl %%edi, %%esi \n\t" /* src[xx+1] - src[xx] */ \ - "imull %%ecx, %%esi \n\t" /* (src[xx+1] - src[xx])*xalpha */ \ - "shll $16, %%edi \n\t" \ - "addl %%edi, %%esi \n\t" /* src[xx+1]*xalpha + src[xx]*(1-xalpha) */ \ - "mov %1, %%"REG_D"\n\t" \ - "shrl $9, %%esi \n\t" \ - -static inline void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst, - long dstWidth, const uint8_t *src, int srcW, - int xInc) -{ -#if ARCH_X86 -#if COMPILE_TEMPLATE_MMX2 - int32_t *filterPos = c->hLumFilterPos; - int16_t *filter = c->hLumFilter; - int canMMX2BeUsed = c->canMMX2BeUsed; - void *mmx2FilterCode= c->lumMmx2FilterCode; - int i; -#if defined(PIC) - DECLARE_ALIGNED(8, uint64_t, ebxsave); -#endif - if (canMMX2BeUsed) { - __asm__ volatile( -#if defined(PIC) - "mov %%"REG_b", %5 \n\t" -#endif - "pxor %%mm7, %%mm7 \n\t" - "mov %0, %%"REG_c" \n\t" - "mov %1, %%"REG_D" \n\t" - "mov %2, %%"REG_d" \n\t" - "mov %3, %%"REG_b" \n\t" - "xor %%"REG_a", %%"REG_a" \n\t" // i 
- PREFETCH" (%%"REG_c") \n\t" - PREFETCH" 32(%%"REG_c") \n\t" - PREFETCH" 64(%%"REG_c") \n\t" - -#if ARCH_X86_64 - -#define CALL_MMX2_FILTER_CODE \ - "movl (%%"REG_b"), %%esi \n\t"\ - "call *%4 \n\t"\ - "movl (%%"REG_b", %%"REG_a"), %%esi \n\t"\ - "add %%"REG_S", %%"REG_c" \n\t"\ - "add %%"REG_a", %%"REG_D" \n\t"\ - "xor %%"REG_a", %%"REG_a" \n\t"\ - -#else - -#define CALL_MMX2_FILTER_CODE \ - "movl (%%"REG_b"), %%esi \n\t"\ - "call *%4 \n\t"\ - "addl (%%"REG_b", %%"REG_a"), %%"REG_c" \n\t"\ - "add %%"REG_a", %%"REG_D" \n\t"\ - "xor %%"REG_a", %%"REG_a" \n\t"\ - -#endif /* ARCH_X86_64 */ - - CALL_MMX2_FILTER_CODE - CALL_MMX2_FILTER_CODE - CALL_MMX2_FILTER_CODE - CALL_MMX2_FILTER_CODE - CALL_MMX2_FILTER_CODE - CALL_MMX2_FILTER_CODE - CALL_MMX2_FILTER_CODE - CALL_MMX2_FILTER_CODE - -#if defined(PIC) - "mov %5, %%"REG_b" \n\t" -#endif - :: "m" (src), "m" (dst), "m" (filter), "m" (filterPos), - "m" (mmx2FilterCode) -#if defined(PIC) - ,"m" (ebxsave) -#endif - : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D -#if !defined(PIC) - ,"%"REG_b -#endif - ); - for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) dst[i] = src[srcW-1]*128; - } else { -#endif /* COMPILE_TEMPLATE_MMX2 */ - x86_reg xInc_shr16 = xInc >> 16; - uint16_t xInc_mask = xInc & 0xffff; - x86_reg dstWidth_reg = dstWidth; - //NO MMX just normal asm ... 
- __asm__ volatile( - "xor %%"REG_a", %%"REG_a" \n\t" // i - "xor %%"REG_d", %%"REG_d" \n\t" // xx - "xorl %%ecx, %%ecx \n\t" // xalpha - ".p2align 4 \n\t" - "1: \n\t" - "movzbl (%0, %%"REG_d"), %%edi \n\t" //src[xx] - "movzbl 1(%0, %%"REG_d"), %%esi \n\t" //src[xx+1] - FAST_BILINEAR_X86 - "movw %%si, (%%"REG_D", %%"REG_a", 2) \n\t" - "addw %4, %%cx \n\t" //xalpha += xInc&0xFFFF - "adc %3, %%"REG_d" \n\t" //xx+= xInc>>16 + carry - - "movzbl (%0, %%"REG_d"), %%edi \n\t" //src[xx] - "movzbl 1(%0, %%"REG_d"), %%esi \n\t" //src[xx+1] - FAST_BILINEAR_X86 - "movw %%si, 2(%%"REG_D", %%"REG_a", 2) \n\t" - "addw %4, %%cx \n\t" //xalpha += xInc&0xFFFF - "adc %3, %%"REG_d" \n\t" //xx+= xInc>>16 + carry - - - "add $2, %%"REG_a" \n\t" - "cmp %2, %%"REG_a" \n\t" - " jb 1b \n\t" - - - :: "r" (src), "m" (dst), "m" (dstWidth_reg), "m" (xInc_shr16), "m" (xInc_mask) - : "%"REG_a, "%"REG_d, "%ecx", "%"REG_D, "%esi" - ); -#if COMPILE_TEMPLATE_MMX2 - } //if MMX2 can't be used -#endif -#else - int i; - unsigned int xpos=0; - for (i=0;i<dstWidth;i++) { - register unsigned int xx=xpos>>16; - register unsigned int xalpha=(xpos&0xFFFF)>>9; - dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha; - xpos+=xInc; - } -#endif /* ARCH_X86 */ -} - - // *** horizontal scale Y line to temp buffer -static inline void RENAME(hyscale)(SwsContext *c, uint16_t *dst, long dstWidth, const uint8_t *src, int srcW, int xInc, - const int16_t *hLumFilter, - const int16_t *hLumFilterPos, int hLumFilterSize, - uint8_t *formatConvBuffer, - uint32_t *pal, int isAlpha) -{ - void (*toYV12)(uint8_t *, const uint8_t *, long, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12; - void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange; - - src += isAlpha ? 
c->alpSrcOffset : c->lumSrcOffset; - - if (toYV12) { - toYV12(formatConvBuffer, src, srcW, pal); - src= formatConvBuffer; - } - - if (!c->hyscale_fast) { - c->hScale(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize); - } else { // fast bilinear upscale / crap downscale - c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc); - } - - if (convertRange) - convertRange(dst, dstWidth); -} - -static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst, - long dstWidth, const uint8_t *src1, - const uint8_t *src2, int srcW, int xInc) -{ -#if ARCH_X86 -#if COMPILE_TEMPLATE_MMX2 - int32_t *filterPos = c->hChrFilterPos; - int16_t *filter = c->hChrFilter; - int canMMX2BeUsed = c->canMMX2BeUsed; - void *mmx2FilterCode= c->chrMmx2FilterCode; - int i; -#if defined(PIC) - DECLARE_ALIGNED(8, uint64_t, ebxsave); -#endif - if (canMMX2BeUsed) { - __asm__ volatile( -#if defined(PIC) - "mov %%"REG_b", %6 \n\t" -#endif - "pxor %%mm7, %%mm7 \n\t" - "mov %0, %%"REG_c" \n\t" - "mov %1, %%"REG_D" \n\t" - "mov %2, %%"REG_d" \n\t" - "mov %3, %%"REG_b" \n\t" - "xor %%"REG_a", %%"REG_a" \n\t" // i - PREFETCH" (%%"REG_c") \n\t" - PREFETCH" 32(%%"REG_c") \n\t" - PREFETCH" 64(%%"REG_c") \n\t" - - CALL_MMX2_FILTER_CODE - CALL_MMX2_FILTER_CODE - CALL_MMX2_FILTER_CODE - CALL_MMX2_FILTER_CODE - "xor %%"REG_a", %%"REG_a" \n\t" // i - "mov %5, %%"REG_c" \n\t" // src - "mov %1, %%"REG_D" \n\t" // buf1 - "add $"AV_STRINGIFY(VOF)", %%"REG_D" \n\t" - PREFETCH" (%%"REG_c") \n\t" - PREFETCH" 32(%%"REG_c") \n\t" - PREFETCH" 64(%%"REG_c") \n\t" - - CALL_MMX2_FILTER_CODE - CALL_MMX2_FILTER_CODE - CALL_MMX2_FILTER_CODE - CALL_MMX2_FILTER_CODE - -#if defined(PIC) - "mov %6, %%"REG_b" \n\t" -#endif - :: "m" (src1), "m" (dst), "m" (filter), "m" (filterPos), - "m" (mmx2FilterCode), "m" (src2) -#if defined(PIC) - ,"m" (ebxsave) -#endif - : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D -#if !defined(PIC) - ,"%"REG_b -#endif - ); - for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) { - 
//printf("%d %d %d\n", dstWidth, i, srcW); - dst[i] = src1[srcW-1]*128; - dst[i+VOFW] = src2[srcW-1]*128; - } - } else { -#endif /* COMPILE_TEMPLATE_MMX2 */ - x86_reg xInc_shr16 = (x86_reg) (xInc >> 16); - uint16_t xInc_mask = xInc & 0xffff; - x86_reg dstWidth_reg = dstWidth; - __asm__ volatile( - "xor %%"REG_a", %%"REG_a" \n\t" // i - "xor %%"REG_d", %%"REG_d" \n\t" // xx - "xorl %%ecx, %%ecx \n\t" // xalpha - ".p2align 4 \n\t" - "1: \n\t" - "mov %0, %%"REG_S" \n\t" - "movzbl (%%"REG_S", %%"REG_d"), %%edi \n\t" //src[xx] - "movzbl 1(%%"REG_S", %%"REG_d"), %%esi \n\t" //src[xx+1] - FAST_BILINEAR_X86 - "movw %%si, (%%"REG_D", %%"REG_a", 2) \n\t" - - "movzbl (%5, %%"REG_d"), %%edi \n\t" //src[xx] - "movzbl 1(%5, %%"REG_d"), %%esi \n\t" //src[xx+1] - FAST_BILINEAR_X86 - "movw %%si, "AV_STRINGIFY(VOF)"(%%"REG_D", %%"REG_a", 2) \n\t" - - "addw %4, %%cx \n\t" //xalpha += xInc&0xFFFF - "adc %3, %%"REG_d" \n\t" //xx+= xInc>>16 + carry - "add $1, %%"REG_a" \n\t" - "cmp %2, %%"REG_a" \n\t" - " jb 1b \n\t" - -/* GCC 3.3 makes MPlayer crash on IA-32 machines when using "g" operand here, -which is needed to support GCC 4.0. 
*/ -#if ARCH_X86_64 && AV_GCC_VERSION_AT_LEAST(3,4) - :: "m" (src1), "m" (dst), "g" (dstWidth_reg), "m" (xInc_shr16), "m" (xInc_mask), -#else - :: "m" (src1), "m" (dst), "m" (dstWidth_reg), "m" (xInc_shr16), "m" (xInc_mask), -#endif - "r" (src2) - : "%"REG_a, "%"REG_d, "%ecx", "%"REG_D, "%esi" - ); -#if COMPILE_TEMPLATE_MMX2 - } //if MMX2 can't be used -#endif -#else - int i; - unsigned int xpos=0; - for (i=0;i<dstWidth;i++) { - register unsigned int xx=xpos>>16; - register unsigned int xalpha=(xpos&0xFFFF)>>9; - dst[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha); - dst[i+VOFW]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha); - /* slower - dst[i]= (src1[xx]<<7) + (src1[xx+1] - src1[xx])*xalpha; - dst[i+VOFW]=(src2[xx]<<7) + (src2[xx+1] - src2[xx])*xalpha; - */ - xpos+=xInc; - } -#endif /* ARCH_X86 */ -} - -inline static void RENAME(hcscale)(SwsContext *c, uint16_t *dst, long dstWidth, const uint8_t *src1, const uint8_t *src2, - int srcW, int xInc, const int16_t *hChrFilter, - const int16_t *hChrFilterPos, int hChrFilterSize, - uint8_t *formatConvBuffer, - uint32_t *pal) -{ - - src1 += c->chrSrcOffset; - src2 += c->chrSrcOffset; - - if (c->chrToYV12) { - c->chrToYV12(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal); - src1= formatConvBuffer; - src2= formatConvBuffer+VOFW; - } - - if (!c->hcscale_fast) { - c->hScale(dst , dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize); - c->hScale(dst+VOFW, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize); - } else { // fast bilinear upscale / crap downscale - c->hcscale_fast(c, dst, dstWidth, src1, src2, srcW, xInc); - } - - if (c->chrConvertRange) - c->chrConvertRange(dst, dstWidth); -} #define DEBUG_SWSCALE_BUFFERS 0 #define DEBUG_BUFFERS(...) 
if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__) -static int RENAME(swScale)(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY, - int srcSliceH, uint8_t* dst[], int dstStride[]) +#if HAVE_MMX +static void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufIndex, + int lastInLumBuf, int lastInChrBuf); +#endif + +static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[], + int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[]) { /* load a few things into local vars to make the code more readable? and faster */ const int srcW= c->srcW; @@ -2612,7 +300,8 @@ static int RENAME(swScale)(SwsContext *c, const uint8_t* src[], int srcStride[], const int hLumFilterSize= c->hLumFilterSize; const int hChrFilterSize= c->hChrFilterSize; int16_t **lumPixBuf= c->lumPixBuf; - int16_t **chrPixBuf= c->chrPixBuf; + int16_t **chrUPixBuf= c->chrUPixBuf; + int16_t **chrVPixBuf= c->chrVPixBuf; int16_t **alpPixBuf= c->alpPixBuf; const int vLumBufSize= c->vLumBufSize; const int vChrBufSize= c->vChrBufSize; @@ -2678,6 +367,8 @@ static int RENAME(swScale)(SwsContext *c, const uint8_t* src[], int srcStride[], unsigned char *uDest=dst[1]+dstStride[1]*chrDstY; unsigned char *vDest=dst[2]+dstStride[2]*chrDstY; unsigned char *aDest=(CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3]+dstStride[3]*dstY : NULL; + const uint8_t *lumDither= isNBPS(c->srcFormat) || is16BPS(c->srcFormat) ? dithers[7][dstY &7] : flat64; + const uint8_t *chrDither= isNBPS(c->srcFormat) || is16BPS(c->srcFormat) ? 
dithers[7][chrDstY&7] : flat64; const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)]; @@ -2717,15 +408,15 @@ static int RENAME(swScale)(SwsContext *c, const uint8_t* src[], int srcStride[], assert(lumBufIndex < 2*vLumBufSize); assert(lastInLumBuf + 1 - srcSliceY < srcSliceH); assert(lastInLumBuf + 1 - srcSliceY >= 0); - RENAME(hyscale)(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc, - hLumFilter, hLumFilterPos, hLumFilterSize, - formatConvBuffer, - pal, 0); + hyscale_c(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc, + hLumFilter, hLumFilterPos, hLumFilterSize, + formatConvBuffer, + pal, 0); if (CONFIG_SWSCALE_ALPHA && alpPixBuf) - RENAME(hyscale)(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW, lumXInc, - hLumFilter, hLumFilterPos, hLumFilterSize, - formatConvBuffer, - pal, 1); + hyscale_c(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW, + lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize, + formatConvBuffer, + pal, 1); lastInLumBuf++; DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n", lumBufIndex, lastInLumBuf); @@ -2740,10 +431,10 @@ static int RENAME(swScale)(SwsContext *c, const uint8_t* src[], int srcStride[], //FIXME replace parameters through context struct (some at least) if (c->needs_hcscale) - RENAME(hcscale)(c, chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc, - hChrFilter, hChrFilterPos, hChrFilterSize, - formatConvBuffer, - pal); + hcscale_c(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex], + chrDstW, src1, src2, chrSrcW, chrXInc, + hChrFilter, hChrFilterPos, hChrFilterSize, + formatConvBuffer, pal); lastInChrBuf++; DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n", chrBufIndex, lastInChrBuf); @@ -2754,104 +445,59 @@ static int RENAME(swScale)(SwsContext *c, const uint8_t* src[], int srcStride[], if (!enough_lines) break; //we can't output a dstY line so let's try with the next slice -#if 
COMPILE_TEMPLATE_MMX - c->blueDither= ff_dither8[dstY&1]; - if (c->dstFormat == PIX_FMT_RGB555 || c->dstFormat == PIX_FMT_BGR555) - c->greenDither= ff_dither8[dstY&1]; - else - c->greenDither= ff_dither4[dstY&1]; - c->redDither= ff_dither8[(dstY+1)&1]; +#if HAVE_MMX + updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf); #endif if (dstY < dstH-2) { const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; - const int16_t **chrSrcPtr= (const int16_t **) chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; + const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; + const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL; -#if COMPILE_TEMPLATE_MMX - int i; - if (flags & SWS_ACCURATE_RND) { - int s= APCK_SIZE / 8; - for (i=0; i<vLumFilterSize; i+=2) { - *(const void**)&lumMmxFilter[s*i ]= lumSrcPtr[i ]; - *(const void**)&lumMmxFilter[s*i+APCK_PTR2/4 ]= lumSrcPtr[i+(vLumFilterSize>1)]; - lumMmxFilter[s*i+APCK_COEF/4 ]= - lumMmxFilter[s*i+APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i ] - + (vLumFilterSize>1 ? 
vLumFilter[dstY*vLumFilterSize + i + 1]<<16 : 0); - if (CONFIG_SWSCALE_ALPHA && alpPixBuf) { - *(const void**)&alpMmxFilter[s*i ]= alpSrcPtr[i ]; - *(const void**)&alpMmxFilter[s*i+APCK_PTR2/4 ]= alpSrcPtr[i+(vLumFilterSize>1)]; - alpMmxFilter[s*i+APCK_COEF/4 ]= - alpMmxFilter[s*i+APCK_COEF/4+1]= lumMmxFilter[s*i+APCK_COEF/4 ]; - } - } - for (i=0; i<vChrFilterSize; i+=2) { - *(const void**)&chrMmxFilter[s*i ]= chrSrcPtr[i ]; - *(const void**)&chrMmxFilter[s*i+APCK_PTR2/4 ]= chrSrcPtr[i+(vChrFilterSize>1)]; - chrMmxFilter[s*i+APCK_COEF/4 ]= - chrMmxFilter[s*i+APCK_COEF/4+1]= vChrFilter[chrDstY*vChrFilterSize + i ] - + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1]<<16 : 0); - } - } else { - for (i=0; i<vLumFilterSize; i++) { - lumMmxFilter[4*i+0]= (int32_t)lumSrcPtr[i]; - lumMmxFilter[4*i+1]= (uint64_t)lumSrcPtr[i] >> 32; - lumMmxFilter[4*i+2]= - lumMmxFilter[4*i+3]= - ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001; - if (CONFIG_SWSCALE_ALPHA && alpPixBuf) { - alpMmxFilter[4*i+0]= (int32_t)alpSrcPtr[i]; - alpMmxFilter[4*i+1]= (uint64_t)alpSrcPtr[i] >> 32; - alpMmxFilter[4*i+2]= - alpMmxFilter[4*i+3]= lumMmxFilter[4*i+2]; - } - } - for (i=0; i<vChrFilterSize; i++) { - chrMmxFilter[4*i+0]= (int32_t)chrSrcPtr[i]; - chrMmxFilter[4*i+1]= (uint64_t)chrSrcPtr[i] >> 32; - chrMmxFilter[4*i+2]= - chrMmxFilter[4*i+3]= - ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001; - } - } -#endif if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) { const int chrSkipMask= (1<<c->chrDstVSubSample)-1; if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi c->yuv2nv12X(c, vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, - vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, - dest, uDest, dstW, chrDstW, dstFormat); + vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize, + dest, uDest, dstW, chrDstW, dstFormat, lumDither, chrDither); } else if (isPlanarYUV(dstFormat) || 
dstFormat==PIX_FMT_GRAY8) { //YV12 like const int chrSkipMask= (1<<c->chrDstVSubSample)-1; if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi if (is16BPS(dstFormat) || isNBPS(dstFormat)) { - yuv2yuvX16inC( - vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, - vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, - alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest, (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW, + yuv2yuvX16inC(vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, + vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, + chrVSrcPtr, vChrFilterSize, + alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest, + (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW, dstFormat); } else if (vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12 const int16_t *lumBuf = lumSrcPtr[0]; - const int16_t *chrBuf= chrSrcPtr[0]; + const int16_t *chrUBuf= chrUSrcPtr[0]; + const int16_t *chrVBuf= chrVSrcPtr[0]; const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? 
alpSrcPtr[0] : NULL; - c->yuv2yuv1(c, lumBuf, chrBuf, alpBuf, dest, uDest, vDest, aDest, dstW, chrDstW); + c->yuv2yuv1(c, lumBuf, chrUBuf, chrVBuf, alpBuf, dest, + uDest, vDest, aDest, dstW, chrDstW, lumDither, chrDither); } else { //General YV12 c->yuv2yuvX(c, vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, - vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, - alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW); + vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, + chrVSrcPtr, vChrFilterSize, + alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW, lumDither, chrDither); } } else { - assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2); - assert(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2); + assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2); + assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2); if (vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB int chrAlpha= vChrFilter[2*dstY+1]; if(flags & SWS_FULL_CHR_H_INT) { yuv2rgbXinC_full(c, //FIXME write a packed1_full function vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, - vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, + vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, + chrVSrcPtr, vChrFilterSize, alpSrcPtr, dest, dstW, dstY); } else { - c->yuv2packed1(c, *lumSrcPtr, *chrSrcPtr, *(chrSrcPtr+1), + c->yuv2packed1(c, *lumSrcPtr, *chrUSrcPtr, *(chrUSrcPtr+1), + *chrVSrcPtr, *(chrVSrcPtr+1), alpPixBuf ? 
*alpSrcPtr : NULL, dest, dstW, chrAlpha, dstFormat, flags, dstY); } @@ -2865,10 +511,11 @@ static int RENAME(swScale)(SwsContext *c, const uint8_t* src[], int srcStride[], if(flags & SWS_FULL_CHR_H_INT) { yuv2rgbXinC_full(c, //FIXME write a packed2_full function vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, - vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, + vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize, alpSrcPtr, dest, dstW, dstY); } else { - c->yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrSrcPtr, *(chrSrcPtr+1), + c->yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrUSrcPtr, *(chrUSrcPtr+1), + *chrVSrcPtr, *(chrVSrcPtr+1), alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *(alpSrcPtr+1) : NULL, dest, dstW, lumAlpha, chrAlpha, dstY); } @@ -2876,54 +523,55 @@ static int RENAME(swScale)(SwsContext *c, const uint8_t* src[], int srcStride[], if(flags & SWS_FULL_CHR_H_INT) { yuv2rgbXinC_full(c, vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, - vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, + vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize, alpSrcPtr, dest, dstW, dstY); } else { c->yuv2packedX(c, vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, - vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, + vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize, alpSrcPtr, dest, dstW, dstY); } } } } else { // hmm looks like we can't use MMX here without overwriting this array's tail const int16_t **lumSrcPtr= (const int16_t **)lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; - const int16_t **chrSrcPtr= (const int16_t **)chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; + const int16_t **chrUSrcPtr= (const int16_t **)chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; + const int16_t **chrVSrcPtr= (const int16_t **)chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; const int16_t 
**alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL; if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) { const int chrSkipMask= (1<<c->chrDstVSubSample)-1; if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi yuv2nv12XinC( vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, - vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, - dest, uDest, dstW, chrDstW, dstFormat); + vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize, + dest, uDest, dstW, chrDstW, dstFormat, lumDither, chrDither); } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 const int chrSkipMask= (1<<c->chrDstVSubSample)-1; if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi if (is16BPS(dstFormat) || isNBPS(dstFormat)) { yuv2yuvX16inC( vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, - vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, + vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize, alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest, (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW, dstFormat); } else { yuv2yuvXinC( vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, - vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, - alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW); + vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize, + alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW, lumDither, chrDither); } } else { assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2); - assert(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2); + assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2); if(flags & SWS_FULL_CHR_H_INT) { yuv2rgbXinC_full(c, vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, - vChrFilter+dstY*vChrFilterSize, chrSrcPtr, 
vChrFilterSize, + vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize, alpSrcPtr, dest, dstW, dstY); } else { yuv2packedXinC(c, vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, - vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, + vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize, alpSrcPtr, dest, dstW, dstY); } } @@ -2933,12 +581,12 @@ static int RENAME(swScale)(SwsContext *c, const uint8_t* src[], int srcStride[], if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf) fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255); -#if COMPILE_TEMPLATE_MMX - if (flags & SWS_CPU_CAPS_MMX2 ) __asm__ volatile("sfence":::"memory"); - /* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */ - if (flags & SWS_CPU_CAPS_3DNOW) __asm__ volatile("femms" :::"memory"); - else __asm__ volatile("emms" :::"memory"); +#if HAVE_MMX2 + if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2) + __asm__ volatile("sfence":::"memory"); #endif + emms_c(); + /* store changed local vars back in the context */ c->dstY= dstY; c->lumBufIndex= lumBufIndex; @@ -2949,85 +597,82 @@ static int RENAME(swScale)(SwsContext *c, const uint8_t* src[], int srcStride[], return dstY - lastDstY; } -static void RENAME(sws_init_swScale)(SwsContext *c) +static void sws_init_swScale_c(SwsContext *c) { enum PixelFormat srcFormat = c->srcFormat; - c->yuv2nv12X = RENAME(yuv2nv12X ); - c->yuv2yuv1 = RENAME(yuv2yuv1 ); - c->yuv2yuvX = RENAME(yuv2yuvX ); - c->yuv2packed1 = RENAME(yuv2packed1 ); - c->yuv2packed2 = RENAME(yuv2packed2 ); - c->yuv2packedX = RENAME(yuv2packedX ); + c->yuv2nv12X = yuv2nv12X_c; + c->yuv2yuv1 = yuv2yuv1_c; + c->yuv2yuvX = yuv2yuvX_c; + c->yuv2packed1 = yuv2packed1_c; + c->yuv2packed2 = yuv2packed2_c; + c->yuv2packedX = yuv2packedX_c; - c->hScale = RENAME(hScale ); + c->hScale = hScale_c; -#if COMPILE_TEMPLATE_MMX - // Use the new MMX scaler if the MMX2 one can't be used (it is faster than the x86 ASM one). 
- if (c->flags & SWS_FAST_BILINEAR && c->canMMX2BeUsed) -#else if (c->flags & SWS_FAST_BILINEAR) -#endif { - c->hyscale_fast = RENAME(hyscale_fast); - c->hcscale_fast = RENAME(hcscale_fast); + c->hyscale_fast = hyscale_fast_c; + c->hcscale_fast = hcscale_fast_c; } c->chrToYV12 = NULL; switch(srcFormat) { - case PIX_FMT_YUYV422 : c->chrToYV12 = RENAME(yuy2ToUV); break; - case PIX_FMT_UYVY422 : c->chrToYV12 = RENAME(uyvyToUV); break; - case PIX_FMT_NV12 : c->chrToYV12 = RENAME(nv12ToUV); break; - case PIX_FMT_NV21 : c->chrToYV12 = RENAME(nv21ToUV); break; + case PIX_FMT_YUYV422 : c->chrToYV12 = yuy2ToUV_c; break; + case PIX_FMT_UYVY422 : c->chrToYV12 = uyvyToUV_c; break; + case PIX_FMT_NV12 : c->chrToYV12 = nv12ToUV_c; break; + case PIX_FMT_NV21 : c->chrToYV12 = nv21ToUV_c; break; case PIX_FMT_RGB8 : case PIX_FMT_BGR8 : case PIX_FMT_PAL8 : case PIX_FMT_BGR4_BYTE: case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV; break; - case PIX_FMT_YUV420P9BE: c->chrToYV12 = BE9ToUV_c; break; - case PIX_FMT_YUV420P9LE: c->chrToYV12 = LE9ToUV_c; break; + case PIX_FMT_GRAY16BE : + case PIX_FMT_YUV420P9BE: case PIX_FMT_YUV422P10BE: - case PIX_FMT_YUV420P10BE: c->chrToYV12 = BE10ToUV_c; break; - case PIX_FMT_YUV422P10LE: - case PIX_FMT_YUV420P10LE: c->chrToYV12 = LE10ToUV_c; break; + case PIX_FMT_YUV420P10BE: case PIX_FMT_YUV420P16BE: case PIX_FMT_YUV422P16BE: - case PIX_FMT_YUV444P16BE: c->chrToYV12 = RENAME(BEToUV); break; + case PIX_FMT_YUV444P16BE: c->hScale16= HAVE_BIGENDIAN ? hScale16_c : hScale16X_c; break; + case PIX_FMT_GRAY16LE : + case PIX_FMT_YUV420P9LE: + case PIX_FMT_YUV422P10LE: + case PIX_FMT_YUV420P10LE: case PIX_FMT_YUV420P16LE: case PIX_FMT_YUV422P16LE: - case PIX_FMT_YUV444P16LE: c->chrToYV12 = RENAME(LEToUV); break; + case PIX_FMT_YUV444P16LE: c->hScale16= HAVE_BIGENDIAN ? 
hScale16X_c : hScale16_c; break; } if (c->chrSrcHSubSample) { switch(srcFormat) { - case PIX_FMT_RGB48BE: - case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48ToUV_half; break; - case PIX_FMT_BGR48BE: - case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48ToUV_half; break; + case PIX_FMT_RGB48BE: c->chrToYV12 = rgb48BEToUV_half; break; + case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48LEToUV_half; break; + case PIX_FMT_BGR48BE: c->chrToYV12 = bgr48BEToUV_half; break; + case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48LEToUV_half; break; case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_half; break; case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV_half; break; - case PIX_FMT_BGR24 : c->chrToYV12 = RENAME(bgr24ToUV_half); break; + case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_half_c; break; case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV_half; break; case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV_half; break; case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half; break; case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV_half; break; - case PIX_FMT_RGB24 : c->chrToYV12 = RENAME(rgb24ToUV_half); break; + case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break; case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV_half; break; case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV_half; break; } } else { switch(srcFormat) { - case PIX_FMT_RGB48BE: - case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48ToUV; break; - case PIX_FMT_BGR48BE: - case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48ToUV; break; + case PIX_FMT_RGB48BE: c->chrToYV12 = rgb48BEToUV; break; + case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48LEToUV; break; + case PIX_FMT_BGR48BE: c->chrToYV12 = bgr48BEToUV; break; + case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48LEToUV; break; case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV; break; case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV; break; - case PIX_FMT_BGR24 : c->chrToYV12 = RENAME(bgr24ToUV); break; + case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_c; break; case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV; break; case PIX_FMT_BGR555 : 
c->chrToYV12 = bgr15ToUV; break; case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV; break; case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV; break; - case PIX_FMT_RGB24 : c->chrToYV12 = RENAME(rgb24ToUV); break; + case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break; case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV; break; case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV; break; } @@ -3036,27 +681,15 @@ static void RENAME(sws_init_swScale)(SwsContext *c) c->lumToYV12 = NULL; c->alpToYV12 = NULL; switch (srcFormat) { - case PIX_FMT_YUV420P9BE: c->lumToYV12 = BE9ToY_c; break; - case PIX_FMT_YUV420P9LE: c->lumToYV12 = LE9ToY_c; break; - case PIX_FMT_YUV422P10BE: - case PIX_FMT_YUV420P10BE: c->lumToYV12 = BE10ToY_c; break; - case PIX_FMT_YUV422P10LE: - case PIX_FMT_YUV420P10LE: c->lumToYV12 = LE10ToY_c; break; case PIX_FMT_YUYV422 : - case PIX_FMT_YUV420P16BE: - case PIX_FMT_YUV422P16BE: - case PIX_FMT_YUV444P16BE: case PIX_FMT_GRAY8A : - case PIX_FMT_GRAY16BE : c->lumToYV12 = RENAME(yuy2ToY); break; + c->lumToYV12 = yuy2ToY_c; break; case PIX_FMT_UYVY422 : - case PIX_FMT_YUV420P16LE: - case PIX_FMT_YUV422P16LE: - case PIX_FMT_YUV444P16LE: - case PIX_FMT_GRAY16LE : c->lumToYV12 = RENAME(uyvyToY); break; - case PIX_FMT_BGR24 : c->lumToYV12 = RENAME(bgr24ToY); break; + c->lumToYV12 = uyvyToY_c; break; + case PIX_FMT_BGR24 : c->lumToYV12 = bgr24ToY_c; break; case PIX_FMT_BGR565 : c->lumToYV12 = bgr16ToY; break; case PIX_FMT_BGR555 : c->lumToYV12 = bgr15ToY; break; - case PIX_FMT_RGB24 : c->lumToYV12 = RENAME(rgb24ToY); break; + case PIX_FMT_RGB24 : c->lumToYV12 = rgb24ToY_c; break; case PIX_FMT_RGB565 : c->lumToYV12 = rgb16ToY; break; case PIX_FMT_RGB555 : c->lumToYV12 = rgb15ToY; break; case PIX_FMT_RGB8 : @@ -3070,10 +703,10 @@ static void RENAME(sws_init_swScale)(SwsContext *c) case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY; break; case PIX_FMT_BGR32 : c->lumToYV12 = rgb32ToY; break; case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY; break; - case PIX_FMT_RGB48BE: - case 
PIX_FMT_RGB48LE: c->lumToYV12 = rgb48ToY; break; - case PIX_FMT_BGR48BE: - case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48ToY; break; + case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY; break; + case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY; break; + case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY; break; + case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY; break; } if (c->alpPixBuf) { switch (srcFormat) { @@ -3081,11 +714,14 @@ static void RENAME(sws_init_swScale)(SwsContext *c) case PIX_FMT_RGB32_1: case PIX_FMT_BGR32 : case PIX_FMT_BGR32_1: c->alpToYV12 = abgrToA; break; - case PIX_FMT_GRAY8A : c->alpToYV12 = RENAME(yuy2ToY); break; + case PIX_FMT_GRAY8A : c->alpToYV12 = yuy2ToY_c; break; case PIX_FMT_PAL8 : c->alpToYV12 = palToA; break; } } + if(isAnyRGB(c->srcFormat) || c->srcFormat == PIX_FMT_PAL8) + c->hScale16= hScale16_c; + switch (srcFormat) { case PIX_FMT_GRAY8A : c->alpSrcOffset = 1; @@ -3094,21 +730,15 @@ static void RENAME(sws_init_swScale)(SwsContext *c) case PIX_FMT_BGR32 : c->alpSrcOffset = 3; break; - case PIX_FMT_RGB48LE: - case PIX_FMT_BGR48LE: - c->lumSrcOffset = 1; - c->chrSrcOffset = 1; - c->alpSrcOffset = 1; - break; } if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) { if (c->srcRange) { - c->lumConvertRange = RENAME(lumRangeFromJpeg); - c->chrConvertRange = RENAME(chrRangeFromJpeg); + c->lumConvertRange = lumRangeFromJpeg_c; + c->chrConvertRange = chrRangeFromJpeg_c; } else { - c->lumConvertRange = RENAME(lumRangeToJpeg); - c->chrConvertRange = RENAME(chrRangeToJpeg); + c->lumConvertRange = lumRangeToJpeg_c; + c->chrConvertRange = chrRangeToJpeg_c; } } diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c new file mode 100644 index 0000000000..e0c4b25846 --- /dev/null +++ b/libswscale/swscale_unscaled.c @@ -0,0 +1,849 @@ +/* + * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at> + * + * This file is part of FFmpeg. 
+ * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <inttypes.h> +#include <string.h> +#include <math.h> +#include <stdio.h> +#include "config.h" +#include <assert.h> +#include "swscale.h" +#include "swscale_internal.h" +#include "rgb2rgb.h" +#include "libavutil/intreadwrite.h" +#include "libavutil/cpu.h" +#include "libavutil/avutil.h" +#include "libavutil/mathematics.h" +#include "libavutil/bswap.h" +#include "libavutil/pixdesc.h" + +#define RGB2YUV_SHIFT 15 +#define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5)) +#define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5)) +#define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5)) +#define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5)) +#define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5)) +#define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5)) +#define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5)) +#define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5)) +#define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5)) + +static void fillPlane(uint8_t* plane, int stride, int width, int height, int y, uint8_t val) +{ + int i; + uint8_t *ptr = plane + stride*y; + for (i=0; i<height; i++) { + memset(ptr, val, width); + ptr += stride; + } +} + +static void copyPlane(const uint8_t *src, int srcStride, + int srcSliceY, 
int srcSliceH, int width, + uint8_t *dst, int dstStride) +{ + dst += dstStride * srcSliceY; + if (dstStride == srcStride && srcStride > 0) { + memcpy(dst, src, srcSliceH * dstStride); + } else { + int i; + for (i=0; i<srcSliceH; i++) { + memcpy(dst, src, width); + src += srcStride; + dst += dstStride; + } + } +} + +static int planarToNv12Wrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY, + int srcSliceH, uint8_t* dstParam[], int dstStride[]) +{ + uint8_t *dst = dstParam[1] + dstStride[1]*srcSliceY/2; + + copyPlane(src[0], srcStride[0], srcSliceY, srcSliceH, c->srcW, + dstParam[0], dstStride[0]); + + if (c->dstFormat == PIX_FMT_NV12) + interleaveBytes(src[1], src[2], dst, c->srcW/2, srcSliceH/2, srcStride[1], srcStride[2], dstStride[0]); + else + interleaveBytes(src[2], src[1], dst, c->srcW/2, srcSliceH/2, srcStride[2], srcStride[1], dstStride[0]); + + return srcSliceH; +} + +static int planarToYuy2Wrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY, + int srcSliceH, uint8_t* dstParam[], int dstStride[]) +{ + uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY; + + yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0], srcStride[1], dstStride[0]); + + return srcSliceH; +} + +static int planarToUyvyWrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY, + int srcSliceH, uint8_t* dstParam[], int dstStride[]) +{ + uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY; + + yv12touyvy(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0], srcStride[1], dstStride[0]); + + return srcSliceH; +} + +static int yuv422pToYuy2Wrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY, + int srcSliceH, uint8_t* dstParam[], int dstStride[]) +{ + uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY; + + yuv422ptoyuy2(src[0],src[1],src[2],dst,c->srcW,srcSliceH,srcStride[0],srcStride[1],dstStride[0]); + + return srcSliceH; +} + +static int yuv422pToUyvyWrapper(SwsContext *c, 
const uint8_t* src[], int srcStride[], int srcSliceY, + int srcSliceH, uint8_t* dstParam[], int dstStride[]) +{ + uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY; + + yuv422ptouyvy(src[0],src[1],src[2],dst,c->srcW,srcSliceH,srcStride[0],srcStride[1],dstStride[0]); + + return srcSliceH; +} + +static int yuyvToYuv420Wrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY, + int srcSliceH, uint8_t* dstParam[], int dstStride[]) +{ + uint8_t *ydst=dstParam[0] + dstStride[0]*srcSliceY; + uint8_t *udst=dstParam[1] + dstStride[1]*srcSliceY/2; + uint8_t *vdst=dstParam[2] + dstStride[2]*srcSliceY/2; + + yuyvtoyuv420(ydst, udst, vdst, src[0], c->srcW, srcSliceH, dstStride[0], dstStride[1], srcStride[0]); + + if (dstParam[3]) + fillPlane(dstParam[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255); + + return srcSliceH; +} + +static int yuyvToYuv422Wrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY, + int srcSliceH, uint8_t* dstParam[], int dstStride[]) +{ + uint8_t *ydst=dstParam[0] + dstStride[0]*srcSliceY; + uint8_t *udst=dstParam[1] + dstStride[1]*srcSliceY; + uint8_t *vdst=dstParam[2] + dstStride[2]*srcSliceY; + + yuyvtoyuv422(ydst, udst, vdst, src[0], c->srcW, srcSliceH, dstStride[0], dstStride[1], srcStride[0]); + + return srcSliceH; +} + +static int uyvyToYuv420Wrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY, + int srcSliceH, uint8_t* dstParam[], int dstStride[]) +{ + uint8_t *ydst=dstParam[0] + dstStride[0]*srcSliceY; + uint8_t *udst=dstParam[1] + dstStride[1]*srcSliceY/2; + uint8_t *vdst=dstParam[2] + dstStride[2]*srcSliceY/2; + + uyvytoyuv420(ydst, udst, vdst, src[0], c->srcW, srcSliceH, dstStride[0], dstStride[1], srcStride[0]); + + if (dstParam[3]) + fillPlane(dstParam[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255); + + return srcSliceH; +} + +static int uyvyToYuv422Wrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY, + int srcSliceH, uint8_t* dstParam[], 
int dstStride[]) +{ + uint8_t *ydst=dstParam[0] + dstStride[0]*srcSliceY; + uint8_t *udst=dstParam[1] + dstStride[1]*srcSliceY; + uint8_t *vdst=dstParam[2] + dstStride[2]*srcSliceY; + + uyvytoyuv422(ydst, udst, vdst, src[0], c->srcW, srcSliceH, dstStride[0], dstStride[1], srcStride[0]); + + return srcSliceH; +} + +static void gray8aToPacked32(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette) +{ + int i; + for (i=0; i<num_pixels; i++) + ((uint32_t *) dst)[i] = ((const uint32_t *)palette)[src[i<<1]] | (src[(i<<1)+1] << 24); +} + +static void gray8aToPacked32_1(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette) +{ + int i; + + for (i=0; i<num_pixels; i++) + ((uint32_t *) dst)[i] = ((const uint32_t *)palette)[src[i<<1]] | src[(i<<1)+1]; +} + +static void gray8aToPacked24(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette) +{ + int i; + + for (i=0; i<num_pixels; i++) { + //FIXME slow? + dst[0]= palette[src[i<<1]*4+0]; + dst[1]= palette[src[i<<1]*4+1]; + dst[2]= palette[src[i<<1]*4+2]; + dst+= 3; + } +} + +static int palToRgbWrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY, + int srcSliceH, uint8_t* dst[], int dstStride[]) +{ + const enum PixelFormat srcFormat= c->srcFormat; + const enum PixelFormat dstFormat= c->dstFormat; + void (*conv)(const uint8_t *src, uint8_t *dst, int num_pixels, + const uint8_t *palette)=NULL; + int i; + uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY; + const uint8_t *srcPtr= src[0]; + + if (srcFormat == PIX_FMT_GRAY8A) { + switch (dstFormat) { + case PIX_FMT_RGB32 : conv = gray8aToPacked32; break; + case PIX_FMT_BGR32 : conv = gray8aToPacked32; break; + case PIX_FMT_BGR32_1: conv = gray8aToPacked32_1; break; + case PIX_FMT_RGB32_1: conv = gray8aToPacked32_1; break; + case PIX_FMT_RGB24 : conv = gray8aToPacked24; break; + case PIX_FMT_BGR24 : conv = gray8aToPacked24; break; + } + } else if (usePal(srcFormat)) { + switch (dstFormat) { + case 
PIX_FMT_RGB32 : conv = sws_convertPalette8ToPacked32; break; + case PIX_FMT_BGR32 : conv = sws_convertPalette8ToPacked32; break; + case PIX_FMT_BGR32_1: conv = sws_convertPalette8ToPacked32; break; + case PIX_FMT_RGB32_1: conv = sws_convertPalette8ToPacked32; break; + case PIX_FMT_RGB24 : conv = sws_convertPalette8ToPacked24; break; + case PIX_FMT_BGR24 : conv = sws_convertPalette8ToPacked24; break; + } + } + + if (!conv) + av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n", + av_get_pix_fmt_name(srcFormat), av_get_pix_fmt_name(dstFormat)); + else { + for (i=0; i<srcSliceH; i++) { + conv(srcPtr, dstPtr, c->srcW, (uint8_t *) c->pal_rgb); + srcPtr+= srcStride[0]; + dstPtr+= dstStride[0]; + } + } + + return srcSliceH; +} + +#define isRGBA32(x) ( \ + (x) == PIX_FMT_ARGB \ + || (x) == PIX_FMT_RGBA \ + || (x) == PIX_FMT_BGRA \ + || (x) == PIX_FMT_ABGR \ + ) + +/* {RGB,BGR}{15,16,24,32,32_1} -> {RGB,BGR}{15,16,24,32} */ +static int rgbToRgbWrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY, + int srcSliceH, uint8_t* dst[], int dstStride[]) +{ + const enum PixelFormat srcFormat= c->srcFormat; + const enum PixelFormat dstFormat= c->dstFormat; + const int srcBpp= (c->srcFormatBpp + 7) >> 3; + const int dstBpp= (c->dstFormatBpp + 7) >> 3; + const int srcId= c->srcFormatBpp >> 2; /* 1:0, 4:1, 8:2, 15:3, 16:4, 24:6, 32:8 */ + const int dstId= c->dstFormatBpp >> 2; + void (*conv)(const uint8_t *src, uint8_t *dst, int src_size)=NULL; + +#define CONV_IS(src, dst) (srcFormat == PIX_FMT_##src && dstFormat == PIX_FMT_##dst) + + if (isRGBA32(srcFormat) && isRGBA32(dstFormat)) { + if ( CONV_IS(ABGR, RGBA) + || CONV_IS(ARGB, BGRA) + || CONV_IS(BGRA, ARGB) + || CONV_IS(RGBA, ABGR)) conv = shuffle_bytes_3210; + else if (CONV_IS(ABGR, ARGB) + || CONV_IS(ARGB, ABGR)) conv = shuffle_bytes_0321; + else if (CONV_IS(ABGR, BGRA) + || CONV_IS(ARGB, RGBA)) conv = shuffle_bytes_1230; + else if (CONV_IS(BGRA, RGBA) + || CONV_IS(RGBA, BGRA)) conv = 
shuffle_bytes_2103; + else if (CONV_IS(BGRA, ABGR) + || CONV_IS(RGBA, ARGB)) conv = shuffle_bytes_3012; + } else + /* BGR -> BGR */ + if ( (isBGRinInt(srcFormat) && isBGRinInt(dstFormat)) + || (isRGBinInt(srcFormat) && isRGBinInt(dstFormat))) { + switch(srcId | (dstId<<4)) { + case 0x34: conv= rgb16to15; break; + case 0x36: conv= rgb24to15; break; + case 0x38: conv= rgb32to15; break; + case 0x43: conv= rgb15to16; break; + case 0x46: conv= rgb24to16; break; + case 0x48: conv= rgb32to16; break; + case 0x63: conv= rgb15to24; break; + case 0x64: conv= rgb16to24; break; + case 0x68: conv= rgb32to24; break; + case 0x83: conv= rgb15to32; break; + case 0x84: conv= rgb16to32; break; + case 0x86: conv= rgb24to32; break; + } + } else if ( (isBGRinInt(srcFormat) && isRGBinInt(dstFormat)) + || (isRGBinInt(srcFormat) && isBGRinInt(dstFormat))) { + switch(srcId | (dstId<<4)) { + case 0x33: conv= rgb15tobgr15; break; + case 0x34: conv= rgb16tobgr15; break; + case 0x36: conv= rgb24tobgr15; break; + case 0x38: conv= rgb32tobgr15; break; + case 0x43: conv= rgb15tobgr16; break; + case 0x44: conv= rgb16tobgr16; break; + case 0x46: conv= rgb24tobgr16; break; + case 0x48: conv= rgb32tobgr16; break; + case 0x63: conv= rgb15tobgr24; break; + case 0x64: conv= rgb16tobgr24; break; + case 0x66: conv= rgb24tobgr24; break; + case 0x68: conv= rgb32tobgr24; break; + case 0x83: conv= rgb15tobgr32; break; + case 0x84: conv= rgb16tobgr32; break; + case 0x86: conv= rgb24tobgr32; break; + } + } + + if (!conv) { + av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n", + av_get_pix_fmt_name(srcFormat), av_get_pix_fmt_name(dstFormat)); + } else { + const uint8_t *srcPtr= src[0]; + uint8_t *dstPtr= dst[0]; + if ((srcFormat == PIX_FMT_RGB32_1 || srcFormat == PIX_FMT_BGR32_1) && !isRGBA32(dstFormat)) + srcPtr += ALT32_CORR; + + if ((dstFormat == PIX_FMT_RGB32_1 || dstFormat == PIX_FMT_BGR32_1) && !isRGBA32(srcFormat)) + dstPtr += ALT32_CORR; + + if (dstStride[0]*srcBpp == srcStride[0]*dstBpp && 
srcStride[0] > 0 && !(srcStride[0]%srcBpp)) + conv(srcPtr, dstPtr + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]); + else { + int i; + dstPtr += dstStride[0]*srcSliceY; + + for (i=0; i<srcSliceH; i++) { + conv(srcPtr, dstPtr, c->srcW*srcBpp); + srcPtr+= srcStride[0]; + dstPtr+= dstStride[0]; + } + } + } + return srcSliceH; +} + +static int bgr24ToYv12Wrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY, + int srcSliceH, uint8_t* dst[], int dstStride[]) +{ + rgb24toyv12( + src[0], + dst[0]+ srcSliceY *dstStride[0], + dst[1]+(srcSliceY>>1)*dstStride[1], + dst[2]+(srcSliceY>>1)*dstStride[2], + c->srcW, srcSliceH, + dstStride[0], dstStride[1], srcStride[0]); + if (dst[3]) + fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255); + return srcSliceH; +} + +static int yvu9ToYv12Wrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY, + int srcSliceH, uint8_t* dst[], int dstStride[]) +{ + copyPlane(src[0], srcStride[0], srcSliceY, srcSliceH, c->srcW, + dst[0], dstStride[0]); + + planar2x(src[1], dst[1] + dstStride[1]*(srcSliceY >> 1), c->chrSrcW, + srcSliceH >> 2, srcStride[1], dstStride[1]); + planar2x(src[2], dst[2] + dstStride[2]*(srcSliceY >> 1), c->chrSrcW, + srcSliceH >> 2, srcStride[2], dstStride[2]); + if (dst[3]) + fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255); + return srcSliceH; +} + +/* unscaled copy like stuff (assumes nearly identical formats) */ +static int packedCopyWrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY, + int srcSliceH, uint8_t* dst[], int dstStride[]) +{ + if (dstStride[0]==srcStride[0] && srcStride[0] > 0) + memcpy(dst[0] + dstStride[0]*srcSliceY, src[0], srcSliceH*dstStride[0]); + else { + int i; + const uint8_t *srcPtr= src[0]; + uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY; + int length=0; + + /* universal length finder */ + while(length+c->srcW <= FFABS(dstStride[0]) + && length+c->srcW <= FFABS(srcStride[0])) length+= c->srcW; + 
assert(length!=0); + + for (i=0; i<srcSliceH; i++) { + memcpy(dstPtr, srcPtr, length); + srcPtr+= srcStride[0]; + dstPtr+= dstStride[0]; + } + } + return srcSliceH; +} + +#define DITHER_COPY(dst, dstStride, src, srcStride, bswap, dbswap)\ + uint16_t scale= dither_scale[dst_depth-1][src_depth-1];\ + int shift= src_depth-dst_depth + dither_scale[src_depth-2][dst_depth-1];\ + for (i = 0; i < height; i++) {\ + const uint8_t *dither= dithers[src_depth-9][i&7];\ + for (j = 0; j < length-7; j+=8){\ + dst[j+0] = dbswap((bswap(src[j+0]) + dither[0])*scale>>shift);\ + dst[j+1] = dbswap((bswap(src[j+1]) + dither[1])*scale>>shift);\ + dst[j+2] = dbswap((bswap(src[j+2]) + dither[2])*scale>>shift);\ + dst[j+3] = dbswap((bswap(src[j+3]) + dither[3])*scale>>shift);\ + dst[j+4] = dbswap((bswap(src[j+4]) + dither[4])*scale>>shift);\ + dst[j+5] = dbswap((bswap(src[j+5]) + dither[5])*scale>>shift);\ + dst[j+6] = dbswap((bswap(src[j+6]) + dither[6])*scale>>shift);\ + dst[j+7] = dbswap((bswap(src[j+7]) + dither[7])*scale>>shift);\ + }\ + for (; j < length; j++)\ + dst[j] = dbswap((bswap(src[j]) + dither[j&7])*scale>>shift);\ + dst += dstStride;\ + src += srcStride;\ + } + + +static int planarCopyWrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY, + int srcSliceH, uint8_t* dst[], int dstStride[]) +{ + int plane, i, j; + for (plane=0; plane<4; plane++) { + int length= (plane==0 || plane==3) ? c->srcW : -((-c->srcW )>>c->chrDstHSubSample); + int y= (plane==0 || plane==3) ? srcSliceY: -((-srcSliceY)>>c->chrDstVSubSample); + int height= (plane==0 || plane==3) ? srcSliceH: -((-srcSliceH)>>c->chrDstVSubSample); + const uint8_t *srcPtr= src[plane]; + uint8_t *dstPtr= dst[plane] + dstStride[plane]*y; + + if (!dst[plane]) continue; + // ignore palette for GRAY8 + if (plane == 1 && !dst[2]) continue; + if (!src[plane] || (plane == 1 && !src[2])) { + if(is16BPS(c->dstFormat)) + length*=2; + fillPlane(dst[plane], dstStride[plane], length, height, y, (plane==3) ? 
255 : 128); + } else { + if(isNBPS(c->srcFormat) || isNBPS(c->dstFormat) + || (is16BPS(c->srcFormat) != is16BPS(c->dstFormat)) + ) { + const int src_depth = av_pix_fmt_descriptors[c->srcFormat].comp[plane].depth_minus1+1; + const int dst_depth = av_pix_fmt_descriptors[c->dstFormat].comp[plane].depth_minus1+1; + const uint16_t *srcPtr2 = (const uint16_t*)srcPtr; + uint16_t *dstPtr2 = (uint16_t*)dstPtr; + + if (dst_depth == 8) { + if(isBE(c->srcFormat) == HAVE_BIGENDIAN){ + DITHER_COPY(dstPtr, dstStride[plane], srcPtr2, srcStride[plane]/2, , ) + } else { + DITHER_COPY(dstPtr, dstStride[plane], srcPtr2, srcStride[plane]/2, av_bswap16, ) + } + } else if (src_depth == 8) { + for (i = 0; i < height; i++) { + if(isBE(c->dstFormat)){ + for (j = 0; j < length; j++) + AV_WB16(&dstPtr2[j], (srcPtr[j]<<(dst_depth-8)) | + (srcPtr[j]>>(2*8-dst_depth))); + } else { + for (j = 0; j < length; j++) + AV_WL16(&dstPtr2[j], (srcPtr[j]<<(dst_depth-8)) | + (srcPtr[j]>>(2*8-dst_depth))); + } + dstPtr2 += dstStride[plane]/2; + srcPtr += srcStride[plane]; + } + } else if (src_depth <= dst_depth) { + for (i = 0; i < height; i++) { +#define COPY_UP(r,w) \ + for (j = 0; j < length; j++){ \ + unsigned int v= r(&srcPtr2[j]);\ + w(&dstPtr2[j], (v<<(dst_depth-src_depth)) | \ + (v>>(2*src_depth-dst_depth)));\ + } + if(isBE(c->srcFormat)){ + if(isBE(c->dstFormat)){ + COPY_UP(AV_RB16, AV_WB16) + } else { + COPY_UP(AV_RB16, AV_WL16) + } + } else { + if(isBE(c->dstFormat)){ + COPY_UP(AV_RL16, AV_WB16) + } else { + COPY_UP(AV_RL16, AV_WL16) + } + } + dstPtr2 += dstStride[plane]/2; + srcPtr2 += srcStride[plane]/2; + } + } else { + if(isBE(c->srcFormat) == HAVE_BIGENDIAN){ + if(isBE(c->dstFormat) == HAVE_BIGENDIAN){ + DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, , ) + } else { + DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, , av_bswap16) + } + }else{ + if(isBE(c->dstFormat) == HAVE_BIGENDIAN){ + DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, 
srcStride[plane]/2, av_bswap16, ) + } else { + DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, av_bswap16, av_bswap16) + } + } + } + } else if(is16BPS(c->srcFormat) && is16BPS(c->dstFormat) + && isBE(c->srcFormat) != isBE(c->dstFormat)) { + + for (i=0; i<height; i++) { + for (j=0; j<length; j++) + ((uint16_t*)dstPtr)[j] = av_bswap16(((const uint16_t*)srcPtr)[j]); + srcPtr+= srcStride[plane]; + dstPtr+= dstStride[plane]; + } + } else if (dstStride[plane] == srcStride[plane] && + srcStride[plane] > 0 && srcStride[plane] == length) { + memcpy(dst[plane] + dstStride[plane]*y, src[plane], + height*dstStride[plane]); + } else { + if(is16BPS(c->srcFormat) && is16BPS(c->dstFormat)) + length*=2; + for (i=0; i<height; i++) { + memcpy(dstPtr, srcPtr, length); + srcPtr+= srcStride[plane]; + dstPtr+= dstStride[plane]; + } + } + } + } + return srcSliceH; +} + +void ff_get_unscaled_swscale(SwsContext *c) +{ + const enum PixelFormat srcFormat = c->srcFormat; + const enum PixelFormat dstFormat = c->dstFormat; + const int flags = c->flags; + const int dstH = c->dstH; + int needsDither; + + needsDither= isAnyRGB(dstFormat) + && c->dstFormatBpp < 24 + && (c->dstFormatBpp < c->srcFormatBpp || (!isAnyRGB(srcFormat))); + + /* yv12_to_nv12 */ + if ((srcFormat == PIX_FMT_YUV420P || srcFormat == PIX_FMT_YUVA420P) && (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21)) { + c->swScale= planarToNv12Wrapper; + } + /* yuv2bgr */ + if ((srcFormat==PIX_FMT_YUV420P || srcFormat==PIX_FMT_YUV422P || srcFormat==PIX_FMT_YUVA420P) && isAnyRGB(dstFormat) + && !(flags & SWS_ACCURATE_RND) && !(dstH&1)) { + c->swScale= ff_yuv2rgb_get_func_ptr(c); + } + + if (srcFormat==PIX_FMT_YUV410P && (dstFormat==PIX_FMT_YUV420P || dstFormat==PIX_FMT_YUVA420P) && !(flags & SWS_BITEXACT)) { + c->swScale= yvu9ToYv12Wrapper; + } + + /* bgr24toYV12 */ + if (srcFormat==PIX_FMT_BGR24 && (dstFormat==PIX_FMT_YUV420P || dstFormat==PIX_FMT_YUVA420P) && !(flags & SWS_ACCURATE_RND)) + c->swScale= 
bgr24ToYv12Wrapper; + + /* RGB/BGR -> RGB/BGR (no dither needed forms) */ + if ( isAnyRGB(srcFormat) + && isAnyRGB(dstFormat) + && srcFormat != PIX_FMT_BGR8 && dstFormat != PIX_FMT_BGR8 + && srcFormat != PIX_FMT_RGB8 && dstFormat != PIX_FMT_RGB8 + && srcFormat != PIX_FMT_BGR4 && dstFormat != PIX_FMT_BGR4 + && srcFormat != PIX_FMT_RGB4 && dstFormat != PIX_FMT_RGB4 + && srcFormat != PIX_FMT_BGR4_BYTE && dstFormat != PIX_FMT_BGR4_BYTE + && srcFormat != PIX_FMT_RGB4_BYTE && dstFormat != PIX_FMT_RGB4_BYTE + && srcFormat != PIX_FMT_MONOBLACK && dstFormat != PIX_FMT_MONOBLACK + && srcFormat != PIX_FMT_MONOWHITE && dstFormat != PIX_FMT_MONOWHITE + && srcFormat != PIX_FMT_RGB48LE && dstFormat != PIX_FMT_RGB48LE + && srcFormat != PIX_FMT_RGB48BE && dstFormat != PIX_FMT_RGB48BE + && srcFormat != PIX_FMT_BGR48LE && dstFormat != PIX_FMT_BGR48LE + && srcFormat != PIX_FMT_BGR48BE && dstFormat != PIX_FMT_BGR48BE + && (!needsDither || (c->flags&(SWS_FAST_BILINEAR|SWS_POINT)))) + c->swScale= rgbToRgbWrapper; + + if ((usePal(srcFormat) && ( + dstFormat == PIX_FMT_RGB32 || + dstFormat == PIX_FMT_RGB32_1 || + dstFormat == PIX_FMT_RGB24 || + dstFormat == PIX_FMT_BGR32 || + dstFormat == PIX_FMT_BGR32_1 || + dstFormat == PIX_FMT_BGR24))) + c->swScale= palToRgbWrapper; + + if (srcFormat == PIX_FMT_YUV422P) { + if (dstFormat == PIX_FMT_YUYV422) + c->swScale= yuv422pToYuy2Wrapper; + else if (dstFormat == PIX_FMT_UYVY422) + c->swScale= yuv422pToUyvyWrapper; + } + + /* LQ converters if -sws 0 or -sws 4*/ + if (c->flags&(SWS_FAST_BILINEAR|SWS_POINT)) { + /* yv12_to_yuy2 */ + if (srcFormat == PIX_FMT_YUV420P || srcFormat == PIX_FMT_YUVA420P) { + if (dstFormat == PIX_FMT_YUYV422) + c->swScale= planarToYuy2Wrapper; + else if (dstFormat == PIX_FMT_UYVY422) + c->swScale= planarToUyvyWrapper; + } + } + if(srcFormat == PIX_FMT_YUYV422 && (dstFormat == PIX_FMT_YUV420P || dstFormat == PIX_FMT_YUVA420P)) + c->swScale= yuyvToYuv420Wrapper; + if(srcFormat == PIX_FMT_UYVY422 && (dstFormat == PIX_FMT_YUV420P 
|| dstFormat == PIX_FMT_YUVA420P)) + c->swScale= uyvyToYuv420Wrapper; + if(srcFormat == PIX_FMT_YUYV422 && dstFormat == PIX_FMT_YUV422P) + c->swScale= yuyvToYuv422Wrapper; + if(srcFormat == PIX_FMT_UYVY422 && dstFormat == PIX_FMT_YUV422P) + c->swScale= uyvyToYuv422Wrapper; + + /* simple copy */ + if ( srcFormat == dstFormat + || (srcFormat == PIX_FMT_YUVA420P && dstFormat == PIX_FMT_YUV420P) + || (srcFormat == PIX_FMT_YUV420P && dstFormat == PIX_FMT_YUVA420P) + || (isPlanarYUV(srcFormat) && isGray(dstFormat)) + || (isPlanarYUV(dstFormat) && isGray(srcFormat)) + || (isGray(dstFormat) && isGray(srcFormat)) + || (isPlanarYUV(srcFormat) && isPlanarYUV(dstFormat) + && c->chrDstHSubSample == c->chrSrcHSubSample + && c->chrDstVSubSample == c->chrSrcVSubSample + && dstFormat != PIX_FMT_NV12 && dstFormat != PIX_FMT_NV21 + && srcFormat != PIX_FMT_NV12 && srcFormat != PIX_FMT_NV21)) + { + if (isPacked(c->srcFormat)) + c->swScale= packedCopyWrapper; + else /* Planar YUV or gray */ + c->swScale= planarCopyWrapper; + } + + if (ARCH_BFIN) + ff_bfin_get_unscaled_swscale(c); + if (HAVE_ALTIVEC) + ff_swscale_get_unscaled_altivec(c); +} + +static void reset_ptr(const uint8_t* src[], int format) +{ + if(!isALPHA(format)) + src[3]=NULL; + if(!isPlanarYUV(format)) { + src[3]=src[2]=NULL; + + if (!usePal(format)) + src[1]= NULL; + } +} + +static int check_image_pointers(uint8_t *data[4], enum PixelFormat pix_fmt, + const int linesizes[4]) +{ + const AVPixFmtDescriptor *desc = &av_pix_fmt_descriptors[pix_fmt]; + int i; + + for (i = 0; i < 4; i++) { + int plane = desc->comp[i].plane; + if (!data[plane] || !linesizes[plane]) + return 0; + } + + return 1; +} + +/** + * swscale wrapper, so we don't need to export the SwsContext. + * Assumes planar YUV to be in YUV order instead of YVU. 
+ */ +int sws_scale(SwsContext *c, const uint8_t* const src[], const int srcStride[], int srcSliceY, + int srcSliceH, uint8_t* const dst[], const int dstStride[]) +{ + int i; + const uint8_t* src2[4]= {src[0], src[1], src[2], src[3]}; + uint8_t* dst2[4]= {dst[0], dst[1], dst[2], dst[3]}; + + // do not mess up sliceDir if we have a "trailing" 0-size slice + if (srcSliceH == 0) + return 0; + + if (!check_image_pointers(src, c->srcFormat, srcStride)) { + av_log(c, AV_LOG_ERROR, "bad src image pointers\n"); + return 0; + } + if (!check_image_pointers(dst, c->dstFormat, dstStride)) { + av_log(c, AV_LOG_ERROR, "bad dst image pointers\n"); + return 0; + } + + if (c->sliceDir == 0 && srcSliceY != 0 && srcSliceY + srcSliceH != c->srcH) { + av_log(c, AV_LOG_ERROR, "Slices start in the middle!\n"); + return 0; + } + if (c->sliceDir == 0) { + if (srcSliceY == 0) c->sliceDir = 1; else c->sliceDir = -1; + } + + if (usePal(c->srcFormat)) { + for (i=0; i<256; i++) { + int p, r, g, b, y, u, v, a = 0xff; + if(c->srcFormat == PIX_FMT_PAL8) { + p=((const uint32_t*)(src[1]))[i]; + a= (p>>24)&0xFF; + r= (p>>16)&0xFF; + g= (p>> 8)&0xFF; + b= p &0xFF; + } else if(c->srcFormat == PIX_FMT_RGB8) { + r= (i>>5 )*36; + g= ((i>>2)&7)*36; + b= (i&3 )*85; + } else if(c->srcFormat == PIX_FMT_BGR8) { + b= (i>>6 )*85; + g= ((i>>3)&7)*36; + r= (i&7 )*36; + } else if(c->srcFormat == PIX_FMT_RGB4_BYTE) { + r= (i>>3 )*255; + g= ((i>>1)&3)*85; + b= (i&1 )*255; + } else if(c->srcFormat == PIX_FMT_GRAY8 || c->srcFormat == PIX_FMT_GRAY8A) { + r = g = b = i; + } else { + assert(c->srcFormat == PIX_FMT_BGR4_BYTE); + b= (i>>3 )*255; + g= ((i>>1)&3)*85; + r= (i&1 )*255; + } + y= av_clip_uint8((RY*r + GY*g + BY*b + ( 33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT); + u= av_clip_uint8((RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT); + v= av_clip_uint8((RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT); + c->pal_yuv[i]= y + (u<<8) + (v<<16) + (a<<24); + + switch(c->dstFormat) { + case 
PIX_FMT_BGR32: +#if !HAVE_BIGENDIAN + case PIX_FMT_RGB24: +#endif + c->pal_rgb[i]= r + (g<<8) + (b<<16) + (a<<24); + break; + case PIX_FMT_BGR32_1: +#if HAVE_BIGENDIAN + case PIX_FMT_BGR24: +#endif + c->pal_rgb[i]= a + (r<<8) + (g<<16) + (b<<24); + break; + case PIX_FMT_RGB32_1: +#if HAVE_BIGENDIAN + case PIX_FMT_RGB24: +#endif + c->pal_rgb[i]= a + (b<<8) + (g<<16) + (r<<24); + break; + case PIX_FMT_RGB32: +#if !HAVE_BIGENDIAN + case PIX_FMT_BGR24: +#endif + default: + c->pal_rgb[i]= b + (g<<8) + (r<<16) + (a<<24); + } + } + } + + // copy strides, so they can safely be modified + if (c->sliceDir == 1) { + // slices go from top to bottom + int srcStride2[4]= {srcStride[0], srcStride[1], srcStride[2], srcStride[3]}; + int dstStride2[4]= {dstStride[0], dstStride[1], dstStride[2], dstStride[3]}; + + reset_ptr(src2, c->srcFormat); + reset_ptr((const uint8_t**)dst2, c->dstFormat); + + /* reset slice direction at end of frame */ + if (srcSliceY + srcSliceH == c->srcH) + c->sliceDir = 0; + + return c->swScale(c, src2, srcStride2, srcSliceY, srcSliceH, dst2, dstStride2); + } else { + // slices go from bottom to top => we flip the image internally + int srcStride2[4]= {-srcStride[0], -srcStride[1], -srcStride[2], -srcStride[3]}; + int dstStride2[4]= {-dstStride[0], -dstStride[1], -dstStride[2], -dstStride[3]}; + + src2[0] += (srcSliceH-1)*srcStride[0]; + if (!usePal(c->srcFormat)) + src2[1] += ((srcSliceH>>c->chrSrcVSubSample)-1)*srcStride[1]; + src2[2] += ((srcSliceH>>c->chrSrcVSubSample)-1)*srcStride[2]; + src2[3] += (srcSliceH-1)*srcStride[3]; + dst2[0] += ( c->dstH -1)*dstStride[0]; + dst2[1] += ((c->dstH>>c->chrDstVSubSample)-1)*dstStride[1]; + dst2[2] += ((c->dstH>>c->chrDstVSubSample)-1)*dstStride[2]; + dst2[3] += ( c->dstH -1)*dstStride[3]; + + reset_ptr(src2, c->srcFormat); + reset_ptr((const uint8_t**)dst2, c->dstFormat); + + /* reset slice direction at end of frame */ + if (!srcSliceY) + c->sliceDir = 0; + + return c->swScale(c, src2, srcStride2, 
c->srcH-srcSliceY-srcSliceH, srcSliceH, dst2, dstStride2); + } +} + +#if LIBSWSCALE_VERSION_MAJOR < 1 +int sws_scale_ordered(SwsContext *c, const uint8_t* const src[], int srcStride[], int srcSliceY, + int srcSliceH, uint8_t* dst[], int dstStride[]) +{ + return sws_scale(c, src, srcStride, srcSliceY, srcSliceH, dst, dstStride); +} +#endif + +/* Convert the palette to the same packed 32-bit format as the palette */ +void sws_convertPalette8ToPacked32(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette) +{ + int i; + + for (i=0; i<num_pixels; i++) + ((uint32_t *) dst)[i] = ((const uint32_t *) palette)[src[i]]; +} + +/* Palette format: ABCD -> dst format: ABC */ +void sws_convertPalette8ToPacked24(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette) +{ + int i; + + for (i=0; i<num_pixels; i++) { + //FIXME slow? + dst[0]= palette[src[i]*4+0]; + dst[1]= palette[src[i]*4+1]; + dst[2]= palette[src[i]*4+2]; + dst+= 3; + } +} diff --git a/libswscale/utils.c b/libswscale/utils.c index ea44190ace..984f2c52fa 100644 --- a/libswscale/utils.c +++ b/libswscale/utils.c @@ -77,13 +77,17 @@ const char *swscale_license(void) || (x)==PIX_FMT_BGR48BE \ || (x)==PIX_FMT_BGR48LE \ || (x)==PIX_FMT_BGR24 \ - || (x)==PIX_FMT_BGR565 \ - || (x)==PIX_FMT_BGR555 \ + || (x)==PIX_FMT_BGR565LE \ + || (x)==PIX_FMT_BGR565BE \ + || (x)==PIX_FMT_BGR555LE \ + || (x)==PIX_FMT_BGR555BE \ || (x)==PIX_FMT_BGR32 \ || (x)==PIX_FMT_BGR32_1 \ || (x)==PIX_FMT_RGB24 \ - || (x)==PIX_FMT_RGB565 \ - || (x)==PIX_FMT_RGB555 \ + || (x)==PIX_FMT_RGB565LE \ + || (x)==PIX_FMT_RGB565BE \ + || (x)==PIX_FMT_RGB555LE \ + || (x)==PIX_FMT_RGB555BE \ || (x)==PIX_FMT_GRAY8 \ || (x)==PIX_FMT_GRAY8A \ || (x)==PIX_FMT_YUV410P \ @@ -108,12 +112,18 @@ const char *swscale_license(void) || (x)==PIX_FMT_MONOWHITE \ || (x)==PIX_FMT_MONOBLACK \ || (x)==PIX_FMT_YUV420P9LE \ + || (x)==PIX_FMT_YUV444P9LE \ || (x)==PIX_FMT_YUV420P10LE \ + || (x)==PIX_FMT_YUV422P10LE \ + || (x)==PIX_FMT_YUV444P10LE \ || 
(x)==PIX_FMT_YUV420P16LE \ || (x)==PIX_FMT_YUV422P16LE \ || (x)==PIX_FMT_YUV444P16LE \ || (x)==PIX_FMT_YUV420P9BE \ + || (x)==PIX_FMT_YUV444P9BE \ || (x)==PIX_FMT_YUV420P10BE \ + || (x)==PIX_FMT_YUV444P10BE \ + || (x)==PIX_FMT_YUV422P10BE \ || (x)==PIX_FMT_YUV420P16BE \ || (x)==PIX_FMT_YUV422P16BE \ || (x)==PIX_FMT_YUV444P16BE \ @@ -137,7 +147,22 @@ int sws_isSupportedInput(enum PixelFormat pix_fmt) || (x)==PIX_FMT_YUVJ422P \ || (x)==PIX_FMT_YUVJ440P \ || (x)==PIX_FMT_YUVJ444P \ - || isAnyRGB(x) \ + || isRGBinBytes(x) \ + || isBGRinBytes(x) \ + || (x)==PIX_FMT_RGB565 \ + || (x)==PIX_FMT_RGB555 \ + || (x)==PIX_FMT_RGB444 \ + || (x)==PIX_FMT_BGR565 \ + || (x)==PIX_FMT_BGR555 \ + || (x)==PIX_FMT_BGR444 \ + || (x)==PIX_FMT_RGB8 \ + || (x)==PIX_FMT_BGR8 \ + || (x)==PIX_FMT_RGB4_BYTE \ + || (x)==PIX_FMT_BGR4_BYTE \ + || (x)==PIX_FMT_RGB4 \ + || (x)==PIX_FMT_BGR4 \ + || (x)==PIX_FMT_MONOBLACK \ + || (x)==PIX_FMT_MONOWHITE \ || (x)==PIX_FMT_NV12 \ || (x)==PIX_FMT_NV21 \ || (x)==PIX_FMT_GRAY16BE \ @@ -165,17 +190,15 @@ int sws_isSupportedOutput(enum PixelFormat pix_fmt) extern const int32_t ff_yuv2rgb_coeffs[8][4]; +#if FF_API_SWS_FORMAT_NAME const char *sws_format_name(enum PixelFormat format) { - if ((unsigned)format < PIX_FMT_NB && av_pix_fmt_descriptors[format].name) - return av_pix_fmt_descriptors[format].name; - else - return "Unknown format"; + return av_get_pix_fmt_name(format); } +#endif static double getSplineCoeff(double a, double b, double c, double d, double dist) { -// printf("%f %f %f %f %f\n", a,b,c,d,dist); if (dist<=1.0) return ((d*dist + c)*dist + b)*dist +a; else return getSplineCoeff( 0.0, b+ 2.0*c + 3.0*d, @@ -185,7 +208,7 @@ static double getSplineCoeff(double a, double b, double c, double d, double dist } static int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSize, int xInc, - int srcW, int dstW, int filterAlign, int one, int flags, + int srcW, int dstW, int filterAlign, int one, int flags, int cpu_flags, SwsVector *srcFilter, 
SwsVector *dstFilter, double param[2]) { int i; @@ -196,10 +219,8 @@ static int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSi int64_t *filter2=NULL; const int64_t fone= 1LL<<54; int ret= -1; -#if ARCH_X86 - if (flags & SWS_CPU_CAPS_MMX) - __asm__ volatile("emms\n\t"::: "memory"); //FIXME this should not be required but it IS (even for non-MMX versions) -#endif + + emms_c(); //FIXME this should not be required but it IS (even for non-MMX versions) // NOTE: the +1 is for the MMX scaler which reads over the end FF_ALLOC_OR_GOTO(NULL, *filterPos, (dstW+1)*sizeof(int16_t), fail); @@ -416,7 +437,7 @@ static int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSi if (min>minFilterSize) minFilterSize= min; } - if (flags & SWS_CPU_CAPS_ALTIVEC) { + if (HAVE_ALTIVEC && cpu_flags & AV_CPU_FLAG_ALTIVEC) { // we can handle the special case 4, // so we don't want to go to the full 8 if (minFilterSize < 5) @@ -431,7 +452,7 @@ static int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSi filterAlign = 1; } - if (flags & SWS_CPU_CAPS_MMX) { + if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) { // special case for unscaled vertical filtering if (minFilterSize == 1 && filterAlign == 2) filterAlign= 1; @@ -521,7 +542,7 @@ fail: return ret; } -#if ARCH_X86 && (HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT) +#if HAVE_MMX2 static int initMMX2HScaler(int dstW, int xInc, uint8_t *filterCode, int16_t *filter, int32_t *filterPos, int numSplits) { uint8_t *fragmentA; @@ -679,7 +700,7 @@ static int initMMX2HScaler(int dstW, int xInc, uint8_t *filterCode, int16_t *fil return fragmentPos + 1; } -#endif /* ARCH_X86 && (HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT) */ +#endif /* HAVE_MMX2 */ static void getSubSampleFactors(int *h, int *v, enum PixelFormat format) { @@ -687,8 +708,6 @@ static void getSubSampleFactors(int *h, int *v, enum PixelFormat format) *v = av_pix_fmt_descriptors[format].log2_chroma_h; } -static int update_flags_cpu(int flags); - int 
sws_setColorspaceDetails(SwsContext *c, const int inv_table[4], int srcRange, const int table[4], int dstRange, int brightness, int contrast, int saturation) { memcpy(c->srcColorspaceTable, inv_table, sizeof(int)*4); @@ -703,21 +722,18 @@ int sws_setColorspaceDetails(SwsContext *c, const int inv_table[4], int srcRange c->dstFormatBpp = av_get_bits_per_pixel(&av_pix_fmt_descriptors[c->dstFormat]); c->srcFormatBpp = av_get_bits_per_pixel(&av_pix_fmt_descriptors[c->srcFormat]); - c->flags = update_flags_cpu(c->flags); ff_yuv2rgb_c_init_tables(c, inv_table, srcRange, brightness, contrast, saturation); //FIXME factorize -#if HAVE_ALTIVEC - if (c->flags & SWS_CPU_CAPS_ALTIVEC) + if (HAVE_ALTIVEC && av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) ff_yuv2rgb_init_tables_altivec(c, inv_table, brightness, contrast, saturation); -#endif return 0; } int sws_getColorspaceDetails(SwsContext *c, int **inv_table, int *srcRange, int **table, int *dstRange, int *brightness, int *contrast, int *saturation) { - if (isYUV(c->dstFormat) || isGray(c->dstFormat)) return -1; + if (!c || isYUV(c->dstFormat) || isGray(c->dstFormat)) return -1; *inv_table = c->srcColorspaceTable; *table = c->dstColorspaceTable; @@ -741,27 +757,6 @@ static int handle_jpeg(enum PixelFormat *format) } } -static int update_flags_cpu(int flags) -{ -#if !CONFIG_RUNTIME_CPUDETECT //ensure that the flags match the compiled variant if cpudetect is off - flags &= ~( SWS_CPU_CAPS_MMX - |SWS_CPU_CAPS_MMX2 - |SWS_CPU_CAPS_3DNOW - |SWS_CPU_CAPS_SSE2 - |SWS_CPU_CAPS_ALTIVEC - |SWS_CPU_CAPS_BFIN); - flags |= ff_hardcodedcpuflags(); -#else /* !CONFIG_RUNTIME_CPUDETECT */ - int cpuflags = av_get_cpu_flags(); - - flags |= (cpuflags & AV_CPU_FLAG_SSE2 ? SWS_CPU_CAPS_SSE2 : 0); - flags |= (cpuflags & AV_CPU_FLAG_MMX ? SWS_CPU_CAPS_MMX : 0); - flags |= (cpuflags & AV_CPU_FLAG_MMX2 ? SWS_CPU_CAPS_MMX2 : 0); - flags |= (cpuflags & AV_CPU_FLAG_3DNOW ? 
SWS_CPU_CAPS_3DNOW : 0); -#endif /* CONFIG_RUNTIME_CPUDETECT */ - return flags; -} - SwsContext *sws_alloc_context(void) { SwsContext *c= av_mallocz(sizeof(SwsContext)); @@ -782,25 +777,24 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) int srcH= c->srcH; int dstW= c->dstW; int dstH= c->dstH; - int flags; + int dst_stride = FFALIGN(dstW * sizeof(int16_t)+66, 16), dst_stride_px = dst_stride >> 1; + int flags, cpu_flags; enum PixelFormat srcFormat= c->srcFormat; enum PixelFormat dstFormat= c->dstFormat; - flags= c->flags = update_flags_cpu(c->flags); -#if ARCH_X86 - if (flags & SWS_CPU_CAPS_MMX) - __asm__ volatile("emms\n\t"::: "memory"); -#endif - if (!rgb15to16) sws_rgb2rgb_init(flags); + cpu_flags = av_get_cpu_flags(); + flags = c->flags; + emms_c(); + if (!rgb15to16) sws_rgb2rgb_init(); unscaled = (srcW == dstW && srcH == dstH); if (!isSupportedIn(srcFormat)) { - av_log(NULL, AV_LOG_ERROR, "swScaler: %s is not supported as input pixel format\n", sws_format_name(srcFormat)); + av_log(c, AV_LOG_ERROR, "%s is not supported as input pixel format\n", av_get_pix_fmt_name(srcFormat)); return AVERROR(EINVAL); } if (!isSupportedOut(dstFormat)) { - av_log(NULL, AV_LOG_ERROR, "swScaler: %s is not supported as output pixel format\n", sws_format_name(dstFormat)); + av_log(c, AV_LOG_ERROR, "%s is not supported as output pixel format\n", av_get_pix_fmt_name(dstFormat)); return AVERROR(EINVAL); } @@ -816,19 +810,15 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) |SWS_SPLINE |SWS_BICUBLIN); if(!i || (i & (i-1))) { - av_log(NULL, AV_LOG_ERROR, "swScaler: Exactly one scaler algorithm must be chosen\n"); + av_log(c, AV_LOG_ERROR, "Exactly one scaler algorithm must be chosen\n"); return AVERROR(EINVAL); } /* sanity check */ if (srcW<4 || srcH<1 || dstW<8 || dstH<1) { //FIXME check if these are enough and try to lowwer them after fixing the relevant parts of the code - av_log(NULL, AV_LOG_ERROR, "swScaler: %dx%d -> 
%dx%d is invalid scaling dimension\n", + av_log(c, AV_LOG_ERROR, "%dx%d -> %dx%d is invalid scaling dimension\n", srcW, srcH, dstW, dstH); return AVERROR(EINVAL); } - if(srcW > VOFW || dstW > VOFW) { - av_log(NULL, AV_LOG_ERROR, "swScaler: Compile-time maximum width is "AV_STRINGIFY(VOFW)" change VOF/VOFW and recompile\n"); - return AVERROR(EINVAL); - } if (!dstFilter) dstFilter= &dummyFilter; if (!srcFilter) srcFilter= &dummyFilter; @@ -879,18 +869,19 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) if (c->swScale) { if (flags&SWS_PRINT_INFO) av_log(c, AV_LOG_INFO, "using unscaled %s -> %s special converter\n", - sws_format_name(srcFormat), sws_format_name(dstFormat)); + av_get_pix_fmt_name(srcFormat), av_get_pix_fmt_name(dstFormat)); return 0; } } - if (flags & SWS_CPU_CAPS_MMX2) { + FF_ALLOC_OR_GOTO(c, c->formatConvBuffer, FFALIGN(srcW*2+78, 16) * 2, fail); + if (HAVE_MMX2 && cpu_flags & AV_CPU_FLAG_MMX2) { c->canMMX2BeUsed= (dstW >=srcW && (dstW&31)==0 && (srcW&15)==0) ? 
1 : 0; if (!c->canMMX2BeUsed && dstW >=srcW && (srcW&15)==0 && (flags&SWS_FAST_BILINEAR)) { if (flags&SWS_PRINT_INFO) av_log(c, AV_LOG_INFO, "output width is not a multiple of 32 -> no MMX2 scaler\n"); } - if (usesHFilter) c->canMMX2BeUsed=0; + if (usesHFilter || isNBPS(c->srcFormat) || is16BPS(c->srcFormat) || isAnyRGB(c->srcFormat)) c->canMMX2BeUsed=0; } else c->canMMX2BeUsed=0; @@ -910,7 +901,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) c->chrXInc+= 20; } //we don't use the x86 asm scaler if MMX is available - else if (flags & SWS_CPU_CAPS_MMX) { + else if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) { c->lumXInc = ((srcW-2)<<16)/(dstW-2) - 20; c->chrXInc = ((c->chrSrcW-2)<<16)/(c->chrDstW-2) - 20; } @@ -918,7 +909,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) /* precalculate horizontal scaler filter coefficients */ { -#if ARCH_X86 && (HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT) +#if HAVE_MMX2 // can't downscale !!! if (c->canMMX2BeUsed && (flags & SWS_FAST_BILINEAR)) { c->lumMmx2FilterCodeSize = initMMX2HScaler( dstW, c->lumXInc, NULL, NULL, NULL, 8); @@ -954,21 +945,21 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) mprotect(c->chrMmx2FilterCode, c->chrMmx2FilterCodeSize, PROT_EXEC | PROT_READ); #endif } else -#endif /* ARCH_X86 && (HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT) */ +#endif /* HAVE_MMX2 */ { const int filterAlign= - (flags & SWS_CPU_CAPS_MMX) ? 4 : - (flags & SWS_CPU_CAPS_ALTIVEC) ? 8 : + (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? 4 : + (HAVE_ALTIVEC && cpu_flags & AV_CPU_FLAG_ALTIVEC) ? 8 : 1; if (initFilter(&c->hLumFilter, &c->hLumFilterPos, &c->hLumFilterSize, c->lumXInc, srcW , dstW, filterAlign, 1<<14, - (flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC) : flags, + (flags&SWS_BICUBLIN) ? 
(flags|SWS_BICUBIC) : flags, cpu_flags, srcFilter->lumH, dstFilter->lumH, c->param) < 0) goto fail; if (initFilter(&c->hChrFilter, &c->hChrFilterPos, &c->hChrFilterSize, c->chrXInc, c->chrSrcW, c->chrDstW, filterAlign, 1<<14, - (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags, + (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags, cpu_flags, srcFilter->chrH, dstFilter->chrH, c->param) < 0) goto fail; } @@ -977,18 +968,18 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) /* precalculate vertical scaler filter coefficients */ { const int filterAlign= - (flags & SWS_CPU_CAPS_MMX) && (flags & SWS_ACCURATE_RND) ? 2 : - (flags & SWS_CPU_CAPS_ALTIVEC) ? 8 : + (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) && (flags & SWS_ACCURATE_RND) ? 2 : + (HAVE_ALTIVEC && cpu_flags & AV_CPU_FLAG_ALTIVEC) ? 8 : 1; if (initFilter(&c->vLumFilter, &c->vLumFilterPos, &c->vLumFilterSize, c->lumYInc, srcH , dstH, filterAlign, (1<<12), - (flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC) : flags, + (flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC) : flags, cpu_flags, srcFilter->lumV, dstFilter->lumV, c->param) < 0) goto fail; if (initFilter(&c->vChrFilter, &c->vChrFilterPos, &c->vChrFilterSize, c->chrYInc, c->chrSrcH, c->chrDstH, filterAlign, (1<<12), - (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags, + (flags&SWS_BICUBLIN) ? 
(flags|SWS_BILINEAR) : flags, cpu_flags, srcFilter->chrV, dstFilter->chrV, c->param) < 0) goto fail; @@ -1031,29 +1022,32 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) // allocate pixbufs (we use dynamic allocation because otherwise we would need to // allocate several megabytes to handle all possible cases) FF_ALLOC_OR_GOTO(c, c->lumPixBuf, c->vLumBufSize*2*sizeof(int16_t*), fail); - FF_ALLOC_OR_GOTO(c, c->chrPixBuf, c->vChrBufSize*2*sizeof(int16_t*), fail); + FF_ALLOC_OR_GOTO(c, c->chrUPixBuf, c->vChrBufSize*2*sizeof(int16_t*), fail); + FF_ALLOC_OR_GOTO(c, c->chrVPixBuf, c->vChrBufSize*2*sizeof(int16_t*), fail); if (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat) && isALPHA(c->dstFormat)) FF_ALLOCZ_OR_GOTO(c, c->alpPixBuf, c->vLumBufSize*2*sizeof(int16_t*), fail); //Note we need at least one pixel more at the end because of the MMX code (just in case someone wanna replace the 4000/8000) /* align at 16 bytes for AltiVec */ for (i=0; i<c->vLumBufSize; i++) { - FF_ALLOCZ_OR_GOTO(c, c->lumPixBuf[i+c->vLumBufSize], VOF+1, fail); + FF_ALLOCZ_OR_GOTO(c, c->lumPixBuf[i+c->vLumBufSize], dst_stride+1, fail); c->lumPixBuf[i] = c->lumPixBuf[i+c->vLumBufSize]; } + c->uv_off = dst_stride_px; + c->uv_offx2 = dst_stride; for (i=0; i<c->vChrBufSize; i++) { - FF_ALLOC_OR_GOTO(c, c->chrPixBuf[i+c->vChrBufSize], (VOF+1)*2, fail); - c->chrPixBuf[i] = c->chrPixBuf[i+c->vChrBufSize]; + FF_ALLOC_OR_GOTO(c, c->chrUPixBuf[i+c->vChrBufSize], dst_stride*2+1, fail); + c->chrUPixBuf[i] = c->chrUPixBuf[i+c->vChrBufSize]; + c->chrVPixBuf[i] = c->chrVPixBuf[i+c->vChrBufSize] = c->chrUPixBuf[i] + dst_stride_px; } if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) for (i=0; i<c->vLumBufSize; i++) { - FF_ALLOCZ_OR_GOTO(c, c->alpPixBuf[i+c->vLumBufSize], VOF+1, fail); + FF_ALLOCZ_OR_GOTO(c, c->alpPixBuf[i+c->vLumBufSize], dst_stride+1, fail); c->alpPixBuf[i] = c->alpPixBuf[i+c->vLumBufSize]; } //try to avoid drawing green stuff between the right end and the stride end - for 
(i=0; i<c->vChrBufSize; i++) memset(c->chrPixBuf[i], 64, (VOF+1)*2); - - assert(2*VOFW == VOF); + for (i=0; i<c->vChrBufSize; i++) + memset(c->chrUPixBuf[i], 64, dst_stride*2+1); assert(c->chrDstH <= dstH); @@ -1072,7 +1066,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) else av_log(c, AV_LOG_INFO, "ehh flags invalid?! "); av_log(c, AV_LOG_INFO, "from %s to %s%s ", - sws_format_name(srcFormat), + av_get_pix_fmt_name(srcFormat), #ifdef DITHER1XBPP dstFormat == PIX_FMT_BGR555 || dstFormat == PIX_FMT_BGR565 || dstFormat == PIX_FMT_RGB444BE || dstFormat == PIX_FMT_RGB444LE || @@ -1080,15 +1074,15 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) #else "", #endif - sws_format_name(dstFormat)); + av_get_pix_fmt_name(dstFormat)); - if (flags & SWS_CPU_CAPS_MMX2) av_log(c, AV_LOG_INFO, "using MMX2\n"); - else if (flags & SWS_CPU_CAPS_3DNOW) av_log(c, AV_LOG_INFO, "using 3DNOW\n"); - else if (flags & SWS_CPU_CAPS_MMX) av_log(c, AV_LOG_INFO, "using MMX\n"); - else if (flags & SWS_CPU_CAPS_ALTIVEC) av_log(c, AV_LOG_INFO, "using AltiVec\n"); + if (HAVE_MMX2 && cpu_flags & AV_CPU_FLAG_MMX2) av_log(c, AV_LOG_INFO, "using MMX2\n"); + else if (HAVE_AMD3DNOW && cpu_flags & AV_CPU_FLAG_3DNOW) av_log(c, AV_LOG_INFO, "using 3DNOW\n"); + else if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) av_log(c, AV_LOG_INFO, "using MMX\n"); + else if (HAVE_ALTIVEC && cpu_flags & AV_CPU_FLAG_ALTIVEC) av_log(c, AV_LOG_INFO, "using AltiVec\n"); else av_log(c, AV_LOG_INFO, "using C\n"); - if (flags & SWS_CPU_CAPS_MMX) { + if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) { if (c->canMMX2BeUsed && (flags&SWS_FAST_BILINEAR)) av_log(c, AV_LOG_VERBOSE, "using FAST_BILINEAR MMX2 scaler for horizontal scaling\n"); else { @@ -1107,7 +1101,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) av_log(c, AV_LOG_VERBOSE, "using n-tap MMX scaler for horizontal chrominance scaling\n"); } } else { -#if ARCH_X86 +#if HAVE_MMX 
av_log(c, AV_LOG_VERBOSE, "using x86 asm scaler for horizontal scaling\n"); #else if (flags & SWS_FAST_BILINEAR) @@ -1118,31 +1112,41 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) } if (isPlanarYUV(dstFormat)) { if (c->vLumFilterSize==1) - av_log(c, AV_LOG_VERBOSE, "using 1-tap %s \"scaler\" for vertical scaling (YV12 like)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C"); + av_log(c, AV_LOG_VERBOSE, "using 1-tap %s \"scaler\" for vertical scaling (YV12 like)\n", + (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? "MMX" : "C"); else - av_log(c, AV_LOG_VERBOSE, "using n-tap %s scaler for vertical scaling (YV12 like)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C"); + av_log(c, AV_LOG_VERBOSE, "using n-tap %s scaler for vertical scaling (YV12 like)\n", + (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? "MMX" : "C"); } else { if (c->vLumFilterSize==1 && c->vChrFilterSize==2) av_log(c, AV_LOG_VERBOSE, "using 1-tap %s \"scaler\" for vertical luminance scaling (BGR)\n" - " 2-tap scaler for vertical chrominance scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C"); + " 2-tap scaler for vertical chrominance scaling (BGR)\n", + (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? "MMX" : "C"); else if (c->vLumFilterSize==2 && c->vChrFilterSize==2) - av_log(c, AV_LOG_VERBOSE, "using 2-tap linear %s scaler for vertical scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C"); + av_log(c, AV_LOG_VERBOSE, "using 2-tap linear %s scaler for vertical scaling (BGR)\n", + (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? "MMX" : "C"); else - av_log(c, AV_LOG_VERBOSE, "using n-tap %s scaler for vertical scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C"); + av_log(c, AV_LOG_VERBOSE, "using n-tap %s scaler for vertical scaling (BGR)\n", + (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? "MMX" : "C"); } if (dstFormat==PIX_FMT_BGR24) av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR24 converter\n", - (flags & SWS_CPU_CAPS_MMX2) ? "MMX2" : ((flags & SWS_CPU_CAPS_MMX) ? 
"MMX" : "C")); + (HAVE_MMX2 && cpu_flags & AV_CPU_FLAG_MMX2) ? "MMX2" : + ((HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? "MMX" : "C")); else if (dstFormat==PIX_FMT_RGB32) - av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR32 converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C"); + av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR32 converter\n", + (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? "MMX" : "C"); else if (dstFormat==PIX_FMT_BGR565) - av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR16 converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C"); + av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR16 converter\n", + (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? "MMX" : "C"); else if (dstFormat==PIX_FMT_BGR555) - av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR15 converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C"); + av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR15 converter\n", + (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? "MMX" : "C"); else if (dstFormat == PIX_FMT_RGB444BE || dstFormat == PIX_FMT_RGB444LE || dstFormat == PIX_FMT_BGR444BE || dstFormat == PIX_FMT_BGR444LE) - av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR12 converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C"); + av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR12 converter\n", + (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? 
"MMX" : "C"); av_log(c, AV_LOG_VERBOSE, "%dx%d -> %dx%d\n", srcW, srcH, dstW, dstH); av_log(c, AV_LOG_DEBUG, "lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n", @@ -1501,10 +1505,11 @@ void sws_freeContext(SwsContext *c) av_freep(&c->lumPixBuf); } - if (c->chrPixBuf) { + if (c->chrUPixBuf) { for (i=0; i<c->vChrBufSize; i++) - av_freep(&c->chrPixBuf[i]); - av_freep(&c->chrPixBuf); + av_freep(&c->chrUPixBuf[i]); + av_freep(&c->chrUPixBuf); + av_freep(&c->chrVPixBuf); } if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) { @@ -1527,7 +1532,7 @@ void sws_freeContext(SwsContext *c) av_freep(&c->hLumFilterPos); av_freep(&c->hChrFilterPos); -#if ARCH_X86 +#if HAVE_MMX #ifdef MAP_ANONYMOUS if (c->lumMmx2FilterCode) munmap(c->lumMmx2FilterCode, c->lumMmx2FilterCodeSize); if (c->chrMmx2FilterCode) munmap(c->chrMmx2FilterCode, c->chrMmx2FilterCodeSize); @@ -1540,9 +1545,10 @@ void sws_freeContext(SwsContext *c) #endif c->lumMmx2FilterCode=NULL; c->chrMmx2FilterCode=NULL; -#endif /* ARCH_X86 */ +#endif /* HAVE_MMX */ av_freep(&c->yuvTable); + av_freep(&c->formatConvBuffer); av_free(c); } @@ -1557,8 +1563,6 @@ struct SwsContext *sws_getCachedContext(struct SwsContext *context, if (!param) param = default_param; - flags = update_flags_cpu(flags); - if (context && (context->srcW != srcW || context->srcH != srcH || diff --git a/libswscale/x86/rgb2rgb.c b/libswscale/x86/rgb2rgb.c new file mode 100644 index 0000000000..ed7f5adb74 --- /dev/null +++ b/libswscale/x86/rgb2rgb.c @@ -0,0 +1,138 @@ +/* + * software RGB to RGB converter + * pluralize by software PAL8 to RGB converter + * software YUV to YUV converter + * software YUV to RGB converter + * Written by Nick Kurshev. + * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at) + * + * This file is part of FFmpeg. 
+ * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <stdint.h> + +#include "config.h" +#include "libavutil/x86_cpu.h" +#include "libavutil/cpu.h" +#include "libavutil/bswap.h" +#include "libswscale/rgb2rgb.h" +#include "libswscale/swscale.h" +#include "libswscale/swscale_internal.h" + +DECLARE_ASM_CONST(8, uint64_t, mmx_ff) = 0x00000000000000FFULL; +DECLARE_ASM_CONST(8, uint64_t, mmx_null) = 0x0000000000000000ULL; +DECLARE_ASM_CONST(8, uint64_t, mmx_one) = 0xFFFFFFFFFFFFFFFFULL; +DECLARE_ASM_CONST(8, uint64_t, mask32b) = 0x000000FF000000FFULL; +DECLARE_ASM_CONST(8, uint64_t, mask32g) = 0x0000FF000000FF00ULL; +DECLARE_ASM_CONST(8, uint64_t, mask32r) = 0x00FF000000FF0000ULL; +DECLARE_ASM_CONST(8, uint64_t, mask32a) = 0xFF000000FF000000ULL; +DECLARE_ASM_CONST(8, uint64_t, mask32) = 0x00FFFFFF00FFFFFFULL; +DECLARE_ASM_CONST(8, uint64_t, mask3216br) = 0x00F800F800F800F8ULL; +DECLARE_ASM_CONST(8, uint64_t, mask3216g) = 0x0000FC000000FC00ULL; +DECLARE_ASM_CONST(8, uint64_t, mask3215g) = 0x0000F8000000F800ULL; +DECLARE_ASM_CONST(8, uint64_t, mul3216) = 0x2000000420000004ULL; +DECLARE_ASM_CONST(8, uint64_t, mul3215) = 0x2000000820000008ULL; +DECLARE_ASM_CONST(8, uint64_t, mask24b) = 0x00FF0000FF0000FFULL; +DECLARE_ASM_CONST(8, uint64_t, mask24g) = 0xFF0000FF0000FF00ULL; 
+DECLARE_ASM_CONST(8, uint64_t, mask24r) = 0x0000FF0000FF0000ULL; +DECLARE_ASM_CONST(8, uint64_t, mask24l) = 0x0000000000FFFFFFULL; +DECLARE_ASM_CONST(8, uint64_t, mask24h) = 0x0000FFFFFF000000ULL; +DECLARE_ASM_CONST(8, uint64_t, mask24hh) = 0xffff000000000000ULL; +DECLARE_ASM_CONST(8, uint64_t, mask24hhh) = 0xffffffff00000000ULL; +DECLARE_ASM_CONST(8, uint64_t, mask24hhhh) = 0xffffffffffff0000ULL; +DECLARE_ASM_CONST(8, uint64_t, mask15b) = 0x001F001F001F001FULL; /* 00000000 00011111 xxB */ +DECLARE_ASM_CONST(8, uint64_t, mask15rg) = 0x7FE07FE07FE07FE0ULL; /* 01111111 11100000 RGx */ +DECLARE_ASM_CONST(8, uint64_t, mask15s) = 0xFFE0FFE0FFE0FFE0ULL; +DECLARE_ASM_CONST(8, uint64_t, mask15g) = 0x03E003E003E003E0ULL; +DECLARE_ASM_CONST(8, uint64_t, mask15r) = 0x7C007C007C007C00ULL; +#define mask16b mask15b +DECLARE_ASM_CONST(8, uint64_t, mask16g) = 0x07E007E007E007E0ULL; +DECLARE_ASM_CONST(8, uint64_t, mask16r) = 0xF800F800F800F800ULL; +DECLARE_ASM_CONST(8, uint64_t, red_16mask) = 0x0000f8000000f800ULL; +DECLARE_ASM_CONST(8, uint64_t, green_16mask) = 0x000007e0000007e0ULL; +DECLARE_ASM_CONST(8, uint64_t, blue_16mask) = 0x0000001f0000001fULL; +DECLARE_ASM_CONST(8, uint64_t, red_15mask) = 0x00007c0000007c00ULL; +DECLARE_ASM_CONST(8, uint64_t, green_15mask) = 0x000003e0000003e0ULL; +DECLARE_ASM_CONST(8, uint64_t, blue_15mask) = 0x0000001f0000001fULL; + +#define RGB2YUV_SHIFT 8 +#define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5)) +#define BV ((int)(-0.071*(1<<RGB2YUV_SHIFT)+0.5)) +#define BU ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5)) +#define GY ((int)( 0.504*(1<<RGB2YUV_SHIFT)+0.5)) +#define GV ((int)(-0.368*(1<<RGB2YUV_SHIFT)+0.5)) +#define GU ((int)(-0.291*(1<<RGB2YUV_SHIFT)+0.5)) +#define RY ((int)( 0.257*(1<<RGB2YUV_SHIFT)+0.5)) +#define RV ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5)) +#define RU ((int)(-0.148*(1<<RGB2YUV_SHIFT)+0.5)) + +//Note: We have C, MMX, MMX2, 3DNOW versions, there is no 3DNOW + MMX2 one. 
+ +#define COMPILE_TEMPLATE_MMX2 0 +#define COMPILE_TEMPLATE_AMD3DNOW 0 +#define COMPILE_TEMPLATE_SSE2 0 + +//MMX versions +#undef RENAME +#define RENAME(a) a ## _MMX +#include "rgb2rgb_template.c" + +//MMX2 versions +#undef RENAME +#undef COMPILE_TEMPLATE_MMX2 +#define COMPILE_TEMPLATE_MMX2 1 +#define RENAME(a) a ## _MMX2 +#include "rgb2rgb_template.c" + +//SSE2 versions +#undef RENAME +#undef COMPILE_TEMPLATE_SSE2 +#define COMPILE_TEMPLATE_SSE2 1 +#define RENAME(a) a ## _SSE2 +#include "rgb2rgb_template.c" + +//3DNOW versions +#undef RENAME +#undef COMPILE_TEMPLATE_MMX2 +#undef COMPILE_TEMPLATE_SSE2 +#undef COMPILE_TEMPLATE_AMD3DNOW +#define COMPILE_TEMPLATE_MMX2 0 +#define COMPILE_TEMPLATE_SSE2 0 +#define COMPILE_TEMPLATE_AMD3DNOW 1 +#define RENAME(a) a ## _3DNOW +#include "rgb2rgb_template.c" + +/* + RGB15->RGB16 original by Strepto/Astral + ported to gcc & bugfixed : A'rpi + MMX2, 3DNOW optimization by Nick Kurshev + 32-bit C version, and and&add trick by Michael Niedermayer +*/ + +void rgb2rgb_init_x86(void) +{ + int cpu_flags = av_get_cpu_flags(); + + if (cpu_flags & AV_CPU_FLAG_MMX) + rgb2rgb_init_MMX(); + if (HAVE_AMD3DNOW && cpu_flags & AV_CPU_FLAG_3DNOW) + rgb2rgb_init_3DNOW(); + if (HAVE_MMX2 && cpu_flags & AV_CPU_FLAG_MMX2) + rgb2rgb_init_MMX2(); + if (HAVE_SSE && cpu_flags & AV_CPU_FLAG_SSE2) + rgb2rgb_init_SSE2(); +} diff --git a/libswscale/x86/rgb2rgb_template.c b/libswscale/x86/rgb2rgb_template.c new file mode 100644 index 0000000000..baef3f8ae5 --- /dev/null +++ b/libswscale/x86/rgb2rgb_template.c @@ -0,0 +1,2607 @@ +/* + * software RGB to RGB converter + * pluralize by software PAL8 to RGB converter + * software YUV to YUV converter + * software YUV to RGB converter + * Written by Nick Kurshev. + * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at) + * lot of big-endian byte order fixes by Alex Beregszaszi + * + * This file is part of FFmpeg. 
+ * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <stddef.h> + +#undef PREFETCH +#undef MOVNTQ +#undef EMMS +#undef SFENCE +#undef PAVGB + +#if COMPILE_TEMPLATE_AMD3DNOW +#define PREFETCH "prefetch" +#define PAVGB "pavgusb" +#elif COMPILE_TEMPLATE_MMX2 +#define PREFETCH "prefetchnta" +#define PAVGB "pavgb" +#else +#define PREFETCH " # nop" +#endif + +#if COMPILE_TEMPLATE_AMD3DNOW +/* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. 
*/ +#define EMMS "femms" +#else +#define EMMS "emms" +#endif + +#if COMPILE_TEMPLATE_MMX2 +#define MOVNTQ "movntq" +#define SFENCE "sfence" +#else +#define MOVNTQ "movq" +#define SFENCE " # nop" +#endif + +#if !COMPILE_TEMPLATE_SSE2 + +#if !COMPILE_TEMPLATE_AMD3DNOW + +static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int src_size) +{ + uint8_t *dest = dst; + const uint8_t *s = src; + const uint8_t *end; + const uint8_t *mm_end; + end = s + src_size; + __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); + mm_end = end - 23; + __asm__ volatile("movq %0, %%mm7"::"m"(mask32a):"memory"); + while (s < mm_end) { + __asm__ volatile( + PREFETCH" 32%1 \n\t" + "movd %1, %%mm0 \n\t" + "punpckldq 3%1, %%mm0 \n\t" + "movd 6%1, %%mm1 \n\t" + "punpckldq 9%1, %%mm1 \n\t" + "movd 12%1, %%mm2 \n\t" + "punpckldq 15%1, %%mm2 \n\t" + "movd 18%1, %%mm3 \n\t" + "punpckldq 21%1, %%mm3 \n\t" + "por %%mm7, %%mm0 \n\t" + "por %%mm7, %%mm1 \n\t" + "por %%mm7, %%mm2 \n\t" + "por %%mm7, %%mm3 \n\t" + MOVNTQ" %%mm0, %0 \n\t" + MOVNTQ" %%mm1, 8%0 \n\t" + MOVNTQ" %%mm2, 16%0 \n\t" + MOVNTQ" %%mm3, 24%0" + :"=m"(*dest) + :"m"(*s) + :"memory"); + dest += 32; + s += 24; + } + __asm__ volatile(SFENCE:::"memory"); + __asm__ volatile(EMMS:::"memory"); + while (s < end) { + *dest++ = *s++; + *dest++ = *s++; + *dest++ = *s++; + *dest++ = 255; + } +} + +#define STORE_BGR24_MMX \ + "psrlq $8, %%mm2 \n\t" \ + "psrlq $8, %%mm3 \n\t" \ + "psrlq $8, %%mm6 \n\t" \ + "psrlq $8, %%mm7 \n\t" \ + "pand "MANGLE(mask24l)", %%mm0\n\t" \ + "pand "MANGLE(mask24l)", %%mm1\n\t" \ + "pand "MANGLE(mask24l)", %%mm4\n\t" \ + "pand "MANGLE(mask24l)", %%mm5\n\t" \ + "pand "MANGLE(mask24h)", %%mm2\n\t" \ + "pand "MANGLE(mask24h)", %%mm3\n\t" \ + "pand "MANGLE(mask24h)", %%mm6\n\t" \ + "pand "MANGLE(mask24h)", %%mm7\n\t" \ + "por %%mm2, %%mm0 \n\t" \ + "por %%mm3, %%mm1 \n\t" \ + "por %%mm6, %%mm4 \n\t" \ + "por %%mm7, %%mm5 \n\t" \ + \ + "movq %%mm1, %%mm2 \n\t" \ + "movq %%mm4, %%mm3 \n\t" \ + "psllq $48, 
%%mm2 \n\t" \ + "psllq $32, %%mm3 \n\t" \ + "pand "MANGLE(mask24hh)", %%mm2\n\t" \ + "pand "MANGLE(mask24hhh)", %%mm3\n\t" \ + "por %%mm2, %%mm0 \n\t" \ + "psrlq $16, %%mm1 \n\t" \ + "psrlq $32, %%mm4 \n\t" \ + "psllq $16, %%mm5 \n\t" \ + "por %%mm3, %%mm1 \n\t" \ + "pand "MANGLE(mask24hhhh)", %%mm5\n\t" \ + "por %%mm5, %%mm4 \n\t" \ + \ + MOVNTQ" %%mm0, %0 \n\t" \ + MOVNTQ" %%mm1, 8%0 \n\t" \ + MOVNTQ" %%mm4, 16%0" + + +static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int src_size) +{ + uint8_t *dest = dst; + const uint8_t *s = src; + const uint8_t *end; + const uint8_t *mm_end; + end = s + src_size; + __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); + mm_end = end - 31; + while (s < mm_end) { + __asm__ volatile( + PREFETCH" 32%1 \n\t" + "movq %1, %%mm0 \n\t" + "movq 8%1, %%mm1 \n\t" + "movq 16%1, %%mm4 \n\t" + "movq 24%1, %%mm5 \n\t" + "movq %%mm0, %%mm2 \n\t" + "movq %%mm1, %%mm3 \n\t" + "movq %%mm4, %%mm6 \n\t" + "movq %%mm5, %%mm7 \n\t" + STORE_BGR24_MMX + :"=m"(*dest) + :"m"(*s) + :"memory"); + dest += 24; + s += 32; + } + __asm__ volatile(SFENCE:::"memory"); + __asm__ volatile(EMMS:::"memory"); + while (s < end) { + *dest++ = *s++; + *dest++ = *s++; + *dest++ = *s++; + s++; + } +} + +/* + original by Strepto/Astral + ported to gcc & bugfixed: A'rpi + MMX2, 3DNOW optimization by Nick Kurshev + 32-bit C version, and and&add trick by Michael Niedermayer +*/ +static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, int src_size) +{ + register const uint8_t* s=src; + register uint8_t* d=dst; + register const uint8_t *end; + const uint8_t *mm_end; + end = s + src_size; + __asm__ volatile(PREFETCH" %0"::"m"(*s)); + __asm__ volatile("movq %0, %%mm4"::"m"(mask15s)); + mm_end = end - 15; + while (s<mm_end) { + __asm__ volatile( + PREFETCH" 32%1 \n\t" + "movq %1, %%mm0 \n\t" + "movq 8%1, %%mm2 \n\t" + "movq %%mm0, %%mm1 \n\t" + "movq %%mm2, %%mm3 \n\t" + "pand %%mm4, %%mm0 \n\t" + "pand %%mm4, %%mm2 \n\t" + "paddw %%mm1, %%mm0 
\n\t" + "paddw %%mm3, %%mm2 \n\t" + MOVNTQ" %%mm0, %0 \n\t" + MOVNTQ" %%mm2, 8%0" + :"=m"(*d) + :"m"(*s) + ); + d+=16; + s+=16; + } + __asm__ volatile(SFENCE:::"memory"); + __asm__ volatile(EMMS:::"memory"); + mm_end = end - 3; + while (s < mm_end) { + register unsigned x= *((const uint32_t *)s); + *((uint32_t *)d) = (x&0x7FFF7FFF) + (x&0x7FE07FE0); + d+=4; + s+=4; + } + if (s < end) { + register unsigned short x= *((const uint16_t *)s); + *((uint16_t *)d) = (x&0x7FFF) + (x&0x7FE0); + } +} + +static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, int src_size) +{ + register const uint8_t* s=src; + register uint8_t* d=dst; + register const uint8_t *end; + const uint8_t *mm_end; + end = s + src_size; + __asm__ volatile(PREFETCH" %0"::"m"(*s)); + __asm__ volatile("movq %0, %%mm7"::"m"(mask15rg)); + __asm__ volatile("movq %0, %%mm6"::"m"(mask15b)); + mm_end = end - 15; + while (s<mm_end) { + __asm__ volatile( + PREFETCH" 32%1 \n\t" + "movq %1, %%mm0 \n\t" + "movq 8%1, %%mm2 \n\t" + "movq %%mm0, %%mm1 \n\t" + "movq %%mm2, %%mm3 \n\t" + "psrlq $1, %%mm0 \n\t" + "psrlq $1, %%mm2 \n\t" + "pand %%mm7, %%mm0 \n\t" + "pand %%mm7, %%mm2 \n\t" + "pand %%mm6, %%mm1 \n\t" + "pand %%mm6, %%mm3 \n\t" + "por %%mm1, %%mm0 \n\t" + "por %%mm3, %%mm2 \n\t" + MOVNTQ" %%mm0, %0 \n\t" + MOVNTQ" %%mm2, 8%0" + :"=m"(*d) + :"m"(*s) + ); + d+=16; + s+=16; + } + __asm__ volatile(SFENCE:::"memory"); + __asm__ volatile(EMMS:::"memory"); + mm_end = end - 3; + while (s < mm_end) { + register uint32_t x= *((const uint32_t*)s); + *((uint32_t *)d) = ((x>>1)&0x7FE07FE0) | (x&0x001F001F); + s+=4; + d+=4; + } + if (s < end) { + register uint16_t x= *((const uint16_t*)s); + *((uint16_t *)d) = ((x>>1)&0x7FE0) | (x&0x001F); + } +} + +static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, int src_size) +{ + const uint8_t *s = src; + const uint8_t *end; + const uint8_t *mm_end; + uint16_t *d = (uint16_t *)dst; + end = s + src_size; + mm_end = end - 15; +#if 1 //is faster only 
if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster) + __asm__ volatile( + "movq %3, %%mm5 \n\t" + "movq %4, %%mm6 \n\t" + "movq %5, %%mm7 \n\t" + "jmp 2f \n\t" + ".p2align 4 \n\t" + "1: \n\t" + PREFETCH" 32(%1) \n\t" + "movd (%1), %%mm0 \n\t" + "movd 4(%1), %%mm3 \n\t" + "punpckldq 8(%1), %%mm0 \n\t" + "punpckldq 12(%1), %%mm3 \n\t" + "movq %%mm0, %%mm1 \n\t" + "movq %%mm3, %%mm4 \n\t" + "pand %%mm6, %%mm0 \n\t" + "pand %%mm6, %%mm3 \n\t" + "pmaddwd %%mm7, %%mm0 \n\t" + "pmaddwd %%mm7, %%mm3 \n\t" + "pand %%mm5, %%mm1 \n\t" + "pand %%mm5, %%mm4 \n\t" + "por %%mm1, %%mm0 \n\t" + "por %%mm4, %%mm3 \n\t" + "psrld $5, %%mm0 \n\t" + "pslld $11, %%mm3 \n\t" + "por %%mm3, %%mm0 \n\t" + MOVNTQ" %%mm0, (%0) \n\t" + "add $16, %1 \n\t" + "add $8, %0 \n\t" + "2: \n\t" + "cmp %2, %1 \n\t" + " jb 1b \n\t" + : "+r" (d), "+r"(s) + : "r" (mm_end), "m" (mask3216g), "m" (mask3216br), "m" (mul3216) + ); +#else + __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory"); + __asm__ volatile( + "movq %0, %%mm7 \n\t" + "movq %1, %%mm6 \n\t" + ::"m"(red_16mask),"m"(green_16mask)); + while (s < mm_end) { + __asm__ volatile( + PREFETCH" 32%1 \n\t" + "movd %1, %%mm0 \n\t" + "movd 4%1, %%mm3 \n\t" + "punpckldq 8%1, %%mm0 \n\t" + "punpckldq 12%1, %%mm3 \n\t" + "movq %%mm0, %%mm1 \n\t" + "movq %%mm0, %%mm2 \n\t" + "movq %%mm3, %%mm4 \n\t" + "movq %%mm3, %%mm5 \n\t" + "psrlq $3, %%mm0 \n\t" + "psrlq $3, %%mm3 \n\t" + "pand %2, %%mm0 \n\t" + "pand %2, %%mm3 \n\t" + "psrlq $5, %%mm1 \n\t" + "psrlq $5, %%mm4 \n\t" + "pand %%mm6, %%mm1 \n\t" + "pand %%mm6, %%mm4 \n\t" + "psrlq $8, %%mm2 \n\t" + "psrlq $8, %%mm5 \n\t" + "pand %%mm7, %%mm2 \n\t" + "pand %%mm7, %%mm5 \n\t" + "por %%mm1, %%mm0 \n\t" + "por %%mm4, %%mm3 \n\t" + "por %%mm2, %%mm0 \n\t" + "por %%mm5, %%mm3 \n\t" + "psllq $16, %%mm3 \n\t" + "por %%mm3, %%mm0 \n\t" + MOVNTQ" %%mm0, %0 \n\t" + :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); + d += 4; + s += 16; + } +#endif + __asm__ 
volatile(SFENCE:::"memory"); + __asm__ volatile(EMMS:::"memory"); + while (s < end) { + register int rgb = *(const uint32_t*)s; s += 4; + *d++ = ((rgb&0xFF)>>3) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>8); + } +} + +static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, int src_size) +{ + const uint8_t *s = src; + const uint8_t *end; + const uint8_t *mm_end; + uint16_t *d = (uint16_t *)dst; + end = s + src_size; + __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory"); + __asm__ volatile( + "movq %0, %%mm7 \n\t" + "movq %1, %%mm6 \n\t" + ::"m"(red_16mask),"m"(green_16mask)); + mm_end = end - 15; + while (s < mm_end) { + __asm__ volatile( + PREFETCH" 32%1 \n\t" + "movd %1, %%mm0 \n\t" + "movd 4%1, %%mm3 \n\t" + "punpckldq 8%1, %%mm0 \n\t" + "punpckldq 12%1, %%mm3 \n\t" + "movq %%mm0, %%mm1 \n\t" + "movq %%mm0, %%mm2 \n\t" + "movq %%mm3, %%mm4 \n\t" + "movq %%mm3, %%mm5 \n\t" + "psllq $8, %%mm0 \n\t" + "psllq $8, %%mm3 \n\t" + "pand %%mm7, %%mm0 \n\t" + "pand %%mm7, %%mm3 \n\t" + "psrlq $5, %%mm1 \n\t" + "psrlq $5, %%mm4 \n\t" + "pand %%mm6, %%mm1 \n\t" + "pand %%mm6, %%mm4 \n\t" + "psrlq $19, %%mm2 \n\t" + "psrlq $19, %%mm5 \n\t" + "pand %2, %%mm2 \n\t" + "pand %2, %%mm5 \n\t" + "por %%mm1, %%mm0 \n\t" + "por %%mm4, %%mm3 \n\t" + "por %%mm2, %%mm0 \n\t" + "por %%mm5, %%mm3 \n\t" + "psllq $16, %%mm3 \n\t" + "por %%mm3, %%mm0 \n\t" + MOVNTQ" %%mm0, %0 \n\t" + :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); + d += 4; + s += 16; + } + __asm__ volatile(SFENCE:::"memory"); + __asm__ volatile(EMMS:::"memory"); + while (s < end) { + register int rgb = *(const uint32_t*)s; s += 4; + *d++ = ((rgb&0xF8)<<8) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>19); + } +} + +static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, int src_size) +{ + const uint8_t *s = src; + const uint8_t *end; + const uint8_t *mm_end; + uint16_t *d = (uint16_t *)dst; + end = s + src_size; + mm_end = end - 15; +#if 1 //is faster only if multiplies are reasonably fast (FIXME figure 
out on which CPUs this is faster, on Athlon it is slightly faster) + __asm__ volatile( + "movq %3, %%mm5 \n\t" + "movq %4, %%mm6 \n\t" + "movq %5, %%mm7 \n\t" + "jmp 2f \n\t" + ".p2align 4 \n\t" + "1: \n\t" + PREFETCH" 32(%1) \n\t" + "movd (%1), %%mm0 \n\t" + "movd 4(%1), %%mm3 \n\t" + "punpckldq 8(%1), %%mm0 \n\t" + "punpckldq 12(%1), %%mm3 \n\t" + "movq %%mm0, %%mm1 \n\t" + "movq %%mm3, %%mm4 \n\t" + "pand %%mm6, %%mm0 \n\t" + "pand %%mm6, %%mm3 \n\t" + "pmaddwd %%mm7, %%mm0 \n\t" + "pmaddwd %%mm7, %%mm3 \n\t" + "pand %%mm5, %%mm1 \n\t" + "pand %%mm5, %%mm4 \n\t" + "por %%mm1, %%mm0 \n\t" + "por %%mm4, %%mm3 \n\t" + "psrld $6, %%mm0 \n\t" + "pslld $10, %%mm3 \n\t" + "por %%mm3, %%mm0 \n\t" + MOVNTQ" %%mm0, (%0) \n\t" + "add $16, %1 \n\t" + "add $8, %0 \n\t" + "2: \n\t" + "cmp %2, %1 \n\t" + " jb 1b \n\t" + : "+r" (d), "+r"(s) + : "r" (mm_end), "m" (mask3215g), "m" (mask3216br), "m" (mul3215) + ); +#else + __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory"); + __asm__ volatile( + "movq %0, %%mm7 \n\t" + "movq %1, %%mm6 \n\t" + ::"m"(red_15mask),"m"(green_15mask)); + while (s < mm_end) { + __asm__ volatile( + PREFETCH" 32%1 \n\t" + "movd %1, %%mm0 \n\t" + "movd 4%1, %%mm3 \n\t" + "punpckldq 8%1, %%mm0 \n\t" + "punpckldq 12%1, %%mm3 \n\t" + "movq %%mm0, %%mm1 \n\t" + "movq %%mm0, %%mm2 \n\t" + "movq %%mm3, %%mm4 \n\t" + "movq %%mm3, %%mm5 \n\t" + "psrlq $3, %%mm0 \n\t" + "psrlq $3, %%mm3 \n\t" + "pand %2, %%mm0 \n\t" + "pand %2, %%mm3 \n\t" + "psrlq $6, %%mm1 \n\t" + "psrlq $6, %%mm4 \n\t" + "pand %%mm6, %%mm1 \n\t" + "pand %%mm6, %%mm4 \n\t" + "psrlq $9, %%mm2 \n\t" + "psrlq $9, %%mm5 \n\t" + "pand %%mm7, %%mm2 \n\t" + "pand %%mm7, %%mm5 \n\t" + "por %%mm1, %%mm0 \n\t" + "por %%mm4, %%mm3 \n\t" + "por %%mm2, %%mm0 \n\t" + "por %%mm5, %%mm3 \n\t" + "psllq $16, %%mm3 \n\t" + "por %%mm3, %%mm0 \n\t" + MOVNTQ" %%mm0, %0 \n\t" + :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); + d += 4; + s += 16; + } +#endif + __asm__ volatile(SFENCE:::"memory"); + __asm__ 
volatile(EMMS:::"memory"); + while (s < end) { + register int rgb = *(const uint32_t*)s; s += 4; + *d++ = ((rgb&0xFF)>>3) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>9); + } +} + +static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int src_size) +{ + const uint8_t *s = src; + const uint8_t *end; + const uint8_t *mm_end; + uint16_t *d = (uint16_t *)dst; + end = s + src_size; + __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory"); + __asm__ volatile( + "movq %0, %%mm7 \n\t" + "movq %1, %%mm6 \n\t" + ::"m"(red_15mask),"m"(green_15mask)); + mm_end = end - 15; + while (s < mm_end) { + __asm__ volatile( + PREFETCH" 32%1 \n\t" + "movd %1, %%mm0 \n\t" + "movd 4%1, %%mm3 \n\t" + "punpckldq 8%1, %%mm0 \n\t" + "punpckldq 12%1, %%mm3 \n\t" + "movq %%mm0, %%mm1 \n\t" + "movq %%mm0, %%mm2 \n\t" + "movq %%mm3, %%mm4 \n\t" + "movq %%mm3, %%mm5 \n\t" + "psllq $7, %%mm0 \n\t" + "psllq $7, %%mm3 \n\t" + "pand %%mm7, %%mm0 \n\t" + "pand %%mm7, %%mm3 \n\t" + "psrlq $6, %%mm1 \n\t" + "psrlq $6, %%mm4 \n\t" + "pand %%mm6, %%mm1 \n\t" + "pand %%mm6, %%mm4 \n\t" + "psrlq $19, %%mm2 \n\t" + "psrlq $19, %%mm5 \n\t" + "pand %2, %%mm2 \n\t" + "pand %2, %%mm5 \n\t" + "por %%mm1, %%mm0 \n\t" + "por %%mm4, %%mm3 \n\t" + "por %%mm2, %%mm0 \n\t" + "por %%mm5, %%mm3 \n\t" + "psllq $16, %%mm3 \n\t" + "por %%mm3, %%mm0 \n\t" + MOVNTQ" %%mm0, %0 \n\t" + :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); + d += 4; + s += 16; + } + __asm__ volatile(SFENCE:::"memory"); + __asm__ volatile(EMMS:::"memory"); + while (s < end) { + register int rgb = *(const uint32_t*)s; s += 4; + *d++ = ((rgb&0xF8)<<7) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>19); + } +} + +static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, int src_size) +{ + const uint8_t *s = src; + const uint8_t *end; + const uint8_t *mm_end; + uint16_t *d = (uint16_t *)dst; + end = s + src_size; + __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory"); + __asm__ volatile( + "movq %0, %%mm7 \n\t" + "movq %1, %%mm6 \n\t" + 
::"m"(red_16mask),"m"(green_16mask)); + mm_end = end - 11; + while (s < mm_end) { + __asm__ volatile( + PREFETCH" 32%1 \n\t" + "movd %1, %%mm0 \n\t" + "movd 3%1, %%mm3 \n\t" + "punpckldq 6%1, %%mm0 \n\t" + "punpckldq 9%1, %%mm3 \n\t" + "movq %%mm0, %%mm1 \n\t" + "movq %%mm0, %%mm2 \n\t" + "movq %%mm3, %%mm4 \n\t" + "movq %%mm3, %%mm5 \n\t" + "psrlq $3, %%mm0 \n\t" + "psrlq $3, %%mm3 \n\t" + "pand %2, %%mm0 \n\t" + "pand %2, %%mm3 \n\t" + "psrlq $5, %%mm1 \n\t" + "psrlq $5, %%mm4 \n\t" + "pand %%mm6, %%mm1 \n\t" + "pand %%mm6, %%mm4 \n\t" + "psrlq $8, %%mm2 \n\t" + "psrlq $8, %%mm5 \n\t" + "pand %%mm7, %%mm2 \n\t" + "pand %%mm7, %%mm5 \n\t" + "por %%mm1, %%mm0 \n\t" + "por %%mm4, %%mm3 \n\t" + "por %%mm2, %%mm0 \n\t" + "por %%mm5, %%mm3 \n\t" + "psllq $16, %%mm3 \n\t" + "por %%mm3, %%mm0 \n\t" + MOVNTQ" %%mm0, %0 \n\t" + :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); + d += 4; + s += 12; + } + __asm__ volatile(SFENCE:::"memory"); + __asm__ volatile(EMMS:::"memory"); + while (s < end) { + const int b = *s++; + const int g = *s++; + const int r = *s++; + *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8); + } +} + +static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, int src_size) +{ + const uint8_t *s = src; + const uint8_t *end; + const uint8_t *mm_end; + uint16_t *d = (uint16_t *)dst; + end = s + src_size; + __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory"); + __asm__ volatile( + "movq %0, %%mm7 \n\t" + "movq %1, %%mm6 \n\t" + ::"m"(red_16mask),"m"(green_16mask)); + mm_end = end - 15; + while (s < mm_end) { + __asm__ volatile( + PREFETCH" 32%1 \n\t" + "movd %1, %%mm0 \n\t" + "movd 3%1, %%mm3 \n\t" + "punpckldq 6%1, %%mm0 \n\t" + "punpckldq 9%1, %%mm3 \n\t" + "movq %%mm0, %%mm1 \n\t" + "movq %%mm0, %%mm2 \n\t" + "movq %%mm3, %%mm4 \n\t" + "movq %%mm3, %%mm5 \n\t" + "psllq $8, %%mm0 \n\t" + "psllq $8, %%mm3 \n\t" + "pand %%mm7, %%mm0 \n\t" + "pand %%mm7, %%mm3 \n\t" + "psrlq $5, %%mm1 \n\t" + "psrlq $5, %%mm4 \n\t" + "pand %%mm6, %%mm1 \n\t" + "pand 
%%mm6, %%mm4 \n\t" + "psrlq $19, %%mm2 \n\t" + "psrlq $19, %%mm5 \n\t" + "pand %2, %%mm2 \n\t" + "pand %2, %%mm5 \n\t" + "por %%mm1, %%mm0 \n\t" + "por %%mm4, %%mm3 \n\t" + "por %%mm2, %%mm0 \n\t" + "por %%mm5, %%mm3 \n\t" + "psllq $16, %%mm3 \n\t" + "por %%mm3, %%mm0 \n\t" + MOVNTQ" %%mm0, %0 \n\t" + :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); + d += 4; + s += 12; + } + __asm__ volatile(SFENCE:::"memory"); + __asm__ volatile(EMMS:::"memory"); + while (s < end) { + const int r = *s++; + const int g = *s++; + const int b = *s++; + *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8); + } +} + +static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, int src_size) +{ + const uint8_t *s = src; + const uint8_t *end; + const uint8_t *mm_end; + uint16_t *d = (uint16_t *)dst; + end = s + src_size; + __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory"); + __asm__ volatile( + "movq %0, %%mm7 \n\t" + "movq %1, %%mm6 \n\t" + ::"m"(red_15mask),"m"(green_15mask)); + mm_end = end - 11; + while (s < mm_end) { + __asm__ volatile( + PREFETCH" 32%1 \n\t" + "movd %1, %%mm0 \n\t" + "movd 3%1, %%mm3 \n\t" + "punpckldq 6%1, %%mm0 \n\t" + "punpckldq 9%1, %%mm3 \n\t" + "movq %%mm0, %%mm1 \n\t" + "movq %%mm0, %%mm2 \n\t" + "movq %%mm3, %%mm4 \n\t" + "movq %%mm3, %%mm5 \n\t" + "psrlq $3, %%mm0 \n\t" + "psrlq $3, %%mm3 \n\t" + "pand %2, %%mm0 \n\t" + "pand %2, %%mm3 \n\t" + "psrlq $6, %%mm1 \n\t" + "psrlq $6, %%mm4 \n\t" + "pand %%mm6, %%mm1 \n\t" + "pand %%mm6, %%mm4 \n\t" + "psrlq $9, %%mm2 \n\t" + "psrlq $9, %%mm5 \n\t" + "pand %%mm7, %%mm2 \n\t" + "pand %%mm7, %%mm5 \n\t" + "por %%mm1, %%mm0 \n\t" + "por %%mm4, %%mm3 \n\t" + "por %%mm2, %%mm0 \n\t" + "por %%mm5, %%mm3 \n\t" + "psllq $16, %%mm3 \n\t" + "por %%mm3, %%mm0 \n\t" + MOVNTQ" %%mm0, %0 \n\t" + :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); + d += 4; + s += 12; + } + __asm__ volatile(SFENCE:::"memory"); + __asm__ volatile(EMMS:::"memory"); + while (s < end) { + const int b = *s++; + const int g = *s++; + const int r = 
*s++; + *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7); + } +} + +static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, int src_size) +{ + const uint8_t *s = src; + const uint8_t *end; + const uint8_t *mm_end; + uint16_t *d = (uint16_t *)dst; + end = s + src_size; + __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory"); + __asm__ volatile( + "movq %0, %%mm7 \n\t" + "movq %1, %%mm6 \n\t" + ::"m"(red_15mask),"m"(green_15mask)); + mm_end = end - 15; + while (s < mm_end) { + __asm__ volatile( + PREFETCH" 32%1 \n\t" + "movd %1, %%mm0 \n\t" + "movd 3%1, %%mm3 \n\t" + "punpckldq 6%1, %%mm0 \n\t" + "punpckldq 9%1, %%mm3 \n\t" + "movq %%mm0, %%mm1 \n\t" + "movq %%mm0, %%mm2 \n\t" + "movq %%mm3, %%mm4 \n\t" + "movq %%mm3, %%mm5 \n\t" + "psllq $7, %%mm0 \n\t" + "psllq $7, %%mm3 \n\t" + "pand %%mm7, %%mm0 \n\t" + "pand %%mm7, %%mm3 \n\t" + "psrlq $6, %%mm1 \n\t" + "psrlq $6, %%mm4 \n\t" + "pand %%mm6, %%mm1 \n\t" + "pand %%mm6, %%mm4 \n\t" + "psrlq $19, %%mm2 \n\t" + "psrlq $19, %%mm5 \n\t" + "pand %2, %%mm2 \n\t" + "pand %2, %%mm5 \n\t" + "por %%mm1, %%mm0 \n\t" + "por %%mm4, %%mm3 \n\t" + "por %%mm2, %%mm0 \n\t" + "por %%mm5, %%mm3 \n\t" + "psllq $16, %%mm3 \n\t" + "por %%mm3, %%mm0 \n\t" + MOVNTQ" %%mm0, %0 \n\t" + :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); + d += 4; + s += 12; + } + __asm__ volatile(SFENCE:::"memory"); + __asm__ volatile(EMMS:::"memory"); + while (s < end) { + const int r = *s++; + const int g = *s++; + const int b = *s++; + *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7); + } +} + +/* + I use less accurate approximation here by simply left-shifting the input + value and filling the low order bits with zeroes. This method improves PNG + compression but this scheme cannot reproduce white exactly, since it does + not generate an all-ones maximum value; the net effect is to darken the + image slightly. 
+ + The better method should be "left bit replication": + + 4 3 2 1 0 + --------- + 1 1 0 1 1 + + 7 6 5 4 3 2 1 0 + ---------------- + 1 1 0 1 1 1 1 0 + |=======| |===| + | leftmost bits repeated to fill open bits + | + original bits +*/ +static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int src_size) +{ + const uint16_t *end; + const uint16_t *mm_end; + uint8_t *d = dst; + const uint16_t *s = (const uint16_t*)src; + end = s + src_size/2; + __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); + mm_end = end - 7; + while (s < mm_end) { + __asm__ volatile( + PREFETCH" 32%1 \n\t" + "movq %1, %%mm0 \n\t" + "movq %1, %%mm1 \n\t" + "movq %1, %%mm2 \n\t" + "pand %2, %%mm0 \n\t" + "pand %3, %%mm1 \n\t" + "pand %4, %%mm2 \n\t" + "psllq $3, %%mm0 \n\t" + "psrlq $2, %%mm1 \n\t" + "psrlq $7, %%mm2 \n\t" + "movq %%mm0, %%mm3 \n\t" + "movq %%mm1, %%mm4 \n\t" + "movq %%mm2, %%mm5 \n\t" + "punpcklwd %5, %%mm0 \n\t" + "punpcklwd %5, %%mm1 \n\t" + "punpcklwd %5, %%mm2 \n\t" + "punpckhwd %5, %%mm3 \n\t" + "punpckhwd %5, %%mm4 \n\t" + "punpckhwd %5, %%mm5 \n\t" + "psllq $8, %%mm1 \n\t" + "psllq $16, %%mm2 \n\t" + "por %%mm1, %%mm0 \n\t" + "por %%mm2, %%mm0 \n\t" + "psllq $8, %%mm4 \n\t" + "psllq $16, %%mm5 \n\t" + "por %%mm4, %%mm3 \n\t" + "por %%mm5, %%mm3 \n\t" + + "movq %%mm0, %%mm6 \n\t" + "movq %%mm3, %%mm7 \n\t" + + "movq 8%1, %%mm0 \n\t" + "movq 8%1, %%mm1 \n\t" + "movq 8%1, %%mm2 \n\t" + "pand %2, %%mm0 \n\t" + "pand %3, %%mm1 \n\t" + "pand %4, %%mm2 \n\t" + "psllq $3, %%mm0 \n\t" + "psrlq $2, %%mm1 \n\t" + "psrlq $7, %%mm2 \n\t" + "movq %%mm0, %%mm3 \n\t" + "movq %%mm1, %%mm4 \n\t" + "movq %%mm2, %%mm5 \n\t" + "punpcklwd %5, %%mm0 \n\t" + "punpcklwd %5, %%mm1 \n\t" + "punpcklwd %5, %%mm2 \n\t" + "punpckhwd %5, %%mm3 \n\t" + "punpckhwd %5, %%mm4 \n\t" + "punpckhwd %5, %%mm5 \n\t" + "psllq $8, %%mm1 \n\t" + "psllq $16, %%mm2 \n\t" + "por %%mm1, %%mm0 \n\t" + "por %%mm2, %%mm0 \n\t" + "psllq $8, %%mm4 \n\t" + "psllq $16, %%mm5 \n\t" + "por %%mm4, %%mm3 
\n\t" + "por %%mm5, %%mm3 \n\t" + + :"=m"(*d) + :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null) + :"memory"); + /* borrowed 32 to 24 */ + __asm__ volatile( + "movq %%mm0, %%mm4 \n\t" + "movq %%mm3, %%mm5 \n\t" + "movq %%mm6, %%mm0 \n\t" + "movq %%mm7, %%mm1 \n\t" + + "movq %%mm4, %%mm6 \n\t" + "movq %%mm5, %%mm7 \n\t" + "movq %%mm0, %%mm2 \n\t" + "movq %%mm1, %%mm3 \n\t" + + STORE_BGR24_MMX + + :"=m"(*d) + :"m"(*s) + :"memory"); + d += 24; + s += 8; + } + __asm__ volatile(SFENCE:::"memory"); + __asm__ volatile(EMMS:::"memory"); + while (s < end) { + register uint16_t bgr; + bgr = *s++; + *d++ = (bgr&0x1F)<<3; + *d++ = (bgr&0x3E0)>>2; + *d++ = (bgr&0x7C00)>>7; + } +} + +static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int src_size) +{ + const uint16_t *end; + const uint16_t *mm_end; + uint8_t *d = (uint8_t *)dst; + const uint16_t *s = (const uint16_t *)src; + end = s + src_size/2; + __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); + mm_end = end - 7; + while (s < mm_end) { + __asm__ volatile( + PREFETCH" 32%1 \n\t" + "movq %1, %%mm0 \n\t" + "movq %1, %%mm1 \n\t" + "movq %1, %%mm2 \n\t" + "pand %2, %%mm0 \n\t" + "pand %3, %%mm1 \n\t" + "pand %4, %%mm2 \n\t" + "psllq $3, %%mm0 \n\t" + "psrlq $3, %%mm1 \n\t" + "psrlq $8, %%mm2 \n\t" + "movq %%mm0, %%mm3 \n\t" + "movq %%mm1, %%mm4 \n\t" + "movq %%mm2, %%mm5 \n\t" + "punpcklwd %5, %%mm0 \n\t" + "punpcklwd %5, %%mm1 \n\t" + "punpcklwd %5, %%mm2 \n\t" + "punpckhwd %5, %%mm3 \n\t" + "punpckhwd %5, %%mm4 \n\t" + "punpckhwd %5, %%mm5 \n\t" + "psllq $8, %%mm1 \n\t" + "psllq $16, %%mm2 \n\t" + "por %%mm1, %%mm0 \n\t" + "por %%mm2, %%mm0 \n\t" + "psllq $8, %%mm4 \n\t" + "psllq $16, %%mm5 \n\t" + "por %%mm4, %%mm3 \n\t" + "por %%mm5, %%mm3 \n\t" + + "movq %%mm0, %%mm6 \n\t" + "movq %%mm3, %%mm7 \n\t" + + "movq 8%1, %%mm0 \n\t" + "movq 8%1, %%mm1 \n\t" + "movq 8%1, %%mm2 \n\t" + "pand %2, %%mm0 \n\t" + "pand %3, %%mm1 \n\t" + "pand %4, %%mm2 \n\t" + "psllq $3, %%mm0 \n\t" + "psrlq $3, 
%%mm1 \n\t" + "psrlq $8, %%mm2 \n\t" + "movq %%mm0, %%mm3 \n\t" + "movq %%mm1, %%mm4 \n\t" + "movq %%mm2, %%mm5 \n\t" + "punpcklwd %5, %%mm0 \n\t" + "punpcklwd %5, %%mm1 \n\t" + "punpcklwd %5, %%mm2 \n\t" + "punpckhwd %5, %%mm3 \n\t" + "punpckhwd %5, %%mm4 \n\t" + "punpckhwd %5, %%mm5 \n\t" + "psllq $8, %%mm1 \n\t" + "psllq $16, %%mm2 \n\t" + "por %%mm1, %%mm0 \n\t" + "por %%mm2, %%mm0 \n\t" + "psllq $8, %%mm4 \n\t" + "psllq $16, %%mm5 \n\t" + "por %%mm4, %%mm3 \n\t" + "por %%mm5, %%mm3 \n\t" + :"=m"(*d) + :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null) + :"memory"); + /* borrowed 32 to 24 */ + __asm__ volatile( + "movq %%mm0, %%mm4 \n\t" + "movq %%mm3, %%mm5 \n\t" + "movq %%mm6, %%mm0 \n\t" + "movq %%mm7, %%mm1 \n\t" + + "movq %%mm4, %%mm6 \n\t" + "movq %%mm5, %%mm7 \n\t" + "movq %%mm0, %%mm2 \n\t" + "movq %%mm1, %%mm3 \n\t" + + STORE_BGR24_MMX + + :"=m"(*d) + :"m"(*s) + :"memory"); + d += 24; + s += 8; + } + __asm__ volatile(SFENCE:::"memory"); + __asm__ volatile(EMMS:::"memory"); + while (s < end) { + register uint16_t bgr; + bgr = *s++; + *d++ = (bgr&0x1F)<<3; + *d++ = (bgr&0x7E0)>>3; + *d++ = (bgr&0xF800)>>8; + } +} + +/* + * mm0 = 00 B3 00 B2 00 B1 00 B0 + * mm1 = 00 G3 00 G2 00 G1 00 G0 + * mm2 = 00 R3 00 R2 00 R1 00 R0 + * mm6 = FF FF FF FF FF FF FF FF + * mm7 = 00 00 00 00 00 00 00 00 + */ +#define PACK_RGB32 \ + "packuswb %%mm7, %%mm0 \n\t" /* 00 00 00 00 B3 B2 B1 B0 */ \ + "packuswb %%mm7, %%mm1 \n\t" /* 00 00 00 00 G3 G2 G1 G0 */ \ + "packuswb %%mm7, %%mm2 \n\t" /* 00 00 00 00 R3 R2 R1 R0 */ \ + "punpcklbw %%mm1, %%mm0 \n\t" /* G3 B3 G2 B2 G1 B1 G0 B0 */ \ + "punpcklbw %%mm6, %%mm2 \n\t" /* FF R3 FF R2 FF R1 FF R0 */ \ + "movq %%mm0, %%mm3 \n\t" \ + "punpcklwd %%mm2, %%mm0 \n\t" /* FF R1 G1 B1 FF R0 G0 B0 */ \ + "punpckhwd %%mm2, %%mm3 \n\t" /* FF R3 G3 B3 FF R2 G2 B2 */ \ + MOVNTQ" %%mm0, %0 \n\t" \ + MOVNTQ" %%mm3, 8%0 \n\t" \ + +static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_size) +{ + const uint16_t 
*end; + const uint16_t *mm_end; + uint8_t *d = dst; + const uint16_t *s = (const uint16_t *)src; + end = s + src_size/2; + __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); + __asm__ volatile("pxor %%mm7,%%mm7 \n\t":::"memory"); + __asm__ volatile("pcmpeqd %%mm6,%%mm6 \n\t":::"memory"); + mm_end = end - 3; + while (s < mm_end) { + __asm__ volatile( + PREFETCH" 32%1 \n\t" + "movq %1, %%mm0 \n\t" + "movq %1, %%mm1 \n\t" + "movq %1, %%mm2 \n\t" + "pand %2, %%mm0 \n\t" + "pand %3, %%mm1 \n\t" + "pand %4, %%mm2 \n\t" + "psllq $3, %%mm0 \n\t" + "psrlq $2, %%mm1 \n\t" + "psrlq $7, %%mm2 \n\t" + PACK_RGB32 + :"=m"(*d) + :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r) + :"memory"); + d += 16; + s += 4; + } + __asm__ volatile(SFENCE:::"memory"); + __asm__ volatile(EMMS:::"memory"); + while (s < end) { + register uint16_t bgr; + bgr = *s++; + *d++ = (bgr&0x1F)<<3; + *d++ = (bgr&0x3E0)>>2; + *d++ = (bgr&0x7C00)>>7; + *d++ = 255; + } +} + +static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_size) +{ + const uint16_t *end; + const uint16_t *mm_end; + uint8_t *d = dst; + const uint16_t *s = (const uint16_t*)src; + end = s + src_size/2; + __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); + __asm__ volatile("pxor %%mm7,%%mm7 \n\t":::"memory"); + __asm__ volatile("pcmpeqd %%mm6,%%mm6 \n\t":::"memory"); + mm_end = end - 3; + while (s < mm_end) { + __asm__ volatile( + PREFETCH" 32%1 \n\t" + "movq %1, %%mm0 \n\t" + "movq %1, %%mm1 \n\t" + "movq %1, %%mm2 \n\t" + "pand %2, %%mm0 \n\t" + "pand %3, %%mm1 \n\t" + "pand %4, %%mm2 \n\t" + "psllq $3, %%mm0 \n\t" + "psrlq $3, %%mm1 \n\t" + "psrlq $8, %%mm2 \n\t" + PACK_RGB32 + :"=m"(*d) + :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r) + :"memory"); + d += 16; + s += 4; + } + __asm__ volatile(SFENCE:::"memory"); + __asm__ volatile(EMMS:::"memory"); + while (s < end) { + register uint16_t bgr; + bgr = *s++; + *d++ = (bgr&0x1F)<<3; + *d++ = (bgr&0x7E0)>>3; + *d++ = (bgr&0xF800)>>8; + *d++ = 255; + } +} + +static 
inline void RENAME(shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, int src_size) +{ + x86_reg idx = 15 - src_size; + const uint8_t *s = src-idx; + uint8_t *d = dst-idx; + __asm__ volatile( + "test %0, %0 \n\t" + "jns 2f \n\t" + PREFETCH" (%1, %0) \n\t" + "movq %3, %%mm7 \n\t" + "pxor %4, %%mm7 \n\t" + "movq %%mm7, %%mm6 \n\t" + "pxor %5, %%mm7 \n\t" + ".p2align 4 \n\t" + "1: \n\t" + PREFETCH" 32(%1, %0) \n\t" + "movq (%1, %0), %%mm0 \n\t" + "movq 8(%1, %0), %%mm1 \n\t" +# if COMPILE_TEMPLATE_MMX2 + "pshufw $177, %%mm0, %%mm3 \n\t" + "pshufw $177, %%mm1, %%mm5 \n\t" + "pand %%mm7, %%mm0 \n\t" + "pand %%mm6, %%mm3 \n\t" + "pand %%mm7, %%mm1 \n\t" + "pand %%mm6, %%mm5 \n\t" + "por %%mm3, %%mm0 \n\t" + "por %%mm5, %%mm1 \n\t" +# else + "movq %%mm0, %%mm2 \n\t" + "movq %%mm1, %%mm4 \n\t" + "pand %%mm7, %%mm0 \n\t" + "pand %%mm6, %%mm2 \n\t" + "pand %%mm7, %%mm1 \n\t" + "pand %%mm6, %%mm4 \n\t" + "movq %%mm2, %%mm3 \n\t" + "movq %%mm4, %%mm5 \n\t" + "pslld $16, %%mm2 \n\t" + "psrld $16, %%mm3 \n\t" + "pslld $16, %%mm4 \n\t" + "psrld $16, %%mm5 \n\t" + "por %%mm2, %%mm0 \n\t" + "por %%mm4, %%mm1 \n\t" + "por %%mm3, %%mm0 \n\t" + "por %%mm5, %%mm1 \n\t" +# endif + MOVNTQ" %%mm0, (%2, %0) \n\t" + MOVNTQ" %%mm1, 8(%2, %0) \n\t" + "add $16, %0 \n\t" + "js 1b \n\t" + SFENCE" \n\t" + EMMS" \n\t" + "2: \n\t" + : "+&r"(idx) + : "r" (s), "r" (d), "m" (mask32b), "m" (mask32r), "m" (mmx_one) + : "memory"); + for (; idx<15; idx+=4) { + register int v = *(const uint32_t *)&s[idx], g = v & 0xff00ff00; + v &= 0xff00ff; + *(uint32_t *)&d[idx] = (v>>16) + g + (v<<16); + } +} + +static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, int src_size) +{ + unsigned i; + x86_reg mmx_size= 23 - src_size; + __asm__ volatile ( + "test %%"REG_a", %%"REG_a" \n\t" + "jns 2f \n\t" + "movq "MANGLE(mask24r)", %%mm5 \n\t" + "movq "MANGLE(mask24g)", %%mm6 \n\t" + "movq "MANGLE(mask24b)", %%mm7 \n\t" + ".p2align 4 \n\t" + "1: \n\t" + PREFETCH" 32(%1, %%"REG_a") \n\t" + "movq (%1, 
%%"REG_a"), %%mm0 \n\t" // BGR BGR BG + "movq (%1, %%"REG_a"), %%mm1 \n\t" // BGR BGR BG + "movq 2(%1, %%"REG_a"), %%mm2 \n\t" // R BGR BGR B + "psllq $16, %%mm0 \n\t" // 00 BGR BGR + "pand %%mm5, %%mm0 \n\t" + "pand %%mm6, %%mm1 \n\t" + "pand %%mm7, %%mm2 \n\t" + "por %%mm0, %%mm1 \n\t" + "por %%mm2, %%mm1 \n\t" + "movq 6(%1, %%"REG_a"), %%mm0 \n\t" // BGR BGR BG + MOVNTQ" %%mm1, (%2, %%"REG_a") \n\t" // RGB RGB RG + "movq 8(%1, %%"REG_a"), %%mm1 \n\t" // R BGR BGR B + "movq 10(%1, %%"REG_a"), %%mm2 \n\t" // GR BGR BGR + "pand %%mm7, %%mm0 \n\t" + "pand %%mm5, %%mm1 \n\t" + "pand %%mm6, %%mm2 \n\t" + "por %%mm0, %%mm1 \n\t" + "por %%mm2, %%mm1 \n\t" + "movq 14(%1, %%"REG_a"), %%mm0 \n\t" // R BGR BGR B + MOVNTQ" %%mm1, 8(%2, %%"REG_a") \n\t" // B RGB RGB R + "movq 16(%1, %%"REG_a"), %%mm1 \n\t" // GR BGR BGR + "movq 18(%1, %%"REG_a"), %%mm2 \n\t" // BGR BGR BG + "pand %%mm6, %%mm0 \n\t" + "pand %%mm7, %%mm1 \n\t" + "pand %%mm5, %%mm2 \n\t" + "por %%mm0, %%mm1 \n\t" + "por %%mm2, %%mm1 \n\t" + MOVNTQ" %%mm1, 16(%2, %%"REG_a") \n\t" + "add $24, %%"REG_a" \n\t" + " js 1b \n\t" + "2: \n\t" + : "+a" (mmx_size) + : "r" (src-mmx_size), "r"(dst-mmx_size) + ); + + __asm__ volatile(SFENCE:::"memory"); + __asm__ volatile(EMMS:::"memory"); + + if (mmx_size==23) return; //finished, was multiple of 8 + + src+= src_size; + dst+= src_size; + src_size= 23-mmx_size; + src-= src_size; + dst-= src_size; + for (i=0; i<src_size; i+=3) { + register uint8_t x; + x = src[i + 2]; + dst[i + 1] = src[i + 1]; + dst[i + 2] = src[i + 0]; + dst[i + 0] = x; + } +} + +static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, + int width, int height, + int lumStride, int chromStride, int dstStride, int vertLumPerChroma) +{ + int y; + const x86_reg chromWidth= width>>1; + for (y=0; y<height; y++) { + //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway) + __asm__ volatile( + "xor 
%%"REG_a", %%"REG_a" \n\t" + ".p2align 4 \n\t" + "1: \n\t" + PREFETCH" 32(%1, %%"REG_a", 2) \n\t" + PREFETCH" 32(%2, %%"REG_a") \n\t" + PREFETCH" 32(%3, %%"REG_a") \n\t" + "movq (%2, %%"REG_a"), %%mm0 \n\t" // U(0) + "movq %%mm0, %%mm2 \n\t" // U(0) + "movq (%3, %%"REG_a"), %%mm1 \n\t" // V(0) + "punpcklbw %%mm1, %%mm0 \n\t" // UVUV UVUV(0) + "punpckhbw %%mm1, %%mm2 \n\t" // UVUV UVUV(8) + + "movq (%1, %%"REG_a",2), %%mm3 \n\t" // Y(0) + "movq 8(%1, %%"REG_a",2), %%mm5 \n\t" // Y(8) + "movq %%mm3, %%mm4 \n\t" // Y(0) + "movq %%mm5, %%mm6 \n\t" // Y(8) + "punpcklbw %%mm0, %%mm3 \n\t" // YUYV YUYV(0) + "punpckhbw %%mm0, %%mm4 \n\t" // YUYV YUYV(4) + "punpcklbw %%mm2, %%mm5 \n\t" // YUYV YUYV(8) + "punpckhbw %%mm2, %%mm6 \n\t" // YUYV YUYV(12) + + MOVNTQ" %%mm3, (%0, %%"REG_a", 4) \n\t" + MOVNTQ" %%mm4, 8(%0, %%"REG_a", 4) \n\t" + MOVNTQ" %%mm5, 16(%0, %%"REG_a", 4) \n\t" + MOVNTQ" %%mm6, 24(%0, %%"REG_a", 4) \n\t" + + "add $8, %%"REG_a" \n\t" + "cmp %4, %%"REG_a" \n\t" + " jb 1b \n\t" + ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth) + : "%"REG_a + ); + if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) { + usrc += chromStride; + vsrc += chromStride; + } + ysrc += lumStride; + dst += dstStride; + } + __asm__(EMMS" \n\t" + SFENCE" \n\t" + :::"memory"); +} + +/** + * Height should be a multiple of 2 and width should be a multiple of 16. + * (If this is a problem for anyone then tell me, and I will fix it.) 
+ */ +static inline void RENAME(yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, + int width, int height, + int lumStride, int chromStride, int dstStride) +{ + //FIXME interpolate chroma + RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2); +} + +static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, + int width, int height, + int lumStride, int chromStride, int dstStride, int vertLumPerChroma) +{ + int y; + const x86_reg chromWidth= width>>1; + for (y=0; y<height; y++) { + //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway) + __asm__ volatile( + "xor %%"REG_a", %%"REG_a" \n\t" + ".p2align 4 \n\t" + "1: \n\t" + PREFETCH" 32(%1, %%"REG_a", 2) \n\t" + PREFETCH" 32(%2, %%"REG_a") \n\t" + PREFETCH" 32(%3, %%"REG_a") \n\t" + "movq (%2, %%"REG_a"), %%mm0 \n\t" // U(0) + "movq %%mm0, %%mm2 \n\t" // U(0) + "movq (%3, %%"REG_a"), %%mm1 \n\t" // V(0) + "punpcklbw %%mm1, %%mm0 \n\t" // UVUV UVUV(0) + "punpckhbw %%mm1, %%mm2 \n\t" // UVUV UVUV(8) + + "movq (%1, %%"REG_a",2), %%mm3 \n\t" // Y(0) + "movq 8(%1, %%"REG_a",2), %%mm5 \n\t" // Y(8) + "movq %%mm0, %%mm4 \n\t" // Y(0) + "movq %%mm2, %%mm6 \n\t" // Y(8) + "punpcklbw %%mm3, %%mm0 \n\t" // YUYV YUYV(0) + "punpckhbw %%mm3, %%mm4 \n\t" // YUYV YUYV(4) + "punpcklbw %%mm5, %%mm2 \n\t" // YUYV YUYV(8) + "punpckhbw %%mm5, %%mm6 \n\t" // YUYV YUYV(12) + + MOVNTQ" %%mm0, (%0, %%"REG_a", 4) \n\t" + MOVNTQ" %%mm4, 8(%0, %%"REG_a", 4) \n\t" + MOVNTQ" %%mm2, 16(%0, %%"REG_a", 4) \n\t" + MOVNTQ" %%mm6, 24(%0, %%"REG_a", 4) \n\t" + + "add $8, %%"REG_a" \n\t" + "cmp %4, %%"REG_a" \n\t" + " jb 1b \n\t" + ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth) + : "%"REG_a + ); + if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) { + usrc += chromStride; + vsrc += chromStride; + } + ysrc += lumStride; + dst += dstStride; + } 
+ __asm__(EMMS" \n\t" + SFENCE" \n\t" + :::"memory"); +} + +/** + * Height should be a multiple of 2 and width should be a multiple of 16 + * (If this is a problem for anyone then tell me, and I will fix it.) + */ +static inline void RENAME(yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, + int width, int height, + int lumStride, int chromStride, int dstStride) +{ + //FIXME interpolate chroma + RENAME(yuvPlanartouyvy)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2); +} + +/** + * Width should be a multiple of 16. + */ +static inline void RENAME(yuv422ptouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, + int width, int height, + int lumStride, int chromStride, int dstStride) +{ + RENAME(yuvPlanartouyvy)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 1); +} + +/** + * Width should be a multiple of 16. + */ +static inline void RENAME(yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, + int width, int height, + int lumStride, int chromStride, int dstStride) +{ + RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 1); +} + +/** + * Height should be a multiple of 2 and width should be a multiple of 16. + * (If this is a problem for anyone then tell me, and I will fix it.) + */ +static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, + int width, int height, + int lumStride, int chromStride, int srcStride) +{ + int y; + const x86_reg chromWidth= width>>1; + for (y=0; y<height; y+=2) { + __asm__ volatile( + "xor %%"REG_a", %%"REG_a" \n\t" + "pcmpeqw %%mm7, %%mm7 \n\t" + "psrlw $8, %%mm7 \n\t" // FF,00,FF,00... 
+ ".p2align 4 \n\t" + "1: \n\t" + PREFETCH" 64(%0, %%"REG_a", 4) \n\t" + "movq (%0, %%"REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0) + "movq 8(%0, %%"REG_a", 4), %%mm1 \n\t" // YUYV YUYV(4) + "movq %%mm0, %%mm2 \n\t" // YUYV YUYV(0) + "movq %%mm1, %%mm3 \n\t" // YUYV YUYV(4) + "psrlw $8, %%mm0 \n\t" // U0V0 U0V0(0) + "psrlw $8, %%mm1 \n\t" // U0V0 U0V0(4) + "pand %%mm7, %%mm2 \n\t" // Y0Y0 Y0Y0(0) + "pand %%mm7, %%mm3 \n\t" // Y0Y0 Y0Y0(4) + "packuswb %%mm1, %%mm0 \n\t" // UVUV UVUV(0) + "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(0) + + MOVNTQ" %%mm2, (%1, %%"REG_a", 2) \n\t" + + "movq 16(%0, %%"REG_a", 4), %%mm1 \n\t" // YUYV YUYV(8) + "movq 24(%0, %%"REG_a", 4), %%mm2 \n\t" // YUYV YUYV(12) + "movq %%mm1, %%mm3 \n\t" // YUYV YUYV(8) + "movq %%mm2, %%mm4 \n\t" // YUYV YUYV(12) + "psrlw $8, %%mm1 \n\t" // U0V0 U0V0(8) + "psrlw $8, %%mm2 \n\t" // U0V0 U0V0(12) + "pand %%mm7, %%mm3 \n\t" // Y0Y0 Y0Y0(8) + "pand %%mm7, %%mm4 \n\t" // Y0Y0 Y0Y0(12) + "packuswb %%mm2, %%mm1 \n\t" // UVUV UVUV(8) + "packuswb %%mm4, %%mm3 \n\t" // YYYY YYYY(8) + + MOVNTQ" %%mm3, 8(%1, %%"REG_a", 2) \n\t" + + "movq %%mm0, %%mm2 \n\t" // UVUV UVUV(0) + "movq %%mm1, %%mm3 \n\t" // UVUV UVUV(8) + "psrlw $8, %%mm0 \n\t" // V0V0 V0V0(0) + "psrlw $8, %%mm1 \n\t" // V0V0 V0V0(8) + "pand %%mm7, %%mm2 \n\t" // U0U0 U0U0(0) + "pand %%mm7, %%mm3 \n\t" // U0U0 U0U0(8) + "packuswb %%mm1, %%mm0 \n\t" // VVVV VVVV(0) + "packuswb %%mm3, %%mm2 \n\t" // UUUU UUUU(0) + + MOVNTQ" %%mm0, (%3, %%"REG_a") \n\t" + MOVNTQ" %%mm2, (%2, %%"REG_a") \n\t" + + "add $8, %%"REG_a" \n\t" + "cmp %4, %%"REG_a" \n\t" + " jb 1b \n\t" + ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth) + : "memory", "%"REG_a + ); + + ydst += lumStride; + src += srcStride; + + __asm__ volatile( + "xor %%"REG_a", %%"REG_a" \n\t" + ".p2align 4 \n\t" + "1: \n\t" + PREFETCH" 64(%0, %%"REG_a", 4) \n\t" + "movq (%0, %%"REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0) + "movq 8(%0, %%"REG_a", 4), %%mm1 \n\t" // YUYV YUYV(4) + "movq 16(%0, %%"REG_a", 4), 
%%mm2 \n\t" // YUYV YUYV(8) + "movq 24(%0, %%"REG_a", 4), %%mm3 \n\t" // YUYV YUYV(12) + "pand %%mm7, %%mm0 \n\t" // Y0Y0 Y0Y0(0) + "pand %%mm7, %%mm1 \n\t" // Y0Y0 Y0Y0(4) + "pand %%mm7, %%mm2 \n\t" // Y0Y0 Y0Y0(8) + "pand %%mm7, %%mm3 \n\t" // Y0Y0 Y0Y0(12) + "packuswb %%mm1, %%mm0 \n\t" // YYYY YYYY(0) + "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(8) + + MOVNTQ" %%mm0, (%1, %%"REG_a", 2) \n\t" + MOVNTQ" %%mm2, 8(%1, %%"REG_a", 2) \n\t" + + "add $8, %%"REG_a" \n\t" + "cmp %4, %%"REG_a" \n\t" + " jb 1b \n\t" + + ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth) + : "memory", "%"REG_a + ); + udst += chromStride; + vdst += chromStride; + ydst += lumStride; + src += srcStride; + } + __asm__ volatile(EMMS" \n\t" + SFENCE" \n\t" + :::"memory"); +} +#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ + +#if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW +static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, int srcWidth, int srcHeight, int srcStride, int dstStride) +{ + int x,y; + + dst[0]= src[0]; + + // first line + for (x=0; x<srcWidth-1; x++) { + dst[2*x+1]= (3*src[x] + src[x+1])>>2; + dst[2*x+2]= ( src[x] + 3*src[x+1])>>2; + } + dst[2*srcWidth-1]= src[srcWidth-1]; + + dst+= dstStride; + + for (y=1; y<srcHeight; y++) { + const x86_reg mmxSize= srcWidth&~15; + __asm__ volatile( + "mov %4, %%"REG_a" \n\t" + "movq "MANGLE(mmx_ff)", %%mm0 \n\t" + "movq (%0, %%"REG_a"), %%mm4 \n\t" + "movq %%mm4, %%mm2 \n\t" + "psllq $8, %%mm4 \n\t" + "pand %%mm0, %%mm2 \n\t" + "por %%mm2, %%mm4 \n\t" + "movq (%1, %%"REG_a"), %%mm5 \n\t" + "movq %%mm5, %%mm3 \n\t" + "psllq $8, %%mm5 \n\t" + "pand %%mm0, %%mm3 \n\t" + "por %%mm3, %%mm5 \n\t" + "1: \n\t" + "movq (%0, %%"REG_a"), %%mm0 \n\t" + "movq (%1, %%"REG_a"), %%mm1 \n\t" + "movq 1(%0, %%"REG_a"), %%mm2 \n\t" + "movq 1(%1, %%"REG_a"), %%mm3 \n\t" + PAVGB" %%mm0, %%mm5 \n\t" + PAVGB" %%mm0, %%mm3 \n\t" + PAVGB" %%mm0, %%mm5 \n\t" + PAVGB" %%mm0, %%mm3 \n\t" + PAVGB" %%mm1, %%mm4 \n\t" + PAVGB" %%mm1, %%mm2 \n\t" + 
PAVGB" %%mm1, %%mm4 \n\t" + PAVGB" %%mm1, %%mm2 \n\t" + "movq %%mm5, %%mm7 \n\t" + "movq %%mm4, %%mm6 \n\t" + "punpcklbw %%mm3, %%mm5 \n\t" + "punpckhbw %%mm3, %%mm7 \n\t" + "punpcklbw %%mm2, %%mm4 \n\t" + "punpckhbw %%mm2, %%mm6 \n\t" + MOVNTQ" %%mm5, (%2, %%"REG_a", 2) \n\t" + MOVNTQ" %%mm7, 8(%2, %%"REG_a", 2) \n\t" + MOVNTQ" %%mm4, (%3, %%"REG_a", 2) \n\t" + MOVNTQ" %%mm6, 8(%3, %%"REG_a", 2) \n\t" + "add $8, %%"REG_a" \n\t" + "movq -1(%0, %%"REG_a"), %%mm4 \n\t" + "movq -1(%1, %%"REG_a"), %%mm5 \n\t" + " js 1b \n\t" + :: "r" (src + mmxSize ), "r" (src + srcStride + mmxSize ), + "r" (dst + mmxSize*2), "r" (dst + dstStride + mmxSize*2), + "g" (-mmxSize) + : "%"REG_a + ); + + for (x=mmxSize-1; x<srcWidth-1; x++) { + dst[2*x +1]= (3*src[x+0] + src[x+srcStride+1])>>2; + dst[2*x+dstStride+2]= ( src[x+0] + 3*src[x+srcStride+1])>>2; + dst[2*x+dstStride+1]= ( src[x+1] + 3*src[x+srcStride ])>>2; + dst[2*x +2]= (3*src[x+1] + src[x+srcStride ])>>2; + } + dst[srcWidth*2 -1 ]= (3*src[srcWidth-1] + src[srcWidth-1 + srcStride])>>2; + dst[srcWidth*2 -1 + dstStride]= ( src[srcWidth-1] + 3*src[srcWidth-1 + srcStride])>>2; + + dst+=dstStride*2; + src+=srcStride; + } + + // last line + dst[0]= src[0]; + + for (x=0; x<srcWidth-1; x++) { + dst[2*x+1]= (3*src[x] + src[x+1])>>2; + dst[2*x+2]= ( src[x] + 3*src[x+1])>>2; + } + dst[2*srcWidth-1]= src[srcWidth-1]; + + __asm__ volatile(EMMS" \n\t" + SFENCE" \n\t" + :::"memory"); +} +#endif /* COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW */ + +#if !COMPILE_TEMPLATE_AMD3DNOW +/** + * Height should be a multiple of 2 and width should be a multiple of 16. + * (If this is a problem for anyone then tell me, and I will fix it.) + * Chrominance data is only taken from every second line, others are ignored. + * FIXME: Write HQ version. 
+ */ +static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, + int width, int height, + int lumStride, int chromStride, int srcStride) +{ + int y; + const x86_reg chromWidth= width>>1; + for (y=0; y<height; y+=2) { + __asm__ volatile( + "xor %%"REG_a", %%"REG_a" \n\t" + "pcmpeqw %%mm7, %%mm7 \n\t" + "psrlw $8, %%mm7 \n\t" // FF,00,FF,00... + ".p2align 4 \n\t" + "1: \n\t" + PREFETCH" 64(%0, %%"REG_a", 4) \n\t" + "movq (%0, %%"REG_a", 4), %%mm0 \n\t" // UYVY UYVY(0) + "movq 8(%0, %%"REG_a", 4), %%mm1 \n\t" // UYVY UYVY(4) + "movq %%mm0, %%mm2 \n\t" // UYVY UYVY(0) + "movq %%mm1, %%mm3 \n\t" // UYVY UYVY(4) + "pand %%mm7, %%mm0 \n\t" // U0V0 U0V0(0) + "pand %%mm7, %%mm1 \n\t" // U0V0 U0V0(4) + "psrlw $8, %%mm2 \n\t" // Y0Y0 Y0Y0(0) + "psrlw $8, %%mm3 \n\t" // Y0Y0 Y0Y0(4) + "packuswb %%mm1, %%mm0 \n\t" // UVUV UVUV(0) + "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(0) + + MOVNTQ" %%mm2, (%1, %%"REG_a", 2) \n\t" + + "movq 16(%0, %%"REG_a", 4), %%mm1 \n\t" // UYVY UYVY(8) + "movq 24(%0, %%"REG_a", 4), %%mm2 \n\t" // UYVY UYVY(12) + "movq %%mm1, %%mm3 \n\t" // UYVY UYVY(8) + "movq %%mm2, %%mm4 \n\t" // UYVY UYVY(12) + "pand %%mm7, %%mm1 \n\t" // U0V0 U0V0(8) + "pand %%mm7, %%mm2 \n\t" // U0V0 U0V0(12) + "psrlw $8, %%mm3 \n\t" // Y0Y0 Y0Y0(8) + "psrlw $8, %%mm4 \n\t" // Y0Y0 Y0Y0(12) + "packuswb %%mm2, %%mm1 \n\t" // UVUV UVUV(8) + "packuswb %%mm4, %%mm3 \n\t" // YYYY YYYY(8) + + MOVNTQ" %%mm3, 8(%1, %%"REG_a", 2) \n\t" + + "movq %%mm0, %%mm2 \n\t" // UVUV UVUV(0) + "movq %%mm1, %%mm3 \n\t" // UVUV UVUV(8) + "psrlw $8, %%mm0 \n\t" // V0V0 V0V0(0) + "psrlw $8, %%mm1 \n\t" // V0V0 V0V0(8) + "pand %%mm7, %%mm2 \n\t" // U0U0 U0U0(0) + "pand %%mm7, %%mm3 \n\t" // U0U0 U0U0(8) + "packuswb %%mm1, %%mm0 \n\t" // VVVV VVVV(0) + "packuswb %%mm3, %%mm2 \n\t" // UUUU UUUU(0) + + MOVNTQ" %%mm0, (%3, %%"REG_a") \n\t" + MOVNTQ" %%mm2, (%2, %%"REG_a") \n\t" + + "add $8, %%"REG_a" \n\t" + "cmp %4, %%"REG_a" \n\t" + " jb 1b \n\t" + ::"r"(src), 
"r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth) + : "memory", "%"REG_a + ); + + ydst += lumStride; + src += srcStride; + + __asm__ volatile( + "xor %%"REG_a", %%"REG_a" \n\t" + ".p2align 4 \n\t" + "1: \n\t" + PREFETCH" 64(%0, %%"REG_a", 4) \n\t" + "movq (%0, %%"REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0) + "movq 8(%0, %%"REG_a", 4), %%mm1 \n\t" // YUYV YUYV(4) + "movq 16(%0, %%"REG_a", 4), %%mm2 \n\t" // YUYV YUYV(8) + "movq 24(%0, %%"REG_a", 4), %%mm3 \n\t" // YUYV YUYV(12) + "psrlw $8, %%mm0 \n\t" // Y0Y0 Y0Y0(0) + "psrlw $8, %%mm1 \n\t" // Y0Y0 Y0Y0(4) + "psrlw $8, %%mm2 \n\t" // Y0Y0 Y0Y0(8) + "psrlw $8, %%mm3 \n\t" // Y0Y0 Y0Y0(12) + "packuswb %%mm1, %%mm0 \n\t" // YYYY YYYY(0) + "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(8) + + MOVNTQ" %%mm0, (%1, %%"REG_a", 2) \n\t" + MOVNTQ" %%mm2, 8(%1, %%"REG_a", 2) \n\t" + + "add $8, %%"REG_a" \n\t" + "cmp %4, %%"REG_a" \n\t" + " jb 1b \n\t" + + ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth) + : "memory", "%"REG_a + ); + udst += chromStride; + vdst += chromStride; + ydst += lumStride; + src += srcStride; + } + __asm__ volatile(EMMS" \n\t" + SFENCE" \n\t" + :::"memory"); +} +#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ + +/** + * Height should be a multiple of 2 and width should be a multiple of 2. + * (If this is a problem for anyone then tell me, and I will fix it.) + * Chrominance data is only taken from every second line, + * others are ignored in the C version. + * FIXME: Write HQ version. 
+ */ +static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, + int width, int height, + int lumStride, int chromStride, int srcStride) +{ + int y; + const x86_reg chromWidth= width>>1; + for (y=0; y<height-2; y+=2) { + int i; + for (i=0; i<2; i++) { + __asm__ volatile( + "mov %2, %%"REG_a" \n\t" + "movq "MANGLE(ff_bgr2YCoeff)", %%mm6 \n\t" + "movq "MANGLE(ff_w1111)", %%mm5 \n\t" + "pxor %%mm7, %%mm7 \n\t" + "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t" + ".p2align 4 \n\t" + "1: \n\t" + PREFETCH" 64(%0, %%"REG_d") \n\t" + "movd (%0, %%"REG_d"), %%mm0 \n\t" + "movd 3(%0, %%"REG_d"), %%mm1 \n\t" + "punpcklbw %%mm7, %%mm0 \n\t" + "punpcklbw %%mm7, %%mm1 \n\t" + "movd 6(%0, %%"REG_d"), %%mm2 \n\t" + "movd 9(%0, %%"REG_d"), %%mm3 \n\t" + "punpcklbw %%mm7, %%mm2 \n\t" + "punpcklbw %%mm7, %%mm3 \n\t" + "pmaddwd %%mm6, %%mm0 \n\t" + "pmaddwd %%mm6, %%mm1 \n\t" + "pmaddwd %%mm6, %%mm2 \n\t" + "pmaddwd %%mm6, %%mm3 \n\t" +#ifndef FAST_BGR2YV12 + "psrad $8, %%mm0 \n\t" + "psrad $8, %%mm1 \n\t" + "psrad $8, %%mm2 \n\t" + "psrad $8, %%mm3 \n\t" +#endif + "packssdw %%mm1, %%mm0 \n\t" + "packssdw %%mm3, %%mm2 \n\t" + "pmaddwd %%mm5, %%mm0 \n\t" + "pmaddwd %%mm5, %%mm2 \n\t" + "packssdw %%mm2, %%mm0 \n\t" + "psraw $7, %%mm0 \n\t" + + "movd 12(%0, %%"REG_d"), %%mm4 \n\t" + "movd 15(%0, %%"REG_d"), %%mm1 \n\t" + "punpcklbw %%mm7, %%mm4 \n\t" + "punpcklbw %%mm7, %%mm1 \n\t" + "movd 18(%0, %%"REG_d"), %%mm2 \n\t" + "movd 21(%0, %%"REG_d"), %%mm3 \n\t" + "punpcklbw %%mm7, %%mm2 \n\t" + "punpcklbw %%mm7, %%mm3 \n\t" + "pmaddwd %%mm6, %%mm4 \n\t" + "pmaddwd %%mm6, %%mm1 \n\t" + "pmaddwd %%mm6, %%mm2 \n\t" + "pmaddwd %%mm6, %%mm3 \n\t" +#ifndef FAST_BGR2YV12 + "psrad $8, %%mm4 \n\t" + "psrad $8, %%mm1 \n\t" + "psrad $8, %%mm2 \n\t" + "psrad $8, %%mm3 \n\t" +#endif + "packssdw %%mm1, %%mm4 \n\t" + "packssdw %%mm3, %%mm2 \n\t" + "pmaddwd %%mm5, %%mm4 \n\t" + "pmaddwd %%mm5, %%mm2 \n\t" + "add $24, %%"REG_d" \n\t" + "packssdw %%mm2, %%mm4 
\n\t" + "psraw $7, %%mm4 \n\t" + + "packuswb %%mm4, %%mm0 \n\t" + "paddusb "MANGLE(ff_bgr2YOffset)", %%mm0 \n\t" + + MOVNTQ" %%mm0, (%1, %%"REG_a") \n\t" + "add $8, %%"REG_a" \n\t" + " js 1b \n\t" + : : "r" (src+width*3), "r" (ydst+width), "g" ((x86_reg)-width) + : "%"REG_a, "%"REG_d + ); + ydst += lumStride; + src += srcStride; + } + src -= srcStride*2; + __asm__ volatile( + "mov %4, %%"REG_a" \n\t" + "movq "MANGLE(ff_w1111)", %%mm5 \n\t" + "movq "MANGLE(ff_bgr2UCoeff)", %%mm6 \n\t" + "pxor %%mm7, %%mm7 \n\t" + "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t" + "add %%"REG_d", %%"REG_d" \n\t" + ".p2align 4 \n\t" + "1: \n\t" + PREFETCH" 64(%0, %%"REG_d") \n\t" + PREFETCH" 64(%1, %%"REG_d") \n\t" +#if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW + "movq (%0, %%"REG_d"), %%mm0 \n\t" + "movq (%1, %%"REG_d"), %%mm1 \n\t" + "movq 6(%0, %%"REG_d"), %%mm2 \n\t" + "movq 6(%1, %%"REG_d"), %%mm3 \n\t" + PAVGB" %%mm1, %%mm0 \n\t" + PAVGB" %%mm3, %%mm2 \n\t" + "movq %%mm0, %%mm1 \n\t" + "movq %%mm2, %%mm3 \n\t" + "psrlq $24, %%mm0 \n\t" + "psrlq $24, %%mm2 \n\t" + PAVGB" %%mm1, %%mm0 \n\t" + PAVGB" %%mm3, %%mm2 \n\t" + "punpcklbw %%mm7, %%mm0 \n\t" + "punpcklbw %%mm7, %%mm2 \n\t" +#else + "movd (%0, %%"REG_d"), %%mm0 \n\t" + "movd (%1, %%"REG_d"), %%mm1 \n\t" + "movd 3(%0, %%"REG_d"), %%mm2 \n\t" + "movd 3(%1, %%"REG_d"), %%mm3 \n\t" + "punpcklbw %%mm7, %%mm0 \n\t" + "punpcklbw %%mm7, %%mm1 \n\t" + "punpcklbw %%mm7, %%mm2 \n\t" + "punpcklbw %%mm7, %%mm3 \n\t" + "paddw %%mm1, %%mm0 \n\t" + "paddw %%mm3, %%mm2 \n\t" + "paddw %%mm2, %%mm0 \n\t" + "movd 6(%0, %%"REG_d"), %%mm4 \n\t" + "movd 6(%1, %%"REG_d"), %%mm1 \n\t" + "movd 9(%0, %%"REG_d"), %%mm2 \n\t" + "movd 9(%1, %%"REG_d"), %%mm3 \n\t" + "punpcklbw %%mm7, %%mm4 \n\t" + "punpcklbw %%mm7, %%mm1 \n\t" + "punpcklbw %%mm7, %%mm2 \n\t" + "punpcklbw %%mm7, %%mm3 \n\t" + "paddw %%mm1, %%mm4 \n\t" + "paddw %%mm3, %%mm2 \n\t" + "paddw %%mm4, %%mm2 \n\t" + "psrlw $2, %%mm0 \n\t" + "psrlw $2, %%mm2 \n\t" +#endif + "movq 
"MANGLE(ff_bgr2VCoeff)", %%mm1 \n\t" + "movq "MANGLE(ff_bgr2VCoeff)", %%mm3 \n\t" + + "pmaddwd %%mm0, %%mm1 \n\t" + "pmaddwd %%mm2, %%mm3 \n\t" + "pmaddwd %%mm6, %%mm0 \n\t" + "pmaddwd %%mm6, %%mm2 \n\t" +#ifndef FAST_BGR2YV12 + "psrad $8, %%mm0 \n\t" + "psrad $8, %%mm1 \n\t" + "psrad $8, %%mm2 \n\t" + "psrad $8, %%mm3 \n\t" +#endif + "packssdw %%mm2, %%mm0 \n\t" + "packssdw %%mm3, %%mm1 \n\t" + "pmaddwd %%mm5, %%mm0 \n\t" + "pmaddwd %%mm5, %%mm1 \n\t" + "packssdw %%mm1, %%mm0 \n\t" // V1 V0 U1 U0 + "psraw $7, %%mm0 \n\t" + +#if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW + "movq 12(%0, %%"REG_d"), %%mm4 \n\t" + "movq 12(%1, %%"REG_d"), %%mm1 \n\t" + "movq 18(%0, %%"REG_d"), %%mm2 \n\t" + "movq 18(%1, %%"REG_d"), %%mm3 \n\t" + PAVGB" %%mm1, %%mm4 \n\t" + PAVGB" %%mm3, %%mm2 \n\t" + "movq %%mm4, %%mm1 \n\t" + "movq %%mm2, %%mm3 \n\t" + "psrlq $24, %%mm4 \n\t" + "psrlq $24, %%mm2 \n\t" + PAVGB" %%mm1, %%mm4 \n\t" + PAVGB" %%mm3, %%mm2 \n\t" + "punpcklbw %%mm7, %%mm4 \n\t" + "punpcklbw %%mm7, %%mm2 \n\t" +#else + "movd 12(%0, %%"REG_d"), %%mm4 \n\t" + "movd 12(%1, %%"REG_d"), %%mm1 \n\t" + "movd 15(%0, %%"REG_d"), %%mm2 \n\t" + "movd 15(%1, %%"REG_d"), %%mm3 \n\t" + "punpcklbw %%mm7, %%mm4 \n\t" + "punpcklbw %%mm7, %%mm1 \n\t" + "punpcklbw %%mm7, %%mm2 \n\t" + "punpcklbw %%mm7, %%mm3 \n\t" + "paddw %%mm1, %%mm4 \n\t" + "paddw %%mm3, %%mm2 \n\t" + "paddw %%mm2, %%mm4 \n\t" + "movd 18(%0, %%"REG_d"), %%mm5 \n\t" + "movd 18(%1, %%"REG_d"), %%mm1 \n\t" + "movd 21(%0, %%"REG_d"), %%mm2 \n\t" + "movd 21(%1, %%"REG_d"), %%mm3 \n\t" + "punpcklbw %%mm7, %%mm5 \n\t" + "punpcklbw %%mm7, %%mm1 \n\t" + "punpcklbw %%mm7, %%mm2 \n\t" + "punpcklbw %%mm7, %%mm3 \n\t" + "paddw %%mm1, %%mm5 \n\t" + "paddw %%mm3, %%mm2 \n\t" + "paddw %%mm5, %%mm2 \n\t" + "movq "MANGLE(ff_w1111)", %%mm5 \n\t" + "psrlw $2, %%mm4 \n\t" + "psrlw $2, %%mm2 \n\t" +#endif + "movq "MANGLE(ff_bgr2VCoeff)", %%mm1 \n\t" + "movq "MANGLE(ff_bgr2VCoeff)", %%mm3 \n\t" + + "pmaddwd %%mm4, %%mm1 \n\t" + "pmaddwd 
%%mm2, %%mm3 \n\t" + "pmaddwd %%mm6, %%mm4 \n\t" + "pmaddwd %%mm6, %%mm2 \n\t" +#ifndef FAST_BGR2YV12 + "psrad $8, %%mm4 \n\t" + "psrad $8, %%mm1 \n\t" + "psrad $8, %%mm2 \n\t" + "psrad $8, %%mm3 \n\t" +#endif + "packssdw %%mm2, %%mm4 \n\t" + "packssdw %%mm3, %%mm1 \n\t" + "pmaddwd %%mm5, %%mm4 \n\t" + "pmaddwd %%mm5, %%mm1 \n\t" + "add $24, %%"REG_d" \n\t" + "packssdw %%mm1, %%mm4 \n\t" // V3 V2 U3 U2 + "psraw $7, %%mm4 \n\t" + + "movq %%mm0, %%mm1 \n\t" + "punpckldq %%mm4, %%mm0 \n\t" + "punpckhdq %%mm4, %%mm1 \n\t" + "packsswb %%mm1, %%mm0 \n\t" + "paddb "MANGLE(ff_bgr2UVOffset)", %%mm0 \n\t" + "movd %%mm0, (%2, %%"REG_a") \n\t" + "punpckhdq %%mm0, %%mm0 \n\t" + "movd %%mm0, (%3, %%"REG_a") \n\t" + "add $4, %%"REG_a" \n\t" + " js 1b \n\t" + : : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6), "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" (-chromWidth) + : "%"REG_a, "%"REG_d + ); + + udst += chromStride; + vdst += chromStride; + src += srcStride*2; + } + + __asm__ volatile(EMMS" \n\t" + SFENCE" \n\t" + :::"memory"); + + rgb24toyv12_c(src, ydst, udst, vdst, width, height-y, lumStride, chromStride, srcStride); +} +#endif /* !COMPILE_TEMPLATE_SSE2 */ + +#if !COMPILE_TEMPLATE_AMD3DNOW +static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dest, + int width, int height, int src1Stride, + int src2Stride, int dstStride) +{ + int h; + + for (h=0; h < height; h++) { + int w; + +#if COMPILE_TEMPLATE_SSE2 + __asm__( + "xor %%"REG_a", %%"REG_a" \n\t" + "1: \n\t" + PREFETCH" 64(%1, %%"REG_a") \n\t" + PREFETCH" 64(%2, %%"REG_a") \n\t" + "movdqa (%1, %%"REG_a"), %%xmm0 \n\t" + "movdqa (%1, %%"REG_a"), %%xmm1 \n\t" + "movdqa (%2, %%"REG_a"), %%xmm2 \n\t" + "punpcklbw %%xmm2, %%xmm0 \n\t" + "punpckhbw %%xmm2, %%xmm1 \n\t" + "movntdq %%xmm0, (%0, %%"REG_a", 2) \n\t" + "movntdq %%xmm1, 16(%0, %%"REG_a", 2) \n\t" + "add $16, %%"REG_a" \n\t" + "cmp %3, %%"REG_a" \n\t" + " jb 1b \n\t" + ::"r"(dest), "r"(src1), "r"(src2), "r" 
((x86_reg)width-15) + : "memory", "%"REG_a"" + ); +#else + __asm__( + "xor %%"REG_a", %%"REG_a" \n\t" + "1: \n\t" + PREFETCH" 64(%1, %%"REG_a") \n\t" + PREFETCH" 64(%2, %%"REG_a") \n\t" + "movq (%1, %%"REG_a"), %%mm0 \n\t" + "movq 8(%1, %%"REG_a"), %%mm2 \n\t" + "movq %%mm0, %%mm1 \n\t" + "movq %%mm2, %%mm3 \n\t" + "movq (%2, %%"REG_a"), %%mm4 \n\t" + "movq 8(%2, %%"REG_a"), %%mm5 \n\t" + "punpcklbw %%mm4, %%mm0 \n\t" + "punpckhbw %%mm4, %%mm1 \n\t" + "punpcklbw %%mm5, %%mm2 \n\t" + "punpckhbw %%mm5, %%mm3 \n\t" + MOVNTQ" %%mm0, (%0, %%"REG_a", 2) \n\t" + MOVNTQ" %%mm1, 8(%0, %%"REG_a", 2) \n\t" + MOVNTQ" %%mm2, 16(%0, %%"REG_a", 2) \n\t" + MOVNTQ" %%mm3, 24(%0, %%"REG_a", 2) \n\t" + "add $16, %%"REG_a" \n\t" + "cmp %3, %%"REG_a" \n\t" + " jb 1b \n\t" + ::"r"(dest), "r"(src1), "r"(src2), "r" ((x86_reg)width-15) + : "memory", "%"REG_a + ); +#endif + for (w= (width&(~15)); w < width; w++) { + dest[2*w+0] = src1[w]; + dest[2*w+1] = src2[w]; + } + dest += dstStride; + src1 += src1Stride; + src2 += src2Stride; + } + __asm__( + EMMS" \n\t" + SFENCE" \n\t" + ::: "memory" + ); +} +#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ + +#if !COMPILE_TEMPLATE_SSE2 +#if !COMPILE_TEMPLATE_AMD3DNOW +static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2, + uint8_t *dst1, uint8_t *dst2, + int width, int height, + int srcStride1, int srcStride2, + int dstStride1, int dstStride2) +{ + x86_reg y; + int x,w,h; + w=width/2; h=height/2; + __asm__ volatile( + PREFETCH" %0 \n\t" + PREFETCH" %1 \n\t" + ::"m"(*(src1+srcStride1)),"m"(*(src2+srcStride2)):"memory"); + for (y=0;y<h;y++) { + const uint8_t* s1=src1+srcStride1*(y>>1); + uint8_t* d=dst1+dstStride1*y; + x=0; + for (;x<w-31;x+=32) { + __asm__ volatile( + PREFETCH" 32%1 \n\t" + "movq %1, %%mm0 \n\t" + "movq 8%1, %%mm2 \n\t" + "movq 16%1, %%mm4 \n\t" + "movq 24%1, %%mm6 \n\t" + "movq %%mm0, %%mm1 \n\t" + "movq %%mm2, %%mm3 \n\t" + "movq %%mm4, %%mm5 \n\t" + "movq %%mm6, %%mm7 \n\t" + "punpcklbw %%mm0, %%mm0 \n\t" + 
"punpckhbw %%mm1, %%mm1 \n\t" + "punpcklbw %%mm2, %%mm2 \n\t" + "punpckhbw %%mm3, %%mm3 \n\t" + "punpcklbw %%mm4, %%mm4 \n\t" + "punpckhbw %%mm5, %%mm5 \n\t" + "punpcklbw %%mm6, %%mm6 \n\t" + "punpckhbw %%mm7, %%mm7 \n\t" + MOVNTQ" %%mm0, %0 \n\t" + MOVNTQ" %%mm1, 8%0 \n\t" + MOVNTQ" %%mm2, 16%0 \n\t" + MOVNTQ" %%mm3, 24%0 \n\t" + MOVNTQ" %%mm4, 32%0 \n\t" + MOVNTQ" %%mm5, 40%0 \n\t" + MOVNTQ" %%mm6, 48%0 \n\t" + MOVNTQ" %%mm7, 56%0" + :"=m"(d[2*x]) + :"m"(s1[x]) + :"memory"); + } + for (;x<w;x++) d[2*x]=d[2*x+1]=s1[x]; + } + for (y=0;y<h;y++) { + const uint8_t* s2=src2+srcStride2*(y>>1); + uint8_t* d=dst2+dstStride2*y; + x=0; + for (;x<w-31;x+=32) { + __asm__ volatile( + PREFETCH" 32%1 \n\t" + "movq %1, %%mm0 \n\t" + "movq 8%1, %%mm2 \n\t" + "movq 16%1, %%mm4 \n\t" + "movq 24%1, %%mm6 \n\t" + "movq %%mm0, %%mm1 \n\t" + "movq %%mm2, %%mm3 \n\t" + "movq %%mm4, %%mm5 \n\t" + "movq %%mm6, %%mm7 \n\t" + "punpcklbw %%mm0, %%mm0 \n\t" + "punpckhbw %%mm1, %%mm1 \n\t" + "punpcklbw %%mm2, %%mm2 \n\t" + "punpckhbw %%mm3, %%mm3 \n\t" + "punpcklbw %%mm4, %%mm4 \n\t" + "punpckhbw %%mm5, %%mm5 \n\t" + "punpcklbw %%mm6, %%mm6 \n\t" + "punpckhbw %%mm7, %%mm7 \n\t" + MOVNTQ" %%mm0, %0 \n\t" + MOVNTQ" %%mm1, 8%0 \n\t" + MOVNTQ" %%mm2, 16%0 \n\t" + MOVNTQ" %%mm3, 24%0 \n\t" + MOVNTQ" %%mm4, 32%0 \n\t" + MOVNTQ" %%mm5, 40%0 \n\t" + MOVNTQ" %%mm6, 48%0 \n\t" + MOVNTQ" %%mm7, 56%0" + :"=m"(d[2*x]) + :"m"(s2[x]) + :"memory"); + } + for (;x<w;x++) d[2*x]=d[2*x+1]=s2[x]; + } + __asm__( + EMMS" \n\t" + SFENCE" \n\t" + ::: "memory" + ); +} + +static inline void RENAME(yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, + uint8_t *dst, + int width, int height, + int srcStride1, int srcStride2, + int srcStride3, int dstStride) +{ + x86_reg x; + int y,w,h; + w=width/2; h=height; + for (y=0;y<h;y++) { + const uint8_t* yp=src1+srcStride1*y; + const uint8_t* up=src2+srcStride2*(y>>2); + const uint8_t* vp=src3+srcStride3*(y>>2); + uint8_t* d=dst+dstStride*y; + x=0; + for 
(;x<w-7;x+=8) { + __asm__ volatile( + PREFETCH" 32(%1, %0) \n\t" + PREFETCH" 32(%2, %0) \n\t" + PREFETCH" 32(%3, %0) \n\t" + "movq (%1, %0, 4), %%mm0 \n\t" /* Y0Y1Y2Y3Y4Y5Y6Y7 */ + "movq (%2, %0), %%mm1 \n\t" /* U0U1U2U3U4U5U6U7 */ + "movq (%3, %0), %%mm2 \n\t" /* V0V1V2V3V4V5V6V7 */ + "movq %%mm0, %%mm3 \n\t" /* Y0Y1Y2Y3Y4Y5Y6Y7 */ + "movq %%mm1, %%mm4 \n\t" /* U0U1U2U3U4U5U6U7 */ + "movq %%mm2, %%mm5 \n\t" /* V0V1V2V3V4V5V6V7 */ + "punpcklbw %%mm1, %%mm1 \n\t" /* U0U0 U1U1 U2U2 U3U3 */ + "punpcklbw %%mm2, %%mm2 \n\t" /* V0V0 V1V1 V2V2 V3V3 */ + "punpckhbw %%mm4, %%mm4 \n\t" /* U4U4 U5U5 U6U6 U7U7 */ + "punpckhbw %%mm5, %%mm5 \n\t" /* V4V4 V5V5 V6V6 V7V7 */ + + "movq %%mm1, %%mm6 \n\t" + "punpcklbw %%mm2, %%mm1 \n\t" /* U0V0 U0V0 U1V1 U1V1*/ + "punpcklbw %%mm1, %%mm0 \n\t" /* Y0U0 Y1V0 Y2U0 Y3V0*/ + "punpckhbw %%mm1, %%mm3 \n\t" /* Y4U1 Y5V1 Y6U1 Y7V1*/ + MOVNTQ" %%mm0, (%4, %0, 8) \n\t" + MOVNTQ" %%mm3, 8(%4, %0, 8) \n\t" + + "punpckhbw %%mm2, %%mm6 \n\t" /* U2V2 U2V2 U3V3 U3V3*/ + "movq 8(%1, %0, 4), %%mm0 \n\t" + "movq %%mm0, %%mm3 \n\t" + "punpcklbw %%mm6, %%mm0 \n\t" /* Y U2 Y V2 Y U2 Y V2*/ + "punpckhbw %%mm6, %%mm3 \n\t" /* Y U3 Y V3 Y U3 Y V3*/ + MOVNTQ" %%mm0, 16(%4, %0, 8) \n\t" + MOVNTQ" %%mm3, 24(%4, %0, 8) \n\t" + + "movq %%mm4, %%mm6 \n\t" + "movq 16(%1, %0, 4), %%mm0 \n\t" + "movq %%mm0, %%mm3 \n\t" + "punpcklbw %%mm5, %%mm4 \n\t" + "punpcklbw %%mm4, %%mm0 \n\t" /* Y U4 Y V4 Y U4 Y V4*/ + "punpckhbw %%mm4, %%mm3 \n\t" /* Y U5 Y V5 Y U5 Y V5*/ + MOVNTQ" %%mm0, 32(%4, %0, 8) \n\t" + MOVNTQ" %%mm3, 40(%4, %0, 8) \n\t" + + "punpckhbw %%mm5, %%mm6 \n\t" + "movq 24(%1, %0, 4), %%mm0 \n\t" + "movq %%mm0, %%mm3 \n\t" + "punpcklbw %%mm6, %%mm0 \n\t" /* Y U6 Y V6 Y U6 Y V6*/ + "punpckhbw %%mm6, %%mm3 \n\t" /* Y U7 Y V7 Y U7 Y V7*/ + MOVNTQ" %%mm0, 48(%4, %0, 8) \n\t" + MOVNTQ" %%mm3, 56(%4, %0, 8) \n\t" + + : "+r" (x) + : "r"(yp), "r" (up), "r"(vp), "r"(d) + :"memory"); + } + for (; x<w; x++) { + const int x2 = x<<2; + d[8*x+0] = yp[x2]; + d[8*x+1] = up[x]; + 
d[8*x+2] = yp[x2+1]; + d[8*x+3] = vp[x]; + d[8*x+4] = yp[x2+2]; + d[8*x+5] = up[x]; + d[8*x+6] = yp[x2+3]; + d[8*x+7] = vp[x]; + } + } + __asm__( + EMMS" \n\t" + SFENCE" \n\t" + ::: "memory" + ); +} +#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ + +static void RENAME(extract_even)(const uint8_t *src, uint8_t *dst, x86_reg count) +{ + dst += count; + src += 2*count; + count= - count; + + if(count <= -16) { + count += 15; + __asm__ volatile( + "pcmpeqw %%mm7, %%mm7 \n\t" + "psrlw $8, %%mm7 \n\t" + "1: \n\t" + "movq -30(%1, %0, 2), %%mm0 \n\t" + "movq -22(%1, %0, 2), %%mm1 \n\t" + "movq -14(%1, %0, 2), %%mm2 \n\t" + "movq -6(%1, %0, 2), %%mm3 \n\t" + "pand %%mm7, %%mm0 \n\t" + "pand %%mm7, %%mm1 \n\t" + "pand %%mm7, %%mm2 \n\t" + "pand %%mm7, %%mm3 \n\t" + "packuswb %%mm1, %%mm0 \n\t" + "packuswb %%mm3, %%mm2 \n\t" + MOVNTQ" %%mm0,-15(%2, %0) \n\t" + MOVNTQ" %%mm2,- 7(%2, %0) \n\t" + "add $16, %0 \n\t" + " js 1b \n\t" + : "+r"(count) + : "r"(src), "r"(dst) + ); + count -= 15; + } + while(count<0) { + dst[count]= src[2*count]; + count++; + } +} + +#if !COMPILE_TEMPLATE_AMD3DNOW +static void RENAME(extract_even2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count) +{ + dst0+= count; + dst1+= count; + src += 4*count; + count= - count; + if(count <= -8) { + count += 7; + __asm__ volatile( + "pcmpeqw %%mm7, %%mm7 \n\t" + "psrlw $8, %%mm7 \n\t" + "1: \n\t" + "movq -28(%1, %0, 4), %%mm0 \n\t" + "movq -20(%1, %0, 4), %%mm1 \n\t" + "movq -12(%1, %0, 4), %%mm2 \n\t" + "movq -4(%1, %0, 4), %%mm3 \n\t" + "pand %%mm7, %%mm0 \n\t" + "pand %%mm7, %%mm1 \n\t" + "pand %%mm7, %%mm2 \n\t" + "pand %%mm7, %%mm3 \n\t" + "packuswb %%mm1, %%mm0 \n\t" + "packuswb %%mm3, %%mm2 \n\t" + "movq %%mm0, %%mm1 \n\t" + "movq %%mm2, %%mm3 \n\t" + "psrlw $8, %%mm0 \n\t" + "psrlw $8, %%mm2 \n\t" + "pand %%mm7, %%mm1 \n\t" + "pand %%mm7, %%mm3 \n\t" + "packuswb %%mm2, %%mm0 \n\t" + "packuswb %%mm3, %%mm1 \n\t" + MOVNTQ" %%mm0,- 7(%3, %0) \n\t" + MOVNTQ" %%mm1,- 7(%2, %0) \n\t" + "add $8, %0 \n\t" + 
" js 1b \n\t" + : "+r"(count) + : "r"(src), "r"(dst0), "r"(dst1) + ); + count -= 7; + } + while(count<0) { + dst0[count]= src[4*count+0]; + dst1[count]= src[4*count+2]; + count++; + } +} +#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ + +static void RENAME(extract_even2avg)(const uint8_t *src0, const uint8_t *src1, uint8_t *dst0, uint8_t *dst1, x86_reg count) +{ + dst0 += count; + dst1 += count; + src0 += 4*count; + src1 += 4*count; + count= - count; +#ifdef PAVGB + if(count <= -8) { + count += 7; + __asm__ volatile( + "pcmpeqw %%mm7, %%mm7 \n\t" + "psrlw $8, %%mm7 \n\t" + "1: \n\t" + "movq -28(%1, %0, 4), %%mm0 \n\t" + "movq -20(%1, %0, 4), %%mm1 \n\t" + "movq -12(%1, %0, 4), %%mm2 \n\t" + "movq -4(%1, %0, 4), %%mm3 \n\t" + PAVGB" -28(%2, %0, 4), %%mm0 \n\t" + PAVGB" -20(%2, %0, 4), %%mm1 \n\t" + PAVGB" -12(%2, %0, 4), %%mm2 \n\t" + PAVGB" - 4(%2, %0, 4), %%mm3 \n\t" + "pand %%mm7, %%mm0 \n\t" + "pand %%mm7, %%mm1 \n\t" + "pand %%mm7, %%mm2 \n\t" + "pand %%mm7, %%mm3 \n\t" + "packuswb %%mm1, %%mm0 \n\t" + "packuswb %%mm3, %%mm2 \n\t" + "movq %%mm0, %%mm1 \n\t" + "movq %%mm2, %%mm3 \n\t" + "psrlw $8, %%mm0 \n\t" + "psrlw $8, %%mm2 \n\t" + "pand %%mm7, %%mm1 \n\t" + "pand %%mm7, %%mm3 \n\t" + "packuswb %%mm2, %%mm0 \n\t" + "packuswb %%mm3, %%mm1 \n\t" + MOVNTQ" %%mm0,- 7(%4, %0) \n\t" + MOVNTQ" %%mm1,- 7(%3, %0) \n\t" + "add $8, %0 \n\t" + " js 1b \n\t" + : "+r"(count) + : "r"(src0), "r"(src1), "r"(dst0), "r"(dst1) + ); + count -= 7; + } +#endif + while(count<0) { + dst0[count]= (src0[4*count+0]+src1[4*count+0])>>1; + dst1[count]= (src0[4*count+2]+src1[4*count+2])>>1; + count++; + } +} + +#if !COMPILE_TEMPLATE_AMD3DNOW +static void RENAME(extract_odd2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count) +{ + dst0+= count; + dst1+= count; + src += 4*count; + count= - count; + if(count <= -8) { + count += 7; + __asm__ volatile( + "pcmpeqw %%mm7, %%mm7 \n\t" + "psrlw $8, %%mm7 \n\t" + "1: \n\t" + "movq -28(%1, %0, 4), %%mm0 \n\t" + "movq -20(%1, %0, 4), %%mm1 
\n\t" + "movq -12(%1, %0, 4), %%mm2 \n\t" + "movq -4(%1, %0, 4), %%mm3 \n\t" + "psrlw $8, %%mm0 \n\t" + "psrlw $8, %%mm1 \n\t" + "psrlw $8, %%mm2 \n\t" + "psrlw $8, %%mm3 \n\t" + "packuswb %%mm1, %%mm0 \n\t" + "packuswb %%mm3, %%mm2 \n\t" + "movq %%mm0, %%mm1 \n\t" + "movq %%mm2, %%mm3 \n\t" + "psrlw $8, %%mm0 \n\t" + "psrlw $8, %%mm2 \n\t" + "pand %%mm7, %%mm1 \n\t" + "pand %%mm7, %%mm3 \n\t" + "packuswb %%mm2, %%mm0 \n\t" + "packuswb %%mm3, %%mm1 \n\t" + MOVNTQ" %%mm0,- 7(%3, %0) \n\t" + MOVNTQ" %%mm1,- 7(%2, %0) \n\t" + "add $8, %0 \n\t" + " js 1b \n\t" + : "+r"(count) + : "r"(src), "r"(dst0), "r"(dst1) + ); + count -= 7; + } + src++; + while(count<0) { + dst0[count]= src[4*count+0]; + dst1[count]= src[4*count+2]; + count++; + } +} +#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ + +static void RENAME(extract_odd2avg)(const uint8_t *src0, const uint8_t *src1, uint8_t *dst0, uint8_t *dst1, x86_reg count) +{ + dst0 += count; + dst1 += count; + src0 += 4*count; + src1 += 4*count; + count= - count; +#ifdef PAVGB + if(count <= -8) { + count += 7; + __asm__ volatile( + "pcmpeqw %%mm7, %%mm7 \n\t" + "psrlw $8, %%mm7 \n\t" + "1: \n\t" + "movq -28(%1, %0, 4), %%mm0 \n\t" + "movq -20(%1, %0, 4), %%mm1 \n\t" + "movq -12(%1, %0, 4), %%mm2 \n\t" + "movq -4(%1, %0, 4), %%mm3 \n\t" + PAVGB" -28(%2, %0, 4), %%mm0 \n\t" + PAVGB" -20(%2, %0, 4), %%mm1 \n\t" + PAVGB" -12(%2, %0, 4), %%mm2 \n\t" + PAVGB" - 4(%2, %0, 4), %%mm3 \n\t" + "psrlw $8, %%mm0 \n\t" + "psrlw $8, %%mm1 \n\t" + "psrlw $8, %%mm2 \n\t" + "psrlw $8, %%mm3 \n\t" + "packuswb %%mm1, %%mm0 \n\t" + "packuswb %%mm3, %%mm2 \n\t" + "movq %%mm0, %%mm1 \n\t" + "movq %%mm2, %%mm3 \n\t" + "psrlw $8, %%mm0 \n\t" + "psrlw $8, %%mm2 \n\t" + "pand %%mm7, %%mm1 \n\t" + "pand %%mm7, %%mm3 \n\t" + "packuswb %%mm2, %%mm0 \n\t" + "packuswb %%mm3, %%mm1 \n\t" + MOVNTQ" %%mm0,- 7(%4, %0) \n\t" + MOVNTQ" %%mm1,- 7(%3, %0) \n\t" + "add $8, %0 \n\t" + " js 1b \n\t" + : "+r"(count) + : "r"(src0), "r"(src1), "r"(dst0), "r"(dst1) + ); + count -= 7; 
+ } +#endif + src0++; + src1++; + while(count<0) { + dst0[count]= (src0[4*count+0]+src1[4*count+0])>>1; + dst1[count]= (src0[4*count+2]+src1[4*count+2])>>1; + count++; + } +} + +static void RENAME(yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, + int width, int height, + int lumStride, int chromStride, int srcStride) +{ + int y; + const int chromWidth= -((-width)>>1); + + for (y=0; y<height; y++) { + RENAME(extract_even)(src, ydst, width); + if(y&1) { + RENAME(extract_odd2avg)(src-srcStride, src, udst, vdst, chromWidth); + udst+= chromStride; + vdst+= chromStride; + } + + src += srcStride; + ydst+= lumStride; + } + __asm__( + EMMS" \n\t" + SFENCE" \n\t" + ::: "memory" + ); +} + +#if !COMPILE_TEMPLATE_AMD3DNOW +static void RENAME(yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, + int width, int height, + int lumStride, int chromStride, int srcStride) +{ + int y; + const int chromWidth= -((-width)>>1); + + for (y=0; y<height; y++) { + RENAME(extract_even)(src, ydst, width); + RENAME(extract_odd2)(src, udst, vdst, chromWidth); + + src += srcStride; + ydst+= lumStride; + udst+= chromStride; + vdst+= chromStride; + } + __asm__( + EMMS" \n\t" + SFENCE" \n\t" + ::: "memory" + ); +} +#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ + +static void RENAME(uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, + int width, int height, + int lumStride, int chromStride, int srcStride) +{ + int y; + const int chromWidth= -((-width)>>1); + + for (y=0; y<height; y++) { + RENAME(extract_even)(src+1, ydst, width); + if(y&1) { + RENAME(extract_even2avg)(src-srcStride, src, udst, vdst, chromWidth); + udst+= chromStride; + vdst+= chromStride; + } + + src += srcStride; + ydst+= lumStride; + } + __asm__( + EMMS" \n\t" + SFENCE" \n\t" + ::: "memory" + ); +} + +#if !COMPILE_TEMPLATE_AMD3DNOW +static void RENAME(uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, + int width, int height, + 
int lumStride, int chromStride, int srcStride) +{ + int y; + const int chromWidth= -((-width)>>1); + + for (y=0; y<height; y++) { + RENAME(extract_even)(src+1, ydst, width); + RENAME(extract_even2)(src, udst, vdst, chromWidth); + + src += srcStride; + ydst+= lumStride; + udst+= chromStride; + vdst+= chromStride; + } + __asm__( + EMMS" \n\t" + SFENCE" \n\t" + ::: "memory" + ); +} +#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ +#endif /* !COMPILE_TEMPLATE_SSE2 */ + +static inline void RENAME(rgb2rgb_init)(void) +{ +#if !COMPILE_TEMPLATE_SSE2 +#if !COMPILE_TEMPLATE_AMD3DNOW + rgb15to16 = RENAME(rgb15to16); + rgb15tobgr24 = RENAME(rgb15tobgr24); + rgb15to32 = RENAME(rgb15to32); + rgb16tobgr24 = RENAME(rgb16tobgr24); + rgb16to32 = RENAME(rgb16to32); + rgb16to15 = RENAME(rgb16to15); + rgb24tobgr16 = RENAME(rgb24tobgr16); + rgb24tobgr15 = RENAME(rgb24tobgr15); + rgb24tobgr32 = RENAME(rgb24tobgr32); + rgb32to16 = RENAME(rgb32to16); + rgb32to15 = RENAME(rgb32to15); + rgb32tobgr24 = RENAME(rgb32tobgr24); + rgb24to15 = RENAME(rgb24to15); + rgb24to16 = RENAME(rgb24to16); + rgb24tobgr24 = RENAME(rgb24tobgr24); + shuffle_bytes_2103 = RENAME(shuffle_bytes_2103); + rgb32tobgr16 = RENAME(rgb32tobgr16); + rgb32tobgr15 = RENAME(rgb32tobgr15); + yv12toyuy2 = RENAME(yv12toyuy2); + yv12touyvy = RENAME(yv12touyvy); + yuv422ptoyuy2 = RENAME(yuv422ptoyuy2); + yuv422ptouyvy = RENAME(yuv422ptouyvy); + yuy2toyv12 = RENAME(yuy2toyv12); + vu9_to_vu12 = RENAME(vu9_to_vu12); + yvu9_to_yuy2 = RENAME(yvu9_to_yuy2); + uyvytoyuv422 = RENAME(uyvytoyuv422); + yuyvtoyuv422 = RENAME(yuyvtoyuv422); +#endif /* !COMPILE_TEMPLATE_SSE2 */ + +#if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW + planar2x = RENAME(planar2x); +#endif /* COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW */ + rgb24toyv12 = RENAME(rgb24toyv12); + + yuyvtoyuv420 = RENAME(yuyvtoyuv420); + uyvytoyuv420 = RENAME(uyvytoyuv420); +#endif /* COMPILE_TEMPLATE_SSE2 */ + +#if !COMPILE_TEMPLATE_AMD3DNOW + interleaveBytes = 
RENAME(interleaveBytes); +#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ +} diff --git a/libswscale/x86/swscale_mmx.c b/libswscale/x86/swscale_mmx.c new file mode 100644 index 0000000000..775d5f683d --- /dev/null +++ b/libswscale/x86/swscale_mmx.c @@ -0,0 +1,189 @@ +/* + * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <inttypes.h> +#include "config.h" +#include "libswscale/swscale.h" +#include "libswscale/swscale_internal.h" +#include "libavutil/intreadwrite.h" +#include "libavutil/x86_cpu.h" +#include "libavutil/cpu.h" +#include "libavutil/pixdesc.h" + +DECLARE_ASM_CONST(8, uint64_t, bF8)= 0xF8F8F8F8F8F8F8F8LL; +DECLARE_ASM_CONST(8, uint64_t, bFC)= 0xFCFCFCFCFCFCFCFCLL; +DECLARE_ASM_CONST(8, uint64_t, w10)= 0x0010001000100010LL; +DECLARE_ASM_CONST(8, uint64_t, w02)= 0x0002000200020002LL; +DECLARE_ASM_CONST(8, uint64_t, bm00001111)=0x00000000FFFFFFFFLL; +DECLARE_ASM_CONST(8, uint64_t, bm00000111)=0x0000000000FFFFFFLL; +DECLARE_ASM_CONST(8, uint64_t, bm11111000)=0xFFFFFFFFFF000000LL; +DECLARE_ASM_CONST(8, uint64_t, bm01010101)=0x00FF00FF00FF00FFLL; + +const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = { + 0x0103010301030103LL, + 0x0200020002000200LL,}; + +const DECLARE_ALIGNED(8, 
uint64_t, ff_dither8)[2] = { + 0x0602060206020602LL, + 0x0004000400040004LL,}; + +DECLARE_ASM_CONST(8, uint64_t, b16Mask)= 0x001F001F001F001FLL; +DECLARE_ASM_CONST(8, uint64_t, g16Mask)= 0x07E007E007E007E0LL; +DECLARE_ASM_CONST(8, uint64_t, r16Mask)= 0xF800F800F800F800LL; +DECLARE_ASM_CONST(8, uint64_t, b15Mask)= 0x001F001F001F001FLL; +DECLARE_ASM_CONST(8, uint64_t, g15Mask)= 0x03E003E003E003E0LL; +DECLARE_ASM_CONST(8, uint64_t, r15Mask)= 0x7C007C007C007C00LL; + +DECLARE_ALIGNED(8, const uint64_t, ff_M24A) = 0x00FF0000FF0000FFLL; +DECLARE_ALIGNED(8, const uint64_t, ff_M24B) = 0xFF0000FF0000FF00LL; +DECLARE_ALIGNED(8, const uint64_t, ff_M24C) = 0x0000FF0000FF0000LL; + +#ifdef FAST_BGR2YV12 +DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff) = 0x000000210041000DULL; +DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff) = 0x0000FFEEFFDC0038ULL; +DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff) = 0x00000038FFD2FFF8ULL; +#else +DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff) = 0x000020E540830C8BULL; +DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff) = 0x0000ED0FDAC23831ULL; +DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff) = 0x00003831D0E6F6EAULL; +#endif /* FAST_BGR2YV12 */ +DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YOffset) = 0x1010101010101010ULL; +DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL; +DECLARE_ALIGNED(8, const uint64_t, ff_w1111) = 0x0001000100010001ULL; + +DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toY1Coeff) = 0x0C88000040870C88ULL; +DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toY2Coeff) = 0x20DE4087000020DEULL; +DECLARE_ASM_CONST(8, uint64_t, ff_rgb24toY1Coeff) = 0x20DE0000408720DEULL; +DECLARE_ASM_CONST(8, uint64_t, ff_rgb24toY2Coeff) = 0x0C88408700000C88ULL; +DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toYOffset) = 0x0008010000080100ULL; + +DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUV)[2][4] = { + {0x38380000DAC83838ULL, 0xECFFDAC80000ECFFULL, 0xF6E40000D0E3F6E4ULL, 0x3838D0E300003838ULL}, + {0xECFF0000DAC8ECFFULL, 0x3838DAC800003838ULL, 
0x38380000D0E33838ULL, 0xF6E4D0E30000F6E4ULL}, +}; + +DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUVOffset)= 0x0040010000400100ULL; + +//MMX versions +#if HAVE_MMX +#undef RENAME +#define COMPILE_TEMPLATE_MMX2 0 +#define RENAME(a) a ## _MMX +#include "swscale_template.c" +#endif + +//MMX2 versions +#if HAVE_MMX2 +#undef RENAME +#undef COMPILE_TEMPLATE_MMX2 +#define COMPILE_TEMPLATE_MMX2 1 +#define RENAME(a) a ## _MMX2 +#include "swscale_template.c" +#endif + +void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufIndex, + int lastInLumBuf, int lastInChrBuf) +{ + const int dstH= c->dstH; + const int flags= c->flags; + int16_t **lumPixBuf= c->lumPixBuf; + int16_t **chrUPixBuf= c->chrUPixBuf; + int16_t **alpPixBuf= c->alpPixBuf; + const int vLumBufSize= c->vLumBufSize; + const int vChrBufSize= c->vChrBufSize; + int16_t *vLumFilterPos= c->vLumFilterPos; + int16_t *vChrFilterPos= c->vChrFilterPos; + int16_t *vLumFilter= c->vLumFilter; + int16_t *vChrFilter= c->vChrFilter; + int32_t *lumMmxFilter= c->lumMmxFilter; + int32_t *chrMmxFilter= c->chrMmxFilter; + int32_t av_unused *alpMmxFilter= c->alpMmxFilter; + const int vLumFilterSize= c->vLumFilterSize; + const int vChrFilterSize= c->vChrFilterSize; + const int chrDstY= dstY>>c->chrDstVSubSample; + const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input + const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input + + c->blueDither= ff_dither8[dstY&1]; + if (c->dstFormat == PIX_FMT_RGB555 || c->dstFormat == PIX_FMT_BGR555) + c->greenDither= ff_dither8[dstY&1]; + else + c->greenDither= ff_dither4[dstY&1]; + c->redDither= ff_dither8[(dstY+1)&1]; + if (dstY < dstH - 2) { + const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; + const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; + const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? 
(const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL; + int i; + if (flags & SWS_ACCURATE_RND) { + int s= APCK_SIZE / 8; + for (i=0; i<vLumFilterSize; i+=2) { + *(const void**)&lumMmxFilter[s*i ]= lumSrcPtr[i ]; + *(const void**)&lumMmxFilter[s*i+APCK_PTR2/4 ]= lumSrcPtr[i+(vLumFilterSize>1)]; + lumMmxFilter[s*i+APCK_COEF/4 ]= + lumMmxFilter[s*i+APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i ] + + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1]<<16 : 0); + if (CONFIG_SWSCALE_ALPHA && alpPixBuf) { + *(const void**)&alpMmxFilter[s*i ]= alpSrcPtr[i ]; + *(const void**)&alpMmxFilter[s*i+APCK_PTR2/4 ]= alpSrcPtr[i+(vLumFilterSize>1)]; + alpMmxFilter[s*i+APCK_COEF/4 ]= + alpMmxFilter[s*i+APCK_COEF/4+1]= lumMmxFilter[s*i+APCK_COEF/4 ]; + } + } + for (i=0; i<vChrFilterSize; i+=2) { + *(const void**)&chrMmxFilter[s*i ]= chrUSrcPtr[i ]; + *(const void**)&chrMmxFilter[s*i+APCK_PTR2/4 ]= chrUSrcPtr[i+(vChrFilterSize>1)]; + chrMmxFilter[s*i+APCK_COEF/4 ]= + chrMmxFilter[s*i+APCK_COEF/4+1]= vChrFilter[chrDstY*vChrFilterSize + i ] + + (vChrFilterSize>1 ? 
vChrFilter[chrDstY*vChrFilterSize + i + 1]<<16 : 0); + } + } else { + for (i=0; i<vLumFilterSize; i++) { + *(const void**)&lumMmxFilter[4*i+0]= lumSrcPtr[i]; + lumMmxFilter[4*i+2]= + lumMmxFilter[4*i+3]= + ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001; + if (CONFIG_SWSCALE_ALPHA && alpPixBuf) { + *(const void**)&alpMmxFilter[4*i+0]= alpSrcPtr[i]; + alpMmxFilter[4*i+2]= + alpMmxFilter[4*i+3]= lumMmxFilter[4*i+2]; + } + } + for (i=0; i<vChrFilterSize; i++) { + *(const void**)&chrMmxFilter[4*i+0]= chrUSrcPtr[i]; + chrMmxFilter[4*i+2]= + chrMmxFilter[4*i+3]= + ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001; + } + } + } +} + +void ff_sws_init_swScale_mmx(SwsContext *c) +{ + int cpu_flags = av_get_cpu_flags(); + + if (cpu_flags & AV_CPU_FLAG_MMX) + sws_init_swScale_MMX(c); +#if HAVE_MMX2 + if (cpu_flags & AV_CPU_FLAG_MMX2) + sws_init_swScale_MMX2(c); +#endif +} diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c new file mode 100644 index 0000000000..25399fadef --- /dev/null +++ b/libswscale/x86/swscale_template.c @@ -0,0 +1,2493 @@ +/* + * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#undef REAL_MOVNTQ +#undef MOVNTQ +#undef PREFETCH + +#if COMPILE_TEMPLATE_MMX2 +#define PREFETCH "prefetchnta" +#else +#define PREFETCH " # nop" +#endif + +#if COMPILE_TEMPLATE_MMX2 +#define REAL_MOVNTQ(a,b) "movntq " #a ", " #b " \n\t" +#else +#define REAL_MOVNTQ(a,b) "movq " #a ", " #b " \n\t" +#endif +#define MOVNTQ(a,b) REAL_MOVNTQ(a,b) + +#define YSCALEYUV2YV12X(offset, dest, end, pos) \ + __asm__ volatile(\ + "movq "DITHER16"+0(%0), %%mm3 \n\t"\ + "movq "DITHER16"+8(%0), %%mm4 \n\t"\ + "lea " offset "(%0), %%"REG_d" \n\t"\ + "mov (%%"REG_d"), %%"REG_S" \n\t"\ + ".p2align 4 \n\t" /* FIXME Unroll? */\ + "1: \n\t"\ + "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\ + "movq (%%"REG_S", %3, 2), %%mm2 \n\t" /* srcData */\ + "movq 8(%%"REG_S", %3, 2), %%mm5 \n\t" /* srcData */\ + "add $16, %%"REG_d" \n\t"\ + "mov (%%"REG_d"), %%"REG_S" \n\t"\ + "test %%"REG_S", %%"REG_S" \n\t"\ + "pmulhw %%mm0, %%mm2 \n\t"\ + "pmulhw %%mm0, %%mm5 \n\t"\ + "paddw %%mm2, %%mm3 \n\t"\ + "paddw %%mm5, %%mm4 \n\t"\ + " jnz 1b \n\t"\ + "psraw $3, %%mm3 \n\t"\ + "psraw $3, %%mm4 \n\t"\ + "packuswb %%mm4, %%mm3 \n\t"\ + MOVNTQ(%%mm3, (%1, %3))\ + "add $8, %3 \n\t"\ + "cmp %2, %3 \n\t"\ + "movq "DITHER16"+0(%0), %%mm3 \n\t"\ + "movq "DITHER16"+8(%0), %%mm4 \n\t"\ + "lea " offset "(%0), %%"REG_d" \n\t"\ + "mov (%%"REG_d"), %%"REG_S" \n\t"\ + "jb 1b \n\t"\ + :: "r" (&c->redDither),\ + "r" (dest), "g" ((x86_reg)(end)), "r"((x86_reg)(pos))\ + : "%"REG_d, "%"REG_S\ + ); + +static void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter, + const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrUSrc, + const int16_t **chrVSrc, + int chrFilterSize, const int16_t **alpSrc, + uint8_t *dest, uint8_t *uDest, uint8_t *vDest, + uint8_t 
*aDest, int dstW, int chrDstW, + const uint8_t *lumDither, const uint8_t *chrDither) +{ + int i; + if (uDest) { + x86_reg uv_off = c->uv_off; + for(i=0; i<8; i++) c->dither16[i] = chrDither[i]>>4; + YSCALEYUV2YV12X(CHR_MMX_FILTER_OFFSET, uDest, chrDstW, 0) + for(i=0; i<8; i++) c->dither16[i] = chrDither[(i+3)&7]>>4; + YSCALEYUV2YV12X(CHR_MMX_FILTER_OFFSET, vDest - uv_off, chrDstW + uv_off, uv_off) + } + for(i=0; i<8; i++) c->dither16[i] = lumDither[i]>>4; + if (CONFIG_SWSCALE_ALPHA && aDest) { + YSCALEYUV2YV12X(ALP_MMX_FILTER_OFFSET, aDest, dstW, 0) + } + + YSCALEYUV2YV12X(LUM_MMX_FILTER_OFFSET, dest, dstW, 0) +} + +/* Accurate-rounding vertical filter: the 32-bit accumulators mm4-mm7 start from the DITHER32 values for every group of 8 output pixels. They must not be cleared after the initial load, otherwise the first group is produced without its dither offset while later groups (reloaded at the bottom of the outer loop) keep it. */ +#define YSCALEYUV2YV12X_ACCURATE(offset, dest, end, pos) \ + __asm__ volatile(\ + "lea " offset "(%0), %%"REG_d" \n\t"\ + "movq "DITHER32"+0(%0), %%mm4 \n\t"\ + "movq "DITHER32"+8(%0), %%mm5 \n\t"\ + "movq "DITHER32"+16(%0), %%mm6 \n\t"\ + "movq "DITHER32"+24(%0), %%mm7 \n\t"\ + "mov (%%"REG_d"), %%"REG_S" \n\t"\ + ".p2align 4 \n\t"\ + "1: \n\t"\ + "movq (%%"REG_S", %3, 2), %%mm0 \n\t" /* srcData */\ + "movq 8(%%"REG_S", %3, 2), %%mm2 \n\t" /* srcData */\ + "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\ + "movq (%%"REG_S", %3, 2), %%mm1 \n\t" /* srcData */\ + "movq %%mm0, %%mm3 \n\t"\ + "punpcklwd %%mm1, %%mm0 \n\t"\ + "punpckhwd %%mm1, %%mm3 \n\t"\ + "movq "STR(APCK_COEF)"(%%"REG_d"), %%mm1 \n\t" /* filterCoeff */\ + "pmaddwd %%mm1, %%mm0 \n\t"\ + "pmaddwd %%mm1, %%mm3 \n\t"\ + "paddd %%mm0, %%mm4 \n\t"\ + "paddd %%mm3, %%mm5 \n\t"\ + "movq 8(%%"REG_S", %3, 2), %%mm3 \n\t" /* srcData */\ + "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\ + "add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\ + "test %%"REG_S", %%"REG_S" \n\t"\ + "movq %%mm2, %%mm0 \n\t"\ + "punpcklwd %%mm3, %%mm2 \n\t"\ + "punpckhwd %%mm3, %%mm0 \n\t"\ + "pmaddwd %%mm1, %%mm2 \n\t"\ + "pmaddwd %%mm1, %%mm0 \n\t"\ + "paddd %%mm2, %%mm6 \n\t"\ + "paddd %%mm0, %%mm7 \n\t"\ + " jnz 1b
\n\t"\ + "psrad $19, %%mm4 \n\t"\ + "psrad $19, %%mm5 \n\t"\ + "psrad $19, %%mm6 \n\t"\ + "psrad $19, %%mm7 \n\t"\ + "packssdw %%mm5, %%mm4 \n\t"\ + "packssdw %%mm7, %%mm6 \n\t"\ + "packuswb %%mm6, %%mm4 \n\t"\ + MOVNTQ(%%mm4, (%1, %3))\ + "add $8, %3 \n\t"\ + "cmp %2, %3 \n\t"\ + "lea " offset "(%0), %%"REG_d" \n\t"\ + "movq "DITHER32"+0(%0), %%mm4 \n\t"\ + "movq "DITHER32"+8(%0), %%mm5 \n\t"\ + "movq "DITHER32"+16(%0), %%mm6 \n\t"\ + "movq "DITHER32"+24(%0), %%mm7 \n\t"\ + "mov (%%"REG_d"), %%"REG_S" \n\t"\ + "jb 1b \n\t"\ + :: "r" (&c->redDither),\ + "r" (dest), "g" ((x86_reg)(end)), "r"((x86_reg)(pos))\ + : "%"REG_a, "%"REG_d, "%"REG_S\ + ); + +static void RENAME(yuv2yuvX_ar)(SwsContext *c, const int16_t *lumFilter, + const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrUSrc, + const int16_t **chrVSrc, + int chrFilterSize, const int16_t **alpSrc, + uint8_t *dest, uint8_t *uDest, uint8_t *vDest, + uint8_t *aDest, int dstW, int chrDstW, + const uint8_t *lumDither, const uint8_t *chrDither) +{ + int i; + if (uDest) { + x86_reg uv_off = c->uv_off; + for(i=0; i<8; i++) c->dither32[i] = chrDither[i]<<12; + YSCALEYUV2YV12X_ACCURATE(CHR_MMX_FILTER_OFFSET, uDest, chrDstW, 0) + for(i=0; i<8; i++) c->dither32[i] = chrDither[(i+3)&7]<<12; + YSCALEYUV2YV12X_ACCURATE(CHR_MMX_FILTER_OFFSET, vDest - uv_off, chrDstW + uv_off, uv_off) + } + for(i=0; i<8; i++) c->dither32[i] = lumDither[i]<<12; + if (CONFIG_SWSCALE_ALPHA && aDest) { + YSCALEYUV2YV12X_ACCURATE(ALP_MMX_FILTER_OFFSET, aDest, dstW, 0) + } + + YSCALEYUV2YV12X_ACCURATE(LUM_MMX_FILTER_OFFSET, dest, dstW, 0) +} + +static void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, + const int16_t *chrUSrc, const int16_t *chrVSrc, + const int16_t *alpSrc, + uint8_t *dest, uint8_t *uDest, uint8_t *vDest, + uint8_t *aDest, int dstW, int chrDstW, + const uint8_t *lumDither, const uint8_t *chrDither) +{ + int p= 4; + const int16_t *src[4]= { alpSrc + dstW, lumSrc + dstW, chrUSrc + chrDstW, 
chrVSrc + chrDstW }; + uint8_t *dst[4]= { aDest, dest, uDest, vDest }; + x86_reg counter[4]= { dstW, dstW, chrDstW, chrDstW }; + + while (p--) { + if (dst[p]) { + __asm__ volatile( + "mov %2, %%"REG_a" \n\t" + ".p2align 4 \n\t" /* FIXME Unroll? */ + "1: \n\t" + "movq (%0, %%"REG_a", 2), %%mm0 \n\t" + "movq 8(%0, %%"REG_a", 2), %%mm1 \n\t" + "psraw $7, %%mm0 \n\t" + "psraw $7, %%mm1 \n\t" + "packuswb %%mm1, %%mm0 \n\t" + MOVNTQ(%%mm0, (%1, %%REGa)) + "add $8, %%"REG_a" \n\t" + "jnc 1b \n\t" + :: "r" (src[p]), "r" (dst[p] + counter[p]), + "g" (-counter[p]) + : "%"REG_a + ); + } + } +} + +/* Unscaled vertical output with dithering: for each destination plane that is non-NULL, add the per-column dither values to the 16-bit source samples, shift right by 7 and store as unsigned bytes. Planes are indexed by p: 0 = alpha, 1 = luma, 2 = U, 3 = V. */ +static void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc, + const int16_t *chrUSrc, const int16_t *chrVSrc, + const int16_t *alpSrc, + uint8_t *dest, uint8_t *uDest, uint8_t *vDest, + uint8_t *aDest, int dstW, int chrDstW, + const uint8_t *lumDither, const uint8_t *chrDither) +{ + int p= 4; + const int16_t *src[4]= { alpSrc + dstW, lumSrc + dstW, chrUSrc + chrDstW, chrVSrc + chrDstW }; + uint8_t *dst[4]= { aDest, dest, uDest, vDest }; + x86_reg counter[4]= { dstW, dstW, chrDstW, chrDstW }; + + while (p--) { + if (dst[p]) { + int i; + /* Select the dither table by plane (p), not by pixel column (i): alpha and luma (p < 2) use lumDither, the chroma planes use chrDither. */ + for(i=0; i<8; i++) c->dither16[i] = p<2 ? lumDither[i] : chrDither[i]; + __asm__ volatile( + "mov %2, %%"REG_a" \n\t" + "movq 0(%3), %%mm6 \n\t" + "movq 8(%3), %%mm7 \n\t" + ".p2align 4 \n\t" /* FIXME Unroll?
*/ + "1: \n\t" + "movq (%0, %%"REG_a", 2), %%mm0 \n\t" + "movq 8(%0, %%"REG_a", 2), %%mm1 \n\t" + "paddsw %%mm6, %%mm0 \n\t" + "paddsw %%mm7, %%mm1 \n\t" + "psraw $7, %%mm0 \n\t" + "psraw $7, %%mm1 \n\t" + "packuswb %%mm1, %%mm0 \n\t" + MOVNTQ(%%mm0, (%1, %%REGa)) + "add $8, %%"REG_a" \n\t" + "jnc 1b \n\t" + :: "r" (src[p]), "r" (dst[p] + counter[p]), + "g" (-counter[p]), "r"(c->dither16) + : "%"REG_a + ); + } + } +} + +#define YSCALEYUV2PACKEDX_UV \ + __asm__ volatile(\ + "xor %%"REG_a", %%"REG_a" \n\t"\ + ".p2align 4 \n\t"\ + "nop \n\t"\ + "1: \n\t"\ + "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\ + "mov (%%"REG_d"), %%"REG_S" \n\t"\ + "movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\ + "movq %%mm3, %%mm4 \n\t"\ + ".p2align 4 \n\t"\ + "2: \n\t"\ + "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\ + "movq (%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* UsrcData */\ + "add %6, %%"REG_S" \n\t" \ + "movq (%%"REG_S", %%"REG_a"), %%mm5 \n\t" /* VsrcData */\ + "add $16, %%"REG_d" \n\t"\ + "mov (%%"REG_d"), %%"REG_S" \n\t"\ + "pmulhw %%mm0, %%mm2 \n\t"\ + "pmulhw %%mm0, %%mm5 \n\t"\ + "paddw %%mm2, %%mm3 \n\t"\ + "paddw %%mm5, %%mm4 \n\t"\ + "test %%"REG_S", %%"REG_S" \n\t"\ + " jnz 2b \n\t"\ + +#define YSCALEYUV2PACKEDX_YA(offset,coeff,src1,src2,dst1,dst2) \ + "lea "offset"(%0), %%"REG_d" \n\t"\ + "mov (%%"REG_d"), %%"REG_S" \n\t"\ + "movq "VROUNDER_OFFSET"(%0), "#dst1" \n\t"\ + "movq "#dst1", "#dst2" \n\t"\ + ".p2align 4 \n\t"\ + "2: \n\t"\ + "movq 8(%%"REG_d"), "#coeff" \n\t" /* filterCoeff */\ + "movq (%%"REG_S", %%"REG_a", 2), "#src1" \n\t" /* Y1srcData */\ + "movq 8(%%"REG_S", %%"REG_a", 2), "#src2" \n\t" /* Y2srcData */\ + "add $16, %%"REG_d" \n\t"\ + "mov (%%"REG_d"), %%"REG_S" \n\t"\ + "pmulhw "#coeff", "#src1" \n\t"\ + "pmulhw "#coeff", "#src2" \n\t"\ + "paddw "#src1", "#dst1" \n\t"\ + "paddw "#src2", "#dst2" \n\t"\ + "test %%"REG_S", %%"REG_S" \n\t"\ + " jnz 2b \n\t"\ + +#define YSCALEYUV2PACKEDX \ + YSCALEYUV2PACKEDX_UV \ + 
YSCALEYUV2PACKEDX_YA(LUM_MMX_FILTER_OFFSET,%%mm0,%%mm2,%%mm5,%%mm1,%%mm7) \ + +#define YSCALEYUV2PACKEDX_END \ + :: "r" (&c->redDither), \ + "m" (dummy), "m" (dummy), "m" (dummy),\ + "r" (dest), "m" (dstW_reg), "m"(uv_off) \ + : "%"REG_a, "%"REG_d, "%"REG_S \ + ); + +#define YSCALEYUV2PACKEDX_ACCURATE_UV \ + __asm__ volatile(\ + "xor %%"REG_a", %%"REG_a" \n\t"\ + ".p2align 4 \n\t"\ + "nop \n\t"\ + "1: \n\t"\ + "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\ + "mov (%%"REG_d"), %%"REG_S" \n\t"\ + "pxor %%mm4, %%mm4 \n\t"\ + "pxor %%mm5, %%mm5 \n\t"\ + "pxor %%mm6, %%mm6 \n\t"\ + "pxor %%mm7, %%mm7 \n\t"\ + ".p2align 4 \n\t"\ + "2: \n\t"\ + "movq (%%"REG_S", %%"REG_a"), %%mm0 \n\t" /* UsrcData */\ + "add %6, %%"REG_S" \n\t" \ + "movq (%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* VsrcData */\ + "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\ + "movq (%%"REG_S", %%"REG_a"), %%mm1 \n\t" /* UsrcData */\ + "movq %%mm0, %%mm3 \n\t"\ + "punpcklwd %%mm1, %%mm0 \n\t"\ + "punpckhwd %%mm1, %%mm3 \n\t"\ + "movq "STR(APCK_COEF)"(%%"REG_d"),%%mm1 \n\t" /* filterCoeff */\ + "pmaddwd %%mm1, %%mm0 \n\t"\ + "pmaddwd %%mm1, %%mm3 \n\t"\ + "paddd %%mm0, %%mm4 \n\t"\ + "paddd %%mm3, %%mm5 \n\t"\ + "add %6, %%"REG_S" \n\t" \ + "movq (%%"REG_S", %%"REG_a"), %%mm3 \n\t" /* VsrcData */\ + "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\ + "add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\ + "test %%"REG_S", %%"REG_S" \n\t"\ + "movq %%mm2, %%mm0 \n\t"\ + "punpcklwd %%mm3, %%mm2 \n\t"\ + "punpckhwd %%mm3, %%mm0 \n\t"\ + "pmaddwd %%mm1, %%mm2 \n\t"\ + "pmaddwd %%mm1, %%mm0 \n\t"\ + "paddd %%mm2, %%mm6 \n\t"\ + "paddd %%mm0, %%mm7 \n\t"\ + " jnz 2b \n\t"\ + "psrad $16, %%mm4 \n\t"\ + "psrad $16, %%mm5 \n\t"\ + "psrad $16, %%mm6 \n\t"\ + "psrad $16, %%mm7 \n\t"\ + "movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\ + "packssdw %%mm5, %%mm4 \n\t"\ + "packssdw %%mm7, %%mm6 \n\t"\ + "paddw %%mm0, %%mm4 \n\t"\ + "paddw %%mm0, %%mm6 \n\t"\ + "movq %%mm4, "U_TEMP"(%0) \n\t"\ + "movq %%mm6, "V_TEMP"(%0) \n\t"\ + +#define 
YSCALEYUV2PACKEDX_ACCURATE_YA(offset) \ + "lea "offset"(%0), %%"REG_d" \n\t"\ + "mov (%%"REG_d"), %%"REG_S" \n\t"\ + "pxor %%mm1, %%mm1 \n\t"\ + "pxor %%mm5, %%mm5 \n\t"\ + "pxor %%mm7, %%mm7 \n\t"\ + "pxor %%mm6, %%mm6 \n\t"\ + ".p2align 4 \n\t"\ + "2: \n\t"\ + "movq (%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" /* Y1srcData */\ + "movq 8(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* Y2srcData */\ + "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\ + "movq (%%"REG_S", %%"REG_a", 2), %%mm4 \n\t" /* Y1srcData */\ + "movq %%mm0, %%mm3 \n\t"\ + "punpcklwd %%mm4, %%mm0 \n\t"\ + "punpckhwd %%mm4, %%mm3 \n\t"\ + "movq "STR(APCK_COEF)"(%%"REG_d"), %%mm4 \n\t" /* filterCoeff */\ + "pmaddwd %%mm4, %%mm0 \n\t"\ + "pmaddwd %%mm4, %%mm3 \n\t"\ + "paddd %%mm0, %%mm1 \n\t"\ + "paddd %%mm3, %%mm5 \n\t"\ + "movq 8(%%"REG_S", %%"REG_a", 2), %%mm3 \n\t" /* Y2srcData */\ + "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\ + "add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\ + "test %%"REG_S", %%"REG_S" \n\t"\ + "movq %%mm2, %%mm0 \n\t"\ + "punpcklwd %%mm3, %%mm2 \n\t"\ + "punpckhwd %%mm3, %%mm0 \n\t"\ + "pmaddwd %%mm4, %%mm2 \n\t"\ + "pmaddwd %%mm4, %%mm0 \n\t"\ + "paddd %%mm2, %%mm7 \n\t"\ + "paddd %%mm0, %%mm6 \n\t"\ + " jnz 2b \n\t"\ + "psrad $16, %%mm1 \n\t"\ + "psrad $16, %%mm5 \n\t"\ + "psrad $16, %%mm7 \n\t"\ + "psrad $16, %%mm6 \n\t"\ + "movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\ + "packssdw %%mm5, %%mm1 \n\t"\ + "packssdw %%mm6, %%mm7 \n\t"\ + "paddw %%mm0, %%mm1 \n\t"\ + "paddw %%mm0, %%mm7 \n\t"\ + "movq "U_TEMP"(%0), %%mm3 \n\t"\ + "movq "V_TEMP"(%0), %%mm4 \n\t"\ + +#define YSCALEYUV2PACKEDX_ACCURATE \ + YSCALEYUV2PACKEDX_ACCURATE_UV \ + YSCALEYUV2PACKEDX_ACCURATE_YA(LUM_MMX_FILTER_OFFSET) + +#define YSCALEYUV2RGBX \ + "psubw "U_OFFSET"(%0), %%mm3 \n\t" /* (U-128)8*/\ + "psubw "V_OFFSET"(%0), %%mm4 \n\t" /* (V-128)8*/\ + "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ + "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\ + "pmulhw "UG_COEFF"(%0), %%mm3 \n\t"\ + "pmulhw "VG_COEFF"(%0), %%mm4 \n\t"\ + /* 
mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\ + "pmulhw "UB_COEFF"(%0), %%mm2 \n\t"\ + "pmulhw "VR_COEFF"(%0), %%mm5 \n\t"\ + "psubw "Y_OFFSET"(%0), %%mm1 \n\t" /* 8(Y-16)*/\ + "psubw "Y_OFFSET"(%0), %%mm7 \n\t" /* 8(Y-16)*/\ + "pmulhw "Y_COEFF"(%0), %%mm1 \n\t"\ + "pmulhw "Y_COEFF"(%0), %%mm7 \n\t"\ + /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\ + "paddw %%mm3, %%mm4 \n\t"\ + "movq %%mm2, %%mm0 \n\t"\ + "movq %%mm5, %%mm6 \n\t"\ + "movq %%mm4, %%mm3 \n\t"\ + "punpcklwd %%mm2, %%mm2 \n\t"\ + "punpcklwd %%mm5, %%mm5 \n\t"\ + "punpcklwd %%mm4, %%mm4 \n\t"\ + "paddw %%mm1, %%mm2 \n\t"\ + "paddw %%mm1, %%mm5 \n\t"\ + "paddw %%mm1, %%mm4 \n\t"\ + "punpckhwd %%mm0, %%mm0 \n\t"\ + "punpckhwd %%mm6, %%mm6 \n\t"\ + "punpckhwd %%mm3, %%mm3 \n\t"\ + "paddw %%mm7, %%mm0 \n\t"\ + "paddw %%mm7, %%mm6 \n\t"\ + "paddw %%mm7, %%mm3 \n\t"\ + /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\ + "packuswb %%mm0, %%mm2 \n\t"\ + "packuswb %%mm6, %%mm5 \n\t"\ + "packuswb %%mm3, %%mm4 \n\t"\ + +#define REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) \ + "movq "#b", "#q2" \n\t" /* B */\ + "movq "#r", "#t" \n\t" /* R */\ + "punpcklbw "#g", "#b" \n\t" /* GBGBGBGB 0 */\ + "punpcklbw "#a", "#r" \n\t" /* ARARARAR 0 */\ + "punpckhbw "#g", "#q2" \n\t" /* GBGBGBGB 2 */\ + "punpckhbw "#a", "#t" \n\t" /* ARARARAR 2 */\ + "movq "#b", "#q0" \n\t" /* GBGBGBGB 0 */\ + "movq "#q2", "#q3" \n\t" /* GBGBGBGB 2 */\ + "punpcklwd "#r", "#q0" \n\t" /* ARGBARGB 0 */\ + "punpckhwd "#r", "#b" \n\t" /* ARGBARGB 1 */\ + "punpcklwd "#t", "#q2" \n\t" /* ARGBARGB 2 */\ + "punpckhwd "#t", "#q3" \n\t" /* ARGBARGB 3 */\ +\ + MOVNTQ( q0, (dst, index, 4))\ + MOVNTQ( b, 8(dst, index, 4))\ + MOVNTQ( q2, 16(dst, index, 4))\ + MOVNTQ( q3, 24(dst, index, 4))\ +\ + "add $8, "#index" \n\t"\ + "cmp "#dstw", "#index" \n\t"\ + " jb 1b \n\t" +#define WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) + +static void 
RENAME(yuv2rgb32_X_ar)(SwsContext *c, const int16_t *lumFilter, + const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrUSrc, + const int16_t **chrVSrc, + int chrFilterSize, const int16_t **alpSrc, + uint8_t *dest, int dstW, int dstY) +{ + x86_reg dummy=0; + x86_reg dstW_reg = dstW; + x86_reg uv_off = c->uv_off << 1; + + if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) { + YSCALEYUV2PACKEDX_ACCURATE + YSCALEYUV2RGBX + "movq %%mm2, "U_TEMP"(%0) \n\t" + "movq %%mm4, "V_TEMP"(%0) \n\t" + "movq %%mm5, "Y_TEMP"(%0) \n\t" + YSCALEYUV2PACKEDX_ACCURATE_YA(ALP_MMX_FILTER_OFFSET) + "movq "Y_TEMP"(%0), %%mm5 \n\t" + "psraw $3, %%mm1 \n\t" + "psraw $3, %%mm7 \n\t" + "packuswb %%mm7, %%mm1 \n\t" + WRITEBGR32(%4, %5, %%REGa, %%mm3, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm2, %%mm6) + YSCALEYUV2PACKEDX_END + } else { + YSCALEYUV2PACKEDX_ACCURATE + YSCALEYUV2RGBX + "pcmpeqd %%mm7, %%mm7 \n\t" + WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) + YSCALEYUV2PACKEDX_END + } +} + +static void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter, + const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrUSrc, + const int16_t **chrVSrc, + int chrFilterSize, const int16_t **alpSrc, + uint8_t *dest, int dstW, int dstY) +{ + x86_reg dummy=0; + x86_reg dstW_reg = dstW; + x86_reg uv_off = c->uv_off << 1; + + if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) { + YSCALEYUV2PACKEDX + YSCALEYUV2RGBX + YSCALEYUV2PACKEDX_YA(ALP_MMX_FILTER_OFFSET, %%mm0, %%mm3, %%mm6, %%mm1, %%mm7) + "psraw $3, %%mm1 \n\t" + "psraw $3, %%mm7 \n\t" + "packuswb %%mm7, %%mm1 \n\t" + WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6) + YSCALEYUV2PACKEDX_END + } else { + YSCALEYUV2PACKEDX + YSCALEYUV2RGBX + "pcmpeqd %%mm7, %%mm7 \n\t" + WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) + YSCALEYUV2PACKEDX_END + } +} + +#define REAL_WRITERGB16(dst, dstw, index) \ + 
"pand "MANGLE(bF8)", %%mm2 \n\t" /* B */\ + "pand "MANGLE(bFC)", %%mm4 \n\t" /* G */\ + "pand "MANGLE(bF8)", %%mm5 \n\t" /* R */\ + "psrlq $3, %%mm2 \n\t"\ +\ + "movq %%mm2, %%mm1 \n\t"\ + "movq %%mm4, %%mm3 \n\t"\ +\ + "punpcklbw %%mm7, %%mm3 \n\t"\ + "punpcklbw %%mm5, %%mm2 \n\t"\ + "punpckhbw %%mm7, %%mm4 \n\t"\ + "punpckhbw %%mm5, %%mm1 \n\t"\ +\ + "psllq $3, %%mm3 \n\t"\ + "psllq $3, %%mm4 \n\t"\ +\ + "por %%mm3, %%mm2 \n\t"\ + "por %%mm4, %%mm1 \n\t"\ +\ + MOVNTQ(%%mm2, (dst, index, 2))\ + MOVNTQ(%%mm1, 8(dst, index, 2))\ +\ + "add $8, "#index" \n\t"\ + "cmp "#dstw", "#index" \n\t"\ + " jb 1b \n\t" +#define WRITERGB16(dst, dstw, index) REAL_WRITERGB16(dst, dstw, index) + +static void RENAME(yuv2rgb565_X_ar)(SwsContext *c, const int16_t *lumFilter, + const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrUSrc, + const int16_t **chrVSrc, + int chrFilterSize, const int16_t **alpSrc, + uint8_t *dest, int dstW, int dstY) +{ + x86_reg dummy=0; + x86_reg dstW_reg = dstW; + x86_reg uv_off = c->uv_off << 1; + + YSCALEYUV2PACKEDX_ACCURATE + YSCALEYUV2RGBX + "pxor %%mm7, %%mm7 \n\t" + /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ +#ifdef DITHER1XBPP + "paddusb "BLUE_DITHER"(%0), %%mm2\n\t" + "paddusb "GREEN_DITHER"(%0), %%mm4\n\t" + "paddusb "RED_DITHER"(%0), %%mm5\n\t" +#endif + WRITERGB16(%4, %5, %%REGa) + YSCALEYUV2PACKEDX_END +} + +static void RENAME(yuv2rgb565_X)(SwsContext *c, const int16_t *lumFilter, + const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrUSrc, + const int16_t **chrVSrc, + int chrFilterSize, const int16_t **alpSrc, + uint8_t *dest, int dstW, int dstY) +{ + x86_reg dummy=0; + x86_reg dstW_reg = dstW; + x86_reg uv_off = c->uv_off << 1; + + YSCALEYUV2PACKEDX + YSCALEYUV2RGBX + "pxor %%mm7, %%mm7 \n\t" + /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ +#ifdef DITHER1XBPP + "paddusb "BLUE_DITHER"(%0), %%mm2 \n\t" + "paddusb "GREEN_DITHER"(%0), %%mm4 \n\t" + "paddusb "RED_DITHER"(%0), %%mm5 \n\t" 
+#endif + WRITERGB16(%4, %5, %%REGa) + YSCALEYUV2PACKEDX_END +} + +#define REAL_WRITERGB15(dst, dstw, index) \ + "pand "MANGLE(bF8)", %%mm2 \n\t" /* B */\ + "pand "MANGLE(bF8)", %%mm4 \n\t" /* G */\ + "pand "MANGLE(bF8)", %%mm5 \n\t" /* R */\ + "psrlq $3, %%mm2 \n\t"\ + "psrlq $1, %%mm5 \n\t"\ +\ + "movq %%mm2, %%mm1 \n\t"\ + "movq %%mm4, %%mm3 \n\t"\ +\ + "punpcklbw %%mm7, %%mm3 \n\t"\ + "punpcklbw %%mm5, %%mm2 \n\t"\ + "punpckhbw %%mm7, %%mm4 \n\t"\ + "punpckhbw %%mm5, %%mm1 \n\t"\ +\ + "psllq $2, %%mm3 \n\t"\ + "psllq $2, %%mm4 \n\t"\ +\ + "por %%mm3, %%mm2 \n\t"\ + "por %%mm4, %%mm1 \n\t"\ +\ + MOVNTQ(%%mm2, (dst, index, 2))\ + MOVNTQ(%%mm1, 8(dst, index, 2))\ +\ + "add $8, "#index" \n\t"\ + "cmp "#dstw", "#index" \n\t"\ + " jb 1b \n\t" +#define WRITERGB15(dst, dstw, index) REAL_WRITERGB15(dst, dstw, index) + +static void RENAME(yuv2rgb555_X_ar)(SwsContext *c, const int16_t *lumFilter, + const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrUSrc, + const int16_t **chrVSrc, + int chrFilterSize, const int16_t **alpSrc, + uint8_t *dest, int dstW, int dstY) +{ + x86_reg dummy=0; + x86_reg dstW_reg = dstW; + x86_reg uv_off = c->uv_off << 1; + + YSCALEYUV2PACKEDX_ACCURATE + YSCALEYUV2RGBX + "pxor %%mm7, %%mm7 \n\t" + /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ +#ifdef DITHER1XBPP + "paddusb "BLUE_DITHER"(%0), %%mm2\n\t" + "paddusb "GREEN_DITHER"(%0), %%mm4\n\t" + "paddusb "RED_DITHER"(%0), %%mm5\n\t" +#endif + WRITERGB15(%4, %5, %%REGa) + YSCALEYUV2PACKEDX_END +} + +static void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter, + const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrUSrc, + const int16_t **chrVSrc, + int chrFilterSize, const int16_t **alpSrc, + uint8_t *dest, int dstW, int dstY) +{ + x86_reg dummy=0; + x86_reg dstW_reg = dstW; + x86_reg uv_off = c->uv_off << 1; + + YSCALEYUV2PACKEDX + YSCALEYUV2RGBX + "pxor %%mm7, %%mm7 \n\t" + /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ +#ifdef 
DITHER1XBPP + "paddusb "BLUE_DITHER"(%0), %%mm2 \n\t" + "paddusb "GREEN_DITHER"(%0), %%mm4 \n\t" + "paddusb "RED_DITHER"(%0), %%mm5 \n\t" +#endif + WRITERGB15(%4, %5, %%REGa) + YSCALEYUV2PACKEDX_END +} + +#define WRITEBGR24MMX(dst, dstw, index) \ + /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\ + "movq %%mm2, %%mm1 \n\t" /* B */\ + "movq %%mm5, %%mm6 \n\t" /* R */\ + "punpcklbw %%mm4, %%mm2 \n\t" /* GBGBGBGB 0 */\ + "punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R 0 */\ + "punpckhbw %%mm4, %%mm1 \n\t" /* GBGBGBGB 2 */\ + "punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R 2 */\ + "movq %%mm2, %%mm0 \n\t" /* GBGBGBGB 0 */\ + "movq %%mm1, %%mm3 \n\t" /* GBGBGBGB 2 */\ + "punpcklwd %%mm5, %%mm0 \n\t" /* 0RGB0RGB 0 */\ + "punpckhwd %%mm5, %%mm2 \n\t" /* 0RGB0RGB 1 */\ + "punpcklwd %%mm6, %%mm1 \n\t" /* 0RGB0RGB 2 */\ + "punpckhwd %%mm6, %%mm3 \n\t" /* 0RGB0RGB 3 */\ +\ + "movq %%mm0, %%mm4 \n\t" /* 0RGB0RGB 0 */\ + "movq %%mm2, %%mm6 \n\t" /* 0RGB0RGB 1 */\ + "movq %%mm1, %%mm5 \n\t" /* 0RGB0RGB 2 */\ + "movq %%mm3, %%mm7 \n\t" /* 0RGB0RGB 3 */\ +\ + "psllq $40, %%mm0 \n\t" /* RGB00000 0 */\ + "psllq $40, %%mm2 \n\t" /* RGB00000 1 */\ + "psllq $40, %%mm1 \n\t" /* RGB00000 2 */\ + "psllq $40, %%mm3 \n\t" /* RGB00000 3 */\ +\ + "punpckhdq %%mm4, %%mm0 \n\t" /* 0RGBRGB0 0 */\ + "punpckhdq %%mm6, %%mm2 \n\t" /* 0RGBRGB0 1 */\ + "punpckhdq %%mm5, %%mm1 \n\t" /* 0RGBRGB0 2 */\ + "punpckhdq %%mm7, %%mm3 \n\t" /* 0RGBRGB0 3 */\ +\ + "psrlq $8, %%mm0 \n\t" /* 00RGBRGB 0 */\ + "movq %%mm2, %%mm6 \n\t" /* 0RGBRGB0 1 */\ + "psllq $40, %%mm2 \n\t" /* GB000000 1 */\ + "por %%mm2, %%mm0 \n\t" /* GBRGBRGB 0 */\ + MOVNTQ(%%mm0, (dst))\ +\ + "psrlq $24, %%mm6 \n\t" /* 0000RGBR 1 */\ + "movq %%mm1, %%mm5 \n\t" /* 0RGBRGB0 2 */\ + "psllq $24, %%mm1 \n\t" /* BRGB0000 2 */\ + "por %%mm1, %%mm6 \n\t" /* BRGBRGBR 1 */\ + MOVNTQ(%%mm6, 8(dst))\ +\ + "psrlq $40, %%mm5 \n\t" /* 000000RG 2 */\ + "psllq $8, %%mm3 \n\t" /* RGBRGB00 3 */\ + "por %%mm3, %%mm5 \n\t" /* RGBRGBRG 2 */\ + MOVNTQ(%%mm5, 16(dst))\ +\ + "add $24, 
"#dst" \n\t"\ +\ + "add $8, "#index" \n\t"\ + "cmp "#dstw", "#index" \n\t"\ + " jb 1b \n\t" + +#define WRITEBGR24MMX2(dst, dstw, index) \ + /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\ + "movq "MANGLE(ff_M24A)", %%mm0 \n\t"\ + "movq "MANGLE(ff_M24C)", %%mm7 \n\t"\ + "pshufw $0x50, %%mm2, %%mm1 \n\t" /* B3 B2 B3 B2 B1 B0 B1 B0 */\ + "pshufw $0x50, %%mm4, %%mm3 \n\t" /* G3 G2 G3 G2 G1 G0 G1 G0 */\ + "pshufw $0x00, %%mm5, %%mm6 \n\t" /* R1 R0 R1 R0 R1 R0 R1 R0 */\ +\ + "pand %%mm0, %%mm1 \n\t" /* B2 B1 B0 */\ + "pand %%mm0, %%mm3 \n\t" /* G2 G1 G0 */\ + "pand %%mm7, %%mm6 \n\t" /* R1 R0 */\ +\ + "psllq $8, %%mm3 \n\t" /* G2 G1 G0 */\ + "por %%mm1, %%mm6 \n\t"\ + "por %%mm3, %%mm6 \n\t"\ + MOVNTQ(%%mm6, (dst))\ +\ + "psrlq $8, %%mm4 \n\t" /* 00 G7 G6 G5 G4 G3 G2 G1 */\ + "pshufw $0xA5, %%mm2, %%mm1 \n\t" /* B5 B4 B5 B4 B3 B2 B3 B2 */\ + "pshufw $0x55, %%mm4, %%mm3 \n\t" /* G4 G3 G4 G3 G4 G3 G4 G3 */\ + "pshufw $0xA5, %%mm5, %%mm6 \n\t" /* R5 R4 R5 R4 R3 R2 R3 R2 */\ +\ + "pand "MANGLE(ff_M24B)", %%mm1 \n\t" /* B5 B4 B3 */\ + "pand %%mm7, %%mm3 \n\t" /* G4 G3 */\ + "pand %%mm0, %%mm6 \n\t" /* R4 R3 R2 */\ +\ + "por %%mm1, %%mm3 \n\t" /* B5 G4 B4 G3 B3 */\ + "por %%mm3, %%mm6 \n\t"\ + MOVNTQ(%%mm6, 8(dst))\ +\ + "pshufw $0xFF, %%mm2, %%mm1 \n\t" /* B7 B6 B7 B6 B7 B6 B6 B7 */\ + "pshufw $0xFA, %%mm4, %%mm3 \n\t" /* 00 G7 00 G7 G6 G5 G6 G5 */\ + "pshufw $0xFA, %%mm5, %%mm6 \n\t" /* R7 R6 R7 R6 R5 R4 R5 R4 */\ +\ + "pand %%mm7, %%mm1 \n\t" /* B7 B6 */\ + "pand %%mm0, %%mm3 \n\t" /* G7 G6 G5 */\ + "pand "MANGLE(ff_M24B)", %%mm6 \n\t" /* R7 R6 R5 */\ +\ + "por %%mm1, %%mm3 \n\t"\ + "por %%mm3, %%mm6 \n\t"\ + MOVNTQ(%%mm6, 16(dst))\ +\ + "add $24, "#dst" \n\t"\ +\ + "add $8, "#index" \n\t"\ + "cmp "#dstw", "#index" \n\t"\ + " jb 1b \n\t" + +#if COMPILE_TEMPLATE_MMX2 +#undef WRITEBGR24 +#define WRITEBGR24(dst, dstw, index) WRITEBGR24MMX2(dst, dstw, index) +#else +#undef WRITEBGR24 +#define WRITEBGR24(dst, dstw, index) WRITEBGR24MMX(dst, dstw, index) +#endif + +static void 
RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilter, + const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrUSrc, + const int16_t **chrVSrc, + int chrFilterSize, const int16_t **alpSrc, + uint8_t *dest, int dstW, int dstY) +{ + x86_reg dummy=0; + x86_reg dstW_reg = dstW; + x86_reg uv_off = c->uv_off << 1; + + YSCALEYUV2PACKEDX_ACCURATE + YSCALEYUV2RGBX + "pxor %%mm7, %%mm7 \n\t" + "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"\n\t" //FIXME optimize + "add %4, %%"REG_c" \n\t" + WRITEBGR24(%%REGc, %5, %%REGa) + :: "r" (&c->redDither), + "m" (dummy), "m" (dummy), "m" (dummy), + "r" (dest), "m" (dstW_reg), "m"(uv_off) + : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S + ); +} + +static void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter, + const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrUSrc, + const int16_t **chrVSrc, + int chrFilterSize, const int16_t **alpSrc, + uint8_t *dest, int dstW, int dstY) +{ + x86_reg dummy=0; + x86_reg dstW_reg = dstW; + x86_reg uv_off = c->uv_off << 1; + + YSCALEYUV2PACKEDX + YSCALEYUV2RGBX + "pxor %%mm7, %%mm7 \n\t" + "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c" \n\t" //FIXME optimize + "add %4, %%"REG_c" \n\t" + WRITEBGR24(%%REGc, %5, %%REGa) + :: "r" (&c->redDither), + "m" (dummy), "m" (dummy), "m" (dummy), + "r" (dest), "m" (dstW_reg), "m"(uv_off) + : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S + ); +} + +#define REAL_WRITEYUY2(dst, dstw, index) \ + "packuswb %%mm3, %%mm3 \n\t"\ + "packuswb %%mm4, %%mm4 \n\t"\ + "packuswb %%mm7, %%mm1 \n\t"\ + "punpcklbw %%mm4, %%mm3 \n\t"\ + "movq %%mm1, %%mm7 \n\t"\ + "punpcklbw %%mm3, %%mm1 \n\t"\ + "punpckhbw %%mm3, %%mm7 \n\t"\ +\ + MOVNTQ(%%mm1, (dst, index, 2))\ + MOVNTQ(%%mm7, 8(dst, index, 2))\ +\ + "add $8, "#index" \n\t"\ + "cmp "#dstw", "#index" \n\t"\ + " jb 1b \n\t" +#define WRITEYUY2(dst, dstw, index) REAL_WRITEYUY2(dst, dstw, index) + +static void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const int16_t 
*lumFilter, + const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrUSrc, + const int16_t **chrVSrc, + int chrFilterSize, const int16_t **alpSrc, + uint8_t *dest, int dstW, int dstY) +{ + x86_reg dummy=0; + x86_reg dstW_reg = dstW; + x86_reg uv_off = c->uv_off << 1; + + YSCALEYUV2PACKEDX_ACCURATE + /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ + "psraw $3, %%mm3 \n\t" + "psraw $3, %%mm4 \n\t" + "psraw $3, %%mm1 \n\t" + "psraw $3, %%mm7 \n\t" + WRITEYUY2(%4, %5, %%REGa) + YSCALEYUV2PACKEDX_END +} + +static void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter, + const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrUSrc, + const int16_t **chrVSrc, + int chrFilterSize, const int16_t **alpSrc, + uint8_t *dest, int dstW, int dstY) +{ + x86_reg dummy=0; + x86_reg dstW_reg = dstW; + x86_reg uv_off = c->uv_off << 1; + + YSCALEYUV2PACKEDX + /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ + "psraw $3, %%mm3 \n\t" + "psraw $3, %%mm4 \n\t" + "psraw $3, %%mm1 \n\t" + "psraw $3, %%mm7 \n\t" + WRITEYUY2(%4, %5, %%REGa) + YSCALEYUV2PACKEDX_END +} + +#define REAL_YSCALEYUV2RGB_UV(index, c) \ + "xor "#index", "#index" \n\t"\ + ".p2align 4 \n\t"\ + "1: \n\t"\ + "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ + "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ + "add "UV_OFFx2"("#c"), "#index" \n\t" \ + "movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ + "movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ + "sub "UV_OFFx2"("#c"), "#index" \n\t" \ + "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\ + "psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\ + "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\ + "pmulhw %%mm0, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\ + "pmulhw %%mm0, %%mm5 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\ + "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\ + "psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - 
uvbuf1[eax+2048] >>4*/\ + "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\ + "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\ + "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\ + "psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8*/\ + "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ + "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\ + "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\ + "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\ + /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\ + +#define REAL_YSCALEYUV2RGB_YA(index, c, b1, b2) \ + "movq ("#b1", "#index", 2), %%mm0 \n\t" /*buf0[eax]*/\ + "movq ("#b2", "#index", 2), %%mm1 \n\t" /*buf1[eax]*/\ + "movq 8("#b1", "#index", 2), %%mm6 \n\t" /*buf0[eax]*/\ + "movq 8("#b2", "#index", 2), %%mm7 \n\t" /*buf1[eax]*/\ + "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\ + "psubw %%mm7, %%mm6 \n\t" /* buf0[eax] - buf1[eax]*/\ + "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\ + "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\ + "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ + "psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ + "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\ + "paddw %%mm6, %%mm7 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\ + +#define REAL_YSCALEYUV2RGB_COEFF(c) \ + "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\ + "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\ + "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16)*/\ + "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16)*/\ + "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\ + "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\ + /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\ + "paddw %%mm3, %%mm4 \n\t"\ + "movq %%mm2, %%mm0 \n\t"\ + "movq %%mm5, %%mm6 \n\t"\ + "movq %%mm4, %%mm3 \n\t"\ + "punpcklwd %%mm2, %%mm2 \n\t"\ + "punpcklwd %%mm5, %%mm5 \n\t"\ + "punpcklwd %%mm4, %%mm4 \n\t"\ + "paddw %%mm1, %%mm2 \n\t"\ + "paddw %%mm1, 
%%mm5 \n\t"\ + "paddw %%mm1, %%mm4 \n\t"\ + "punpckhwd %%mm0, %%mm0 \n\t"\ + "punpckhwd %%mm6, %%mm6 \n\t"\ + "punpckhwd %%mm3, %%mm3 \n\t"\ + "paddw %%mm7, %%mm0 \n\t"\ + "paddw %%mm7, %%mm6 \n\t"\ + "paddw %%mm7, %%mm3 \n\t"\ + /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\ + "packuswb %%mm0, %%mm2 \n\t"\ + "packuswb %%mm6, %%mm5 \n\t"\ + "packuswb %%mm3, %%mm4 \n\t"\ + +#define YSCALEYUV2RGB_YA(index, c, b1, b2) REAL_YSCALEYUV2RGB_YA(index, c, b1, b2) + +#define YSCALEYUV2RGB(index, c) \ + REAL_YSCALEYUV2RGB_UV(index, c) \ + REAL_YSCALEYUV2RGB_YA(index, c, %0, %1) \ + REAL_YSCALEYUV2RGB_COEFF(c) + +/** + * vertical bilinear scale YV12 to RGB + */ +static void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0, + const uint16_t *buf1, const uint16_t *ubuf0, + const uint16_t *ubuf1, const uint16_t *vbuf0, + const uint16_t *vbuf1, const uint16_t *abuf0, + const uint16_t *abuf1, uint8_t *dest, + int dstW, int yalpha, int uvalpha, int y) +{ + if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) { +#if ARCH_X86_64 + __asm__ volatile( + YSCALEYUV2RGB(%%r8, %5) + YSCALEYUV2RGB_YA(%%r8, %5, %6, %7) + "psraw $3, %%mm1 \n\t" /* abuf0[eax] - abuf1[eax] >>7*/ + "psraw $3, %%mm7 \n\t" /* abuf0[eax] - abuf1[eax] >>7*/ + "packuswb %%mm7, %%mm1 \n\t" + WRITEBGR32(%4, 8280(%5), %%r8, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6) + :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "r" (dest), + "a" (&c->redDither), + "r" (abuf0), "r" (abuf1) + : "%r8" + ); +#else + c->u_temp=(intptr_t)abuf0; + c->v_temp=(intptr_t)abuf1; + __asm__ volatile( + "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" + "mov %4, %%"REG_b" \n\t" + "push %%"REG_BP" \n\t" + YSCALEYUV2RGB(%%REGBP, %5) + "push %0 \n\t" + "push %1 \n\t" + "mov "U_TEMP"(%5), %0 \n\t" + "mov "V_TEMP"(%5), %1 \n\t" + YSCALEYUV2RGB_YA(%%REGBP, %5, %0, %1) + "psraw $3, %%mm1 \n\t" /* abuf0[eax] - abuf1[eax] >>7*/ + "psraw $3, %%mm7 \n\t" /* abuf0[eax] - abuf1[eax] >>7*/ + "packuswb %%mm7, %%mm1 \n\t" + "pop %1 \n\t" + "pop %0 \n\t" 
+ WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6) + "pop %%"REG_BP" \n\t" + "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" + :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), + "a" (&c->redDither) + ); +#endif + } else { + __asm__ volatile( + "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" + "mov %4, %%"REG_b" \n\t" + "push %%"REG_BP" \n\t" + YSCALEYUV2RGB(%%REGBP, %5) + "pcmpeqd %%mm7, %%mm7 \n\t" + WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) + "pop %%"REG_BP" \n\t" + "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" + :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), + "a" (&c->redDither) + ); + } +} + +static void RENAME(yuv2bgr24_2)(SwsContext *c, const uint16_t *buf0, + const uint16_t *buf1, const uint16_t *ubuf0, + const uint16_t *ubuf1, const uint16_t *vbuf0, + const uint16_t *vbuf1, const uint16_t *abuf0, + const uint16_t *abuf1, uint8_t *dest, + int dstW, int yalpha, int uvalpha, int y) +{ + //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :( + __asm__ volatile( + "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" + "mov %4, %%"REG_b" \n\t" + "push %%"REG_BP" \n\t" + YSCALEYUV2RGB(%%REGBP, %5) + "pxor %%mm7, %%mm7 \n\t" + WRITEBGR24(%%REGb, 8280(%5), %%REGBP) + "pop %%"REG_BP" \n\t" + "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" + :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), + "a" (&c->redDither) + ); +} + +static void RENAME(yuv2rgb555_2)(SwsContext *c, const uint16_t *buf0, + const uint16_t *buf1, const uint16_t *ubuf0, + const uint16_t *ubuf1, const uint16_t *vbuf0, + const uint16_t *vbuf1, const uint16_t *abuf0, + const uint16_t *abuf1, uint8_t *dest, + int dstW, int yalpha, int uvalpha, int y) +{ + //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :( + __asm__ volatile( + "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" + "mov %4, %%"REG_b" \n\t" + "push %%"REG_BP" \n\t" + YSCALEYUV2RGB(%%REGBP, %5) + "pxor %%mm7, 
%%mm7 \n\t" + /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ +#ifdef DITHER1XBPP + "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t" + "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t" + "paddusb "RED_DITHER"(%5), %%mm5 \n\t" +#endif + WRITERGB15(%%REGb, 8280(%5), %%REGBP) + "pop %%"REG_BP" \n\t" + "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" + :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), + "a" (&c->redDither) + ); +} + +static void RENAME(yuv2rgb565_2)(SwsContext *c, const uint16_t *buf0, + const uint16_t *buf1, const uint16_t *ubuf0, + const uint16_t *ubuf1, const uint16_t *vbuf0, + const uint16_t *vbuf1, const uint16_t *abuf0, + const uint16_t *abuf1, uint8_t *dest, + int dstW, int yalpha, int uvalpha, int y) +{ + //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :( + __asm__ volatile( + "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" + "mov %4, %%"REG_b" \n\t" + "push %%"REG_BP" \n\t" + YSCALEYUV2RGB(%%REGBP, %5) + "pxor %%mm7, %%mm7 \n\t" + /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ +#ifdef DITHER1XBPP + "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t" + "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t" + "paddusb "RED_DITHER"(%5), %%mm5 \n\t" +#endif + WRITERGB16(%%REGb, 8280(%5), %%REGBP) + "pop %%"REG_BP" \n\t" + "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" + :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), + "a" (&c->redDither) + ); +} + +#define REAL_YSCALEYUV2PACKED(index, c) \ + "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\ + "movq "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm1 \n\t"\ + "psraw $3, %%mm0 \n\t"\ + "psraw $3, %%mm1 \n\t"\ + "movq %%mm0, "CHR_MMX_FILTER_OFFSET"+8("#c") \n\t"\ + "movq %%mm1, "LUM_MMX_FILTER_OFFSET"+8("#c") \n\t"\ + "xor "#index", "#index" \n\t"\ + ".p2align 4 \n\t"\ + "1: \n\t"\ + "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ + "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ + "add "UV_OFFx2"("#c"), "#index" \n\t" \ + "movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ + "movq (%3, "#index"), %%mm4 \n\t" /* 
uvbuf1[eax+2048]*/\ + "sub "UV_OFFx2"("#c"), "#index" \n\t" \ + "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\ + "psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\ + "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\ + "pmulhw %%mm0, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\ + "pmulhw %%mm0, %%mm5 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\ + "psraw $7, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\ + "psraw $7, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\ + "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\ + "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\ + "movq (%0, "#index", 2), %%mm0 \n\t" /*buf0[eax]*/\ + "movq (%1, "#index", 2), %%mm1 \n\t" /*buf1[eax]*/\ + "movq 8(%0, "#index", 2), %%mm6 \n\t" /*buf0[eax]*/\ + "movq 8(%1, "#index", 2), %%mm7 \n\t" /*buf1[eax]*/\ + "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\ + "psubw %%mm7, %%mm6 \n\t" /* buf0[eax] - buf1[eax]*/\ + "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\ + "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\ + "psraw $7, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ + "psraw $7, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ + "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\ + "paddw %%mm6, %%mm7 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\ + +#define YSCALEYUV2PACKED(index, c) REAL_YSCALEYUV2PACKED(index, c) + +static void RENAME(yuv2yuyv422_2)(SwsContext *c, const uint16_t *buf0, + const uint16_t *buf1, const uint16_t *ubuf0, + const uint16_t *ubuf1, const uint16_t *vbuf0, + const uint16_t *vbuf1, const uint16_t *abuf0, + const uint16_t *abuf1, uint8_t *dest, + int dstW, int yalpha, int uvalpha, int y) +{ + //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :( + __asm__ volatile( + "mov %%"REG_b", 
"ESP_OFFSET"(%5) \n\t" + "mov %4, %%"REG_b" \n\t" + "push %%"REG_BP" \n\t" + YSCALEYUV2PACKED(%%REGBP, %5) + WRITEYUY2(%%REGb, 8280(%5), %%REGBP) + "pop %%"REG_BP" \n\t" + "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" + :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), + "a" (&c->redDither) + ); +} + +#define REAL_YSCALEYUV2RGB1(index, c) \ + "xor "#index", "#index" \n\t"\ + ".p2align 4 \n\t"\ + "1: \n\t"\ + "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\ + "add "UV_OFFx2"("#c"), "#index" \n\t" \ + "movq (%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\ + "sub "UV_OFFx2"("#c"), "#index" \n\t" \ + "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\ + "psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\ + "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\ + "psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8*/\ + "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ + "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\ + "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\ + "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\ + /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\ + "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\ + "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax]*/\ + "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ + "psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ + "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\ + "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\ + "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16)*/\ + "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16)*/\ + "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\ + "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\ + /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\ + "paddw %%mm3, %%mm4 \n\t"\ + "movq %%mm2, %%mm0 \n\t"\ + "movq %%mm5, %%mm6 \n\t"\ + "movq %%mm4, %%mm3 \n\t"\ + "punpcklwd %%mm2, %%mm2 \n\t"\ + "punpcklwd %%mm5, %%mm5 \n\t"\ + "punpcklwd %%mm4, %%mm4 \n\t"\ + "paddw %%mm1, %%mm2 \n\t"\ + "paddw %%mm1, %%mm5 \n\t"\ + "paddw %%mm1, %%mm4 \n\t"\ + "punpckhwd %%mm0, %%mm0 \n\t"\ + "punpckhwd %%mm6, %%mm6 \n\t"\ + "punpckhwd 
%%mm3, %%mm3 \n\t"\ + "paddw %%mm7, %%mm0 \n\t"\ + "paddw %%mm7, %%mm6 \n\t"\ + "paddw %%mm7, %%mm3 \n\t"\ + /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\ + "packuswb %%mm0, %%mm2 \n\t"\ + "packuswb %%mm6, %%mm5 \n\t"\ + "packuswb %%mm3, %%mm4 \n\t"\ + +#define YSCALEYUV2RGB1(index, c) REAL_YSCALEYUV2RGB1(index, c) + +// do vertical chrominance interpolation +#define REAL_YSCALEYUV2RGB1b(index, c) \ + "xor "#index", "#index" \n\t"\ + ".p2align 4 \n\t"\ + "1: \n\t"\ + "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ + "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ + "add "UV_OFFx2"("#c"), "#index" \n\t" \ + "movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ + "movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ + "sub "UV_OFFx2"("#c"), "#index" \n\t" \ + "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\ + "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\ + "psrlw $5, %%mm3 \n\t" /*FIXME might overflow*/\ + "psrlw $5, %%mm4 \n\t" /*FIXME might overflow*/\ + "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\ + "psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8*/\ + "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ + "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\ + "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\ + "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\ + /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\ + "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\ + "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax]*/\ + "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ + "psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ + "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\ + "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\ + "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16)*/\ + "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16)*/\ + "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\ + "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\ + /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\ + "paddw %%mm3, %%mm4 \n\t"\ + "movq %%mm2, %%mm0 \n\t"\ + "movq %%mm5, %%mm6 \n\t"\ + "movq %%mm4, %%mm3 
\n\t"\ + "punpcklwd %%mm2, %%mm2 \n\t"\ + "punpcklwd %%mm5, %%mm5 \n\t"\ + "punpcklwd %%mm4, %%mm4 \n\t"\ + "paddw %%mm1, %%mm2 \n\t"\ + "paddw %%mm1, %%mm5 \n\t"\ + "paddw %%mm1, %%mm4 \n\t"\ + "punpckhwd %%mm0, %%mm0 \n\t"\ + "punpckhwd %%mm6, %%mm6 \n\t"\ + "punpckhwd %%mm3, %%mm3 \n\t"\ + "paddw %%mm7, %%mm0 \n\t"\ + "paddw %%mm7, %%mm6 \n\t"\ + "paddw %%mm7, %%mm3 \n\t"\ + /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\ + "packuswb %%mm0, %%mm2 \n\t"\ + "packuswb %%mm6, %%mm5 \n\t"\ + "packuswb %%mm3, %%mm4 \n\t"\ + +#define YSCALEYUV2RGB1b(index, c) REAL_YSCALEYUV2RGB1b(index, c) + +#define REAL_YSCALEYUV2RGB1_ALPHA(index) \ + "movq (%1, "#index", 2), %%mm7 \n\t" /* abuf0[index ] */\ + "movq 8(%1, "#index", 2), %%mm1 \n\t" /* abuf0[index+4] */\ + "psraw $7, %%mm7 \n\t" /* abuf0[index ] >>7 */\ + "psraw $7, %%mm1 \n\t" /* abuf0[index+4] >>7 */\ + "packuswb %%mm1, %%mm7 \n\t" +#define YSCALEYUV2RGB1_ALPHA(index) REAL_YSCALEYUV2RGB1_ALPHA(index) + +/** + * YV12 to RGB without scaling or interpolating + */ +static void RENAME(yuv2rgb32_1)(SwsContext *c, const uint16_t *buf0, + const uint16_t *ubuf0, const uint16_t *ubuf1, + const uint16_t *vbuf0, const uint16_t *vbuf1, + const uint16_t *abuf0, uint8_t *dest, + int dstW, int uvalpha, enum PixelFormat dstFormat, + int flags, int y) +{ + const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1 + + if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster + if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) { + __asm__ volatile( + "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" + "mov %4, %%"REG_b" \n\t" + "push %%"REG_BP" \n\t" + YSCALEYUV2RGB1(%%REGBP, %5) + YSCALEYUV2RGB1_ALPHA(%%REGBP) + WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) + "pop %%"REG_BP" \n\t" + "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" + :: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest), + "a" (&c->redDither) + ); + } else { + __asm__ volatile( + "mov 
%%"REG_b", "ESP_OFFSET"(%5) \n\t" + "mov %4, %%"REG_b" \n\t" + "push %%"REG_BP" \n\t" + YSCALEYUV2RGB1(%%REGBP, %5) + "pcmpeqd %%mm7, %%mm7 \n\t" + WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) + "pop %%"REG_BP" \n\t" + "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" + :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), + "a" (&c->redDither) + ); + } + } else { + if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) { + __asm__ volatile( + "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" + "mov %4, %%"REG_b" \n\t" + "push %%"REG_BP" \n\t" + YSCALEYUV2RGB1b(%%REGBP, %5) + YSCALEYUV2RGB1_ALPHA(%%REGBP) + WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) + "pop %%"REG_BP" \n\t" + "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" + :: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest), + "a" (&c->redDither) + ); + } else { + __asm__ volatile( + "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" + "mov %4, %%"REG_b" \n\t" + "push %%"REG_BP" \n\t" + YSCALEYUV2RGB1b(%%REGBP, %5) + "pcmpeqd %%mm7, %%mm7 \n\t" + WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) + "pop %%"REG_BP" \n\t" + "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" + :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), + "a" (&c->redDither) + ); + } + } +} + +static void RENAME(yuv2bgr24_1)(SwsContext *c, const uint16_t *buf0, + const uint16_t *ubuf0, const uint16_t *ubuf1, + const uint16_t *vbuf0, const uint16_t *vbuf1, + const uint16_t *abuf0, uint8_t *dest, + int dstW, int uvalpha, enum PixelFormat dstFormat, + int flags, int y) +{ + const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1 + + if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster + __asm__ volatile( + "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" + "mov %4, %%"REG_b" \n\t" + "push %%"REG_BP" \n\t" + YSCALEYUV2RGB1(%%REGBP, %5) + "pxor %%mm7, %%mm7 \n\t" + WRITEBGR24(%%REGb, 
8280(%5), %%REGBP) + "pop %%"REG_BP" \n\t" + "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" + :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), + "a" (&c->redDither) + ); + } else { + __asm__ volatile( + "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" + "mov %4, %%"REG_b" \n\t" + "push %%"REG_BP" \n\t" + YSCALEYUV2RGB1b(%%REGBP, %5) + "pxor %%mm7, %%mm7 \n\t" + WRITEBGR24(%%REGb, 8280(%5), %%REGBP) + "pop %%"REG_BP" \n\t" + "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" + :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), + "a" (&c->redDither) + ); + } +} + +static void RENAME(yuv2rgb555_1)(SwsContext *c, const uint16_t *buf0, + const uint16_t *ubuf0, const uint16_t *ubuf1, + const uint16_t *vbuf0, const uint16_t *vbuf1, + const uint16_t *abuf0, uint8_t *dest, + int dstW, int uvalpha, enum PixelFormat dstFormat, + int flags, int y) +{ + const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1 + + if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster + __asm__ volatile( + "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" + "mov %4, %%"REG_b" \n\t" + "push %%"REG_BP" \n\t" + YSCALEYUV2RGB1(%%REGBP, %5) + "pxor %%mm7, %%mm7 \n\t" + /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ +#ifdef DITHER1XBPP + "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t" + "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t" + "paddusb "RED_DITHER"(%5), %%mm5 \n\t" +#endif + WRITERGB15(%%REGb, 8280(%5), %%REGBP) + "pop %%"REG_BP" \n\t" + "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" + :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), + "a" (&c->redDither) + ); + } else { + __asm__ volatile( + "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" + "mov %4, %%"REG_b" \n\t" + "push %%"REG_BP" \n\t" + YSCALEYUV2RGB1b(%%REGBP, %5) + "pxor %%mm7, %%mm7 \n\t" + /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ +#ifdef DITHER1XBPP + "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t" + "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t" + "paddusb "RED_DITHER"(%5), %%mm5 \n\t" +#endif + WRITERGB15(%%REGb, 8280(%5), 
%%REGBP) + "pop %%"REG_BP" \n\t" + "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" + :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), + "a" (&c->redDither) + ); + } +} + +static void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0, + const uint16_t *ubuf0, const uint16_t *ubuf1, + const uint16_t *vbuf0, const uint16_t *vbuf1, + const uint16_t *abuf0, uint8_t *dest, + int dstW, int uvalpha, enum PixelFormat dstFormat, + int flags, int y) +{ + const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1 + + if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster + __asm__ volatile( + "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" + "mov %4, %%"REG_b" \n\t" + "push %%"REG_BP" \n\t" + YSCALEYUV2RGB1(%%REGBP, %5) + "pxor %%mm7, %%mm7 \n\t" + /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ +#ifdef DITHER1XBPP + "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t" + "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t" + "paddusb "RED_DITHER"(%5), %%mm5 \n\t" +#endif + WRITERGB16(%%REGb, 8280(%5), %%REGBP) + "pop %%"REG_BP" \n\t" + "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" + :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), + "a" (&c->redDither) + ); + } else { + __asm__ volatile( + "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" + "mov %4, %%"REG_b" \n\t" + "push %%"REG_BP" \n\t" + YSCALEYUV2RGB1b(%%REGBP, %5) + "pxor %%mm7, %%mm7 \n\t" + /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ +#ifdef DITHER1XBPP + "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t" + "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t" + "paddusb "RED_DITHER"(%5), %%mm5 \n\t" +#endif + WRITERGB16(%%REGb, 8280(%5), %%REGBP) + "pop %%"REG_BP" \n\t" + "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" + :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), + "a" (&c->redDither) + ); + } +} + +#define REAL_YSCALEYUV2PACKED1(index, c) \ + "xor "#index", "#index" \n\t"\ + ".p2align 4 \n\t"\ + "1: \n\t"\ + "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\ + "add "UV_OFFx2"("#c"), "#index" \n\t" \ + "movq (%2, 
"#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\ + "sub "UV_OFFx2"("#c"), "#index" \n\t" \ + "psraw $7, %%mm3 \n\t" \ + "psraw $7, %%mm4 \n\t" \ + "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\ + "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax]*/\ + "psraw $7, %%mm1 \n\t" \ + "psraw $7, %%mm7 \n\t" \ + +#define YSCALEYUV2PACKED1(index, c) REAL_YSCALEYUV2PACKED1(index, c) + +#define REAL_YSCALEYUV2PACKED1b(index, c) \ + "xor "#index", "#index" \n\t"\ + ".p2align 4 \n\t"\ + "1: \n\t"\ + "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ + "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ + "add "UV_OFFx2"("#c"), "#index" \n\t" \ + "movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ + "movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ + "sub "UV_OFFx2"("#c"), "#index" \n\t" \ + "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\ + "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\ + "psrlw $8, %%mm3 \n\t" \ + "psrlw $8, %%mm4 \n\t" \ + "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\ + "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax]*/\ + "psraw $7, %%mm1 \n\t" \ + "psraw $7, %%mm7 \n\t" +#define YSCALEYUV2PACKED1b(index, c) REAL_YSCALEYUV2PACKED1b(index, c) + +static void RENAME(yuv2yuyv422_1)(SwsContext *c, const uint16_t *buf0, + const uint16_t *ubuf0, const uint16_t *ubuf1, + const uint16_t *vbuf0, const uint16_t *vbuf1, + const uint16_t *abuf0, uint8_t *dest, + int dstW, int uvalpha, enum PixelFormat dstFormat, + int flags, int y) +{ + const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1 + + if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster + __asm__ volatile( + "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" + "mov %4, %%"REG_b" \n\t" + "push %%"REG_BP" \n\t" + YSCALEYUV2PACKED1(%%REGBP, %5) + WRITEYUY2(%%REGb, 8280(%5), %%REGBP) + "pop %%"REG_BP" \n\t" + "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" + :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), + "a" 
(&c->redDither) + ); + } else { + __asm__ volatile( + "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" + "mov %4, %%"REG_b" \n\t" + "push %%"REG_BP" \n\t" + YSCALEYUV2PACKED1b(%%REGBP, %5) + WRITEYUY2(%%REGb, 8280(%5), %%REGBP) + "pop %%"REG_BP" \n\t" + "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" + :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), + "a" (&c->redDither) + ); + } +} + +#if !COMPILE_TEMPLATE_MMX2 +//FIXME yuy2* can read up to 7 samples too much + +static void RENAME(yuy2ToY)(uint8_t *dst, const uint8_t *src, + int width, uint32_t *unused) +{ + __asm__ volatile( + "movq "MANGLE(bm01010101)", %%mm2 \n\t" + "mov %0, %%"REG_a" \n\t" + "1: \n\t" + "movq (%1, %%"REG_a",2), %%mm0 \n\t" + "movq 8(%1, %%"REG_a",2), %%mm1 \n\t" + "pand %%mm2, %%mm0 \n\t" + "pand %%mm2, %%mm1 \n\t" + "packuswb %%mm1, %%mm0 \n\t" + "movq %%mm0, (%2, %%"REG_a") \n\t" + "add $8, %%"REG_a" \n\t" + " js 1b \n\t" + : : "g" ((x86_reg)-width), "r" (src+width*2), "r" (dst+width) + : "%"REG_a + ); +} + +static void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, + const uint8_t *src1, const uint8_t *src2, + int width, uint32_t *unused) +{ + __asm__ volatile( + "movq "MANGLE(bm01010101)", %%mm4 \n\t" + "mov %0, %%"REG_a" \n\t" + "1: \n\t" + "movq (%1, %%"REG_a",4), %%mm0 \n\t" + "movq 8(%1, %%"REG_a",4), %%mm1 \n\t" + "psrlw $8, %%mm0 \n\t" + "psrlw $8, %%mm1 \n\t" + "packuswb %%mm1, %%mm0 \n\t" + "movq %%mm0, %%mm1 \n\t" + "psrlw $8, %%mm0 \n\t" + "pand %%mm4, %%mm1 \n\t" + "packuswb %%mm0, %%mm0 \n\t" + "packuswb %%mm1, %%mm1 \n\t" + "movd %%mm0, (%3, %%"REG_a") \n\t" + "movd %%mm1, (%2, %%"REG_a") \n\t" + "add $4, %%"REG_a" \n\t" + " js 1b \n\t" + : : "g" ((x86_reg)-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width) + : "%"REG_a + ); + assert(src1 == src2); +} + +static void RENAME(LEToUV)(uint8_t *dstU, uint8_t *dstV, + const uint8_t *src1, const uint8_t *src2, + int width, uint32_t *unused) +{ + __asm__ volatile( + "mov %0, %%"REG_a" \n\t" + "1: \n\t" + "movq (%1, 
%%"REG_a",2), %%mm0 \n\t" + "movq 8(%1, %%"REG_a",2), %%mm1 \n\t" + "movq (%2, %%"REG_a",2), %%mm2 \n\t" + "movq 8(%2, %%"REG_a",2), %%mm3 \n\t" + "psrlw $8, %%mm0 \n\t" + "psrlw $8, %%mm1 \n\t" + "psrlw $8, %%mm2 \n\t" + "psrlw $8, %%mm3 \n\t" + "packuswb %%mm1, %%mm0 \n\t" + "packuswb %%mm3, %%mm2 \n\t" + "movq %%mm0, (%3, %%"REG_a") \n\t" + "movq %%mm2, (%4, %%"REG_a") \n\t" + "add $8, %%"REG_a" \n\t" + " js 1b \n\t" + : : "g" ((x86_reg)-width), "r" (src1+width*2), "r" (src2+width*2), "r" (dstU+width), "r" (dstV+width) + : "%"REG_a + ); +} + +/* This is almost identical to the previous, end exists only because + * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */ +static void RENAME(uyvyToY)(uint8_t *dst, const uint8_t *src, + int width, uint32_t *unused) +{ + __asm__ volatile( + "mov %0, %%"REG_a" \n\t" + "1: \n\t" + "movq (%1, %%"REG_a",2), %%mm0 \n\t" + "movq 8(%1, %%"REG_a",2), %%mm1 \n\t" + "psrlw $8, %%mm0 \n\t" + "psrlw $8, %%mm1 \n\t" + "packuswb %%mm1, %%mm0 \n\t" + "movq %%mm0, (%2, %%"REG_a") \n\t" + "add $8, %%"REG_a" \n\t" + " js 1b \n\t" + : : "g" ((x86_reg)-width), "r" (src+width*2), "r" (dst+width) + : "%"REG_a + ); +} + +static void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, + const uint8_t *src1, const uint8_t *src2, + int width, uint32_t *unused) +{ + __asm__ volatile( + "movq "MANGLE(bm01010101)", %%mm4 \n\t" + "mov %0, %%"REG_a" \n\t" + "1: \n\t" + "movq (%1, %%"REG_a",4), %%mm0 \n\t" + "movq 8(%1, %%"REG_a",4), %%mm1 \n\t" + "pand %%mm4, %%mm0 \n\t" + "pand %%mm4, %%mm1 \n\t" + "packuswb %%mm1, %%mm0 \n\t" + "movq %%mm0, %%mm1 \n\t" + "psrlw $8, %%mm0 \n\t" + "pand %%mm4, %%mm1 \n\t" + "packuswb %%mm0, %%mm0 \n\t" + "packuswb %%mm1, %%mm1 \n\t" + "movd %%mm0, (%3, %%"REG_a") \n\t" + "movd %%mm1, (%2, %%"REG_a") \n\t" + "add $4, %%"REG_a" \n\t" + " js 1b \n\t" + : : "g" ((x86_reg)-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width) + : "%"REG_a + ); + assert(src1 == src2); +} + +static void 
RENAME(BEToUV)(uint8_t *dstU, uint8_t *dstV, + const uint8_t *src1, const uint8_t *src2, + int width, uint32_t *unused) +{ + __asm__ volatile( + "movq "MANGLE(bm01010101)", %%mm4 \n\t" + "mov %0, %%"REG_a" \n\t" + "1: \n\t" + "movq (%1, %%"REG_a",2), %%mm0 \n\t" + "movq 8(%1, %%"REG_a",2), %%mm1 \n\t" + "movq (%2, %%"REG_a",2), %%mm2 \n\t" + "movq 8(%2, %%"REG_a",2), %%mm3 \n\t" + "pand %%mm4, %%mm0 \n\t" + "pand %%mm4, %%mm1 \n\t" + "pand %%mm4, %%mm2 \n\t" + "pand %%mm4, %%mm3 \n\t" + "packuswb %%mm1, %%mm0 \n\t" + "packuswb %%mm3, %%mm2 \n\t" + "movq %%mm0, (%3, %%"REG_a") \n\t" + "movq %%mm2, (%4, %%"REG_a") \n\t" + "add $8, %%"REG_a" \n\t" + " js 1b \n\t" + : : "g" ((x86_reg)-width), "r" (src1+width*2), "r" (src2+width*2), "r" (dstU+width), "r" (dstV+width) + : "%"REG_a + ); +} + +static av_always_inline void RENAME(nvXXtoUV)(uint8_t *dst1, uint8_t *dst2, + const uint8_t *src, int width) +{ + __asm__ volatile( + "movq "MANGLE(bm01010101)", %%mm4 \n\t" + "mov %0, %%"REG_a" \n\t" + "1: \n\t" + "movq (%1, %%"REG_a",2), %%mm0 \n\t" + "movq 8(%1, %%"REG_a",2), %%mm1 \n\t" + "movq %%mm0, %%mm2 \n\t" + "movq %%mm1, %%mm3 \n\t" + "pand %%mm4, %%mm0 \n\t" + "pand %%mm4, %%mm1 \n\t" + "psrlw $8, %%mm2 \n\t" + "psrlw $8, %%mm3 \n\t" + "packuswb %%mm1, %%mm0 \n\t" + "packuswb %%mm3, %%mm2 \n\t" + "movq %%mm0, (%2, %%"REG_a") \n\t" + "movq %%mm2, (%3, %%"REG_a") \n\t" + "add $8, %%"REG_a" \n\t" + " js 1b \n\t" + : : "g" ((x86_reg)-width), "r" (src+width*2), "r" (dst1+width), "r" (dst2+width) + : "%"REG_a + ); +} + +static void RENAME(nv12ToUV)(uint8_t *dstU, uint8_t *dstV, + const uint8_t *src1, const uint8_t *src2, + int width, uint32_t *unused) +{ + RENAME(nvXXtoUV)(dstU, dstV, src1, width); +} + +static void RENAME(nv21ToUV)(uint8_t *dstU, uint8_t *dstV, + const uint8_t *src1, const uint8_t *src2, + int width, uint32_t *unused) +{ + RENAME(nvXXtoUV)(dstV, dstU, src1, width); +} +#endif /* !COMPILE_TEMPLATE_MMX2 */ + +static av_always_inline void 
RENAME(bgr24ToY_mmx)(int16_t *dst, const uint8_t *src, + int width, enum PixelFormat srcFormat) +{ + + if(srcFormat == PIX_FMT_BGR24) { + __asm__ volatile( + "movq "MANGLE(ff_bgr24toY1Coeff)", %%mm5 \n\t" + "movq "MANGLE(ff_bgr24toY2Coeff)", %%mm6 \n\t" + : + ); + } else { + __asm__ volatile( + "movq "MANGLE(ff_rgb24toY1Coeff)", %%mm5 \n\t" + "movq "MANGLE(ff_rgb24toY2Coeff)", %%mm6 \n\t" + : + ); + } + + __asm__ volatile( + "movq "MANGLE(ff_bgr24toYOffset)", %%mm4 \n\t" + "mov %2, %%"REG_a" \n\t" + "pxor %%mm7, %%mm7 \n\t" + "1: \n\t" + PREFETCH" 64(%0) \n\t" + "movd (%0), %%mm0 \n\t" + "movd 2(%0), %%mm1 \n\t" + "movd 6(%0), %%mm2 \n\t" + "movd 8(%0), %%mm3 \n\t" + "add $12, %0 \n\t" + "punpcklbw %%mm7, %%mm0 \n\t" + "punpcklbw %%mm7, %%mm1 \n\t" + "punpcklbw %%mm7, %%mm2 \n\t" + "punpcklbw %%mm7, %%mm3 \n\t" + "pmaddwd %%mm5, %%mm0 \n\t" + "pmaddwd %%mm6, %%mm1 \n\t" + "pmaddwd %%mm5, %%mm2 \n\t" + "pmaddwd %%mm6, %%mm3 \n\t" + "paddd %%mm1, %%mm0 \n\t" + "paddd %%mm3, %%mm2 \n\t" + "paddd %%mm4, %%mm0 \n\t" + "paddd %%mm4, %%mm2 \n\t" + "psrad $9, %%mm0 \n\t" + "psrad $9, %%mm2 \n\t" + "packssdw %%mm2, %%mm0 \n\t" + "movq %%mm0, (%1, %%"REG_a") \n\t" + "add $8, %%"REG_a" \n\t" + " js 1b \n\t" + : "+r" (src) + : "r" (dst+width), "g" ((x86_reg)-2*width) + : "%"REG_a + ); +} + +static void RENAME(bgr24ToY)(int16_t *dst, const uint8_t *src, + int width, uint32_t *unused) +{ + RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_BGR24); +} + +static void RENAME(rgb24ToY)(int16_t *dst, const uint8_t *src, + int width, uint32_t *unused) +{ + RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_RGB24); +} + +static av_always_inline void RENAME(bgr24ToUV_mmx)(int16_t *dstU, int16_t *dstV, + const uint8_t *src, int width, + enum PixelFormat srcFormat) +{ + __asm__ volatile( + "movq 24(%4), %%mm6 \n\t" + "mov %3, %%"REG_a" \n\t" + "pxor %%mm7, %%mm7 \n\t" + "1: \n\t" + PREFETCH" 64(%0) \n\t" + "movd (%0), %%mm0 \n\t" + "movd 2(%0), %%mm1 \n\t" + "punpcklbw %%mm7, %%mm0 \n\t" + 
"punpcklbw %%mm7, %%mm1 \n\t" + "movq %%mm0, %%mm2 \n\t" + "movq %%mm1, %%mm3 \n\t" + "pmaddwd (%4), %%mm0 \n\t" + "pmaddwd 8(%4), %%mm1 \n\t" + "pmaddwd 16(%4), %%mm2 \n\t" + "pmaddwd %%mm6, %%mm3 \n\t" + "paddd %%mm1, %%mm0 \n\t" + "paddd %%mm3, %%mm2 \n\t" + + "movd 6(%0), %%mm1 \n\t" + "movd 8(%0), %%mm3 \n\t" + "add $12, %0 \n\t" + "punpcklbw %%mm7, %%mm1 \n\t" + "punpcklbw %%mm7, %%mm3 \n\t" + "movq %%mm1, %%mm4 \n\t" + "movq %%mm3, %%mm5 \n\t" + "pmaddwd (%4), %%mm1 \n\t" + "pmaddwd 8(%4), %%mm3 \n\t" + "pmaddwd 16(%4), %%mm4 \n\t" + "pmaddwd %%mm6, %%mm5 \n\t" + "paddd %%mm3, %%mm1 \n\t" + "paddd %%mm5, %%mm4 \n\t" + + "movq "MANGLE(ff_bgr24toUVOffset)", %%mm3 \n\t" + "paddd %%mm3, %%mm0 \n\t" + "paddd %%mm3, %%mm2 \n\t" + "paddd %%mm3, %%mm1 \n\t" + "paddd %%mm3, %%mm4 \n\t" + "psrad $9, %%mm0 \n\t" + "psrad $9, %%mm2 \n\t" + "psrad $9, %%mm1 \n\t" + "psrad $9, %%mm4 \n\t" + "packssdw %%mm1, %%mm0 \n\t" + "packssdw %%mm4, %%mm2 \n\t" + "movq %%mm0, (%1, %%"REG_a") \n\t" + "movq %%mm2, (%2, %%"REG_a") \n\t" + "add $8, %%"REG_a" \n\t" + " js 1b \n\t" + : "+r" (src) + : "r" (dstU+width), "r" (dstV+width), "g" ((x86_reg)-2*width), "r"(ff_bgr24toUV[srcFormat == PIX_FMT_RGB24]) + : "%"REG_a + ); +} + +static void RENAME(bgr24ToUV)(int16_t *dstU, int16_t *dstV, + const uint8_t *src1, const uint8_t *src2, + int width, uint32_t *unused) +{ + RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_BGR24); + assert(src1 == src2); +} + +static void RENAME(rgb24ToUV)(int16_t *dstU, int16_t *dstV, + const uint8_t *src1, const uint8_t *src2, + int width, uint32_t *unused) +{ + assert(src1==src2); + RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_RGB24); +} + +#if !COMPILE_TEMPLATE_MMX2 +// bilinear / bicubic scaling +static void RENAME(hScale)(int16_t *dst, int dstW, + const uint8_t *src, int srcW, + int xInc, const int16_t *filter, + const int16_t *filterPos, int filterSize) +{ + assert(filterSize % 4 == 0 && filterSize>0); + if (filterSize==4) { // Always true 
for upscaling, sometimes for down, too. + x86_reg counter= -2*dstW; + filter-= counter*2; + filterPos-= counter/2; + dst-= counter/2; + __asm__ volatile( +#if defined(PIC) + "push %%"REG_b" \n\t" +#endif + "pxor %%mm7, %%mm7 \n\t" + "push %%"REG_BP" \n\t" // we use 7 regs here ... + "mov %%"REG_a", %%"REG_BP" \n\t" + ".p2align 4 \n\t" + "1: \n\t" + "movzwl (%2, %%"REG_BP"), %%eax \n\t" + "movzwl 2(%2, %%"REG_BP"), %%ebx \n\t" + "movq (%1, %%"REG_BP", 4), %%mm1 \n\t" + "movq 8(%1, %%"REG_BP", 4), %%mm3 \n\t" + "movd (%3, %%"REG_a"), %%mm0 \n\t" + "movd (%3, %%"REG_b"), %%mm2 \n\t" + "punpcklbw %%mm7, %%mm0 \n\t" + "punpcklbw %%mm7, %%mm2 \n\t" + "pmaddwd %%mm1, %%mm0 \n\t" + "pmaddwd %%mm2, %%mm3 \n\t" + "movq %%mm0, %%mm4 \n\t" + "punpckldq %%mm3, %%mm0 \n\t" + "punpckhdq %%mm3, %%mm4 \n\t" + "paddd %%mm4, %%mm0 \n\t" + "psrad $7, %%mm0 \n\t" + "packssdw %%mm0, %%mm0 \n\t" + "movd %%mm0, (%4, %%"REG_BP") \n\t" + "add $4, %%"REG_BP" \n\t" + " jnc 1b \n\t" + + "pop %%"REG_BP" \n\t" +#if defined(PIC) + "pop %%"REG_b" \n\t" +#endif + : "+a" (counter) + : "c" (filter), "d" (filterPos), "S" (src), "D" (dst) +#if !defined(PIC) + : "%"REG_b +#endif + ); + } else if (filterSize==8) { + x86_reg counter= -2*dstW; + filter-= counter*4; + filterPos-= counter/2; + dst-= counter/2; + __asm__ volatile( +#if defined(PIC) + "push %%"REG_b" \n\t" +#endif + "pxor %%mm7, %%mm7 \n\t" + "push %%"REG_BP" \n\t" // we use 7 regs here ... 
+ "mov %%"REG_a", %%"REG_BP" \n\t" + ".p2align 4 \n\t" + "1: \n\t" + "movzwl (%2, %%"REG_BP"), %%eax \n\t" + "movzwl 2(%2, %%"REG_BP"), %%ebx \n\t" + "movq (%1, %%"REG_BP", 8), %%mm1 \n\t" + "movq 16(%1, %%"REG_BP", 8), %%mm3 \n\t" + "movd (%3, %%"REG_a"), %%mm0 \n\t" + "movd (%3, %%"REG_b"), %%mm2 \n\t" + "punpcklbw %%mm7, %%mm0 \n\t" + "punpcklbw %%mm7, %%mm2 \n\t" + "pmaddwd %%mm1, %%mm0 \n\t" + "pmaddwd %%mm2, %%mm3 \n\t" + + "movq 8(%1, %%"REG_BP", 8), %%mm1 \n\t" + "movq 24(%1, %%"REG_BP", 8), %%mm5 \n\t" + "movd 4(%3, %%"REG_a"), %%mm4 \n\t" + "movd 4(%3, %%"REG_b"), %%mm2 \n\t" + "punpcklbw %%mm7, %%mm4 \n\t" + "punpcklbw %%mm7, %%mm2 \n\t" + "pmaddwd %%mm1, %%mm4 \n\t" + "pmaddwd %%mm2, %%mm5 \n\t" + "paddd %%mm4, %%mm0 \n\t" + "paddd %%mm5, %%mm3 \n\t" + "movq %%mm0, %%mm4 \n\t" + "punpckldq %%mm3, %%mm0 \n\t" + "punpckhdq %%mm3, %%mm4 \n\t" + "paddd %%mm4, %%mm0 \n\t" + "psrad $7, %%mm0 \n\t" + "packssdw %%mm0, %%mm0 \n\t" + "movd %%mm0, (%4, %%"REG_BP") \n\t" + "add $4, %%"REG_BP" \n\t" + " jnc 1b \n\t" + + "pop %%"REG_BP" \n\t" +#if defined(PIC) + "pop %%"REG_b" \n\t" +#endif + : "+a" (counter) + : "c" (filter), "d" (filterPos), "S" (src), "D" (dst) +#if !defined(PIC) + : "%"REG_b +#endif + ); + } else { + const uint8_t *offset = src+filterSize; + x86_reg counter= -2*dstW; + //filter-= counter*filterSize/2; + filterPos-= counter/2; + dst-= counter/2; + __asm__ volatile( + "pxor %%mm7, %%mm7 \n\t" + ".p2align 4 \n\t" + "1: \n\t" + "mov %2, %%"REG_c" \n\t" + "movzwl (%%"REG_c", %0), %%eax \n\t" + "movzwl 2(%%"REG_c", %0), %%edx \n\t" + "mov %5, %%"REG_c" \n\t" + "pxor %%mm4, %%mm4 \n\t" + "pxor %%mm5, %%mm5 \n\t" + "2: \n\t" + "movq (%1), %%mm1 \n\t" + "movq (%1, %6), %%mm3 \n\t" + "movd (%%"REG_c", %%"REG_a"), %%mm0 \n\t" + "movd (%%"REG_c", %%"REG_d"), %%mm2 \n\t" + "punpcklbw %%mm7, %%mm0 \n\t" + "punpcklbw %%mm7, %%mm2 \n\t" + "pmaddwd %%mm1, %%mm0 \n\t" + "pmaddwd %%mm2, %%mm3 \n\t" + "paddd %%mm3, %%mm5 \n\t" + "paddd %%mm0, %%mm4 \n\t" + "add $8, 
%1 \n\t" + "add $4, %%"REG_c" \n\t" + "cmp %4, %%"REG_c" \n\t" + " jb 2b \n\t" + "add %6, %1 \n\t" + "movq %%mm4, %%mm0 \n\t" + "punpckldq %%mm5, %%mm4 \n\t" + "punpckhdq %%mm5, %%mm0 \n\t" + "paddd %%mm0, %%mm4 \n\t" + "psrad $7, %%mm4 \n\t" + "packssdw %%mm4, %%mm4 \n\t" + "mov %3, %%"REG_a" \n\t" + "movd %%mm4, (%%"REG_a", %0) \n\t" + "add $4, %0 \n\t" + " jnc 1b \n\t" + + : "+r" (counter), "+r" (filter) + : "m" (filterPos), "m" (dst), "m"(offset), + "m" (src), "r" ((x86_reg)filterSize*2) + : "%"REG_a, "%"REG_c, "%"REG_d + ); + } +} +#endif /* !COMPILE_TEMPLATE_MMX2 */ + +static inline void RENAME(hScale16)(int16_t *dst, int dstW, const uint16_t *src, int srcW, int xInc, + const int16_t *filter, const int16_t *filterPos, long filterSize, int shift) +{ + int i, j; + + assert(filterSize % 4 == 0 && filterSize>0); + if (filterSize==4 && shift<15) { // Always true for upscaling, sometimes for down, too. + x86_reg counter= -2*dstW; + filter-= counter*2; + filterPos-= counter/2; + dst-= counter/2; + __asm__ volatile( + "movd %5, %%mm7 \n\t" +#if defined(PIC) + "push %%"REG_b" \n\t" +#endif + "push %%"REG_BP" \n\t" // we use 7 regs here ... 
+ "mov %%"REG_a", %%"REG_BP" \n\t" + ".p2align 4 \n\t" + "1: \n\t" + "movzwl (%2, %%"REG_BP"), %%eax \n\t" + "movzwl 2(%2, %%"REG_BP"), %%ebx \n\t" + "movq (%1, %%"REG_BP", 4), %%mm1 \n\t" + "movq 8(%1, %%"REG_BP", 4), %%mm3 \n\t" + "movq (%3, %%"REG_a", 2), %%mm0 \n\t" + "movq (%3, %%"REG_b", 2), %%mm2 \n\t" + "pmaddwd %%mm1, %%mm0 \n\t" + "pmaddwd %%mm2, %%mm3 \n\t" + "movq %%mm0, %%mm4 \n\t" + "punpckldq %%mm3, %%mm0 \n\t" + "punpckhdq %%mm3, %%mm4 \n\t" + "paddd %%mm4, %%mm0 \n\t" + "psrad %%mm7, %%mm0 \n\t" + "packssdw %%mm0, %%mm0 \n\t" + "movd %%mm0, (%4, %%"REG_BP") \n\t" + "add $4, %%"REG_BP" \n\t" + " jnc 1b \n\t" + + "pop %%"REG_BP" \n\t" +#if defined(PIC) + "pop %%"REG_b" \n\t" +#endif + : "+a" (counter) + : "c" (filter), "d" (filterPos), "S" (src), "D" (dst), "m"(shift) +#if !defined(PIC) + : "%"REG_b +#endif + ); + } else if (filterSize==8 && shift<15) { + x86_reg counter= -2*dstW; + filter-= counter*4; + filterPos-= counter/2; + dst-= counter/2; + __asm__ volatile( + "movd %5, %%mm7 \n\t" +#if defined(PIC) + "push %%"REG_b" \n\t" +#endif + "push %%"REG_BP" \n\t" // we use 7 regs here ... 
+ "mov %%"REG_a", %%"REG_BP" \n\t" + ".p2align 4 \n\t" + "1: \n\t" + "movzwl (%2, %%"REG_BP"), %%eax \n\t" + "movzwl 2(%2, %%"REG_BP"), %%ebx \n\t" + "movq (%1, %%"REG_BP", 8), %%mm1 \n\t" + "movq 16(%1, %%"REG_BP", 8), %%mm3 \n\t" + "movq (%3, %%"REG_a", 2), %%mm0 \n\t" + "movq (%3, %%"REG_b", 2), %%mm2 \n\t" + "pmaddwd %%mm1, %%mm0 \n\t" + "pmaddwd %%mm2, %%mm3 \n\t" + + "movq 8(%1, %%"REG_BP", 8), %%mm1 \n\t" + "movq 24(%1, %%"REG_BP", 8), %%mm5 \n\t" + "movq 8(%3, %%"REG_a", 2), %%mm4 \n\t" + "movq 8(%3, %%"REG_b", 2), %%mm2 \n\t" + "pmaddwd %%mm1, %%mm4 \n\t" + "pmaddwd %%mm2, %%mm5 \n\t" + "paddd %%mm4, %%mm0 \n\t" + "paddd %%mm5, %%mm3 \n\t" + "movq %%mm0, %%mm4 \n\t" + "punpckldq %%mm3, %%mm0 \n\t" + "punpckhdq %%mm3, %%mm4 \n\t" + "paddd %%mm4, %%mm0 \n\t" + "psrad %%mm7, %%mm0 \n\t" + "packssdw %%mm0, %%mm0 \n\t" + "movd %%mm0, (%4, %%"REG_BP") \n\t" + "add $4, %%"REG_BP" \n\t" + " jnc 1b \n\t" + + "pop %%"REG_BP" \n\t" +#if defined(PIC) + "pop %%"REG_b" \n\t" +#endif + : "+a" (counter) + : "c" (filter), "d" (filterPos), "S" (src), "D" (dst), "m"(shift) +#if !defined(PIC) + : "%"REG_b +#endif + ); + } else if (shift<15){ + const uint16_t *offset = src+filterSize; + x86_reg counter= -2*dstW; + //filter-= counter*filterSize/2; + filterPos-= counter/2; + dst-= counter/2; + __asm__ volatile( + "movd %7, %%mm7 \n\t" + ".p2align 4 \n\t" + "1: \n\t" + "mov %2, %%"REG_c" \n\t" + "movzwl (%%"REG_c", %0), %%eax \n\t" + "movzwl 2(%%"REG_c", %0), %%edx \n\t" + "mov %5, %%"REG_c" \n\t" + "pxor %%mm4, %%mm4 \n\t" + "pxor %%mm5, %%mm5 \n\t" + "2: \n\t" + "movq (%1), %%mm1 \n\t" + "movq (%1, %6), %%mm3 \n\t" + "movq (%%"REG_c", %%"REG_a", 2), %%mm0 \n\t" + "movq (%%"REG_c", %%"REG_d", 2), %%mm2 \n\t" + "pmaddwd %%mm1, %%mm0 \n\t" + "pmaddwd %%mm2, %%mm3 \n\t" + "paddd %%mm3, %%mm5 \n\t" + "paddd %%mm0, %%mm4 \n\t" + "add $8, %1 \n\t" + "add $8, %%"REG_c" \n\t" + "cmp %4, %%"REG_c" \n\t" + " jb 2b \n\t" + "add %6, %1 \n\t" + "movq %%mm4, %%mm0 \n\t" + "punpckldq %%mm5, 
%%mm4 \n\t" + "punpckhdq %%mm5, %%mm0 \n\t" + "paddd %%mm0, %%mm4 \n\t" + "psrad %%mm7, %%mm4 \n\t" + "packssdw %%mm4, %%mm4 \n\t" + "mov %3, %%"REG_a" \n\t" + "movd %%mm4, (%%"REG_a", %0) \n\t" + "add $4, %0 \n\t" + " jnc 1b \n\t" + + : "+r" (counter), "+r" (filter) + : "m" (filterPos), "m" (dst), "m"(offset), + "m" (src), "r" ((x86_reg)filterSize*2), "m"(shift) + : "%"REG_a, "%"REG_c, "%"REG_d + ); + } else + for (i=0; i<dstW; i++) { + int srcPos= filterPos[i]; + int val=0; + for (j=0; j<filterSize; j++) { + val += ((int)src[srcPos + j])*filter[filterSize*i + j]; + } + dst[i] = FFMIN(val>>shift, (1<<15)-1); // the cubic equation does overflow ... + } +} + + +#if COMPILE_TEMPLATE_MMX2 +static void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst, + int dstWidth, const uint8_t *src, + int srcW, int xInc) +{ + int16_t *filterPos = c->hLumFilterPos; + int16_t *filter = c->hLumFilter; + void *mmx2FilterCode= c->lumMmx2FilterCode; + int i; +#if defined(PIC) + DECLARE_ALIGNED(8, uint64_t, ebxsave); +#endif + + __asm__ volatile( +#if defined(PIC) + "mov %%"REG_b", %5 \n\t" +#endif + "pxor %%mm7, %%mm7 \n\t" + "mov %0, %%"REG_c" \n\t" + "mov %1, %%"REG_D" \n\t" + "mov %2, %%"REG_d" \n\t" + "mov %3, %%"REG_b" \n\t" + "xor %%"REG_a", %%"REG_a" \n\t" // i + PREFETCH" (%%"REG_c") \n\t" + PREFETCH" 32(%%"REG_c") \n\t" + PREFETCH" 64(%%"REG_c") \n\t" + +#if ARCH_X86_64 +#define CALL_MMX2_FILTER_CODE \ + "movl (%%"REG_b"), %%esi \n\t"\ + "call *%4 \n\t"\ + "movl (%%"REG_b", %%"REG_a"), %%esi \n\t"\ + "add %%"REG_S", %%"REG_c" \n\t"\ + "add %%"REG_a", %%"REG_D" \n\t"\ + "xor %%"REG_a", %%"REG_a" \n\t"\ + +#else +#define CALL_MMX2_FILTER_CODE \ + "movl (%%"REG_b"), %%esi \n\t"\ + "call *%4 \n\t"\ + "addl (%%"REG_b", %%"REG_a"), %%"REG_c" \n\t"\ + "add %%"REG_a", %%"REG_D" \n\t"\ + "xor %%"REG_a", %%"REG_a" \n\t"\ + +#endif /* ARCH_X86_64 */ + + CALL_MMX2_FILTER_CODE + CALL_MMX2_FILTER_CODE + CALL_MMX2_FILTER_CODE + CALL_MMX2_FILTER_CODE + CALL_MMX2_FILTER_CODE + 
CALL_MMX2_FILTER_CODE + CALL_MMX2_FILTER_CODE + CALL_MMX2_FILTER_CODE + +#if defined(PIC) + "mov %5, %%"REG_b" \n\t" +#endif + :: "m" (src), "m" (dst), "m" (filter), "m" (filterPos), + "m" (mmx2FilterCode) +#if defined(PIC) + ,"m" (ebxsave) +#endif + : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D +#if !defined(PIC) + ,"%"REG_b +#endif + ); + + for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) + dst[i] = src[srcW-1]*128; +} + +static void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *dst2, + int dstWidth, const uint8_t *src1, + const uint8_t *src2, int srcW, int xInc) +{ + int16_t *filterPos = c->hChrFilterPos; + int16_t *filter = c->hChrFilter; + void *mmx2FilterCode= c->chrMmx2FilterCode; + int i; +#if defined(PIC) + DECLARE_ALIGNED(8, uint64_t, ebxsave); +#endif + + __asm__ volatile( +#if defined(PIC) + "mov %%"REG_b", %7 \n\t" +#endif + "pxor %%mm7, %%mm7 \n\t" + "mov %0, %%"REG_c" \n\t" + "mov %1, %%"REG_D" \n\t" + "mov %2, %%"REG_d" \n\t" + "mov %3, %%"REG_b" \n\t" + "xor %%"REG_a", %%"REG_a" \n\t" // i + PREFETCH" (%%"REG_c") \n\t" + PREFETCH" 32(%%"REG_c") \n\t" + PREFETCH" 64(%%"REG_c") \n\t" + + CALL_MMX2_FILTER_CODE + CALL_MMX2_FILTER_CODE + CALL_MMX2_FILTER_CODE + CALL_MMX2_FILTER_CODE + "xor %%"REG_a", %%"REG_a" \n\t" // i + "mov %5, %%"REG_c" \n\t" // src + "mov %6, %%"REG_D" \n\t" // buf2 + PREFETCH" (%%"REG_c") \n\t" + PREFETCH" 32(%%"REG_c") \n\t" + PREFETCH" 64(%%"REG_c") \n\t" + + CALL_MMX2_FILTER_CODE + CALL_MMX2_FILTER_CODE + CALL_MMX2_FILTER_CODE + CALL_MMX2_FILTER_CODE + +#if defined(PIC) + "mov %7, %%"REG_b" \n\t" +#endif + :: "m" (src1), "m" (dst1), "m" (filter), "m" (filterPos), + "m" (mmx2FilterCode), "m" (src2), "m"(dst2) +#if defined(PIC) + ,"m" (ebxsave) +#endif + : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D +#if !defined(PIC) + ,"%"REG_b +#endif + ); + + for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) { + dst1[i] = src1[srcW-1]*128; + dst2[i] = src2[srcW-1]*128; + } +} +#endif /* COMPILE_TEMPLATE_MMX2 */ + +static 
av_cold void RENAME(sws_init_swScale)(SwsContext *c) +{ + enum PixelFormat srcFormat = c->srcFormat, + dstFormat = c->dstFormat; + + if (!is16BPS(dstFormat) && !is9_OR_10BPS(dstFormat) && dstFormat != PIX_FMT_NV12 + && dstFormat != PIX_FMT_NV21 && !(c->flags & SWS_BITEXACT)) { + if (c->flags & SWS_ACCURATE_RND) { + c->yuv2yuv1 = RENAME(yuv2yuv1_ar ); + c->yuv2yuvX = RENAME(yuv2yuvX_ar ); + if (!(c->flags & SWS_FULL_CHR_H_INT)) { + switch (c->dstFormat) { + case PIX_FMT_RGB32: c->yuv2packedX = RENAME(yuv2rgb32_X_ar); break; + case PIX_FMT_BGR24: c->yuv2packedX = RENAME(yuv2bgr24_X_ar); break; + case PIX_FMT_RGB555: c->yuv2packedX = RENAME(yuv2rgb555_X_ar); break; + case PIX_FMT_RGB565: c->yuv2packedX = RENAME(yuv2rgb565_X_ar); break; + case PIX_FMT_YUYV422: c->yuv2packedX = RENAME(yuv2yuyv422_X_ar); break; + default: break; + } + } + } else { + int should_dither= isNBPS(c->srcFormat) || is16BPS(c->srcFormat); + c->yuv2yuv1 = should_dither ? RENAME(yuv2yuv1_ar ) : RENAME(yuv2yuv1 ); + c->yuv2yuvX = RENAME(yuv2yuvX ); + if (!(c->flags & SWS_FULL_CHR_H_INT)) { + switch (c->dstFormat) { + case PIX_FMT_RGB32: c->yuv2packedX = RENAME(yuv2rgb32_X); break; + case PIX_FMT_BGR24: c->yuv2packedX = RENAME(yuv2bgr24_X); break; + case PIX_FMT_RGB555: c->yuv2packedX = RENAME(yuv2rgb555_X); break; + case PIX_FMT_RGB565: c->yuv2packedX = RENAME(yuv2rgb565_X); break; + case PIX_FMT_YUYV422: c->yuv2packedX = RENAME(yuv2yuyv422_X); break; + default: break; + } + } + } + if (!(c->flags & SWS_FULL_CHR_H_INT)) { + switch (c->dstFormat) { + case PIX_FMT_RGB32: + c->yuv2packed1 = RENAME(yuv2rgb32_1); + c->yuv2packed2 = RENAME(yuv2rgb32_2); + break; + case PIX_FMT_BGR24: + c->yuv2packed1 = RENAME(yuv2bgr24_1); + c->yuv2packed2 = RENAME(yuv2bgr24_2); + break; + case PIX_FMT_RGB555: + c->yuv2packed1 = RENAME(yuv2rgb555_1); + c->yuv2packed2 = RENAME(yuv2rgb555_2); + break; + case PIX_FMT_RGB565: + c->yuv2packed1 = RENAME(yuv2rgb565_1); + c->yuv2packed2 = RENAME(yuv2rgb565_2); + break; + case 
PIX_FMT_YUYV422: + c->yuv2packed1 = RENAME(yuv2yuyv422_1); + c->yuv2packed2 = RENAME(yuv2yuyv422_2); + break; + default: + break; + } + } + } + +#if !COMPILE_TEMPLATE_MMX2 + c->hScale = RENAME(hScale ); +#endif /* !COMPILE_TEMPLATE_MMX2 */ + + // Use the new MMX scaler if the MMX2 one can't be used (it is faster than the x86 ASM one). +#if COMPILE_TEMPLATE_MMX2 + if (c->flags & SWS_FAST_BILINEAR && c->canMMX2BeUsed) + { + c->hyscale_fast = RENAME(hyscale_fast); + c->hcscale_fast = RENAME(hcscale_fast); + } else { +#endif /* COMPILE_TEMPLATE_MMX2 */ + c->hyscale_fast = NULL; + c->hcscale_fast = NULL; +#if COMPILE_TEMPLATE_MMX2 + } +#endif /* COMPILE_TEMPLATE_MMX2 */ + +#if !COMPILE_TEMPLATE_MMX2 + switch(srcFormat) { + case PIX_FMT_YUYV422 : c->chrToYV12 = RENAME(yuy2ToUV); break; + case PIX_FMT_UYVY422 : c->chrToYV12 = RENAME(uyvyToUV); break; + case PIX_FMT_NV12 : c->chrToYV12 = RENAME(nv12ToUV); break; + case PIX_FMT_NV21 : c->chrToYV12 = RENAME(nv21ToUV); break; + case PIX_FMT_GRAY16LE : + case PIX_FMT_YUV420P9LE: + case PIX_FMT_YUV422P10LE: + case PIX_FMT_YUV420P10LE: + case PIX_FMT_YUV420P16LE: + case PIX_FMT_YUV422P16LE: + case PIX_FMT_YUV444P16LE: c->hScale16= RENAME(hScale16); break; + } +#endif /* !COMPILE_TEMPLATE_MMX2 */ + if (!c->chrSrcHSubSample) { + switch(srcFormat) { + case PIX_FMT_BGR24 : c->chrToYV12 = RENAME(bgr24ToUV); break; + case PIX_FMT_RGB24 : c->chrToYV12 = RENAME(rgb24ToUV); break; + default: break; + } + } + + switch (srcFormat) { +#if !COMPILE_TEMPLATE_MMX2 + case PIX_FMT_YUYV422 : + case PIX_FMT_Y400A : + c->lumToYV12 = RENAME(yuy2ToY); break; + case PIX_FMT_UYVY422 : + c->lumToYV12 = RENAME(uyvyToY); break; +#endif /* !COMPILE_TEMPLATE_MMX2 */ + case PIX_FMT_BGR24 : c->lumToYV12 = RENAME(bgr24ToY); break; + case PIX_FMT_RGB24 : c->lumToYV12 = RENAME(rgb24ToY); break; + default: break; + } +#if !COMPILE_TEMPLATE_MMX2 + if (c->alpPixBuf) { + switch (srcFormat) { + case PIX_FMT_Y400A : c->alpToYV12 = RENAME(yuy2ToY); break; + default: 
break; + } + } +#endif /* !COMPILE_TEMPLATE_MMX2 */ + if(isAnyRGB(c->srcFormat)) + c->hScale16= RENAME(hScale16); +} diff --git a/libswscale/x86/yuv2rgb_mmx.c b/libswscale/x86/yuv2rgb_mmx.c index 6478311f00..df0e1a3726 100644 --- a/libswscale/x86/yuv2rgb_mmx.c +++ b/libswscale/x86/yuv2rgb_mmx.c @@ -34,6 +34,7 @@ #include "libswscale/swscale.h" #include "libswscale/swscale_internal.h" #include "libavutil/x86_cpu.h" +#include "libavutil/cpu.h" #define DITHER1XBPP // only for MMX @@ -46,57 +47,60 @@ DECLARE_ASM_CONST(8, uint64_t, pb_03) = 0x0303030303030303ULL; DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL; //MMX versions +#if HAVE_MMX #undef RENAME -#undef HAVE_MMX2 -#undef HAVE_AMD3DNOW -#define HAVE_MMX2 0 -#define HAVE_AMD3DNOW 0 +#undef COMPILE_TEMPLATE_MMX2 +#define COMPILE_TEMPLATE_MMX2 0 #define RENAME(a) a ## _MMX #include "yuv2rgb_template.c" +#endif /* HAVE_MMX */ //MMX2 versions +#if HAVE_MMX2 #undef RENAME -#undef HAVE_MMX2 -#define HAVE_MMX2 1 +#undef COMPILE_TEMPLATE_MMX2 +#define COMPILE_TEMPLATE_MMX2 1 #define RENAME(a) a ## _MMX2 #include "yuv2rgb_template.c" +#endif /* HAVE_MMX2 */ SwsFunc ff_yuv2rgb_init_mmx(SwsContext *c) { - if (c->flags & SWS_CPU_CAPS_MMX2) { + int cpu_flags = av_get_cpu_flags(); + + if (c->srcFormat != PIX_FMT_YUV420P && + c->srcFormat != PIX_FMT_YUVA420P) + return NULL; + +#if HAVE_MMX2 + if (cpu_flags & AV_CPU_FLAG_MMX2) { switch (c->dstFormat) { - case PIX_FMT_RGB32: - if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P) { - if (HAVE_7REGS) return yuva420_rgb32_MMX2; - break; - } else return yuv420_rgb32_MMX2; - case PIX_FMT_BGR32: - if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P) { - if (HAVE_7REGS) return yuva420_bgr32_MMX2; - break; - } else return yuv420_bgr32_MMX2; case PIX_FMT_RGB24: return yuv420_rgb24_MMX2; case PIX_FMT_BGR24: return yuv420_bgr24_MMX2; - case PIX_FMT_RGB565: return yuv420_rgb16_MMX2; - case PIX_FMT_RGB555: return yuv420_rgb15_MMX2; } } - if (c->flags & 
SWS_CPU_CAPS_MMX) { +#endif + + if (cpu_flags & AV_CPU_FLAG_MMX) { switch (c->dstFormat) { - case PIX_FMT_RGB32: - if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P) { - if (HAVE_7REGS) return yuva420_rgb32_MMX; - break; - } else return yuv420_rgb32_MMX; - case PIX_FMT_BGR32: - if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P) { - if (HAVE_7REGS) return yuva420_bgr32_MMX; - break; - } else return yuv420_bgr32_MMX; - case PIX_FMT_RGB24: return yuv420_rgb24_MMX; - case PIX_FMT_BGR24: return yuv420_bgr24_MMX; - case PIX_FMT_RGB565: return yuv420_rgb16_MMX; - case PIX_FMT_RGB555: return yuv420_rgb15_MMX; + case PIX_FMT_RGB32: + if (c->srcFormat == PIX_FMT_YUVA420P) { +#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA + return yuva420_rgb32_MMX; +#endif + break; + } else return yuv420_rgb32_MMX; + case PIX_FMT_BGR32: + if (c->srcFormat == PIX_FMT_YUVA420P) { +#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA + return yuva420_bgr32_MMX; +#endif + break; + } else return yuv420_bgr32_MMX; + case PIX_FMT_RGB24: return yuv420_rgb24_MMX; + case PIX_FMT_BGR24: return yuv420_bgr24_MMX; + case PIX_FMT_RGB565: return yuv420_rgb16_MMX; + case PIX_FMT_RGB555: return yuv420_rgb15_MMX; } } diff --git a/libswscale/x86/yuv2rgb_template.c b/libswscale/x86/yuv2rgb_template.c index 8050932d1d..926e3fb9c4 100644 --- a/libswscale/x86/yuv2rgb_template.c +++ b/libswscale/x86/yuv2rgb_template.c @@ -25,14 +25,7 @@ #undef EMMS #undef SFENCE -#if HAVE_AMD3DNOW -/* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. 
*/ -#define EMMS "femms" -#else -#define EMMS "emms" -#endif - -#if HAVE_MMX2 +#if COMPILE_TEMPLATE_MMX2 #define MOVNTQ "movntq" #define SFENCE "sfence" #else @@ -50,17 +43,14 @@ if (h_size * depth > FFABS(dstStride[0])) \ h_size -= 8; \ \ - if (c->srcFormat == PIX_FMT_YUV422P) { \ - srcStride[1] *= 2; \ - srcStride[2] *= 2; \ - } \ + vshift = c->srcFormat != PIX_FMT_YUV422P; \ \ __asm__ volatile ("pxor %mm4, %mm4\n\t"); \ for (y = 0; y < srcSliceH; y++) { \ uint8_t *image = dst[0] + (y + srcSliceY) * dstStride[0]; \ const uint8_t *py = src[0] + y * srcStride[0]; \ - const uint8_t *pu = src[1] + (y >> 1) * srcStride[1]; \ - const uint8_t *pv = src[2] + (y >> 1) * srcStride[2]; \ + const uint8_t *pu = src[1] + (y >> vshift) * srcStride[1]; \ + const uint8_t *pv = src[2] + (y >> vshift) * srcStride[2]; \ x86_reg index = -h_size / 2; \ #define YUV2RGB_INITIAL_LOAD \ @@ -159,7 +149,8 @@ } \ #define YUV2RGB_ENDFUNC \ - __asm__ volatile (SFENCE"\n\t"EMMS); \ + __asm__ volatile (SFENCE"\n\t" \ + "emms \n\t"); \ return srcSliceH; \ #define IF0(x) @@ -188,12 +179,13 @@ "paddusb "GREEN_DITHER"(%4), %%mm2\n\t" \ "paddusb "RED_DITHER"(%4), %%mm1\n\t" \ +#if !COMPILE_TEMPLATE_MMX2 static inline int RENAME(yuv420_rgb15)(SwsContext *c, const uint8_t *src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[]) { - int y, h_size; + int y, h_size, vshift; YUV2RGB_LOOP(2) @@ -221,7 +213,7 @@ static inline int RENAME(yuv420_rgb16)(SwsContext *c, const uint8_t *src[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[]) { - int y, h_size; + int y, h_size, vshift; YUV2RGB_LOOP(2) @@ -243,6 +235,7 @@ static inline int RENAME(yuv420_rgb16)(SwsContext *c, const uint8_t *src[], YUV2RGB_OPERANDS YUV2RGB_ENDFUNC } +#endif /* !COMPILE_TEMPLATE_MMX2 */ #define RGB_PACK24(blue, red)\ "packuswb %%mm3, %%mm0 \n" /* R0 R2 R4 R6 R1 R3 R5 R7 */\ @@ -259,7 +252,7 @@ static inline int RENAME(yuv420_rgb16)(SwsContext *c, const uint8_t *src[], "punpckhwd %%mm6, %%mm5 
\n" /* R4 G4 B4 R5 R6 G6 B6 R7 */\ RGB_PACK24_B -#if HAVE_MMX2 +#if COMPILE_TEMPLATE_MMX2 DECLARE_ASM_CONST(8, int16_t, mask1101[4]) = {-1,-1, 0,-1}; DECLARE_ASM_CONST(8, int16_t, mask0010[4]) = { 0, 0,-1, 0}; DECLARE_ASM_CONST(8, int16_t, mask0110[4]) = { 0,-1,-1, 0}; @@ -310,7 +303,7 @@ static inline int RENAME(yuv420_rgb24)(SwsContext *c, const uint8_t *src[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[]) { - int y, h_size; + int y, h_size, vshift; YUV2RGB_LOOP(3) @@ -328,7 +321,7 @@ static inline int RENAME(yuv420_bgr24)(SwsContext *c, const uint8_t *src[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[]) { - int y, h_size; + int y, h_size, vshift; YUV2RGB_LOOP(3) @@ -366,12 +359,13 @@ static inline int RENAME(yuv420_bgr24)(SwsContext *c, const uint8_t *src[], MOVNTQ " %%mm5, 16(%1)\n\t" \ MOVNTQ " %%mm"alpha", 24(%1)\n\t" \ +#if !COMPILE_TEMPLATE_MMX2 static inline int RENAME(yuv420_rgb32)(SwsContext *c, const uint8_t *src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[]) { - int y, h_size; + int y, h_size, vshift; YUV2RGB_LOOP(4) @@ -386,13 +380,13 @@ static inline int RENAME(yuv420_rgb32)(SwsContext *c, const uint8_t *src[], YUV2RGB_ENDFUNC } +#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA static inline int RENAME(yuva420_rgb32)(SwsContext *c, const uint8_t *src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[]) { -#if HAVE_7REGS - int y, h_size; + int y, h_size, vshift; YUV2RGB_LOOP(4) @@ -406,16 +400,15 @@ static inline int RENAME(yuva420_rgb32)(SwsContext *c, const uint8_t *src[], YUV2RGB_ENDLOOP(4) YUV2RGB_OPERANDS_ALPHA YUV2RGB_ENDFUNC -#endif - return 0; } +#endif static inline int RENAME(yuv420_bgr32)(SwsContext *c, const uint8_t *src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[]) { - int y, h_size; + int y, h_size, vshift; YUV2RGB_LOOP(4) @@ -430,13 +423,13 @@ static inline int RENAME(yuv420_bgr32)(SwsContext *c, const uint8_t 
*src[], YUV2RGB_ENDFUNC } +#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA static inline int RENAME(yuva420_bgr32)(SwsContext *c, const uint8_t *src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[]) { -#if HAVE_7REGS - int y, h_size; + int y, h_size, vshift; YUV2RGB_LOOP(4) @@ -450,6 +443,7 @@ static inline int RENAME(yuva420_bgr32)(SwsContext *c, const uint8_t *src[], YUV2RGB_ENDLOOP(4) YUV2RGB_OPERANDS_ALPHA YUV2RGB_ENDFUNC -#endif - return 0; } +#endif + +#endif /* !COMPILE_TEMPLATE_MMX2 */ diff --git a/libswscale/yuv2rgb.c b/libswscale/yuv2rgb.c index f8365ef567..36182a5ea9 100644 --- a/libswscale/yuv2rgb.c +++ b/libswscale/yuv2rgb.c @@ -32,8 +32,9 @@ #include "rgb2rgb.h" #include "swscale.h" #include "swscale_internal.h" -#include "libavutil/x86_cpu.h" +#include "libavutil/cpu.h" #include "libavutil/bswap.h" +#include "libavutil/pixdesc.h" extern const uint8_t dither_4x4_16[4][8]; extern const uint8_t dither_8x8_32[8][8]; @@ -366,28 +367,6 @@ YUV2RGBFUNC(yuv2rgb_c_16, uint16_t, 0) PUTRGB(dst_1,py_1,3); CLOSEYUV2RGBFUNC(8) -#if 0 // Currently unused -// This is exactly the same code as yuv2rgb_c_32 except for the types of -// r, g, b, dst_1, dst_2 -YUV2RGBFUNC(yuv2rgb_c_8, uint8_t, 0) - LOADCHROMA(0); - PUTRGB(dst_1,py_1,0); - PUTRGB(dst_2,py_2,0); - - LOADCHROMA(1); - PUTRGB(dst_2,py_2,1); - PUTRGB(dst_1,py_1,1); - - LOADCHROMA(2); - PUTRGB(dst_1,py_1,2); - PUTRGB(dst_2,py_2,2); - - LOADCHROMA(3); - PUTRGB(dst_2,py_2,3); - PUTRGB(dst_1,py_1,3); -CLOSEYUV2RGBFUNC(8) -#endif - // r, g, b, dst_1, dst_2 YUV2RGBFUNC(yuv2rgb_c_12_ordered_dither, uint16_t, 0) const uint8_t *d16 = dither_4x4_16[y&3]; @@ -441,36 +420,6 @@ YUV2RGBFUNC(yuv2rgb_c_8_ordered_dither, uint8_t, 0) PUTRGB8(dst_1,py_1,3,6); CLOSEYUV2RGBFUNC(8) -#if 0 // Currently unused -// This is exactly the same code as yuv2rgb_c_32 except for the types of -// r, g, b, dst_1, dst_2 -YUV2RGBFUNC(yuv2rgb_c_4, uint8_t, 0) - int acc; -#define PUTRGB4(dst,src,i) \ - Y = src[2*i]; \ - acc = 
r[Y] + g[Y] + b[Y]; \ - Y = src[2*i+1]; \ - acc |= (r[Y] + g[Y] + b[Y])<<4; \ - dst[i] = acc; - - LOADCHROMA(0); - PUTRGB4(dst_1,py_1,0); - PUTRGB4(dst_2,py_2,0); - - LOADCHROMA(1); - PUTRGB4(dst_2,py_2,1); - PUTRGB4(dst_1,py_1,1); - - LOADCHROMA(2); - PUTRGB4(dst_1,py_1,2); - PUTRGB4(dst_2,py_2,2); - - LOADCHROMA(3); - PUTRGB4(dst_2,py_2,3); - PUTRGB4(dst_1,py_1,3); -CLOSEYUV2RGBFUNC(4) -#endif - YUV2RGBFUNC(yuv2rgb_c_4_ordered_dither, uint8_t, 0) const uint8_t *d64 = dither_8x8_73[y&7]; const uint8_t *d128 = dither_8x8_220[y&7]; @@ -500,28 +449,6 @@ YUV2RGBFUNC(yuv2rgb_c_4_ordered_dither, uint8_t, 0) PUTRGB4D(dst_1,py_1,3,6); CLOSEYUV2RGBFUNC(4) -#if 0 // Currently unused -// This is exactly the same code as yuv2rgb_c_32 except for the types of -// r, g, b, dst_1, dst_2 -YUV2RGBFUNC(yuv2rgb_c_4b, uint8_t, 0) - LOADCHROMA(0); - PUTRGB(dst_1,py_1,0); - PUTRGB(dst_2,py_2,0); - - LOADCHROMA(1); - PUTRGB(dst_2,py_2,1); - PUTRGB(dst_1,py_1,1); - - LOADCHROMA(2); - PUTRGB(dst_1,py_1,2); - PUTRGB(dst_2,py_2,2); - - LOADCHROMA(3); - PUTRGB(dst_2,py_2,3); - PUTRGB(dst_1,py_1,3); -CLOSEYUV2RGBFUNC(8) -#endif - YUV2RGBFUNC(yuv2rgb_c_4b_ordered_dither, uint8_t, 0) const uint8_t *d64 = dither_8x8_73[y&7]; const uint8_t *d128 = dither_8x8_220[y&7]; @@ -579,29 +506,24 @@ CLOSEYUV2RGBFUNC(1) SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c) { SwsFunc t = NULL; -#if HAVE_MMX - t = ff_yuv2rgb_init_mmx(c); -#endif -#if HAVE_VIS - t = ff_yuv2rgb_init_vis(c); -#endif -#if CONFIG_MLIB - t = ff_yuv2rgb_init_mlib(c); -#endif -#if HAVE_ALTIVEC - if (c->flags & SWS_CPU_CAPS_ALTIVEC) - t = ff_yuv2rgb_init_altivec(c); -#endif -#if ARCH_BFIN - if (c->flags & SWS_CPU_CAPS_BFIN) + if (HAVE_MMX) { + t = ff_yuv2rgb_init_mmx(c); + } else if (HAVE_VIS) { + t = ff_yuv2rgb_init_vis(c); + } else if (CONFIG_MLIB) { + t = ff_yuv2rgb_init_mlib(c); + } else if (HAVE_ALTIVEC) { + t = ff_yuv2rgb_init_altivec(c); + } else if (ARCH_BFIN) { t = ff_yuv2rgb_get_func_ptr_bfin(c); -#endif + } if (t) return t; - 
av_log(c, AV_LOG_WARNING, "No accelerated colorspace conversion found from %s to %s.\n", sws_format_name(c->srcFormat), sws_format_name(c->dstFormat)); + av_log(c, AV_LOG_WARNING, "No accelerated colorspace conversion found from %s to %s.\n", + av_get_pix_fmt_name(c->srcFormat), av_get_pix_fmt_name(c->dstFormat)); switch (c->dstFormat) { case PIX_FMT_BGR48BE: diff --git a/mt-work/todo.txt b/mt-work/todo.txt index 013853e3ae..f64514f6fa 100644 --- a/mt-work/todo.txt +++ b/mt-work/todo.txt @@ -1,7 +1,7 @@ Todo -- For other people -- Multithread vp8 or vc1. +- Multithread vc1. - Multithread an intra codec like mjpeg (trivial). - Fix mpeg1 (see below). - Try the first three items under Optimization. @@ -18,11 +18,11 @@ work.) In general testing error paths should be done more. bugs in vsync in ffmpeg.c, which are currently obscuring real failures. h264: -- Files split at the wrong NAL unit don't (and can't) -be decoded with threads (e.g. TS split so PPS is after -the frame, PAFF with two fields in a packet). Scan the -packet at the start of decode and don't finish setup -until all PPS/SPS have been encountered. +- Files that aren't parsed (e.g. mp4) and contain PAFF with two +field pictures in the same packet are not optimal. Modify the +nals_needed check so that the second field's first slice is +considered as needed, then uncomment the FIXME code in decode_postinit. +Ex: http://astrange.ithinksw.net/ffmpeg/mt-samples/PAFF-Chalet-Tire.mp4 mpeg4: - Packed B-frames need to be explicitly split up diff --git a/mt-work/valgrind-check.sh b/mt-work/valgrind-check.sh index dc3833abb6..276327a76a 100644 --- a/mt-work/valgrind-check.sh +++ b/mt-work/valgrind-check.sh @@ -1,3 +1,5 @@ #!/bin/bash -valgrind --leak-check=full ./ffmpeg_g -threads 3 -vsync 0 -y -t 30 -i "$1" -an -f framecrc /dev/null
\ No newline at end of file +valgrind --track-origins=yes --leak-check=full ./ffmpeg_g -threads 1 -vsync 0 -y -t 30 -i "$1" -an -f null /dev/null + +valgrind --track-origins=yes --leak-check=full ./ffmpeg_g -threads 3 -vsync 0 -y -t 30 -i "$1" -an -f null /dev/null diff --git a/subdir.mak b/subdir.mak index 8a407fe164..092662fc14 100644 --- a/subdir.mak +++ b/subdir.mak @@ -50,12 +50,12 @@ endif clean:: $(RM) $(addprefix $(SUBDIR),*-example$(EXESUF) *-test$(EXESUF) $(CLEANFILES) $(CLEANSUFFIXES) $(LIBSUFFIXES)) \ - $(addprefix $(SUBDIR), $(foreach suffix,$(CLEANSUFFIXES),$(addsuffix /$(suffix),$(DIRS)))) \ + $(foreach dir,$(DIRS),$(CLEANSUFFIXES:%=$(SUBDIR)$(dir)/%)) \ $(HOSTOBJS) $(HOSTPROGS) distclean:: clean - $(RM) $(addprefix $(SUBDIR),$(DISTCLEANSUFFIXES)) \ - $(addprefix $(SUBDIR), $(foreach suffix,$(DISTCLEANSUFFIXES),$(addsuffix /$(suffix),$(DIRS)))) + $(RM) $(DISTCLEANSUFFIXES:%=$(SUBDIR)%) \ + $(foreach dir,$(DIRS),$(DISTCLEANSUFFIXES:%=$(SUBDIR)$(dir)/%)) install-lib$(NAME)-shared: $(SUBDIR)$(SLIBNAME) $(Q)mkdir -p "$(SHLIBDIR)" @@ -88,9 +88,9 @@ uninstall-libs:: -$(RM) "$(LIBDIR)/$(LIBNAME)" uninstall-headers:: - $(RM) $(addprefix "$(INCINSTDIR)/",$(HEADERS)) + $(RM) $(addprefix "$(INCINSTDIR)/",$(HEADERS)) $(addprefix "$(INCINSTDIR)/",$(BUILT_HEADERS)) $(RM) "$(LIBDIR)/pkgconfig/lib$(NAME).pc" - -rmdir "$(INCDIR)" + -rmdir "$(INCINSTDIR)" endef $(eval $(RULES)) diff --git a/tests/codec-regression.sh b/tests/codec-regression.sh index 5f4e539381..70a77e5558 100755 --- a/tests/codec-regression.sh +++ b/tests/codec-regression.sh @@ -12,14 +12,13 @@ set -e eval do_$test=y rm -f "$logfile" -rm -f "$benchfile" # generate reference for quality check if [ -n "$do_vref" ]; then -do_ffmpeg_nocheck $raw_ref -f image2 -vcodec pgmyuv -i $raw_src -an -f rawvideo $target_path/$raw_ref +do_ffmpeg $raw_ref -f image2 -vcodec pgmyuv -i $raw_src -an -f rawvideo fi if [ -n "$do_aref" ]; then -do_ffmpeg_nocheck $pcm_ref -ab 128k -ac 2 -ar 44100 -f s16le -i $pcm_src -f wav 
$target_path/$pcm_ref +do_ffmpeg $pcm_ref -ab 128k -ac 2 -ar 44100 -f s16le -i $pcm_src -f wav fi if [ -n "$do_mpeg" ] ; then diff --git a/tests/fate.sh b/tests/fate.sh index 6f0e0fffcb..9fd117c3bc 100755 --- a/tests/fate.sh +++ b/tests/fate.sh @@ -75,7 +75,7 @@ clean(){ report(){ date=$(date -u +%Y%m%d%H%M%S) - echo "fate:0:${date}:${slot}:${version}:$1:$2" >report + echo "fate:0:${date}:${slot}:${version}:$1:$2:${comment}" >report cat ${build}/config.fate ${build}/tests/data/fate/*.rep >>report test -n "$fate_recv" && $tar report *.log | gzip | $fate_recv } @@ -91,8 +91,8 @@ lock ${workdir} || die "${workdir} locked" cd ${workdir} || die "cd ${workdir} failed" src=${workdir}/src -build=${workdir}/build -inst=${workdir}/install +: ${build:=${workdir}/build} +: ${inst:=${workdir}/install} test -d "$src" && update || checkout || die "Error fetching source" diff --git a/tests/fate/h264.mak b/tests/fate/h264.mak index 0f18be4888..969bf413e4 100644 --- a/tests/fate/h264.mak +++ b/tests/fate/h264.mak @@ -130,6 +130,7 @@ FATE_H264 = aud_mw_e \ frext-pph10i1_panasonic_a \ frext-pph10i2_panasonic_a \ frext-pph10i3_panasonic_a \ + frext-pph10i4_panasonic_a \ frext-pph10i5_panasonic_a \ frext-pph10i6_panasonic_a \ frext-pph10i7_panasonic_a \ @@ -178,23 +179,23 @@ FATE_H264 := $(FATE_H264:%=fate-h264-conformance-%) \ FATE_TESTS += $(FATE_H264) fate-h264: $(FATE_H264) -fate-h264-conformance-aud_mw_e: CMD = framecrc -i $(SAMPLES)/h264-conformance/AUD_MW_E.264 -fate-h264-conformance-ba1_ft_c: CMD = framecrc -i $(SAMPLES)/h264-conformance/BA1_FT_C.264 -fate-h264-conformance-ba1_sony_d: CMD = framecrc -i $(SAMPLES)/h264-conformance/BA1_Sony_D.jsv -fate-h264-conformance-ba2_sony_f: CMD = framecrc -i $(SAMPLES)/h264-conformance/BA2_Sony_F.jsv +fate-h264-conformance-aud_mw_e: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/AUD_MW_E.264 +fate-h264-conformance-ba1_ft_c: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/BA1_FT_C.264 +fate-h264-conformance-ba1_sony_d: CMD = 
framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/BA1_Sony_D.jsv +fate-h264-conformance-ba2_sony_f: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/BA2_Sony_F.jsv fate-h264-conformance-ba3_sva_c: CMD = framecrc -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/BA3_SVA_C.264 -fate-h264-conformance-ba_mw_d: CMD = framecrc -i $(SAMPLES)/h264-conformance/BA_MW_D.264 -fate-h264-conformance-bamq1_jvc_c: CMD = framecrc -i $(SAMPLES)/h264-conformance/BAMQ1_JVC_C.264 -fate-h264-conformance-bamq2_jvc_c: CMD = framecrc -i $(SAMPLES)/h264-conformance/BAMQ2_JVC_C.264 -fate-h264-conformance-banm_mw_d: CMD = framecrc -i $(SAMPLES)/h264-conformance/BANM_MW_D.264 -fate-h264-conformance-basqp1_sony_c: CMD = framecrc -i $(SAMPLES)/h264-conformance/BASQP1_Sony_C.jsv -fate-h264-conformance-caba1_sony_d: CMD = framecrc -i $(SAMPLES)/h264-conformance/CABA1_Sony_D.jsv -fate-h264-conformance-caba1_sva_b: CMD = framecrc -i $(SAMPLES)/h264-conformance/CABA1_SVA_B.264 -fate-h264-conformance-caba2_sony_e: CMD = framecrc -i $(SAMPLES)/h264-conformance/CABA2_Sony_E.jsv -fate-h264-conformance-caba2_sva_b: CMD = framecrc -i $(SAMPLES)/h264-conformance/CABA2_SVA_B.264 +fate-h264-conformance-ba_mw_d: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/BA_MW_D.264 +fate-h264-conformance-bamq1_jvc_c: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/BAMQ1_JVC_C.264 +fate-h264-conformance-bamq2_jvc_c: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/BAMQ2_JVC_C.264 +fate-h264-conformance-banm_mw_d: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/BANM_MW_D.264 +fate-h264-conformance-basqp1_sony_c: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/BASQP1_Sony_C.jsv +fate-h264-conformance-caba1_sony_d: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/CABA1_Sony_D.jsv +fate-h264-conformance-caba1_sva_b: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/CABA1_SVA_B.264 +fate-h264-conformance-caba2_sony_e: CMD = framecrc -vsync 0 -i 
$(SAMPLES)/h264-conformance/CABA2_Sony_E.jsv +fate-h264-conformance-caba2_sva_b: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/CABA2_SVA_B.264 fate-h264-conformance-caba3_sony_c: CMD = framecrc -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/CABA3_Sony_C.jsv fate-h264-conformance-caba3_sva_b: CMD = framecrc -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/CABA3_SVA_B.264 -fate-h264-conformance-caba3_toshiba_e: CMD = framecrc -i $(SAMPLES)/h264-conformance/CABA3_TOSHIBA_E.264 +fate-h264-conformance-caba3_toshiba_e: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/CABA3_TOSHIBA_E.264 fate-h264-conformance-cabac_mot_fld0_full: CMD = framecrc -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/camp_mot_fld0_full.26l fate-h264-conformance-cabac_mot_frm0_full: CMD = framecrc -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/camp_mot_frm0_full.26l fate-h264-conformance-cabac_mot_mbaff0_full: CMD = framecrc -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/camp_mot_mbaff0_full.26l @@ -205,7 +206,7 @@ fate-h264-conformance-cabastbr3_sony_b: CMD = framecrc -vsync 0 -strict 1 -i $( fate-h264-conformance-cabref3_sand_d: CMD = framecrc -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/CABREF3_Sand_D.264 fate-h264-conformance-cacqp3_sony_d: CMD = framecrc -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/CACQP3_Sony_D.jsv fate-h264-conformance-cafi1_sva_c: CMD = framecrc -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/CAFI1_SVA_C.264 -fate-h264-conformance-cama1_sony_c: CMD = framecrc -i $(SAMPLES)/h264-conformance/CAMA1_Sony_C.jsv +fate-h264-conformance-cama1_sony_c: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/CAMA1_Sony_C.jsv fate-h264-conformance-cama1_toshiba_b: CMD = framecrc -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/CAMA1_TOSHIBA_B.264 fate-h264-conformance-cama1_vtc_c: CMD = framecrc -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/cama1_vtc_c.avc fate-h264-conformance-cama2_vtc_b: CMD = framecrc -vsync 0 -i 
$(SAMPLES)/h264-conformance/cama2_vtc_b.avc @@ -218,37 +219,37 @@ fate-h264-conformance-camanl3_sand_e: CMD = framecrc -vsync 0 -strict 1 -i $(SA fate-h264-conformance-camasl3_sony_b: CMD = framecrc -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/CAMASL3_Sony_B.jsv fate-h264-conformance-camp_mot_mbaff_l30: CMD = framecrc -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/CAMP_MOT_MBAFF_L30.26l fate-h264-conformance-camp_mot_mbaff_l31: CMD = framecrc -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/CAMP_MOT_MBAFF_L31.26l -fate-h264-conformance-canl1_sony_e: CMD = framecrc -i $(SAMPLES)/h264-conformance/CANL1_Sony_E.jsv -fate-h264-conformance-canl1_sva_b: CMD = framecrc -i $(SAMPLES)/h264-conformance/CANL1_SVA_B.264 -fate-h264-conformance-canl1_toshiba_g: CMD = framecrc -i $(SAMPLES)/h264-conformance/CANL1_TOSHIBA_G.264 -fate-h264-conformance-canl2_sony_e: CMD = framecrc -i $(SAMPLES)/h264-conformance/CANL2_Sony_E.jsv -fate-h264-conformance-canl2_sva_b: CMD = framecrc -i $(SAMPLES)/h264-conformance/CANL2_SVA_B.264 +fate-h264-conformance-canl1_sony_e: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/CANL1_Sony_E.jsv +fate-h264-conformance-canl1_sva_b: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/CANL1_SVA_B.264 +fate-h264-conformance-canl1_toshiba_g: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/CANL1_TOSHIBA_G.264 +fate-h264-conformance-canl2_sony_e: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/CANL2_Sony_E.jsv +fate-h264-conformance-canl2_sva_b: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/CANL2_SVA_B.264 fate-h264-conformance-canl3_sony_c: CMD = framecrc -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/CANL3_Sony_C.jsv -fate-h264-conformance-canl3_sva_b: CMD = framecrc -i $(SAMPLES)/h264-conformance/CANL3_SVA_B.264 -fate-h264-conformance-canl4_sva_b: CMD = framecrc -i $(SAMPLES)/h264-conformance/CANL4_SVA_B.264 -fate-h264-conformance-canlma2_sony_c: CMD = framecrc -i $(SAMPLES)/h264-conformance/CANLMA2_Sony_C.jsv 
-fate-h264-conformance-canlma3_sony_c: CMD = framecrc -i $(SAMPLES)/h264-conformance/CANLMA3_Sony_C.jsv +fate-h264-conformance-canl3_sva_b: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/CANL3_SVA_B.264 +fate-h264-conformance-canl4_sva_b: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/CANL4_SVA_B.264 +fate-h264-conformance-canlma2_sony_c: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/CANLMA2_Sony_C.jsv +fate-h264-conformance-canlma3_sony_c: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/CANLMA3_Sony_C.jsv fate-h264-conformance-capa1_toshiba_b: CMD = framecrc -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/CAPA1_TOSHIBA_B.264 fate-h264-conformance-capama3_sand_f: CMD = framecrc -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/CAPAMA3_Sand_F.264 -fate-h264-conformance-capcm1_sand_e: CMD = framecrc -i $(SAMPLES)/h264-conformance/CAPCM1_Sand_E.264 -fate-h264-conformance-capcmnl1_sand_e: CMD = framecrc -i $(SAMPLES)/h264-conformance/CAPCMNL1_Sand_E.264 +fate-h264-conformance-capcm1_sand_e: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/CAPCM1_Sand_E.264 +fate-h264-conformance-capcmnl1_sand_e: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/CAPCMNL1_Sand_E.264 fate-h264-conformance-capm3_sony_d: CMD = framecrc -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/CAPM3_Sony_D.jsv -fate-h264-conformance-caqp1_sony_b: CMD = framecrc -i $(SAMPLES)/h264-conformance/CAQP1_Sony_B.jsv +fate-h264-conformance-caqp1_sony_b: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/CAQP1_Sony_B.jsv fate-h264-conformance-cavlc_mot_fld0_full_b: CMD = framecrc -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/cvmp_mot_fld0_full_B.26l fate-h264-conformance-cavlc_mot_frm0_full_b: CMD = framecrc -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/cvmp_mot_frm0_full_B.26l fate-h264-conformance-cavlc_mot_mbaff0_full_b: CMD = framecrc -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/cvmp_mot_mbaff0_full_B.26l 
fate-h264-conformance-cavlc_mot_picaff0_full_b: CMD = framecrc -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/cvmp_mot_picaff0_full_B.26l -fate-h264-conformance-cawp1_toshiba_e: CMD = framecrc -i $(SAMPLES)/h264-conformance/CAWP1_TOSHIBA_E.264 +fate-h264-conformance-cawp1_toshiba_e: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/CAWP1_TOSHIBA_E.264 fate-h264-conformance-cawp5_toshiba_e: CMD = framecrc -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/CAWP5_TOSHIBA_E.264 -fate-h264-conformance-ci1_ft_b: CMD = framecrc -i $(SAMPLES)/h264-conformance/CI1_FT_B.264 -fate-h264-conformance-ci_mw_d: CMD = framecrc -i $(SAMPLES)/h264-conformance/CI_MW_D.264 +fate-h264-conformance-ci1_ft_b: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/CI1_FT_B.264 +fate-h264-conformance-ci_mw_d: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/CI_MW_D.264 fate-h264-conformance-cvbs3_sony_c: CMD = framecrc -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/CVBS3_Sony_C.jsv -fate-h264-conformance-cvcanlma2_sony_c: CMD = framecrc -i $(SAMPLES)/h264-conformance/CVCANLMA2_Sony_C.jsv +fate-h264-conformance-cvcanlma2_sony_c: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/CVCANLMA2_Sony_C.jsv fate-h264-conformance-cvfi1_sony_d: CMD = framecrc -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/CVFI1_Sony_D.jsv fate-h264-conformance-cvfi1_sva_c: CMD = framecrc -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/CVFI1_SVA_C.264 fate-h264-conformance-cvfi2_sony_h: CMD = framecrc -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/CVFI2_Sony_H.jsv fate-h264-conformance-cvfi2_sva_c: CMD = framecrc -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/CVFI2_SVA_C.264 -fate-h264-conformance-cvma1_sony_d: CMD = framecrc -i $(SAMPLES)/h264-conformance/CVMA1_Sony_D.jsv +fate-h264-conformance-cvma1_sony_d: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/CVMA1_Sony_D.jsv fate-h264-conformance-cvma1_toshiba_b: CMD = framecrc -vsync 0 -strict 1 -i 
$(SAMPLES)/h264-conformance/CVMA1_TOSHIBA_B.264 fate-h264-conformance-cvmanl1_toshiba_b: CMD = framecrc -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/CVMANL1_TOSHIBA_B.264 fate-h264-conformance-cvmanl2_toshiba_b: CMD = framecrc -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/CVMANL2_TOSHIBA_B.264 @@ -260,67 +261,68 @@ fate-h264-conformance-cvmp_mot_frm_l31_b: CMD = framecrc -vsync 0 -strict 1 -i fate-h264-conformance-cvnlfi1_sony_c: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/CVNLFI1_Sony_C.jsv fate-h264-conformance-cvnlfi2_sony_h: CMD = framecrc -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/CVNLFI2_Sony_H.jsv fate-h264-conformance-cvpa1_toshiba_b: CMD = framecrc -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/CVPA1_TOSHIBA_B.264 -fate-h264-conformance-cvpcmnl1_sva_c: CMD = framecrc -i $(SAMPLES)/h264-conformance/CVPCMNL1_SVA_C.264 -fate-h264-conformance-cvpcmnl2_sva_c: CMD = framecrc -i $(SAMPLES)/h264-conformance/CVPCMNL2_SVA_C.264 -fate-h264-conformance-cvwp1_toshiba_e: CMD = framecrc -i $(SAMPLES)/h264-conformance/CVWP1_TOSHIBA_E.264 +fate-h264-conformance-cvpcmnl1_sva_c: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/CVPCMNL1_SVA_C.264 +fate-h264-conformance-cvpcmnl2_sva_c: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/CVPCMNL2_SVA_C.264 +fate-h264-conformance-cvwp1_toshiba_e: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/CVWP1_TOSHIBA_E.264 fate-h264-conformance-cvwp2_toshiba_e: CMD = framecrc -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/CVWP2_TOSHIBA_E.264 fate-h264-conformance-cvwp3_toshiba_e: CMD = framecrc -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/CVWP3_TOSHIBA_E.264 fate-h264-conformance-cvwp5_toshiba_e: CMD = framecrc -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/CVWP5_TOSHIBA_E.264 fate-h264-conformance-fi1_sony_e: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FI1_Sony_E.jsv -fate-h264-conformance-frext-alphaconformanceg: CMD = framecrc -i 
$(SAMPLES)/h264-conformance/FRext/test8b43.264 -fate-h264-conformance-frext-bcrm_freh10: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/freh10.264 -vsync 0 -fate-h264-conformance-frext-brcm_freh11: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/freh11.264 -vsync 0 -fate-h264-conformance-frext-brcm_freh3: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/freh3.264 -fate-h264-conformance-frext-brcm_freh4: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/freh4.264 -vsync 0 -fate-h264-conformance-frext-brcm_freh5: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/freh5.264 -fate-h264-conformance-frext-brcm_freh8: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/freh8.264 -fate-h264-conformance-frext-brcm_freh9: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/freh9.264 -fate-h264-conformance-frext-freh12_b: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/Freh12_B.264 -fate-h264-conformance-frext-freh1_b: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/Freh1_B.264 -fate-h264-conformance-frext-freh2_b: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/Freh2_B.264 -fate-h264-conformance-frext-freh6: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/freh6.264 -vsync 0 -fate-h264-conformance-frext-freh7_b: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/Freh7_B.264 -vsync 0 -fate-h264-conformance-frext-frext01_jvc_d: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/FREXT01_JVC_D.264 -fate-h264-conformance-frext-frext02_jvc_c: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/FREXT02_JVC_C.264 -fate-h264-conformance-frext-frext1_panasonic_c: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/FRExt1_Panasonic.avc -fate-h264-conformance-frext-frext2_panasonic_b: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/FRExt2_Panasonic.avc -vsync 0 -fate-h264-conformance-frext-frext3_panasonic_d: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/FRExt3_Panasonic.avc -fate-h264-conformance-frext-frext4_panasonic_a: CMD = framecrc -i 
$(SAMPLES)/h264-conformance/FRext/FRExt4_Panasonic.avc +fate-h264-conformance-frext-alphaconformanceg: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/test8b43.264 +fate-h264-conformance-frext-bcrm_freh10: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/freh10.264 -vsync 0 +fate-h264-conformance-frext-brcm_freh11: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/freh11.264 -vsync 0 +fate-h264-conformance-frext-brcm_freh3: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/freh3.264 +fate-h264-conformance-frext-brcm_freh4: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/freh4.264 -vsync 0 +fate-h264-conformance-frext-brcm_freh5: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/freh5.264 +fate-h264-conformance-frext-brcm_freh8: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/freh8.264 +fate-h264-conformance-frext-brcm_freh9: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/freh9.264 +fate-h264-conformance-frext-freh12_b: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/Freh12_B.264 +fate-h264-conformance-frext-freh1_b: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/Freh1_B.264 +fate-h264-conformance-frext-freh2_b: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/Freh2_B.264 +fate-h264-conformance-frext-freh6: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/freh6.264 -vsync 0 +fate-h264-conformance-frext-freh7_b: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/Freh7_B.264 -vsync 0 +fate-h264-conformance-frext-frext01_jvc_d: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/FREXT01_JVC_D.264 +fate-h264-conformance-frext-frext02_jvc_c: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/FREXT02_JVC_C.264 +fate-h264-conformance-frext-frext1_panasonic_c: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/FRExt1_Panasonic.avc +fate-h264-conformance-frext-frext2_panasonic_b: CMD = 
framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/FRExt2_Panasonic.avc -vsync 0 +fate-h264-conformance-frext-frext3_panasonic_d: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/FRExt3_Panasonic.avc +fate-h264-conformance-frext-frext4_panasonic_a: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/FRExt4_Panasonic.avc fate-h264-conformance-frext-frext_mmco4_sony_b: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/FRExt_MMCO4_Sony_B.264 -fate-h264-conformance-frext-hcaff1_hhi_b: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/HCAFF1_HHI.264 -fate-h264-conformance-frext-hcafr1_hhi_c: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/HCAFR1_HHI.264 -fate-h264-conformance-frext-hcafr2_hhi_a: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/HCAFR2_HHI.264 -fate-h264-conformance-frext-hcafr3_hhi_a: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/HCAFR3_HHI.264 -fate-h264-conformance-frext-hcafr4_hhi_a: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/HCAFR4_HHI.264 -fate-h264-conformance-frext-hcamff1_hhi_b: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/HCAMFF1_HHI.264 -fate-h264-conformance-frext-hpca_brcm_c: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/HPCA_BRCM_C.264 -fate-h264-conformance-frext-hpcadq_brcm_b: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/HPCADQ_BRCM_B.264 -fate-h264-conformance-frext-hpcafl_bcrm_c: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/HPCAFL_BRCM_C.264 -vsync 0 -fate-h264-conformance-frext-hpcaflnl_bcrm_c: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/HPCAFLNL_BRCM_C.264 -vsync 0 -fate-h264-conformance-frext-hpcalq_brcm_b: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/HPCALQ_BRCM_B.264 -fate-h264-conformance-frext-hpcamapalq_bcrm_b: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/HPCAMAPALQ_BRCM_B.264 -vsync 0 -fate-h264-conformance-frext-hpcamolq_brcm_b: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/HPCAMOLQ_BRCM_B.264 
-fate-h264-conformance-frext-hpcanl_brcm_c: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/HPCANL_BRCM_C.264 -fate-h264-conformance-frext-hpcaq2lq_brcm_b: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/HPCAQ2LQ_BRCM_B.264 -fate-h264-conformance-frext-hpcv_brcm_a: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/HPCV_BRCM_A.264 -fate-h264-conformance-frext-hpcvfl_bcrm_a: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/HPCVFL_BRCM_A.264 -vsync 0 -fate-h264-conformance-frext-hpcvflnl_bcrm_a: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/HPCVFLNL_BRCM_A.264 -vsync 0 -fate-h264-conformance-frext-hpcvmolq_brcm_b: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/HPCVMOLQ_BRCM_B.264 -fate-h264-conformance-frext-hpcvnl_brcm_a: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/HPCVNL_BRCM_A.264 -fate-h264-conformance-frext-pph10i1_panasonic_a: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/PPH10I1_Panasonic_A.264 -pix_fmt yuv420p10le -fate-h264-conformance-frext-pph10i2_panasonic_a: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/PPH10I2_Panasonic_A.264 -pix_fmt yuv420p10le -fate-h264-conformance-frext-pph10i3_panasonic_a: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/PPH10I3_Panasonic_A.264 -pix_fmt yuv420p10le -fate-h264-conformance-frext-pph10i5_panasonic_a: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/PPH10I5_Panasonic_A.264 -pix_fmt yuv420p10le -fate-h264-conformance-frext-pph10i6_panasonic_a: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/PPH10I6_Panasonic_A.264 -pix_fmt yuv420p10le -fate-h264-conformance-frext-pph10i7_panasonic_a: CMD = framecrc -i $(SAMPLES)/h264-conformance/FRext/PPH10I7_Panasonic_A.264 -pix_fmt yuv420p10le +fate-h264-conformance-frext-hcaff1_hhi_b: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/HCAFF1_HHI.264 +fate-h264-conformance-frext-hcafr1_hhi_c: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/HCAFR1_HHI.264 +fate-h264-conformance-frext-hcafr2_hhi_a: CMD = 
framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/HCAFR2_HHI.264 +fate-h264-conformance-frext-hcafr3_hhi_a: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/HCAFR3_HHI.264 +fate-h264-conformance-frext-hcafr4_hhi_a: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/HCAFR4_HHI.264 +fate-h264-conformance-frext-hcamff1_hhi_b: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/HCAMFF1_HHI.264 +fate-h264-conformance-frext-hpca_brcm_c: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/HPCA_BRCM_C.264 +fate-h264-conformance-frext-hpcadq_brcm_b: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/HPCADQ_BRCM_B.264 +fate-h264-conformance-frext-hpcafl_bcrm_c: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/HPCAFL_BRCM_C.264 -vsync 0 +fate-h264-conformance-frext-hpcaflnl_bcrm_c: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/HPCAFLNL_BRCM_C.264 -vsync 0 +fate-h264-conformance-frext-hpcalq_brcm_b: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/HPCALQ_BRCM_B.264 +fate-h264-conformance-frext-hpcamapalq_bcrm_b: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/HPCAMAPALQ_BRCM_B.264 -vsync 0 +fate-h264-conformance-frext-hpcamolq_brcm_b: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/HPCAMOLQ_BRCM_B.264 +fate-h264-conformance-frext-hpcanl_brcm_c: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/HPCANL_BRCM_C.264 +fate-h264-conformance-frext-hpcaq2lq_brcm_b: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/HPCAQ2LQ_BRCM_B.264 +fate-h264-conformance-frext-hpcv_brcm_a: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/HPCV_BRCM_A.264 +fate-h264-conformance-frext-hpcvfl_bcrm_a: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/HPCVFL_BRCM_A.264 -vsync 0 +fate-h264-conformance-frext-hpcvflnl_bcrm_a: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/HPCVFLNL_BRCM_A.264 -vsync 0 
+fate-h264-conformance-frext-hpcvmolq_brcm_b: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/HPCVMOLQ_BRCM_B.264 +fate-h264-conformance-frext-hpcvnl_brcm_a: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/HPCVNL_BRCM_A.264 +fate-h264-conformance-frext-pph10i1_panasonic_a: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/PPH10I1_Panasonic_A.264 -pix_fmt yuv420p10le +fate-h264-conformance-frext-pph10i2_panasonic_a: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/PPH10I2_Panasonic_A.264 -pix_fmt yuv420p10le +fate-h264-conformance-frext-pph10i3_panasonic_a: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/PPH10I3_Panasonic_A.264 -pix_fmt yuv420p10le +fate-h264-conformance-frext-pph10i4_panasonic_a: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/PPH10I4_Panasonic_A.264 -pix_fmt yuv420p10le +fate-h264-conformance-frext-pph10i5_panasonic_a: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/PPH10I5_Panasonic_A.264 -pix_fmt yuv420p10le +fate-h264-conformance-frext-pph10i6_panasonic_a: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/PPH10I6_Panasonic_A.264 -pix_fmt yuv420p10le +fate-h264-conformance-frext-pph10i7_panasonic_a: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/FRext/PPH10I7_Panasonic_A.264 -pix_fmt yuv420p10le fate-h264-conformance-hcbp2_hhi_a: CMD = framecrc -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/HCBP2_HHI_A.264 fate-h264-conformance-hcmp1_hhi_a: CMD = framecrc -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/HCMP1_HHI_A.264 -fate-h264-conformance-ls_sva_d: CMD = framecrc -i $(SAMPLES)/h264-conformance/LS_SVA_D.264 -fate-h264-conformance-midr_mw_d: CMD = framecrc -i $(SAMPLES)/h264-conformance/MIDR_MW_D.264 -fate-h264-conformance-mps_mw_a: CMD = framecrc -i $(SAMPLES)/h264-conformance/MPS_MW_A.264 -fate-h264-conformance-mr1_bt_a: CMD = framecrc -i $(SAMPLES)/h264-conformance/MR1_BT_A.h264 -fate-h264-conformance-mr1_mw_a: CMD = framecrc -i 
$(SAMPLES)/h264-conformance/MR1_MW_A.264 -fate-h264-conformance-mr2_mw_a: CMD = framecrc -i $(SAMPLES)/h264-conformance/MR2_MW_A.264 +fate-h264-conformance-ls_sva_d: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/LS_SVA_D.264 +fate-h264-conformance-midr_mw_d: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/MIDR_MW_D.264 +fate-h264-conformance-mps_mw_a: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/MPS_MW_A.264 +fate-h264-conformance-mr1_bt_a: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/MR1_BT_A.h264 +fate-h264-conformance-mr1_mw_a: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/MR1_MW_A.264 +fate-h264-conformance-mr2_mw_a: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/MR2_MW_A.264 fate-h264-conformance-mr2_tandberg_e: CMD = framecrc -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/MR2_TANDBERG_E.264 fate-h264-conformance-mr3_tandberg_b: CMD = framecrc -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/MR3_TANDBERG_B.264 fate-h264-conformance-mr4_tandberg_c: CMD = framecrc -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/MR4_TANDBERG_C.264 @@ -330,26 +332,26 @@ fate-h264-conformance-mr7_bt_b: CMD = framecrc -vsync 0 -strict 1 -i $(SAMPLES) fate-h264-conformance-mr8_bt_b: CMD = framecrc -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/MR8_BT_B.h264 fate-h264-conformance-mr9_bt_b: CMD = framecrc -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/MR9_BT_B.h264 fate-h264-conformance-mv1_brcm_d: CMD = framecrc -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/src19td.IBP.264 -fate-h264-conformance-nl1_sony_d: CMD = framecrc -i $(SAMPLES)/h264-conformance/NL1_Sony_D.jsv -fate-h264-conformance-nl2_sony_h: CMD = framecrc -i $(SAMPLES)/h264-conformance/NL2_Sony_H.jsv -fate-h264-conformance-nl3_sva_e: CMD = framecrc -i $(SAMPLES)/h264-conformance/NL3_SVA_E.264 -fate-h264-conformance-nlmq1_jvc_c: CMD = framecrc -i $(SAMPLES)/h264-conformance/NLMQ1_JVC_C.264 -fate-h264-conformance-nlmq2_jvc_c: CMD = framecrc -i 
$(SAMPLES)/h264-conformance/NLMQ2_JVC_C.264 -fate-h264-conformance-nrf_mw_e: CMD = framecrc -i $(SAMPLES)/h264-conformance/NRF_MW_E.264 +fate-h264-conformance-nl1_sony_d: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/NL1_Sony_D.jsv +fate-h264-conformance-nl2_sony_h: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/NL2_Sony_H.jsv +fate-h264-conformance-nl3_sva_e: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/NL3_SVA_E.264 +fate-h264-conformance-nlmq1_jvc_c: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/NLMQ1_JVC_C.264 +fate-h264-conformance-nlmq2_jvc_c: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/NLMQ2_JVC_C.264 +fate-h264-conformance-nrf_mw_e: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/NRF_MW_E.264 fate-h264-conformance-sharp_mp_field_1_b: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/Sharp_MP_Field_1_B.jvt fate-h264-conformance-sharp_mp_field_2_b: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/Sharp_MP_Field_2_B.jvt fate-h264-conformance-sharp_mp_field_3_b: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/Sharp_MP_Field_3_B.jvt fate-h264-conformance-sharp_mp_paff_1r2: CMD = framecrc -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/Sharp_MP_PAFF_1r2.jvt fate-h264-conformance-sharp_mp_paff_2r: CMD = framecrc -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/Sharp_MP_PAFF_2.jvt fate-h264-conformance-sl1_sva_b: CMD = framecrc -vsync 0 -strict 1 -i $(SAMPLES)/h264-conformance/SL1_SVA_B.264 -fate-h264-conformance-sva_ba1_b: CMD = framecrc -i $(SAMPLES)/h264-conformance/SVA_BA1_B.264 -fate-h264-conformance-sva_ba2_d: CMD = framecrc -i $(SAMPLES)/h264-conformance/SVA_BA2_D.264 -fate-h264-conformance-sva_base_b: CMD = framecrc -i $(SAMPLES)/h264-conformance/SVA_Base_B.264 -fate-h264-conformance-sva_cl1_e: CMD = framecrc -i $(SAMPLES)/h264-conformance/SVA_CL1_E.264 -fate-h264-conformance-sva_fm1_e: CMD = framecrc -i $(SAMPLES)/h264-conformance/SVA_FM1_E.264 -fate-h264-conformance-sva_nl1_b: CMD 
= framecrc -i $(SAMPLES)/h264-conformance/SVA_NL1_B.264 -fate-h264-conformance-sva_nl2_e: CMD = framecrc -i $(SAMPLES)/h264-conformance/SVA_NL2_E.264 +fate-h264-conformance-sva_ba1_b: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/SVA_BA1_B.264 +fate-h264-conformance-sva_ba2_d: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/SVA_BA2_D.264 +fate-h264-conformance-sva_base_b: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/SVA_Base_B.264 +fate-h264-conformance-sva_cl1_e: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/SVA_CL1_E.264 +fate-h264-conformance-sva_fm1_e: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/SVA_FM1_E.264 +fate-h264-conformance-sva_nl1_b: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/SVA_NL1_B.264 +fate-h264-conformance-sva_nl2_e: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264-conformance/SVA_NL2_E.264 -fate-h264-interlace-crop: CMD = framecrc -vframes 3 -i $(SAMPLES)/h264/interlaced_crop.mp4 -fate-h264-lossless: CMD = framecrc -i $(SAMPLES)/h264/lossless.h264 +fate-h264-interlace-crop: CMD = framecrc -vsync 0 -vframes 3 -i $(SAMPLES)/h264/interlaced_crop.mp4 +fate-h264-lossless: CMD = framecrc -vsync 0 -i $(SAMPLES)/h264/lossless.h264 fate-h264-extreme-plane-pred: CMD = framemd5 -strict 1 -vsync 0 -i $(SAMPLES)/h264/extreme-plane-pred.h264 diff --git a/tests/fate2.mak b/tests/fate2.mak index 6a9448faf1..066f9ef583 100644 --- a/tests/fate2.mak +++ b/tests/fate2.mak @@ -165,7 +165,7 @@ fate-wmapro-2ch: CMP = oneoff fate-wmapro-2ch: REF = $(SAMPLES)/wmapro/Beethovens_9th-1_small.pcm FATE_TESTS += fate-ansi -fate-ansi: CMD = framecrc -i $(SAMPLES)/ansi/TRE-IOM5.ANS -pix_fmt rgb24 +fate-ansi: CMD = framecrc -ar 44100 -i $(SAMPLES)/ansi/TRE-IOM5.ANS -pix_fmt rgb24 FATE_TESTS += fate-wmv8-drm # discard last packet to avoid fails due to overread of VC-1 decoder diff --git a/tests/lavf-regression.sh b/tests/lavf-regression.sh index 28f53f78b0..39e752b3c6 100755 --- a/tests/lavf-regression.sh +++ 
b/tests/lavf-regression.sh @@ -14,7 +14,7 @@ eval do_$test=y do_lavf() { file=${outfile}lavf.$1 - do_ffmpeg $file $DEC_OPTS -f image2 -vcodec pgmyuv -i $raw_src $DEC_OPTS -f s16le -i $pcm_src $ENC_OPTS -t 1 -qscale 10 $2 + do_ffmpeg $file $DEC_OPTS -f image2 -vcodec pgmyuv -i $raw_src $DEC_OPTS -ar 44100 -f s16le -i $pcm_src $ENC_OPTS -t 1 -qscale 10 $2 do_ffmpeg_crc $file $DEC_OPTS -i $target_path/$file $3 } @@ -39,12 +39,11 @@ do_image_formats() do_audio_only() { file=${outfile}lavf.$1 - do_ffmpeg $file $DEC_OPTS $2 -f s16le -i $pcm_src $ENC_OPTS -t 1 -qscale 10 $3 - do_ffmpeg_crc $file $DEC_OPTS -i $target_path/$file + do_ffmpeg $file $DEC_OPTS $2 -ar 44100 -f s16le -i $pcm_src $ENC_OPTS -t 1 -qscale 10 $3 + do_ffmpeg_crc $file $DEC_OPTS $4 -i $target_path/$file } rm -f "$logfile" -rm -f "$benchfile" if [ -n "$do_avi" ] ; then do_lavf avi @@ -56,7 +55,7 @@ fi if [ -n "$do_rm" ] ; then file=${outfile}lavf.rm -do_ffmpeg $file $DEC_OPTS -f image2 -vcodec pgmyuv -i $raw_src $DEC_OPTS -f s16le -i $pcm_src $ENC_OPTS -t 1 -qscale 10 -acodec ac3_fixed +do_ffmpeg $file $DEC_OPTS -f image2 -vcodec pgmyuv -i $raw_src $DEC_OPTS -ar 44100 -f s16le -i $pcm_src $ENC_OPTS -t 1 -qscale 10 -acodec ac3_fixed # broken #do_ffmpeg_crc $file -i $target_path/$file fi @@ -182,11 +181,11 @@ do_audio_only wav fi if [ -n "$do_alaw" ] ; then -do_audio_only al +do_audio_only al "" "" "-ar 44100" fi if [ -n "$do_mulaw" ] ; then -do_audio_only ul +do_audio_only ul "" "" "-ar 44100" fi if [ -n "$do_au" ] ; then @@ -227,8 +226,8 @@ conversions="yuv420p yuv422p yuv444p yuyv422 yuv410p yuv411p yuvj420p \ monob yuv440p yuvj440p" for pix_fmt in $conversions ; do file=${outfile}${pix_fmt}.yuv - do_ffmpeg_nocheck $file $DEC_OPTS -r 1 -t 1 -f image2 -vcodec pgmyuv -i $raw_src \ - $ENC_OPTS -f rawvideo -s 352x288 -pix_fmt $pix_fmt $target_path/$raw_dst + run_ffmpeg $DEC_OPTS -r 1 -t 1 -f image2 -vcodec pgmyuv -i $raw_src \ + $ENC_OPTS -f rawvideo -s 352x288 -pix_fmt $pix_fmt $target_path/$raw_dst 
do_ffmpeg $file $DEC_OPTS -f rawvideo -s 352x288 -pix_fmt $pix_fmt -i $target_path/$raw_dst \ $ENC_OPTS -f rawvideo -s 352x288 -pix_fmt yuv444p done diff --git a/tests/lavfi-regression.sh b/tests/lavfi-regression.sh index 129358090e..0322134163 100755 --- a/tests/lavfi-regression.sh +++ b/tests/lavfi-regression.sh @@ -12,7 +12,6 @@ set -e eval do_$test=y rm -f "$logfile" -rm -f "$benchfile" do_video_filter() { label=$1 diff --git a/tests/ref/acodec/aref b/tests/ref/acodec/aref new file mode 100644 index 0000000000..8e6773be3b --- /dev/null +++ b/tests/ref/acodec/aref @@ -0,0 +1,2 @@ +95e54b261530a1bcf6de6fe3b21dc5f6 *./tests/data/acodec.ref.wav +1058444 ./tests/data/acodec.ref.wav diff --git a/tests/ref/fate/h264-conformance-frext-pph10i4_panasonic_a b/tests/ref/fate/h264-conformance-frext-pph10i4_panasonic_a index d351a7eb1f..a06c4577ff 100644 --- a/tests/ref/fate/h264-conformance-frext-pph10i4_panasonic_a +++ b/tests/ref/fate/h264-conformance-frext-pph10i4_panasonic_a @@ -4,16 +4,7 @@ 0, 10800, 6220800, 0x7459a1cc 0, 14400, 6220800, 0x02191aa9 0, 18000, 6220800, 0x88dca590 -0, 21600, 6220800, 0x56dd150a -0, 25200, 6220800, 0x5f56a56f -0, 28800, 6220800, 0x67ada4b7 -0, 32400, 6220800, 0x88dca590 -0, 36000, 6220800, 0xd3b09fe5 -0, 39600, 6220800, 0x2223998c -0, 43200, 6220800, 0x5e5b2da5 -0, 46800, 6220800, 0x88dca590 -0, 50400, 6220800, 0x5e5b2da5 -0, 54000, 6220800, 0x88dca590 -0, 57600, 6220800, 0x5e5b2da5 -0, 61200, 6220800, 0x88dca590 -0, 64800, 6220800, 0x26e1ec8b +0, 21600, 6220800, 0x4484d484 +0, 25200, 6220800, 0x8afdb53f +0, 28800, 6220800, 0xd3d6017a +0, 32400, 6220800, 0xf5162af0 diff --git a/tests/ref/fate/iff-fibonacci b/tests/ref/fate/iff-fibonacci index e452f31e6c..947f78e964 100644 --- a/tests/ref/fate/iff-fibonacci +++ b/tests/ref/fate/iff-fibonacci @@ -1 +1 @@ -e968a853779bb6438339e3b8d69d8d24 +e76b025238a6a27968f8644f4ccc3207 diff --git a/tests/ref/fate/lmlm4-demux b/tests/ref/fate/lmlm4-demux index f322300eee..1d0d59f54d 100644 --- 
a/tests/ref/fate/lmlm4-demux +++ b/tests/ref/fate/lmlm4-demux @@ -214,334 +214,3 @@ 0, 267267, 1327, 0x7d15307c 1, 267840, 768, 0x8d766d40 0, 270270, 1225, 0x1b5d0f5f -0, 273273, 1173, 0x840efed5 -0, 276276, 1215, 0xa8e0035e -0, 279279, 1295, 0x142918ca -0, 282282, 1144, 0xf50cef50 -0, 285285, 1527, 0x7d13bd9d -0, 288288, 5609, 0x1ae1921d -0, 291291, 1303, 0xabdc264f -0, 294294, 1419, 0x878169bf -0, 297297, 972, 0x00c4a257 -0, 300300, 1277, 0x87d520cf -0, 303303, 1014, 0x5946b4ee -0, 306306, 1177, 0x124e0e23 -0, 309309, 1402, 0x8e6363cc -0, 312312, 1171, 0x9bdaeda2 -0, 315315, 1389, 0x2db53b22 -0, 318318, 1056, 0xd1c3de3e -0, 321321, 1320, 0x1ea142c7 -0, 324324, 1250, 0x33612229 -0, 327327, 1477, 0xb9648b48 -0, 330330, 1522, 0x5352c318 -0, 333333, 1391, 0x5e9157e0 -0, 336336, 5545, 0x569e64c1 -0, 339339, 1354, 0xdb39469e -0, 342342, 1302, 0x79912b5d -0, 345345, 1065, 0x4befcdd2 -0, 348348, 1408, 0x7d2f65a2 -0, 351351, 1727, 0x9cac0398 -0, 354354, 1590, 0xa321b563 -0, 357357, 1039, 0xfa35cabf -0, 360360, 1184, 0xb332fde7 -0, 363363, 669, 0xb10e3783 -0, 366366, 784, 0x57275e09 -0, 369369, 1051, 0xe072cd33 -0, 372372, 1119, 0x635ee9ee -0, 375375, 1147, 0x3916f981 -0, 378378, 1086, 0x306ef895 -0, 381381, 827, 0x213f7aef -0, 384384, 5525, 0x19157827 -0, 387387, 1044, 0xb661abc5 -0, 390390, 1143, 0x032e1109 -0, 393393, 1460, 0x5a2f9503 -0, 396396, 1178, 0xd038141f -0, 399399, 1004, 0x410ec3b2 -0, 402402, 1089, 0xc89af8c9 -0, 405405, 1367, 0x52085e0a -0, 408408, 1115, 0x8bb2ee7f -0, 411411, 1325, 0xc2e05647 -0, 414414, 1295, 0x213951c9 -0, 417417, 1054, 0xbb8bdaae -0, 420420, 1210, 0x431122bd -0, 423423, 1400, 0x47526fcc -0, 426426, 1188, 0x19770b07 -0, 429429, 1301, 0x437161c8 -0, 432432, 5281, 0xc0c92b55 -0, 435435, 840, 0x67da7b2f -0, 438438, 1094, 0x3fd6d944 -0, 441441, 832, 0x0eda74bc -0, 444444, 1096, 0x3300da7b -0, 447447, 1018, 0xa208c971 -0, 450450, 1389, 0x1167724c -0, 453453, 1411, 0xe3be666b -0, 456456, 1294, 0xa8f35cc6 -0, 459459, 1232, 0xfd0d20fd -0, 462462, 
1252, 0xadd83a26 -0, 465465, 844, 0xcbaf6a55 -0, 468468, 979, 0x78d9b241 -0, 471471, 1057, 0x6743e16c -0, 474474, 776, 0xfedd6615 -0, 477477, 1158, 0xa39fee34 -0, 480480, 5288, 0x5f26ee02 -0, 483483, 1029, 0xa681bee8 -0, 486486, 1106, 0xa68dea33 -0, 489489, 844, 0x42fd83ec -0, 492492, 779, 0xb5006759 -0, 495495, 951, 0xec13af4f -0, 498498, 1011, 0x90e5c86e -0, 501501, 892, 0x4db48ca4 -0, 504504, 804, 0x59bf73a7 -0, 507507, 1001, 0x10c2b3ff -0, 510510, 879, 0x65c57eaf -0, 513513, 1320, 0x80815836 -0, 516516, 1448, 0xaf457b3b -0, 519519, 1168, 0x65b9f96a -0, 522522, 1002, 0x053fafb9 -0, 525525, 1101, 0x2d30c3d5 -0, 528528, 5314, 0x87cee383 -0, 531531, 1305, 0xb19035db -0, 534534, 1240, 0xdc6a0a65 -0, 537537, 1067, 0x9c88ba67 -0, 540540, 823, 0x2f736a43 -0, 543543, 1183, 0x2ef9f3c9 -0, 546546, 899, 0x3fcc8d11 -0, 549549, 886, 0xccec8d49 -0, 552552, 1190, 0x2d020fa1 -0, 555555, 1017, 0x0776b627 -0, 558558, 1202, 0xbdd808d5 -0, 561561, 998, 0x64c7c246 -0, 564564, 1200, 0x9d6e2289 -0, 567567, 895, 0xa8a68d80 -0, 570570, 748, 0xe61a49fb -0, 573573, 929, 0x30168b50 -0, 576576, 5276, 0xceb2edf2 -0, 579579, 1127, 0xab43ddc3 -0, 582582, 1028, 0xaacfbff5 -0, 585585, 914, 0xb63c8fb0 -0, 588588, 1067, 0xbdacd1ed -0, 591591, 1109, 0x6792ddec -0, 594594, 1310, 0x71bc4da2 -0, 597597, 1098, 0xc464de9b -0, 600600, 1018, 0x6833b875 -0, 603603, 1210, 0x44faf34b -0, 606606, 1200, 0x9ee816f6 -0, 609609, 1461, 0xc76b7d2b -0, 612612, 829, 0x006677e6 -0, 615615, 1145, 0xc769fb13 -0, 618618, 1292, 0xb63225f5 -0, 621621, 1252, 0x0e2a2626 -0, 624624, 5257, 0x3877eca1 -0, 627627, 952, 0x7f708d25 -0, 630630, 1125, 0x140cd81b -0, 633633, 1095, 0x3025dade -0, 636636, 1388, 0xd7494d4e -0, 639639, 1124, 0x0c48ee92 -0, 642642, 1556, 0xa0749ee2 -0, 645645, 1461, 0xe5fd7d7f -0, 648648, 903, 0x07a58303 -0, 651651, 1049, 0x4b6cd03b -0, 654654, 1044, 0x5f47cb48 -0, 657657, 1253, 0xba281c6a -0, 660660, 1618, 0xed7cd040 -0, 663663, 981, 0x2926b6f4 -0, 666666, 1560, 0xa0e1ab73 -0, 669669, 1479, 0x41a77e88 
-0, 672672, 5222, 0xc2dbd182 -0, 675675, 925, 0x967580dd -0, 678678, 1284, 0x5b7822e0 -0, 681681, 1512, 0xe84da1e0 -0, 684684, 1514, 0xc38bb09e -0, 687687, 1224, 0x8752228e -0, 690690, 1296, 0xcf053c03 -0, 693693, 1117, 0x9a81e659 -0, 696696, 1090, 0x003ed687 -0, 699699, 1196, 0x3a510937 -0, 702702, 1075, 0x05eec8d4 -0, 705705, 1048, 0x3b19cb96 -0, 708708, 944, 0xaad89770 -0, 711711, 960, 0x94649e4c -0, 714714, 1079, 0x530ddaba -0, 717717, 1150, 0x0339e696 -0, 720720, 5189, 0xb8dac0bf -0, 723723, 1129, 0x3b2cd64d -0, 726726, 962, 0xe9df9a07 -0, 729729, 1113, 0xc6ccddb2 -0, 732732, 1069, 0xf589d4a4 -0, 735735, 889, 0x5f7b8762 -0, 738738, 863, 0xe9c36be4 -0, 741741, 1021, 0xcfb5a737 -0, 744744, 1048, 0x203ac9ff -0, 747747, 1223, 0x3e30fe35 -0, 750750, 814, 0x59c076fc -0, 753753, 1157, 0x0dcf0bd0 -0, 756756, 1691, 0xdd030547 -0, 759759, 1700, 0x7641fb7e -0, 762762, 1791, 0x57ac147b -0, 765765, 2008, 0x3d4483ca -0, 768768, 4579, 0x874aa75b -0, 771771, 1647, 0xeddef621 -0, 774774, 1999, 0x61d4a23a -0, 777777, 1572, 0x1c3ae6e1 -0, 780780, 1803, 0xb31c3a11 -0, 783783, 1919, 0xccbf64e3 -0, 786786, 1720, 0xa4d010e5 -0, 789789, 1721, 0x87ee0c7b -0, 792792, 1626, 0x8211f3d0 -0, 795795, 1675, 0xef8a0b3d -0, 798798, 1609, 0x8731ce06 -0, 801801, 1691, 0xcf24038b -0, 804804, 1637, 0x21d8e1b2 -0, 807807, 1546, 0xc597a700 -0, 810810, 1518, 0xb944bc11 -0, 813813, 1403, 0x999e59a8 -0, 816816, 2467, 0xe69f2507 -0, 819819, 531, 0x3c7cea7e -0, 822822, 555, 0xdf20fb22 -0, 825825, 500, 0xebeee00d -0, 828828, 446, 0x664cc711 -0, 831831, 521, 0xf223df4b -0, 834834, 559, 0x4dc60028 -0, 837837, 593, 0xec440ba9 -0, 840840, 557, 0xef0100b1 -0, 843843, 602, 0x7b1cfd88 -0, 846846, 566, 0x77700a1d -0, 849849, 523, 0x3df7eb64 -0, 852852, 482, 0x5da1dba9 -0, 855855, 541, 0x9c8ff3d7 -0, 858858, 572, 0x3e1204b2 -0, 861861, 549, 0x0921fe3d -0, 864864, 2429, 0xba4fe5a8 -0, 867867, 495, 0xc35ade54 -0, 870870, 453, 0xcc66c9dc -0, 873873, 421, 0x3aa7ce8f -0, 876876, 448, 0x56c6d3d7 -0, 879879, 478, 
0x4131d467 -0, 882882, 497, 0xac3ce3ca -0, 885885, 470, 0x41b9d9d3 -0, 888888, 454, 0x44c2d956 -0, 891891, 460, 0x6629db01 -0, 894894, 488, 0x6be2dd68 -0, 897897, 512, 0xda4cf116 -0, 900900, 550, 0x6e990da9 -0, 903903, 561, 0x81180e5e -0, 906906, 689, 0xe58a5a9a -0, 909909, 548, 0xfa1417a9 -0, 912912, 2832, 0x942495a5 -0, 915915, 610, 0x6b201ab9 -0, 918918, 1015, 0x5f36b3f9 -0, 921921, 870, 0x14e48f0c -0, 924924, 716, 0xf4034b52 -0, 927927, 763, 0xcbf4694e -0, 930930, 778, 0xb9396764 -0, 933933, 831, 0x31999005 -0, 936936, 877, 0xc95e977f -0, 939939, 836, 0xb56c7d61 -0, 942942, 853, 0x2d5980cf -0, 945945, 861, 0x25629295 -0, 948948, 897, 0x0ff78a5f -0, 951951, 1016, 0x4dd8cdfd -0, 954954, 1117, 0x763f06c4 -0, 957957, 984, 0xcf7bc906 -0, 960960, 2750, 0xd428962d -0, 963963, 995, 0x5cbdd6a4 -0, 966966, 894, 0xc42b9e25 -0, 969969, 1028, 0xdf8ad906 -0, 972972, 1059, 0x4c49f0cc -0, 975975, 1122, 0x8880eed8 -0, 978978, 1007, 0xa9b4c243 -0, 981981, 1055, 0x6051dcd6 -0, 984984, 1293, 0xc3b32fa5 -0, 987987, 1101, 0xf986f9af -0, 990990, 1272, 0x13883127 -0, 993993, 1037, 0xb97cebff -0, 996996, 980, 0x0931d807 -0, 999999, 928, 0xbc3eb30b -0, 1003002, 1068, 0x62d9e8de -0, 1006005, 852, 0x9278a49a -0, 1009008, 2841, 0x3091d12d -0, 1012011, 931, 0x60f6c26e -0, 1015014, 949, 0x31b9c856 -0, 1018017, 835, 0xfe018775 -0, 1021020, 779, 0x85356cd7 -0, 1024023, 748, 0x862756bf -0, 1027026, 768, 0x0b7d645c -0, 1030029, 786, 0x7c196f5b -0, 1033032, 716, 0x4e8252cc -0, 1036035, 671, 0x0b2d3023 -0, 1039038, 708, 0x3b2b4f25 -0, 1042041, 786, 0x523d670e -0, 1045044, 680, 0x329142ec -0, 1048047, 703, 0x841b456c -0, 1051050, 660, 0x5cf332f1 -0, 1054053, 681, 0xcd7b3915 -0, 1057056, 2445, 0x27660ecb -0, 1060059, 667, 0xf3d53d2a -0, 1063062, 652, 0xe2b037b0 -0, 1066065, 695, 0x200248fc -0, 1069068, 659, 0x7f6434c5 -0, 1072071, 682, 0x8d243afb -0, 1075074, 701, 0x16e6476f -0, 1078077, 636, 0x319a3236 -0, 1081080, 679, 0x81fa41f9 -0, 1084083, 740, 0xb32850af -0, 1087086, 694, 0xe3f832c2 -0, 
1090089, 681, 0x8174353f -0, 1093092, 757, 0xebbe5a1f -0, 1096095, 683, 0x9b46383c -0, 1099098, 816, 0xd41e6bdf -0, 1102101, 1058, 0x6170d2e6 -0, 1105104, 2489, 0x58fb28e1 -0, 1108107, 804, 0xb3037da8 -0, 1111110, 1053, 0x81ffc0a8 -0, 1114113, 868, 0xf73583cb -0, 1117116, 875, 0xfa5d85bd -0, 1120119, 723, 0x0714418d -0, 1123122, 670, 0xd04333a1 -0, 1126125, 854, 0x370e730d -0, 1129128, 794, 0x3d8a5e3c -0, 1132131, 836, 0xebe26aa7 -0, 1135134, 871, 0x1da58c5e -0, 1138137, 827, 0xda1e6ccb -0, 1141140, 805, 0x10ad6a44 -0, 1144143, 831, 0x826f6fc9 -0, 1147146, 832, 0xb2517364 -0, 1150149, 887, 0x11bf8a3f -0, 1153152, 2718, 0x26a8a174 -0, 1156155, 805, 0x4d0179f9 -0, 1159158, 699, 0x176c4f45 -0, 1162161, 758, 0xc1fc5b16 -0, 1165164, 707, 0x161b4891 -0, 1168167, 733, 0x99b554c0 -0, 1171170, 671, 0xccee2f89 -0, 1174173, 762, 0xd6416c9d -0, 1177176, 721, 0x2ad94f0c -0, 1180179, 727, 0x6280572e -0, 1183182, 856, 0x0a7b797e -0, 1186185, 843, 0xc64288aa -0, 1189188, 877, 0x6d1c945d -0, 1192191, 780, 0x4ba464e8 -0, 1195194, 808, 0xb3087cca -0, 1198197, 870, 0x75809930 -0, 1201200, 2919, 0x5a80f685 -0, 1204203, 1027, 0xc98add3d -0, 1207206, 1003, 0x0d88bd54 -0, 1210209, 1189, 0xb2f91ec7 -0, 1213212, 1320, 0x5acc4db3 -0, 1216215, 1381, 0xbd585feb -0, 1219218, 1378, 0xe1a656f0 -0, 1222221, 1398, 0x88b57a5e -0, 1225224, 1449, 0x1c737698 -0, 1228227, 1420, 0x6f0f80cd -0, 1231230, 1032, 0x2d16d643 -0, 1234233, 1275, 0x38844729 -0, 1237236, 1112, 0x300207ea -0, 1240239, 1105, 0xa2b700be -0, 1243242, 1283, 0x08d04bef -0, 1246245, 1056, 0xf795d994 -0, 1249248, 3202, 0xebf07050 -0, 1252251, 1034, 0x1099dbe5 -0, 1255254, 922, 0x88be9edc -0, 1258257, 1050, 0xd3d7eb96 -0, 1261260, 979, 0x8de6b302 -0, 1264263, 1053, 0x5de2eca8 diff --git a/tests/ref/lavf/pixfmt b/tests/ref/lavf/pixfmt index 186dde5ed3..a6618390e4 100644 --- a/tests/ref/lavf/pixfmt +++ b/tests/ref/lavf/pixfmt @@ -16,15 +16,15 @@ e176bd14185788110e055f945de7f95f *./tests/data/pixfmt/yuvj420p.yuv 304128 
./tests/data/pixfmt/yuvj422p.yuv c10442da177c9f1d12be3c53be6fa12c *./tests/data/pixfmt/yuvj444p.yuv 304128 ./tests/data/pixfmt/yuvj444p.yuv -c6e0f9b5817f484b175c1ec4ffb4e9c9 *./tests/data/pixfmt/rgb24.yuv +6bb61113e7b70eb09dbcec356122a0e2 *./tests/data/pixfmt/rgb24.yuv 304128 ./tests/data/pixfmt/rgb24.yuv -c6e0f9b5817f484b175c1ec4ffb4e9c9 *./tests/data/pixfmt/bgr24.yuv +6bb61113e7b70eb09dbcec356122a0e2 *./tests/data/pixfmt/bgr24.yuv 304128 ./tests/data/pixfmt/bgr24.yuv -c6e0f9b5817f484b175c1ec4ffb4e9c9 *./tests/data/pixfmt/rgb32.yuv +6bb61113e7b70eb09dbcec356122a0e2 *./tests/data/pixfmt/rgb32.yuv 304128 ./tests/data/pixfmt/rgb32.yuv -66d39d464bd89ded2a124897f0a75ade *./tests/data/pixfmt/rgb565.yuv +efa7c0337cc00c796c6df615223716f1 *./tests/data/pixfmt/rgb565.yuv 304128 ./tests/data/pixfmt/rgb565.yuv -c894c3bd8d2631ed1964500b90a0c350 *./tests/data/pixfmt/rgb555.yuv +0df2a477af1415a1b8fbf2a3e552bc39 *./tests/data/pixfmt/rgb555.yuv 304128 ./tests/data/pixfmt/rgb555.yuv 6be306b0cce5f8e6c271ea17fef9745b *./tests/data/pixfmt/gray.yuv 304128 ./tests/data/pixfmt/gray.yuv diff --git a/tests/ref/lavfi/pixfmts_scale_le b/tests/ref/lavfi/pixfmts_scale_le index 5c32a363c8..6e9ab9ae49 100644 --- a/tests/ref/lavfi/pixfmts_scale_le +++ b/tests/ref/lavfi/pixfmts_scale_le @@ -1,46 +1,46 @@ -abgr d894cb97f6c80eb21bdbe8a4eea62d86 -argb 54346f2b2eef10919e0f247241df3b24 -bgr24 570f8d6b51a838aed022ef67535f6bdc -bgr48be fcc0f2dbf45d325f84f816c74cbeeebe -bgr48le 3f9c2b23eed3b8d196d1c14b38ce50f5 +abgr cff82561a074874027ac1cc896fd2730 +argb 756dd1eaa5baca2238ce23dbdc452684 +bgr24 e44192347a45586c6c157e3059610cd1 +bgr48be 6d01b6ccd2ccf18c12985bcb2fde2218 +bgr48le 4caa6914091ad03b8f67c02d6b050bc0 bgr4_byte ee1d35a7baf8e9016891929a2f565c0b -bgr555le 36b745067197f9ca8c1731cac51329c9 -bgr565le 3a514a298c6161a071ddf9963c06509d +bgr555le 41e3e0961478dc634bf68a7bbd670cc9 +bgr565le 614897eaeb422bd9a972f8ee51909be5 bgr8 7f007fa6c153a16e808a9c51605a4016 -bgra a5e7040f9a80cccd65e5acf2ca09ace5 +bgra 
01cfdda1f72fcabb6c46424e27f8c519 gray d7786a7d9d99ac74230cc045cab5632c -gray16be af39ce3a497f6734b157c8b94544f537 -gray16le 7ac1b788bcc472010df7a97e762485e0 -monob 88c4c050758e64d120f50c7eff694381 -monow d31772ebaa877fc2a78565937f7f9673 +gray16be 5ba22d4802b40ec27e62abb22ad1d1cc +gray16le 2d5e83aa875a4c3baa6fecf55e3223bf +monob cb62f31b701c6e987b574974d1b31e32 +monow fd5d417ab7728acddffc06870661df61 nv12 4676d59db43d657dc12841f6bc3ab452 nv21 69c699510ff1fb777b118ebee1002f14 -rgb24 514692e28e8ff6860e415ce4fcf6eb8c -rgb48be 1894cd30dabcd3180518e4d5f09f25e7 -rgb48le 1354e6e27ce3c1d4d4989ee56030c94b +rgb24 13ff53ebeab74dc05492836f1cfbd2c1 +rgb48be f82e99f13d5ede2a53cf3bf7178ca350 +rgb48le 3a09d89e4b27ea1a98f762e662e306a7 rgb4_byte d81ffd3add95842a618eec81024f0b5c -rgb555le a350ef1dc2c9688ed49e7ba018843795 -rgb565le 6f4bb711238baa762d73305213f8d035 +rgb555le bd698d86c03170c4a16607c0fd1f750f +rgb565le bfa0c639d80c3c03fd0c9e5f34296a5e rgb8 091d0170b354ef0e97312b95feb5483f -rgba a3d362f222098a00e63867f612018659 +rgba 16873e3ac914e76116629a5ff8940ac4 uyvy422 314bd486277111a95d9369b944fa0400 yuv410p 7df8f6d69b56a8dcb6c7ee908e5018b5 yuv411p 1143e7c5cc28fe0922b051b17733bc4c yuv420p fdad2d8df8985e3d17e73c71f713cb14 -yuv420p10be 6d335e75b553da590135cf8bb999610c -yuv420p10le d510ddbabefd03ef39ec943fcb51b709 -yuv420p16be 29a0265764530070f5cd3251cc01f66a -yuv420p16le 6f3a265b084a78baec229238d9f7945f -yuv420p9be ec4983b7a949c0472110a7a2c58e278a -yuv420p9le c136dce5913a722eee44ab72cff664b2 +yuv420p10be c143e77e97d2f7d62c3b518857ba9f9b +yuv420p10le 72d90eccf5c34691ff057dafb7447aa2 +yuv420p16be 01da53e7f4f9882d5189ec1b1165ee05 +yuv420p16le 165f9aaf5332e5d088f44534d8ed2bc9 +yuv420p9be bb87fddca65d1742412c8d2b1caf96c6 +yuv420p9le 828eec50014a41258a5423c1fe56ac97 yuv422p 918e37701ee7377d16a8a6c119c56a40 -yuv422p10le aeb0ef08a883f43429ca9d886d8fc095 -yuv422p16be ef3e865fc1d0c68977c735323c50af6e -yuv422p16le 428a9b96214c09cb5a983ce36d6961ff +yuv422p10le a10c4a5837547716f13cd61918b145f9 
+yuv422p16be 961860aa4f229e09f1249910c687081c +yuv422p16le 7695ee42c0581279bbe68de81deb7aee yuv440p 461503fdb9b90451020aa3b25ddf041c yuv444p 81b2eba962d12e8d64f003ac56f6faf2 -yuv444p16be 99a3738c70c8fbdc5a0e4ad4bf50648d -yuv444p16le 385d0cc5240d62da0871915be5d86f0a +yuv444p16be 5f924c2b385826106300cecc4ef4d2df +yuv444p16le 40a55a85858508138b7661c83d95223e yuva420p 8673a9131fb47de69788863f93a50eb7 yuvj420p 30427bd6caf5bda93a173dbebe759e09 yuvj422p fc8288f64fd149573f73cf8da05d8e6d diff --git a/tests/ref/vsynth1/flashsv b/tests/ref/vsynth1/flashsv index 7920193aa7..77fefe0bc0 100644 --- a/tests/ref/vsynth1/flashsv +++ b/tests/ref/vsynth1/flashsv @@ -1,4 +1,4 @@ 97894502b4cb57aca1105b6333f72dae *./tests/data/vsynth1/flashsv.flv 14681925 ./tests/data/vsynth1/flashsv.flv -947cb24ec45a453348ae6fe3fa278071 *./tests/data/flashsv.vsynth1.out.yuv -stddev: 2.85 PSNR: 39.03 MAXDIFF: 49 bytes: 7603200/ 7603200 +791e1fb999deb2e4156e2286d48c4ed1 *./tests/data/flashsv.vsynth1.out.yuv +stddev: 2.84 PSNR: 39.04 MAXDIFF: 49 bytes: 7603200/ 7603200 diff --git a/tests/ref/vsynth1/jpegls b/tests/ref/vsynth1/jpegls index 636f7fc556..153f9b9c0f 100644 --- a/tests/ref/vsynth1/jpegls +++ b/tests/ref/vsynth1/jpegls @@ -1,4 +1,4 @@ 519e26bb1ac0f3db8f90b36537f2f760 *./tests/data/vsynth1/jpegls.avi 9089812 ./tests/data/vsynth1/jpegls.avi -947cb24ec45a453348ae6fe3fa278071 *./tests/data/jpegls.vsynth1.out.yuv -stddev: 2.85 PSNR: 39.03 MAXDIFF: 49 bytes: 7603200/ 7603200 +791e1fb999deb2e4156e2286d48c4ed1 *./tests/data/jpegls.vsynth1.out.yuv +stddev: 2.84 PSNR: 39.04 MAXDIFF: 49 bytes: 7603200/ 7603200 diff --git a/tests/ref/vsynth1/msvideo1 b/tests/ref/vsynth1/msvideo1 index 479234d846..7978258c99 100644 --- a/tests/ref/vsynth1/msvideo1 +++ b/tests/ref/vsynth1/msvideo1 @@ -1,4 +1,4 @@ 267a152a73cbc5ac4694a6e3b254be34 *./tests/data/vsynth1/msvideo1.avi 2162264 ./tests/data/vsynth1/msvideo1.avi -cf15dd12b8347567ae350383bf4ef4bb *./tests/data/msvideo1.vsynth1.out.yuv -stddev: 11.81 PSNR: 26.68 
MAXDIFF: 151 bytes: 7603200/ 7603200 +c0665fac1bd896b6fe7fe0eead805bd5 *./tests/data/msvideo1.vsynth1.out.yuv +stddev: 11.80 PSNR: 26.69 MAXDIFF: 151 bytes: 7603200/ 7603200 diff --git a/tests/ref/vsynth1/qtrle b/tests/ref/vsynth1/qtrle index 9988897b91..d25b986184 100644 --- a/tests/ref/vsynth1/qtrle +++ b/tests/ref/vsynth1/qtrle @@ -1,4 +1,4 @@ d14041925ce5ec5001dc519276b1a1ab *./tests/data/vsynth1/qtrle.mov 15263232 ./tests/data/vsynth1/qtrle.mov -243325fb2cae1a9245efd49aff936327 *./tests/data/qtrle.vsynth1.out.yuv -stddev: 3.42 PSNR: 37.43 MAXDIFF: 48 bytes: 7603200/ 7603200 +93695a27c24a61105076ca7b1f010bbd *./tests/data/qtrle.vsynth1.out.yuv +stddev: 3.42 PSNR: 37.44 MAXDIFF: 48 bytes: 7603200/ 7603200 diff --git a/tests/ref/vsynth1/rgb b/tests/ref/vsynth1/rgb index 10a0a13329..1c353dae7f 100644 --- a/tests/ref/vsynth1/rgb +++ b/tests/ref/vsynth1/rgb @@ -1,4 +1,4 @@ 05f0719cb52486d9a4beb9cfae3f2571 *./tests/data/vsynth1/rgb.avi 15213260 ./tests/data/vsynth1/rgb.avi -243325fb2cae1a9245efd49aff936327 *./tests/data/rgb.vsynth1.out.yuv -stddev: 3.42 PSNR: 37.43 MAXDIFF: 48 bytes: 7603200/ 7603200 +93695a27c24a61105076ca7b1f010bbd *./tests/data/rgb.vsynth1.out.yuv +stddev: 3.42 PSNR: 37.44 MAXDIFF: 48 bytes: 7603200/ 7603200 diff --git a/tests/ref/vsynth1/vref b/tests/ref/vsynth1/vref new file mode 100644 index 0000000000..2defdac870 --- /dev/null +++ b/tests/ref/vsynth1/vref @@ -0,0 +1,2 @@ +c5ccac874dbf808e9088bc3107860042 *./tests/data/vsynth1.ref.yuv +7603200 ./tests/data/vsynth1.ref.yuv diff --git a/tests/ref/vsynth2/flashsv b/tests/ref/vsynth2/flashsv index bfbb9e1c08..0b7b7d925e 100644 --- a/tests/ref/vsynth2/flashsv +++ b/tests/ref/vsynth2/flashsv @@ -1,4 +1,4 @@ 0667077971e0cb63b5f49c580006e90e *./tests/data/vsynth2/flashsv.flv 12368953 ./tests/data/vsynth2/flashsv.flv -592b3321994e26a990deb3a0a1415de9 *./tests/data/flashsv.vsynth2.out.yuv -stddev: 0.65 PSNR: 51.84 MAXDIFF: 14 bytes: 7603200/ 7603200 +3a984506f1ebfc9fb73b6814cab201cc 
*./tests/data/flashsv.vsynth2.out.yuv +stddev: 0.66 PSNR: 51.73 MAXDIFF: 14 bytes: 7603200/ 7603200 diff --git a/tests/ref/vsynth2/jpegls b/tests/ref/vsynth2/jpegls index e7fa2df46c..229937ec80 100644 --- a/tests/ref/vsynth2/jpegls +++ b/tests/ref/vsynth2/jpegls @@ -1,4 +1,4 @@ 4fc53937f048c900ae6d50fda9dba206 *./tests/data/vsynth2/jpegls.avi 8334630 ./tests/data/vsynth2/jpegls.avi -592b3321994e26a990deb3a0a1415de9 *./tests/data/jpegls.vsynth2.out.yuv -stddev: 0.65 PSNR: 51.84 MAXDIFF: 14 bytes: 7603200/ 7603200 +3a984506f1ebfc9fb73b6814cab201cc *./tests/data/jpegls.vsynth2.out.yuv +stddev: 0.66 PSNR: 51.73 MAXDIFF: 14 bytes: 7603200/ 7603200 diff --git a/tests/ref/vsynth2/msvideo1 b/tests/ref/vsynth2/msvideo1 index b630b159e4..f71be35278 100644 --- a/tests/ref/vsynth2/msvideo1 +++ b/tests/ref/vsynth2/msvideo1 @@ -1,4 +1,4 @@ 5dddbbd6616d9be4bc0fd0c9650bd9e3 *./tests/data/vsynth2/msvideo1.avi 1259308 ./tests/data/vsynth2/msvideo1.avi -e2e7a952135f6307a74f2e178dc0df20 *./tests/data/msvideo1.vsynth2.out.yuv -stddev: 7.42 PSNR: 30.71 MAXDIFF: 123 bytes: 7603200/ 7603200 +cd83ffcbc73573044e3aead3094229e5 *./tests/data/msvideo1.vsynth2.out.yuv +stddev: 7.42 PSNR: 30.72 MAXDIFF: 123 bytes: 7603200/ 7603200 diff --git a/tests/ref/vsynth2/qtrle b/tests/ref/vsynth2/qtrle index 6b2a01168e..d87a6b96ac 100644 --- a/tests/ref/vsynth2/qtrle +++ b/tests/ref/vsynth2/qtrle @@ -1,4 +1,4 @@ d8c1604dc46d9aa4ec0385e6722c6989 *./tests/data/vsynth2/qtrle.mov 14798419 ./tests/data/vsynth2/qtrle.mov -b2418e0e3a9a8619b31219cbcf24dc82 *./tests/data/qtrle.vsynth2.out.yuv -stddev: 1.26 PSNR: 46.06 MAXDIFF: 13 bytes: 7603200/ 7603200 +98d0e2854731472c5bf13d8638502d0a *./tests/data/qtrle.vsynth2.out.yuv +stddev: 1.26 PSNR: 46.10 MAXDIFF: 13 bytes: 7603200/ 7603200 diff --git a/tests/ref/vsynth2/rgb b/tests/ref/vsynth2/rgb index ea83470814..bcdef36b72 100644 --- a/tests/ref/vsynth2/rgb +++ b/tests/ref/vsynth2/rgb @@ -1,4 +1,4 @@ f2e9c419023c743bf99aa5b2e55ad233 *./tests/data/vsynth2/rgb.avi 
15213260 ./tests/data/vsynth2/rgb.avi -b2418e0e3a9a8619b31219cbcf24dc82 *./tests/data/rgb.vsynth2.out.yuv -stddev: 1.26 PSNR: 46.06 MAXDIFF: 13 bytes: 7603200/ 7603200 +98d0e2854731472c5bf13d8638502d0a *./tests/data/rgb.vsynth2.out.yuv +stddev: 1.26 PSNR: 46.10 MAXDIFF: 13 bytes: 7603200/ 7603200 diff --git a/tests/ref/vsynth2/vref b/tests/ref/vsynth2/vref new file mode 100644 index 0000000000..8f83b6c7ba --- /dev/null +++ b/tests/ref/vsynth2/vref @@ -0,0 +1,2 @@ +dde5895817ad9d219f79a52d0bdfb001 *./tests/data/vsynth2.ref.yuv +7603200 ./tests/data/vsynth2.ref.yuv diff --git a/tests/regression-funcs.sh b/tests/regression-funcs.sh index 0e4ea44f46..e57cdf111e 100755 --- a/tests/regression-funcs.sh +++ b/tests/regression-funcs.sh @@ -23,9 +23,6 @@ errfile="$datadir/$this.err" # various files ffmpeg="$target_exec ${target_path}/ffmpeg" tiny_psnr="tests/tiny_psnr" -benchfile="$datadir/$this.bench" -bench="$datadir/$this.bench.tmp" -bench2="$datadir/$this.bench2.tmp" raw_src="${target_path}/$raw_src_dir/%02d.pgm" raw_dst="$datadir/$this.out.yuv" raw_ref="$datadir/$test_ref.ref.yuv" @@ -35,7 +32,7 @@ pcm_ref="$datadir/$test_ref.ref.wav" crcfile="$datadir/$this.crc" target_crcfile="$target_datadir/$this.crc" -cleanfiles="$raw_dst $pcm_dst $crcfile $bench $bench2" +cleanfiles="$raw_dst $pcm_dst $crcfile" trap 'rm -f -- $cleanfiles' EXIT mkdir -p "$datadir" @@ -56,7 +53,7 @@ echov(){ FFMPEG_OPTS="-v 0 -y" COMMON_OPTS="-flags +bitexact -idct simple -sws_flags +accurate_rnd+bitexact" DEC_OPTS="$COMMON_OPTS -threads $threads" -ENC_OPTS="$COMMON_OPTS -dct fastint" +ENC_OPTS="$COMMON_OPTS -threads 1 -dct fastint" run_ffmpeg() { @@ -69,7 +66,7 @@ do_ffmpeg() f="$1" shift set -- $* ${target_path}/$f - run_ffmpeg -benchmark $* > $bench + run_ffmpeg $* do_md5sum $f >> $logfile if [ $f = $raw_dst ] ; then $tiny_psnr $f $raw_ref >> $logfile @@ -78,8 +75,6 @@ do_ffmpeg() else wc -c $f >> $logfile fi - expr "$(cat $bench)" : '.*utime=\(.*s\)' > $bench2 - echo $(cat $bench2) $f >> 
$benchfile } do_ffmpeg_nomd5() @@ -87,7 +82,7 @@ do_ffmpeg_nomd5() f="$1" shift set -- $* ${target_path}/$f - run_ffmpeg -benchmark $* > $bench + run_ffmpeg $* if [ $f = $raw_dst ] ; then $tiny_psnr $f $raw_ref >> $logfile elif [ $f = $pcm_dst ] ; then @@ -95,8 +90,6 @@ do_ffmpeg_nomd5() else wc -c $f >> $logfile fi - expr "$(cat $bench)" : '.*utime=\(.*s\)' > $bench2 - echo $(cat $bench2) $f >> $benchfile } do_ffmpeg_crc() @@ -107,18 +100,9 @@ do_ffmpeg_crc() echo "$f $(cat $crcfile)" >> $logfile } -do_ffmpeg_nocheck() -{ - f="$1" - shift - run_ffmpeg -benchmark $* > $bench - expr "$(cat $bench)" : '.*utime=\(.*s\)' > $bench2 - echo $(cat $bench2) $f >> $benchfile -} - do_video_decoding() { - do_ffmpeg $raw_dst $DEC_OPTS $1 -i $target_path/$file -f rawvideo $ENC_OPTS $2 + do_ffmpeg $raw_dst $DEC_OPTS $1 -i $target_path/$file -f rawvideo $ENC_OPTS -vsync 0 $2 } do_video_encoding() @@ -130,7 +114,7 @@ do_video_encoding() do_audio_encoding() { file=${outfile}$1 - do_ffmpeg $file $DEC_OPTS -ac 2 -f s16le -i $pcm_src -ab 128k $ENC_OPTS $2 + do_ffmpeg $file $DEC_OPTS -ac 2 -ar 44100 -f s16le -i $pcm_src -ab 128k $ENC_OPTS $2 } do_audio_decoding() diff --git a/tools/build_avopt b/tools/build_avopt deleted file mode 100755 index fcf165765c..0000000000 --- a/tools/build_avopt +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/sh -sed 's/unsigned//g' |\ - sed 's/enum//g' |\ - egrep '^ *(int|float|double|AVRational|char *\*) *[a-zA-Z_0-9]* *;' |\ - sed 's/^ *\([^ ]*\)[ *]*\([^;]*\);.*$/{"\2", NULL, OFFSET(\2), FF_OPT_TYPE_\U\1, DEFAULT, \1_MIN, \1_MAX},/' |\ - sed 's/AVRATIONAL_M/INT_M/g'|\ - sed 's/TYPE_AVRATIONAL/TYPE_RATIONAL/g'|\ - sed 's/FLOAT_M/FLT_M/g'|\ - sed 's/FF_OPT_TYPE_CHAR/FF_OPT_TYPE_STRING/g' diff --git a/tools/cws2fws.c b/tools/cws2fws.c index aa7d690be3..b8535feaa4 100644 --- a/tools/cws2fws.c +++ b/tools/cws2fws.c @@ -35,14 +35,14 @@ int main(int argc, char *argv[]) fd_in = open(argv[1], O_RDONLY); if (fd_in < 0) { - perror("Error while opening: "); + perror("Error 
opening input file"); exit(1); } fd_out = open(argv[2], O_WRONLY|O_CREAT, 00644); if (fd_out < 0) { - perror("Error while opening: "); + perror("Error opening output file"); close(fd_in); exit(1); } @@ -69,7 +69,10 @@ int main(int argc, char *argv[]) // write out modified header buf_in[0] = 'F'; - write(fd_out, &buf_in, 8); + if (write(fd_out, &buf_in, 8) < 8) { + perror("Error writing output file"); + exit(1); + } zstream.zalloc = NULL; zstream.zfree = NULL; @@ -101,7 +104,10 @@ int main(int argc, char *argv[]) zstream.avail_in, zstream.total_in, zstream.avail_out, zstream.total_out, zstream.total_out-last_out); - write(fd_out, &buf_out, zstream.total_out-last_out); + if (write(fd_out, &buf_out, zstream.total_out - last_out) < zstream.total_out - last_out) { + perror("Error writing output file"); + exit(1); + } i += len; @@ -120,7 +126,10 @@ int main(int argc, char *argv[]) buf_in[3] = ((zstream.total_out+8) >> 24) & 0xff; lseek(fd_out, 4, SEEK_SET); - write(fd_out, &buf_in, 4); + if (write(fd_out, &buf_in, 4) < 4) { + perror("Error writing output file"); + exit(1); + } } inflateEnd(&zstream); diff --git a/tools/jauche_sortierer.sh b/tools/jauche_sortierer.sh deleted file mode 100755 index 1f84f1a2a9..0000000000 --- a/tools/jauche_sortierer.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/sh -#GPL -#TODO -#add pixelformat/sampleformat into the path of the codecs - -FFP=../ffprobe -TMP=$(mktemp) || exit 1 -TARGET=$1 -shift - -for v do - BASE=$(basename $v) - echo $v | egrep -i '(public|private)' >/dev/null && echo Warning $v may be private - $FFP $v 2> $TMP - FORM=$((grep 'Input #0, ' -m1 $TMP || echo 'Input #0, unknown') | sed 's/Input #0, \([a-zA-Z0-9_]*\).*/\1/' ) - mkdir -p $TARGET/container/$FORM - ln -s $v $TARGET/container/$FORM/$BASE - eval $(grep 'Stream #0\.[^:]*: [a-zA-Z0-9][^:]*: [a-zA-Z0-9]' $TMP | sed 's#[^:]*: \([a-zA-Z0-9]*\)[^:]*: \([a-zA-Z0-9]*\).*#mkdir -p '$TARGET'/\1/\2 ; ln -s '$v' '$TARGET'/\1/\2/'$BASE' ; #') -done - -rm $TMP diff --git 
a/tools/lavfi-showfiltfmts.c b/tools/lavfi-showfiltfmts.c index 19bb6f6819..cc19e00a8c 100644 --- a/tools/lavfi-showfiltfmts.c +++ b/tools/lavfi-showfiltfmts.c @@ -81,7 +81,7 @@ int main(int argc, char **argv) for (j = 0; j < fmts->format_count; j++) printf("INPUT[%d] %s: %s\n", i, filter_ctx->filter->inputs[i].name, - av_pix_fmt_descriptors[fmts->formats[j]].name); + av_get_pix_fmt_name(fmts->formats[j])); } /* print the supported formats in output */ @@ -90,7 +90,7 @@ int main(int argc, char **argv) for (j = 0; j < fmts->format_count; j++) printf("OUTPUT[%d] %s: %s\n", i, filter_ctx->filter->outputs[i].name, - av_pix_fmt_descriptors[fmts->formats[j]].name); + av_get_pix_fmt_name(fmts->formats[j])); } avfilter_free(filter_ctx); diff --git a/tools/pktdumper.c b/tools/pktdumper.c index ee60414060..ddb4b2b9fb 100644 --- a/tools/pktdumper.c +++ b/tools/pktdumper.c @@ -104,7 +104,11 @@ int main(int argc, char **argv) //printf("open(\"%s\")\n", pktfilename); if (!nowrite) { fd = open(pktfilename, O_WRONLY|O_CREAT, 0644); - write(fd, pkt.data, pkt.size); + err = write(fd, pkt.data, pkt.size); + if (err < 0) { + fprintf(stderr, "write: error %d\n", err); + return 1; + } close(fd); } av_free_packet(&pkt); |