diff options
Diffstat (limited to 'libswscale')
28 files changed, 1594 insertions, 479 deletions
diff --git a/libswscale/bfin/internal_bfin.S b/libswscale/bfin/internal_bfin.S index 9f985e7824..cb8d71253c 100644 --- a/libswscale/bfin/internal_bfin.S +++ b/libswscale/bfin/internal_bfin.S @@ -5,20 +5,20 @@ * Blackfin video color space converter operations * convert I420 YV12 to RGB in various formats * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ diff --git a/libswscale/bfin/swscale_bfin.c b/libswscale/bfin/swscale_bfin.c index c8030ba29f..4b26ba67c2 100644 --- a/libswscale/bfin/swscale_bfin.c +++ b/libswscale/bfin/swscale_bfin.c @@ -3,20 +3,20 @@ * * Blackfin software video scaler operations * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ diff --git a/libswscale/bfin/yuv2rgb_bfin.c b/libswscale/bfin/yuv2rgb_bfin.c index 68af522642..7a7dc7f0e6 100644 --- a/libswscale/bfin/yuv2rgb_bfin.c +++ b/libswscale/bfin/yuv2rgb_bfin.c @@ -4,20 +4,20 @@ * Blackfin video color space converter operations * convert I420 YV12 to RGB in various formats * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -28,6 +28,7 @@ #include <assert.h> #include "config.h" #include <unistd.h> +#include "libavutil/pixdesc.h" #include "libswscale/rgb2rgb.h" #include "libswscale/swscale.h" #include "libswscale/swscale_internal.h" @@ -197,7 +198,7 @@ SwsFunc ff_yuv2rgb_get_func_ptr_bfin(SwsContext *c) } av_log(c, AV_LOG_INFO, "BlackFin accelerated color space converter %s\n", - sws_format_name (c->dstFormat)); + av_get_pix_fmt_name(c->dstFormat)); return f; } diff --git a/libswscale/colorspace-test.c b/libswscale/colorspace-test.c index 07c1cbd803..34095d8532 100644 --- a/libswscale/colorspace-test.c +++ b/libswscale/colorspace-test.c @@ -1,20 +1,20 @@ /* * Copyright (C) 2002 Michael Niedermayer <michaelni@gmx.at> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ diff --git a/libswscale/mlib/yuv2rgb_mlib.c b/libswscale/mlib/yuv2rgb_mlib.c index 526c97505c..e9f11494ee 100644 --- a/libswscale/mlib/yuv2rgb_mlib.c +++ b/libswscale/mlib/yuv2rgb_mlib.c @@ -3,20 +3,20 @@ * * Copyright (C) 2003 Michael Niedermayer <michaelni@gmx.at> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ diff --git a/libswscale/options.c b/libswscale/options.c index ecd0ecd53a..010c68ecbe 100644 --- a/libswscale/options.c +++ b/libswscale/options.c @@ -1,20 +1,20 @@ /* * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ diff --git a/libswscale/ppc/swscale_altivec.c b/libswscale/ppc/swscale_altivec.c index 42e965de95..197000beb9 100644 --- a/libswscale/ppc/swscale_altivec.c +++ b/libswscale/ppc/swscale_altivec.c @@ -4,20 +4,20 @@ * Copyright (C) 2004 Romain Dolbeau <romain@dolbeau.org> * based on the equivalent C code in swscale.c * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -36,13 +36,13 @@ altivec_packIntArrayToCharArray(int *val, uint8_t* dest, int dstW) register int i; vector unsigned int altivec_vectorShiftInt19 = vec_add(vec_splat_u32(10), vec_splat_u32(9)); - if ((unsigned int)dest % 16) { + if ((uintptr_t)dest % 16) { /* badly aligned store, we force store alignment */ /* and will handle load misalignment on val w/ vec_perm */ vector unsigned char perm1; vector signed int v1; for (i = 0 ; (i < dstW) && - (((unsigned int)dest + i) % 16) ; i++) { + (((uintptr_t)dest + i) % 16) ; i++) { int t = val[i] >> 19; dest[i] = (t < 0) ? 0 : ((t > 255) ? 255 : t); } diff --git a/libswscale/ppc/yuv2rgb_altivec.c b/libswscale/ppc/yuv2rgb_altivec.c index 476db22489..e13702b100 100644 --- a/libswscale/ppc/yuv2rgb_altivec.c +++ b/libswscale/ppc/yuv2rgb_altivec.c @@ -3,20 +3,20 @@ * * copyright (C) 2004 Marc Hoffman <marc.hoffman@analog.com> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -95,6 +95,7 @@ adjustment. #include "libswscale/swscale.h" #include "libswscale/swscale_internal.h" #include "libavutil/cpu.h" +#include "libavutil/pixdesc.h" #include "yuv2rgb_altivec.h" #undef PROFILE_THE_BEAST @@ -298,7 +299,7 @@ static int altivec_##name (SwsContext *c, \ vector signed short R1,G1,B1; \ vector unsigned char R,G,B; \ \ - vector unsigned char *y1ivP, *y2ivP, *uivP, *vivP; \ + const vector unsigned char *y1ivP, *y2ivP, *uivP, *vivP; \ vector unsigned char align_perm; \ \ vector signed short \ @@ -335,10 +336,10 @@ static int altivec_##name (SwsContext *c, \ \ for (j=0;j<w/16;j++) { \ \ - y1ivP = (vector unsigned char *)y1i; \ - y2ivP = (vector unsigned char *)y2i; \ - uivP = (vector unsigned char *)ui; \ - vivP = (vector unsigned char *)vi; \ + y1ivP = (const vector unsigned char *)y1i; \ + y2ivP = (const vector unsigned char *)y2i; \ + uivP = (const vector unsigned char *)ui; \ + vivP = (const vector unsigned char *)vi; \ \ align_perm = vec_lvsl (0, y1i); \ y0 = (vector unsigned char) \ @@ -720,7 +721,7 @@ ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter, static int printed_error_message; if (!printed_error_message) { av_log(c, AV_LOG_ERROR, "altivec_yuv2packedX doesn't support %s output\n", - sws_format_name(c->dstFormat)); + av_get_pix_fmt_name(c->dstFormat)); printed_error_message=1; } return; @@ -795,7 +796,7 @@ ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter, default: /* Unreachable, I think. */ av_log(c, AV_LOG_ERROR, "altivec_yuv2packedX doesn't support %s output\n", - sws_format_name(c->dstFormat)); + av_get_pix_fmt_name(c->dstFormat)); return; } diff --git a/libswscale/ppc/yuv2rgb_altivec.h b/libswscale/ppc/yuv2rgb_altivec.h index b54a856905..15385b1d3b 100644 --- a/libswscale/ppc/yuv2rgb_altivec.h +++ b/libswscale/ppc/yuv2rgb_altivec.h @@ -4,20 +4,20 @@ * Copyright (C) 2004 Romain Dolbeau <romain@dolbeau.org> * based on the equivalent C code in swscale.c * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ diff --git a/libswscale/ppc/yuv2yuv_altivec.c b/libswscale/ppc/yuv2yuv_altivec.c index 4cd02ffe1d..82c265afd2 100644 --- a/libswscale/ppc/yuv2yuv_altivec.c +++ b/libswscale/ppc/yuv2yuv_altivec.c @@ -4,20 +4,20 @@ * Copyright (C) 2004 Romain Dolbeau <romain@dolbeau.org> * based on the equivalent C code in swscale.c * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ diff --git a/libswscale/rgb2rgb.c b/libswscale/rgb2rgb.c index 51cb600e6b..84ef43b774 100644 --- a/libswscale/rgb2rgb.c +++ b/libswscale/rgb2rgb.c @@ -6,20 +6,20 @@ * Written by Nick Kurshev. * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at) * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #include <inttypes.h> @@ -123,6 +123,34 @@ void sws_rgb2rgb_init(void) rgb2rgb_init_x86(); } +#if LIBSWSCALE_VERSION_MAJOR < 1 +void palette8topacked32(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette) +{ + sws_convertPalette8ToPacked32(src, dst, num_pixels, palette); +} + +void palette8topacked24(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette) +{ + sws_convertPalette8ToPacked24(src, dst, num_pixels, palette); +} + +/** + * Palette is assumed to contain BGR16, see rgb32to16 to convert the palette. + */ +void palette8torgb16(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette) +{ + long i; + for (i=0; i<num_pixels; i++) + ((uint16_t *)dst)[i] = ((const uint16_t *)palette)[src[i]]; +} +void palette8tobgr16(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette) +{ + long i; + for (i=0; i<num_pixels; i++) + ((uint16_t *)dst)[i] = av_bswap16(((const uint16_t *)palette)[src[i]]); +} +#endif + void rgb32to24(const uint8_t *src, uint8_t *dst, int src_size) { int i; diff --git a/libswscale/rgb2rgb.h b/libswscale/rgb2rgb.h index 9d051de4f6..6923dd9608 100644 --- a/libswscale/rgb2rgb.h +++ b/libswscale/rgb2rgb.h @@ -6,20 +6,20 @@ * Written by Nick Kurshev. * YUV & runtime CPU stuff by Michael (michaelni@gmx.at) * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -68,6 +68,17 @@ void shuffle_bytes_1230(const uint8_t *src, uint8_t *dst, int src_size); void shuffle_bytes_3012(const uint8_t *src, uint8_t *dst, int src_size); void shuffle_bytes_3210(const uint8_t *src, uint8_t *dst, int src_size); +#if LIBSWSCALE_VERSION_MAJOR < 1 +/* deprecated, use the public versions in swscale.h */ +attribute_deprecated void palette8topacked32(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette); +attribute_deprecated void palette8topacked24(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette); + +/* totally deprecated, please fix code that uses this */ +attribute_deprecated void palette8torgb16(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette); +attribute_deprecated void palette8tobgr16(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette); +#endif + + void rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, int width, int height, int lumStride, int chromStride, int srcStride); diff --git a/libswscale/rgb2rgb_template.c b/libswscale/rgb2rgb_template.c index c02015e5b3..0734e8891b 100644 --- a/libswscale/rgb2rgb_template.c +++ b/libswscale/rgb2rgb_template.c @@ -7,20 +7,20 @@ * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at) * lot of big-endian byte order fixes by Alex Beregszaszi * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -658,6 +658,9 @@ void rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, ydst += lumStride; src += srcStride; + if(y+1 == height) + break; + for (i=0; i<chromWidth; i++) { unsigned int b = src[6*i+0]; unsigned int g = src[6*i+1]; diff --git a/libswscale/sparc/yuv2rgb_vis.c b/libswscale/sparc/yuv2rgb_vis.c index 2111ea8f64..cc98f04053 100644 --- a/libswscale/sparc/yuv2rgb_vis.c +++ b/libswscale/sparc/yuv2rgb_vis.c @@ -2,20 +2,20 @@ * VIS optimized software YUV to RGB converter * Copyright (c) 2007 Denes Balatoni <dbalatoni@programozo.hu> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ diff --git a/libswscale/swscale-test.c b/libswscale/swscale-test.c index b5cf1d202e..888cbab26a 100644 --- a/libswscale/swscale-test.c +++ b/libswscale/swscale-test.c @@ -1,20 +1,20 @@ /* * Copyright (C) 2003 Michael Niedermayer <michaelni@gmx.at> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ diff --git a/libswscale/swscale.c b/libswscale/swscale.c index f729c117a7..4318e0bf15 100644 --- a/libswscale/swscale.c +++ b/libswscale/swscale.c @@ -1,20 +1,20 @@ /* * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -60,6 +60,7 @@ untested special converters #include "swscale.h" #include "swscale_internal.h" #include "rgb2rgb.h" +#include "libavutil/avassert.h" #include "libavutil/intreadwrite.h" #include "libavutil/cpu.h" #include "libavutil/avutil.h" @@ -67,7 +68,6 @@ untested special converters #include "libavutil/bswap.h" #include "libavutil/pixdesc.h" -#define DITHER1XBPP #define RGB2YUV_SHIFT 15 #define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5)) @@ -194,6 +194,101 @@ DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={ }; #endif +DECLARE_ALIGNED(8, const uint8_t, dithers)[8][8][8]={ +{ + { 0, 1, 0, 1, 0, 1, 0, 1,}, + { 1, 0, 1, 0, 1, 0, 1, 0,}, + { 0, 1, 0, 1, 0, 1, 0, 1,}, + { 1, 0, 1, 0, 1, 0, 1, 0,}, + { 0, 1, 0, 1, 0, 1, 0, 1,}, + { 1, 0, 1, 0, 1, 0, 1, 0,}, + { 0, 1, 0, 1, 0, 1, 0, 1,}, + { 1, 0, 1, 0, 1, 0, 1, 0,}, +},{ + { 1, 2, 1, 2, 1, 2, 1, 2,}, + { 3, 0, 3, 0, 3, 0, 3, 0,}, + { 1, 2, 1, 2, 1, 2, 1, 2,}, + { 3, 0, 3, 0, 3, 0, 3, 0,}, + { 1, 2, 1, 2, 1, 2, 1, 2,}, + { 3, 0, 3, 0, 3, 0, 3, 0,}, + { 1, 2, 1, 2, 1, 2, 1, 2,}, + { 3, 0, 3, 0, 3, 0, 3, 0,}, +},{ + { 2, 4, 3, 5, 2, 4, 3, 5,}, + { 6, 0, 7, 1, 6, 0, 7, 1,}, + { 3, 5, 2, 4, 3, 5, 2, 4,}, + { 7, 1, 6, 0, 7, 1, 6, 0,}, + { 2, 4, 3, 5, 2, 4, 3, 5,}, + { 6, 0, 7, 1, 6, 0, 7, 1,}, + { 3, 5, 2, 4, 3, 5, 2, 4,}, + { 7, 1, 6, 0, 7, 1, 6, 0,}, +},{ + { 4, 8, 7, 11, 4, 8, 7, 11,}, + { 12, 0, 15, 3, 12, 0, 15, 3,}, + { 6, 10, 5, 9, 6, 10, 5, 9,}, + { 14, 2, 13, 1, 14, 2, 13, 1,}, + { 4, 8, 7, 11, 4, 8, 7, 11,}, + { 12, 0, 15, 3, 12, 0, 15, 3,}, + { 6, 10, 5, 9, 6, 10, 5, 9,}, + { 14, 2, 13, 1, 14, 2, 13, 1,}, +},{ + { 9, 17, 15, 23, 8, 16, 14, 22,}, + { 25, 1, 31, 7, 24, 0, 30, 6,}, + { 13, 21, 11, 19, 12, 20, 10, 18,}, + { 29, 5, 27, 3, 28, 4, 26, 2,}, + { 8, 16, 14, 22, 9, 17, 15, 23,}, + { 24, 0, 30, 6, 25, 1, 31, 7,}, + { 12, 20, 10, 18, 13, 21, 11, 19,}, + { 28, 4, 26, 2, 29, 5, 27, 3,}, +},{ + { 18, 34, 30, 46, 17, 33, 29, 45,}, + { 50, 2, 62, 14, 49, 1, 61, 13,}, + { 26, 42, 22, 38, 25, 41, 21, 37,}, + { 58, 10, 54, 6, 57, 9, 53, 5,}, + { 16, 32, 28, 44, 19, 35, 31, 47,}, + { 48, 0, 60, 12, 51, 3, 63, 15,}, + { 24, 40, 20, 36, 27, 43, 23, 39,}, + { 56, 8, 52, 4, 59, 11, 55, 7,}, +},{ + { 18, 34, 30, 46, 17, 33, 29, 45,}, + { 50, 2, 62, 14, 49, 1, 61, 13,}, + { 26, 42, 22, 38, 25, 41, 21, 37,}, + { 58, 10, 54, 6, 57, 9, 53, 5,}, + { 16, 32, 28, 44, 19, 35, 31, 47,}, + { 48, 0, 60, 12, 51, 3, 63, 15,}, + { 24, 40, 20, 36, 27, 43, 23, 39,}, + { 56, 8, 52, 4, 59, 11, 55, 7,}, +},{ + { 36, 68, 60, 92, 34, 66, 58, 90,}, + { 100, 4,124, 28, 98, 2,122, 26,}, + { 52, 84, 44, 76, 50, 82, 42, 74,}, + { 116, 20,108, 12,114, 18,106, 10,}, + { 32, 64, 56, 88, 38, 70, 62, 94,}, + { 96, 0,120, 24,102, 6,126, 30,}, + { 48, 80, 40, 72, 54, 86, 46, 78,}, + { 112, 16,104, 8,118, 22,110, 14,}, +}}; + +static const uint8_t flat64[8]={64,64,64,64,64,64,64,64}; + +const uint16_t dither_scale[15][16]={ +{ 2, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,}, +{ 2, 3, 7, 7, 13, 13, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,}, +{ 3, 3, 4, 15, 15, 29, 57, 57, 57, 113, 113, 113, 113, 113, 113, 113,}, +{ 3, 4, 4, 5, 31, 31, 61, 121, 241, 241, 241, 241, 481, 481, 481, 481,}, +{ 3, 4, 5, 5, 6, 63, 63, 125, 249, 497, 993, 993, 993, 993, 993, 1985,}, +{ 3, 5, 6, 6, 6, 7, 127, 127, 253, 505, 1009, 2017, 4033, 4033, 4033, 4033,}, +{ 3, 5, 6, 7, 7, 7, 8, 255, 255, 509, 1017, 2033, 4065, 8129,16257,16257,}, +{ 3, 5, 6, 8, 8, 8, 8, 9, 511, 511, 1021, 2041, 4081, 8161,16321,32641,}, +{ 3, 5, 7, 8, 9, 9, 9, 9, 10, 1023, 1023, 2045, 4089, 8177,16353,32705,}, +{ 3, 5, 7, 8, 10, 10, 10, 10, 10, 11, 2047, 2047, 4093, 8185,16369,32737,}, +{ 3, 5, 7, 8, 10, 11, 11, 11, 11, 11, 12, 4095, 4095, 8189,16377,32753,}, +{ 3, 5, 7, 9, 10, 12, 12, 12, 12, 12, 12, 13, 8191, 8191,16381,32761,}, +{ 3, 5, 7, 9, 10, 12, 13, 13, 13, 13, 13, 13, 14,16383,16383,32765,}, +{ 3, 5, 7, 9, 10, 12, 14, 14, 14, 14, 14, 14, 14, 15,32767,32767,}, +{ 3, 5, 7, 9, 11, 12, 14, 15, 15, 15, 15, 15, 15, 15, 16,65535,}, +}; + static av_always_inline void yuv2yuvX16_c_template(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, const int16_t *chrFilter, @@ -291,12 +386,13 @@ static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter, const int16_t **chrVSrc, int chrFilterSize, const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest, uint8_t *vDest, - uint8_t *aDest, int dstW, int chrDstW) + uint8_t *aDest, int dstW, int chrDstW, + const uint8_t *lumDither, const uint8_t *chrDither) { //FIXME Optimize (just quickly written not optimized..) int i; for (i=0; i<dstW; i++) { - int val=1<<18; + int val = lumDither[i&7] << 12; int j; for (j=0; j<lumFilterSize; j++) val += lumSrc[j][i] * lumFilter[j]; @@ -306,8 +402,8 @@ static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter, if (uDest) for (i=0; i<chrDstW; i++) { - int u=1<<18; - int v=1<<18; + int u = chrDither[i&7] << 12; + int v = chrDither[(i+3)&7] << 12; int j; for (j=0; j<chrFilterSize; j++) { u += chrUSrc[j][i] * chrFilter[j]; @@ -320,7 +416,7 @@ static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter, if (CONFIG_SWSCALE_ALPHA && aDest) for (i=0; i<dstW; i++) { - int val=1<<18; + int val = lumDither[i&7] << 12; int j; for (j=0; j<lumFilterSize; j++) val += alpSrc[j][i] * lumFilter[j]; @@ -330,28 +426,29 @@ static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter, } static void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc, - const int16_t *chrUSrc, const int16_t *chrVSrc, - const int16_t *alpSrc, - uint8_t *dest, uint8_t *uDest, uint8_t *vDest, - uint8_t *aDest, int dstW, int chrDstW) + const int16_t *chrUSrc, const int16_t *chrVSrc, + const int16_t *alpSrc, + uint8_t *dest, uint8_t *uDest, uint8_t *vDest, + uint8_t *aDest, int dstW, int chrDstW, const uint8_t *lumDither, const uint8_t *chrDither) { int i; + for (i=0; i<dstW; i++) { - int val= (lumSrc[i]+64)>>7; + int val= (lumSrc[i]+lumDither[i&7])>>7; dest[i]= av_clip_uint8(val); } if (uDest) for (i=0; i<chrDstW; i++) { - int u=(chrUSrc[i]+64)>>7; - int v=(chrVSrc[i]+64)>>7; + int u=(chrUSrc[i]+chrDither[i&7])>>7; + int v=(chrVSrc[i]+chrDither[(i+3)&7])>>7; uDest[i]= av_clip_uint8(u); vDest[i]= av_clip_uint8(v); } if (CONFIG_SWSCALE_ALPHA && aDest) for (i=0; i<dstW; i++) { - int val= (alpSrc[i]+64)>>7; + int val= (alpSrc[i]+lumDither[i&7])>>7; aDest[i]= av_clip_uint8(val); } } @@ -362,14 +459,15 @@ static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter, const int16_t **chrVSrc, int chrFilterSize, const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, - int dstW, int chrDstW) + int dstW, int chrDstW, + const uint8_t *lumDither, const uint8_t *chrDither) { enum PixelFormat dstFormat = c->dstFormat; //FIXME Optimize (just quickly written not optimized..) int i; for (i=0; i<dstW; i++) { - int val=1<<18; + int val = lumDither[i&7]<<12; int j; for (j=0; j<lumFilterSize; j++) val += lumSrc[j][i] * lumFilter[j]; @@ -382,8 +480,8 @@ static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter, if (dstFormat == PIX_FMT_NV12) for (i=0; i<chrDstW; i++) { - int u=1<<18; - int v=1<<18; + int u = chrDither[i&7]<<12; + int v = chrDither[(i+3)&7]<<12; int j; for (j=0; j<chrFilterSize; j++) { u += chrUSrc[j][i] * chrFilter[j]; @@ -395,8 +493,8 @@ static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter, } else for (i=0; i<chrDstW; i++) { - int u=1<<18; - int v=1<<18; + int u = chrDither[i&7]<<12; + int v = chrDither[(i+3)&7]<<12; int j; for (j=0; j<chrFilterSize; j++) { u += chrUSrc[j][i] * chrFilter[j]; @@ -936,9 +1034,9 @@ YUV2PACKEDWRAPPER(yuv2, rgb48, bgr48be, PIX_FMT_BGR48BE); #define YSCALE_YUV_2_RGBX_FULL_C(rnd,alpha) \ for (i=0; i<dstW; i++) {\ int j;\ - int Y = 0;\ - int U = -128<<19;\ - int V = -128<<19;\ + int Y = 1<<9;\ + int U = (1<<9)-(128<<19);\ + int V = (1<<9)-(128<<19);\ int av_unused A;\ int R,G,B;\ \ @@ -953,7 +1051,7 @@ YUV2PACKEDWRAPPER(yuv2, rgb48, bgr48be, PIX_FMT_BGR48BE); U >>=10;\ V >>=10;\ if (alpha) {\ - A = rnd;\ + A = rnd>>3;\ for (j=0; j<lumFilterSize; j++)\ A += alpSrc[j][i ] * lumFilter[j];\ A >>=19;\ @@ -1321,50 +1419,50 @@ static av_always_inline void fillPlane(uint8_t* plane, int stride, #define b ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? r_b : b_r) static av_always_inline void -rgb48ToY_c_template(uint8_t *dst, const uint8_t *src, int width, +rgb48ToY_c_template(int16_t *dst, const uint16_t *src, int width, enum PixelFormat origin) { int i; for (i = 0; i < width; i++) { - int r_b = input_pixel(&src[i*6+0]) >> 8; - int g = input_pixel(&src[i*6+2]) >> 8; - int b_r = input_pixel(&src[i*6+4]) >> 8; + int r_b = input_pixel(&src[i*3+0]); + int g = input_pixel(&src[i*3+1]); + int b_r = input_pixel(&src[i*3+2]); - dst[i] = (RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; + dst[i] = (RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1+8)) + (1<<(RGB2YUV_SHIFT-7+8))) >> (RGB2YUV_SHIFT-6+8); } } static av_always_inline void -rgb48ToUV_c_template(uint8_t *dstU, uint8_t *dstV, - const uint8_t *src1, const uint8_t *src2, +rgb48ToUV_c_template(int16_t *dstU, int16_t *dstV, + const uint16_t *src1, const uint16_t *src2, int width, enum PixelFormat origin) { int i; assert(src1==src2); for (i = 0; i < width; i++) { - int r_b = input_pixel(&src1[i*6+0]) >> 8; - int g = input_pixel(&src1[i*6+2]) >> 8; - int b_r = input_pixel(&src1[i*6+4]) >> 8; + int r_b = input_pixel(&src1[i*3+0]); + int g = input_pixel(&src1[i*3+1]); + int b_r = input_pixel(&src1[i*3+2]); - dstU[i] = (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; - dstV[i] = (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; + dstU[i] = (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1+8)) + (1<<(RGB2YUV_SHIFT-7+8))) >> (RGB2YUV_SHIFT-6+8); + dstV[i] = (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1+8)) + (1<<(RGB2YUV_SHIFT-7+8))) >> (RGB2YUV_SHIFT-6+8); } } static av_always_inline void -rgb48ToUV_half_c_template(uint8_t *dstU, uint8_t *dstV, - const uint8_t *src1, const uint8_t *src2, +rgb48ToUV_half_c_template(int16_t *dstU, int16_t *dstV, + const uint16_t *src1, const uint16_t *src2, int width, enum PixelFormat origin) { int i; assert(src1==src2); for (i = 0; i < width; i++) { - int r_b = (input_pixel(&src1[12*i + 0]) >> 8) + (input_pixel(&src1[12*i + 6]) >> 8); - int g = (input_pixel(&src1[12*i + 2]) >> 8) + (input_pixel(&src1[12*i + 8]) >> 8); - int b_r = (input_pixel(&src1[12*i + 4]) >> 8) + (input_pixel(&src1[12*i + 10]) >> 8); + int r_b = (input_pixel(&src1[6*i + 0])) + (input_pixel(&src1[6*i + 3])); + int g = (input_pixel(&src1[6*i + 1])) + (input_pixel(&src1[6*i + 4])); + int b_r = (input_pixel(&src1[6*i + 2])) + (input_pixel(&src1[6*i + 5])); - dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1); - dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1); + dstU[i]= (RU*r + GU*g + BU*b + (256U<<(RGB2YUV_SHIFT+8)) + (1<<(RGB2YUV_SHIFT-6+8))) >> (RGB2YUV_SHIFT-5+8); + dstV[i]= (RV*r + GV*g + BV*b + (256U<<(RGB2YUV_SHIFT+8)) + (1<<(RGB2YUV_SHIFT-6+8))) >> (RGB2YUV_SHIFT-5+8); } } @@ -1403,14 +1501,14 @@ rgb48funcs(bgr, BE, PIX_FMT_BGR48BE); (isBE(origin) ? AV_RB16(&src[(i)*2]) : AV_RL16(&src[(i)*2]))) static av_always_inline void -rgb16_32ToY_c_template(uint8_t *dst, const uint8_t *src, +rgb16_32ToY_c_template(int16_t *dst, const uint8_t *src, int width, enum PixelFormat origin, int shr, int shg, int shb, int shp, int maskr, int maskg, int maskb, int rsh, int gsh, int bsh, int S) { const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh, - rnd = 33 << (S - 1); + rnd = (32<<((S)-1)) + (1<<(S-7)); int i; for (i = 0; i < width; i++) { @@ -1419,12 +1517,12 @@ rgb16_32ToY_c_template(uint8_t *dst, const uint8_t *src, int g = (px & maskg) >> shg; int r = (px & maskr) >> shr; - dst[i] = (ry * r + gy * g + by * b + rnd) >> S; + dst[i] = (ry * r + gy * g + by * b + rnd) >> ((S)-6); } } static av_always_inline void -rgb16_32ToUV_c_template(uint8_t *dstU, uint8_t *dstV, +rgb16_32ToUV_c_template(int16_t *dstU, int16_t *dstV, const uint8_t *src, int width, enum PixelFormat origin, int shr, int shg, int shb, int shp, @@ -1433,7 +1531,7 @@ rgb16_32ToUV_c_template(uint8_t *dstU, uint8_t *dstV, { const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh, rv = RV << rsh, gv = GV << gsh, bv = BV << bsh, - rnd = 257 << (S - 1); + rnd = (256<<((S)-1)) + (1<<(S-7)); int i; for (i = 0; i < width; i++) { @@ -1442,13 +1540,13 @@ rgb16_32ToUV_c_template(uint8_t *dstU, uint8_t *dstV, int g = (px & maskg) >> shg; int r = (px & maskr) >> shr; - dstU[i] = (ru * r + gu * g + bu * b + rnd) >> S; - dstV[i] = (rv * r + gv * g + bv * b + rnd) >> S; + dstU[i] = (ru * r + gu * g + bu * b + rnd) >> ((S)-6); + dstV[i] = (rv * r + gv * g + bv * b + rnd) >> ((S)-6); } } static av_always_inline void -rgb16_32ToUV_half_c_template(uint8_t *dstU, uint8_t *dstV, +rgb16_32ToUV_half_c_template(int16_t *dstU, int16_t *dstV, const uint8_t *src, int width, enum PixelFormat origin, int shr, int shg, int shb, int shp, @@ -1457,7 +1555,7 @@ rgb16_32ToUV_half_c_template(uint8_t *dstU, uint8_t *dstV, { const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh, rv = RV << rsh, gv = GV << gsh, bv = BV << bsh, - rnd = 257 << S, maskgx = ~(maskr | maskb); + rnd = (256U<<(S)) + (1<<(S-6)), maskgx = ~(maskr | maskb); int i; maskr |= maskr << 1; maskb |= maskb << 1; maskg |= maskg << 1; @@ -1476,8 +1574,8 @@ rgb16_32ToUV_half_c_template(uint8_t *dstU, uint8_t *dstV, } r = (rb & maskr) >> shr; - dstU[i] = (ru * r + gu * g + bu * b + rnd) >> (S + 1); - dstV[i] = (rv * r + gv * g + bv * b + rnd) >> (S + 1); + dstU[i] = (ru * r + gu * g + bu * b + (unsigned)rnd) >> ((S)-6+1); + dstV[i] = (rv * r + gv * g + bv * b + (unsigned)rnd) >> ((S)-6+1); } } @@ -1521,65 +1619,73 @@ rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7 rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8); rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7); -static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused) +static void abgrToA_c(int16_t *dst, const uint8_t *src, int width, uint32_t *unused) { int i; for (i=0; i<width; i++) { - dst[i]= src[4*i]; + dst[i]= src[4*i]<<6; } } -static void rgbaToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused) +static void rgbaToA_c(int16_t *dst, const uint8_t *src, int width, uint32_t *unused) { int i; for (i=0; i<width; i++) { - dst[i]= src[4*i+3]; + dst[i]= src[4*i+3]<<6; } } -static void palToY_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal) +static void palToA_c(int16_t *dst, const uint8_t *src, int width, uint32_t *pal) { int i; for (i=0; i<width; i++) { int d= src[i]; - dst[i]= pal[d] & 0xFF; + dst[i]= (pal[d] >> 24)<<6; } } -static void palToUV_c(uint8_t *dstU, uint8_t *dstV, - const uint8_t *src1, const uint8_t *src2, - int width, uint32_t *pal) +static void palToY_c(int16_t *dst, const uint8_t *src, long width, uint32_t *pal) +{ + int i; + for (i=0; i<width; i++) { + int d= src[i]; + + dst[i]= (pal[d] & 0xFF)<<6; + } +} + +static void palToUV_c(uint16_t *dstU, int16_t *dstV, + const uint8_t *src1, const uint8_t *src2, + int width, uint32_t *pal) { int i; assert(src1 == src2); for (i=0; i<width; i++) { int p= pal[src1[i]]; - dstU[i]= p>>8; - dstV[i]= p>>16; + dstU[i]= (uint8_t)(p>> 8)<<6; + dstV[i]= (uint8_t)(p>>16)<<6; } } -static void monowhite2Y_c(uint8_t *dst, const uint8_t *src, - int width, uint32_t *unused) +static void monowhite2Y_c(int16_t *dst, const uint8_t *src, int width, uint32_t *unused) { int i, j; for (i=0; i<width/8; i++) { int d= ~src[i]; for(j=0; j<8; j++) - dst[8*i+j]= ((d>>(7-j))&1)*255; + dst[8*i+j]= ((d>>(7-j))&1)*16383; } } -static void monoblack2Y_c(uint8_t *dst, const uint8_t *src, - int width, uint32_t *unused) +static void monoblack2Y_c(int16_t *dst, const uint8_t *src, int width, uint32_t *unused) { int i, j; for (i=0; i<width/8; i++) { int d= src[i]; for(j=0; j<8; j++) - dst[8*i+j]= ((d>>(7-j))&1)*255; + dst[8*i+j]= ((d>>(7-j))&1)*16383; } } @@ -1718,7 +1824,7 @@ YUV_NBPS( 9, BE, PIX_FMT_YUV420P9BE); YUV_NBPS(10, LE, PIX_FMT_YUV420P10LE); YUV_NBPS(10, BE, PIX_FMT_YUV420P10BE); -static void bgr24ToY_c(uint8_t *dst, const uint8_t *src, +static void bgr24ToY_c(int16_t *dst, const uint8_t *src, int width, uint32_t *unused) { int i; @@ -1727,11 +1833,11 @@ static void bgr24ToY_c(uint8_t *dst, const uint8_t *src, int g= src[i*3+1]; int r= src[i*3+2]; - dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT); + dst[i]= ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6)); } } -static void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, +static void bgr24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused) { int i; @@ -1740,13 +1846,13 @@ static void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, int g= src1[3*i + 1]; int r= src1[3*i + 2]; - dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT; - dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT; + dstU[i]= (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6); + dstV[i]= (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6); } assert(src1 == src2); } -static void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, +static void bgr24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused) { int i; @@ -1755,13 +1861,13 @@ static void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, int g= src1[6*i + 1] + src1[6*i + 4]; int r= src1[6*i + 2] + src1[6*i + 5]; - dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1); - dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1); + dstU[i]= (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5); + dstV[i]= (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5); } assert(src1 == src2); } -static void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width, +static void rgb24ToY_c(int16_t *dst, const uint8_t *src, int width, uint32_t *unused) { int i; @@ -1770,11 +1876,11 @@ static void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width, int g= src[i*3+1]; int b= src[i*3+2]; - dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT); + dst[i]= ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6)); } } -static void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, +static void rgb24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused) { int i; @@ -1784,13 +1890,13 @@ static void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, int g= src1[3*i + 1]; int b= src1[3*i + 2]; - dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT; - dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT; + dstU[i]= (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6); + dstV[i]= (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6); } } -static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, - const uint8_t *src2, int width, uint32_t *unused) +static void rgb24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1, + const uint8_t *src2, int width, uint32_t *unused) { int i; assert(src1==src2); @@ -1799,8 +1905,8 @@ static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, int g= src1[6*i + 1] + src1[6*i + 4]; int b= src1[6*i + 2] + src1[6*i + 5]; - dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1); - dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1); + dstU[i]= (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5); + dstV[i]= (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5); } } @@ -1825,6 +1931,35 @@ static void hScale_c(int16_t *dst, int dstW, const uint8_t *src, } } +static inline void hScale16_c(int16_t *dst, int dstW, const uint16_t *src, int srcW, int xInc, + const int16_t *filter, const int16_t *filterPos, long filterSize, int shift) +{ + int i, j; + + for (i=0; i<dstW; i++) { + int srcPos= filterPos[i]; + int val=0; + for (j=0; j<filterSize; j++) { + val += ((int)src[srcPos + j])*filter[filterSize*i + j]; + } + dst[i] = FFMIN(val>>shift, (1<<15)-1); // the cubic equation does overflow ... + } +} + +static inline void hScale16X_c(int16_t *dst, int dstW, const uint16_t *src, int srcW, int xInc, + const int16_t *filter, const int16_t *filterPos, long filterSize, int shift) +{ + int i, j; + for (i=0; i<dstW; i++) { + int srcPos= filterPos[i]; + int val=0; + for (j=0; j<filterSize; j++) { + val += ((int)av_bswap16(src[srcPos + j]))*filter[filterSize*i + j]; + } + dst[i] = FFMIN(val>>shift, (1<<15)-1); // the cubic equation does overflow ... + } +} + //FIXME all pal and rgb srcFormats could do this convertion as well //FIXME all scalers more complex than bilinear could do half of this transform static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width) @@ -1867,6 +2002,8 @@ static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth, dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha; xpos+=xInc; } + for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) + dst[i] = src[srcW-1]*128; } // *** horizontal scale Y line to temp buffer @@ -1885,7 +2022,10 @@ static av_always_inline void hyscale(SwsContext *c, uint16_t *dst, int dstWidth, src= formatConvBuffer; } - if (!c->hyscale_fast) { + if (c->hScale16) { + int shift= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1; + c->hScale16(dst, dstWidth, (const uint16_t*)src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize, shift); + } else if (!c->hyscale_fast) { c->hScale(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize); } else { // fast bilinear upscale / crap downscale c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc); @@ -1908,6 +2048,10 @@ static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2, dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha); xpos+=xInc; } + for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) { + dst1[i] = src1[srcW-1]*128; + dst2[i] = src2[srcW-1]*128; + } } static av_always_inline void hcscale(SwsContext *c, uint16_t *dst1, uint16_t *dst2, int dstWidth, @@ -1917,13 +2061,17 @@ static av_always_inline void hcscale(SwsContext *c, uint16_t *dst1, uint16_t *ds uint8_t *formatConvBuffer, uint32_t *pal) { if (c->chrToYV12) { - uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW, 16); + uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW*2+78, 16); c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal); src1= formatConvBuffer; src2= buf2; } - if (!c->hcscale_fast) { + if (c->hScale16) { + int shift= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1; + c->hScale16(dst1, dstWidth, (const uint16_t*)src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, shift); + c->hScale16(dst2, dstWidth, (const uint16_t*)src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, shift); + } else if (!c->hcscale_fast) { c->hScale(dst1, dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize); c->hScale(dst2, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize); } else { // fast bilinear upscale / crap downscale @@ -2062,6 +2210,7 @@ static int swScale(SwsContext *c, const uint8_t* src[], const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample); int lastDstY; uint32_t *pal=c->pal_yuv; + int should_dither= isNBPS(c->srcFormat) || is16BPS(c->srcFormat); yuv2planar1_fn yuv2yuv1 = c->yuv2yuv1; yuv2planarX_fn yuv2yuvX = c->yuv2yuvX; yuv2packed1_fn yuv2packed1 = c->yuv2packed1; @@ -2124,6 +2273,8 @@ static int swScale(SwsContext *c, const uint8_t* src[], unsigned char *uDest=dst[1]+dstStride[1]*chrDstY; unsigned char *vDest=dst[2]+dstStride[2]*chrDstY; unsigned char *aDest=(CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3]+dstStride[3]*dstY : NULL; + const uint8_t *lumDither= should_dither ? dithers[7][dstY &7] : flat64; + const uint8_t *chrDither= should_dither ? dithers[7][chrDstY&7] : flat64; const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)]; @@ -2215,6 +2366,7 @@ static int swScale(SwsContext *c, const uint8_t* src[], const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL; + if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like const int chrSkipMask= (1<<c->chrDstVSubSample)-1; if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi @@ -2224,13 +2376,13 @@ static int swScale(SwsContext *c, const uint8_t* src[], const int16_t *chrVBuf= chrVSrcPtr[0]; const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL; yuv2yuv1(c, lumBuf, chrUBuf, chrVBuf, alpBuf, dest, - uDest, vDest, aDest, dstW, chrDstW); + uDest, vDest, aDest, dstW, chrDstW, lumDither, chrDither); } else { //General YV12 yuv2yuvX(c, vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize, - alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW); + alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW, lumDither, chrDither); } } else { assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2); @@ -2307,22 +2459,24 @@ static av_cold void sws_init_swScale_c(SwsContext *c) case PIX_FMT_PAL8 : case PIX_FMT_BGR4_BYTE: case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break; + case PIX_FMT_GRAY16BE : case PIX_FMT_YUV444P9BE: - case PIX_FMT_YUV420P9BE: c->chrToYV12 = BE9ToUV_c; break; - case PIX_FMT_YUV444P9LE: - case PIX_FMT_YUV420P9LE: c->chrToYV12 = LE9ToUV_c; break; + case PIX_FMT_YUV420P9BE: case PIX_FMT_YUV444P10BE: case PIX_FMT_YUV422P10BE: - case PIX_FMT_YUV420P10BE: c->chrToYV12 = BE10ToUV_c; break; - case PIX_FMT_YUV422P10LE: - case PIX_FMT_YUV444P10LE: - case PIX_FMT_YUV420P10LE: c->chrToYV12 = LE10ToUV_c; break; + case PIX_FMT_YUV420P10BE: case PIX_FMT_YUV420P16BE: case PIX_FMT_YUV422P16BE: - case PIX_FMT_YUV444P16BE: c->chrToYV12 = BEToUV_c; break; + case PIX_FMT_YUV444P16BE: c->hScale16= HAVE_BIGENDIAN ? hScale16_c : hScale16X_c; break; + case PIX_FMT_GRAY16LE : + case PIX_FMT_YUV444P9LE: + case PIX_FMT_YUV420P9LE: + case PIX_FMT_YUV422P10LE: + case PIX_FMT_YUV420P10LE: + case PIX_FMT_YUV444P10LE: case PIX_FMT_YUV420P16LE: case PIX_FMT_YUV422P16LE: - case PIX_FMT_YUV444P16LE: c->chrToYV12 = LEToUV_c; break; + case PIX_FMT_YUV444P16LE: c->hScale16= HAVE_BIGENDIAN ? hScale16X_c : hScale16_c; break; } if (c->chrSrcHSubSample) { switch(srcFormat) { @@ -2371,27 +2525,11 @@ static av_cold void sws_init_swScale_c(SwsContext *c) c->lumToYV12 = NULL; c->alpToYV12 = NULL; switch (srcFormat) { - case PIX_FMT_YUV444P9BE: - case PIX_FMT_YUV420P9BE: c->lumToYV12 = BE9ToY_c; break; - case PIX_FMT_YUV444P9LE: - case PIX_FMT_YUV420P9LE: c->lumToYV12 = LE9ToY_c; break; - case PIX_FMT_YUV444P10BE: - case PIX_FMT_YUV422P10BE: - case PIX_FMT_YUV420P10BE: c->lumToYV12 = BE10ToY_c; break; - case PIX_FMT_YUV444P10LE: - case PIX_FMT_YUV422P10LE: - case PIX_FMT_YUV420P10LE: c->lumToYV12 = LE10ToY_c; break; case PIX_FMT_YUYV422 : - case PIX_FMT_YUV420P16BE: - case PIX_FMT_YUV422P16BE: - case PIX_FMT_YUV444P16BE: - case PIX_FMT_Y400A : - case PIX_FMT_GRAY16BE : c->lumToYV12 = yuy2ToY_c; break; + case PIX_FMT_GRAY8A : + c->lumToYV12 = yuy2ToY_c; break; case PIX_FMT_UYVY422 : - case PIX_FMT_YUV420P16LE: - case PIX_FMT_YUV422P16LE: - case PIX_FMT_YUV444P16LE: - case PIX_FMT_GRAY16LE : c->lumToYV12 = uyvyToY_c; break; + c->lumToYV12 = uyvyToY_c; break; case PIX_FMT_BGR24 : c->lumToYV12 = bgr24ToY_c; break; case PIX_FMT_BGR565LE : c->lumToYV12 = bgr16leToY_c; break; case PIX_FMT_BGR565BE : c->lumToYV12 = bgr16beToY_c; break; @@ -2425,9 +2563,13 @@ static av_cold void sws_init_swScale_c(SwsContext *c) case PIX_FMT_ABGR: case PIX_FMT_ARGB: c->alpToYV12 = abgrToA_c; break; case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break; + case PIX_FMT_PAL8 : c->alpToYV12 = palToA_c; break; } } + if(isAnyRGB(c->srcFormat) || c->srcFormat == PIX_FMT_PAL8) + c->hScale16= hScale16_c; + if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) { if (c->srcRange) { c->lumConvertRange = lumRangeFromJpeg_c; diff --git a/libswscale/swscale.h b/libswscale/swscale.h index b0ad912a08..e798773158 100644 --- a/libswscale/swscale.h +++ b/libswscale/swscale.h @@ -1,20 +1,20 @@ /* * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -29,9 +29,9 @@ #include "libavutil/avutil.h" -#define LIBSWSCALE_VERSION_MAJOR 1 -#define LIBSWSCALE_VERSION_MINOR 1 -#define LIBSWSCALE_VERSION_MICRO 0 +#define LIBSWSCALE_VERSION_MAJOR 0 +#define LIBSWSCALE_VERSION_MINOR 14 +#define LIBSWSCALE_VERSION_MICRO 1 #define LIBSWSCALE_VERSION_INT AV_VERSION_INT(LIBSWSCALE_VERSION_MAJOR, \ LIBSWSCALE_VERSION_MINOR, \ @@ -53,6 +53,9 @@ #ifndef FF_API_SWS_CPU_CAPS #define FF_API_SWS_CPU_CAPS (LIBSWSCALE_VERSION_MAJOR < 2) #endif +#ifndef FF_API_SWS_FORMAT_NAME +#define FF_API_SWS_FORMAT_NAME (LIBSWSCALE_VERSION_MAJOR < 2) +#endif /** * Returns the LIBSWSCALE_VERSION_INT constant. @@ -233,6 +236,15 @@ struct SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat int sws_scale(struct SwsContext *context, const uint8_t* const srcSlice[], const int srcStride[], int srcSliceY, int srcSliceH, uint8_t* const dst[], const int dstStride[]); +#if LIBSWSCALE_VERSION_MAJOR < 1 +/** + * @deprecated Use sws_scale() instead. + */ +int sws_scale_ordered(struct SwsContext *context, const uint8_t* const src[], + int srcStride[], int srcSliceY, int srcSliceH, + uint8_t* dst[], int dstStride[]) attribute_deprecated; +#endif + /** * @param inv_table the yuv2rgb coefficients, normally ff_yuv2rgb_coeffs[x] * @param fullRange if 1 then the luma range is 0..255 if 0 it is 16..235 @@ -292,6 +304,13 @@ void sws_shiftVec(SwsVector *a, int shift); */ SwsVector *sws_cloneVec(SwsVector *a); +#if LIBSWSCALE_VERSION_MAJOR < 1 +/** + * @deprecated Use sws_printVec2() instead. + */ +attribute_deprecated void sws_printVec(SwsVector *a); +#endif + /** * Prints with av_log() a textual representation of the vector a * if log_level <= av_log_level. diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h index ea34d8ce0e..c0f8e64d70 100644 --- a/libswscale/swscale_internal.h +++ b/libswscale/swscale_internal.h @@ -1,20 +1,20 @@ /* * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -35,6 +35,8 @@ #define MAX_FILTER_SIZE 256 +#define DITHER1XBPP + #if HAVE_BIGENDIAN #define ALT32_CORR (-1) #else @@ -62,7 +64,7 @@ typedef void (*yuv2planar1_fn) (struct SwsContext *c, const int16_t *chrVSrc, const int16_t *alpSrc, uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, - int dstW, int chrDstW); + int dstW, int chrDstW, const uint8_t *lumDither, const uint8_t *chrDither); typedef void (*yuv2planarX_fn) (struct SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, const int16_t *chrFilter, const int16_t **chrUSrc, @@ -70,7 +72,7 @@ typedef void (*yuv2planarX_fn) (struct SwsContext *c, const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, - int dstW, int chrDstW); + int dstW, int chrDstW, const uint8_t *lumDither, const uint8_t *chrDither); typedef void (*yuv2packed1_fn) (struct SwsContext *c, const uint16_t *buf0, const uint16_t *ubuf0, const uint16_t *ubuf1, @@ -230,6 +232,8 @@ typedef struct SwsContext { #define ALP_MMX_FILTER_OFFSET "11*8+4*4*256*2+48" #define UV_OFF "11*8+4*4*256*3+48" #define UV_OFFx2 "11*8+4*4*256*3+56" +#define DITHER16 "11*8+4*4*256*3+64" +#define DITHER32 "11*8+4*4*256*3+64+16" DECLARE_ALIGNED(8, uint64_t, redDither); DECLARE_ALIGNED(8, uint64_t, greenDither); @@ -254,6 +258,8 @@ typedef struct SwsContext { int32_t alpMmxFilter[4*MAX_FILTER_SIZE]; DECLARE_ALIGNED(8, ptrdiff_t, uv_off); ///< offset (in pixels) between u and v planes DECLARE_ALIGNED(8, ptrdiff_t, uv_offx2); ///< offset (in bytes) between u and v planes + uint16_t dither16[8]; + uint32_t dither32[8]; #if HAVE_ALTIVEC vector signed short CY; @@ -310,6 +316,10 @@ typedef struct SwsContext { int xInc, const int16_t *filter, const int16_t *filterPos, int filterSize); + void (*hScale16)(int16_t *dst, int dstW, const uint16_t *src, int srcW, + int xInc, const int16_t *filter, const int16_t *filterPos, + long filterSize, int shift); + void (*lumConvertRange)(int16_t *dst, int width); ///< Color range conversion function for luma plane if needed. void (*chrConvertRange)(int16_t *dst1, int16_t *dst2, int width); ///< Color range conversion function for chroma planes if needed. @@ -335,7 +345,13 @@ SwsFunc ff_yuv2rgb_init_altivec(SwsContext *c); SwsFunc ff_yuv2rgb_get_func_ptr_bfin(SwsContext *c); void ff_bfin_get_unscaled_swscale(SwsContext *c); +#if FF_API_SWS_FORMAT_NAME +/** + * @deprecated Use av_get_pix_fmt_name() instead. + */ +attribute_deprecated const char *sws_format_name(enum PixelFormat format); +#endif //FIXME replace this with something faster #define is16BPS(x) ( \ @@ -352,7 +368,7 @@ const char *sws_format_name(enum PixelFormat format); || (x)==PIX_FMT_YUV422P16BE \ || (x)==PIX_FMT_YUV444P16BE \ ) -#define is9_OR_10BPS(x) ( \ +#define isNBPS(x) ( \ (x)==PIX_FMT_YUV420P9LE \ || (x)==PIX_FMT_YUV420P9BE \ || (x)==PIX_FMT_YUV444P9BE \ @@ -363,7 +379,10 @@ const char *sws_format_name(enum PixelFormat format); || (x)==PIX_FMT_YUV444P10LE \ || (x)==PIX_FMT_YUV420P10LE \ || (x)==PIX_FMT_YUV420P10BE \ + || (x)==PIX_FMT_YUV422P10LE \ + || (x)==PIX_FMT_YUV422P10BE \ ) +#define is9_OR_10BPS isNBPS //for ronald #define isBE(x) ((x)&1) #define isPlanar8YUV(x) ( \ (x)==PIX_FMT_YUV410P \ @@ -384,6 +403,7 @@ const char *sws_format_name(enum PixelFormat format); || (x)==PIX_FMT_YUV422P10LE \ || (x)==PIX_FMT_YUV444P10LE \ || (x)==PIX_FMT_YUV420P16LE \ + || (x)==PIX_FMT_YUV422P10LE \ || (x)==PIX_FMT_YUV422P16LE \ || (x)==PIX_FMT_YUV444P16LE \ || (x)==PIX_FMT_YUV420P9BE \ @@ -392,6 +412,7 @@ const char *sws_format_name(enum PixelFormat format); || (x)==PIX_FMT_YUV422P10BE \ || (x)==PIX_FMT_YUV444P10BE \ || (x)==PIX_FMT_YUV420P16BE \ + || (x)==PIX_FMT_YUV422P10BE \ || (x)==PIX_FMT_YUV422P16BE \ || (x)==PIX_FMT_YUV444P16BE \ ) @@ -402,7 +423,7 @@ const char *sws_format_name(enum PixelFormat format); ) #define isGray(x) ( \ (x)==PIX_FMT_GRAY8 \ - || (x)==PIX_FMT_Y400A \ + || (x)==PIX_FMT_GRAY8A \ || (x)==PIX_FMT_GRAY16BE \ || (x)==PIX_FMT_GRAY16LE \ ) @@ -469,7 +490,8 @@ const char *sws_format_name(enum PixelFormat format); || (x)==PIX_FMT_BGR32_1 \ || (x)==PIX_FMT_RGB32 \ || (x)==PIX_FMT_RGB32_1 \ - || (x)==PIX_FMT_Y400A \ + || (x)==PIX_FMT_PAL8 \ + || (x)==PIX_FMT_GRAY8A \ || (x)==PIX_FMT_YUVA420P \ ) #define isPacked(x) ( \ @@ -479,10 +501,13 @@ const char *sws_format_name(enum PixelFormat format); || (x)==PIX_FMT_Y400A \ || isAnyRGB(x) \ ) -#define usePal(x) ((av_pix_fmt_descriptors[x].flags & PIX_FMT_PAL) || (x) == PIX_FMT_Y400A) +#define usePal(x) ((av_pix_fmt_descriptors[x].flags & PIX_FMT_PAL) || (x) == PIX_FMT_GRAY8A) extern const uint64_t ff_dither4[2]; extern const uint64_t ff_dither8[2]; +extern const uint8_t dithers[8][8][8]; +extern const uint16_t dither_scale[15][16]; + extern const AVClass sws_context_class; diff --git a/libswscale/swscale_template.c b/libswscale/swscale_template.c new file mode 100644 index 0000000000..9ae9fc771c --- /dev/null +++ b/libswscale/swscale_template.c @@ -0,0 +1,748 @@ +/* + * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +static inline void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter, + const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrUSrc, + const int16_t **chrVSrc, + int chrFilterSize, const int16_t **alpSrc, + uint8_t *dest, uint8_t *uDest, uint8_t *vDest, + uint8_t *aDest, int dstW, int chrDstW, const uint8_t *lumDither, const uint8_t *chrDither) +{ + yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize, + chrFilter, chrUSrc, chrVSrc, chrFilterSize, + alpSrc, dest, uDest, vDest, aDest, dstW, chrDstW, lumDither, chrDither); +} + +static inline void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter, + const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrUSrc, + const int16_t **chrVSrc, + int chrFilterSize, uint8_t *dest, uint8_t *uDest, + int dstW, int chrDstW, enum PixelFormat dstFormat, const uint8_t *dither, const uint8_t *chrDither) +{ + yuv2nv12XinC(lumFilter, lumSrc, lumFilterSize, + chrFilter, chrUSrc, chrVSrc, chrFilterSize, + dest, uDest, dstW, chrDstW, dstFormat, dither, chrDither); +} + +static inline void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc, + const int16_t *chrUSrc, const int16_t *chrVSrc, + const int16_t *alpSrc, + uint8_t *dest, uint8_t *uDest, uint8_t *vDest, + uint8_t *aDest, int dstW, int chrDstW, const uint8_t *lumDither, const uint8_t *chrDither) +{ + int i; + + for (i=0; i<dstW; i++) { + int val= (lumSrc[i]+lumDither[i&7])>>7; + dest[i]= av_clip_uint8(val); + } + + if (uDest) + for (i=0; i<chrDstW; i++) { + int u=(chrUSrc[i]+chrDither[i&7])>>7; + int v=(chrVSrc[i]+chrDither[(i+3)&7])>>7; + uDest[i]= av_clip_uint8(u); + vDest[i]= av_clip_uint8(v); + } + + if (CONFIG_SWSCALE_ALPHA && aDest) + for (i=0; i<dstW; i++) { + int val= (alpSrc[i]+lumDither[i&7])>>7; + aDest[i]= av_clip_uint8(val); + } +} + + +/** + * vertical scale YV12 to RGB + */ +static inline void yuv2packedX_c(SwsContext *c, const int16_t *lumFilter, + const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrUSrc, + const int16_t **chrVSrc, + int chrFilterSize, const int16_t **alpSrc, + uint8_t *dest, int dstW, int dstY) +{ + yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize, + chrFilter, chrUSrc, chrVSrc, chrFilterSize, + alpSrc, dest, dstW, dstY); +} + +/** + * vertical bilinear scale YV12 to RGB + */ +static inline void yuv2packed2_c(SwsContext *c, const uint16_t *buf0, + const uint16_t *buf1, const uint16_t *ubuf0, + const uint16_t *ubuf1, const uint16_t *vbuf0, + const uint16_t *vbuf1, const uint16_t *abuf0, + const uint16_t *abuf1, uint8_t *dest, int dstW, + int yalpha, int uvalpha, int y) +{ + int yalpha1=4095- yalpha; + int uvalpha1=4095-uvalpha; + int i; + + YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C(void,0), YSCALE_YUV_2_GRAY16_2_C, YSCALE_YUV_2_MONO2_C) +} + +/** + * YV12 to RGB without scaling or interpolating + */ +static inline void yuv2packed1_c(SwsContext *c, const uint16_t *buf0, + const uint16_t *ubuf0, const uint16_t *ubuf1, + const uint16_t *vbuf0, const uint16_t *vbuf1, + const uint16_t *abuf0, uint8_t *dest, int dstW, + int uvalpha, enum PixelFormat dstFormat, + int flags, int y) +{ + const int yalpha1=0; + int i; + + const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1 + const int yalpha= 4096; //FIXME ... + + if (uvalpha < 2048) { + YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C) + } else { + YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C) + } +} + +//FIXME yuy2* can read up to 7 samples too much + +static inline void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width, + uint32_t *unused) +{ + int i; + for (i=0; i<width; i++) + dst[i]= src[2*i]; +} + +static inline void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, + const uint8_t *src2, int width, uint32_t *unused) +{ + int i; + for (i=0; i<width; i++) { + dstU[i]= src1[4*i + 1]; + dstV[i]= src1[4*i + 3]; + } + assert(src1 == src2); +} + +static inline void LEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, + const uint8_t *src2, int width, uint32_t *unused) +{ + int i; + // FIXME I don't think this code is right for YUV444/422, since then h is not subsampled so + // we need to skip each second pixel. Same for BEToUV. + for (i=0; i<width; i++) { + dstU[i]= src1[2*i + 1]; + dstV[i]= src2[2*i + 1]; + } +} + +/* This is almost identical to the previous, end exists only because + * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */ +static inline void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width, + uint32_t *unused) +{ + int i; + for (i=0; i<width; i++) + dst[i]= src[2*i+1]; +} + +static inline void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, + const uint8_t *src2, int width, uint32_t *unused) +{ + int i; + for (i=0; i<width; i++) { + dstU[i]= src1[4*i + 0]; + dstV[i]= src1[4*i + 2]; + } + assert(src1 == src2); +} + +static inline void BEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, + const uint8_t *src2, int width, uint32_t *unused) +{ + int i; + for (i=0; i<width; i++) { + dstU[i]= src1[2*i]; + dstV[i]= src2[2*i]; + } +} + +static inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2, + const uint8_t *src, int width) +{ + int i; + for (i = 0; i < width; i++) { + dst1[i] = src[2*i+0]; + dst2[i] = src[2*i+1]; + } +} + +// FIXME Maybe dither instead. +#ifndef YUV_NBPS +#define YUV_NBPS(depth, endianness, rfunc) \ +static inline void endianness ## depth ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \ + const uint16_t *srcU, const uint16_t *srcV, \ + int width, uint32_t *unused) \ +{ \ + int i; \ + for (i = 0; i < width; i++) { \ + dstU[i] = rfunc(&srcU[i])>>(depth-8); \ + dstV[i] = rfunc(&srcV[i])>>(depth-8); \ + } \ +} \ +\ +static inline void endianness ## depth ## ToY_c(uint8_t *dstY, const uint16_t *srcY, int width, uint32_t *unused) \ +{ \ + int i; \ + for (i = 0; i < width; i++) \ + dstY[i] = rfunc(&srcY[i])>>(depth-8); \ +} \ + +YUV_NBPS( 9, LE, AV_RL16) +YUV_NBPS( 9, BE, AV_RB16) +YUV_NBPS(10, LE, AV_RL16) +YUV_NBPS(10, BE, AV_RB16) +#endif // YUV_NBPS + +static inline void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV, + const uint8_t *src1, const uint8_t *src2, + int width, uint32_t *unused) +{ + nvXXtoUV_c(dstU, dstV, src1, width); +} + +static inline void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV, + const uint8_t *src1, const uint8_t *src2, + int width, uint32_t *unused) +{ + nvXXtoUV_c(dstV, dstU, src1, width); +} + +// bilinear / bicubic scaling +static inline void hScale_c(int16_t *dst, int dstW, const uint8_t *src, + int srcW, int xInc, + const int16_t *filter, const int16_t *filterPos, + int filterSize) +{ + int i; + for (i=0; i<dstW; i++) { + int j; + int srcPos= filterPos[i]; + int val=0; + for (j=0; j<filterSize; j++) { + val += ((int)src[srcPos + j])*filter[filterSize*i + j]; + } + //filter += hFilterSize; + dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ... + //dst[i] = val>>7; + } +} + + +#define DEBUG_SWSCALE_BUFFERS 0 +#define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__) + +#if HAVE_MMX +static void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufIndex, + int lastInLumBuf, int lastInChrBuf); +#endif + +static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[], + int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[]) +{ + /* load a few things into local vars to make the code more readable? and faster */ + const int srcW= c->srcW; + const int dstW= c->dstW; + const int dstH= c->dstH; + const int chrDstW= c->chrDstW; + const int chrSrcW= c->chrSrcW; + const int lumXInc= c->lumXInc; + const int chrXInc= c->chrXInc; + const enum PixelFormat dstFormat= c->dstFormat; + const int flags= c->flags; + int16_t *vLumFilterPos= c->vLumFilterPos; + int16_t *vChrFilterPos= c->vChrFilterPos; + int16_t *hLumFilterPos= c->hLumFilterPos; + int16_t *hChrFilterPos= c->hChrFilterPos; + int16_t *vLumFilter= c->vLumFilter; + int16_t *vChrFilter= c->vChrFilter; + int16_t *hLumFilter= c->hLumFilter; + int16_t *hChrFilter= c->hChrFilter; + int32_t *lumMmxFilter= c->lumMmxFilter; + int32_t *chrMmxFilter= c->chrMmxFilter; + int32_t av_unused *alpMmxFilter= c->alpMmxFilter; + const int vLumFilterSize= c->vLumFilterSize; + const int vChrFilterSize= c->vChrFilterSize; + const int hLumFilterSize= c->hLumFilterSize; + const int hChrFilterSize= c->hChrFilterSize; + int16_t **lumPixBuf= c->lumPixBuf; + int16_t **chrUPixBuf= c->chrUPixBuf; + int16_t **chrVPixBuf= c->chrVPixBuf; + int16_t **alpPixBuf= c->alpPixBuf; + const int vLumBufSize= c->vLumBufSize; + const int vChrBufSize= c->vChrBufSize; + uint8_t *formatConvBuffer= c->formatConvBuffer; + const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample; + const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample); + int lastDstY; + uint32_t *pal=c->pal_yuv; + + /* vars which will change and which we need to store back in the context */ + int dstY= c->dstY; + int lumBufIndex= c->lumBufIndex; + int chrBufIndex= c->chrBufIndex; + int lastInLumBuf= c->lastInLumBuf; + int lastInChrBuf= c->lastInChrBuf; + + if (isPacked(c->srcFormat)) { + src[0]= + src[1]= + src[2]= + src[3]= src[0]; + srcStride[0]= + srcStride[1]= + srcStride[2]= + srcStride[3]= srcStride[0]; + } + srcStride[1]<<= c->vChrDrop; + srcStride[2]<<= c->vChrDrop; + + DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n", + src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3], + dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]); + DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n", + srcSliceY, srcSliceH, dstY, dstH); + DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n", + vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize); + + if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) { + static int warnedAlready=0; //FIXME move this into the context perhaps + if (flags & SWS_PRINT_INFO && !warnedAlready) { + av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n" + " ->cannot do aligned memory accesses anymore\n"); + warnedAlready=1; + } + } + + /* Note the user might start scaling the picture in the middle so this + will not get executed. This is not really intended but works + currently, so people might do it. */ + if (srcSliceY ==0) { + lumBufIndex=-1; + chrBufIndex=-1; + dstY=0; + lastInLumBuf= -1; + lastInChrBuf= -1; + } + + lastDstY= dstY; + + for (;dstY < dstH; dstY++) { + unsigned char *dest =dst[0]+dstStride[0]*dstY; + const int chrDstY= dstY>>c->chrDstVSubSample; + unsigned char *uDest=dst[1]+dstStride[1]*chrDstY; + unsigned char *vDest=dst[2]+dstStride[2]*chrDstY; + unsigned char *aDest=(CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3]+dstStride[3]*dstY : NULL; + const uint8_t *lumDither= isNBPS(c->srcFormat) || is16BPS(c->srcFormat) ? dithers[7][dstY &7] : flat64; + const uint8_t *chrDither= isNBPS(c->srcFormat) || is16BPS(c->srcFormat) ? dithers[7][chrDstY&7] : flat64; + + const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input + const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)]; + const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input + int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input + int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input + int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input + int enough_lines; + + //handle holes (FAST_BILINEAR & weird filters) + if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1; + if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1; + assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1); + assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1); + + DEBUG_BUFFERS("dstY: %d\n", dstY); + DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n", + firstLumSrcY, lastLumSrcY, lastInLumBuf); + DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n", + firstChrSrcY, lastChrSrcY, lastInChrBuf); + + // Do we have enough lines in this slice to output the dstY line + enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample); + + if (!enough_lines) { + lastLumSrcY = srcSliceY + srcSliceH - 1; + lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1; + DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n", + lastLumSrcY, lastChrSrcY); + } + + //Do horizontal scaling + while(lastInLumBuf < lastLumSrcY) { + const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0]; + const uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3]; + lumBufIndex++; + assert(lumBufIndex < 2*vLumBufSize); + assert(lastInLumBuf + 1 - srcSliceY < srcSliceH); + assert(lastInLumBuf + 1 - srcSliceY >= 0); + hyscale_c(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc, + hLumFilter, hLumFilterPos, hLumFilterSize, + formatConvBuffer, + pal, 0); + if (CONFIG_SWSCALE_ALPHA && alpPixBuf) + hyscale_c(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW, + lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize, + formatConvBuffer, + pal, 1); + lastInLumBuf++; + DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n", + lumBufIndex, lastInLumBuf); + } + while(lastInChrBuf < lastChrSrcY) { + const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1]; + const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2]; + chrBufIndex++; + assert(chrBufIndex < 2*vChrBufSize); + assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH)); + assert(lastInChrBuf + 1 - chrSrcSliceY >= 0); + //FIXME replace parameters through context struct (some at least) + + if (c->needs_hcscale) + hcscale_c(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex], + chrDstW, src1, src2, chrSrcW, chrXInc, + hChrFilter, hChrFilterPos, hChrFilterSize, + formatConvBuffer, pal); + lastInChrBuf++; + DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n", + chrBufIndex, lastInChrBuf); + } + //wrap buf index around to stay inside the ring buffer + if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize; + if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize; + if (!enough_lines) + break; //we can't output a dstY line so let's try with the next slice + +#if HAVE_MMX + updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf); +#endif + if (dstY < dstH-2) { + const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; + const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; + const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; + const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL; + if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) { + const int chrSkipMask= (1<<c->chrDstVSubSample)-1; + if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi + c->yuv2nv12X(c, + vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, + vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize, + dest, uDest, dstW, chrDstW, dstFormat, lumDither, chrDither); + } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like + const int chrSkipMask= (1<<c->chrDstVSubSample)-1; + if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi + if (is16BPS(dstFormat) || isNBPS(dstFormat)) { + yuv2yuvX16inC(vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, + vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, + chrVSrcPtr, vChrFilterSize, + alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest, + (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW, + dstFormat); + } else if (vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12 + const int16_t *lumBuf = lumSrcPtr[0]; + const int16_t *chrUBuf= chrUSrcPtr[0]; + const int16_t *chrVBuf= chrVSrcPtr[0]; + const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL; + c->yuv2yuv1(c, lumBuf, chrUBuf, chrVBuf, alpBuf, dest, + uDest, vDest, aDest, dstW, chrDstW, lumDither, chrDither); + } else { //General YV12 + c->yuv2yuvX(c, + vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, + vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, + chrVSrcPtr, vChrFilterSize, + alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW, lumDither, chrDither); + } + } else { + assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2); + assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2); + if (vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB + int chrAlpha= vChrFilter[2*dstY+1]; + if(flags & SWS_FULL_CHR_H_INT) { + yuv2rgbXinC_full(c, //FIXME write a packed1_full function + vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, + vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, + chrVSrcPtr, vChrFilterSize, + alpSrcPtr, dest, dstW, dstY); + } else { + c->yuv2packed1(c, *lumSrcPtr, *chrUSrcPtr, *(chrUSrcPtr+1), + *chrVSrcPtr, *(chrVSrcPtr+1), + alpPixBuf ? *alpSrcPtr : NULL, + dest, dstW, chrAlpha, dstFormat, flags, dstY); + } + } else if (vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB + int lumAlpha= vLumFilter[2*dstY+1]; + int chrAlpha= vChrFilter[2*dstY+1]; + lumMmxFilter[2]= + lumMmxFilter[3]= vLumFilter[2*dstY ]*0x10001; + chrMmxFilter[2]= + chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001; + if(flags & SWS_FULL_CHR_H_INT) { + yuv2rgbXinC_full(c, //FIXME write a packed2_full function + vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, + vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize, + alpSrcPtr, dest, dstW, dstY); + } else { + c->yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrUSrcPtr, *(chrUSrcPtr+1), + *chrVSrcPtr, *(chrVSrcPtr+1), + alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *(alpSrcPtr+1) : NULL, + dest, dstW, lumAlpha, chrAlpha, dstY); + } + } else { //general RGB + if(flags & SWS_FULL_CHR_H_INT) { + yuv2rgbXinC_full(c, + vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, + vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize, + alpSrcPtr, dest, dstW, dstY); + } else { + c->yuv2packedX(c, + vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, + vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize, + alpSrcPtr, dest, dstW, dstY); + } + } + } + } else { // hmm looks like we can't use MMX here without overwriting this array's tail + const int16_t **lumSrcPtr= (const int16_t **)lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; + const int16_t **chrUSrcPtr= (const int16_t **)chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; + const int16_t **chrVSrcPtr= (const int16_t **)chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; + const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL; + if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) { + const int chrSkipMask= (1<<c->chrDstVSubSample)-1; + if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi + yuv2nv12XinC( + vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, + vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize, + dest, uDest, dstW, chrDstW, dstFormat, lumDither, chrDither); + } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 + const int chrSkipMask= (1<<c->chrDstVSubSample)-1; + if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi + if (is16BPS(dstFormat) || isNBPS(dstFormat)) { + yuv2yuvX16inC( + vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, + vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize, + alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest, (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW, + dstFormat); + } else { + yuv2yuvXinC( + vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, + vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize, + alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW, lumDither, chrDither); + } + } else { + assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2); + assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2); + if(flags & SWS_FULL_CHR_H_INT) { + yuv2rgbXinC_full(c, + vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, + vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize, + alpSrcPtr, dest, dstW, dstY); + } else { + yuv2packedXinC(c, + vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, + vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize, + alpSrcPtr, dest, dstW, dstY); + } + } + } + } + + if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf) + fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255); + +#if HAVE_MMX2 + if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2) + __asm__ volatile("sfence":::"memory"); +#endif + emms_c(); + + /* store changed local vars back in the context */ + c->dstY= dstY; + c->lumBufIndex= lumBufIndex; + c->chrBufIndex= chrBufIndex; + c->lastInLumBuf= lastInLumBuf; + c->lastInChrBuf= lastInChrBuf; + + return dstY - lastDstY; +} + +static void sws_init_swScale_c(SwsContext *c) +{ + enum PixelFormat srcFormat = c->srcFormat; + + c->yuv2nv12X = yuv2nv12X_c; + c->yuv2yuv1 = yuv2yuv1_c; + c->yuv2yuvX = yuv2yuvX_c; + c->yuv2packed1 = yuv2packed1_c; + c->yuv2packed2 = yuv2packed2_c; + c->yuv2packedX = yuv2packedX_c; + + c->hScale = hScale_c; + + if (c->flags & SWS_FAST_BILINEAR) + { + c->hyscale_fast = hyscale_fast_c; + c->hcscale_fast = hcscale_fast_c; + } + + c->chrToYV12 = NULL; + switch(srcFormat) { + case PIX_FMT_YUYV422 : c->chrToYV12 = yuy2ToUV_c; break; + case PIX_FMT_UYVY422 : c->chrToYV12 = uyvyToUV_c; break; + case PIX_FMT_NV12 : c->chrToYV12 = nv12ToUV_c; break; + case PIX_FMT_NV21 : c->chrToYV12 = nv21ToUV_c; break; + case PIX_FMT_RGB8 : + case PIX_FMT_BGR8 : + case PIX_FMT_PAL8 : + case PIX_FMT_BGR4_BYTE: + case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV; break; + case PIX_FMT_GRAY16BE : + case PIX_FMT_YUV420P9BE: + case PIX_FMT_YUV422P10BE: + case PIX_FMT_YUV420P10BE: + case PIX_FMT_YUV420P16BE: + case PIX_FMT_YUV422P16BE: + case PIX_FMT_YUV444P16BE: c->hScale16= HAVE_BIGENDIAN ? hScale16_c : hScale16X_c; break; + case PIX_FMT_GRAY16LE : + case PIX_FMT_YUV420P9LE: + case PIX_FMT_YUV422P10LE: + case PIX_FMT_YUV420P10LE: + case PIX_FMT_YUV420P16LE: + case PIX_FMT_YUV422P16LE: + case PIX_FMT_YUV444P16LE: c->hScale16= HAVE_BIGENDIAN ? hScale16X_c : hScale16_c; break; + } + if (c->chrSrcHSubSample) { + switch(srcFormat) { + case PIX_FMT_RGB48BE: c->chrToYV12 = rgb48BEToUV_half; break; + case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48LEToUV_half; break; + case PIX_FMT_BGR48BE: c->chrToYV12 = bgr48BEToUV_half; break; + case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48LEToUV_half; break; + case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_half; break; + case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV_half; break; + case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_half_c; break; + case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV_half; break; + case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV_half; break; + case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half; break; + case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV_half; break; + case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break; + case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV_half; break; + case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV_half; break; + } + } else { + switch(srcFormat) { + case PIX_FMT_RGB48BE: c->chrToYV12 = rgb48BEToUV; break; + case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48LEToUV; break; + case PIX_FMT_BGR48BE: c->chrToYV12 = bgr48BEToUV; break; + case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48LEToUV; break; + case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV; break; + case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV; break; + case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_c; break; + case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV; break; + case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV; break; + case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV; break; + case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV; break; + case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break; + case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV; break; + case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV; break; + } + } + + c->lumToYV12 = NULL; + c->alpToYV12 = NULL; + switch (srcFormat) { + case PIX_FMT_YUYV422 : + case PIX_FMT_GRAY8A : + c->lumToYV12 = yuy2ToY_c; break; + case PIX_FMT_UYVY422 : + c->lumToYV12 = uyvyToY_c; break; + case PIX_FMT_BGR24 : c->lumToYV12 = bgr24ToY_c; break; + case PIX_FMT_BGR565 : c->lumToYV12 = bgr16ToY; break; + case PIX_FMT_BGR555 : c->lumToYV12 = bgr15ToY; break; + case PIX_FMT_RGB24 : c->lumToYV12 = rgb24ToY_c; break; + case PIX_FMT_RGB565 : c->lumToYV12 = rgb16ToY; break; + case PIX_FMT_RGB555 : c->lumToYV12 = rgb15ToY; break; + case PIX_FMT_RGB8 : + case PIX_FMT_BGR8 : + case PIX_FMT_PAL8 : + case PIX_FMT_BGR4_BYTE: + case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY; break; + case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y; break; + case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y; break; + case PIX_FMT_RGB32 : c->lumToYV12 = bgr32ToY; break; + case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY; break; + case PIX_FMT_BGR32 : c->lumToYV12 = rgb32ToY; break; + case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY; break; + case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY; break; + case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY; break; + case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY; break; + case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY; break; + } + if (c->alpPixBuf) { + switch (srcFormat) { + case PIX_FMT_RGB32 : + case PIX_FMT_RGB32_1: + case PIX_FMT_BGR32 : + case PIX_FMT_BGR32_1: c->alpToYV12 = abgrToA; break; + case PIX_FMT_GRAY8A : c->alpToYV12 = yuy2ToY_c; break; + case PIX_FMT_PAL8 : c->alpToYV12 = palToA; break; + } + } + + if(isAnyRGB(c->srcFormat) || c->srcFormat == PIX_FMT_PAL8) + c->hScale16= hScale16_c; + + switch (srcFormat) { + case PIX_FMT_GRAY8A : + c->alpSrcOffset = 1; + break; + case PIX_FMT_RGB32 : + case PIX_FMT_BGR32 : + c->alpSrcOffset = 3; + break; + } + + if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) { + if (c->srcRange) { + c->lumConvertRange = lumRangeFromJpeg_c; + c->chrConvertRange = chrRangeFromJpeg_c; + } else { + c->lumConvertRange = lumRangeToJpeg_c; + c->chrConvertRange = chrRangeToJpeg_c; + } + } + + if (!(isGray(srcFormat) || isGray(c->dstFormat) || + srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE)) + c->needs_hcscale = 1; +} diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c index 87cd655a46..e0c4b25846 100644 --- a/libswscale/swscale_unscaled.c +++ b/libswscale/swscale_unscaled.c @@ -1,20 +1,20 @@ /* * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -221,7 +221,7 @@ static int palToRgbWrapper(SwsContext *c, const uint8_t* src[], int srcStride[], uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY; const uint8_t *srcPtr= src[0]; - if (srcFormat == PIX_FMT_Y400A) { + if (srcFormat == PIX_FMT_GRAY8A) { switch (dstFormat) { case PIX_FMT_RGB32 : conv = gray8aToPacked32; break; case PIX_FMT_BGR32 : conv = gray8aToPacked32; break; @@ -243,7 +243,7 @@ static int palToRgbWrapper(SwsContext *c, const uint8_t* src[], int srcStride[], if (!conv) av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n", - sws_format_name(srcFormat), sws_format_name(dstFormat)); + av_get_pix_fmt_name(srcFormat), av_get_pix_fmt_name(dstFormat)); else { for (i=0; i<srcSliceH; i++) { conv(srcPtr, dstPtr, c->srcW, (uint8_t *) c->pal_rgb); @@ -330,7 +330,7 @@ static int rgbToRgbWrapper(SwsContext *c, const uint8_t* src[], int srcStride[], if (!conv) { av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n", - sws_format_name(srcFormat), sws_format_name(dstFormat)); + av_get_pix_fmt_name(srcFormat), av_get_pix_fmt_name(dstFormat)); } else { const uint8_t *srcPtr= src[0]; uint8_t *dstPtr= dst[0]; @@ -340,7 +340,7 @@ static int rgbToRgbWrapper(SwsContext *c, const uint8_t* src[], int srcStride[], if ((dstFormat == PIX_FMT_RGB32_1 || dstFormat == PIX_FMT_BGR32_1) && !isRGBA32(srcFormat)) dstPtr += ALT32_CORR; - if (dstStride[0]*srcBpp == srcStride[0]*dstBpp && srcStride[0] > 0) + if (dstStride[0]*srcBpp == srcStride[0]*dstBpp && srcStride[0] > 0 && !(srcStride[0]%srcBpp)) conv(srcPtr, dstPtr + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]); else { int i; @@ -412,6 +412,28 @@ static int packedCopyWrapper(SwsContext *c, const uint8_t* src[], int srcStride[ return srcSliceH; } +#define DITHER_COPY(dst, dstStride, src, srcStride, bswap, dbswap)\ + uint16_t scale= dither_scale[dst_depth-1][src_depth-1];\ + int shift= src_depth-dst_depth + dither_scale[src_depth-2][dst_depth-1];\ + for (i = 0; i < height; i++) {\ + const uint8_t *dither= dithers[src_depth-9][i&7];\ + for (j = 0; j < length-7; j+=8){\ + dst[j+0] = dbswap((bswap(src[j+0]) + dither[0])*scale>>shift);\ + dst[j+1] = dbswap((bswap(src[j+1]) + dither[1])*scale>>shift);\ + dst[j+2] = dbswap((bswap(src[j+2]) + dither[2])*scale>>shift);\ + dst[j+3] = dbswap((bswap(src[j+3]) + dither[3])*scale>>shift);\ + dst[j+4] = dbswap((bswap(src[j+4]) + dither[4])*scale>>shift);\ + dst[j+5] = dbswap((bswap(src[j+5]) + dither[5])*scale>>shift);\ + dst[j+6] = dbswap((bswap(src[j+6]) + dither[6])*scale>>shift);\ + dst[j+7] = dbswap((bswap(src[j+7]) + dither[7])*scale>>shift);\ + }\ + for (; j < length; j++)\ + dst[j] = dbswap((bswap(src[j]) + dither[j&7])*scale>>shift);\ + dst += dstStride;\ + src += srcStride;\ + } + + static int planarCopyWrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[]) { @@ -431,141 +453,72 @@ static int planarCopyWrapper(SwsContext *c, const uint8_t* src[], int srcStride[ length*=2; fillPlane(dst[plane], dstStride[plane], length, height, y, (plane==3) ? 255 : 128); } else { - if(is9_OR_10BPS(c->srcFormat)) { + if(isNBPS(c->srcFormat) || isNBPS(c->dstFormat) + || (is16BPS(c->srcFormat) != is16BPS(c->dstFormat)) + ) { const int src_depth = av_pix_fmt_descriptors[c->srcFormat].comp[plane].depth_minus1+1; const int dst_depth = av_pix_fmt_descriptors[c->dstFormat].comp[plane].depth_minus1+1; const uint16_t *srcPtr2 = (const uint16_t*)srcPtr; + uint16_t *dstPtr2 = (uint16_t*)dstPtr; - if (is16BPS(c->dstFormat)) { - uint16_t *dstPtr2 = (uint16_t*)dstPtr; -#define COPY9_OR_10TO16(rfunc, wfunc) \ - for (i = 0; i < height; i++) { \ - for (j = 0; j < length; j++) { \ - int srcpx = rfunc(&srcPtr2[j]); \ - wfunc(&dstPtr2[j], (srcpx<<(16-src_depth)) | (srcpx>>(2*src_depth-16))); \ - } \ - dstPtr2 += dstStride[plane]/2; \ - srcPtr2 += srcStride[plane]/2; \ - } - if (isBE(c->dstFormat)) { - if (isBE(c->srcFormat)) { - COPY9_OR_10TO16(AV_RB16, AV_WB16); - } else { - COPY9_OR_10TO16(AV_RL16, AV_WB16); - } + if (dst_depth == 8) { + if(isBE(c->srcFormat) == HAVE_BIGENDIAN){ + DITHER_COPY(dstPtr, dstStride[plane], srcPtr2, srcStride[plane]/2, , ) } else { - if (isBE(c->srcFormat)) { - COPY9_OR_10TO16(AV_RB16, AV_WL16); - } else { - COPY9_OR_10TO16(AV_RL16, AV_WL16); - } - } - } else if (is9_OR_10BPS(c->dstFormat)) { - uint16_t *dstPtr2 = (uint16_t*)dstPtr; -#define COPY9_OR_10TO9_OR_10(loop) \ - for (i = 0; i < height; i++) { \ - for (j = 0; j < length; j++) { \ - loop; \ - } \ - dstPtr2 += dstStride[plane]/2; \ - srcPtr2 += srcStride[plane]/2; \ + DITHER_COPY(dstPtr, dstStride[plane], srcPtr2, srcStride[plane]/2, av_bswap16, ) } -#define COPY9_OR_10TO9_OR_10_2(rfunc, wfunc) \ - if (dst_depth > src_depth) { \ - COPY9_OR_10TO9_OR_10(int srcpx = rfunc(&srcPtr2[j]); \ - wfunc(&dstPtr2[j], (srcpx << 1) | (srcpx >> 9))); \ - } else if (dst_depth < src_depth) { \ - COPY9_OR_10TO9_OR_10(wfunc(&dstPtr2[j], rfunc(&srcPtr2[j]) >> 1)); \ - } else { \ - COPY9_OR_10TO9_OR_10(wfunc(&dstPtr2[j], rfunc(&srcPtr2[j]))); \ - } - if (isBE(c->dstFormat)) { - if (isBE(c->srcFormat)) { - COPY9_OR_10TO9_OR_10_2(AV_RB16, AV_WB16); + } else if (src_depth == 8) { + for (i = 0; i < height; i++) { + if(isBE(c->dstFormat)){ + for (j = 0; j < length; j++) + AV_WB16(&dstPtr2[j], (srcPtr[j]<<(dst_depth-8)) | + (srcPtr[j]>>(2*8-dst_depth))); } else { - COPY9_OR_10TO9_OR_10_2(AV_RL16, AV_WB16); + for (j = 0; j < length; j++) + AV_WL16(&dstPtr2[j], (srcPtr[j]<<(dst_depth-8)) | + (srcPtr[j]>>(2*8-dst_depth))); } - } else { - if (isBE(c->srcFormat)) { - COPY9_OR_10TO9_OR_10_2(AV_RB16, AV_WL16); + dstPtr2 += dstStride[plane]/2; + srcPtr += srcStride[plane]; + } + } else if (src_depth <= dst_depth) { + for (i = 0; i < height; i++) { +#define COPY_UP(r,w) \ + for (j = 0; j < length; j++){ \ + unsigned int v= r(&srcPtr2[j]);\ + w(&dstPtr2[j], (v<<(dst_depth-src_depth)) | \ + (v>>(2*src_depth-dst_depth)));\ + } + if(isBE(c->srcFormat)){ + if(isBE(c->dstFormat)){ + COPY_UP(AV_RB16, AV_WB16) + } else { + COPY_UP(AV_RB16, AV_WL16) + } } else { - COPY9_OR_10TO9_OR_10_2(AV_RL16, AV_WL16); + if(isBE(c->dstFormat)){ + COPY_UP(AV_RL16, AV_WB16) + } else { + COPY_UP(AV_RL16, AV_WL16) + } } + dstPtr2 += dstStride[plane]/2; + srcPtr2 += srcStride[plane]/2; } } else { - // FIXME Maybe dither instead. -#define COPY9_OR_10TO8(rfunc) \ - for (i = 0; i < height; i++) { \ - for (j = 0; j < length; j++) { \ - dstPtr[j] = rfunc(&srcPtr2[j])>>(src_depth-8); \ - } \ - dstPtr += dstStride[plane]; \ - srcPtr2 += srcStride[plane]/2; \ - } - if (isBE(c->srcFormat)) { - COPY9_OR_10TO8(AV_RB16); - } else { - COPY9_OR_10TO8(AV_RL16); - } - } - } else if(is9_OR_10BPS(c->dstFormat)) { - const int dst_depth = av_pix_fmt_descriptors[c->dstFormat].comp[plane].depth_minus1+1; - uint16_t *dstPtr2 = (uint16_t*)dstPtr; - - if (is16BPS(c->srcFormat)) { - const uint16_t *srcPtr2 = (const uint16_t*)srcPtr; -#define COPY16TO9_OR_10(rfunc, wfunc) \ - for (i = 0; i < height; i++) { \ - for (j = 0; j < length; j++) { \ - wfunc(&dstPtr2[j], rfunc(&srcPtr2[j])>>(16-dst_depth)); \ - } \ - dstPtr2 += dstStride[plane]/2; \ - srcPtr2 += srcStride[plane]/2; \ - } - if (isBE(c->dstFormat)) { - if (isBE(c->srcFormat)) { - COPY16TO9_OR_10(AV_RB16, AV_WB16); + if(isBE(c->srcFormat) == HAVE_BIGENDIAN){ + if(isBE(c->dstFormat) == HAVE_BIGENDIAN){ + DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, , ) } else { - COPY16TO9_OR_10(AV_RL16, AV_WB16); + DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, , av_bswap16) } - } else { - if (isBE(c->srcFormat)) { - COPY16TO9_OR_10(AV_RB16, AV_WL16); + }else{ + if(isBE(c->dstFormat) == HAVE_BIGENDIAN){ + DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, av_bswap16, ) } else { - COPY16TO9_OR_10(AV_RL16, AV_WL16); + DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, av_bswap16, av_bswap16) } } - } else /* 8bit */ { -#define COPY8TO9_OR_10(wfunc) \ - for (i = 0; i < height; i++) { \ - for (j = 0; j < length; j++) { \ - const int srcpx = srcPtr[j]; \ - wfunc(&dstPtr2[j], (srcpx<<(dst_depth-8)) | (srcpx >> (16-dst_depth))); \ - } \ - dstPtr2 += dstStride[plane]/2; \ - srcPtr += srcStride[plane]; \ - } - if (isBE(c->dstFormat)) { - COPY8TO9_OR_10(AV_WB16); - } else { - COPY8TO9_OR_10(AV_WL16); - } - } - } else if(is16BPS(c->srcFormat) && !is16BPS(c->dstFormat)) { - if (!isBE(c->srcFormat)) srcPtr++; - for (i=0; i<height; i++) { - for (j=0; j<length; j++) dstPtr[j] = srcPtr[j<<1]; - srcPtr+= srcStride[plane]; - dstPtr+= dstStride[plane]; - } - } else if(!is16BPS(c->srcFormat) && is16BPS(c->dstFormat)) { - for (i=0; i<height; i++) { - for (j=0; j<length; j++) { - dstPtr[ j<<1 ] = srcPtr[j]; - dstPtr[(j<<1)+1] = srcPtr[j]; - } - srcPtr+= srcStride[plane]; - dstPtr+= dstStride[plane]; } } else if(is16BPS(c->srcFormat) && is16BPS(c->dstFormat) && isBE(c->srcFormat) != isBE(c->dstFormat)) { @@ -763,9 +716,10 @@ int sws_scale(SwsContext *c, const uint8_t* const src[], const int srcStride[], if (usePal(c->srcFormat)) { for (i=0; i<256; i++) { - int p, r, g, b,y,u,v; + int p, r, g, b, y, u, v, a = 0xff; if(c->srcFormat == PIX_FMT_PAL8) { p=((const uint32_t*)(src[1]))[i]; + a= (p>>24)&0xFF; r= (p>>16)&0xFF; g= (p>> 8)&0xFF; b= p &0xFF; @@ -781,7 +735,7 @@ int sws_scale(SwsContext *c, const uint8_t* const src[], const int srcStride[], r= (i>>3 )*255; g= ((i>>1)&3)*85; b= (i&1 )*255; - } else if(c->srcFormat == PIX_FMT_GRAY8 || c->srcFormat == PIX_FMT_Y400A) { + } else if(c->srcFormat == PIX_FMT_GRAY8 || c->srcFormat == PIX_FMT_GRAY8A) { r = g = b = i; } else { assert(c->srcFormat == PIX_FMT_BGR4_BYTE); @@ -792,33 +746,33 @@ int sws_scale(SwsContext *c, const uint8_t* const src[], const int srcStride[], y= av_clip_uint8((RY*r + GY*g + BY*b + ( 33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT); u= av_clip_uint8((RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT); v= av_clip_uint8((RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT); - c->pal_yuv[i]= y + (u<<8) + (v<<16); + c->pal_yuv[i]= y + (u<<8) + (v<<16) + (a<<24); switch(c->dstFormat) { case PIX_FMT_BGR32: #if !HAVE_BIGENDIAN case PIX_FMT_RGB24: #endif - c->pal_rgb[i]= r + (g<<8) + (b<<16); + c->pal_rgb[i]= r + (g<<8) + (b<<16) + (a<<24); break; case PIX_FMT_BGR32_1: #if HAVE_BIGENDIAN case PIX_FMT_BGR24: #endif - c->pal_rgb[i]= (r + (g<<8) + (b<<16)) << 8; + c->pal_rgb[i]= a + (r<<8) + (g<<16) + (b<<24); break; case PIX_FMT_RGB32_1: #if HAVE_BIGENDIAN case PIX_FMT_RGB24: #endif - c->pal_rgb[i]= (b + (g<<8) + (r<<16)) << 8; + c->pal_rgb[i]= a + (b<<8) + (g<<16) + (r<<24); break; case PIX_FMT_RGB32: #if !HAVE_BIGENDIAN case PIX_FMT_BGR24: #endif default: - c->pal_rgb[i]= b + (g<<8) + (r<<16); + c->pal_rgb[i]= b + (g<<8) + (r<<16) + (a<<24); } } } @@ -863,6 +817,14 @@ int sws_scale(SwsContext *c, const uint8_t* const src[], const int srcStride[], } } +#if LIBSWSCALE_VERSION_MAJOR < 1 +int sws_scale_ordered(SwsContext *c, const uint8_t* const src[], int srcStride[], int srcSliceY, + int srcSliceH, uint8_t* dst[], int dstStride[]) +{ + return sws_scale(c, src, srcStride, srcSliceY, srcSliceH, dst, dstStride); +} +#endif + /* Convert the palette to the same packed 32-bit format as the palette */ void sws_convertPalette8ToPacked32(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette) { diff --git a/libswscale/utils.c b/libswscale/utils.c index 213bf3a043..c41590fd62 100644 --- a/libswscale/utils.c +++ b/libswscale/utils.c @@ -1,23 +1,25 @@ /* * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#define _SVID_SOURCE //needed for MAP_ANONYMOUS +#define _DARWIN_C_SOURCE // needed for MAP_ANON #include <inttypes.h> #include <string.h> #include <math.h> @@ -52,13 +54,13 @@ unsigned swscale_version(void) const char *swscale_configuration(void) { - return LIBAV_CONFIGURATION; + return FFMPEG_CONFIGURATION; } const char *swscale_license(void) { #define LICENSE_PREFIX "libswscale license: " - return LICENSE_PREFIX LIBAV_LICENSE + sizeof(LICENSE_PREFIX) - 1; + return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1; } #define RET 0xC3 //near return opcode for x86 @@ -87,7 +89,7 @@ const char *swscale_license(void) || (x)==PIX_FMT_RGB555LE \ || (x)==PIX_FMT_RGB555BE \ || (x)==PIX_FMT_GRAY8 \ - || (x)==PIX_FMT_Y400A \ + || (x)==PIX_FMT_GRAY8A \ || (x)==PIX_FMT_YUV410P \ || (x)==PIX_FMT_YUV440P \ || (x)==PIX_FMT_NV12 \ @@ -125,6 +127,7 @@ const char *swscale_license(void) || (x)==PIX_FMT_YUV420P16BE \ || (x)==PIX_FMT_YUV422P16BE \ || (x)==PIX_FMT_YUV444P16BE \ + || (x)==PIX_FMT_YUV422P10 \ ) int sws_isSupportedInput(enum PixelFormat pix_fmt) @@ -167,6 +170,7 @@ int sws_isSupportedInput(enum PixelFormat pix_fmt) || (x)==PIX_FMT_GRAY8 \ || (x)==PIX_FMT_YUV410P \ || (x)==PIX_FMT_YUV440P \ + || (x)==PIX_FMT_YUV422P10 \ || (x)==PIX_FMT_YUV420P9LE \ || (x)==PIX_FMT_YUV420P10LE \ || (x)==PIX_FMT_YUV420P16LE \ @@ -186,13 +190,12 @@ int sws_isSupportedOutput(enum PixelFormat pix_fmt) extern const int32_t ff_yuv2rgb_coeffs[8][4]; +#if FF_API_SWS_FORMAT_NAME const char *sws_format_name(enum PixelFormat format) { - if ((unsigned)format < PIX_FMT_NB && av_pix_fmt_descriptors[format].name) - return av_pix_fmt_descriptors[format].name; - else - return "Unknown format"; + return av_get_pix_fmt_name(format); } +#endif static double getSplineCoeff(double a, double b, double c, double d, double dist) { @@ -730,7 +733,7 @@ int sws_setColorspaceDetails(SwsContext *c, const int inv_table[4], int srcRange int sws_getColorspaceDetails(SwsContext *c, int **inv_table, int *srcRange, int **table, int *dstRange, int *brightness, int *contrast, int *saturation) { - if (isYUV(c->dstFormat) || isGray(c->dstFormat)) return -1; + if (!c || isYUV(c->dstFormat) || isGray(c->dstFormat)) return -1; *inv_table = c->srcColorspaceTable; *table = c->dstColorspaceTable; @@ -774,7 +777,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) int srcH= c->srcH; int dstW= c->dstW; int dstH= c->dstH; - int dst_stride = FFALIGN(dstW * sizeof(int16_t) + 16, 16), dst_stride_px = dst_stride >> 1; + int dst_stride = FFALIGN(dstW * sizeof(int16_t)+66, 16), dst_stride_px = dst_stride >> 1; int flags, cpu_flags; enum PixelFormat srcFormat= c->srcFormat; enum PixelFormat dstFormat= c->dstFormat; @@ -787,11 +790,11 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) unscaled = (srcW == dstW && srcH == dstH); if (!isSupportedIn(srcFormat)) { - av_log(NULL, AV_LOG_ERROR, "swScaler: %s is not supported as input pixel format\n", sws_format_name(srcFormat)); + av_log(NULL, AV_LOG_ERROR, "swScaler: %s is not supported as input pixel format\n", av_get_pix_fmt_name(srcFormat)); return AVERROR(EINVAL); } if (!isSupportedOut(dstFormat)) { - av_log(NULL, AV_LOG_ERROR, "swScaler: %s is not supported as output pixel format\n", sws_format_name(dstFormat)); + av_log(NULL, AV_LOG_ERROR, "swScaler: %s is not supported as output pixel format\n", av_get_pix_fmt_name(dstFormat)); return AVERROR(EINVAL); } @@ -866,19 +869,19 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) if (c->swScale) { if (flags&SWS_PRINT_INFO) av_log(c, AV_LOG_INFO, "using unscaled %s -> %s special converter\n", - sws_format_name(srcFormat), sws_format_name(dstFormat)); + av_get_pix_fmt_name(srcFormat), av_get_pix_fmt_name(dstFormat)); return 0; } } - FF_ALLOC_OR_GOTO(c, c->formatConvBuffer, FFALIGN(srcW, 16) * 2, fail); + FF_ALLOC_OR_GOTO(c, c->formatConvBuffer, FFALIGN(srcW*2+78, 16) * 2, fail); if (HAVE_MMX2 && cpu_flags & AV_CPU_FLAG_MMX2) { c->canMMX2BeUsed= (dstW >=srcW && (dstW&31)==0 && (srcW&15)==0) ? 1 : 0; if (!c->canMMX2BeUsed && dstW >=srcW && (srcW&15)==0 && (flags&SWS_FAST_BILINEAR)) { if (flags&SWS_PRINT_INFO) av_log(c, AV_LOG_INFO, "output width is not a multiple of 32 -> no MMX2 scaler\n"); } - if (usesHFilter) c->canMMX2BeUsed=0; + if (usesHFilter || isNBPS(c->srcFormat) || is16BPS(c->srcFormat) || isAnyRGB(c->srcFormat)) c->canMMX2BeUsed=0; } else c->canMMX2BeUsed=0; @@ -923,7 +926,11 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) c->chrMmx2FilterCode = av_malloc(c->chrMmx2FilterCodeSize); #endif +#ifdef MAP_ANONYMOUS + if (c->lumMmx2FilterCode == MAP_FAILED || c->chrMmx2FilterCode == MAP_FAILED) +#else if (!c->lumMmx2FilterCode || !c->chrMmx2FilterCode) +#endif return AVERROR(ENOMEM); FF_ALLOCZ_OR_GOTO(c, c->hLumFilter , (dstW /8+8)*sizeof(int16_t), fail); FF_ALLOCZ_OR_GOTO(c, c->hChrFilter , (c->chrDstW /4+8)*sizeof(int16_t), fail); @@ -1000,7 +1007,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) c->vLumBufSize= c->vLumFilterSize; c->vChrBufSize= c->vChrFilterSize; for (i=0; i<dstH; i++) { - int chrI= i*c->chrDstH / dstH; + int chrI= (int64_t)i*c->chrDstH / dstH; int nextSlice= FFMAX(c->vLumFilterPos[i ] + c->vLumFilterSize - 1, ((c->vChrFilterPos[chrI] + c->vChrFilterSize - 1)<<c->chrSrcVSubSample)); @@ -1059,7 +1066,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) else av_log(c, AV_LOG_INFO, "ehh flags invalid?! "); av_log(c, AV_LOG_INFO, "from %s to %s%s ", - sws_format_name(srcFormat), + av_get_pix_fmt_name(srcFormat), #ifdef DITHER1XBPP dstFormat == PIX_FMT_BGR555 || dstFormat == PIX_FMT_BGR565 || dstFormat == PIX_FMT_RGB444BE || dstFormat == PIX_FMT_RGB444LE || @@ -1067,7 +1074,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) #else "", #endif - sws_format_name(dstFormat)); + av_get_pix_fmt_name(dstFormat)); if (HAVE_MMX2 && cpu_flags & AV_CPU_FLAG_MMX2) av_log(c, AV_LOG_INFO, "using MMX2\n"); else if (HAVE_AMD3DNOW && cpu_flags & AV_CPU_FLAG_3DNOW) av_log(c, AV_LOG_INFO, "using 3DNOW\n"); @@ -1461,6 +1468,13 @@ void sws_printVec2(SwsVector *a, AVClass *log_ctx, int log_level) } } +#if LIBSWSCALE_VERSION_MAJOR < 1 +void sws_printVec(SwsVector *a) +{ + sws_printVec2(a, NULL, AV_LOG_DEBUG); +} +#endif + void sws_freeVec(SwsVector *a) { if (!a) return; @@ -1534,7 +1548,7 @@ void sws_freeContext(SwsContext *c) #endif /* HAVE_MMX */ av_freep(&c->yuvTable); - av_free(c->formatConvBuffer); + av_freep(&c->formatConvBuffer); av_free(c); } diff --git a/libswscale/x86/rgb2rgb.c b/libswscale/x86/rgb2rgb.c index 97c50dd636..78b804e367 100644 --- a/libswscale/x86/rgb2rgb.c +++ b/libswscale/x86/rgb2rgb.c @@ -6,20 +6,20 @@ * Written by Nick Kurshev. * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at) * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ diff --git a/libswscale/x86/rgb2rgb_template.c b/libswscale/x86/rgb2rgb_template.c index c255610193..baef3f8ae5 100644 --- a/libswscale/x86/rgb2rgb_template.c +++ b/libswscale/x86/rgb2rgb_template.c @@ -7,20 +7,20 @@ * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at) * lot of big-endian byte order fixes by Alex Beregszaszi * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ diff --git a/libswscale/x86/swscale_mmx.c b/libswscale/x86/swscale_mmx.c index c86f75df51..2d5b88070a 100644 --- a/libswscale/x86/swscale_mmx.c +++ b/libswscale/x86/swscale_mmx.c @@ -1,20 +1,20 @@ /* * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -72,14 +72,14 @@ DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toY1Coeff) = 0x0C88000040870C88ULL; DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toY2Coeff) = 0x20DE4087000020DEULL; DECLARE_ASM_CONST(8, uint64_t, ff_rgb24toY1Coeff) = 0x20DE0000408720DEULL; DECLARE_ASM_CONST(8, uint64_t, ff_rgb24toY2Coeff) = 0x0C88408700000C88ULL; -DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toYOffset) = 0x0008400000084000ULL; +DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toYOffset) = 0x0008010000080100ULL; DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUV)[2][4] = { {0x38380000DAC83838ULL, 0xECFFDAC80000ECFFULL, 0xF6E40000D0E3F6E4ULL, 0x3838D0E300003838ULL}, {0xECFF0000DAC8ECFFULL, 0x3838DAC800003838ULL, 0x38380000D0E33838ULL, 0xF6E4D0E30000F6E4ULL}, }; -DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUVOffset)= 0x0040400000404000ULL; +DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUVOffset)= 0x0040010000400100ULL; //MMX versions #if HAVE_MMX diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c index 8fad257ddf..25399fadef 100644 --- a/libswscale/x86/swscale_template.c +++ b/libswscale/x86/swscale_template.c @@ -1,20 +1,20 @@ /* * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -37,8 +37,8 @@ #define YSCALEYUV2YV12X(offset, dest, end, pos) \ __asm__ volatile(\ - "movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\ - "movq %%mm3, %%mm4 \n\t"\ + "movq "DITHER16"+0(%0), %%mm3 \n\t"\ + "movq "DITHER16"+8(%0), %%mm4 \n\t"\ "lea " offset "(%0), %%"REG_d" \n\t"\ "mov (%%"REG_d"), %%"REG_S" \n\t"\ ".p2align 4 \n\t" /* FIXME Unroll? */\ @@ -60,8 +60,8 @@ MOVNTQ(%%mm3, (%1, %3))\ "add $8, %3 \n\t"\ "cmp %2, %3 \n\t"\ - "movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\ - "movq %%mm3, %%mm4 \n\t"\ + "movq "DITHER16"+0(%0), %%mm3 \n\t"\ + "movq "DITHER16"+8(%0), %%mm4 \n\t"\ "lea " offset "(%0), %%"REG_d" \n\t"\ "mov (%%"REG_d"), %%"REG_S" \n\t"\ "jb 1b \n\t"\ @@ -76,13 +76,18 @@ static void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter, const int16_t **chrVSrc, int chrFilterSize, const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest, uint8_t *vDest, - uint8_t *aDest, int dstW, int chrDstW) + uint8_t *aDest, int dstW, int chrDstW, + const uint8_t *lumDither, const uint8_t *chrDither) { + int i; if (uDest) { x86_reg uv_off = c->uv_off; + for(i=0; i<8; i++) c->dither16[i] = chrDither[i]>>4; YSCALEYUV2YV12X(CHR_MMX_FILTER_OFFSET, uDest, chrDstW, 0) + for(i=0; i<8; i++) c->dither16[i] = chrDither[(i+3)&7]>>4; YSCALEYUV2YV12X(CHR_MMX_FILTER_OFFSET, vDest - uv_off, chrDstW + uv_off, uv_off) } + for(i=0; i<8; i++) c->dither16[i] = lumDither[i]>>4; if (CONFIG_SWSCALE_ALPHA && aDest) { YSCALEYUV2YV12X(ALP_MMX_FILTER_OFFSET, aDest, dstW, 0) } @@ -93,6 +98,10 @@ static void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter, #define YSCALEYUV2YV12X_ACCURATE(offset, dest, end, pos) \ __asm__ volatile(\ "lea " offset "(%0), %%"REG_d" \n\t"\ + "movq "DITHER32"+0(%0), %%mm4 \n\t"\ + "movq "DITHER32"+8(%0), %%mm5 \n\t"\ + "movq "DITHER32"+16(%0), %%mm6 \n\t"\ + "movq "DITHER32"+24(%0), %%mm7 \n\t"\ "pxor %%mm4, %%mm4 \n\t"\ "pxor %%mm5, %%mm5 \n\t"\ "pxor %%mm6, %%mm6 \n\t"\ @@ -124,26 +133,21 @@ static void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter, "paddd %%mm2, %%mm6 \n\t"\ "paddd %%mm0, %%mm7 \n\t"\ " jnz 1b \n\t"\ - "psrad $16, %%mm4 \n\t"\ - "psrad $16, %%mm5 \n\t"\ - "psrad $16, %%mm6 \n\t"\ - "psrad $16, %%mm7 \n\t"\ - "movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\ + "psrad $19, %%mm4 \n\t"\ + "psrad $19, %%mm5 \n\t"\ + "psrad $19, %%mm6 \n\t"\ + "psrad $19, %%mm7 \n\t"\ "packssdw %%mm5, %%mm4 \n\t"\ "packssdw %%mm7, %%mm6 \n\t"\ - "paddw %%mm0, %%mm4 \n\t"\ - "paddw %%mm0, %%mm6 \n\t"\ - "psraw $3, %%mm4 \n\t"\ - "psraw $3, %%mm6 \n\t"\ "packuswb %%mm6, %%mm4 \n\t"\ MOVNTQ(%%mm4, (%1, %3))\ "add $8, %3 \n\t"\ "cmp %2, %3 \n\t"\ "lea " offset "(%0), %%"REG_d" \n\t"\ - "pxor %%mm4, %%mm4 \n\t"\ - "pxor %%mm5, %%mm5 \n\t"\ - "pxor %%mm6, %%mm6 \n\t"\ - "pxor %%mm7, %%mm7 \n\t"\ + "movq "DITHER32"+0(%0), %%mm4 \n\t"\ + "movq "DITHER32"+8(%0), %%mm5 \n\t"\ + "movq "DITHER32"+16(%0), %%mm6 \n\t"\ + "movq "DITHER32"+24(%0), %%mm7 \n\t"\ "mov (%%"REG_d"), %%"REG_S" \n\t"\ "jb 1b \n\t"\ :: "r" (&c->redDither),\ @@ -157,13 +161,18 @@ static void RENAME(yuv2yuvX_ar)(SwsContext *c, const int16_t *lumFilter, const int16_t **chrVSrc, int chrFilterSize, const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest, uint8_t *vDest, - uint8_t *aDest, int dstW, int chrDstW) + uint8_t *aDest, int dstW, int chrDstW, + const uint8_t *lumDither, const uint8_t *chrDither) { + int i; if (uDest) { x86_reg uv_off = c->uv_off; + for(i=0; i<8; i++) c->dither32[i] = chrDither[i]<<12; YSCALEYUV2YV12X_ACCURATE(CHR_MMX_FILTER_OFFSET, uDest, chrDstW, 0) + for(i=0; i<8; i++) c->dither32[i] = chrDither[(i+3)&7]<<12; YSCALEYUV2YV12X_ACCURATE(CHR_MMX_FILTER_OFFSET, vDest - uv_off, chrDstW + uv_off, uv_off) } + for(i=0; i<8; i++) c->dither32[i] = lumDither[i]<<12; if (CONFIG_SWSCALE_ALPHA && aDest) { YSCALEYUV2YV12X_ACCURATE(ALP_MMX_FILTER_OFFSET, aDest, dstW, 0) } @@ -175,7 +184,8 @@ static void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, const int16_t *chrUSrc, const int16_t *chrVSrc, const int16_t *alpSrc, uint8_t *dest, uint8_t *uDest, uint8_t *vDest, - uint8_t *aDest, int dstW, int chrDstW) + uint8_t *aDest, int dstW, int chrDstW, + const uint8_t *lumDither, const uint8_t *chrDither) { int p= 4; const int16_t *src[4]= { alpSrc + dstW, lumSrc + dstW, chrUSrc + chrDstW, chrVSrc + chrDstW }; @@ -208,7 +218,8 @@ static void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc, const int16_t *chrUSrc, const int16_t *chrVSrc, const int16_t *alpSrc, uint8_t *dest, uint8_t *uDest, uint8_t *vDest, - uint8_t *aDest, int dstW, int chrDstW) + uint8_t *aDest, int dstW, int chrDstW, + const uint8_t *lumDither, const uint8_t *chrDither) { int p= 4; const int16_t *src[4]= { alpSrc + dstW, lumSrc + dstW, chrUSrc + chrDstW, chrVSrc + chrDstW }; @@ -217,16 +228,17 @@ static void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc, while (p--) { if (dst[p]) { + int i; + for(i=0; i<8; i++) c->dither16[i] = i<2 ? lumDither[i] : chrDither[i]; __asm__ volatile( "mov %2, %%"REG_a" \n\t" - "pcmpeqw %%mm7, %%mm7 \n\t" - "psrlw $15, %%mm7 \n\t" - "psllw $6, %%mm7 \n\t" + "movq 0(%3), %%mm6 \n\t" + "movq 8(%3), %%mm7 \n\t" ".p2align 4 \n\t" /* FIXME Unroll? */ "1: \n\t" "movq (%0, %%"REG_a", 2), %%mm0 \n\t" "movq 8(%0, %%"REG_a", 2), %%mm1 \n\t" - "paddsw %%mm7, %%mm0 \n\t" + "paddsw %%mm6, %%mm0 \n\t" "paddsw %%mm7, %%mm1 \n\t" "psraw $7, %%mm0 \n\t" "psraw $7, %%mm1 \n\t" @@ -235,7 +247,7 @@ static void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc, "add $8, %%"REG_a" \n\t" "jnc 1b \n\t" :: "r" (src[p]), "r" (dst[p] + counter[p]), - "g" (-counter[p]) + "g" (-counter[p]), "r"(c->dither16) : "%"REG_a ); } @@ -991,8 +1003,8 @@ static void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0, : "%r8" ); #else - *(const uint16_t **)(&c->u_temp)=abuf0; - *(const uint16_t **)(&c->v_temp)=abuf1; + c->u_temp=(intptr_t)abuf0; + c->v_temp=(intptr_t)abuf1; __asm__ volatile( "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %4, %%"REG_b" \n\t" @@ -1761,7 +1773,7 @@ static void RENAME(nv21ToUV)(uint8_t *dstU, uint8_t *dstV, } #endif /* !COMPILE_TEMPLATE_MMX2 */ -static av_always_inline void RENAME(bgr24ToY_mmx)(uint8_t *dst, const uint8_t *src, +static av_always_inline void RENAME(bgr24ToY_mmx)(int16_t *dst, const uint8_t *src, int width, enum PixelFormat srcFormat) { @@ -1802,32 +1814,31 @@ static av_always_inline void RENAME(bgr24ToY_mmx)(uint8_t *dst, const uint8_t *s "paddd %%mm3, %%mm2 \n\t" "paddd %%mm4, %%mm0 \n\t" "paddd %%mm4, %%mm2 \n\t" - "psrad $15, %%mm0 \n\t" - "psrad $15, %%mm2 \n\t" + "psrad $9, %%mm0 \n\t" + "psrad $9, %%mm2 \n\t" "packssdw %%mm2, %%mm0 \n\t" - "packuswb %%mm0, %%mm0 \n\t" - "movd %%mm0, (%1, %%"REG_a") \n\t" - "add $4, %%"REG_a" \n\t" + "movq %%mm0, (%1, %%"REG_a") \n\t" + "add $8, %%"REG_a" \n\t" " js 1b \n\t" : "+r" (src) - : "r" (dst+width), "g" ((x86_reg)-width) + : "r" (dst+width), "g" ((x86_reg)-2*width) : "%"REG_a ); } -static void RENAME(bgr24ToY)(uint8_t *dst, const uint8_t *src, +static void RENAME(bgr24ToY)(int16_t *dst, const uint8_t *src, int width, uint32_t *unused) { RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_BGR24); } -static void RENAME(rgb24ToY)(uint8_t *dst, const uint8_t *src, +static void RENAME(rgb24ToY)(int16_t *dst, const uint8_t *src, int width, uint32_t *unused) { RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_RGB24); } -static av_always_inline void RENAME(bgr24ToUV_mmx)(uint8_t *dstU, uint8_t *dstV, +static av_always_inline void RENAME(bgr24ToUV_mmx)(int16_t *dstU, int16_t *dstV, const uint8_t *src, int width, enum PixelFormat srcFormat) { @@ -1869,25 +1880,23 @@ static av_always_inline void RENAME(bgr24ToUV_mmx)(uint8_t *dstU, uint8_t *dstV, "paddd %%mm3, %%mm2 \n\t" "paddd %%mm3, %%mm1 \n\t" "paddd %%mm3, %%mm4 \n\t" - "psrad $15, %%mm0 \n\t" - "psrad $15, %%mm2 \n\t" - "psrad $15, %%mm1 \n\t" - "psrad $15, %%mm4 \n\t" + "psrad $9, %%mm0 \n\t" + "psrad $9, %%mm2 \n\t" + "psrad $9, %%mm1 \n\t" + "psrad $9, %%mm4 \n\t" "packssdw %%mm1, %%mm0 \n\t" "packssdw %%mm4, %%mm2 \n\t" - "packuswb %%mm0, %%mm0 \n\t" - "packuswb %%mm2, %%mm2 \n\t" - "movd %%mm0, (%1, %%"REG_a") \n\t" - "movd %%mm2, (%2, %%"REG_a") \n\t" - "add $4, %%"REG_a" \n\t" + "movq %%mm0, (%1, %%"REG_a") \n\t" + "movq %%mm2, (%2, %%"REG_a") \n\t" + "add $8, %%"REG_a" \n\t" " js 1b \n\t" : "+r" (src) - : "r" (dstU+width), "r" (dstV+width), "g" ((x86_reg)-width), "r"(ff_bgr24toUV[srcFormat == PIX_FMT_RGB24]) + : "r" (dstU+width), "r" (dstV+width), "g" ((x86_reg)-2*width), "r"(ff_bgr24toUV[srcFormat == PIX_FMT_RGB24]) : "%"REG_a ); } -static void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, +static void RENAME(bgr24ToUV)(int16_t *dstU, int16_t *dstV, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused) { @@ -1895,7 +1904,7 @@ static void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, assert(src1 == src2); } -static void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, +static void RENAME(rgb24ToUV)(int16_t *dstU, int16_t *dstV, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused) { @@ -2062,6 +2071,163 @@ static void RENAME(hScale)(int16_t *dst, int dstW, } #endif /* !COMPILE_TEMPLATE_MMX2 */ +static inline void RENAME(hScale16)(int16_t *dst, int dstW, const uint16_t *src, int srcW, int xInc, + const int16_t *filter, const int16_t *filterPos, long filterSize, int shift) +{ + int i, j; + + assert(filterSize % 4 == 0 && filterSize>0); + if (filterSize==4 && shift<15) { // Always true for upscaling, sometimes for down, too. + x86_reg counter= -2*dstW; + filter-= counter*2; + filterPos-= counter/2; + dst-= counter/2; + __asm__ volatile( + "movd %5, %%mm7 \n\t" +#if defined(PIC) + "push %%"REG_b" \n\t" +#endif + "push %%"REG_BP" \n\t" // we use 7 regs here ... + "mov %%"REG_a", %%"REG_BP" \n\t" + ".p2align 4 \n\t" + "1: \n\t" + "movzwl (%2, %%"REG_BP"), %%eax \n\t" + "movzwl 2(%2, %%"REG_BP"), %%ebx \n\t" + "movq (%1, %%"REG_BP", 4), %%mm1 \n\t" + "movq 8(%1, %%"REG_BP", 4), %%mm3 \n\t" + "movq (%3, %%"REG_a", 2), %%mm0 \n\t" + "movq (%3, %%"REG_b", 2), %%mm2 \n\t" + "pmaddwd %%mm1, %%mm0 \n\t" + "pmaddwd %%mm2, %%mm3 \n\t" + "movq %%mm0, %%mm4 \n\t" + "punpckldq %%mm3, %%mm0 \n\t" + "punpckhdq %%mm3, %%mm4 \n\t" + "paddd %%mm4, %%mm0 \n\t" + "psrad %%mm7, %%mm0 \n\t" + "packssdw %%mm0, %%mm0 \n\t" + "movd %%mm0, (%4, %%"REG_BP") \n\t" + "add $4, %%"REG_BP" \n\t" + " jnc 1b \n\t" + + "pop %%"REG_BP" \n\t" +#if defined(PIC) + "pop %%"REG_b" \n\t" +#endif + : "+a" (counter) + : "c" (filter), "d" (filterPos), "S" (src), "D" (dst), "m"(shift) +#if !defined(PIC) + : "%"REG_b +#endif + ); + } else if (filterSize==8 && shift<15) { + x86_reg counter= -2*dstW; + filter-= counter*4; + filterPos-= counter/2; + dst-= counter/2; + __asm__ volatile( + "movd %5, %%mm7 \n\t" +#if defined(PIC) + "push %%"REG_b" \n\t" +#endif + "push %%"REG_BP" \n\t" // we use 7 regs here ... + "mov %%"REG_a", %%"REG_BP" \n\t" + ".p2align 4 \n\t" + "1: \n\t" + "movzwl (%2, %%"REG_BP"), %%eax \n\t" + "movzwl 2(%2, %%"REG_BP"), %%ebx \n\t" + "movq (%1, %%"REG_BP", 8), %%mm1 \n\t" + "movq 16(%1, %%"REG_BP", 8), %%mm3 \n\t" + "movq (%3, %%"REG_a", 2), %%mm0 \n\t" + "movq (%3, %%"REG_b", 2), %%mm2 \n\t" + "pmaddwd %%mm1, %%mm0 \n\t" + "pmaddwd %%mm2, %%mm3 \n\t" + + "movq 8(%1, %%"REG_BP", 8), %%mm1 \n\t" + "movq 24(%1, %%"REG_BP", 8), %%mm5 \n\t" + "movq 8(%3, %%"REG_a", 2), %%mm4 \n\t" + "movq 8(%3, %%"REG_b", 2), %%mm2 \n\t" + "pmaddwd %%mm1, %%mm4 \n\t" + "pmaddwd %%mm2, %%mm5 \n\t" + "paddd %%mm4, %%mm0 \n\t" + "paddd %%mm5, %%mm3 \n\t" + "movq %%mm0, %%mm4 \n\t" + "punpckldq %%mm3, %%mm0 \n\t" + "punpckhdq %%mm3, %%mm4 \n\t" + "paddd %%mm4, %%mm0 \n\t" + "psrad %%mm7, %%mm0 \n\t" + "packssdw %%mm0, %%mm0 \n\t" + "movd %%mm0, (%4, %%"REG_BP") \n\t" + "add $4, %%"REG_BP" \n\t" + " jnc 1b \n\t" + + "pop %%"REG_BP" \n\t" +#if defined(PIC) + "pop %%"REG_b" \n\t" +#endif + : "+a" (counter) + : "c" (filter), "d" (filterPos), "S" (src), "D" (dst), "m"(shift) +#if !defined(PIC) + : "%"REG_b +#endif + ); + } else if (shift<15){ + const uint16_t *offset = src+filterSize; + x86_reg counter= -2*dstW; + //filter-= counter*filterSize/2; + filterPos-= counter/2; + dst-= counter/2; + __asm__ volatile( + "movd %7, %%mm7 \n\t" + ".p2align 4 \n\t" + "1: \n\t" + "mov %2, %%"REG_c" \n\t" + "movzwl (%%"REG_c", %0), %%eax \n\t" + "movzwl 2(%%"REG_c", %0), %%edx \n\t" + "mov %5, %%"REG_c" \n\t" + "pxor %%mm4, %%mm4 \n\t" + "pxor %%mm5, %%mm5 \n\t" + "2: \n\t" + "movq (%1), %%mm1 \n\t" + "movq (%1, %6), %%mm3 \n\t" + "movq (%%"REG_c", %%"REG_a", 2), %%mm0 \n\t" + "movq (%%"REG_c", %%"REG_d", 2), %%mm2 \n\t" + "pmaddwd %%mm1, %%mm0 \n\t" + "pmaddwd %%mm2, %%mm3 \n\t" + "paddd %%mm3, %%mm5 \n\t" + "paddd %%mm0, %%mm4 \n\t" + "add $8, %1 \n\t" + "add $8, %%"REG_c" \n\t" + "cmp %4, %%"REG_c" \n\t" + " jb 2b \n\t" + "add %6, %1 \n\t" + "movq %%mm4, %%mm0 \n\t" + "punpckldq %%mm5, %%mm4 \n\t" + "punpckhdq %%mm5, %%mm0 \n\t" + "paddd %%mm0, %%mm4 \n\t" + "psrad %%mm7, %%mm4 \n\t" + "packssdw %%mm4, %%mm4 \n\t" + "mov %3, %%"REG_a" \n\t" + "movd %%mm4, (%%"REG_a", %0) \n\t" + "add $4, %0 \n\t" + " jnc 1b \n\t" + + : "+r" (counter), "+r" (filter) + : "m" (filterPos), "m" (dst), "m"(offset), + "m" (src), "r" ((x86_reg)filterSize*2), "m"(shift) + : "%"REG_a, "%"REG_c, "%"REG_d + ); + } else + for (i=0; i<dstW; i++) { + int srcPos= filterPos[i]; + int val=0; + for (j=0; j<filterSize; j++) { + val += ((int)src[srcPos + j])*filter[filterSize*i + j]; + } + dst[i] = FFMIN(val>>shift, (1<<15)-1); // the cubic equation does overflow ... + } +} + + #if COMPILE_TEMPLATE_MMX2 static void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst, int dstWidth, const uint8_t *src, @@ -2203,8 +2369,8 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c) enum PixelFormat srcFormat = c->srcFormat, dstFormat = c->dstFormat; - if (!is16BPS(dstFormat) && !is9_OR_10BPS(dstFormat)) { - if (!(c->flags & SWS_BITEXACT)) { + if (!is16BPS(dstFormat) && !is9_OR_10BPS(dstFormat) && dstFormat != PIX_FMT_NV12 + && dstFormat != PIX_FMT_NV21 && !(c->flags & SWS_BITEXACT)) { if (c->flags & SWS_ACCURATE_RND) { c->yuv2yuv1 = RENAME(yuv2yuv1_ar ); c->yuv2yuvX = RENAME(yuv2yuvX_ar ); @@ -2219,7 +2385,8 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c) } } } else { - c->yuv2yuv1 = RENAME(yuv2yuv1 ); + int should_dither= isNBPS(c->srcFormat) || is16BPS(c->srcFormat); + c->yuv2yuv1 = should_dither ? RENAME(yuv2yuv1_ar ) : RENAME(yuv2yuv1 ); c->yuv2yuvX = RENAME(yuv2yuvX ); if (!(c->flags & SWS_FULL_CHR_H_INT)) { switch (c->dstFormat) { @@ -2232,7 +2399,6 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c) } } } - } if (!(c->flags & SWS_FULL_CHR_H_INT)) { switch (c->dstFormat) { case PIX_FMT_RGB32: @@ -2285,13 +2451,13 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c) case PIX_FMT_UYVY422 : c->chrToYV12 = RENAME(uyvyToUV); break; case PIX_FMT_NV12 : c->chrToYV12 = RENAME(nv12ToUV); break; case PIX_FMT_NV21 : c->chrToYV12 = RENAME(nv21ToUV); break; - case PIX_FMT_YUV420P16BE: - case PIX_FMT_YUV422P16BE: - case PIX_FMT_YUV444P16BE: c->chrToYV12 = RENAME(BEToUV); break; + case PIX_FMT_GRAY16LE : + case PIX_FMT_YUV420P9LE: + case PIX_FMT_YUV422P10LE: + case PIX_FMT_YUV420P10LE: case PIX_FMT_YUV420P16LE: case PIX_FMT_YUV422P16LE: - case PIX_FMT_YUV444P16LE: c->chrToYV12 = RENAME(LEToUV); break; - default: break; + case PIX_FMT_YUV444P16LE: c->hScale16= RENAME(hScale16); break; } #endif /* !COMPILE_TEMPLATE_MMX2 */ if (!c->chrSrcHSubSample) { @@ -2305,16 +2471,10 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c) switch (srcFormat) { #if !COMPILE_TEMPLATE_MMX2 case PIX_FMT_YUYV422 : - case PIX_FMT_YUV420P16BE: - case PIX_FMT_YUV422P16BE: - case PIX_FMT_YUV444P16BE: case PIX_FMT_Y400A : - case PIX_FMT_GRAY16BE : c->lumToYV12 = RENAME(yuy2ToY); break; + c->lumToYV12 = RENAME(yuy2ToY); break; case PIX_FMT_UYVY422 : - case PIX_FMT_YUV420P16LE: - case PIX_FMT_YUV422P16LE: - case PIX_FMT_YUV444P16LE: - case PIX_FMT_GRAY16LE : c->lumToYV12 = RENAME(uyvyToY); break; + c->lumToYV12 = RENAME(uyvyToY); break; #endif /* !COMPILE_TEMPLATE_MMX2 */ case PIX_FMT_BGR24 : c->lumToYV12 = RENAME(bgr24ToY); break; case PIX_FMT_RGB24 : c->lumToYV12 = RENAME(rgb24ToY); break; @@ -2328,4 +2488,6 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c) } } #endif /* !COMPILE_TEMPLATE_MMX2 */ + if(isAnyRGB(c->srcFormat)) + c->hScale16= RENAME(hScale16); } diff --git a/libswscale/x86/yuv2rgb_mmx.c b/libswscale/x86/yuv2rgb_mmx.c index 439482bb71..d46d5126da 100644 --- a/libswscale/x86/yuv2rgb_mmx.c +++ b/libswscale/x86/yuv2rgb_mmx.c @@ -7,20 +7,20 @@ * 1,4,8bpp support and context / deglobalize stuff * by Michael Niedermayer (michaelni@gmx.at) * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ diff --git a/libswscale/x86/yuv2rgb_template.c b/libswscale/x86/yuv2rgb_template.c index 5d1fa5b309..926e3fb9c4 100644 --- a/libswscale/x86/yuv2rgb_template.c +++ b/libswscale/x86/yuv2rgb_template.c @@ -4,20 +4,20 @@ * Copyright (C) 2001-2007 Michael Niedermayer * (c) 2010 Konstantin Shishkov * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -43,17 +43,14 @@ if (h_size * depth > FFABS(dstStride[0])) \ h_size -= 8; \ \ - if (c->srcFormat == PIX_FMT_YUV422P) { \ - srcStride[1] *= 2; \ - srcStride[2] *= 2; \ - } \ + vshift = c->srcFormat != PIX_FMT_YUV422P; \ \ __asm__ volatile ("pxor %mm4, %mm4\n\t"); \ for (y = 0; y < srcSliceH; y++) { \ uint8_t *image = dst[0] + (y + srcSliceY) * dstStride[0]; \ const uint8_t *py = src[0] + y * srcStride[0]; \ - const uint8_t *pu = src[1] + (y >> 1) * srcStride[1]; \ - const uint8_t *pv = src[2] + (y >> 1) * srcStride[2]; \ + const uint8_t *pu = src[1] + (y >> vshift) * srcStride[1]; \ + const uint8_t *pv = src[2] + (y >> vshift) * srcStride[2]; \ x86_reg index = -h_size / 2; \ #define YUV2RGB_INITIAL_LOAD \ @@ -188,7 +185,7 @@ static inline int RENAME(yuv420_rgb15)(SwsContext *c, const uint8_t *src[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[]) { - int y, h_size; + int y, h_size, vshift; YUV2RGB_LOOP(2) @@ -216,7 +213,7 @@ static inline int RENAME(yuv420_rgb16)(SwsContext *c, const uint8_t *src[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[]) { - int y, h_size; + int y, h_size, vshift; YUV2RGB_LOOP(2) @@ -306,7 +303,7 @@ static inline int RENAME(yuv420_rgb24)(SwsContext *c, const uint8_t *src[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[]) { - int y, h_size; + int y, h_size, vshift; YUV2RGB_LOOP(3) @@ -324,7 +321,7 @@ static inline int RENAME(yuv420_bgr24)(SwsContext *c, const uint8_t *src[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[]) { - int y, h_size; + int y, h_size, vshift; YUV2RGB_LOOP(3) @@ -368,7 +365,7 @@ static inline int RENAME(yuv420_rgb32)(SwsContext *c, const uint8_t *src[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[]) { - int y, h_size; + int y, h_size, vshift; YUV2RGB_LOOP(4) @@ -389,7 +386,7 @@ static inline int RENAME(yuva420_rgb32)(SwsContext *c, const uint8_t *src[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[]) { - int y, h_size; + int y, h_size, vshift; YUV2RGB_LOOP(4) @@ -411,7 +408,7 @@ static inline int RENAME(yuv420_bgr32)(SwsContext *c, const uint8_t *src[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[]) { - int y, h_size; + int y, h_size, vshift; YUV2RGB_LOOP(4) @@ -432,7 +429,7 @@ static inline int RENAME(yuva420_bgr32)(SwsContext *c, const uint8_t *src[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[]) { - int y, h_size; + int y, h_size, vshift; YUV2RGB_LOOP(4) diff --git a/libswscale/yuv2rgb.c b/libswscale/yuv2rgb.c index cad09338d3..36182a5ea9 100644 --- a/libswscale/yuv2rgb.c +++ b/libswscale/yuv2rgb.c @@ -6,20 +6,20 @@ * 1,4,8bpp support and context / deglobalize stuff * by Michael Niedermayer (michaelni@gmx.at) * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -34,6 +34,7 @@ #include "swscale_internal.h" #include "libavutil/cpu.h" #include "libavutil/bswap.h" +#include "libavutil/pixdesc.h" extern const uint8_t dither_4x4_16[4][8]; extern const uint8_t dither_8x8_32[8][8]; @@ -521,7 +522,8 @@ SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c) if (t) return t; - av_log(c, AV_LOG_WARNING, "No accelerated colorspace conversion found from %s to %s.\n", sws_format_name(c->srcFormat), sws_format_name(c->dstFormat)); + av_log(c, AV_LOG_WARNING, "No accelerated colorspace conversion found from %s to %s.\n", + av_get_pix_fmt_name(c->srcFormat), av_get_pix_fmt_name(c->dstFormat)); switch (c->dstFormat) { case PIX_FMT_BGR48BE: |