diff options
author | Ronald S. Bultje <rsbultje@gmail.com> | 2019-09-10 18:00:54 +0300 |
---|---|---|
committer | Ronald S. Bultje <rsbultje@gmail.com> | 2019-09-10 21:20:38 +0300 |
commit | 556890be42d8affef280188c1a5d22cf299b2197 (patch) | |
tree | a9579d6b901c5b1f76b3181ee81694c8c13c1cf5 /src/film_grain_tmpl.c | |
parent | 6d363223dc979c393217c86eda54848531d69b9e (diff) |
AVX2 for chroma 4:2:0 film grain reconstruction
fguv_32x32xn_8bpc_420_csfl0_c: 8945.4
fguv_32x32xn_8bpc_420_csfl0_avx2: 1001.6
fguv_32x32xn_8bpc_420_csfl1_c: 6363.4
fguv_32x32xn_8bpc_420_csfl1_avx2: 1299.5
Diffstat (limited to 'src/film_grain_tmpl.c')
-rw-r--r-- | src/film_grain_tmpl.c | 19 |
1 file changed, 9 insertions, 10 deletions
```diff
diff --git a/src/film_grain_tmpl.c b/src/film_grain_tmpl.c
index de2ded0..3d596e5 100644
--- a/src/film_grain_tmpl.c
+++ b/src/film_grain_tmpl.c
@@ -281,12 +281,11 @@ static void fgy_32x32xn_c(pixel *const dst_row, const pixel *const src_row,
 
 static NOINLINE void
 fguv_32x32xn_c(pixel *const dst_row, const pixel *const src_row,
-               const ptrdiff_t stride, const pixel *const luma_row,
-               const ptrdiff_t luma_stride, const int pw, const int bh,
-               const Dav1dFilmGrainData *const data,
-               const entry grain_lut[][GRAIN_WIDTH],
-               const uint8_t scaling[SCALING_SIZE],
-               const int uv, const int row_num, const int is_id,
+               const ptrdiff_t stride, const Dav1dFilmGrainData *const data,
+               const int pw, const uint8_t scaling[SCALING_SIZE],
+               const entry grain_lut[][GRAIN_WIDTH], const int bh,
+               const int row_num, const pixel *const luma_row,
+               const ptrdiff_t luma_stride, const int uv, const int is_id,
                const int sx, const int sy HIGHBD_DECL_SUFFIX)
 {
     const int rows = 1 + (data->overlap_flag && row_num > 0);
@@ -320,8 +319,8 @@ fguv_32x32xn_c(pixel *const dst_row, const pixel *const src_row,
     int offsets[2 /* col offset */][2 /* row offset */];
 
     // process this row in BLOCK_SIZE^2 blocks (subsampled)
-    for (int bx = 0; bx < (pw + sx) >> sx; bx += BLOCK_SIZE >> sx) {
-        const int bw = (imin(BLOCK_SIZE, pw - (bx << sx)) + sx) >> sx;
+    for (int bx = 0; bx < pw; bx += BLOCK_SIZE >> sx) {
+        const int bw = imin(BLOCK_SIZE >> sx, pw - bx);
         if (data->overlap_flag && bx) {
             // shift previous offsets left
             for (int i = 0; i < rows; i++)
@@ -412,8 +411,8 @@ fguv_32x32xn_c(pixel *const dst_row, const pixel *const src_row,
 
 #define fguv_ss_fn(nm, ss_x, ss_y) \
 static decl_fguv_32x32xn_fn(fguv_32x32xn_##nm##_c) { \
-    fguv_32x32xn_c(dst_row, src_row, stride, luma_row, luma_stride, pw, bh, \
-                   data, grain_lut, scaling, uv_pl, row_num, is_id, ss_x, ss_y \
+    fguv_32x32xn_c(dst_row, src_row, stride, data, pw, scaling, grain_lut, bh, \
+                   row_num, luma_row, luma_stride, uv_pl, is_id, ss_x, ss_y \
                    HIGHBD_TAIL_SUFFIX); \
 }
 
```