Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/videolan/dav1d.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Ronald S. Bultje <rsbultje@gmail.com>, 2019-09-10 18:00:54 +0300
committer: Ronald S. Bultje <rsbultje@gmail.com>, 2019-09-10 21:20:38 +0300
commit: 556890be42d8affef280188c1a5d22cf299b2197 (patch)
tree: a9579d6b901c5b1f76b3181ee81694c8c13c1cf5 /src/film_grain_tmpl.c
parent: 6d363223dc979c393217c86eda54848531d69b9e (diff)
AVX2 for chroma 4:2:0 film grain reconstruction
fguv_32x32xn_8bpc_420_csfl0_c: 8945.4
fguv_32x32xn_8bpc_420_csfl0_avx2: 1001.6
fguv_32x32xn_8bpc_420_csfl1_c: 6363.4
fguv_32x32xn_8bpc_420_csfl1_avx2: 1299.5
Diffstat (limited to 'src/film_grain_tmpl.c')
-rw-r--r-- src/film_grain_tmpl.c 19
1 files changed, 9 insertions, 10 deletions
diff --git a/src/film_grain_tmpl.c b/src/film_grain_tmpl.c
index de2ded0..3d596e5 100644
--- a/src/film_grain_tmpl.c
+++ b/src/film_grain_tmpl.c
@@ -281,12 +281,11 @@ static void fgy_32x32xn_c(pixel *const dst_row, const pixel *const src_row,
static NOINLINE void
fguv_32x32xn_c(pixel *const dst_row, const pixel *const src_row,
- const ptrdiff_t stride, const pixel *const luma_row,
- const ptrdiff_t luma_stride, const int pw, const int bh,
- const Dav1dFilmGrainData *const data,
- const entry grain_lut[][GRAIN_WIDTH],
- const uint8_t scaling[SCALING_SIZE],
- const int uv, const int row_num, const int is_id,
+ const ptrdiff_t stride, const Dav1dFilmGrainData *const data,
+ const int pw, const uint8_t scaling[SCALING_SIZE],
+ const entry grain_lut[][GRAIN_WIDTH], const int bh,
+ const int row_num, const pixel *const luma_row,
+ const ptrdiff_t luma_stride, const int uv, const int is_id,
const int sx, const int sy HIGHBD_DECL_SUFFIX)
{
const int rows = 1 + (data->overlap_flag && row_num > 0);
@@ -320,8 +319,8 @@ fguv_32x32xn_c(pixel *const dst_row, const pixel *const src_row,
int offsets[2 /* col offset */][2 /* row offset */];
// process this row in BLOCK_SIZE^2 blocks (subsampled)
- for (int bx = 0; bx < (pw + sx) >> sx; bx += BLOCK_SIZE >> sx) {
- const int bw = (imin(BLOCK_SIZE, pw - (bx << sx)) + sx) >> sx;
+ for (int bx = 0; bx < pw; bx += BLOCK_SIZE >> sx) {
+ const int bw = imin(BLOCK_SIZE >> sx, pw - bx);
if (data->overlap_flag && bx) {
// shift previous offsets left
for (int i = 0; i < rows; i++)
@@ -412,8 +411,8 @@ fguv_32x32xn_c(pixel *const dst_row, const pixel *const src_row,
#define fguv_ss_fn(nm, ss_x, ss_y) \
static decl_fguv_32x32xn_fn(fguv_32x32xn_##nm##_c) { \
- fguv_32x32xn_c(dst_row, src_row, stride, luma_row, luma_stride, pw, bh, \
- data, grain_lut, scaling, uv_pl, row_num, is_id, ss_x, ss_y \
+ fguv_32x32xn_c(dst_row, src_row, stride, data, pw, scaling, grain_lut, bh, \
+ row_num, luma_row, luma_stride, uv_pl, is_id, ss_x, ss_y \
HIGHBD_TAIL_SUFFIX); \
}