diff options
author | Henrik Gramner <gramner@twoorioles.com> | 2019-11-22 22:50:57 +0300 |
---|---|---|
committer | Henrik Gramner <henrik@gramner.com> | 2019-11-27 15:38:42 +0300 |
commit | 82eda83acd4b903d9a1ea03687951f3d6d8cd6b8 (patch) | |
tree | 94c85d85a52ed8a1725d24ae445eade8c5e462c6 /src/picture.c | |
parent | 2e5e05b767f0c61d16a6df8ae787791555ed2c42 (diff) |
Avoid excessive L2 collisions with certain frame widths
Memory addresses with certain power-of-two offsets will map to the
same set of cache lines. Using such offsets as strides will cause
excessive cache evictions resulting in more cache misses.
Avoid this when allocating picture buffers by adding a small amount of
padding to the stride whenever it is a multiple of 1024. That threshold
is somewhat arbitrary, since the exact problematic value depends on the
hardware implementation.
Diffstat (limited to 'src/picture.c')
-rw-r--r-- | src/picture.c | 29 |
1 files changed, 18 insertions, 11 deletions
diff --git a/src/picture.c b/src/picture.c index b39e068..82197c3 100644 --- a/src/picture.c +++ b/src/picture.c @@ -52,17 +52,24 @@ int dav1d_default_picture_alloc(Dav1dPicture *const p, void *const cookie) { const int has_chroma = p->p.layout != DAV1D_PIXEL_LAYOUT_I400; const int ss_ver = p->p.layout == DAV1D_PIXEL_LAYOUT_I420; const int ss_hor = p->p.layout != DAV1D_PIXEL_LAYOUT_I444; - p->stride[0] = aligned_w << hbd; - p->stride[1] = has_chroma ? (aligned_w >> ss_hor) << hbd : 0; - const size_t y_sz = p->stride[0] * aligned_h; - const size_t uv_sz = p->stride[1] * (aligned_h >> ss_ver); - const size_t pic_size = y_sz + 2 * uv_sz; - - uint8_t *data = dav1d_alloc_aligned(pic_size + DAV1D_PICTURE_ALIGNMENT, - DAV1D_PICTURE_ALIGNMENT); - if (data == NULL) { - return DAV1D_ERR(ENOMEM); - } + ptrdiff_t y_stride = aligned_w << hbd; + ptrdiff_t uv_stride = has_chroma ? y_stride >> ss_hor : 0; + /* Due to how mapping of addresses to sets works in most L1 and L2 cache + * implementations, strides of multiples of certain power-of-two numbers + * may cause multiple rows of the same superblock to map to the same set, + * causing evictions of previous rows resulting in a reduction in cache + * hit rate. Avoid that by slightly padding the stride when necessary. */ + if (!(y_stride & 1023)) + y_stride += DAV1D_PICTURE_ALIGNMENT; + if (!(uv_stride & 1023) && has_chroma) + uv_stride += DAV1D_PICTURE_ALIGNMENT; + p->stride[0] = y_stride; + p->stride[1] = uv_stride; + const size_t y_sz = y_stride * aligned_h; + const size_t uv_sz = uv_stride * (aligned_h >> ss_ver); + const size_t pic_size = y_sz + 2 * uv_sz + DAV1D_PICTURE_ALIGNMENT; + uint8_t *data = dav1d_alloc_aligned(pic_size, DAV1D_PICTURE_ALIGNMENT); + if (!data) return DAV1D_ERR(ENOMEM); p->data[0] = data; p->data[1] = has_chroma ? data + y_sz : NULL; |