Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/videolan/dav1d.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Victorien Le Couviour--Tuffet <victorien@videolan.org> 2020-04-15 19:18:30 +0300
committer: Victorien Le Couviour--Tuffet <victorien@videolan.org> 2020-04-16 12:43:08 +0300
commit: 98ed9be69b08f5438cce7e696b2c8eadfb3ce905 (patch)
tree: f5411f952a2bce3ce501c9f9056c94f40a4ceecd /src/wedge.c
parent: 6ea3fda58c17ec8d55d7fc90eb5305ade3f4ebbc (diff)
Fix MC masks alignment for sizes >= 64 for AVX-512
The masks need to be 64-byte aligned when w*h >= 64, as the AVX-512 code will try to load them 64 bytes at a time. (Also reduces the alignment of the 4x4 masks to 16 bytes, as a 32-byte alignment is unnecessary for them.)
Diffstat (limited to 'src/wedge.c')
-rw-r--r-- src/wedge.c 70
1 files changed, 35 insertions, 35 deletions
diff --git a/src/wedge.c b/src/wedge.c
index 5304dc3..6b14e9a 100644
--- a/src/wedge.c
+++ b/src/wedge.c
@@ -83,35 +83,35 @@ static const wedge_code_type wedge_codebook_16_heqw[16] = {
{ WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};
-static uint8_t ALIGN(wedge_masks_444_32x32[2 * 16 * 32 * 32], 32);
-static uint8_t ALIGN(wedge_masks_444_32x16[2 * 16 * 32 * 16], 32);
-static uint8_t ALIGN(wedge_masks_444_32x8 [2 * 16 * 32 * 8], 32);
-static uint8_t ALIGN(wedge_masks_444_16x32[2 * 16 * 16 * 32], 32);
-static uint8_t ALIGN(wedge_masks_444_16x16[2 * 16 * 16 * 16], 32);
-static uint8_t ALIGN(wedge_masks_444_16x8 [2 * 16 * 16 * 8], 32);
-static uint8_t ALIGN(wedge_masks_444_8x32 [2 * 16 * 8 * 32], 32);
-static uint8_t ALIGN(wedge_masks_444_8x16 [2 * 16 * 8 * 16], 32);
-static uint8_t ALIGN(wedge_masks_444_8x8 [2 * 16 * 8 * 8], 32);
-
-static uint8_t ALIGN(wedge_masks_422_16x32[2 * 16 * 16 * 32], 32);
-static uint8_t ALIGN(wedge_masks_422_16x16[2 * 16 * 16 * 16], 32);
-static uint8_t ALIGN(wedge_masks_422_16x8 [2 * 16 * 16 * 8], 32);
-static uint8_t ALIGN(wedge_masks_422_8x32 [2 * 16 * 8 * 32], 32);
-static uint8_t ALIGN(wedge_masks_422_8x16 [2 * 16 * 8 * 16], 32);
-static uint8_t ALIGN(wedge_masks_422_8x8 [2 * 16 * 8 * 8], 32);
-static uint8_t ALIGN(wedge_masks_422_4x32 [2 * 16 * 4 * 32], 32);
-static uint8_t ALIGN(wedge_masks_422_4x16 [2 * 16 * 4 * 16], 32);
+static uint8_t ALIGN(wedge_masks_444_32x32[2 * 16 * 32 * 32], 64);
+static uint8_t ALIGN(wedge_masks_444_32x16[2 * 16 * 32 * 16], 64);
+static uint8_t ALIGN(wedge_masks_444_32x8 [2 * 16 * 32 * 8], 64);
+static uint8_t ALIGN(wedge_masks_444_16x32[2 * 16 * 16 * 32], 64);
+static uint8_t ALIGN(wedge_masks_444_16x16[2 * 16 * 16 * 16], 64);
+static uint8_t ALIGN(wedge_masks_444_16x8 [2 * 16 * 16 * 8], 64);
+static uint8_t ALIGN(wedge_masks_444_8x32 [2 * 16 * 8 * 32], 64);
+static uint8_t ALIGN(wedge_masks_444_8x16 [2 * 16 * 8 * 16], 64);
+static uint8_t ALIGN(wedge_masks_444_8x8 [2 * 16 * 8 * 8], 64);
+
+static uint8_t ALIGN(wedge_masks_422_16x32[2 * 16 * 16 * 32], 64);
+static uint8_t ALIGN(wedge_masks_422_16x16[2 * 16 * 16 * 16], 64);
+static uint8_t ALIGN(wedge_masks_422_16x8 [2 * 16 * 16 * 8], 64);
+static uint8_t ALIGN(wedge_masks_422_8x32 [2 * 16 * 8 * 32], 64);
+static uint8_t ALIGN(wedge_masks_422_8x16 [2 * 16 * 8 * 16], 64);
+static uint8_t ALIGN(wedge_masks_422_8x8 [2 * 16 * 8 * 8], 64);
+static uint8_t ALIGN(wedge_masks_422_4x32 [2 * 16 * 4 * 32], 64);
+static uint8_t ALIGN(wedge_masks_422_4x16 [2 * 16 * 4 * 16], 64);
static uint8_t ALIGN(wedge_masks_422_4x8 [2 * 16 * 4 * 8], 32);
-static uint8_t ALIGN(wedge_masks_420_16x16[2 * 16 * 16 * 16], 32);
-static uint8_t ALIGN(wedge_masks_420_16x8 [2 * 16 * 16 * 8], 32);
-static uint8_t ALIGN(wedge_masks_420_16x4 [2 * 16 * 16 * 4], 32);
-static uint8_t ALIGN(wedge_masks_420_8x16 [2 * 16 * 8 * 16], 32);
-static uint8_t ALIGN(wedge_masks_420_8x8 [2 * 16 * 8 * 8], 32);
-static uint8_t ALIGN(wedge_masks_420_8x4 [2 * 16 * 8 * 4], 32);
-static uint8_t ALIGN(wedge_masks_420_4x16 [2 * 16 * 4 * 16], 32);
+static uint8_t ALIGN(wedge_masks_420_16x16[2 * 16 * 16 * 16], 64);
+static uint8_t ALIGN(wedge_masks_420_16x8 [2 * 16 * 16 * 8], 64);
+static uint8_t ALIGN(wedge_masks_420_16x4 [2 * 16 * 16 * 4], 64);
+static uint8_t ALIGN(wedge_masks_420_8x16 [2 * 16 * 8 * 16], 64);
+static uint8_t ALIGN(wedge_masks_420_8x8 [2 * 16 * 8 * 8], 64);
+static uint8_t ALIGN(wedge_masks_420_8x4 [2 * 16 * 8 * 4], 64);
+static uint8_t ALIGN(wedge_masks_420_4x16 [2 * 16 * 4 * 16], 64);
static uint8_t ALIGN(wedge_masks_420_4x8 [2 * 16 * 4 * 8], 32);
-static uint8_t ALIGN(wedge_masks_420_4x4 [2 * 16 * 4 * 4], 32);
+static uint8_t ALIGN(wedge_masks_420_4x4 [2 * 16 * 4 * 4], 16);
const uint8_t *dav1d_wedge_masks[N_BS_SIZES][3][2][16];
@@ -274,16 +274,16 @@ COLD void dav1d_init_wedge_masks(void) {
}
#define N_II_PRED_MODES (N_INTER_INTRA_PRED_MODES - 1)
-static uint8_t ALIGN(ii_dc_mask[32 * 32], 32);
-static uint8_t ALIGN(ii_nondc_mask_32x32[N_II_PRED_MODES][32 * 32], 32);
-static uint8_t ALIGN(ii_nondc_mask_16x32[N_II_PRED_MODES][16 * 32], 32);
-static uint8_t ALIGN(ii_nondc_mask_16x16[N_II_PRED_MODES][16 * 16], 32);
-static uint8_t ALIGN(ii_nondc_mask_8x32 [N_II_PRED_MODES][ 8 * 32], 32);
-static uint8_t ALIGN(ii_nondc_mask_8x16 [N_II_PRED_MODES][ 8 * 16], 32);
-static uint8_t ALIGN(ii_nondc_mask_8x8 [N_II_PRED_MODES][ 8 * 8], 32);
-static uint8_t ALIGN(ii_nondc_mask_4x16 [N_II_PRED_MODES][ 4 * 16], 32);
+static uint8_t ALIGN(ii_dc_mask[32 * 32], 64);
+static uint8_t ALIGN(ii_nondc_mask_32x32[N_II_PRED_MODES][32 * 32], 64);
+static uint8_t ALIGN(ii_nondc_mask_16x32[N_II_PRED_MODES][16 * 32], 64);
+static uint8_t ALIGN(ii_nondc_mask_16x16[N_II_PRED_MODES][16 * 16], 64);
+static uint8_t ALIGN(ii_nondc_mask_8x32 [N_II_PRED_MODES][ 8 * 32], 64);
+static uint8_t ALIGN(ii_nondc_mask_8x16 [N_II_PRED_MODES][ 8 * 16], 64);
+static uint8_t ALIGN(ii_nondc_mask_8x8 [N_II_PRED_MODES][ 8 * 8], 64);
+static uint8_t ALIGN(ii_nondc_mask_4x16 [N_II_PRED_MODES][ 4 * 16], 64);
static uint8_t ALIGN(ii_nondc_mask_4x8 [N_II_PRED_MODES][ 4 * 8], 32);
-static uint8_t ALIGN(ii_nondc_mask_4x4 [N_II_PRED_MODES][ 4 * 4], 32);
+static uint8_t ALIGN(ii_nondc_mask_4x4 [N_II_PRED_MODES][ 4 * 4], 16);
#undef N_II_PRED_MODES
#define set1(sz) \