Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/videolan/dav1d.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/src/cdf.h
diff options
context:
space:
mode:
author: Henrik Gramner <gramner@twoorioles.com>, 2019-08-06 16:17:31 +0300
committer: Henrik Gramner <henrik@gramner.com>, 2019-08-13 19:51:49 +0300
commit: e29fd5c0016fec27c88a36ac6f6eaaf416d91330 (patch)
tree: c0bb371a91af5abbe470751189e35dcee5f8314b /src/cdf.h
parent: a819653e1b71ea69c13faaa64c5bb89534ce2772 (diff)
Add msac optimizations
* Eliminate the trailing zero after the CDF probabilities. We can reuse the count value as a terminator instead. This reduces the size of the CDF context by around 8%.
* Align the CDF arrays.
* Various other minor optimizations.
Diffstat (limited to 'src/cdf.h')
-rw-r--r-- src/cdf.h 148
1 file changed, 74 insertions, 74 deletions
diff --git a/src/cdf.h b/src/cdf.h
index 7b8baa3..6d1c29a 100644
--- a/src/cdf.h
+++ b/src/cdf.h
@@ -37,94 +37,94 @@
/* Buffers padded to [8] or [16] for SIMD where needed. */
typedef struct CdfModeContext {
- uint16_t y_mode[4][N_INTRA_PRED_MODES + 1 + 2];
- uint16_t use_filter_intra[N_BS_SIZES][2];
- uint16_t filter_intra[5 + 1];
- uint16_t uv_mode[2][N_INTRA_PRED_MODES][N_UV_INTRA_PRED_MODES + 1 + 1];
- uint16_t angle_delta[8][8];
- uint16_t filter[2][8][DAV1D_N_SWITCHABLE_FILTERS + 1];
- uint16_t newmv_mode[6][2];
- uint16_t globalmv_mode[2][2];
- uint16_t refmv_mode[6][2];
- uint16_t drl_bit[3][2];
- uint16_t comp_inter_mode[8][N_COMP_INTER_PRED_MODES + 1];
- uint16_t intra[4][2];
- uint16_t comp[5][2];
- uint16_t comp_dir[5][2];
- uint16_t jnt_comp[6][2];
- uint16_t mask_comp[6][2];
- uint16_t wedge_comp[9][2];
- uint16_t wedge_idx[9][16 + 1];
- uint16_t interintra[7][2];
- uint16_t interintra_mode[4][5];
- uint16_t interintra_wedge[7][2];
- uint16_t ref[6][3][2];
- uint16_t comp_fwd_ref[3][3][2];
- uint16_t comp_bwd_ref[2][3][2];
- uint16_t comp_uni_ref[3][3][2];
- uint16_t txsz[N_TX_SIZES - 1][3][4];
- uint16_t txpart[7][3][2];
- uint16_t txtp_inter1[2][16 + 1];
- uint16_t txtp_inter2[12 + 1 + 3];
- uint16_t txtp_inter3[4][2];
- uint16_t txtp_intra1[2][N_INTRA_PRED_MODES][7 + 1];
- uint16_t txtp_intra2[3][N_INTRA_PRED_MODES][5 + 1 + 2];
- uint16_t skip[3][2];
- uint16_t skip_mode[3][2];
- uint16_t partition[N_BL_LEVELS][4][N_PARTITIONS + 1 + 5];
- uint16_t seg_pred[3][2];
- uint16_t seg_id[3][DAV1D_MAX_SEGMENTS + 1];
- uint16_t cfl_sign[8 + 1];
- uint16_t cfl_alpha[6][16 + 1];
- uint16_t restore_wiener[2];
- uint16_t restore_sgrproj[2];
- uint16_t restore_switchable[3 + 1];
- uint16_t delta_q[4 + 1];
- uint16_t delta_lf[5][4 + 1];
- uint16_t obmc[N_BS_SIZES][2];
- uint16_t motion_mode[N_BS_SIZES][3 + 1];
- uint16_t pal_y[7][3][2];
- uint16_t pal_uv[2][2];
- uint16_t pal_sz[2][7][7 + 1];
- uint16_t color_map[2][7][5][8 + 1];
- uint16_t intrabc[2];
+ ALIGN(uint16_t y_mode[4][N_INTRA_PRED_MODES + 3], 32);
+ ALIGN(uint16_t uv_mode[2][N_INTRA_PRED_MODES][N_UV_INTRA_PRED_MODES + 2], 32);
+ ALIGN(uint16_t wedge_idx[9][16], 32);
+ ALIGN(uint16_t partition[N_BL_LEVELS][4][N_PARTITIONS + 6], 32);
+ ALIGN(uint16_t cfl_alpha[6][16], 32);
+ ALIGN(uint16_t txtp_inter1[2][16], 32);
+ ALIGN(uint16_t txtp_inter2[12 + 4], 32);
+ ALIGN(uint16_t txtp_intra1[2][N_INTRA_PRED_MODES][7 + 1], 16);
+ ALIGN(uint16_t txtp_intra2[3][N_INTRA_PRED_MODES][5 + 3], 16);
+ ALIGN(uint16_t cfl_sign[8], 16);
+ ALIGN(uint16_t angle_delta[8][8], 16);
+ ALIGN(uint16_t filter_intra[5 + 3], 16);
+ ALIGN(uint16_t comp_inter_mode[8][N_COMP_INTER_PRED_MODES], 16);
+ ALIGN(uint16_t seg_id[3][DAV1D_MAX_SEGMENTS], 16);
+ ALIGN(uint16_t pal_sz[2][7][7 + 1], 16);
+ ALIGN(uint16_t color_map[2][7][5][8], 16);
+ ALIGN(uint16_t filter[2][8][DAV1D_N_SWITCHABLE_FILTERS + 1], 8);
+ ALIGN(uint16_t txsz[N_TX_SIZES - 1][3][4], 8);
+ ALIGN(uint16_t motion_mode[N_BS_SIZES][3 + 1], 8);
+ ALIGN(uint16_t delta_q[4], 8);
+ ALIGN(uint16_t delta_lf[5][4], 8);
+ ALIGN(uint16_t interintra_mode[4][4], 8);
+ ALIGN(uint16_t restore_switchable[3 + 1], 8);
+ ALIGN(uint16_t restore_wiener[2], 4);
+ ALIGN(uint16_t restore_sgrproj[2], 4);
+ ALIGN(uint16_t interintra[7][2], 4);
+ ALIGN(uint16_t interintra_wedge[7][2], 4);
+ ALIGN(uint16_t txtp_inter3[4][2], 4);
+ ALIGN(uint16_t use_filter_intra[N_BS_SIZES][2], 4);
+ ALIGN(uint16_t newmv_mode[6][2], 4);
+ ALIGN(uint16_t globalmv_mode[2][2], 4);
+ ALIGN(uint16_t refmv_mode[6][2], 4);
+ ALIGN(uint16_t drl_bit[3][2], 4);
+ ALIGN(uint16_t intra[4][2], 4);
+ ALIGN(uint16_t comp[5][2], 4);
+ ALIGN(uint16_t comp_dir[5][2], 4);
+ ALIGN(uint16_t jnt_comp[6][2], 4);
+ ALIGN(uint16_t mask_comp[6][2], 4);
+ ALIGN(uint16_t wedge_comp[9][2], 4);
+ ALIGN(uint16_t ref[6][3][2], 4);
+ ALIGN(uint16_t comp_fwd_ref[3][3][2], 4);
+ ALIGN(uint16_t comp_bwd_ref[2][3][2], 4);
+ ALIGN(uint16_t comp_uni_ref[3][3][2], 4);
+ ALIGN(uint16_t txpart[7][3][2], 4);
+ ALIGN(uint16_t skip[3][2], 4);
+ ALIGN(uint16_t skip_mode[3][2], 4);
+ ALIGN(uint16_t seg_pred[3][2], 4);
+ ALIGN(uint16_t obmc[N_BS_SIZES][2], 4);
+ ALIGN(uint16_t pal_y[7][3][2], 4);
+ ALIGN(uint16_t pal_uv[2][2], 4);
+ ALIGN(uint16_t intrabc[2], 4);
} CdfModeContext;
typedef struct CdfCoefContext {
- uint16_t skip[N_TX_SIZES][13][2];
- uint16_t eob_bin_16[2][2][6];
- uint16_t eob_bin_32[2][2][7 + 1];
- uint16_t eob_bin_64[2][2][8];
- uint16_t eob_bin_128[2][2][9];
- uint16_t eob_bin_256[2][2][10 + 6];
- uint16_t eob_bin_512[2][11 + 5];
- uint16_t eob_bin_1024[2][12 + 4];
- uint16_t eob_hi_bit[N_TX_SIZES][2][11 /*22*/][2];
- uint16_t eob_base_tok[N_TX_SIZES][2][4][4];
- uint16_t base_tok[N_TX_SIZES][2][41][5];
- uint16_t dc_sign[2][3][2];
- uint16_t br_tok[4 /*5*/][2][21][5];
+ ALIGN(uint16_t eob_bin_16[2][2][5 + 3], 16);
+ ALIGN(uint16_t eob_bin_32[2][2][6 + 2], 16);
+ ALIGN(uint16_t eob_bin_64[2][2][7 + 1], 16);
+ ALIGN(uint16_t eob_bin_128[2][2][8 + 0], 16);
+ ALIGN(uint16_t eob_bin_256[2][2][9 + 7], 32);
+ ALIGN(uint16_t eob_bin_512[2][10 + 6], 32);
+ ALIGN(uint16_t eob_bin_1024[2][11 + 5], 32);
+ ALIGN(uint16_t eob_base_tok[N_TX_SIZES][2][4][4], 8);
+ ALIGN(uint16_t base_tok[N_TX_SIZES][2][41][4], 8);
+ ALIGN(uint16_t br_tok[4 /*5*/][2][21][4], 8);
+ ALIGN(uint16_t eob_hi_bit[N_TX_SIZES][2][11 /*22*/][2], 4);
+ ALIGN(uint16_t skip[N_TX_SIZES][13][2], 4);
+ ALIGN(uint16_t dc_sign[2][3][2], 4);
} CdfCoefContext;
typedef struct CdfMvComponent {
- uint16_t classes[11 + 1 + 4];
- uint16_t class0[2];
- uint16_t classN[10][2];
- uint16_t class0_fp[2][4 + 1];
- uint16_t classN_fp[4 + 1];
- uint16_t class0_hp[2];
- uint16_t classN_hp[2];
- uint16_t sign[2];
+ ALIGN(uint16_t classes[11 + 5], 32);
+ ALIGN(uint16_t class0_fp[2][4], 8);
+ ALIGN(uint16_t classN_fp[4], 8);
+ ALIGN(uint16_t class0_hp[2], 4);
+ ALIGN(uint16_t classN_hp[2], 4);
+ ALIGN(uint16_t class0[2], 4);
+ ALIGN(uint16_t classN[10][2], 4);
+ ALIGN(uint16_t sign[2], 4);
} CdfMvComponent;
typedef struct CdfMvContext {
CdfMvComponent comp[2];
- uint16_t joint[N_MV_JOINTS + 1];
+ ALIGN(uint16_t joint[N_MV_JOINTS], 8);
} CdfMvContext;
typedef struct CdfContext {
CdfModeContext m;
- uint16_t kfym[5][5][N_INTRA_PRED_MODES + 1 + 2];
+ ALIGN(uint16_t kfym[5][5][N_INTRA_PRED_MODES + 3], 32);
CdfCoefContext coef;
CdfMvContext mv, dmv;
} CdfContext;