diff options
author | Timothy B. Terriberry <tterribe@xiph.org> | 2013-06-15 10:57:19 +0400 |
---|---|---|
committer | Jean-Marc Valin <jmvalin@jmvalin.ca> | 2013-06-15 11:06:57 +0400 |
commit | ce15e6531932624387b5c49bb1fbfccd19710bca (patch) | |
tree | 197418fdd64ea5c46b16f7661d520125c1055b8e | |
parent | 63f744d583938830e4b2860eacc8047ce022c5ec (diff) |
Split cwrsi() by pulses vs. dimensions.
This lets us cut out a bunch of work in the large _n, small _k case
where most of the dimensions won't have any pulses.
It also gets rid of all remaining usage of CELT_PVQ_U() in cwrsi(),
leaving just a single test instead of lots of mins and maxes, and
makes a bunch of the jump threading more obvious.
This is a 1.6% decoder speedup on a 96 kbps comp48-stereo encode on
a Cortex A8.
-rw-r--r-- | celt/cwrs.c | 55 |
1 file changed, 35 insertions, 20 deletions
```diff
diff --git a/celt/cwrs.c b/celt/cwrs.c
index d384dfe2..029232c8 100644
--- a/celt/cwrs.c
+++ b/celt/cwrs.c
@@ -467,34 +467,49 @@ static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y){
   celt_assert(_k>0);
   celt_assert(_n>1);
   while(_n>2){
-    /*Are the pulses in this dimension negative?*/
-    p=CELT_PVQ_U(_n,_k+1);
-    s=-(_i>=p);
-    _i-=p&s;
-    /*Count how many pulses were placed in this dimension.*/
-    k0=_k;
-    p=CELT_PVQ_U(_n,_k);
-    if(_k>_n){
+    opus_uint32 q;
+    /*Lots of pulses case:*/
+    if(_k>=_n){
       const opus_uint32 *row;
-      opus_uint32 q;
       row=CELT_PVQ_U_ROW[_n];
+      /*Are the pulses in this dimension negative?*/
+      p=row[_k+1];
+      s=-(_i>=p);
+      _i-=p&s;
+      /*Count how many pulses were placed in this dimension.*/
+      k0=_k;
       q=row[_n];
       if(q>_i){
         celt_assert(p>q);
-        /*Setting p=q is unnecessary, but it helps the optimizer prove p>_i,
-           allowing it to jump straight past the initial test in the second
-           loop below.
-          Once it's removed that first comparison, a smart compiler should be
-           able to figure out that the result of this assignment isn't used and
-           optimize it away anyway.*/
-        p=q;
         _k=_n;
+        do p=CELT_PVQ_U_ROW[--_k][_n];
+        while(p>_i);
+      }
+      else for(p=row[_k];p>_i;p=row[_k])_k--;
+      _i-=p;
+      *_y++=(k0-_k+s)^s;
+    }
+    /*Lots of dimensions case:*/
+    else{
+      /*Are there any pulses in this dimension at all?*/
+      p=CELT_PVQ_U_ROW[_k][_n];
+      q=CELT_PVQ_U_ROW[_k+1][_n];
+      if(p<=_i&&_i<q){
+        _i-=p;
+        *_y++=0;
+      }
+      else{
+        /*Are the pulses in this dimension negative?*/
+        s=-(_i>=q);
+        _i-=q&s;
+        /*Count how many pulses were placed in this dimension.*/
+        k0=_k;
+        do p=CELT_PVQ_U_ROW[--_k][_n];
+        while(p>_i);
+        _i-=p;
+        *_y++=(k0-_k+s)^s;
       }
-      else for(;p>_i;p=row[_k])_k--;
     }
-    for(;p>_i;p=CELT_PVQ_U_ROW[_k][_n])_k--;
-    _i-=p;
-    *_y++=(k0-_k+s)^s;
     _n--;
   }
   /*_n==2*/
```