Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/quite/celt.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTimothy B.B Terriberry <tterribe@xiph.org>2008-09-21 23:10:58 +0400
committerJean-Marc Valin <jean-marc.valin@usherbrooke.ca>2008-09-22 05:37:41 +0400
commit5ee9715c5cb9694f0af6aa580ec9855b052d6d9b (patch)
treea557beeb9ac2eaacb93f4077b0de870e34ef18af /libcelt/cwrs.c
parentd910274f790ba586c40a625bb9120d13554c3443 (diff)
Change cwrsi() to operate on rows of U instead of columns.
It is no slower with a large number of pulses, and as much as 30% faster with a large number of dimensions.
Diffstat (limited to 'libcelt/cwrs.c')
-rw-r--r--libcelt/cwrs.c201
1 files changed, 66 insertions, 135 deletions
diff --git a/libcelt/cwrs.c b/libcelt/cwrs.c
index c4b5fe7..247d5fd 100644
--- a/libcelt/cwrs.c
+++ b/libcelt/cwrs.c
@@ -264,128 +264,83 @@ static inline void uprev64(celt_uint64_t *_ui,int _n,celt_uint64_t _ui0){
/*Returns the number of ways of choosing _m elements from a set of size _n with
replacement when a sign bit is needed for each unique element.
- On input, _u should be initialized to column (_m-1) of U(n,m).
- On exit, _u will be initialized to column _m of U(n,m).*/
-celt_uint32_t ncwrs_unext32(int _n,celt_uint32_t *_ui){
- celt_uint32_t ret;
- celt_uint32_t ui0;
- celt_uint32_t ui1;
- int j;
- ret=ui0=2;
- celt_assert(_n>=2);
- j=1; do {
- ui1=_ui[j]+_ui[j-1]+ui0;
- _ui[j-1]=ui0;
- ui0=ui1;
- ret+=ui0;
- } while (++j<_n);
- _ui[j-1]=ui0;
- return ret;
-}
-
-celt_uint64_t ncwrs_unext64(int _n,celt_uint64_t *_ui){
- celt_uint64_t ret;
- celt_uint64_t ui0;
- celt_uint64_t ui1;
- int j;
- ret=ui0=1;
- celt_assert(_n>=2);
- j=1; do {
- ui1=_ui[j]+_ui[j-1]+ui0;
- _ui[j-1]=ui0;
- ui0=ui1;
- ret+=ui0;
- } while (++j<_n);
- _ui[j-1]=ui0;
- return ret<<=1;
-}
-
-/*Returns the number of ways of choosing _m elements from a set of size _n with
- replacement when a sign bit is needed for each unique element.
- _u: On exit, _u[i] contains U(i+1,_m).*/
+ _u: On exit, _u[i] contains U(_n,i) for i in [0..._m+1].*/
celt_uint32_t ncwrs_u32(int _n,int _m,celt_uint32_t *_u){
- celt_uint32_t ret;
celt_uint32_t um2;
int k;
- /*If _m==0, _u[] should be set to zero and the return should be 1.*/
- celt_assert(_m>0);
- /*We'll overflow our buffer unless _n>=2.*/
- celt_assert(_n>=2);
- um2=_u[0]=1;
- if(_m<=6){
- if(_m<2){
- k=1;
- do _u[k]=1;
- while(++k<_n);
- }
- else{
- k=1;
- do _u[k]=(k<<1)+1;
- while(++k<_n);
- for(k=2;k<_m;k++)unext32(_u,_n,1);
- }
+ int len;
+ len=_m+2;
+ _u[0]=0;
+ _u[1]=um2=1;
+ if(_n<=6){
+ /*If _n==0, _u[0] should be 1 and the rest should be 0.*/
+ /*If _n==1, _u[i] should be 1 for i>1.*/
+ celt_assert(_n>=2);
+ /*If _m==0, the following do-while loop will overflow the buffer.*/
+ celt_assert(_m>0);
+ k=2;
+ do _u[k]=(k<<1)-1;
+ while(++k<len);
+ for(k=2;k<_n;k++)unext32(_u+2,_m,(k<<1)+1);
}
else{
celt_uint32_t um1;
celt_uint32_t n2m1;
- _u[1]=n2m1=um1=(_m<<1)-1;
- for(k=2;k<_n;k++){
+ _u[2]=n2m1=um1=(_n<<1)-1;
+ for(k=3;k<len;k++){
/*U(n,m) = ((2*n-1)*U(n,m-1)-U(n,m-2))/(m-1) + U(n,m-2)*/
- _u[k]=um2=imusdiv32even(n2m1,um1,um2,k)+um2;
- if(++k>=_n)break;
- _u[k]=um1=imusdiv32odd(n2m1,um2,um1,k>>1)+um1;
+ _u[k]=um2=imusdiv32even(n2m1,um1,um2,k-1)+um2;
+ if(++k>=len)break;
+ _u[k]=um1=imusdiv32odd(n2m1,um2,um1,k-1>>1)+um1;
}
}
- ret=1;
- k=1;
- do ret+=_u[k];
- while(++k<_n);
- return ret<<1;
+ return _u[_m]+_u[_m+1];
}
celt_uint64_t ncwrs_u64(int _n,int _m,celt_uint64_t *_u){
int k;
- CELT_MEMSET(_u,0,_n);
- if(_m<=0)return 1;
- if(_n<=0)return 0;
- for(k=1;k<_m;k++)unext64(_u,_n,1);
- return ncwrs_unext64(_n,_u);
+ int len;
+ len=_m+2;
+ _u[0]=0;
+ /*If _n==0, _u[0] should be 1 and the rest should be 0.*/
+ /*If _n==1, _u[i] should be 1 for i>1.*/
+ celt_assert(_n>=2);
+ k=1;
+ do _u[k]=(k<<1)-1;
+ while(++k<len);
+ for(k=2;k<_n;k++)unext64(_u+2,_m,(k<<1)+1);
+ /*TODO: For large _n, an imusdiv64 could make this O(_m) instead of
+ O(_n*_m), but would require an INV_TABLE twice as large, as well as lots
+ of 64x64->64 bit multiplies.*/
+ return _u[_m]+_u[_m+1];
}
/*Returns the _i'th combination of _m elements chosen from a set of size _n
with associated sign bits.
_y: Returns the vector of pulses.
- _u: Must contain entries [1..._n] of column _m of U() on input.
+ _u: Must contain entries [0..._m+1] of row _n of U() on input.
Its contents will be destructively modified.*/
-void cwrsi32(int _n,int _m,celt_uint32_t _i,celt_uint32_t _nc,int *_y,
- celt_uint32_t *_u){
- celt_uint32_t p;
- celt_uint32_t q;
- int j;
- int k;
+void cwrsi32(int _n,int _m,celt_uint32_t _i,int *_y,celt_uint32_t *_u){
+ int j;
+ int k;
celt_assert(_n>0);
- p=_nc;
- q=0;
j=0;
k=_m;
do{
- int s;
- int yj;
- p-=q;
- q=_u[_n-j-1];
- p-=q;
+ celt_uint32_t p;
+ int s;
+ int yj;
+ p=_u[k+1];
s=_i>=p;
if(s)_i-=p;
yj=k;
- while(q>_i){
- uprev32(_u,_n-j,--k>0);
- p=q;
- q=_u[_n-j-1];
- }
- _i-=q;
+ p=_u[k];
+ while(p>_i)p=_u[--k];
+ _i-=p;
yj-=k;
_y[j]=yj-(yj<<1&-s);
+ uprev32(_u,k+2,0);
}
while(++j<_n);
}
@@ -393,36 +348,28 @@ void cwrsi32(int _n,int _m,celt_uint32_t _i,celt_uint32_t _nc,int *_y,
/*Returns the _i'th combination of _m elements chosen from a set of size _n
with associated sign bits.
_y: Returns the vector of pulses.
- _u: Must contain entries [1..._n] of column _m of U() on input.
+ _u: Must contain entries [0..._m+1] of row _n of U() on input.
Its contents will be destructively modified.*/
-void cwrsi64(int _n,int _m,celt_uint64_t _i,celt_uint64_t _nc,int *_y,
- celt_uint64_t *_u){
- celt_uint64_t p;
- celt_uint64_t q;
- int j;
- int k;
+void cwrsi64(int _n,int _m,celt_uint64_t _i,int *_y,celt_uint64_t *_u){
+ int j;
+ int k;
celt_assert(_n>0);
- p=_nc;
- q=0;
j=0;
k=_m;
do{
- int s;
- int yj;
- p-=q;
- q=_u[_n-j-1];
- p-=q;
+ celt_uint64_t p;
+ int s;
+ int yj;
+ p=_u[k+1];
s=_i>=p;
if(s)_i-=p;
yj=k;
- while(q>_i){
- uprev64(_u,_n-j,--k>0);
- p=q;
- q=_u[_n-j-1];
- }
- _i-=q;
+ p=_u[k];
+ while(p>_i)p=_u[--k];
+ _i-=p;
yj-=k;
_y[j]=yj-(yj<<1&-s);
+ uprev64(_u,k+2,0);
}
while(++j<_n);
}
@@ -433,32 +380,26 @@ void cwrsi64(int _n,int _m,celt_uint64_t _i,celt_uint64_t _nc,int *_y,
_nc: Returns V(_n,_m).*/
celt_uint32_t icwrs32(int _n,int _m,celt_uint32_t *_nc,const int *_y,
celt_uint32_t *_u){
- celt_uint32_t nc;
celt_uint32_t i;
int j;
int k;
/*We can't unroll the first two iterations of the loop unless _n>=2.*/
celt_assert(_n>=2);
- nc=1;
i=_y[_n-1]<0;
_u[0]=0;
for(k=1;k<=_m+1;k++)_u[k]=(k<<1)-1;
k=abs(_y[_n-1]);
j=_n-2;
- nc+=_u[_m];
i+=_u[k];
k+=abs(_y[j]);
if(_y[j]<0)i+=_u[k+1];
while(j-->0){
unext32(_u,_m+2,0);
- nc+=_u[_m];
i+=_u[k];
k+=abs(_y[j]);
if(_y[j]<0)i+=_u[k+1];
}
- /*If _m==0, nc should not be doubled.*/
- celt_assert(_m>0);
- *_nc=nc<<1;
+ *_nc=_u[_m]+_u[_m+1];
return i;
}
@@ -468,32 +409,26 @@ celt_uint32_t icwrs32(int _n,int _m,celt_uint32_t *_nc,const int *_y,
_nc: Returns V(_n,_m).*/
celt_uint64_t icwrs64(int _n,int _m,celt_uint64_t *_nc,const int *_y,
celt_uint64_t *_u){
- celt_uint64_t nc;
celt_uint64_t i;
int j;
int k;
/*We can't unroll the first two iterations of the loop unless _n>=2.*/
celt_assert(_n>=2);
- nc=1;
i=_y[_n-1]<0;
_u[0]=0;
for(k=1;k<=_m+1;k++)_u[k]=(k<<1)-1;
k=abs(_y[_n-1]);
j=_n-2;
- nc+=_u[_m];
i+=_u[k];
k+=abs(_y[j]);
if(_y[j]<0)i+=_u[k+1];
while(j-->0){
unext64(_u,_m+2,0);
- nc+=_u[_m];
i+=_u[k];
k+=abs(_y[j]);
if(_y[j]<0)i+=_u[k+1];
}
- /*If _m==0, nc should not be doubled.*/
- celt_assert(_m>0);
- *_nc=nc<<1;
+ *_nc=_u[_m]+_u[_m+1];
return i;
}
@@ -526,7 +461,7 @@ int get_required_bits(int N, int K, int frac)
{
VARDECL(celt_uint64_t,u);
SAVE_STACK;
- ALLOC(u,N,celt_uint64_t);
+ ALLOC(u,K+2,celt_uint64_t);
nbits = log2_frac64(ncwrs_u64(N,K,u), frac);
RESTORE_STACK;
} else {
@@ -564,21 +499,17 @@ void encode_pulses(int *_y, int N, int K, ec_enc *enc)
static inline void decode_pulse32(int _n,int _m,int *_y,ec_dec *_dec){
VARDECL(celt_uint32_t,u);
- celt_uint32_t nc;
SAVE_STACK;
- ALLOC(u,_n,celt_uint32_t);
- nc=ncwrs_u32(_n,_m,u);
- cwrsi32(_n,_m,ec_dec_uint(_dec,nc),nc,_y,u);
+ ALLOC(u,_m+2,celt_uint32_t);
+ cwrsi32(_n,_m,ec_dec_uint(_dec,ncwrs_u32(_n,_m,u)),_y,u);
RESTORE_STACK;
}
static inline void decode_pulse64(int _n,int _m,int *_y,ec_dec *_dec){
VARDECL(celt_uint64_t,u);
- celt_uint64_t nc;
SAVE_STACK;
- ALLOC(u,_n,celt_uint64_t);
- nc=ncwrs_u64(_n,_m,u);
- cwrsi64(_n,_m,ec_dec_uint64(_dec,nc),nc,_y,u);
+ ALLOC(u,_m+2,celt_uint64_t);
+ cwrsi64(_n,_m,ec_dec_uint64(_dec,ncwrs_u64(_n,_m,u)),_y,u);
RESTORE_STACK;
}