diff options
Diffstat (limited to 'newlib/libm/machine/spu/headers/tgammaf4.h')
-rw-r--r-- | newlib/libm/machine/spu/headers/tgammaf4.h | 241 |
1 files changed, 241 insertions, 0 deletions
diff --git a/newlib/libm/machine/spu/headers/tgammaf4.h b/newlib/libm/machine/spu/headers/tgammaf4.h new file mode 100644 index 000000000..4f10d8c13 --- /dev/null +++ b/newlib/libm/machine/spu/headers/tgammaf4.h @@ -0,0 +1,241 @@ +/* -------------------------------------------------------------- */ +/* (C)Copyright 2006,2007, */ +/* International Business Machines Corporation */ +/* All Rights Reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the */ +/* following conditions are met: */ +/* */ +/* - Redistributions of source code must retain the above copyright*/ +/* notice, this list of conditions and the following disclaimer. */ +/* */ +/* - Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* - Neither the name of IBM Corporation nor the names of its */ +/* contributors may be used to endorse or promote products */ +/* derived from this software without specific prior written */ +/* permission. */ +/* Redistributions of source code must retain the above copyright */ +/* notice, this list of conditions and the following disclaimer. */ +/* */ +/* Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* Neither the name of IBM Corporation nor the names of its */ +/* contributors may be used to endorse or promote products */ +/* derived from this software without specific prior written */ +/* permission. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ +/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */ +/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */ +/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */ +/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */ +/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */ +/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ +/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */ +/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */ +/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +/* -------------------------------------------------------------- */ +/* PROLOG END TAG zYx */ +#ifdef __SPU__ +#ifndef _TGAMMAF4_H_ +#define _TGAMMAF4_H_ 1 + +#include <spu_intrinsics.h> +#include "simdmath.h" + +#include "recipf4.h" +#include "truncf4.h" +#include "expf4.h" +#include "logf4.h" +#include "divf4.h" +#include "sinf4.h" +#include "powf4.h" +#include "tgammad2.h" + +/* + * FUNCTION + * vector float _tgammaf4(vector float x) + * + * DESCRIPTION + * The tgammaf4 function returns a vector containing tgamma for each + * element of x + * + * We take a fairly standard approach - break the domain into 5 separate regions: + * + * 1. [-infinity, 0) - use gamma(x) = pi/(x*gamma(-x)*sin(x*pi)) + * 2. [0, 1) - push x into [1,2), then adjust the + * result. + * 3. [1, 2) - use a rational approximation. + * 4. [2, 10) - pull back into [1, 2), then adjust + * the result. + * 5. [10, +infinity] - use Stirling's Approximation. + * + * + * Special Cases: + * - tgamma(+/- 0) returns +/- infinity + * - tgamma(negative integer) returns NaN + * - tgamma(-infinity) returns NaN + * - tgamma(infinity) returns infinity + * + */ + +/* + * Coefficients for Stirling's Series for Gamma() are defined in + * tgammad2.h + */ + +/* + * Rational Approximation Coefficients for the + * domain [1, 2) are defined in tgammad2.h + */ + + +static __inline vector float _tgammaf4(vector float x) +{ + vector float signbit = spu_splats(-0.0f); + vector float zerof = spu_splats(0.0f); + vector float halff = spu_splats(0.5f); + vector float onef = spu_splats(1.0f); + vector float ninep9f = (vector float)spu_splats(0x411FFFFF); /* Next closest to 10.0 */ + vector float t38f = spu_splats(38.0f); + vector float pi = spu_splats((float)SM_PI); + vector float sqrt2pi = spu_splats(2.506628274631000502415765284811f); + vector float inf = (vec_float4)spu_splats(0x7F800000); + vector float nan = (vec_float4)spu_splats(0x7FFFFFFF); + + vector float xabs; + vector float xscaled; + vector float xtrunc; + vector float xinv; + vector float nresult; /* Negative x result */ + vector float rresult; /* Rational Approx result */ + vector float sresult; /* Stirling's result */ + vector float result; + vector float pr,qr; + + vector unsigned int gt0 = spu_cmpgt(x, zerof); + vector unsigned int gt1 = spu_cmpgt(x, onef); + vector unsigned int gt9p9 = spu_cmpgt(x, ninep9f); + vector unsigned int gt38 = spu_cmpgt(x, t38f); + + xabs = spu_andc(x, signbit); + + /* + * For x in [0, 1], add 1 to x, use rational + * approximation, then use: + * + * gamma(x) = gamma(x+1)/x + * + */ + xabs = spu_sel(spu_add(xabs, onef), xabs, gt1); + xtrunc = _truncf4(xabs); + + + /* + * For x in [2, 10): + */ + xscaled = spu_add(onef, spu_sub(xabs, xtrunc)); + + /* + * For x in [1,2), use a rational approximation. + */ + pr = spu_madd(xscaled, spu_splats((float)TGD2_P07), spu_splats((float)TGD2_P06)); + pr = spu_madd(pr, xscaled, spu_splats((float)TGD2_P05)); + pr = spu_madd(pr, xscaled, spu_splats((float)TGD2_P04)); + pr = spu_madd(pr, xscaled, spu_splats((float)TGD2_P03)); + pr = spu_madd(pr, xscaled, spu_splats((float)TGD2_P02)); + pr = spu_madd(pr, xscaled, spu_splats((float)TGD2_P01)); + pr = spu_madd(pr, xscaled, spu_splats((float)TGD2_P00)); + + qr = spu_madd(xscaled, spu_splats((float)TGD2_Q07), spu_splats((float)TGD2_Q06)); + qr = spu_madd(qr, xscaled, spu_splats((float)TGD2_Q05)); + qr = spu_madd(qr, xscaled, spu_splats((float)TGD2_Q04)); + qr = spu_madd(qr, xscaled, spu_splats((float)TGD2_Q03)); + qr = spu_madd(qr, xscaled, spu_splats((float)TGD2_Q02)); + qr = spu_madd(qr, xscaled, spu_splats((float)TGD2_Q01)); + qr = spu_madd(qr, xscaled, spu_splats((float)TGD2_Q00)); + + rresult = _divf4(pr, qr); + rresult = spu_sel(_divf4(rresult, x), rresult, gt1); + + /* + * If x was in [2,10) and we pulled it into [1,2), we need to push + * it back out again. + */ + rresult = spu_sel(rresult, spu_mul(rresult, xscaled), spu_cmpgt(x, xscaled)); /* [2,3) */ + xscaled = spu_add(xscaled, onef); + rresult = spu_sel(rresult, spu_mul(rresult, xscaled), spu_cmpgt(x, xscaled)); /* [3,4) */ + xscaled = spu_add(xscaled, onef); + rresult = spu_sel(rresult, spu_mul(rresult, xscaled), spu_cmpgt(x, xscaled)); /* [4,5) */ + xscaled = spu_add(xscaled, onef); + rresult = spu_sel(rresult, spu_mul(rresult, xscaled), spu_cmpgt(x, xscaled)); /* [5,6) */ + xscaled = spu_add(xscaled, onef); + rresult = spu_sel(rresult, spu_mul(rresult, xscaled), spu_cmpgt(x, xscaled)); /* [6,7) */ + xscaled = spu_add(xscaled, onef); + rresult = spu_sel(rresult, spu_mul(rresult, xscaled), spu_cmpgt(x, xscaled)); /* [7,8) */ + xscaled = spu_add(xscaled, onef); + rresult = spu_sel(rresult, spu_mul(rresult, xscaled), spu_cmpgt(x, xscaled)); /* [8,9) */ + xscaled = spu_add(xscaled, onef); + rresult = spu_sel(rresult, spu_mul(rresult, xscaled), spu_cmpgt(x, xscaled)); /* [9,10) */ + + + /* + * For x >= 10, we use Stirling's Approximation + */ + vector float sum; + xinv = _recipf4(xabs); + sum = spu_madd(xinv, spu_splats((float)STIRLING_16), spu_splats((float)STIRLING_15)); + sum = spu_madd(sum, xinv, spu_splats((float)STIRLING_14)); + sum = spu_madd(sum, xinv, spu_splats((float)STIRLING_13)); + sum = spu_madd(sum, xinv, spu_splats((float)STIRLING_12)); + sum = spu_madd(sum, xinv, spu_splats((float)STIRLING_11)); + sum = spu_madd(sum, xinv, spu_splats((float)STIRLING_10)); + sum = spu_madd(sum, xinv, spu_splats((float)STIRLING_09)); + sum = spu_madd(sum, xinv, spu_splats((float)STIRLING_08)); + sum = spu_madd(sum, xinv, spu_splats((float)STIRLING_07)); + sum = spu_madd(sum, xinv, spu_splats((float)STIRLING_06)); + sum = spu_madd(sum, xinv, spu_splats((float)STIRLING_05)); + sum = spu_madd(sum, xinv, spu_splats((float)STIRLING_04)); + sum = spu_madd(sum, xinv, spu_splats((float)STIRLING_03)); + sum = spu_madd(sum, xinv, spu_splats((float)STIRLING_02)); + sum = spu_madd(sum, xinv, spu_splats((float)STIRLING_01)); + sum = spu_madd(sum, xinv, spu_splats((float)STIRLING_00)); + + sum = spu_mul(sum, sqrt2pi); + sum = spu_mul(sum, _powf4(x, spu_sub(x, halff))); + sresult = spu_mul(sum, _expf4(spu_or(x, signbit))); + + /* + * Choose rational approximation or Stirling's result. + */ + result = spu_sel(rresult, sresult, gt9p9); + + result = spu_sel(result, inf, gt38); + + /* For x < 0, use: + * gamma(x) = pi/(x*gamma(-x)*sin(x*pi)) + */ + nresult = _divf4(pi, spu_mul(x, spu_mul(result, _sinf4(spu_mul(x, pi))))); + result = spu_sel(nresult, result, gt0); + + /* + * x = non-positive integer, return NaN. + */ + result = spu_sel(result, nan, spu_andc(spu_cmpeq(x, xtrunc), gt0)); + + return result; +} + +#endif /* _TGAMMAF4_H_ */ +#endif /* __SPU__ */ |