diff options
Diffstat (limited to 'extern/Eigen3/Eigen/src/Jacobi/Jacobi.h')
-rw-r--r-- | extern/Eigen3/Eigen/src/Jacobi/Jacobi.h | 263 |
1 files changed, 146 insertions, 117 deletions
diff --git a/extern/Eigen3/Eigen/src/Jacobi/Jacobi.h b/extern/Eigen3/Eigen/src/Jacobi/Jacobi.h index 956f72d5701..1998c632274 100644 --- a/extern/Eigen3/Eigen/src/Jacobi/Jacobi.h +++ b/extern/Eigen3/Eigen/src/Jacobi/Jacobi.h @@ -62,14 +62,14 @@ template<typename Scalar> class JacobiRotation JacobiRotation adjoint() const { using numext::conj; return JacobiRotation(conj(m_c), -m_s); } template<typename Derived> - bool makeJacobi(const MatrixBase<Derived>&, typename Derived::Index p, typename Derived::Index q); + bool makeJacobi(const MatrixBase<Derived>&, Index p, Index q); bool makeJacobi(const RealScalar& x, const Scalar& y, const RealScalar& z); - void makeGivens(const Scalar& p, const Scalar& q, Scalar* z=0); + void makeGivens(const Scalar& p, const Scalar& q, Scalar* r=0); protected: - void makeGivens(const Scalar& p, const Scalar& q, Scalar* z, internal::true_type); - void makeGivens(const Scalar& p, const Scalar& q, Scalar* z, internal::false_type); + void makeGivens(const Scalar& p, const Scalar& q, Scalar* r, internal::true_type); + void makeGivens(const Scalar& p, const Scalar& q, Scalar* r, internal::false_type); Scalar m_c, m_s; }; @@ -84,8 +84,8 @@ bool JacobiRotation<Scalar>::makeJacobi(const RealScalar& x, const Scalar& y, co { using std::sqrt; using std::abs; - typedef typename NumTraits<Scalar>::Real RealScalar; - if(y == Scalar(0)) + RealScalar deno = RealScalar(2)*abs(y); + if(deno < (std::numeric_limits<RealScalar>::min)()) { m_c = Scalar(1); m_s = Scalar(0); @@ -93,7 +93,7 @@ bool JacobiRotation<Scalar>::makeJacobi(const RealScalar& x, const Scalar& y, co } else { - RealScalar tau = (x-z)/(RealScalar(2)*abs(y)); + RealScalar tau = (x-z)/deno; RealScalar w = sqrt(numext::abs2(tau) + RealScalar(1)); RealScalar t; if(tau>RealScalar(0)) @@ -123,7 +123,7 @@ bool JacobiRotation<Scalar>::makeJacobi(const RealScalar& x, const Scalar& y, co */ template<typename Scalar> template<typename Derived> -inline bool JacobiRotation<Scalar>::makeJacobi(const MatrixBase<Derived>& m, typename Derived::Index p, typename Derived::Index q) +inline bool JacobiRotation<Scalar>::makeJacobi(const MatrixBase<Derived>& m, Index p, Index q) { return makeJacobi(numext::real(m.coeff(p,p)), m.coeff(p,q), numext::real(m.coeff(q,q))); } @@ -132,7 +132,7 @@ inline bool JacobiRotation<Scalar>::makeJacobi(const MatrixBase<Derived>& m, typ * \f$ V = \left ( \begin{array}{c} p \\ q \end{array} \right )\f$ yields: * \f$ G^* V = \left ( \begin{array}{c} r \\ 0 \end{array} \right )\f$. * - * The value of \a z is returned if \a z is not null (the default is null). + * The value of \a r is returned if \a r is not null (the default is null). * Also note that G is built such that the cosine is always real. * * Example: \include Jacobi_makeGivens.cpp @@ -145,9 +145,9 @@ inline bool JacobiRotation<Scalar>::makeJacobi(const MatrixBase<Derived>& m, typ * \sa MatrixBase::applyOnTheLeft(), MatrixBase::applyOnTheRight() */ template<typename Scalar> -void JacobiRotation<Scalar>::makeGivens(const Scalar& p, const Scalar& q, Scalar* z) +void JacobiRotation<Scalar>::makeGivens(const Scalar& p, const Scalar& q, Scalar* r) { - makeGivens(p, q, z, typename internal::conditional<NumTraits<Scalar>::IsComplex, internal::true_type, internal::false_type>::type()); + makeGivens(p, q, r, typename internal::conditional<NumTraits<Scalar>::IsComplex, internal::true_type, internal::false_type>::type()); } @@ -255,15 +255,15 @@ void JacobiRotation<Scalar>::makeGivens(const Scalar& p, const Scalar& q, Scalar * Implementation of MatrixBase methods ****************************************************************************************/ +namespace internal { /** \jacobi_module * Applies the clock wise 2D rotation \a j to the set of 2D vectors of cordinates \a x and \a y: * \f$ \left ( \begin{array}{cc} x \\ y \end{array} \right ) = J \left ( \begin{array}{cc} x \\ y \end{array} \right ) \f$ * * \sa MatrixBase::applyOnTheLeft(), MatrixBase::applyOnTheRight() */ -namespace internal { template<typename VectorX, typename VectorY, typename OtherScalar> -void apply_rotation_in_the_plane(VectorX& _x, VectorY& _y, const JacobiRotation<OtherScalar>& j); +void apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x, DenseBase<VectorY>& xpr_y, const JacobiRotation<OtherScalar>& j); } /** \jacobi_module @@ -297,133 +297,162 @@ inline void MatrixBase<Derived>::applyOnTheRight(Index p, Index q, const JacobiR } namespace internal { -template<typename VectorX, typename VectorY, typename OtherScalar> -void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(VectorX& _x, VectorY& _y, const JacobiRotation<OtherScalar>& j) -{ - typedef typename VectorX::Index Index; - typedef typename VectorX::Scalar Scalar; - enum { PacketSize = packet_traits<Scalar>::size }; - typedef typename packet_traits<Scalar>::type Packet; - eigen_assert(_x.size() == _y.size()); - Index size = _x.size(); - Index incrx = _x.innerStride(); - Index incry = _y.innerStride(); - - Scalar* EIGEN_RESTRICT x = &_x.coeffRef(0); - Scalar* EIGEN_RESTRICT y = &_y.coeffRef(0); - - OtherScalar c = j.c(); - OtherScalar s = j.s(); - if (c==OtherScalar(1) && s==OtherScalar(0)) - return; - /*** dynamic-size vectorized paths ***/ +template<typename Scalar, typename OtherScalar, + int SizeAtCompileTime, int MinAlignment, bool Vectorizable> +struct apply_rotation_in_the_plane_selector +{ + static inline void run(Scalar *x, Index incrx, Scalar *y, Index incry, Index size, OtherScalar c, OtherScalar s) + { + for(Index i=0; i<size; ++i) + { + Scalar xi = *x; + Scalar yi = *y; + *x = c * xi + numext::conj(s) * yi; + *y = -s * xi + numext::conj(c) * yi; + x += incrx; + y += incry; + } + } +}; - if(VectorX::SizeAtCompileTime == Dynamic && - (VectorX::Flags & VectorY::Flags & PacketAccessBit) && - ((incrx==1 && incry==1) || PacketSize == 1)) +template<typename Scalar, typename OtherScalar, + int SizeAtCompileTime, int MinAlignment> +struct apply_rotation_in_the_plane_selector<Scalar,OtherScalar,SizeAtCompileTime,MinAlignment,true /* vectorizable */> +{ + static inline void run(Scalar *x, Index incrx, Scalar *y, Index incry, Index size, OtherScalar c, OtherScalar s) { - // both vectors are sequentially stored in memory => vectorization - enum { Peeling = 2 }; + enum { + PacketSize = packet_traits<Scalar>::size, + OtherPacketSize = packet_traits<OtherScalar>::size + }; + typedef typename packet_traits<Scalar>::type Packet; + typedef typename packet_traits<OtherScalar>::type OtherPacket; + + /*** dynamic-size vectorized paths ***/ + if(SizeAtCompileTime == Dynamic && ((incrx==1 && incry==1) || PacketSize == 1)) + { + // both vectors are sequentially stored in memory => vectorization + enum { Peeling = 2 }; - Index alignedStart = internal::first_aligned(y, size); - Index alignedEnd = alignedStart + ((size-alignedStart)/PacketSize)*PacketSize; + Index alignedStart = internal::first_default_aligned(y, size); + Index alignedEnd = alignedStart + ((size-alignedStart)/PacketSize)*PacketSize; - const Packet pc = pset1<Packet>(c); - const Packet ps = pset1<Packet>(s); - conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex,false> pcj; + const OtherPacket pc = pset1<OtherPacket>(c); + const OtherPacket ps = pset1<OtherPacket>(s); + conj_helper<OtherPacket,Packet,NumTraits<OtherScalar>::IsComplex,false> pcj; + conj_helper<OtherPacket,Packet,false,false> pm; - for(Index i=0; i<alignedStart; ++i) - { - Scalar xi = x[i]; - Scalar yi = y[i]; - x[i] = c * xi + numext::conj(s) * yi; - y[i] = -s * xi + numext::conj(c) * yi; - } + for(Index i=0; i<alignedStart; ++i) + { + Scalar xi = x[i]; + Scalar yi = y[i]; + x[i] = c * xi + numext::conj(s) * yi; + y[i] = -s * xi + numext::conj(c) * yi; + } - Scalar* EIGEN_RESTRICT px = x + alignedStart; - Scalar* EIGEN_RESTRICT py = y + alignedStart; + Scalar* EIGEN_RESTRICT px = x + alignedStart; + Scalar* EIGEN_RESTRICT py = y + alignedStart; - if(internal::first_aligned(x, size)==alignedStart) - { - for(Index i=alignedStart; i<alignedEnd; i+=PacketSize) + if(internal::first_default_aligned(x, size)==alignedStart) { - Packet xi = pload<Packet>(px); - Packet yi = pload<Packet>(py); - pstore(px, padd(pmul(pc,xi),pcj.pmul(ps,yi))); - pstore(py, psub(pcj.pmul(pc,yi),pmul(ps,xi))); - px += PacketSize; - py += PacketSize; + for(Index i=alignedStart; i<alignedEnd; i+=PacketSize) + { + Packet xi = pload<Packet>(px); + Packet yi = pload<Packet>(py); + pstore(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi))); + pstore(py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi))); + px += PacketSize; + py += PacketSize; + } } - } - else - { - Index peelingEnd = alignedStart + ((size-alignedStart)/(Peeling*PacketSize))*(Peeling*PacketSize); - for(Index i=alignedStart; i<peelingEnd; i+=Peeling*PacketSize) + else { - Packet xi = ploadu<Packet>(px); - Packet xi1 = ploadu<Packet>(px+PacketSize); - Packet yi = pload <Packet>(py); - Packet yi1 = pload <Packet>(py+PacketSize); - pstoreu(px, padd(pmul(pc,xi),pcj.pmul(ps,yi))); - pstoreu(px+PacketSize, padd(pmul(pc,xi1),pcj.pmul(ps,yi1))); - pstore (py, psub(pcj.pmul(pc,yi),pmul(ps,xi))); - pstore (py+PacketSize, psub(pcj.pmul(pc,yi1),pmul(ps,xi1))); - px += Peeling*PacketSize; - py += Peeling*PacketSize; + Index peelingEnd = alignedStart + ((size-alignedStart)/(Peeling*PacketSize))*(Peeling*PacketSize); + for(Index i=alignedStart; i<peelingEnd; i+=Peeling*PacketSize) + { + Packet xi = ploadu<Packet>(px); + Packet xi1 = ploadu<Packet>(px+PacketSize); + Packet yi = pload <Packet>(py); + Packet yi1 = pload <Packet>(py+PacketSize); + pstoreu(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi))); + pstoreu(px+PacketSize, padd(pm.pmul(pc,xi1),pcj.pmul(ps,yi1))); + pstore (py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi))); + pstore (py+PacketSize, psub(pcj.pmul(pc,yi1),pm.pmul(ps,xi1))); + px += Peeling*PacketSize; + py += Peeling*PacketSize; + } + if(alignedEnd!=peelingEnd) + { + Packet xi = ploadu<Packet>(x+peelingEnd); + Packet yi = pload <Packet>(y+peelingEnd); + pstoreu(x+peelingEnd, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi))); + pstore (y+peelingEnd, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi))); + } } - if(alignedEnd!=peelingEnd) + + for(Index i=alignedEnd; i<size; ++i) { - Packet xi = ploadu<Packet>(x+peelingEnd); - Packet yi = pload <Packet>(y+peelingEnd); - pstoreu(x+peelingEnd, padd(pmul(pc,xi),pcj.pmul(ps,yi))); - pstore (y+peelingEnd, psub(pcj.pmul(pc,yi),pmul(ps,xi))); + Scalar xi = x[i]; + Scalar yi = y[i]; + x[i] = c * xi + numext::conj(s) * yi; + y[i] = -s * xi + numext::conj(c) * yi; } } - for(Index i=alignedEnd; i<size; ++i) + /*** fixed-size vectorized path ***/ + else if(SizeAtCompileTime != Dynamic && MinAlignment>0) // FIXME should be compared to the required alignment { - Scalar xi = x[i]; - Scalar yi = y[i]; - x[i] = c * xi + numext::conj(s) * yi; - y[i] = -s * xi + numext::conj(c) * yi; + const OtherPacket pc = pset1<OtherPacket>(c); + const OtherPacket ps = pset1<OtherPacket>(s); + conj_helper<OtherPacket,Packet,NumTraits<OtherPacket>::IsComplex,false> pcj; + conj_helper<OtherPacket,Packet,false,false> pm; + Scalar* EIGEN_RESTRICT px = x; + Scalar* EIGEN_RESTRICT py = y; + for(Index i=0; i<size; i+=PacketSize) + { + Packet xi = pload<Packet>(px); + Packet yi = pload<Packet>(py); + pstore(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi))); + pstore(py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi))); + px += PacketSize; + py += PacketSize; + } } - } - /*** fixed-size vectorized path ***/ - else if(VectorX::SizeAtCompileTime != Dynamic && - (VectorX::Flags & VectorY::Flags & PacketAccessBit) && - (VectorX::Flags & VectorY::Flags & AlignedBit)) - { - const Packet pc = pset1<Packet>(c); - const Packet ps = pset1<Packet>(s); - conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex,false> pcj; - Scalar* EIGEN_RESTRICT px = x; - Scalar* EIGEN_RESTRICT py = y; - for(Index i=0; i<size; i+=PacketSize) + /*** non-vectorized path ***/ + else { - Packet xi = pload<Packet>(px); - Packet yi = pload<Packet>(py); - pstore(px, padd(pmul(pc,xi),pcj.pmul(ps,yi))); - pstore(py, psub(pcj.pmul(pc,yi),pmul(ps,xi))); - px += PacketSize; - py += PacketSize; + apply_rotation_in_the_plane_selector<Scalar,OtherScalar,SizeAtCompileTime,MinAlignment,false>::run(x,incrx,y,incry,size,c,s); } } +}; - /*** non-vectorized path ***/ - else - { - for(Index i=0; i<size; ++i) - { - Scalar xi = *x; - Scalar yi = *y; - *x = c * xi + numext::conj(s) * yi; - *y = -s * xi + numext::conj(c) * yi; - x += incrx; - y += incry; - } - } +template<typename VectorX, typename VectorY, typename OtherScalar> +void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x, DenseBase<VectorY>& xpr_y, const JacobiRotation<OtherScalar>& j) +{ + typedef typename VectorX::Scalar Scalar; + const bool Vectorizable = (VectorX::Flags & VectorY::Flags & PacketAccessBit) + && (int(packet_traits<Scalar>::size) == int(packet_traits<OtherScalar>::size)); + + eigen_assert(xpr_x.size() == xpr_y.size()); + Index size = xpr_x.size(); + Index incrx = xpr_x.derived().innerStride(); + Index incry = xpr_y.derived().innerStride(); + + Scalar* EIGEN_RESTRICT x = &xpr_x.derived().coeffRef(0); + Scalar* EIGEN_RESTRICT y = &xpr_y.derived().coeffRef(0); + + OtherScalar c = j.c(); + OtherScalar s = j.s(); + if (c==OtherScalar(1) && s==OtherScalar(0)) + return; + + apply_rotation_in_the_plane_selector< + Scalar,OtherScalar, + VectorX::SizeAtCompileTime, + EIGEN_PLAIN_ENUM_MIN(evaluator<VectorX>::Alignment, evaluator<VectorY>::Alignment), + Vectorizable>::run(x,incrx,y,incry,size,c,s); } } // end namespace internal |