diff options
Diffstat (limited to 'extern/Eigen3/Eigen/src/Core/products')
22 files changed, 1996 insertions, 520 deletions
diff --git a/extern/Eigen3/Eigen/src/Core/products/CoeffBasedProduct.h b/extern/Eigen3/Eigen/src/Core/products/CoeffBasedProduct.h index dc20f7e1e29..403d25fa9eb 100644 --- a/extern/Eigen3/Eigen/src/Core/products/CoeffBasedProduct.h +++ b/extern/Eigen3/Eigen/src/Core/products/CoeffBasedProduct.h @@ -4,28 +4,15 @@ // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com> // Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr> // -// Eigen is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 3 of the License, or (at your option) any later version. -// -// Alternatively, you can redistribute it and/or -// modify it under the terms of the GNU General Public License as -// published by the Free Software Foundation; either version 2 of -// the License, or (at your option) any later version. -// -// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License and a copy of the GNU General Public License along with -// Eigen. If not, see <http://www.gnu.org/licenses/>. +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. #ifndef EIGEN_COEFFBASED_PRODUCT_H #define EIGEN_COEFFBASED_PRODUCT_H +namespace Eigen { + namespace internal { /********************************************************************************* @@ -224,8 +211,8 @@ class CoeffBasedProduct { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this).diagonal(index); } protected: - const LhsNested m_lhs; - const RhsNested m_rhs; + typename internal::add_const_on_value_type<LhsNested>::type m_lhs; + typename internal::add_const_on_value_type<RhsNested>::type m_rhs; mutable PlainObject m_result; }; @@ -252,7 +239,7 @@ template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar> struct product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar> { typedef typename Lhs::Index Index; - EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res) + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res) { product_coeff_impl<DefaultTraversal, UnrollingIndex-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, res); res += lhs.coeff(row, UnrollingIndex) * rhs.coeff(UnrollingIndex, col); @@ -263,7 +250,7 @@ template<typename Lhs, typename Rhs, typename RetScalar> struct product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar> { typedef typename Lhs::Index Index; - EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res) + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res) { res = lhs.coeff(row, 0) * rhs.coeff(0, col); } @@ -273,7 +260,7 @@ template<typename Lhs, typename Rhs, typename RetScalar> struct product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar> { typedef typename Lhs::Index Index; - EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar& res) + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar& res) { eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix"); res = lhs.coeff(row, 0) * rhs.coeff(0, col); @@ -291,7 +278,7 @@ struct product_coeff_vectorized_unroller { typedef typename Lhs::Index Index; enum { PacketSize = packet_traits<typename Lhs::Scalar>::size }; - EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres) + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres) { product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres); pres = padd(pres, pmul( lhs.template packet<Aligned>(row, UnrollingIndex) , rhs.template packet<Aligned>(UnrollingIndex, col) )); @@ -302,7 +289,7 @@ template<typename Lhs, typename Rhs, typename Packet> struct product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet> { typedef typename Lhs::Index Index; - EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres) + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres) { pres = pmul(lhs.template packet<Aligned>(row, 0) , rhs.template packet<Aligned>(0, col)); } @@ -314,7 +301,7 @@ struct product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, Re typedef typename Lhs::PacketScalar Packet; typedef typename Lhs::Index Index; enum { PacketSize = packet_traits<typename Lhs::Scalar>::size }; - EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res) + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res) { Packet pres; product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres); @@ -327,7 +314,7 @@ template<typename Lhs, typename Rhs, int LhsRows = Lhs::RowsAtCompileTime, int R struct product_coeff_vectorized_dyn_selector { typedef typename Lhs::Index Index; - EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) { res = lhs.row(row).transpose().cwiseProduct(rhs.col(col)).sum(); } @@ -339,7 +326,7 @@ template<typename Lhs, typename Rhs, int RhsCols> struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols> { typedef typename Lhs::Index Index; - EIGEN_STRONG_INLINE static void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) + static EIGEN_STRONG_INLINE void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) { res = lhs.transpose().cwiseProduct(rhs.col(col)).sum(); } @@ -349,7 +336,7 @@ template<typename Lhs, typename Rhs, int LhsRows> struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1> { typedef typename Lhs::Index Index; - EIGEN_STRONG_INLINE static void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) + static EIGEN_STRONG_INLINE void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) { res = lhs.row(row).transpose().cwiseProduct(rhs).sum(); } @@ -359,7 +346,7 @@ template<typename Lhs, typename Rhs> struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1> { typedef typename Lhs::Index Index; - EIGEN_STRONG_INLINE static void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) + static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) { res = lhs.transpose().cwiseProduct(rhs).sum(); } @@ -369,7 +356,7 @@ template<typename Lhs, typename Rhs, typename RetScalar> struct product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetScalar> { typedef typename Lhs::Index Index; - EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) { product_coeff_vectorized_dyn_selector<Lhs,Rhs>::run(row, col, lhs, rhs, res); } @@ -383,7 +370,7 @@ template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int Lo struct product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode> { typedef typename Lhs::Index Index; - EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res) + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res) { product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res); res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res); @@ -394,7 +381,7 @@ template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int Lo struct product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode> { typedef typename Lhs::Index Index; - EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res) + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res) { product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res); res = pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res); @@ -405,7 +392,7 @@ template<typename Lhs, typename Rhs, typename Packet, int LoadMode> struct product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode> { typedef typename Lhs::Index Index; - EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res) + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res) { res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col)); } @@ -415,7 +402,7 @@ template<typename Lhs, typename Rhs, typename Packet, int LoadMode> struct product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode> { typedef typename Lhs::Index Index; - EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res) + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res) { res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col))); } @@ -425,7 +412,7 @@ template<typename Lhs, typename Rhs, typename Packet, int LoadMode> struct product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode> { typedef typename Lhs::Index Index; - EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res) + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res) { eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix"); res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col)); @@ -438,7 +425,7 @@ template<typename Lhs, typename Rhs, typename Packet, int LoadMode> struct product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode> { typedef typename Lhs::Index Index; - EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res) + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res) { eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix"); res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col))); @@ -449,4 +436,6 @@ struct product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode> } // end namespace internal +} // end namespace Eigen + #endif // EIGEN_COEFFBASED_PRODUCT_H diff --git a/extern/Eigen3/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/extern/Eigen3/Eigen/src/Core/products/GeneralBlockPanelKernel.h index cd1c37c780e..5eb03c98ccf 100644 --- a/extern/Eigen3/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/extern/Eigen3/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -3,34 +3,23 @@ // // Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr> // -// Eigen is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 3 of the License, or (at your option) any later version. -// -// Alternatively, you can redistribute it and/or -// modify it under the terms of the GNU General Public License as -// published by the Free Software Foundation; either version 2 of -// the License, or (at your option) any later version. -// -// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License and a copy of the GNU General Public License along with -// Eigen. If not, see <http://www.gnu.org/licenses/>. +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. #ifndef EIGEN_GENERAL_BLOCK_PANEL_H #define EIGEN_GENERAL_BLOCK_PANEL_H +namespace Eigen { + namespace internal { template<typename _LhsScalar, typename _RhsScalar, bool _ConjLhs=false, bool _ConjRhs=false> class gebp_traits; -inline std::ptrdiff_t manage_caching_sizes_second_if_negative(std::ptrdiff_t a, std::ptrdiff_t b) + +/** \internal \returns b if a<=0, and returns a otherwise. */ +inline std::ptrdiff_t manage_caching_sizes_helper(std::ptrdiff_t a, std::ptrdiff_t b) { return a<=0 ? b : a; } @@ -38,9 +27,14 @@ inline std::ptrdiff_t manage_caching_sizes_second_if_negative(std::ptrdiff_t a, /** \internal */ inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1=0, std::ptrdiff_t* l2=0) { - static std::ptrdiff_t m_l1CacheSize = manage_caching_sizes_second_if_negative(queryL1CacheSize(),8 * 1024); - static std::ptrdiff_t m_l2CacheSize = manage_caching_sizes_second_if_negative(queryTopLevelCacheSize(),1*1024*1024); - + static std::ptrdiff_t m_l1CacheSize = 0; + static std::ptrdiff_t m_l2CacheSize = 0; + if(m_l2CacheSize==0) + { + m_l1CacheSize = manage_caching_sizes_helper(queryL1CacheSize(),8 * 1024); + m_l2CacheSize = manage_caching_sizes_helper(queryTopLevelCacheSize(),1*1024*1024); + } + if(action==SetAction) { // set the cpu cache size and cache all block sizes from a global cache size in byte @@ -533,7 +527,7 @@ struct gebp_kernel ResPacketSize = Traits::ResPacketSize }; - EIGEN_FLATTEN_ATTRIB + EIGEN_DONT_INLINE EIGEN_FLATTEN_ATTRIB void operator()(ResScalar* res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index rows, Index depth, Index cols, ResScalar alpha, Index strideA=-1, Index strideB=-1, Index offsetA=0, Index offsetB=0, RhsScalar* unpackedB = 0) { @@ -595,64 +589,64 @@ struct gebp_kernel if(nr==2) { LhsPacket A0, A1; - RhsPacket B0; + RhsPacket B_0; RhsPacket T0; EIGEN_ASM_COMMENT("mybegin2"); traits.loadLhs(&blA[0*LhsProgress], A0); traits.loadLhs(&blA[1*LhsProgress], A1); - traits.loadRhs(&blB[0*RhsProgress], B0); - traits.madd(A0,B0,C0,T0); - traits.madd(A1,B0,C4,B0); - traits.loadRhs(&blB[1*RhsProgress], B0); - traits.madd(A0,B0,C1,T0); - traits.madd(A1,B0,C5,B0); + traits.loadRhs(&blB[0*RhsProgress], B_0); + traits.madd(A0,B_0,C0,T0); + traits.madd(A1,B_0,C4,B_0); + traits.loadRhs(&blB[1*RhsProgress], B_0); + traits.madd(A0,B_0,C1,T0); + traits.madd(A1,B_0,C5,B_0); traits.loadLhs(&blA[2*LhsProgress], A0); traits.loadLhs(&blA[3*LhsProgress], A1); - traits.loadRhs(&blB[2*RhsProgress], B0); - traits.madd(A0,B0,C0,T0); - traits.madd(A1,B0,C4,B0); - traits.loadRhs(&blB[3*RhsProgress], B0); - traits.madd(A0,B0,C1,T0); - traits.madd(A1,B0,C5,B0); + traits.loadRhs(&blB[2*RhsProgress], B_0); + traits.madd(A0,B_0,C0,T0); + traits.madd(A1,B_0,C4,B_0); + traits.loadRhs(&blB[3*RhsProgress], B_0); + traits.madd(A0,B_0,C1,T0); + traits.madd(A1,B_0,C5,B_0); traits.loadLhs(&blA[4*LhsProgress], A0); traits.loadLhs(&blA[5*LhsProgress], A1); - traits.loadRhs(&blB[4*RhsProgress], B0); - traits.madd(A0,B0,C0,T0); - traits.madd(A1,B0,C4,B0); - traits.loadRhs(&blB[5*RhsProgress], B0); - traits.madd(A0,B0,C1,T0); - traits.madd(A1,B0,C5,B0); + traits.loadRhs(&blB[4*RhsProgress], B_0); + traits.madd(A0,B_0,C0,T0); + traits.madd(A1,B_0,C4,B_0); + traits.loadRhs(&blB[5*RhsProgress], B_0); + traits.madd(A0,B_0,C1,T0); + traits.madd(A1,B_0,C5,B_0); traits.loadLhs(&blA[6*LhsProgress], A0); traits.loadLhs(&blA[7*LhsProgress], A1); - traits.loadRhs(&blB[6*RhsProgress], B0); - traits.madd(A0,B0,C0,T0); - traits.madd(A1,B0,C4,B0); - traits.loadRhs(&blB[7*RhsProgress], B0); - traits.madd(A0,B0,C1,T0); - traits.madd(A1,B0,C5,B0); + traits.loadRhs(&blB[6*RhsProgress], B_0); + traits.madd(A0,B_0,C0,T0); + traits.madd(A1,B_0,C4,B_0); + traits.loadRhs(&blB[7*RhsProgress], B_0); + traits.madd(A0,B_0,C1,T0); + traits.madd(A1,B_0,C5,B_0); EIGEN_ASM_COMMENT("myend"); } else { EIGEN_ASM_COMMENT("mybegin4"); LhsPacket A0, A1; - RhsPacket B0, B1, B2, B3; + RhsPacket B_0, B1, B2, B3; RhsPacket T0; traits.loadLhs(&blA[0*LhsProgress], A0); traits.loadLhs(&blA[1*LhsProgress], A1); - traits.loadRhs(&blB[0*RhsProgress], B0); + traits.loadRhs(&blB[0*RhsProgress], B_0); traits.loadRhs(&blB[1*RhsProgress], B1); - traits.madd(A0,B0,C0,T0); + traits.madd(A0,B_0,C0,T0); traits.loadRhs(&blB[2*RhsProgress], B2); - traits.madd(A1,B0,C4,B0); + traits.madd(A1,B_0,C4,B_0); traits.loadRhs(&blB[3*RhsProgress], B3); - traits.loadRhs(&blB[4*RhsProgress], B0); + traits.loadRhs(&blB[4*RhsProgress], B_0); traits.madd(A0,B1,C1,T0); traits.madd(A1,B1,C5,B1); traits.loadRhs(&blB[5*RhsProgress], B1); @@ -664,9 +658,9 @@ EIGEN_ASM_COMMENT("mybegin4"); traits.madd(A1,B3,C7,B3); traits.loadLhs(&blA[3*LhsProgress], A1); traits.loadRhs(&blB[7*RhsProgress], B3); - traits.madd(A0,B0,C0,T0); - traits.madd(A1,B0,C4,B0); - traits.loadRhs(&blB[8*RhsProgress], B0); + traits.madd(A0,B_0,C0,T0); + traits.madd(A1,B_0,C4,B_0); + traits.loadRhs(&blB[8*RhsProgress], B_0); traits.madd(A0,B1,C1,T0); traits.madd(A1,B1,C5,B1); traits.loadRhs(&blB[9*RhsProgress], B1); @@ -679,9 +673,9 @@ EIGEN_ASM_COMMENT("mybegin4"); traits.loadLhs(&blA[5*LhsProgress], A1); traits.loadRhs(&blB[11*RhsProgress], B3); - traits.madd(A0,B0,C0,T0); - traits.madd(A1,B0,C4,B0); - traits.loadRhs(&blB[12*RhsProgress], B0); + traits.madd(A0,B_0,C0,T0); + traits.madd(A1,B_0,C4,B_0); + traits.loadRhs(&blB[12*RhsProgress], B_0); traits.madd(A0,B1,C1,T0); traits.madd(A1,B1,C5,B1); traits.loadRhs(&blB[13*RhsProgress], B1); @@ -693,8 +687,8 @@ EIGEN_ASM_COMMENT("mybegin4"); traits.madd(A1,B3,C7,B3); traits.loadLhs(&blA[7*LhsProgress], A1); traits.loadRhs(&blB[15*RhsProgress], B3); - traits.madd(A0,B0,C0,T0); - traits.madd(A1,B0,C4,B0); + traits.madd(A0,B_0,C0,T0); + traits.madd(A1,B_0,C4,B_0); traits.madd(A0,B1,C1,T0); traits.madd(A1,B1,C5,B1); traits.madd(A0,B2,C2,T0); @@ -712,32 +706,32 @@ EIGEN_ASM_COMMENT("mybegin4"); if(nr==2) { LhsPacket A0, A1; - RhsPacket B0; + RhsPacket B_0; RhsPacket T0; traits.loadLhs(&blA[0*LhsProgress], A0); traits.loadLhs(&blA[1*LhsProgress], A1); - traits.loadRhs(&blB[0*RhsProgress], B0); - traits.madd(A0,B0,C0,T0); - traits.madd(A1,B0,C4,B0); - traits.loadRhs(&blB[1*RhsProgress], B0); - traits.madd(A0,B0,C1,T0); - traits.madd(A1,B0,C5,B0); + traits.loadRhs(&blB[0*RhsProgress], B_0); + traits.madd(A0,B_0,C0,T0); + traits.madd(A1,B_0,C4,B_0); + traits.loadRhs(&blB[1*RhsProgress], B_0); + traits.madd(A0,B_0,C1,T0); + traits.madd(A1,B_0,C5,B_0); } else { LhsPacket A0, A1; - RhsPacket B0, B1, B2, B3; + RhsPacket B_0, B1, B2, B3; RhsPacket T0; traits.loadLhs(&blA[0*LhsProgress], A0); traits.loadLhs(&blA[1*LhsProgress], A1); - traits.loadRhs(&blB[0*RhsProgress], B0); + traits.loadRhs(&blB[0*RhsProgress], B_0); traits.loadRhs(&blB[1*RhsProgress], B1); - traits.madd(A0,B0,C0,T0); + traits.madd(A0,B_0,C0,T0); traits.loadRhs(&blB[2*RhsProgress], B2); - traits.madd(A1,B0,C4,B0); + traits.madd(A1,B_0,C4,B_0); traits.loadRhs(&blB[3*RhsProgress], B3); traits.madd(A0,B1,C1,T0); traits.madd(A1,B1,C5,B1); @@ -824,42 +818,42 @@ EIGEN_ASM_COMMENT("mybegin4"); if(nr==2) { LhsPacket A0; - RhsPacket B0, B1; + RhsPacket B_0, B1; traits.loadLhs(&blA[0*LhsProgress], A0); - traits.loadRhs(&blB[0*RhsProgress], B0); + traits.loadRhs(&blB[0*RhsProgress], B_0); traits.loadRhs(&blB[1*RhsProgress], B1); - traits.madd(A0,B0,C0,B0); - traits.loadRhs(&blB[2*RhsProgress], B0); + traits.madd(A0,B_0,C0,B_0); + traits.loadRhs(&blB[2*RhsProgress], B_0); traits.madd(A0,B1,C1,B1); traits.loadLhs(&blA[1*LhsProgress], A0); traits.loadRhs(&blB[3*RhsProgress], B1); - traits.madd(A0,B0,C0,B0); - traits.loadRhs(&blB[4*RhsProgress], B0); + traits.madd(A0,B_0,C0,B_0); + traits.loadRhs(&blB[4*RhsProgress], B_0); traits.madd(A0,B1,C1,B1); traits.loadLhs(&blA[2*LhsProgress], A0); traits.loadRhs(&blB[5*RhsProgress], B1); - traits.madd(A0,B0,C0,B0); - traits.loadRhs(&blB[6*RhsProgress], B0); + traits.madd(A0,B_0,C0,B_0); + traits.loadRhs(&blB[6*RhsProgress], B_0); traits.madd(A0,B1,C1,B1); traits.loadLhs(&blA[3*LhsProgress], A0); traits.loadRhs(&blB[7*RhsProgress], B1); - traits.madd(A0,B0,C0,B0); + traits.madd(A0,B_0,C0,B_0); traits.madd(A0,B1,C1,B1); } else { LhsPacket A0; - RhsPacket B0, B1, B2, B3; + RhsPacket B_0, B1, B2, B3; traits.loadLhs(&blA[0*LhsProgress], A0); - traits.loadRhs(&blB[0*RhsProgress], B0); + traits.loadRhs(&blB[0*RhsProgress], B_0); traits.loadRhs(&blB[1*RhsProgress], B1); - traits.madd(A0,B0,C0,B0); + traits.madd(A0,B_0,C0,B_0); traits.loadRhs(&blB[2*RhsProgress], B2); traits.loadRhs(&blB[3*RhsProgress], B3); - traits.loadRhs(&blB[4*RhsProgress], B0); + traits.loadRhs(&blB[4*RhsProgress], B_0); traits.madd(A0,B1,C1,B1); traits.loadRhs(&blB[5*RhsProgress], B1); traits.madd(A0,B2,C2,B2); @@ -867,8 +861,8 @@ EIGEN_ASM_COMMENT("mybegin4"); traits.madd(A0,B3,C3,B3); traits.loadLhs(&blA[1*LhsProgress], A0); traits.loadRhs(&blB[7*RhsProgress], B3); - traits.madd(A0,B0,C0,B0); - traits.loadRhs(&blB[8*RhsProgress], B0); + traits.madd(A0,B_0,C0,B_0); + traits.loadRhs(&blB[8*RhsProgress], B_0); traits.madd(A0,B1,C1,B1); traits.loadRhs(&blB[9*RhsProgress], B1); traits.madd(A0,B2,C2,B2); @@ -877,8 +871,8 @@ EIGEN_ASM_COMMENT("mybegin4"); traits.loadLhs(&blA[2*LhsProgress], A0); traits.loadRhs(&blB[11*RhsProgress], B3); - traits.madd(A0,B0,C0,B0); - traits.loadRhs(&blB[12*RhsProgress], B0); + traits.madd(A0,B_0,C0,B_0); + traits.loadRhs(&blB[12*RhsProgress], B_0); traits.madd(A0,B1,C1,B1); traits.loadRhs(&blB[13*RhsProgress], B1); traits.madd(A0,B2,C2,B2); @@ -887,7 +881,7 @@ EIGEN_ASM_COMMENT("mybegin4"); traits.loadLhs(&blA[3*LhsProgress], A0); traits.loadRhs(&blB[15*RhsProgress], B3); - traits.madd(A0,B0,C0,B0); + traits.madd(A0,B_0,C0,B_0); traits.madd(A0,B1,C1,B1); traits.madd(A0,B2,C2,B2); traits.madd(A0,B3,C3,B3); @@ -902,26 +896,26 @@ EIGEN_ASM_COMMENT("mybegin4"); if(nr==2) { LhsPacket A0; - RhsPacket B0, B1; + RhsPacket B_0, B1; traits.loadLhs(&blA[0*LhsProgress], A0); - traits.loadRhs(&blB[0*RhsProgress], B0); + traits.loadRhs(&blB[0*RhsProgress], B_0); traits.loadRhs(&blB[1*RhsProgress], B1); - traits.madd(A0,B0,C0,B0); + traits.madd(A0,B_0,C0,B_0); traits.madd(A0,B1,C1,B1); } else { LhsPacket A0; - RhsPacket B0, B1, B2, B3; + RhsPacket B_0, B1, B2, B3; traits.loadLhs(&blA[0*LhsProgress], A0); - traits.loadRhs(&blB[0*RhsProgress], B0); + traits.loadRhs(&blB[0*RhsProgress], B_0); traits.loadRhs(&blB[1*RhsProgress], B1); traits.loadRhs(&blB[2*RhsProgress], B2); traits.loadRhs(&blB[3*RhsProgress], B3); - traits.madd(A0,B0,C0,B0); + traits.madd(A0,B_0,C0,B_0); traits.madd(A0,B1,C1,B1); traits.madd(A0,B2,C2,B2); traits.madd(A0,B3,C3,B3); @@ -968,26 +962,26 @@ EIGEN_ASM_COMMENT("mybegin4"); if(nr==2) { LhsScalar A0; - RhsScalar B0, B1; + RhsScalar B_0, B1; A0 = blA[k]; - B0 = blB[0]; + B_0 = blB[0]; B1 = blB[1]; - MADD(cj,A0,B0,C0,B0); + MADD(cj,A0,B_0,C0,B_0); MADD(cj,A0,B1,C1,B1); } else { LhsScalar A0; - RhsScalar B0, B1, B2, B3; + RhsScalar B_0, B1, B2, B3; A0 = blA[k]; - B0 = blB[0]; + B_0 = blB[0]; B1 = blB[1]; B2 = blB[2]; B3 = blB[3]; - MADD(cj,A0,B0,C0,B0); + MADD(cj,A0,B_0,C0,B_0); MADD(cj,A0,B1,C1,B1); MADD(cj,A0,B2,C2,B2); MADD(cj,A0,B3,C3,B3); @@ -1024,14 +1018,14 @@ EIGEN_ASM_COMMENT("mybegin4"); for(Index k=0; k<depth; k++) { LhsPacket A0, A1; - RhsPacket B0; + RhsPacket B_0; RhsPacket T0; traits.loadLhs(&blA[0*LhsProgress], A0); traits.loadLhs(&blA[1*LhsProgress], A1); - traits.loadRhs(&blB[0*RhsProgress], B0); - traits.madd(A0,B0,C0,T0); - traits.madd(A1,B0,C4,B0); + traits.loadRhs(&blB[0*RhsProgress], B_0); + traits.madd(A0,B_0,C0,T0); + traits.madd(A1,B_0,C4,B_0); blB += RhsProgress; blA += 2*LhsProgress; @@ -1063,10 +1057,10 @@ EIGEN_ASM_COMMENT("mybegin4"); for(Index k=0; k<depth; k++) { LhsPacket A0; - RhsPacket B0; + RhsPacket B_0; traits.loadLhs(blA, A0); - traits.loadRhs(blB, B0); - traits.madd(A0, B0, C0, B0); + traits.loadRhs(blB, B_0); + traits.madd(A0, B_0, C0, B_0); blB += RhsProgress; blA += LhsProgress; } @@ -1088,8 +1082,8 @@ EIGEN_ASM_COMMENT("mybegin4"); for(Index k=0; k<depth; k++) { LhsScalar A0 = blA[k]; - RhsScalar B0 = blB[k]; - MADD(cj, A0, B0, C0, B0); + RhsScalar B_0 = blB[k]; + MADD(cj, A0, B_0, C0, B_0); } res[(j2+0)*resStride + i] += alpha*C0; } @@ -1100,7 +1094,7 @@ EIGEN_ASM_COMMENT("mybegin4"); #undef CJMADD // pack a block of the lhs -// The travesal is as follow (mr==4): +// The traversal is as follow (mr==4): // 0 4 8 12 ... // 1 5 9 13 ... // 2 6 10 14 ... @@ -1116,11 +1110,15 @@ EIGEN_ASM_COMMENT("mybegin4"); template<typename Scalar, typename Index, int Pack1, int Pack2, int StorageOrder, bool Conjugate, bool PanelMode> struct gemm_pack_lhs { - void operator()(Scalar* blockA, const Scalar* EIGEN_RESTRICT _lhs, Index lhsStride, Index depth, Index rows, + EIGEN_DONT_INLINE void operator()(Scalar* blockA, const Scalar* EIGEN_RESTRICT _lhs, Index lhsStride, Index depth, Index rows, Index stride=0, Index offset=0) { -// enum { PacketSize = packet_traits<Scalar>::size }; + typedef typename packet_traits<Scalar>::type Packet; + enum { PacketSize = packet_traits<Scalar>::size }; + + EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK LHS"); eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride)); + eigen_assert( (StorageOrder==RowMajor) || ((Pack1%PacketSize)==0 && Pack1<=4*PacketSize) ); conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj; const_blas_data_mapper<Scalar, Index, StorageOrder> lhs(_lhs,lhsStride); Index count = 0; @@ -1128,9 +1126,44 @@ struct gemm_pack_lhs for(Index i=0; i<peeled_mc; i+=Pack1) { if(PanelMode) count += Pack1 * offset; - for(Index k=0; k<depth; k++) - for(Index w=0; w<Pack1; w++) - blockA[count++] = cj(lhs(i+w, k)); + + if(StorageOrder==ColMajor) + { + for(Index k=0; k<depth; k++) + { + Packet A, B, C, D; + if(Pack1>=1*PacketSize) A = ploadu<Packet>(&lhs(i+0*PacketSize, k)); + if(Pack1>=2*PacketSize) B = ploadu<Packet>(&lhs(i+1*PacketSize, k)); + if(Pack1>=3*PacketSize) C = ploadu<Packet>(&lhs(i+2*PacketSize, k)); + if(Pack1>=4*PacketSize) D = ploadu<Packet>(&lhs(i+3*PacketSize, k)); + if(Pack1>=1*PacketSize) { pstore(blockA+count, cj.pconj(A)); count+=PacketSize; } + if(Pack1>=2*PacketSize) { pstore(blockA+count, cj.pconj(B)); count+=PacketSize; } + if(Pack1>=3*PacketSize) { pstore(blockA+count, cj.pconj(C)); count+=PacketSize; } + if(Pack1>=4*PacketSize) { pstore(blockA+count, cj.pconj(D)); count+=PacketSize; } + } + } + else + { + for(Index k=0; k<depth; k++) + { + // TODO add a vectorized transpose here + Index w=0; + for(; w<Pack1-3; w+=4) + { + Scalar a(cj(lhs(i+w+0, k))), + b(cj(lhs(i+w+1, k))), + c(cj(lhs(i+w+2, k))), + d(cj(lhs(i+w+3, k))); + blockA[count++] = a; + blockA[count++] = b; + blockA[count++] = c; + blockA[count++] = d; + } + if(Pack1%4) + for(;w<Pack1;++w) + blockA[count++] = cj(lhs(i+w, k)); + } + } if(PanelMode) count += Pack1 * (stride-offset-depth); } if(rows-peeled_mc>=Pack2) @@ -1164,9 +1197,10 @@ struct gemm_pack_rhs<Scalar, Index, nr, ColMajor, Conjugate, PanelMode> { typedef typename packet_traits<Scalar>::type Packet; enum { PacketSize = packet_traits<Scalar>::size }; - void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols, + EIGEN_DONT_INLINE void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols, Index stride=0, Index offset=0) { + EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS COLMAJOR"); eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride)); conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj; Index packet_cols = (cols/nr) * nr; @@ -1211,9 +1245,10 @@ template<typename Scalar, typename Index, int nr, bool Conjugate, bool PanelMode struct gemm_pack_rhs<Scalar, Index, nr, RowMajor, Conjugate, PanelMode> { enum { PacketSize = packet_traits<Scalar>::size }; - void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols, + EIGEN_DONT_INLINE void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols, Index stride=0, Index offset=0) { + EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS ROWMAJOR"); eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride)); conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj; Index packet_cols = (cols/nr) * nr; @@ -1279,4 +1314,6 @@ inline void setCpuCacheSizes(std::ptrdiff_t l1, std::ptrdiff_t l2) internal::manage_caching_sizes(SetAction, &l1, &l2); } +} // end namespace Eigen + #endif // EIGEN_GENERAL_BLOCK_PANEL_H diff --git a/extern/Eigen3/Eigen/src/Core/products/GeneralMatrixMatrix.h b/extern/Eigen3/Eigen/src/Core/products/GeneralMatrixMatrix.h index ae94a27953b..73a465ec5ee 100644 --- a/extern/Eigen3/Eigen/src/Core/products/GeneralMatrixMatrix.h +++ b/extern/Eigen3/Eigen/src/Core/products/GeneralMatrixMatrix.h @@ -3,28 +3,15 @@ // // Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr> // -// Eigen is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 3 of the License, or (at your option) any later version. -// -// Alternatively, you can redistribute it and/or -// modify it under the terms of the GNU General Public License as -// published by the Free Software Foundation; either version 2 of -// the License, or (at your option) any later version. -// -// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License and a copy of the GNU General Public License along with -// Eigen. If not, see <http://www.gnu.org/licenses/>. +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. #ifndef EIGEN_GENERAL_MATRIX_MATRIX_H #define EIGEN_GENERAL_MATRIX_MATRIX_H +namespace Eigen { + namespace internal { template<typename _LhsScalar, typename _RhsScalar> class level3_blocking; @@ -77,7 +64,7 @@ static void run(Index rows, Index cols, Index depth, typedef gebp_traits<LhsScalar,RhsScalar> Traits; - Index kc = blocking.kc(); // cache block size along the K direction + Index kc = blocking.kc(); // cache block size along the K direction Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction //Index nc = blocking.nc(); // cache block size along the N direction @@ -247,7 +234,7 @@ struct gemm_functor BlockingType& m_blocking; }; -template<int StorageOrder, typename LhsScalar, typename RhsScalar, int MaxRows, int MaxCols, int MaxDepth, +template<int StorageOrder, typename LhsScalar, typename RhsScalar, int MaxRows, int MaxCols, int MaxDepth, int KcFactor=1, bool FiniteAtCompileTime = MaxRows!=Dynamic && MaxCols!=Dynamic && MaxDepth != Dynamic> class gemm_blocking_space; template<typename _LhsScalar, typename _RhsScalar> @@ -280,8 +267,8 @@ class level3_blocking inline RhsScalar* blockW() { return m_blockW; } }; -template<int StorageOrder, typename _LhsScalar, typename _RhsScalar, int MaxRows, int MaxCols, int MaxDepth> -class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, true> +template<int StorageOrder, typename _LhsScalar, typename _RhsScalar, int MaxRows, int MaxCols, int MaxDepth, int KcFactor> +class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, KcFactor, true> : public level3_blocking< typename conditional<StorageOrder==RowMajor,_RhsScalar,_LhsScalar>::type, typename conditional<StorageOrder==RowMajor,_LhsScalar,_RhsScalar>::type> @@ -322,8 +309,8 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M inline void allocateAll() {} }; -template<int StorageOrder, typename _LhsScalar, typename _RhsScalar, int MaxRows, int MaxCols, int MaxDepth> -class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, false> +template<int StorageOrder, typename _LhsScalar, typename _RhsScalar, int MaxRows, int MaxCols, int MaxDepth, int KcFactor> +class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, KcFactor, false> : public level3_blocking< typename conditional<StorageOrder==RowMajor,_RhsScalar,_LhsScalar>::type, typename conditional<StorageOrder==RowMajor,_LhsScalar,_RhsScalar>::type> @@ -347,7 +334,7 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M this->m_nc = Transpose ? rows : cols; this->m_kc = depth; - computeProductBlockingSizes<LhsScalar,RhsScalar>(this->m_kc, this->m_mc, this->m_nc); + computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, this->m_mc, this->m_nc); m_sizeA = this->m_mc * this->m_kc; m_sizeB = this->m_kc * this->m_nc; m_sizeW = this->m_kc*Traits::WorkSpaceFactor; @@ -412,8 +399,8 @@ class GeneralProduct<Lhs, Rhs, GemmProduct> { eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols()); - const ActualLhsType lhs = LhsBlasTraits::extract(m_lhs); - const ActualRhsType rhs = RhsBlasTraits::extract(m_rhs); + typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs); + typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs); Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs) * RhsBlasTraits::extractScalarFactor(m_rhs); @@ -436,4 +423,6 @@ class GeneralProduct<Lhs, Rhs, GemmProduct> } }; +} // end namespace Eigen + #endif // EIGEN_GENERAL_MATRIX_MATRIX_H diff --git a/extern/Eigen3/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h b/extern/Eigen3/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h index 5043b64fe2e..432d3a9dc84 100644 --- a/extern/Eigen3/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +++ b/extern/Eigen3/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h @@ -3,28 +3,15 @@ // // Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr> // -// Eigen is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 3 of the License, or (at your option) any later version. -// -// Alternatively, you can redistribute it and/or -// modify it under the terms of the GNU General Public License as -// published by the Free Software Foundation; either version 2 of -// the License, or (at your option) any later version. -// -// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License and a copy of the GNU General Public License along with -// Eigen. If not, see <http://www.gnu.org/licenses/>. +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. #ifndef EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_H #define EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_H +namespace Eigen { + namespace internal { /********************************************************************** @@ -42,14 +29,14 @@ struct tribb_kernel; template <typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, - int ResStorageOrder, int UpLo> + int ResStorageOrder, int UpLo, int Version = Specialized> struct general_matrix_matrix_triangular_product; // as usual if the result is row major => we transpose the product template <typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, - typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int UpLo> -struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor,UpLo> -{ + typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int UpLo, int Version> +struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor,UpLo,Version> +{ typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar; static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* lhs, Index lhsStride, const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride, ResScalar alpha) @@ -63,8 +50,8 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder, }; template <typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, - typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int UpLo> -struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor,UpLo> + typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int UpLo, int Version> +struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor,UpLo,Version> { typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar; static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* _lhs, Index lhsStride, @@ -201,13 +188,13 @@ TriangularView<MatrixType,UpLo>& TriangularView<MatrixType,UpLo>::assignProduct( typedef internal::blas_traits<Lhs> LhsBlasTraits; typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhs; typedef typename internal::remove_all<ActualLhs>::type _ActualLhs; - const ActualLhs actualLhs = LhsBlasTraits::extract(prod.lhs()); + typename internal::add_const_on_value_type<ActualLhs>::type actualLhs = LhsBlasTraits::extract(prod.lhs()); typedef typename internal::remove_all<typename ProductDerived::RhsNested>::type Rhs; typedef internal::blas_traits<Rhs> RhsBlasTraits; typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhs; typedef typename internal::remove_all<ActualRhs>::type _ActualRhs; - const ActualRhs actualRhs = RhsBlasTraits::extract(prod.rhs()); + typename internal::add_const_on_value_type<ActualRhs>::type actualRhs = RhsBlasTraits::extract(prod.rhs()); typename ProductDerived::Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived()); @@ -222,4 +209,6 @@ TriangularView<MatrixType,UpLo>& TriangularView<MatrixType,UpLo>::assignProduct( return *this; } +} // end namespace Eigen + #endif // EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_H diff --git a/extern/Eigen3/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_MKL.h b/extern/Eigen3/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_MKL.h new file mode 100644 index 00000000000..3deed068e39 --- /dev/null +++ b/extern/Eigen3/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_MKL.h @@ -0,0 +1,146 @@ +/* + Copyright (c) 2011, Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ******************************************************************************** + * Content : Eigen bindings to Intel(R) MKL + * Level 3 BLAS SYRK/HERK implementation. + ******************************************************************************** +*/ + +#ifndef EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_MKL_H +#define EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_MKL_H + +namespace Eigen { + +namespace internal { + +template <typename Index, typename Scalar, int AStorageOrder, bool ConjugateA, int ResStorageOrder, int UpLo> +struct general_matrix_matrix_rankupdate : + general_matrix_matrix_triangular_product< + Index,Scalar,AStorageOrder,ConjugateA,Scalar,AStorageOrder,ConjugateA,ResStorageOrder,UpLo,BuiltIn> {}; + + +// try to go to BLAS specialization +#define EIGEN_MKL_RANKUPDATE_SPECIALIZE(Scalar) \ +template <typename Index, int LhsStorageOrder, bool ConjugateLhs, \ + int RhsStorageOrder, bool ConjugateRhs, int UpLo> \ +struct general_matrix_matrix_triangular_product<Index,Scalar,LhsStorageOrder,ConjugateLhs, \ + Scalar,RhsStorageOrder,ConjugateRhs,ColMajor,UpLo,Specialized> { \ + static EIGEN_STRONG_INLINE void run(Index size, Index depth,const Scalar* lhs, Index lhsStride, \ + const Scalar* rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha) \ + { \ + if (lhs==rhs) { \ + general_matrix_matrix_rankupdate<Index,Scalar,LhsStorageOrder,ConjugateLhs,ColMajor,UpLo> \ + ::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha); \ + } else { \ + general_matrix_matrix_triangular_product<Index, \ + Scalar, LhsStorageOrder, ConjugateLhs, \ + Scalar, RhsStorageOrder, ConjugateRhs, \ + ColMajor, UpLo, BuiltIn> \ + ::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha); \ + } \ + } \ +}; + +EIGEN_MKL_RANKUPDATE_SPECIALIZE(double) +//EIGEN_MKL_RANKUPDATE_SPECIALIZE(dcomplex) +EIGEN_MKL_RANKUPDATE_SPECIALIZE(float) +//EIGEN_MKL_RANKUPDATE_SPECIALIZE(scomplex) + +// SYRK for float/double +#define EIGEN_MKL_RANKUPDATE_R(EIGTYPE, MKLTYPE, MKLFUNC) \ +template <typename Index, int AStorageOrder, bool ConjugateA, int UpLo> \ +struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,ColMajor,UpLo> { \ + enum { \ + IsLower = (UpLo&Lower) == Lower, \ + LowUp = IsLower ? Lower : Upper, \ + conjA = ((AStorageOrder==ColMajor) && ConjugateA) ? 1 : 0 \ + }; \ + static EIGEN_STRONG_INLINE void run(Index size, Index depth,const EIGTYPE* lhs, Index lhsStride, \ + const EIGTYPE* rhs, Index rhsStride, EIGTYPE* res, Index resStride, EIGTYPE alpha) \ + { \ + /* typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs;*/ \ +\ + MKL_INT lda=lhsStride, ldc=resStride, n=size, k=depth; \ + char uplo=(IsLower) ? 'L' : 'U', trans=(AStorageOrder==RowMajor) ? 'T':'N'; \ + MKLTYPE alpha_, beta_; \ +\ +/* Set alpha_ & beta_ */ \ + assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \ + assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(beta_, EIGTYPE(1)); \ + MKLFUNC(&uplo, &trans, &n, &k, &alpha_, lhs, &lda, &beta_, res, &ldc); \ + } \ +}; + +// HERK for complex data +#define EIGEN_MKL_RANKUPDATE_C(EIGTYPE, MKLTYPE, RTYPE, MKLFUNC) \ +template <typename Index, int AStorageOrder, bool ConjugateA, int UpLo> \ +struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,ColMajor,UpLo> { \ + enum { \ + IsLower = (UpLo&Lower) == Lower, \ + LowUp = IsLower ? Lower : Upper, \ + conjA = (((AStorageOrder==ColMajor) && ConjugateA) || ((AStorageOrder==RowMajor) && !ConjugateA)) ? 1 : 0 \ + }; \ + static EIGEN_STRONG_INLINE void run(Index size, Index depth,const EIGTYPE* lhs, Index lhsStride, \ + const EIGTYPE* rhs, Index rhsStride, EIGTYPE* res, Index resStride, EIGTYPE alpha) \ + { \ + typedef Matrix<EIGTYPE, Dynamic, Dynamic, AStorageOrder> MatrixType; \ +\ + MKL_INT lda=lhsStride, ldc=resStride, n=size, k=depth; \ + char uplo=(IsLower) ? 'L' : 'U', trans=(AStorageOrder==RowMajor) ? 'C':'N'; \ + RTYPE alpha_, beta_; \ + const EIGTYPE* a_ptr; \ +\ +/* Set alpha_ & beta_ */ \ +/* assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); */\ +/* assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(beta_, EIGTYPE(1));*/ \ + alpha_ = alpha.real(); \ + beta_ = 1.0; \ +/* Copy with conjugation in some cases*/ \ + MatrixType a; \ + if (conjA) { \ + Map<const MatrixType, 0, OuterStride<> > mapA(lhs,n,k,OuterStride<>(lhsStride)); \ + a = mapA.conjugate(); \ + lda = a.outerStride(); \ + a_ptr = a.data(); \ + } else a_ptr=lhs; \ + MKLFUNC(&uplo, &trans, &n, &k, &alpha_, (MKLTYPE*)a_ptr, &lda, &beta_, (MKLTYPE*)res, &ldc); \ + } \ +}; + + +EIGEN_MKL_RANKUPDATE_R(double, double, dsyrk) +EIGEN_MKL_RANKUPDATE_R(float, float, ssyrk) + +//EIGEN_MKL_RANKUPDATE_C(dcomplex, MKL_Complex16, double, zherk) +//EIGEN_MKL_RANKUPDATE_C(scomplex, MKL_Complex8, double, cherk) + + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_MKL_H diff --git a/extern/Eigen3/Eigen/src/Core/products/GeneralMatrixMatrix_MKL.h b/extern/Eigen3/Eigen/src/Core/products/GeneralMatrixMatrix_MKL.h new file mode 100644 index 00000000000..060af328ebe --- /dev/null +++ b/extern/Eigen3/Eigen/src/Core/products/GeneralMatrixMatrix_MKL.h @@ -0,0 +1,118 @@ +/* + Copyright (c) 2011, Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ******************************************************************************** + * Content : Eigen bindings to Intel(R) MKL + * General matrix-matrix product functionality based on ?GEMM. + ******************************************************************************** +*/ + +#ifndef EIGEN_GENERAL_MATRIX_MATRIX_MKL_H +#define EIGEN_GENERAL_MATRIX_MATRIX_MKL_H + +namespace Eigen { + +namespace internal { + +/********************************************************************** +* This file implements general matrix-matrix multiplication using BLAS +* gemm function via partial specialization of +* general_matrix_matrix_product::run(..) method for float, double, +* std::complex<float> and std::complex<double> types +**********************************************************************/ + +// gemm specialization + +#define GEMM_SPECIALIZATION(EIGTYPE, EIGPREFIX, MKLTYPE, MKLPREFIX) \ +template< \ + typename Index, \ + int LhsStorageOrder, bool ConjugateLhs, \ + int RhsStorageOrder, bool ConjugateRhs> \ +struct general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor> \ +{ \ +static void run(Index rows, Index cols, Index depth, \ + const EIGTYPE* _lhs, Index lhsStride, \ + const EIGTYPE* _rhs, Index rhsStride, \ + EIGTYPE* res, Index resStride, \ + EIGTYPE alpha, \ + level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/, \ + GemmParallelInfo<Index>* /*info = 0*/) \ +{ \ + using std::conj; \ +\ + char transa, transb; \ + MKL_INT m, n, k, lda, ldb, ldc; \ + const EIGTYPE *a, *b; \ + MKLTYPE alpha_, beta_; \ + MatrixX##EIGPREFIX a_tmp, b_tmp; \ + EIGTYPE myone(1);\ +\ +/* Set transpose options */ \ + transa = (LhsStorageOrder==RowMajor) ? ((ConjugateLhs) ? 'C' : 'T') : 'N'; \ + transb = (RhsStorageOrder==RowMajor) ? ((ConjugateRhs) ? 'C' : 'T') : 'N'; \ +\ +/* Set m, n, k */ \ + m = (MKL_INT)rows; \ + n = (MKL_INT)cols; \ + k = (MKL_INT)depth; \ +\ +/* Set alpha_ & beta_ */ \ + assign_scalar_eig2mkl(alpha_, alpha); \ + assign_scalar_eig2mkl(beta_, myone); \ +\ +/* Set lda, ldb, ldc */ \ + lda = (MKL_INT)lhsStride; \ + ldb = (MKL_INT)rhsStride; \ + ldc = (MKL_INT)resStride; \ +\ +/* Set a, b, c */ \ + if ((LhsStorageOrder==ColMajor) && (ConjugateLhs)) { \ + Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,m,k,OuterStride<>(lhsStride)); \ + a_tmp = lhs.conjugate(); \ + a = a_tmp.data(); \ + lda = a_tmp.outerStride(); \ + } else a = _lhs; \ +\ + if ((RhsStorageOrder==ColMajor) && (ConjugateRhs)) { \ + Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,k,n,OuterStride<>(rhsStride)); \ + b_tmp = rhs.conjugate(); \ + b = b_tmp.data(); \ + ldb = b_tmp.outerStride(); \ + } else b = _rhs; \ +\ + MKLPREFIX##gemm(&transa, &transb, &m, &n, &k, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \ +}}; + +GEMM_SPECIALIZATION(double, d, double, d) +GEMM_SPECIALIZATION(float, f, float, s) +GEMM_SPECIALIZATION(dcomplex, cd, MKL_Complex16, z) +GEMM_SPECIALIZATION(scomplex, cf, MKL_Complex8, c) + +} // end namespase internal + +} // end namespace Eigen + +#endif // EIGEN_GENERAL_MATRIX_MATRIX_MKL_H diff --git a/extern/Eigen3/Eigen/src/Core/products/GeneralMatrixVector.h b/extern/Eigen3/Eigen/src/Core/products/GeneralMatrixVector.h index e0e2cbf8f62..ba1f73957db 100644 --- a/extern/Eigen3/Eigen/src/Core/products/GeneralMatrixVector.h +++ b/extern/Eigen3/Eigen/src/Core/products/GeneralMatrixVector.h @@ -3,28 +3,15 @@ // // Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr> // -// Eigen is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 3 of the License, or (at your option) any later version. -// -// Alternatively, you can redistribute it and/or -// modify it under the terms of the GNU General Public License as -// published by the Free Software Foundation; either version 2 of -// the License, or (at your option) any later version. -// -// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License and a copy of the GNU General Public License along with -// Eigen. If not, see <http://www.gnu.org/licenses/>. +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. #ifndef EIGEN_GENERAL_MATRIX_VECTOR_H #define EIGEN_GENERAL_MATRIX_VECTOR_H +namespace Eigen { + namespace internal { /* Optimized col-major matrix * vector product: @@ -40,8 +27,8 @@ namespace internal { * |cplx |real |cplx | invalid, the caller has to do tmp: = A * B; C += alpha*tmp * |cplx |real |real | optimal case, vectorization possible via real-cplx mul */ -template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs> -struct general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjugateLhs,RhsScalar,ConjugateRhs> +template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs, int Version> +struct general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjugateLhs,RhsScalar,ConjugateRhs,Version> { typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar; @@ -99,7 +86,7 @@ EIGEN_DONT_INLINE static void run( // How many coeffs of the result do we have to skip to be aligned. // Here we assume data are at least aligned on the base scalar type. - Index alignedStart = first_aligned(res,size); + Index alignedStart = internal::first_aligned(res,size); Index alignedSize = ResPacketSize>1 ? alignedStart + ((size-alignedStart) & ~ResPacketAlignedMask) : 0; const Index peeledSize = peels>1 ? alignedStart + ((alignedSize-alignedStart) & ~PeelAlignedMask) : alignedStart; @@ -109,7 +96,7 @@ EIGEN_DONT_INLINE static void run( : FirstAligned; // we cannot assume the first element is aligned because of sub-matrices - const Index lhsAlignmentOffset = first_aligned(lhs,size); + const Index lhsAlignmentOffset = internal::first_aligned(lhs,size); // find how many columns do we have to skip to be aligned with the result (if possible) Index skipColumns = 0; @@ -296,8 +283,8 @@ EIGEN_DONT_INLINE static void run( * - alpha is always a complex (or converted to a complex) * - no vectorization */ -template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs> -struct general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjugateLhs,RhsScalar,ConjugateRhs> +template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs, int Version> +struct general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjugateLhs,RhsScalar,ConjugateRhs,Version> { typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar; @@ -351,7 +338,7 @@ EIGEN_DONT_INLINE static void run( // How many coeffs of the result do we have to skip to be aligned. // Here we assume data are at least aligned on the base scalar type // if that's not the case then vectorization is discarded, see below. - Index alignedStart = first_aligned(rhs, depth); + Index alignedStart = internal::first_aligned(rhs, depth); Index alignedSize = RhsPacketSize>1 ? alignedStart + ((depth-alignedStart) & ~RhsPacketAlignedMask) : 0; const Index peeledSize = peels>1 ? alignedStart + ((alignedSize-alignedStart) & ~PeelAlignedMask) : alignedStart; @@ -361,7 +348,7 @@ EIGEN_DONT_INLINE static void run( : FirstAligned; // we cannot assume the first element is aligned because of sub-matrices - const Index lhsAlignmentOffset = first_aligned(lhs,depth); + const Index lhsAlignmentOffset = internal::first_aligned(lhs,depth); // find how many rows do we have to skip to be aligned with rhs (if possible) Index skipRows = 0; @@ -556,4 +543,6 @@ EIGEN_DONT_INLINE static void run( } // end namespace internal +} // end namespace Eigen + #endif // EIGEN_GENERAL_MATRIX_VECTOR_H diff --git a/extern/Eigen3/Eigen/src/Core/products/GeneralMatrixVector_MKL.h b/extern/Eigen3/Eigen/src/Core/products/GeneralMatrixVector_MKL.h new file mode 100644 index 00000000000..e9de6af3ed1 --- /dev/null +++ b/extern/Eigen3/Eigen/src/Core/products/GeneralMatrixVector_MKL.h @@ -0,0 +1,131 @@ +/* + Copyright (c) 2011, Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ******************************************************************************** + * Content : Eigen bindings to Intel(R) MKL + * General matrix-vector product functionality based on ?GEMV. + ******************************************************************************** +*/ + +#ifndef EIGEN_GENERAL_MATRIX_VECTOR_MKL_H +#define EIGEN_GENERAL_MATRIX_VECTOR_MKL_H + +namespace Eigen { + +namespace internal { + +/********************************************************************** +* This file implements general matrix-vector multiplication using BLAS +* gemv function via partial specialization of +* general_matrix_vector_product::run(..) method for float, double, +* std::complex<float> and std::complex<double> types +**********************************************************************/ + +// gemv specialization + +template<typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs> +struct general_matrix_vector_product_gemv : + general_matrix_vector_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,ConjugateRhs,BuiltIn> {}; + +#define EIGEN_MKL_GEMV_SPECIALIZE(Scalar) \ +template<typename Index, bool ConjugateLhs, bool ConjugateRhs> \ +struct general_matrix_vector_product<Index,Scalar,ColMajor,ConjugateLhs,Scalar,ConjugateRhs,Specialized> { \ +static EIGEN_DONT_INLINE void run( \ + Index rows, Index cols, \ + const Scalar* lhs, Index lhsStride, \ + const Scalar* rhs, Index rhsIncr, \ + Scalar* res, Index resIncr, Scalar alpha) \ +{ \ + if (ConjugateLhs) { \ + general_matrix_vector_product<Index,Scalar,ColMajor,ConjugateLhs,Scalar,ConjugateRhs,BuiltIn>::run( \ + rows, cols, lhs, lhsStride, rhs, rhsIncr, res, resIncr, alpha); \ + } else { \ + general_matrix_vector_product_gemv<Index,Scalar,ColMajor,ConjugateLhs,Scalar,ConjugateRhs>::run( \ + rows, cols, lhs, lhsStride, rhs, rhsIncr, res, resIncr, alpha); \ + } \ +} \ +}; \ +template<typename Index, bool ConjugateLhs, bool ConjugateRhs> \ +struct general_matrix_vector_product<Index,Scalar,RowMajor,ConjugateLhs,Scalar,ConjugateRhs,Specialized> { \ +static EIGEN_DONT_INLINE void run( \ + Index rows, Index cols, \ + const Scalar* lhs, Index lhsStride, \ + const Scalar* rhs, Index rhsIncr, \ + Scalar* res, Index resIncr, Scalar alpha) \ +{ \ + general_matrix_vector_product_gemv<Index,Scalar,RowMajor,ConjugateLhs,Scalar,ConjugateRhs>::run( \ + rows, cols, lhs, lhsStride, rhs, rhsIncr, res, resIncr, alpha); \ +} \ +}; \ + +EIGEN_MKL_GEMV_SPECIALIZE(double) +EIGEN_MKL_GEMV_SPECIALIZE(float) +EIGEN_MKL_GEMV_SPECIALIZE(dcomplex) +EIGEN_MKL_GEMV_SPECIALIZE(scomplex) + +#define EIGEN_MKL_GEMV_SPECIALIZATION(EIGTYPE,MKLTYPE,MKLPREFIX) \ +template<typename Index, int LhsStorageOrder, bool ConjugateLhs, bool ConjugateRhs> \ +struct general_matrix_vector_product_gemv<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,ConjugateRhs> \ +{ \ +typedef Matrix<EIGTYPE,Dynamic,1,ColMajor> GEMVVector;\ +\ +static EIGEN_DONT_INLINE void run( \ + Index rows, Index cols, \ + const EIGTYPE* lhs, Index lhsStride, \ + const EIGTYPE* rhs, Index rhsIncr, \ + EIGTYPE* res, Index resIncr, EIGTYPE alpha) \ +{ \ + MKL_INT m=rows, n=cols, lda=lhsStride, incx=rhsIncr, incy=resIncr; \ + MKLTYPE alpha_, beta_; \ + const EIGTYPE *x_ptr, myone(1); \ + char trans=(LhsStorageOrder==ColMajor) ? 'N' : (ConjugateLhs) ? 'C' : 'T'; \ + if (LhsStorageOrder==RowMajor) { \ + m=cols; \ + n=rows; \ + }\ + assign_scalar_eig2mkl(alpha_, alpha); \ + assign_scalar_eig2mkl(beta_, myone); \ + GEMVVector x_tmp; \ + if (ConjugateRhs) { \ + Map<const GEMVVector, 0, InnerStride<> > map_x(rhs,cols,1,InnerStride<>(incx)); \ + x_tmp=map_x.conjugate(); \ + x_ptr=x_tmp.data(); \ + incx=1; \ + } else x_ptr=rhs; \ + MKLPREFIX##gemv(&trans, &m, &n, &alpha_, (const MKLTYPE*)lhs, &lda, (const MKLTYPE*)x_ptr, &incx, &beta_, (MKLTYPE*)res, &incy); \ +}\ +}; + +EIGEN_MKL_GEMV_SPECIALIZATION(double, double, d) +EIGEN_MKL_GEMV_SPECIALIZATION(float, float, s) +EIGEN_MKL_GEMV_SPECIALIZATION(dcomplex, MKL_Complex16, z) +EIGEN_MKL_GEMV_SPECIALIZATION(scomplex, MKL_Complex8, c) + +} // end namespase internal + +} // end namespace Eigen + +#endif // EIGEN_GENERAL_MATRIX_VECTOR_MKL_H diff --git a/extern/Eigen3/Eigen/src/Core/products/Parallelizer.h b/extern/Eigen3/Eigen/src/Core/products/Parallelizer.h index ecdedc363ce..5c3e9b7ac15 100644 --- a/extern/Eigen3/Eigen/src/Core/products/Parallelizer.h +++ b/extern/Eigen3/Eigen/src/Core/products/Parallelizer.h @@ -3,28 +3,15 @@ // // Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr> // -// Eigen is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 3 of the License, or (at your option) any later version. -// -// Alternatively, you can redistribute it and/or -// modify it under the terms of the GNU General Public License as -// published by the Free Software Foundation; either version 2 of -// the License, or (at your option) any later version. -// -// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License and a copy of the GNU General Public License along with -// Eigen. If not, see <http://www.gnu.org/licenses/>. +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. #ifndef EIGEN_PARALLELIZER_H #define EIGEN_PARALLELIZER_H +namespace Eigen { + namespace internal { /** \internal */ @@ -55,12 +42,23 @@ inline void manage_multi_threading(Action action, int* v) } } +} + +/** Must be call first when calling Eigen from multiple threads */ +inline void initParallel() +{ + int nbt; + internal::manage_multi_threading(GetAction, &nbt); + std::ptrdiff_t l1, l2; + internal::manage_caching_sizes(GetAction, &l1, &l2); +} + /** \returns the max number of threads reserved for Eigen * \sa setNbThreads */ inline int nbThreads() { int ret; - manage_multi_threading(GetAction, &ret); + internal::manage_multi_threading(GetAction, &ret); return ret; } @@ -68,9 +66,11 @@ inline int nbThreads() * \sa nbThreads */ inline void setNbThreads(int v) { - manage_multi_threading(SetAction, &v); + internal::manage_multi_threading(SetAction, &v); } +namespace internal { + template<typename Index> struct GemmParallelInfo { GemmParallelInfo() : sync(-1), users(0), rhs_start(0), rhs_length(0) {} @@ -85,7 +85,9 @@ template<typename Index> struct GemmParallelInfo template<bool Condition, typename Functor, typename Index> void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpose) { -#ifndef EIGEN_HAS_OPENMP + // TODO when EIGEN_USE_BLAS is defined, + // we should still enable OMP for other scalar types +#if !(defined (EIGEN_HAS_OPENMP)) || defined (EIGEN_USE_BLAS) // FIXME the transpose variable is only needed to properly split // the matrix product when multithreading is enabled. This is a temporary // fix to support row-major destination matrices. This whole @@ -117,6 +119,7 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpos if(threads==1) return func(0,rows, 0,cols); + Eigen::initParallel(); func.initParallelSession(); if(transpose) @@ -151,4 +154,6 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpos } // end namespace internal +} // end namespace Eigen + #endif // EIGEN_PARALLELIZER_H diff --git a/extern/Eigen3/Eigen/src/Core/products/SelfadjointMatrixMatrix.h b/extern/Eigen3/Eigen/src/Core/products/SelfadjointMatrixMatrix.h index ccd757cfaf8..48209636eed 100644 --- a/extern/Eigen3/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +++ b/extern/Eigen3/Eigen/src/Core/products/SelfadjointMatrixMatrix.h @@ -3,28 +3,15 @@ // // Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr> // -// Eigen is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 3 of the License, or (at your option) any later version. -// -// Alternatively, you can redistribute it and/or -// modify it under the terms of the GNU General Public License as -// published by the Free Software Foundation; either version 2 of -// the License, or (at your option) any later version. -// -// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License and a copy of the GNU General Public License along with -// Eigen. If not, see <http://www.gnu.org/licenses/>. +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. #ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_H #define EIGEN_SELFADJOINT_MATRIX_MATRIX_H +namespace Eigen { + namespace internal { // pack a selfadjoint block diagonal for use with the gebp_kernel @@ -400,8 +387,8 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false> { eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols()); - const ActualLhsType lhs = LhsBlasTraits::extract(m_lhs); - const ActualRhsType rhs = RhsBlasTraits::extract(m_rhs); + typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs); + typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs); Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs) * RhsBlasTraits::extractScalarFactor(m_rhs); @@ -424,4 +411,6 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false> } }; +} // end namespace Eigen + #endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_H diff --git a/extern/Eigen3/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h b/extern/Eigen3/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h new file mode 100644 index 00000000000..4e5c4125c01 --- /dev/null +++ b/extern/Eigen3/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h @@ -0,0 +1,295 @@ +/* + Copyright (c) 2011, Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ******************************************************************************** + * Content : Eigen bindings to Intel(R) MKL + * Self adjoint matrix * matrix product functionality based on ?SYMM/?HEMM. + ******************************************************************************** +*/ + +#ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_MKL_H +#define EIGEN_SELFADJOINT_MATRIX_MATRIX_MKL_H + +namespace Eigen { + +namespace internal { + + +/* Optimized selfadjoint matrix * matrix (?SYMM/?HEMM) product */ + +#define EIGEN_MKL_SYMM_L(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ +template <typename Index, \ + int LhsStorageOrder, bool ConjugateLhs, \ + int RhsStorageOrder, bool ConjugateRhs> \ +struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor> \ +{\ +\ + static EIGEN_DONT_INLINE void run( \ + Index rows, Index cols, \ + const EIGTYPE* _lhs, Index lhsStride, \ + const EIGTYPE* _rhs, Index rhsStride, \ + EIGTYPE* res, Index resStride, \ + EIGTYPE alpha) \ + { \ + char side='L', uplo='L'; \ + MKL_INT m, n, lda, ldb, ldc; \ + const EIGTYPE *a, *b; \ + MKLTYPE alpha_, beta_; \ + MatrixX##EIGPREFIX b_tmp; \ + EIGTYPE myone(1);\ +\ +/* Set transpose options */ \ +/* Set m, n, k */ \ + m = (MKL_INT)rows; \ + n = (MKL_INT)cols; \ +\ +/* Set alpha_ & beta_ */ \ + assign_scalar_eig2mkl(alpha_, alpha); \ + assign_scalar_eig2mkl(beta_, myone); \ +\ +/* Set lda, ldb, ldc */ \ + lda = (MKL_INT)lhsStride; \ + ldb = (MKL_INT)rhsStride; \ + ldc = (MKL_INT)resStride; \ +\ +/* Set a, b, c */ \ + if (LhsStorageOrder==RowMajor) uplo='U'; \ + a = _lhs; \ +\ + if (RhsStorageOrder==RowMajor) { \ + Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \ + b_tmp = rhs.adjoint(); \ + b = b_tmp.data(); \ + ldb = b_tmp.outerStride(); \ + } else b = _rhs; \ +\ + MKLPREFIX##symm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \ +\ + } \ +}; + + +#define EIGEN_MKL_HEMM_L(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ +template <typename Index, \ + int LhsStorageOrder, bool ConjugateLhs, \ + int RhsStorageOrder, bool ConjugateRhs> \ +struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor> \ +{\ + static EIGEN_DONT_INLINE void run( \ + Index rows, Index cols, \ + const EIGTYPE* _lhs, Index lhsStride, \ + const EIGTYPE* _rhs, Index rhsStride, \ + EIGTYPE* res, Index resStride, \ + EIGTYPE alpha) \ + { \ + char side='L', uplo='L'; \ + MKL_INT m, n, lda, ldb, ldc; \ + const EIGTYPE *a, *b; \ + MKLTYPE alpha_, beta_; \ + MatrixX##EIGPREFIX b_tmp; \ + Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> a_tmp; \ + EIGTYPE myone(1); \ +\ +/* Set transpose options */ \ +/* Set m, n, k */ \ + m = (MKL_INT)rows; \ + n = (MKL_INT)cols; \ +\ +/* Set alpha_ & beta_ */ \ + assign_scalar_eig2mkl(alpha_, alpha); \ + assign_scalar_eig2mkl(beta_, myone); \ +\ +/* Set lda, ldb, ldc */ \ + lda = (MKL_INT)lhsStride; \ + ldb = (MKL_INT)rhsStride; \ + ldc = (MKL_INT)resStride; \ +\ +/* Set a, b, c */ \ + if (((LhsStorageOrder==ColMajor) && ConjugateLhs) || ((LhsStorageOrder==RowMajor) && (!ConjugateLhs))) { \ + Map<const Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder>, 0, OuterStride<> > lhs(_lhs,m,m,OuterStride<>(lhsStride)); \ + a_tmp = lhs.conjugate(); \ + a = a_tmp.data(); \ + lda = a_tmp.outerStride(); \ + } else a = _lhs; \ + if (LhsStorageOrder==RowMajor) uplo='U'; \ +\ + if (RhsStorageOrder==ColMajor && (!ConjugateRhs)) { \ + b = _rhs; } \ + else { \ + if (RhsStorageOrder==ColMajor && ConjugateRhs) { \ + Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,m,n,OuterStride<>(rhsStride)); \ + b_tmp = rhs.conjugate(); \ + } else \ + if (ConjugateRhs) { \ + Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \ + b_tmp = rhs.adjoint(); \ + } else { \ + Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \ + b_tmp = rhs.transpose(); \ + } \ + b = b_tmp.data(); \ + ldb = b_tmp.outerStride(); \ + } \ +\ + MKLPREFIX##hemm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \ +\ + } \ +}; + +EIGEN_MKL_SYMM_L(double, double, d, d) +EIGEN_MKL_SYMM_L(float, float, f, s) +EIGEN_MKL_HEMM_L(dcomplex, MKL_Complex16, cd, z) +EIGEN_MKL_HEMM_L(scomplex, MKL_Complex8, cf, c) + + +/* Optimized matrix * selfadjoint matrix (?SYMM/?HEMM) product */ + +#define EIGEN_MKL_SYMM_R(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ +template <typename Index, \ + int LhsStorageOrder, bool ConjugateLhs, \ + int RhsStorageOrder, bool ConjugateRhs> \ +struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor> \ +{\ +\ + static EIGEN_DONT_INLINE void run( \ + Index rows, Index cols, \ + const EIGTYPE* _lhs, Index lhsStride, \ + const EIGTYPE* _rhs, Index rhsStride, \ + EIGTYPE* res, Index resStride, \ + EIGTYPE alpha) \ + { \ + char side='R', uplo='L'; \ + MKL_INT m, n, lda, ldb, ldc; \ + const EIGTYPE *a, *b; \ + MKLTYPE alpha_, beta_; \ + MatrixX##EIGPREFIX b_tmp; \ + EIGTYPE myone(1);\ +\ +/* Set m, n, k */ \ + m = (MKL_INT)rows; \ + n = (MKL_INT)cols; \ +\ +/* Set alpha_ & beta_ */ \ + assign_scalar_eig2mkl(alpha_, alpha); \ + assign_scalar_eig2mkl(beta_, myone); \ +\ +/* Set lda, ldb, ldc */ \ + lda = (MKL_INT)rhsStride; \ + ldb = (MKL_INT)lhsStride; \ + ldc = (MKL_INT)resStride; \ +\ +/* Set a, b, c */ \ + if (RhsStorageOrder==RowMajor) uplo='U'; \ + a = _rhs; \ +\ + if (LhsStorageOrder==RowMajor) { \ + Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,n,m,OuterStride<>(rhsStride)); \ + b_tmp = lhs.adjoint(); \ + b = b_tmp.data(); \ + ldb = b_tmp.outerStride(); \ + } else b = _lhs; \ +\ + MKLPREFIX##symm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \ +\ + } \ +}; + + +#define EIGEN_MKL_HEMM_R(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ +template <typename Index, \ + int LhsStorageOrder, bool ConjugateLhs, \ + int RhsStorageOrder, bool ConjugateRhs> \ +struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor> \ +{\ + static EIGEN_DONT_INLINE void run( \ + Index rows, Index cols, \ + const EIGTYPE* _lhs, Index lhsStride, \ + const EIGTYPE* _rhs, Index rhsStride, \ + EIGTYPE* res, Index resStride, \ + EIGTYPE alpha) \ + { \ + char side='R', uplo='L'; \ + MKL_INT m, n, lda, ldb, ldc; \ + const EIGTYPE *a, *b; \ + MKLTYPE alpha_, beta_; \ + MatrixX##EIGPREFIX b_tmp; \ + Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> a_tmp; \ + EIGTYPE myone(1); \ +\ +/* Set m, n, k */ \ + m = (MKL_INT)rows; \ + n = (MKL_INT)cols; \ +\ +/* Set alpha_ & beta_ */ \ + assign_scalar_eig2mkl(alpha_, alpha); \ + assign_scalar_eig2mkl(beta_, myone); \ +\ +/* Set lda, ldb, ldc */ \ + lda = (MKL_INT)rhsStride; \ + ldb = (MKL_INT)lhsStride; \ + ldc = (MKL_INT)resStride; \ +\ +/* Set a, b, c */ \ + if (((RhsStorageOrder==ColMajor) && ConjugateRhs) || ((RhsStorageOrder==RowMajor) && (!ConjugateRhs))) { \ + Map<const Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder>, 0, OuterStride<> > rhs(_rhs,n,n,OuterStride<>(rhsStride)); \ + a_tmp = rhs.conjugate(); \ + a = a_tmp.data(); \ + lda = a_tmp.outerStride(); \ + } else a = _rhs; \ + if (RhsStorageOrder==RowMajor) uplo='U'; \ +\ + if (LhsStorageOrder==ColMajor && (!ConjugateLhs)) { \ + b = _lhs; } \ + else { \ + if (LhsStorageOrder==ColMajor && ConjugateLhs) { \ + Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,m,n,OuterStride<>(lhsStride)); \ + b_tmp = lhs.conjugate(); \ + } else \ + if (ConjugateLhs) { \ + Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,n,m,OuterStride<>(lhsStride)); \ + b_tmp = lhs.adjoint(); \ + } else { \ + Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,n,m,OuterStride<>(lhsStride)); \ + b_tmp = lhs.transpose(); \ + } \ + b = b_tmp.data(); \ + ldb = b_tmp.outerStride(); \ + } \ +\ + MKLPREFIX##hemm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \ + } \ +}; + +EIGEN_MKL_SYMM_R(double, double, d, d) +EIGEN_MKL_SYMM_R(float, float, f, s) +EIGEN_MKL_HEMM_R(dcomplex, MKL_Complex16, cd, z) +EIGEN_MKL_HEMM_R(scomplex, MKL_Complex8, cf, c) + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_MKL_H diff --git a/extern/Eigen3/Eigen/src/Core/products/SelfadjointMatrixVector.h b/extern/Eigen3/Eigen/src/Core/products/SelfadjointMatrixVector.h index d6121fc07bd..c3145c69a5f 100644 --- a/extern/Eigen3/Eigen/src/Core/products/SelfadjointMatrixVector.h +++ b/extern/Eigen3/Eigen/src/Core/products/SelfadjointMatrixVector.h @@ -3,28 +3,15 @@ // // Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr> // -// Eigen is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 3 of the License, or (at your option) any later version. -// -// Alternatively, you can redistribute it and/or -// modify it under the terms of the GNU General Public License as -// published by the Free Software Foundation; either version 2 of -// the License, or (at your option) any later version. -// -// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License and a copy of the GNU General Public License along with -// Eigen. If not, see <http://www.gnu.org/licenses/>. +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. #ifndef EIGEN_SELFADJOINT_MATRIX_VECTOR_H #define EIGEN_SELFADJOINT_MATRIX_VECTOR_H +namespace Eigen { + namespace internal { /* Optimized selfadjoint matrix * vector product: @@ -32,8 +19,15 @@ namespace internal { * the number of load/stores of the result by a factor 2 and to reduce * the instruction dependency. */ -template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs> -static EIGEN_DONT_INLINE void product_selfadjoint_vector( + +template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs, int Version=Specialized> +struct selfadjoint_matrix_vector_product; + +template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs, int Version> +struct selfadjoint_matrix_vector_product + +{ +static EIGEN_DONT_INLINE void run( Index size, const Scalar* lhs, Index lhsStride, const Scalar* _rhs, Index rhsIncr, @@ -85,14 +79,14 @@ static EIGEN_DONT_INLINE void product_selfadjoint_vector( Scalar t1 = cjAlpha * rhs[j+1]; Packet ptmp1 = pset1<Packet>(t1); - Scalar t2 = 0; + Scalar t2(0); Packet ptmp2 = pset1<Packet>(t2); - Scalar t3 = 0; + Scalar t3(0); Packet ptmp3 = pset1<Packet>(t3); size_t starti = FirstTriangular ? 0 : j+2; size_t endi = FirstTriangular ? j : size; - size_t alignedStart = (starti) + first_aligned(&res[starti], endi-starti); + size_t alignedStart = (starti) + internal::first_aligned(&res[starti], endi-starti); size_t alignedEnd = alignedStart + ((endi-alignedStart)/(PacketSize))*(PacketSize); // TODO make sure this product is a real * complex and that the rhs is properly conjugated if needed @@ -148,7 +142,7 @@ static EIGEN_DONT_INLINE void product_selfadjoint_vector( register const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride; Scalar t1 = cjAlpha * rhs[j]; - Scalar t2 = 0; + Scalar t2(0); // TODO make sure this product is a real * complex and that the rhs is properly conjugated if needed res[j] += cjd.pmul(internal::real(A0[j]), t1); for (Index i=FirstTriangular ? 0 : j+1; i<(FirstTriangular ? j : size); i++) @@ -159,6 +153,7 @@ static EIGEN_DONT_INLINE void product_selfadjoint_vector( res[j] += alpha * t2; } } +}; } // end namespace internal @@ -193,8 +188,8 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true> eigen_assert(dest.rows()==m_lhs.rows() && dest.cols()==m_rhs.cols()); - const ActualLhsType lhs = LhsBlasTraits::extract(m_lhs); - const ActualRhsType rhs = RhsBlasTraits::extract(m_rhs); + typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs); + typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs); Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs) * RhsBlasTraits::extractScalarFactor(m_rhs); @@ -232,7 +227,7 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true> } - internal::product_selfadjoint_vector<Scalar, Index, (internal::traits<_ActualLhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, int(LhsUpLo), bool(LhsBlasTraits::NeedToConjugate), bool(RhsBlasTraits::NeedToConjugate)> + internal::selfadjoint_matrix_vector_product<Scalar, Index, (internal::traits<_ActualLhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, int(LhsUpLo), bool(LhsBlasTraits::NeedToConjugate), bool(RhsBlasTraits::NeedToConjugate)>::run ( lhs.rows(), // size &lhs.coeffRef(0,0), lhs.outerStride(), // lhs info @@ -274,5 +269,6 @@ struct SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false> } }; +} // end namespace Eigen #endif // EIGEN_SELFADJOINT_MATRIX_VECTOR_H diff --git a/extern/Eigen3/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h b/extern/Eigen3/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h new file mode 100644 index 00000000000..f88d483b653 --- /dev/null +++ b/extern/Eigen3/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h @@ -0,0 +1,114 @@ +/* + Copyright (c) 2011, Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ******************************************************************************** + * Content : Eigen bindings to Intel(R) MKL + * Selfadjoint matrix-vector product functionality based on ?SYMV/HEMV. + ******************************************************************************** +*/ + +#ifndef EIGEN_SELFADJOINT_MATRIX_VECTOR_MKL_H +#define EIGEN_SELFADJOINT_MATRIX_VECTOR_MKL_H + +namespace Eigen { + +namespace internal { + +/********************************************************************** +* This file implements selfadjoint matrix-vector multiplication using BLAS +**********************************************************************/ + +// symv/hemv specialization + +template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs> +struct selfadjoint_matrix_vector_product_symv : + selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,BuiltIn> {}; + +#define EIGEN_MKL_SYMV_SPECIALIZE(Scalar) \ +template<typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs> \ +struct selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,Specialized> { \ +static EIGEN_DONT_INLINE void run( \ + Index size, const Scalar* lhs, Index lhsStride, \ + const Scalar* _rhs, Index rhsIncr, Scalar* res, Scalar alpha) { \ + enum {\ + IsColMajor = StorageOrder==ColMajor \ + }; \ + if (IsColMajor == ConjugateLhs) {\ + selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,BuiltIn>::run( \ + size, lhs, lhsStride, _rhs, rhsIncr, res, alpha); \ + } else {\ + selfadjoint_matrix_vector_product_symv<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs>::run( \ + size, lhs, lhsStride, _rhs, rhsIncr, res, alpha); \ + }\ + } \ +}; \ + +EIGEN_MKL_SYMV_SPECIALIZE(double) +EIGEN_MKL_SYMV_SPECIALIZE(float) +EIGEN_MKL_SYMV_SPECIALIZE(dcomplex) +EIGEN_MKL_SYMV_SPECIALIZE(scomplex) + +#define EIGEN_MKL_SYMV_SPECIALIZATION(EIGTYPE,MKLTYPE,MKLFUNC) \ +template<typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs> \ +struct selfadjoint_matrix_vector_product_symv<EIGTYPE,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs> \ +{ \ +typedef Matrix<EIGTYPE,Dynamic,1,ColMajor> SYMVVector;\ +\ +static EIGEN_DONT_INLINE void run( \ +Index size, const EIGTYPE* lhs, Index lhsStride, \ +const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* res, EIGTYPE alpha) \ +{ \ + enum {\ + IsRowMajor = StorageOrder==RowMajor ? 1 : 0, \ + IsLower = UpLo == Lower ? 1 : 0 \ + }; \ + MKL_INT n=size, lda=lhsStride, incx=rhsIncr, incy=1; \ + MKLTYPE alpha_, beta_; \ + const EIGTYPE *x_ptr, myone(1); \ + char uplo=(IsRowMajor) ? (IsLower ? 'U' : 'L') : (IsLower ? 'L' : 'U'); \ + assign_scalar_eig2mkl(alpha_, alpha); \ + assign_scalar_eig2mkl(beta_, myone); \ + SYMVVector x_tmp; \ + if (ConjugateRhs) { \ + Map<const SYMVVector, 0, InnerStride<> > map_x(_rhs,size,1,InnerStride<>(incx)); \ + x_tmp=map_x.conjugate(); \ + x_ptr=x_tmp.data(); \ + incx=1; \ + } else x_ptr=_rhs; \ + MKLFUNC(&uplo, &n, &alpha_, (const MKLTYPE*)lhs, &lda, (const MKLTYPE*)x_ptr, &incx, &beta_, (MKLTYPE*)res, &incy); \ +}\ +}; + +EIGEN_MKL_SYMV_SPECIALIZATION(double, double, dsymv) +EIGEN_MKL_SYMV_SPECIALIZATION(float, float, ssymv) +EIGEN_MKL_SYMV_SPECIALIZATION(dcomplex, MKL_Complex16, zhemv) +EIGEN_MKL_SYMV_SPECIALIZATION(scomplex, MKL_Complex8, chemv) + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_SELFADJOINT_MATRIX_VECTOR_MKL_H diff --git a/extern/Eigen3/Eigen/src/Core/products/SelfadjointProduct.h b/extern/Eigen3/Eigen/src/Core/products/SelfadjointProduct.h index 3a4523fa4a9..6a55f3d7715 100644 --- a/extern/Eigen3/Eigen/src/Core/products/SelfadjointProduct.h +++ b/extern/Eigen3/Eigen/src/Core/products/SelfadjointProduct.h @@ -3,24 +3,9 @@ // // Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr> // -// Eigen is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 3 of the License, or (at your option) any later version. -// -// Alternatively, you can redistribute it and/or -// modify it under the terms of the GNU General Public License as -// published by the Free Software Foundation; either version 2 of -// the License, or (at your option) any later version. -// -// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License and a copy of the GNU General Public License along with -// Eigen. If not, see <http://www.gnu.org/licenses/>. +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. #ifndef EIGEN_SELFADJOINT_PRODUCT_H #define EIGEN_SELFADJOINT_PRODUCT_H @@ -31,6 +16,8 @@ * It corresponds to the level 3 SYRK and level 2 SYR Blas routines. **********************************************************************/ +namespace Eigen { + template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjLhs, bool ConjRhs> struct selfadjoint_rank1_update; @@ -72,7 +59,7 @@ struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,true> typedef internal::blas_traits<OtherType> OtherBlasTraits; typedef typename OtherBlasTraits::DirectLinearAccessType ActualOtherType; typedef typename internal::remove_all<ActualOtherType>::type _ActualOtherType; - const ActualOtherType actualOther = OtherBlasTraits::extract(other.derived()); + typename internal::add_const_on_value_type<ActualOtherType>::type actualOther = OtherBlasTraits::extract(other.derived()); Scalar actualAlpha = alpha * OtherBlasTraits::extractScalarFactor(other.derived()); @@ -105,12 +92,12 @@ struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,false> typedef internal::blas_traits<OtherType> OtherBlasTraits; typedef typename OtherBlasTraits::DirectLinearAccessType ActualOtherType; typedef typename internal::remove_all<ActualOtherType>::type _ActualOtherType; - const ActualOtherType actualOther = OtherBlasTraits::extract(other.derived()); + typename internal::add_const_on_value_type<ActualOtherType>::type actualOther = OtherBlasTraits::extract(other.derived()); Scalar actualAlpha = alpha * OtherBlasTraits::extractScalarFactor(other.derived()); enum { IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0 }; - + internal::general_matrix_matrix_triangular_product<Index, Scalar, _ActualOtherType::Flags&RowMajorBit ? RowMajor : ColMajor, OtherBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex, Scalar, _ActualOtherType::Flags&RowMajorBit ? ColMajor : RowMajor, (!OtherBlasTraits::NeedToConjugate) && NumTraits<Scalar>::IsComplex, @@ -133,4 +120,6 @@ SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo> return *this; } +} // end namespace Eigen + #endif // EIGEN_SELFADJOINT_PRODUCT_H diff --git a/extern/Eigen3/Eigen/src/Core/products/SelfadjointRank2Update.h b/extern/Eigen3/Eigen/src/Core/products/SelfadjointRank2Update.h index 9f8b8438a5d..57a98cc2de9 100644 --- a/extern/Eigen3/Eigen/src/Core/products/SelfadjointRank2Update.h +++ b/extern/Eigen3/Eigen/src/Core/products/SelfadjointRank2Update.h @@ -3,28 +3,15 @@ // // Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr> // -// Eigen is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 3 of the License, or (at your option) any later version. -// -// Alternatively, you can redistribute it and/or -// modify it under the terms of the GNU General Public License as -// published by the Free Software Foundation; either version 2 of -// the License, or (at your option) any later version. -// -// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License and a copy of the GNU General Public License along with -// Eigen. If not, see <http://www.gnu.org/licenses/>. +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. #ifndef EIGEN_SELFADJOINTRANK2UPTADE_H #define EIGEN_SELFADJOINTRANK2UPTADE_H +namespace Eigen { + namespace internal { /* Optimized selfadjoint matrix += alpha * uv' + conj(alpha)*vu' @@ -76,12 +63,12 @@ SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo> typedef internal::blas_traits<DerivedU> UBlasTraits; typedef typename UBlasTraits::DirectLinearAccessType ActualUType; typedef typename internal::remove_all<ActualUType>::type _ActualUType; - const ActualUType actualU = UBlasTraits::extract(u.derived()); + typename internal::add_const_on_value_type<ActualUType>::type actualU = UBlasTraits::extract(u.derived()); typedef internal::blas_traits<DerivedV> VBlasTraits; typedef typename VBlasTraits::DirectLinearAccessType ActualVType; typedef typename internal::remove_all<ActualVType>::type _ActualVType; - const ActualVType actualV = VBlasTraits::extract(v.derived()); + typename internal::add_const_on_value_type<ActualVType>::type actualV = VBlasTraits::extract(v.derived()); // If MatrixType is row major, then we use the routine for lower triangular in the upper triangular case and // vice versa, and take the complex conjugate of all coefficients and vector entries. @@ -101,4 +88,6 @@ SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo> return *this; } +} // end namespace Eigen + #endif // EIGEN_SELFADJOINTRANK2UPTADE_H diff --git a/extern/Eigen3/Eigen/src/Core/products/TriangularMatrixMatrix.h b/extern/Eigen3/Eigen/src/Core/products/TriangularMatrixMatrix.h index 0c48d2efb75..92cba66f615 100644 --- a/extern/Eigen3/Eigen/src/Core/products/TriangularMatrixMatrix.h +++ b/extern/Eigen3/Eigen/src/Core/products/TriangularMatrixMatrix.h @@ -3,28 +3,15 @@ // // Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr> // -// Eigen is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 3 of the License, or (at your option) any later version. -// -// Alternatively, you can redistribute it and/or -// modify it under the terms of the GNU General Public License as -// published by the Free Software Foundation; either version 2 of -// the License, or (at your option) any later version. -// -// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License and a copy of the GNU General Public License along with -// Eigen. If not, see <http://www.gnu.org/licenses/>. +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. #ifndef EIGEN_TRIANGULAR_MATRIX_MATRIX_H #define EIGEN_TRIANGULAR_MATRIX_MATRIX_H +namespace Eigen { + namespace internal { // template<typename Scalar, int mr, int StorageOrder, bool Conjugate, int Mode> @@ -58,23 +45,23 @@ template <typename Scalar, typename Index, int Mode, bool LhsIsTriangular, int LhsStorageOrder, bool ConjugateLhs, int RhsStorageOrder, bool ConjugateRhs, - int ResStorageOrder> + int ResStorageOrder, int Version = Specialized> struct product_triangular_matrix_matrix; template <typename Scalar, typename Index, int Mode, bool LhsIsTriangular, int LhsStorageOrder, bool ConjugateLhs, - int RhsStorageOrder, bool ConjugateRhs> + int RhsStorageOrder, bool ConjugateRhs, int Version> struct product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular, LhsStorageOrder,ConjugateLhs, - RhsStorageOrder,ConjugateRhs,RowMajor> + RhsStorageOrder,ConjugateRhs,RowMajor,Version> { static EIGEN_STRONG_INLINE void run( Index rows, Index cols, Index depth, const Scalar* lhs, Index lhsStride, const Scalar* rhs, Index rhsStride, Scalar* res, Index resStride, - Scalar alpha) + Scalar alpha, level3_blocking<Scalar,Scalar>& blocking) { product_triangular_matrix_matrix<Scalar, Index, (Mode&(UnitDiag|ZeroDiag)) | ((Mode&Upper) ? Lower : Upper), @@ -84,22 +71,22 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular, LhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateLhs, ColMajor> - ::run(cols, rows, depth, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha); + ::run(cols, rows, depth, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha, blocking); } }; // implements col-major += alpha * op(triangular) * op(general) template <typename Scalar, typename Index, int Mode, int LhsStorageOrder, bool ConjugateLhs, - int RhsStorageOrder, bool ConjugateRhs> + int RhsStorageOrder, bool ConjugateRhs, int Version> struct product_triangular_matrix_matrix<Scalar,Index,Mode,true, LhsStorageOrder,ConjugateLhs, - RhsStorageOrder,ConjugateRhs,ColMajor> + RhsStorageOrder,ConjugateRhs,ColMajor,Version> { typedef gebp_traits<Scalar,Scalar> Traits; enum { - SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr), + SmallPanelWidth = 2 * EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr), IsLower = (Mode&Lower) == Lower, SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1 }; @@ -109,7 +96,7 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true, const Scalar* _lhs, Index lhsStride, const Scalar* _rhs, Index rhsStride, Scalar* res, Index resStride, - Scalar alpha) + Scalar alpha, level3_blocking<Scalar,Scalar>& blocking) { // strip zeros Index diagSize = (std::min)(_rows,_depth); @@ -120,15 +107,16 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true, const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride); const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride); - Index kc = depth; // cache block size along the K direction - Index mc = rows; // cache block size along the M direction - Index nc = cols; // cache block size along the N direction - computeProductBlockingSizes<Scalar,Scalar,4>(kc, mc, nc); + Index kc = blocking.kc(); // cache block size along the K direction + Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction + + std::size_t sizeA = kc*mc; + std::size_t sizeB = kc*cols; std::size_t sizeW = kc*Traits::WorkSpaceFactor; - std::size_t sizeB = sizeW + kc*cols; - ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0); - ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0); - Scalar* blockB = allocatedBlockB + sizeW; + + ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA()); + ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB()); + ei_declare_aligned_stack_constructed_variable(Scalar, blockW, sizeW, blocking.blockW()); Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,LhsStorageOrder> triangularBuffer; triangularBuffer.setZero(); @@ -186,7 +174,7 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true, pack_lhs(blockA, triangularBuffer.data(), triangularBuffer.outerStride(), actualPanelWidth, actualPanelWidth); gebp_kernel(res+startBlock, resStride, blockA, blockB, actualPanelWidth, actualPanelWidth, cols, alpha, - actualPanelWidth, actual_kc, 0, blockBOffset); + actualPanelWidth, actual_kc, 0, blockBOffset, blockW); // GEBP with remaining micro panel if (lengthTarget>0) @@ -196,7 +184,7 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true, pack_lhs(blockA, &lhs(startTarget,startBlock), lhsStride, actualPanelWidth, lengthTarget); gebp_kernel(res+startTarget, resStride, blockA, blockB, lengthTarget, actualPanelWidth, cols, alpha, - actualPanelWidth, actual_kc, 0, blockBOffset); + actualPanelWidth, actual_kc, 0, blockBOffset, blockW); } } } @@ -210,7 +198,7 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true, gemm_pack_lhs<Scalar, Index, Traits::mr,Traits::LhsProgress, LhsStorageOrder,false>() (blockA, &lhs(i2, actual_k2), lhsStride, actual_kc, actual_mc); - gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha); + gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha, -1, -1, 0, 0, blockW); } } } @@ -220,10 +208,10 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true, // implements col-major += alpha * op(general) * op(triangular) template <typename Scalar, typename Index, int Mode, int LhsStorageOrder, bool ConjugateLhs, - int RhsStorageOrder, bool ConjugateRhs> + int RhsStorageOrder, bool ConjugateRhs, int Version> struct product_triangular_matrix_matrix<Scalar,Index,Mode,false, LhsStorageOrder,ConjugateLhs, - RhsStorageOrder,ConjugateRhs,ColMajor> + RhsStorageOrder,ConjugateRhs,ColMajor,Version> { typedef gebp_traits<Scalar,Scalar> Traits; enum { @@ -237,7 +225,7 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,false, const Scalar* _lhs, Index lhsStride, const Scalar* _rhs, Index rhsStride, Scalar* res, Index resStride, - Scalar alpha) + Scalar alpha, level3_blocking<Scalar,Scalar>& blocking) { // strip zeros Index diagSize = (std::min)(_cols,_depth); @@ -248,16 +236,16 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,false, const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride); const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride); - Index kc = depth; // cache block size along the K direction - Index mc = rows; // cache block size along the M direction - Index nc = cols; // cache block size along the N direction - computeProductBlockingSizes<Scalar,Scalar,4>(kc, mc, nc); + Index kc = blocking.kc(); // cache block size along the K direction + Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction + std::size_t sizeA = kc*mc; + std::size_t sizeB = kc*cols; std::size_t sizeW = kc*Traits::WorkSpaceFactor; - std::size_t sizeB = sizeW + kc*cols; - ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0); - ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0); - Scalar* blockB = allocatedBlockB + sizeW; + + ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA()); + ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB()); + ei_declare_aligned_stack_constructed_variable(Scalar, blockW, sizeW, blocking.blockW()); Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,RhsStorageOrder> triangularBuffer; triangularBuffer.setZero(); @@ -345,13 +333,13 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,false, alpha, actual_kc, actual_kc, // strides blockOffset, blockOffset,// offsets - allocatedBlockB); // workspace + blockW); // workspace } } gebp_kernel(res+i2+(IsLower ? 0 : k2)*resStride, resStride, blockA, geb, actual_mc, actual_kc, rs, alpha, - -1, -1, 0, 0, allocatedBlockB); + -1, -1, 0, 0, blockW); } } } @@ -378,26 +366,38 @@ struct TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false> template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const { - const ActualLhsType lhs = LhsBlasTraits::extract(m_lhs); - const ActualRhsType rhs = RhsBlasTraits::extract(m_rhs); + typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs); + typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs); Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs) * RhsBlasTraits::extractScalarFactor(m_rhs); + typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar, + Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxColsAtCompileTime,4> BlockingType; + + enum { IsLower = (Mode&Lower) == Lower }; + Index stripedRows = ((!LhsIsTriangular) || (IsLower)) ? lhs.rows() : (std::min)(lhs.rows(),lhs.cols()); + Index stripedCols = ((LhsIsTriangular) || (!IsLower)) ? rhs.cols() : (std::min)(rhs.cols(),rhs.rows()); + Index stripedDepth = LhsIsTriangular ? ((!IsLower) ? lhs.cols() : (std::min)(lhs.cols(),lhs.rows())) + : ((IsLower) ? rhs.rows() : (std::min)(rhs.rows(),rhs.cols())); + + BlockingType blocking(stripedRows, stripedCols, stripedDepth); + internal::product_triangular_matrix_matrix<Scalar, Index, Mode, LhsIsTriangular, (internal::traits<_ActualLhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate, (internal::traits<_ActualRhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate, (internal::traits<Dest >::Flags&RowMajorBit) ? RowMajor : ColMajor> ::run( - lhs.rows(), rhs.cols(), lhs.cols(),// LhsIsTriangular ? rhs.cols() : lhs.rows(), // sizes + stripedRows, stripedCols, stripedDepth, // sizes &lhs.coeffRef(0,0), lhs.outerStride(), // lhs info &rhs.coeffRef(0,0), rhs.outerStride(), // rhs info - &dst.coeffRef(0,0), dst.outerStride(), // result info - actualAlpha // alpha + &dst.coeffRef(0,0), dst.outerStride(), // result info + actualAlpha, blocking ); } }; +} // end namespace Eigen #endif // EIGEN_TRIANGULAR_MATRIX_MATRIX_H diff --git a/extern/Eigen3/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h b/extern/Eigen3/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h new file mode 100644 index 00000000000..8173da5bb6d --- /dev/null +++ b/extern/Eigen3/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h @@ -0,0 +1,309 @@ +/* + Copyright (c) 2011, Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ******************************************************************************** + * Content : Eigen bindings to Intel(R) MKL + * Triangular matrix * matrix product functionality based on ?TRMM. + ******************************************************************************** +*/ + +#ifndef EIGEN_TRIANGULAR_MATRIX_MATRIX_MKL_H +#define EIGEN_TRIANGULAR_MATRIX_MATRIX_MKL_H + +namespace Eigen { + +namespace internal { + + +template <typename Scalar, typename Index, + int Mode, bool LhsIsTriangular, + int LhsStorageOrder, bool ConjugateLhs, + int RhsStorageOrder, bool ConjugateRhs, + int ResStorageOrder> +struct product_triangular_matrix_matrix_trmm : + product_triangular_matrix_matrix<Scalar,Index,Mode, + LhsIsTriangular,LhsStorageOrder,ConjugateLhs, + RhsStorageOrder, ConjugateRhs, ResStorageOrder, BuiltIn> {}; + + +// try to go to BLAS specialization +#define EIGEN_MKL_TRMM_SPECIALIZE(Scalar, LhsIsTriangular) \ +template <typename Index, int Mode, \ + int LhsStorageOrder, bool ConjugateLhs, \ + int RhsStorageOrder, bool ConjugateRhs> \ +struct product_triangular_matrix_matrix<Scalar,Index, Mode, LhsIsTriangular, \ + LhsStorageOrder,ConjugateLhs, RhsStorageOrder,ConjugateRhs,ColMajor,Specialized> { \ + static inline void run(Index _rows, Index _cols, Index _depth, const Scalar* _lhs, Index lhsStride,\ + const Scalar* _rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha) { \ + product_triangular_matrix_matrix_trmm<Scalar,Index,Mode, \ + LhsIsTriangular,LhsStorageOrder,ConjugateLhs, \ + RhsStorageOrder, ConjugateRhs, ColMajor>::run( \ + _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha); \ + } \ +}; + +EIGEN_MKL_TRMM_SPECIALIZE(double, true) +EIGEN_MKL_TRMM_SPECIALIZE(double, false) +EIGEN_MKL_TRMM_SPECIALIZE(dcomplex, true) +EIGEN_MKL_TRMM_SPECIALIZE(dcomplex, false) +EIGEN_MKL_TRMM_SPECIALIZE(float, true) +EIGEN_MKL_TRMM_SPECIALIZE(float, false) +EIGEN_MKL_TRMM_SPECIALIZE(scomplex, true) +EIGEN_MKL_TRMM_SPECIALIZE(scomplex, false) + +// implements col-major += alpha * op(triangular) * op(general) +#define EIGEN_MKL_TRMM_L(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ +template <typename Index, int Mode, \ + int LhsStorageOrder, bool ConjugateLhs, \ + int RhsStorageOrder, bool ConjugateRhs> \ +struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \ + LhsStorageOrder,ConjugateLhs,RhsStorageOrder,ConjugateRhs,ColMajor> \ +{ \ + enum { \ + IsLower = (Mode&Lower) == Lower, \ + SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \ + IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \ + IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \ + LowUp = IsLower ? Lower : Upper, \ + conjA = ((LhsStorageOrder==ColMajor) && ConjugateLhs) ? 1 : 0 \ + }; \ +\ + static EIGEN_DONT_INLINE void run( \ + Index _rows, Index _cols, Index _depth, \ + const EIGTYPE* _lhs, Index lhsStride, \ + const EIGTYPE* _rhs, Index rhsStride, \ + EIGTYPE* res, Index resStride, \ + EIGTYPE alpha) \ + { \ + Index diagSize = (std::min)(_rows,_depth); \ + Index rows = IsLower ? _rows : diagSize; \ + Index depth = IsLower ? diagSize : _depth; \ + Index cols = _cols; \ +\ + typedef Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> MatrixLhs; \ + typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs; \ +\ +/* Non-square case - doesn't fit to MKL ?TRMM. Fall to default triangular product or call MKL ?GEMM*/ \ + if (rows != depth) { \ +\ + int nthr = mkl_domain_get_max_threads(MKL_BLAS); \ +\ + if (((nthr==1) && (((std::max)(rows,depth)-diagSize)/(double)diagSize < 0.5))) { \ + /* Most likely no benefit to call TRMM or GEMM from MKL*/ \ + product_triangular_matrix_matrix<EIGTYPE,Index,Mode,true, \ + LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, BuiltIn>::run( \ + _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha); \ + /*std::cout << "TRMM_L: A is not square! Go to Eigen TRMM implementation!\n";*/ \ + } else { \ + /* Make sense to call GEMM */ \ + Map<const MatrixLhs, 0, OuterStride<> > lhsMap(_lhs,rows,depth,OuterStride<>(lhsStride)); \ + MatrixLhs aa_tmp=lhsMap.template triangularView<Mode>(); \ + MKL_INT aStride = aa_tmp.outerStride(); \ + gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> blocking(_rows,_cols,_depth); \ + general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor>::run( \ + rows, cols, depth, aa_tmp.data(), aStride, _rhs, rhsStride, res, resStride, alpha, blocking, 0); \ +\ + /*std::cout << "TRMM_L: A is not square! Go to MKL GEMM implementation! " << nthr<<" \n";*/ \ + } \ + return; \ + } \ + char side = 'L', transa, uplo, diag = 'N'; \ + EIGTYPE *b; \ + const EIGTYPE *a; \ + MKL_INT m, n, lda, ldb; \ + MKLTYPE alpha_; \ +\ +/* Set alpha_*/ \ + assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \ +\ +/* Set m, n */ \ + m = (MKL_INT)diagSize; \ + n = (MKL_INT)cols; \ +\ +/* Set trans */ \ + transa = (LhsStorageOrder==RowMajor) ? ((ConjugateLhs) ? 'C' : 'T') : 'N'; \ +\ +/* Set b, ldb */ \ + Map<const MatrixRhs, 0, OuterStride<> > rhs(_rhs,depth,cols,OuterStride<>(rhsStride)); \ + MatrixX##EIGPREFIX b_tmp; \ +\ + if (ConjugateRhs) b_tmp = rhs.conjugate(); else b_tmp = rhs; \ + b = b_tmp.data(); \ + ldb = b_tmp.outerStride(); \ +\ +/* Set uplo */ \ + uplo = IsLower ? 'L' : 'U'; \ + if (LhsStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \ +/* Set a, lda */ \ + Map<const MatrixLhs, 0, OuterStride<> > lhs(_lhs,rows,depth,OuterStride<>(lhsStride)); \ + MatrixLhs a_tmp; \ +\ + if ((conjA!=0) || (SetDiag==0)) { \ + if (conjA) a_tmp = lhs.conjugate(); else a_tmp = lhs; \ + if (IsZeroDiag) \ + a_tmp.diagonal().setZero(); \ + else if (IsUnitDiag) \ + a_tmp.diagonal().setOnes();\ + a = a_tmp.data(); \ + lda = a_tmp.outerStride(); \ + } else { \ + a = _lhs; \ + lda = lhsStride; \ + } \ + /*std::cout << "TRMM_L: A is square! Go to MKL TRMM implementation! \n";*/ \ +/* call ?trmm*/ \ + MKLPREFIX##trmm(&side, &uplo, &transa, &diag, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (MKLTYPE*)b, &ldb); \ +\ +/* Add op(a_triangular)*b into res*/ \ + Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \ + res_tmp=res_tmp+b_tmp; \ + } \ +}; + +EIGEN_MKL_TRMM_L(double, double, d, d) +EIGEN_MKL_TRMM_L(dcomplex, MKL_Complex16, cd, z) +EIGEN_MKL_TRMM_L(float, float, f, s) +EIGEN_MKL_TRMM_L(scomplex, MKL_Complex8, cf, c) + +// implements col-major += alpha * op(general) * op(triangular) +#define EIGEN_MKL_TRMM_R(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ +template <typename Index, int Mode, \ + int LhsStorageOrder, bool ConjugateLhs, \ + int RhsStorageOrder, bool ConjugateRhs> \ +struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \ + LhsStorageOrder,ConjugateLhs,RhsStorageOrder,ConjugateRhs,ColMajor> \ +{ \ + enum { \ + IsLower = (Mode&Lower) == Lower, \ + SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \ + IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \ + IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \ + LowUp = IsLower ? Lower : Upper, \ + conjA = ((RhsStorageOrder==ColMajor) && ConjugateRhs) ? 1 : 0 \ + }; \ +\ + static EIGEN_DONT_INLINE void run( \ + Index _rows, Index _cols, Index _depth, \ + const EIGTYPE* _lhs, Index lhsStride, \ + const EIGTYPE* _rhs, Index rhsStride, \ + EIGTYPE* res, Index resStride, \ + EIGTYPE alpha) \ + { \ + Index diagSize = (std::min)(_cols,_depth); \ + Index rows = _rows; \ + Index depth = IsLower ? _depth : diagSize; \ + Index cols = IsLower ? diagSize : _cols; \ +\ + typedef Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> MatrixLhs; \ + typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs; \ +\ +/* Non-square case - doesn't fit to MKL ?TRMM. Fall to default triangular product or call MKL ?GEMM*/ \ + if (cols != depth) { \ +\ + int nthr = mkl_domain_get_max_threads(MKL_BLAS); \ +\ + if ((nthr==1) && (((std::max)(cols,depth)-diagSize)/(double)diagSize < 0.5)) { \ + /* Most likely no benefit to call TRMM or GEMM from MKL*/ \ + product_triangular_matrix_matrix<EIGTYPE,Index,Mode,false, \ + LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, BuiltIn>::run( \ + _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha); \ + /*std::cout << "TRMM_R: A is not square! Go to Eigen TRMM implementation!\n";*/ \ + } else { \ + /* Make sense to call GEMM */ \ + Map<const MatrixRhs, 0, OuterStride<> > rhsMap(_rhs,depth,cols, OuterStride<>(rhsStride)); \ + MatrixRhs aa_tmp=rhsMap.template triangularView<Mode>(); \ + MKL_INT aStride = aa_tmp.outerStride(); \ + gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> blocking(_rows,_cols,_depth); \ + general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor>::run( \ + rows, cols, depth, _lhs, lhsStride, aa_tmp.data(), aStride, res, resStride, alpha, blocking, 0); \ +\ + /*std::cout << "TRMM_R: A is not square! Go to MKL GEMM implementation! " << nthr<<" \n";*/ \ + } \ + return; \ + } \ + char side = 'R', transa, uplo, diag = 'N'; \ + EIGTYPE *b; \ + const EIGTYPE *a; \ + MKL_INT m, n, lda, ldb; \ + MKLTYPE alpha_; \ +\ +/* Set alpha_*/ \ + assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \ +\ +/* Set m, n */ \ + m = (MKL_INT)rows; \ + n = (MKL_INT)diagSize; \ +\ +/* Set trans */ \ + transa = (RhsStorageOrder==RowMajor) ? ((ConjugateRhs) ? 'C' : 'T') : 'N'; \ +\ +/* Set b, ldb */ \ + Map<const MatrixLhs, 0, OuterStride<> > lhs(_lhs,rows,depth,OuterStride<>(lhsStride)); \ + MatrixX##EIGPREFIX b_tmp; \ +\ + if (ConjugateLhs) b_tmp = lhs.conjugate(); else b_tmp = lhs; \ + b = b_tmp.data(); \ + ldb = b_tmp.outerStride(); \ +\ +/* Set uplo */ \ + uplo = IsLower ? 'L' : 'U'; \ + if (RhsStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \ +/* Set a, lda */ \ + Map<const MatrixRhs, 0, OuterStride<> > rhs(_rhs,depth,cols, OuterStride<>(rhsStride)); \ + MatrixRhs a_tmp; \ +\ + if ((conjA!=0) || (SetDiag==0)) { \ + if (conjA) a_tmp = rhs.conjugate(); else a_tmp = rhs; \ + if (IsZeroDiag) \ + a_tmp.diagonal().setZero(); \ + else if (IsUnitDiag) \ + a_tmp.diagonal().setOnes();\ + a = a_tmp.data(); \ + lda = a_tmp.outerStride(); \ + } else { \ + a = _rhs; \ + lda = rhsStride; \ + } \ + /*std::cout << "TRMM_R: A is square! Go to MKL TRMM implementation! \n";*/ \ +/* call ?trmm*/ \ + MKLPREFIX##trmm(&side, &uplo, &transa, &diag, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (MKLTYPE*)b, &ldb); \ +\ +/* Add op(a_triangular)*b into res*/ \ + Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \ + res_tmp=res_tmp+b_tmp; \ + } \ +}; + +EIGEN_MKL_TRMM_R(double, double, d, d) +EIGEN_MKL_TRMM_R(dcomplex, MKL_Complex16, cd, z) +EIGEN_MKL_TRMM_R(float, float, f, s) +EIGEN_MKL_TRMM_R(scomplex, MKL_Complex8, cf, c) + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_TRIANGULAR_MATRIX_MATRIX_MKL_H diff --git a/extern/Eigen3/Eigen/src/Core/products/TriangularMatrixVector.h b/extern/Eigen3/Eigen/src/Core/products/TriangularMatrixVector.h index 71b4a52ab80..b1c10c201c5 100644 --- a/extern/Eigen3/Eigen/src/Core/products/TriangularMatrixVector.h +++ b/extern/Eigen3/Eigen/src/Core/products/TriangularMatrixVector.h @@ -3,45 +3,36 @@ // // Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr> // -// Eigen is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 3 of the License, or (at your option) any later version. -// -// Alternatively, you can redistribute it and/or -// modify it under the terms of the GNU General Public License as -// published by the Free Software Foundation; either version 2 of -// the License, or (at your option) any later version. -// -// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License and a copy of the GNU General Public License along with -// Eigen. If not, see <http://www.gnu.org/licenses/>. +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. #ifndef EIGEN_TRIANGULARMATRIXVECTOR_H #define EIGEN_TRIANGULARMATRIXVECTOR_H +namespace Eigen { + namespace internal { -template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int StorageOrder> -struct product_triangular_matrix_vector; +template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int StorageOrder, int Version=Specialized> +struct triangular_matrix_vector_product; -template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs> -struct product_triangular_matrix_vector<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,ColMajor> +template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int Version> +struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,ColMajor,Version> { typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar; enum { IsLower = ((Mode&Lower)==Lower), - HasUnitDiag = (Mode & UnitDiag)==UnitDiag + HasUnitDiag = (Mode & UnitDiag)==UnitDiag, + HasZeroDiag = (Mode & ZeroDiag)==ZeroDiag }; - static EIGEN_DONT_INLINE void run(Index rows, Index cols, const LhsScalar* _lhs, Index lhsStride, + static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride, const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, ResScalar alpha) { static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH; + Index size = (std::min)(_rows,_cols); + Index rows = IsLower ? _rows : (std::min)(_rows,_cols); + Index cols = IsLower ? (std::min)(_rows,_cols) : _cols; typedef Map<const Matrix<LhsScalar,Dynamic,Dynamic,ColMajor>, 0, OuterStride<> > LhsMap; const LhsMap lhs(_lhs,rows,cols,OuterStride<>(lhsStride)); @@ -54,45 +45,57 @@ struct product_triangular_matrix_vector<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C typedef Map<Matrix<ResScalar,Dynamic,1> > ResMap; ResMap res(_res,rows); - for (Index pi=0; pi<cols; pi+=PanelWidth) + for (Index pi=0; pi<size; pi+=PanelWidth) { - Index actualPanelWidth = (std::min)(PanelWidth, cols-pi); + Index actualPanelWidth = (std::min)(PanelWidth, size-pi); for (Index k=0; k<actualPanelWidth; ++k) { Index i = pi + k; - Index s = IsLower ? (HasUnitDiag ? i+1 : i ) : pi; + Index s = IsLower ? ((HasUnitDiag||HasZeroDiag) ? i+1 : i ) : pi; Index r = IsLower ? actualPanelWidth-k : k+1; - if ((!HasUnitDiag) || (--r)>0) + if ((!(HasUnitDiag||HasZeroDiag)) || (--r)>0) res.segment(s,r) += (alpha * cjRhs.coeff(i)) * cjLhs.col(i).segment(s,r); if (HasUnitDiag) res.coeffRef(i) += alpha * cjRhs.coeff(i); } - Index r = IsLower ? cols - pi - actualPanelWidth : pi; + Index r = IsLower ? rows - pi - actualPanelWidth : pi; if (r>0) { Index s = IsLower ? pi+actualPanelWidth : 0; - general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjLhs,RhsScalar,ConjRhs>::run( + general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjLhs,RhsScalar,ConjRhs,BuiltIn>::run( r, actualPanelWidth, &lhs.coeffRef(s,pi), lhsStride, &rhs.coeffRef(pi), rhsIncr, &res.coeffRef(s), resIncr, alpha); } } + if((!IsLower) && cols>size) + { + general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjLhs,RhsScalar,ConjRhs>::run( + rows, cols-size, + &lhs.coeffRef(0,size), lhsStride, + &rhs.coeffRef(size), rhsIncr, + _res, resIncr, alpha); + } } }; -template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs> -struct product_triangular_matrix_vector<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,RowMajor> +template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs,int Version> +struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,RowMajor,Version> { typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar; enum { IsLower = ((Mode&Lower)==Lower), - HasUnitDiag = (Mode & UnitDiag)==UnitDiag + HasUnitDiag = (Mode & UnitDiag)==UnitDiag, + HasZeroDiag = (Mode & ZeroDiag)==ZeroDiag }; - static void run(Index rows, Index cols, const LhsScalar* _lhs, Index lhsStride, + static void run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride, const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, ResScalar alpha) { static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH; + Index diagSize = (std::min)(_rows,_cols); + Index rows = IsLower ? _rows : diagSize; + Index cols = IsLower ? diagSize : _cols; typedef Map<const Matrix<LhsScalar,Dynamic,Dynamic,RowMajor>, 0, OuterStride<> > LhsMap; const LhsMap lhs(_lhs,rows,cols,OuterStride<>(lhsStride)); @@ -105,15 +108,15 @@ struct product_triangular_matrix_vector<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C typedef Map<Matrix<ResScalar,Dynamic,1>, 0, InnerStride<> > ResMap; ResMap res(_res,rows,InnerStride<>(resIncr)); - for (Index pi=0; pi<cols; pi+=PanelWidth) + for (Index pi=0; pi<diagSize; pi+=PanelWidth) { - Index actualPanelWidth = (std::min)(PanelWidth, cols-pi); + Index actualPanelWidth = (std::min)(PanelWidth, diagSize-pi); for (Index k=0; k<actualPanelWidth; ++k) { Index i = pi + k; - Index s = IsLower ? pi : (HasUnitDiag ? i+1 : i); + Index s = IsLower ? pi : ((HasUnitDiag||HasZeroDiag) ? i+1 : i); Index r = IsLower ? k+1 : actualPanelWidth-k; - if ((!HasUnitDiag) || (--r)>0) + if ((!(HasUnitDiag||HasZeroDiag)) || (--r)>0) res.coeffRef(i) += alpha * (cjLhs.row(i).segment(s,r).cwiseProduct(cjRhs.segment(s,r).transpose())).sum(); if (HasUnitDiag) res.coeffRef(i) += alpha * cjRhs.coeff(i); @@ -122,13 +125,21 @@ struct product_triangular_matrix_vector<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C if (r>0) { Index s = IsLower ? 0 : pi + actualPanelWidth; - general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjLhs,RhsScalar,ConjRhs>::run( + general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjLhs,RhsScalar,ConjRhs,BuiltIn>::run( actualPanelWidth, r, &lhs.coeffRef(pi,s), lhsStride, &rhs.coeffRef(s), rhsIncr, &res.coeffRef(pi), resIncr, alpha); } } + if(IsLower && rows>diagSize) + { + general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjLhs,RhsScalar,ConjRhs>::run( + rows-diagSize, cols, + &lhs.coeffRef(diagSize,0), lhsStride, + &rhs.coeffRef(0), rhsIncr, + &res.coeffRef(diagSize), resIncr, alpha); + } } }; @@ -180,7 +191,7 @@ struct TriangularProduct<Mode,false,Lhs,true,Rhs,false> { eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols()); - typedef TriangularProduct<(Mode & UnitDiag) | ((Mode & Lower) ? Upper : Lower),true,Transpose<const Rhs>,false,Transpose<const Lhs>,true> TriangularProductTranspose; + typedef TriangularProduct<(Mode & (UnitDiag|ZeroDiag)) | ((Mode & Lower) ? Upper : Lower),true,Transpose<const Rhs>,false,Transpose<const Lhs>,true> TriangularProductTranspose; Transpose<Dest> dstT(dst); internal::trmv_selector<(int(internal::traits<Rhs>::Flags)&RowMajorBit) ? ColMajor : RowMajor>::run( TriangularProductTranspose(m_rhs.transpose(),m_lhs.transpose()), dstT, alpha); @@ -208,8 +219,8 @@ template<> struct trmv_selector<ColMajor> typedef typename ProductType::RhsBlasTraits RhsBlasTraits; typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest; - const ActualLhsType actualLhs = LhsBlasTraits::extract(prod.lhs()); - const ActualRhsType actualRhs = RhsBlasTraits::extract(prod.rhs()); + typename internal::add_const_on_value_type<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(prod.lhs()); + typename internal::add_const_on_value_type<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(prod.rhs()); ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs()) * RhsBlasTraits::extractScalarFactor(prod.rhs()); @@ -247,7 +258,7 @@ template<> struct trmv_selector<ColMajor> MappedDest(actualDestPtr, dest.size()) = dest; } - internal::product_triangular_matrix_vector + internal::triangular_matrix_vector_product <Index,Mode, LhsScalar, LhsBlasTraits::NeedToConjugate, RhsScalar, RhsBlasTraits::NeedToConjugate, @@ -307,7 +318,7 @@ template<> struct trmv_selector<RowMajor> Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs; } - internal::product_triangular_matrix_vector + internal::triangular_matrix_vector_product <Index,Mode, LhsScalar, LhsBlasTraits::NeedToConjugate, RhsScalar, RhsBlasTraits::NeedToConjugate, @@ -322,4 +333,6 @@ template<> struct trmv_selector<RowMajor> } // end namespace internal +} // end namespace Eigen + #endif // EIGEN_TRIANGULARMATRIXVECTOR_H diff --git a/extern/Eigen3/Eigen/src/Core/products/TriangularMatrixVector_MKL.h b/extern/Eigen3/Eigen/src/Core/products/TriangularMatrixVector_MKL.h new file mode 100644 index 00000000000..3589b8c5ef6 --- /dev/null +++ b/extern/Eigen3/Eigen/src/Core/products/TriangularMatrixVector_MKL.h @@ -0,0 +1,247 @@ +/* + Copyright (c) 2011, Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ******************************************************************************** + * Content : Eigen bindings to Intel(R) MKL + * Triangular matrix-vector product functionality based on ?TRMV. + ******************************************************************************** +*/ + +#ifndef EIGEN_TRIANGULAR_MATRIX_VECTOR_MKL_H +#define EIGEN_TRIANGULAR_MATRIX_VECTOR_MKL_H + +namespace Eigen { + +namespace internal { + +/********************************************************************** +* This file implements triangular matrix-vector multiplication using BLAS +**********************************************************************/ + +// trmv/hemv specialization + +template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int StorageOrder> +struct triangular_matrix_vector_product_trmv : + triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,StorageOrder,BuiltIn> {}; + +#define EIGEN_MKL_TRMV_SPECIALIZE(Scalar) \ +template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \ +struct triangular_matrix_vector_product<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,ColMajor,Specialized> { \ + static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \ + const Scalar* _rhs, Index rhsIncr, Scalar* _res, Index resIncr, Scalar alpha) { \ + triangular_matrix_vector_product_trmv<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,ColMajor>::run( \ + _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \ + } \ +}; \ +template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \ +struct triangular_matrix_vector_product<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,RowMajor,Specialized> { \ + static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \ + const Scalar* _rhs, Index rhsIncr, Scalar* _res, Index resIncr, Scalar alpha) { \ + triangular_matrix_vector_product_trmv<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,RowMajor>::run( \ + _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \ + } \ +}; + +EIGEN_MKL_TRMV_SPECIALIZE(double) +EIGEN_MKL_TRMV_SPECIALIZE(float) +EIGEN_MKL_TRMV_SPECIALIZE(dcomplex) +EIGEN_MKL_TRMV_SPECIALIZE(scomplex) + +// implements col-major: res += alpha * op(triangular) * vector +#define EIGEN_MKL_TRMV_CM(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ +template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \ +struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,ColMajor> { \ + enum { \ + IsLower = (Mode&Lower) == Lower, \ + SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \ + IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \ + IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \ + LowUp = IsLower ? Lower : Upper \ + }; \ + static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \ + const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha, level3_blocking<EIGTYPE,EIGTYPE>& blocking) \ + { \ + if (ConjLhs || IsZeroDiag) { \ + triangular_matrix_vector_product<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,ColMajor,BuiltIn>::run( \ + _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha, blocking); \ + return; \ + }\ + Index size = (std::min)(_rows,_cols); \ + Index rows = IsLower ? _rows : size; \ + Index cols = IsLower ? size : _cols; \ +\ + typedef VectorX##EIGPREFIX VectorRhs; \ + EIGTYPE *x, *y;\ +\ +/* Set x*/ \ + Map<const VectorRhs, 0, InnerStride<> > rhs(_rhs,cols,InnerStride<>(rhsIncr)); \ + VectorRhs x_tmp; \ + if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \ + x = x_tmp.data(); \ +\ +/* Square part handling */\ +\ + char trans, uplo, diag; \ + MKL_INT m, n, lda, incx, incy; \ + EIGTYPE const *a; \ + MKLTYPE alpha_, beta_; \ + assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \ + assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(beta_, EIGTYPE(1)); \ +\ +/* Set m, n */ \ + n = (MKL_INT)size; \ + lda = lhsStride; \ + incx = 1; \ + incy = resIncr; \ +\ +/* Set uplo, trans and diag*/ \ + trans = 'N'; \ + uplo = IsLower ? 'L' : 'U'; \ + diag = IsUnitDiag ? 'U' : 'N'; \ +\ +/* call ?TRMV*/ \ + MKLPREFIX##trmv(&uplo, &trans, &diag, &n, (const MKLTYPE*)_lhs, &lda, (MKLTYPE*)x, &incx); \ +\ +/* Add op(a_tr)rhs into res*/ \ + MKLPREFIX##axpy(&n, &alpha_,(const MKLTYPE*)x, &incx, (MKLTYPE*)_res, &incy); \ +/* Non-square case - doesn't fit to MKL ?TRMV. Fall to default triangular product*/ \ + if (size<(std::max)(rows,cols)) { \ + typedef Matrix<EIGTYPE, Dynamic, Dynamic> MatrixLhs; \ + if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \ + x = x_tmp.data(); \ + if (size<rows) { \ + y = _res + size*resIncr; \ + a = _lhs + size; \ + m = rows-size; \ + n = size; \ + } \ + else { \ + x += size; \ + y = _res; \ + a = _lhs + size*lda; \ + m = size; \ + n = cols-size; \ + } \ + MKLPREFIX##gemv(&trans, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)x, &incx, &beta_, (MKLTYPE*)y, &incy); \ + } \ + } \ +}; + +EIGEN_MKL_TRMV_CM(double, double, d, d) +EIGEN_MKL_TRMV_CM(dcomplex, MKL_Complex16, cd, z) +EIGEN_MKL_TRMV_CM(float, float, f, s) +EIGEN_MKL_TRMV_CM(scomplex, MKL_Complex8, cf, c) + +// implements row-major: res += alpha * op(triangular) * vector +#define EIGEN_MKL_TRMV_RM(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ +template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \ +struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,RowMajor> { \ + enum { \ + IsLower = (Mode&Lower) == Lower, \ + SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \ + IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \ + IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \ + LowUp = IsLower ? Lower : Upper \ + }; \ + static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \ + const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha, level3_blocking<EIGTYPE,EIGTYPE>& blocking) \ + { \ + if (IsZeroDiag) { \ + triangular_matrix_vector_product<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,RowMajor,BuiltIn>::run( \ + _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha, blocking); \ + return; \ + }\ + Index size = (std::min)(_rows,_cols); \ + Index rows = IsLower ? _rows : size; \ + Index cols = IsLower ? size : _cols; \ +\ + typedef VectorX##EIGPREFIX VectorRhs; \ + EIGTYPE *x, *y;\ +\ +/* Set x*/ \ + Map<const VectorRhs, 0, InnerStride<> > rhs(_rhs,cols,InnerStride<>(rhsIncr)); \ + VectorRhs x_tmp; \ + if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \ + x = x_tmp.data(); \ +\ +/* Square part handling */\ +\ + char trans, uplo, diag; \ + MKL_INT m, n, lda, incx, incy; \ + EIGTYPE const *a; \ + MKLTYPE alpha_, beta_; \ + assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \ + assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(beta_, EIGTYPE(1)); \ +\ +/* Set m, n */ \ + n = (MKL_INT)size; \ + lda = lhsStride; \ + incx = 1; \ + incy = resIncr; \ +\ +/* Set uplo, trans and diag*/ \ + trans = ConjLhs ? 'C' : 'T'; \ + uplo = IsLower ? 'U' : 'L'; \ + diag = IsUnitDiag ? 'U' : 'N'; \ +\ +/* call ?TRMV*/ \ + MKLPREFIX##trmv(&uplo, &trans, &diag, &n, (const MKLTYPE*)_lhs, &lda, (MKLTYPE*)x, &incx); \ +\ +/* Add op(a_tr)rhs into res*/ \ + MKLPREFIX##axpy(&n, &alpha_,(const MKLTYPE*)x, &incx, (MKLTYPE*)_res, &incy); \ +/* Non-square case - doesn't fit to MKL ?TRMV. Fall to default triangular product*/ \ + if (size<(std::max)(rows,cols)) { \ + typedef Matrix<EIGTYPE, Dynamic, Dynamic> MatrixLhs; \ + if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \ + x = x_tmp.data(); \ + if (size<rows) { \ + y = _res + size*resIncr; \ + a = _lhs + size*lda; \ + m = rows-size; \ + n = size; \ + } \ + else { \ + x += size; \ + y = _res; \ + a = _lhs + size; \ + m = size; \ + n = cols-size; \ + } \ + MKLPREFIX##gemv(&trans, &n, &m, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)x, &incx, &beta_, (MKLTYPE*)y, &incy); \ + } \ + } \ +}; + +EIGEN_MKL_TRMV_RM(double, double, d, d) +EIGEN_MKL_TRMV_RM(dcomplex, MKL_Complex16, cd, z) +EIGEN_MKL_TRMV_RM(float, float, f, s) +EIGEN_MKL_TRMV_RM(scomplex, MKL_Complex8, cf, c) + +} // end namespase internal + +} // end namespace Eigen + +#endif // EIGEN_TRIANGULAR_MATRIX_VECTOR_MKL_H diff --git a/extern/Eigen3/Eigen/src/Core/products/TriangularSolverMatrix.h b/extern/Eigen3/Eigen/src/Core/products/TriangularSolverMatrix.h index 4dced6b0eb9..a49ea318345 100644 --- a/extern/Eigen3/Eigen/src/Core/products/TriangularSolverMatrix.h +++ b/extern/Eigen3/Eigen/src/Core/products/TriangularSolverMatrix.h @@ -3,28 +3,15 @@ // // Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr> // -// Eigen is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 3 of the License, or (at your option) any later version. -// -// Alternatively, you can redistribute it and/or -// modify it under the terms of the GNU General Public License as -// published by the Free Software Foundation; either version 2 of -// the License, or (at your option) any later version. -// -// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License and a copy of the GNU General Public License along with -// Eigen. If not, see <http://www.gnu.org/licenses/>. +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. #ifndef EIGEN_TRIANGULAR_SOLVER_MATRIX_H #define EIGEN_TRIANGULAR_SOLVER_MATRIX_H +namespace Eigen { + namespace internal { // if the rhs is row major, let's transpose the product @@ -34,14 +21,15 @@ struct triangular_solve_matrix<Scalar,Index,Side,Mode,Conjugate,TriStorageOrder, static EIGEN_DONT_INLINE void run( Index size, Index cols, const Scalar* tri, Index triStride, - Scalar* _other, Index otherStride) + Scalar* _other, Index otherStride, + level3_blocking<Scalar,Scalar>& blocking) { triangular_solve_matrix< Scalar, Index, Side==OnTheLeft?OnTheRight:OnTheLeft, (Mode&UnitDiag) | ((Mode&Upper) ? Lower : Upper), NumTraits<Scalar>::IsComplex && Conjugate, TriStorageOrder==RowMajor ? ColMajor : RowMajor, ColMajor> - ::run(size, cols, tri, triStride, _other, otherStride); + ::run(size, cols, tri, triStride, _other, otherStride, blocking); } }; @@ -53,7 +41,8 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO static EIGEN_DONT_INLINE void run( Index size, Index otherSize, const Scalar* _tri, Index triStride, - Scalar* _other, Index otherStride) + Scalar* _other, Index otherStride, + level3_blocking<Scalar,Scalar>& blocking) { Index cols = otherSize; const_blas_data_mapper<Scalar, Index, TriStorageOrder> tri(_tri,triStride); @@ -65,22 +54,29 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO IsLower = (Mode&Lower) == Lower }; - Index kc = size; // cache block size along the K direction - Index mc = size; // cache block size along the M direction - Index nc = cols; // cache block size along the N direction - computeProductBlockingSizes<Scalar,Scalar,4>(kc, mc, nc); + Index kc = blocking.kc(); // cache block size along the K direction + Index mc = (std::min)(size,blocking.mc()); // cache block size along the M direction + std::size_t sizeA = kc*mc; + std::size_t sizeB = kc*cols; std::size_t sizeW = kc*Traits::WorkSpaceFactor; - std::size_t sizeB = sizeW + kc*cols; - ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0); - ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0); - Scalar* blockB = allocatedBlockB + sizeW; + + ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA()); + ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB()); + ei_declare_aligned_stack_constructed_variable(Scalar, blockW, sizeW, blocking.blockW()); conj_if<Conjugate> conj; gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, Conjugate, false> gebp_kernel; gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, TriStorageOrder> pack_lhs; gemm_pack_rhs<Scalar, Index, Traits::nr, ColMajor, false, true> pack_rhs; + // the goal here is to subdivise the Rhs panels such that we keep some cache + // coherence when accessing the rhs elements + std::ptrdiff_t l1, l2; + manage_caching_sizes(GetAction, &l1, &l2); + Index subcols = cols>0 ? l2/(4 * sizeof(Scalar) * otherStride) : 0; + subcols = std::max<Index>((subcols/Traits::nr)*Traits::nr, Traits::nr); + for(Index k2=IsLower ? 0 : size; IsLower ? k2<size : k2>0; IsLower ? k2+=kc : k2-=kc) @@ -92,16 +88,18 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO // A11 (the triangular part) and A21 the remaining rectangular part. // Then the high level algorithm is: // - B = R1 => general block copy (done during the next step) - // - R1 = L1^-1 B => tricky part + // - R1 = A11^-1 B => tricky part // - update B from the new R1 => actually this has to be performed continuously during the above step - // - R2 = L2 * B => GEPP + // - R2 -= A21 * B => GEPP - // The tricky part: compute R1 = L1^-1 B while updating B from R1 - // The idea is to split L1 into multiple small vertical panels. - // Each panel can be split into a small triangular part A1 which is processed without optimization, - // and the remaining small part A2 which is processed using gebp with appropriate block strides + // The tricky part: compute R1 = A11^-1 B while updating B from R1 + // The idea is to split A11 into multiple small vertical panels. + // Each panel can be split into a small triangular part T1k which is processed without optimization, + // and the remaining small part T2k which is processed using gebp with appropriate block strides + for(Index j2=0; j2<cols; j2+=subcols) { - // for each small vertical panels of lhs + Index actual_cols = (std::min)(cols-j2,subcols); + // for each small vertical panels [T1k^T, T2k^T]^T of lhs for (Index k1=0; k1<actual_kc; k1+=SmallPanelWidth) { Index actualPanelWidth = std::min<Index>(actual_kc-k1, SmallPanelWidth); @@ -114,11 +112,11 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO Index rs = actualPanelWidth - k - 1; // remaining size Scalar a = (Mode & UnitDiag) ? Scalar(1) : Scalar(1)/conj(tri(i,i)); - for (Index j=0; j<cols; ++j) + for (Index j=j2; j<j2+actual_cols; ++j) { if (TriStorageOrder==RowMajor) { - Scalar b = 0; + Scalar b(0); const Scalar* l = &tri(i,s); Scalar* r = &other(s,j); for (Index i3=0; i3<k; ++i3) @@ -143,7 +141,7 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO Index blockBOffset = IsLower ? k1 : lengthTarget; // update the respective rows of B from other - pack_rhs(blockB, _other+startBlock, otherStride, actualPanelWidth, cols, actual_kc, blockBOffset); + pack_rhs(blockB+actual_kc*j2, &other(startBlock,j2), otherStride, actualPanelWidth, actual_cols, actual_kc, blockBOffset); // GEBP if (lengthTarget>0) @@ -152,13 +150,13 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO pack_lhs(blockA, &tri(startTarget,startBlock), triStride, actualPanelWidth, lengthTarget); - gebp_kernel(_other+startTarget, otherStride, blockA, blockB, lengthTarget, actualPanelWidth, cols, Scalar(-1), - actualPanelWidth, actual_kc, 0, blockBOffset); + gebp_kernel(&other(startTarget,j2), otherStride, blockA, blockB+actual_kc*j2, lengthTarget, actualPanelWidth, actual_cols, Scalar(-1), + actualPanelWidth, actual_kc, 0, blockBOffset, blockW); } } } - - // R2 = A2 * B => GEPP + + // R2 -= A21 * B => GEPP { Index start = IsLower ? k2+kc : 0; Index end = IsLower ? size : k2-kc; @@ -169,7 +167,7 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO { pack_lhs(blockA, &tri(i2, IsLower ? k2 : k2-kc), triStride, actual_kc, actual_mc); - gebp_kernel(_other+i2, otherStride, blockA, blockB, actual_mc, actual_kc, cols, Scalar(-1)); + gebp_kernel(_other+i2, otherStride, blockA, blockB, actual_mc, actual_kc, cols, Scalar(-1), -1, -1, 0, 0, blockW); } } } @@ -185,7 +183,8 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorage static EIGEN_DONT_INLINE void run( Index size, Index otherSize, const Scalar* _tri, Index triStride, - Scalar* _other, Index otherStride) + Scalar* _other, Index otherStride, + level3_blocking<Scalar,Scalar>& blocking) { Index rows = otherSize; const_blas_data_mapper<Scalar, Index, TriStorageOrder> rhs(_tri,triStride); @@ -198,19 +197,16 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorage IsLower = (Mode&Lower) == Lower }; -// Index kc = std::min<Index>(Traits::Max_kc/4,size); // cache block size along the K direction -// Index mc = std::min<Index>(Traits::Max_mc,size); // cache block size along the M direction - // check that !!!! - Index kc = size; // cache block size along the K direction - Index mc = size; // cache block size along the M direction - Index nc = rows; // cache block size along the N direction - computeProductBlockingSizes<Scalar,Scalar,4>(kc, mc, nc); + Index kc = blocking.kc(); // cache block size along the K direction + Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction + std::size_t sizeA = kc*mc; + std::size_t sizeB = kc*size; std::size_t sizeW = kc*Traits::WorkSpaceFactor; - std::size_t sizeB = sizeW + kc*size; - ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0); - ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0); - Scalar* blockB = allocatedBlockB + sizeW; + + ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA()); + ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB()); + ei_declare_aligned_stack_constructed_variable(Scalar, blockW, sizeW, blocking.blockW()); conj_if<Conjugate> conj; gebp_kernel<Scalar,Scalar, Index, Traits::mr, Traits::nr, false, Conjugate> gebp_kernel; @@ -277,7 +273,7 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorage Scalar(-1), actual_kc, actual_kc, // strides panelOffset, panelOffset, // offsets - allocatedBlockB); // workspace + blockW); // workspace } // unblocked triangular solve @@ -308,7 +304,7 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorage if (rs>0) gebp_kernel(_other+i2+startPanel*otherStride, otherStride, blockA, geb, actual_mc, actual_kc, rs, Scalar(-1), - -1, -1, 0, 0, allocatedBlockB); + -1, -1, 0, 0, blockW); } } } @@ -316,4 +312,6 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorage } // end namespace internal +} // end namespace Eigen + #endif // EIGEN_TRIANGULAR_SOLVER_MATRIX_H diff --git a/extern/Eigen3/Eigen/src/Core/products/TriangularSolverMatrix_MKL.h b/extern/Eigen3/Eigen/src/Core/products/TriangularSolverMatrix_MKL.h new file mode 100644 index 00000000000..a4f508b2e83 --- /dev/null +++ b/extern/Eigen3/Eigen/src/Core/products/TriangularSolverMatrix_MKL.h @@ -0,0 +1,155 @@ +/* + Copyright (c) 2011, Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ******************************************************************************** + * Content : Eigen bindings to Intel(R) MKL + * Triangular matrix * matrix product functionality based on ?TRMM. + ******************************************************************************** +*/ + +#ifndef EIGEN_TRIANGULAR_SOLVER_MATRIX_MKL_H +#define EIGEN_TRIANGULAR_SOLVER_MATRIX_MKL_H + +namespace Eigen { + +namespace internal { + +// implements LeftSide op(triangular)^-1 * general +#define EIGEN_MKL_TRSM_L(EIGTYPE, MKLTYPE, MKLPREFIX) \ +template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \ +struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor> \ +{ \ + enum { \ + IsLower = (Mode&Lower) == Lower, \ + IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \ + IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \ + conjA = ((TriStorageOrder==ColMajor) && Conjugate) ? 1 : 0 \ + }; \ + static EIGEN_DONT_INLINE void run( \ + Index size, Index otherSize, \ + const EIGTYPE* _tri, Index triStride, \ + EIGTYPE* _other, Index otherStride, level3_blocking<EIGTYPE,EIGTYPE>& /*blocking*/) \ + { \ + MKL_INT m = size, n = otherSize, lda, ldb; \ + char side = 'L', uplo, diag='N', transa; \ + /* Set alpha_ */ \ + MKLTYPE alpha; \ + EIGTYPE myone(1); \ + assign_scalar_eig2mkl(alpha, myone); \ + ldb = otherStride;\ +\ + const EIGTYPE *a; \ +/* Set trans */ \ + transa = (TriStorageOrder==RowMajor) ? ((Conjugate) ? 'C' : 'T') : 'N'; \ +/* Set uplo */ \ + uplo = IsLower ? 'L' : 'U'; \ + if (TriStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \ +/* Set a, lda */ \ + typedef Matrix<EIGTYPE, Dynamic, Dynamic, TriStorageOrder> MatrixTri; \ + Map<const MatrixTri, 0, OuterStride<> > tri(_tri,size,size,OuterStride<>(triStride)); \ + MatrixTri a_tmp; \ +\ + if (conjA) { \ + a_tmp = tri.conjugate(); \ + a = a_tmp.data(); \ + lda = a_tmp.outerStride(); \ + } else { \ + a = _tri; \ + lda = triStride; \ + } \ + if (IsUnitDiag) diag='U'; \ +/* call ?trsm*/ \ + MKLPREFIX##trsm(&side, &uplo, &transa, &diag, &m, &n, &alpha, (const MKLTYPE*)a, &lda, (MKLTYPE*)_other, &ldb); \ + } \ +}; + +EIGEN_MKL_TRSM_L(double, double, d) +EIGEN_MKL_TRSM_L(dcomplex, MKL_Complex16, z) +EIGEN_MKL_TRSM_L(float, float, s) +EIGEN_MKL_TRSM_L(scomplex, MKL_Complex8, c) + + +// implements RightSide general * op(triangular)^-1 +#define EIGEN_MKL_TRSM_R(EIGTYPE, MKLTYPE, MKLPREFIX) \ +template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \ +struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor> \ +{ \ + enum { \ + IsLower = (Mode&Lower) == Lower, \ + IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \ + IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \ + conjA = ((TriStorageOrder==ColMajor) && Conjugate) ? 1 : 0 \ + }; \ + static EIGEN_DONT_INLINE void run( \ + Index size, Index otherSize, \ + const EIGTYPE* _tri, Index triStride, \ + EIGTYPE* _other, Index otherStride, level3_blocking<EIGTYPE,EIGTYPE>& /*blocking*/) \ + { \ + MKL_INT m = otherSize, n = size, lda, ldb; \ + char side = 'R', uplo, diag='N', transa; \ + /* Set alpha_ */ \ + MKLTYPE alpha; \ + EIGTYPE myone(1); \ + assign_scalar_eig2mkl(alpha, myone); \ + ldb = otherStride;\ +\ + const EIGTYPE *a; \ +/* Set trans */ \ + transa = (TriStorageOrder==RowMajor) ? ((Conjugate) ? 'C' : 'T') : 'N'; \ +/* Set uplo */ \ + uplo = IsLower ? 'L' : 'U'; \ + if (TriStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \ +/* Set a, lda */ \ + typedef Matrix<EIGTYPE, Dynamic, Dynamic, TriStorageOrder> MatrixTri; \ + Map<const MatrixTri, 0, OuterStride<> > tri(_tri,size,size,OuterStride<>(triStride)); \ + MatrixTri a_tmp; \ +\ + if (conjA) { \ + a_tmp = tri.conjugate(); \ + a = a_tmp.data(); \ + lda = a_tmp.outerStride(); \ + } else { \ + a = _tri; \ + lda = triStride; \ + } \ + if (IsUnitDiag) diag='U'; \ +/* call ?trsm*/ \ + MKLPREFIX##trsm(&side, &uplo, &transa, &diag, &m, &n, &alpha, (const MKLTYPE*)a, &lda, (MKLTYPE*)_other, &ldb); \ + /*std::cout << "TRMS_L specialization!\n";*/ \ + } \ +}; + +EIGEN_MKL_TRSM_R(double, double, d) +EIGEN_MKL_TRSM_R(dcomplex, MKL_Complex16, z) +EIGEN_MKL_TRSM_R(float, float, s) +EIGEN_MKL_TRSM_R(scomplex, MKL_Complex8, c) + + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_TRIANGULAR_SOLVER_MATRIX_MKL_H diff --git a/extern/Eigen3/Eigen/src/Core/products/TriangularSolverVector.h b/extern/Eigen3/Eigen/src/Core/products/TriangularSolverVector.h index 639d4a5b476..ce4d1008801 100644 --- a/extern/Eigen3/Eigen/src/Core/products/TriangularSolverVector.h +++ b/extern/Eigen3/Eigen/src/Core/products/TriangularSolverVector.h @@ -3,28 +3,15 @@ // // Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr> // -// Eigen is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 3 of the License, or (at your option) any later version. -// -// Alternatively, you can redistribute it and/or -// modify it under the terms of the GNU General Public License as -// published by the Free Software Foundation; either version 2 of -// the License, or (at your option) any later version. -// -// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License and a copy of the GNU General Public License along with -// Eigen. If not, see <http://www.gnu.org/licenses/>. +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. #ifndef EIGEN_TRIANGULAR_SOLVER_VECTOR_H #define EIGEN_TRIANGULAR_SOLVER_VECTOR_H +namespace Eigen { + namespace internal { template<typename LhsScalar, typename RhsScalar, typename Index, int Mode, bool Conjugate, int StorageOrder> @@ -147,4 +134,6 @@ struct triangular_solve_vector<LhsScalar, RhsScalar, Index, OnTheLeft, Mode, Con } // end namespace internal +} // end namespace Eigen + #endif // EIGEN_TRIANGULAR_SOLVER_VECTOR_H |