diff options
Diffstat (limited to 'extern/Eigen2/Eigen/src/Core/Product.h')
-rw-r--r-- | extern/Eigen2/Eigen/src/Core/Product.h | 769 |
1 file changed, 769 insertions, 0 deletions
diff --git a/extern/Eigen2/Eigen/src/Core/Product.h b/extern/Eigen2/Eigen/src/Core/Product.h new file mode 100644 index 00000000000..1151b21641c --- /dev/null +++ b/extern/Eigen2/Eigen/src/Core/Product.h @@ -0,0 +1,769 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. Eigen itself is part of the KDE project. +// +// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com> +// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr> +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see <http://www.gnu.org/licenses/>. 
+ +#ifndef EIGEN_PRODUCT_H +#define EIGEN_PRODUCT_H + +/*************************** +*** Forward declarations *** +***************************/ + +template<int VectorizationMode, int Index, typename Lhs, typename Rhs, typename RetScalar> +struct ei_product_coeff_impl; + +template<int StorageOrder, int Index, typename Lhs, typename Rhs, typename PacketScalar, int LoadMode> +struct ei_product_packet_impl; + +/** \class ProductReturnType + * + * \brief Helper class to get the correct and optimized returned type of operator* + * + * \param Lhs the type of the left-hand side + * \param Rhs the type of the right-hand side + * \param ProductMode the type of the product (determined automatically by ei_product_mode) + * + * This class defines the typename Type representing the optimized product expression + * between two matrix expressions. In practice, using ProductReturnType<Lhs,Rhs>::Type + * is the recommended way to define the result type of a function returning an expression + * which involve a matrix product. The class Product or DiagonalProduct should never be + * used directly. 
+ * + * \sa class Product, class DiagonalProduct, MatrixBase::operator*(const MatrixBase<OtherDerived>&) + */ +template<typename Lhs, typename Rhs, int ProductMode> +struct ProductReturnType +{ + typedef typename ei_nested<Lhs,Rhs::ColsAtCompileTime>::type LhsNested; + typedef typename ei_nested<Rhs,Lhs::RowsAtCompileTime>::type RhsNested; + + typedef Product<LhsNested, RhsNested, ProductMode> Type; +}; + +// cache friendly specialization +// note that there is a DiagonalProduct specialization in DiagonalProduct.h +template<typename Lhs, typename Rhs> +struct ProductReturnType<Lhs,Rhs,CacheFriendlyProduct> +{ + typedef typename ei_nested<Lhs,Rhs::ColsAtCompileTime>::type LhsNested; + + typedef typename ei_nested<Rhs,Lhs::RowsAtCompileTime, + typename ei_plain_matrix_type_column_major<Rhs>::type + >::type RhsNested; + + typedef Product<LhsNested, RhsNested, CacheFriendlyProduct> Type; +}; + +/* Helper class to determine the type of the product, can be either: + * - NormalProduct + * - CacheFriendlyProduct + * - DiagonalProduct + */ +template<typename Lhs, typename Rhs> struct ei_product_mode +{ + enum{ + + value = ((Rhs::Flags&Diagonal)==Diagonal) || ((Lhs::Flags&Diagonal)==Diagonal) + ? DiagonalProduct + : Lhs::MaxColsAtCompileTime == Dynamic + && ( Lhs::MaxRowsAtCompileTime == Dynamic + || Rhs::MaxColsAtCompileTime == Dynamic ) + && (!(Rhs::IsVectorAtCompileTime && (Lhs::Flags&RowMajorBit) && (!(Lhs::Flags&DirectAccessBit)))) + && (!(Lhs::IsVectorAtCompileTime && (!(Rhs::Flags&RowMajorBit)) && (!(Rhs::Flags&DirectAccessBit)))) + && (ei_is_same_type<typename Lhs::Scalar, typename Rhs::Scalar>::ret) + ? 
CacheFriendlyProduct + : NormalProduct }; +}; + +/** \class Product + * + * \brief Expression of the product of two matrices + * + * \param LhsNested the type used to store the left-hand side + * \param RhsNested the type used to store the right-hand side + * \param ProductMode the type of the product + * + * This class represents an expression of the product of two matrices. + * It is the return type of the operator* between matrices. Its template + * arguments are determined automatically by ProductReturnType. Therefore, + * Product should never be used direclty. To determine the result type of a + * function which involves a matrix product, use ProductReturnType::Type. + * + * \sa ProductReturnType, MatrixBase::operator*(const MatrixBase<OtherDerived>&) + */ +template<typename LhsNested, typename RhsNested, int ProductMode> +struct ei_traits<Product<LhsNested, RhsNested, ProductMode> > +{ + // clean the nested types: + typedef typename ei_cleantype<LhsNested>::type _LhsNested; + typedef typename ei_cleantype<RhsNested>::type _RhsNested; + typedef typename ei_scalar_product_traits<typename _LhsNested::Scalar, typename _RhsNested::Scalar>::ReturnType Scalar; + + enum { + LhsCoeffReadCost = _LhsNested::CoeffReadCost, + RhsCoeffReadCost = _RhsNested::CoeffReadCost, + LhsFlags = _LhsNested::Flags, + RhsFlags = _RhsNested::Flags, + + RowsAtCompileTime = _LhsNested::RowsAtCompileTime, + ColsAtCompileTime = _RhsNested::ColsAtCompileTime, + InnerSize = EIGEN_ENUM_MIN(_LhsNested::ColsAtCompileTime, _RhsNested::RowsAtCompileTime), + + MaxRowsAtCompileTime = _LhsNested::MaxRowsAtCompileTime, + MaxColsAtCompileTime = _RhsNested::MaxColsAtCompileTime, + + LhsRowMajor = LhsFlags & RowMajorBit, + RhsRowMajor = RhsFlags & RowMajorBit, + + CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit) + && (ColsAtCompileTime % ei_packet_traits<Scalar>::size == 0), + + CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) + && (RowsAtCompileTime % 
ei_packet_traits<Scalar>::size == 0), + + EvalToRowMajor = RhsRowMajor && (ProductMode==(int)CacheFriendlyProduct ? LhsRowMajor : (!CanVectorizeLhs)), + + RemovedBits = ~(EvalToRowMajor ? 0 : RowMajorBit), + + Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & RemovedBits) + | EvalBeforeAssigningBit + | EvalBeforeNestingBit + | (CanVectorizeLhs || CanVectorizeRhs ? PacketAccessBit : 0) + | (LhsFlags & RhsFlags & AlignedBit), + + CoeffReadCost = InnerSize == Dynamic ? Dynamic + : InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost) + + (InnerSize - 1) * NumTraits<Scalar>::AddCost, + + /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside + * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner + * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect + * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI. + */ + CanVectorizeInner = LhsRowMajor && (!RhsRowMajor) && (LhsFlags & RhsFlags & ActualPacketAccessBit) + && (InnerSize % ei_packet_traits<Scalar>::size == 0) + }; +}; + +template<typename LhsNested, typename RhsNested, int ProductMode> class Product : ei_no_assignment_operator, + public MatrixBase<Product<LhsNested, RhsNested, ProductMode> > +{ + public: + + EIGEN_GENERIC_PUBLIC_INTERFACE(Product) + + private: + + typedef typename ei_traits<Product>::_LhsNested _LhsNested; + typedef typename ei_traits<Product>::_RhsNested _RhsNested; + + enum { + PacketSize = ei_packet_traits<Scalar>::size, + InnerSize = ei_traits<Product>::InnerSize, + Unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT, + CanVectorizeInner = ei_traits<Product>::CanVectorizeInner + }; + + typedef ei_product_coeff_impl<CanVectorizeInner ? InnerVectorization : NoVectorization, + Unroll ? 
InnerSize-1 : Dynamic, + _LhsNested, _RhsNested, Scalar> ScalarCoeffImpl; + + public: + + template<typename Lhs, typename Rhs> + inline Product(const Lhs& lhs, const Rhs& rhs) + : m_lhs(lhs), m_rhs(rhs) + { + // we don't allow taking products of matrices of different real types, as that wouldn't be vectorizable. + // We still allow to mix T and complex<T>. + EIGEN_STATIC_ASSERT((ei_is_same_type<typename Lhs::RealScalar, typename Rhs::RealScalar>::ret), + YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) + ei_assert(lhs.cols() == rhs.rows() + && "invalid matrix product" + && "if you wanted a coeff-wise or a dot product use the respective explicit functions"); + } + + /** \internal + * compute \a res += \c *this using the cache friendly product. + */ + template<typename DestDerived> + void _cacheFriendlyEvalAndAdd(DestDerived& res) const; + + /** \internal + * \returns whether it is worth it to use the cache friendly product. + */ + EIGEN_STRONG_INLINE bool _useCacheFriendlyProduct() const + { + return m_lhs.cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD + && ( rows()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD + || cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD); + } + + EIGEN_STRONG_INLINE int rows() const { return m_lhs.rows(); } + EIGEN_STRONG_INLINE int cols() const { return m_rhs.cols(); } + + EIGEN_STRONG_INLINE const Scalar coeff(int row, int col) const + { + Scalar res; + ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res); + return res; + } + + /* Allow index-based non-packet access. It is impossible though to allow index-based packed access, + * which is why we don't set the LinearAccessBit. + */ + EIGEN_STRONG_INLINE const Scalar coeff(int index) const + { + Scalar res; + const int row = RowsAtCompileTime == 1 ? 0 : index; + const int col = RowsAtCompileTime == 1 ? 
index : 0; + ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res); + return res; + } + + template<int LoadMode> + EIGEN_STRONG_INLINE const PacketScalar packet(int row, int col) const + { + PacketScalar res; + ei_product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor, + Unroll ? InnerSize-1 : Dynamic, + _LhsNested, _RhsNested, PacketScalar, LoadMode> + ::run(row, col, m_lhs, m_rhs, res); + return res; + } + + EIGEN_STRONG_INLINE const _LhsNested& lhs() const { return m_lhs; } + EIGEN_STRONG_INLINE const _RhsNested& rhs() const { return m_rhs; } + + protected: + const LhsNested m_lhs; + const RhsNested m_rhs; +}; + +/** \returns the matrix product of \c *this and \a other. + * + * \note If instead of the matrix product you want the coefficient-wise product, see Cwise::operator*(). + * + * \sa lazy(), operator*=(const MatrixBase&), Cwise::operator*() + */ +template<typename Derived> +template<typename OtherDerived> +inline const typename ProductReturnType<Derived,OtherDerived>::Type +MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const +{ + enum { + ProductIsValid = Derived::ColsAtCompileTime==Dynamic + || OtherDerived::RowsAtCompileTime==Dynamic + || int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime), + AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime, + SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived) + }; + // note to the lost user: + // * for a dot product use: v1.dot(v2) + // * for a coeff-wise product use: v1.cwise()*v2 + EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes), + INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS) + EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors), + INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION) + EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT) + return typename 
ProductReturnType<Derived,OtherDerived>::Type(derived(), other.derived()); +} + +/** replaces \c *this by \c *this * \a other. + * + * \returns a reference to \c *this + */ +template<typename Derived> +template<typename OtherDerived> +inline Derived & +MatrixBase<Derived>::operator*=(const MatrixBase<OtherDerived> &other) +{ + return derived() = derived() * other.derived(); +} + +/*************************************************************************** +* Normal product .coeff() implementation (with meta-unrolling) +***************************************************************************/ + +/************************************** +*** Scalar path - no vectorization *** +**************************************/ + +template<int Index, typename Lhs, typename Rhs, typename RetScalar> +struct ei_product_coeff_impl<NoVectorization, Index, Lhs, Rhs, RetScalar> +{ + EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, RetScalar &res) + { + ei_product_coeff_impl<NoVectorization, Index-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, res); + res += lhs.coeff(row, Index) * rhs.coeff(Index, col); + } +}; + +template<typename Lhs, typename Rhs, typename RetScalar> +struct ei_product_coeff_impl<NoVectorization, 0, Lhs, Rhs, RetScalar> +{ + EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, RetScalar &res) + { + res = lhs.coeff(row, 0) * rhs.coeff(0, col); + } +}; + +template<typename Lhs, typename Rhs, typename RetScalar> +struct ei_product_coeff_impl<NoVectorization, Dynamic, Lhs, Rhs, RetScalar> +{ + EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, RetScalar& res) + { + ei_assert(lhs.cols()>0 && "you are using a non initialized matrix"); + res = lhs.coeff(row, 0) * rhs.coeff(0, col); + for(int i = 1; i < lhs.cols(); ++i) + res += lhs.coeff(row, i) * rhs.coeff(i, col); + } +}; + +// prevent buggy user code from causing an infinite recursion +template<typename Lhs, 
typename Rhs, typename RetScalar> +struct ei_product_coeff_impl<NoVectorization, -1, Lhs, Rhs, RetScalar> +{ + EIGEN_STRONG_INLINE static void run(int, int, const Lhs&, const Rhs&, RetScalar&) {} +}; + +/******************************************* +*** Scalar path with inner vectorization *** +*******************************************/ + +template<int Index, typename Lhs, typename Rhs, typename PacketScalar> +struct ei_product_coeff_vectorized_unroller +{ + enum { PacketSize = ei_packet_traits<typename Lhs::Scalar>::size }; + EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres) + { + ei_product_coeff_vectorized_unroller<Index-PacketSize, Lhs, Rhs, PacketScalar>::run(row, col, lhs, rhs, pres); + pres = ei_padd(pres, ei_pmul( lhs.template packet<Aligned>(row, Index) , rhs.template packet<Aligned>(Index, col) )); + } +}; + +template<typename Lhs, typename Rhs, typename PacketScalar> +struct ei_product_coeff_vectorized_unroller<0, Lhs, Rhs, PacketScalar> +{ + EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres) + { + pres = ei_pmul(lhs.template packet<Aligned>(row, 0) , rhs.template packet<Aligned>(0, col)); + } +}; + +template<int Index, typename Lhs, typename Rhs, typename RetScalar> +struct ei_product_coeff_impl<InnerVectorization, Index, Lhs, Rhs, RetScalar> +{ + typedef typename Lhs::PacketScalar PacketScalar; + enum { PacketSize = ei_packet_traits<typename Lhs::Scalar>::size }; + EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, RetScalar &res) + { + PacketScalar pres; + ei_product_coeff_vectorized_unroller<Index+1-PacketSize, Lhs, Rhs, PacketScalar>::run(row, col, lhs, rhs, pres); + ei_product_coeff_impl<NoVectorization,Index,Lhs,Rhs,RetScalar>::run(row, col, lhs, rhs, res); + res = ei_predux(pres); + } +}; + +template<typename Lhs, typename Rhs, int LhsRows = Lhs::RowsAtCompileTime, int 
RhsCols = Rhs::ColsAtCompileTime> +struct ei_product_coeff_vectorized_dyn_selector +{ + EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) + { + res = ei_dot_impl< + Block<Lhs, 1, ei_traits<Lhs>::ColsAtCompileTime>, + Block<Rhs, ei_traits<Rhs>::RowsAtCompileTime, 1>, + LinearVectorization, NoUnrolling>::run(lhs.row(row), rhs.col(col)); + } +}; + +// NOTE the 3 following specializations are because taking .col(0) on a vector is a bit slower +// NOTE maybe they are now useless since we have a specialization for Block<Matrix> +template<typename Lhs, typename Rhs, int RhsCols> +struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols> +{ + EIGEN_STRONG_INLINE static void run(int /*row*/, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) + { + res = ei_dot_impl< + Lhs, + Block<Rhs, ei_traits<Rhs>::RowsAtCompileTime, 1>, + LinearVectorization, NoUnrolling>::run(lhs, rhs.col(col)); + } +}; + +template<typename Lhs, typename Rhs, int LhsRows> +struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1> +{ + EIGEN_STRONG_INLINE static void run(int row, int /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) + { + res = ei_dot_impl< + Block<Lhs, 1, ei_traits<Lhs>::ColsAtCompileTime>, + Rhs, + LinearVectorization, NoUnrolling>::run(lhs.row(row), rhs); + } +}; + +template<typename Lhs, typename Rhs> +struct ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1> +{ + EIGEN_STRONG_INLINE static void run(int /*row*/, int /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) + { + res = ei_dot_impl< + Lhs, + Rhs, + LinearVectorization, NoUnrolling>::run(lhs, rhs); + } +}; + +template<typename Lhs, typename Rhs, typename RetScalar> +struct ei_product_coeff_impl<InnerVectorization, Dynamic, Lhs, Rhs, RetScalar> +{ + EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) + { + 
ei_product_coeff_vectorized_dyn_selector<Lhs,Rhs>::run(row, col, lhs, rhs, res); + } +}; + +/******************* +*** Packet path *** +*******************/ + +template<int Index, typename Lhs, typename Rhs, typename PacketScalar, int LoadMode> +struct ei_product_packet_impl<RowMajor, Index, Lhs, Rhs, PacketScalar, LoadMode> +{ + EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res) + { + ei_product_packet_impl<RowMajor, Index-1, Lhs, Rhs, PacketScalar, LoadMode>::run(row, col, lhs, rhs, res); + res = ei_pmadd(ei_pset1(lhs.coeff(row, Index)), rhs.template packet<LoadMode>(Index, col), res); + } +}; + +template<int Index, typename Lhs, typename Rhs, typename PacketScalar, int LoadMode> +struct ei_product_packet_impl<ColMajor, Index, Lhs, Rhs, PacketScalar, LoadMode> +{ + EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res) + { + ei_product_packet_impl<ColMajor, Index-1, Lhs, Rhs, PacketScalar, LoadMode>::run(row, col, lhs, rhs, res); + res = ei_pmadd(lhs.template packet<LoadMode>(row, Index), ei_pset1(rhs.coeff(Index, col)), res); + } +}; + +template<typename Lhs, typename Rhs, typename PacketScalar, int LoadMode> +struct ei_product_packet_impl<RowMajor, 0, Lhs, Rhs, PacketScalar, LoadMode> +{ + EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res) + { + res = ei_pmul(ei_pset1(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col)); + } +}; + +template<typename Lhs, typename Rhs, typename PacketScalar, int LoadMode> +struct ei_product_packet_impl<ColMajor, 0, Lhs, Rhs, PacketScalar, LoadMode> +{ + EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar &res) + { + res = ei_pmul(lhs.template packet<LoadMode>(row, 0), ei_pset1(rhs.coeff(0, col))); + } +}; + +template<typename Lhs, typename Rhs, typename PacketScalar, int LoadMode> +struct ei_product_packet_impl<RowMajor, 
Dynamic, Lhs, Rhs, PacketScalar, LoadMode> +{ + EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar& res) + { + ei_assert(lhs.cols()>0 && "you are using a non initialized matrix"); + res = ei_pmul(ei_pset1(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col)); + for(int i = 1; i < lhs.cols(); ++i) + res = ei_pmadd(ei_pset1(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res); + } +}; + +template<typename Lhs, typename Rhs, typename PacketScalar, int LoadMode> +struct ei_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, PacketScalar, LoadMode> +{ + EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, PacketScalar& res) + { + ei_assert(lhs.cols()>0 && "you are using a non initialized matrix"); + res = ei_pmul(lhs.template packet<LoadMode>(row, 0), ei_pset1(rhs.coeff(0, col))); + for(int i = 1; i < lhs.cols(); ++i) + res = ei_pmadd(lhs.template packet<LoadMode>(row, i), ei_pset1(rhs.coeff(i, col)), res); + } +}; + +/*************************************************************************** +* Cache friendly product callers and specific nested evaluation strategies +***************************************************************************/ + +template<typename Scalar, typename RhsType> +static void ei_cache_friendly_product_colmajor_times_vector( + int size, const Scalar* lhs, int lhsStride, const RhsType& rhs, Scalar* res); + +template<typename Scalar, typename ResType> +static void ei_cache_friendly_product_rowmajor_times_vector( + const Scalar* lhs, int lhsStride, const Scalar* rhs, int rhsSize, ResType& res); + +template<typename ProductType, + int LhsRows = ei_traits<ProductType>::RowsAtCompileTime, + int LhsOrder = int(ei_traits<ProductType>::LhsFlags)&RowMajorBit ? RowMajor : ColMajor, + int LhsHasDirectAccess = int(ei_traits<ProductType>::LhsFlags)&DirectAccessBit? 
HasDirectAccess : NoDirectAccess, + int RhsCols = ei_traits<ProductType>::ColsAtCompileTime, + int RhsOrder = int(ei_traits<ProductType>::RhsFlags)&RowMajorBit ? RowMajor : ColMajor, + int RhsHasDirectAccess = int(ei_traits<ProductType>::RhsFlags)&DirectAccessBit? HasDirectAccess : NoDirectAccess> +struct ei_cache_friendly_product_selector +{ + template<typename DestDerived> + inline static void run(DestDerived& res, const ProductType& product) + { + product._cacheFriendlyEvalAndAdd(res); + } +}; + +// optimized colmajor * vector path +template<typename ProductType, int LhsRows, int RhsOrder, int RhsAccess> +struct ei_cache_friendly_product_selector<ProductType,LhsRows,ColMajor,NoDirectAccess,1,RhsOrder,RhsAccess> +{ + template<typename DestDerived> + inline static void run(DestDerived& res, const ProductType& product) + { + const int size = product.rhs().rows(); + for (int k=0; k<size; ++k) + res += product.rhs().coeff(k) * product.lhs().col(k); + } +}; + +// optimized cache friendly colmajor * vector path for matrix with direct access flag +// NOTE this path could also be enabled for expressions if we add runtime align queries +template<typename ProductType, int LhsRows, int RhsOrder, int RhsAccess> +struct ei_cache_friendly_product_selector<ProductType,LhsRows,ColMajor,HasDirectAccess,1,RhsOrder,RhsAccess> +{ + typedef typename ProductType::Scalar Scalar; + + template<typename DestDerived> + inline static void run(DestDerived& res, const ProductType& product) + { + enum { + EvalToRes = (ei_packet_traits<Scalar>::size==1) + ||((DestDerived::Flags&ActualPacketAccessBit) && (!(DestDerived::Flags & RowMajorBit))) }; + Scalar* EIGEN_RESTRICT _res; + if (EvalToRes) + _res = &res.coeffRef(0); + else + { + _res = ei_aligned_stack_new(Scalar,res.size()); + Map<Matrix<Scalar,DestDerived::RowsAtCompileTime,1> >(_res, res.size()) = res; + } + ei_cache_friendly_product_colmajor_times_vector(res.size(), + &product.lhs().const_cast_derived().coeffRef(0,0), 
product.lhs().stride(), + product.rhs(), _res); + + if (!EvalToRes) + { + res = Map<Matrix<Scalar,DestDerived::SizeAtCompileTime,1> >(_res, res.size()); + ei_aligned_stack_delete(Scalar, _res, res.size()); + } + } +}; + +// optimized vector * rowmajor path +template<typename ProductType, int LhsOrder, int LhsAccess, int RhsCols> +struct ei_cache_friendly_product_selector<ProductType,1,LhsOrder,LhsAccess,RhsCols,RowMajor,NoDirectAccess> +{ + template<typename DestDerived> + inline static void run(DestDerived& res, const ProductType& product) + { + const int cols = product.lhs().cols(); + for (int j=0; j<cols; ++j) + res += product.lhs().coeff(j) * product.rhs().row(j); + } +}; + +// optimized cache friendly vector * rowmajor path for matrix with direct access flag +// NOTE this path coul also be enabled for expressions if we add runtime align queries +template<typename ProductType, int LhsOrder, int LhsAccess, int RhsCols> +struct ei_cache_friendly_product_selector<ProductType,1,LhsOrder,LhsAccess,RhsCols,RowMajor,HasDirectAccess> +{ + typedef typename ProductType::Scalar Scalar; + + template<typename DestDerived> + inline static void run(DestDerived& res, const ProductType& product) + { + enum { + EvalToRes = (ei_packet_traits<Scalar>::size==1) + ||((DestDerived::Flags & ActualPacketAccessBit) && (DestDerived::Flags & RowMajorBit)) }; + Scalar* EIGEN_RESTRICT _res; + if (EvalToRes) + _res = &res.coeffRef(0); + else + { + _res = ei_aligned_stack_new(Scalar, res.size()); + Map<Matrix<Scalar,DestDerived::SizeAtCompileTime,1> >(_res, res.size()) = res; + } + ei_cache_friendly_product_colmajor_times_vector(res.size(), + &product.rhs().const_cast_derived().coeffRef(0,0), product.rhs().stride(), + product.lhs().transpose(), _res); + + if (!EvalToRes) + { + res = Map<Matrix<Scalar,DestDerived::SizeAtCompileTime,1> >(_res, res.size()); + ei_aligned_stack_delete(Scalar, _res, res.size()); + } + } +}; + +// optimized rowmajor - vector product +template<typename ProductType, 
int LhsRows, int RhsOrder, int RhsAccess> +struct ei_cache_friendly_product_selector<ProductType,LhsRows,RowMajor,HasDirectAccess,1,RhsOrder,RhsAccess> +{ + typedef typename ProductType::Scalar Scalar; + typedef typename ei_traits<ProductType>::_RhsNested Rhs; + enum { + UseRhsDirectly = ((ei_packet_traits<Scalar>::size==1) || (Rhs::Flags&ActualPacketAccessBit)) + && (!(Rhs::Flags & RowMajorBit)) }; + + template<typename DestDerived> + inline static void run(DestDerived& res, const ProductType& product) + { + Scalar* EIGEN_RESTRICT _rhs; + if (UseRhsDirectly) + _rhs = &product.rhs().const_cast_derived().coeffRef(0); + else + { + _rhs = ei_aligned_stack_new(Scalar, product.rhs().size()); + Map<Matrix<Scalar,Rhs::SizeAtCompileTime,1> >(_rhs, product.rhs().size()) = product.rhs(); + } + ei_cache_friendly_product_rowmajor_times_vector(&product.lhs().const_cast_derived().coeffRef(0,0), product.lhs().stride(), + _rhs, product.rhs().size(), res); + + if (!UseRhsDirectly) ei_aligned_stack_delete(Scalar, _rhs, product.rhs().size()); + } +}; + +// optimized vector - colmajor product +template<typename ProductType, int LhsOrder, int LhsAccess, int RhsCols> +struct ei_cache_friendly_product_selector<ProductType,1,LhsOrder,LhsAccess,RhsCols,ColMajor,HasDirectAccess> +{ + typedef typename ProductType::Scalar Scalar; + typedef typename ei_traits<ProductType>::_LhsNested Lhs; + enum { + UseLhsDirectly = ((ei_packet_traits<Scalar>::size==1) || (Lhs::Flags&ActualPacketAccessBit)) + && (Lhs::Flags & RowMajorBit) }; + + template<typename DestDerived> + inline static void run(DestDerived& res, const ProductType& product) + { + Scalar* EIGEN_RESTRICT _lhs; + if (UseLhsDirectly) + _lhs = &product.lhs().const_cast_derived().coeffRef(0); + else + { + _lhs = ei_aligned_stack_new(Scalar, product.lhs().size()); + Map<Matrix<Scalar,Lhs::SizeAtCompileTime,1> >(_lhs, product.lhs().size()) = product.lhs(); + } + 
ei_cache_friendly_product_rowmajor_times_vector(&product.rhs().const_cast_derived().coeffRef(0,0), product.rhs().stride(), + _lhs, product.lhs().size(), res); + + if(!UseLhsDirectly) ei_aligned_stack_delete(Scalar, _lhs, product.lhs().size()); + } +}; + +// discard this case which has to be handled by the default path +// (we keep it to be sure to hit a compilation error if this is not the case) +template<typename ProductType, int LhsRows, int RhsOrder, int RhsAccess> +struct ei_cache_friendly_product_selector<ProductType,LhsRows,RowMajor,NoDirectAccess,1,RhsOrder,RhsAccess> +{}; + +// discard this case which has to be handled by the default path +// (we keep it to be sure to hit a compilation error if this is not the case) +template<typename ProductType, int LhsOrder, int LhsAccess, int RhsCols> +struct ei_cache_friendly_product_selector<ProductType,1,LhsOrder,LhsAccess,RhsCols,ColMajor,NoDirectAccess> +{}; + + +/** \internal */ +template<typename Derived> +template<typename Lhs,typename Rhs> +inline Derived& +MatrixBase<Derived>::operator+=(const Flagged<Product<Lhs,Rhs,CacheFriendlyProduct>, 0, EvalBeforeNestingBit | EvalBeforeAssigningBit>& other) +{ + if (other._expression()._useCacheFriendlyProduct()) + ei_cache_friendly_product_selector<Product<Lhs,Rhs,CacheFriendlyProduct> >::run(const_cast_derived(), other._expression()); + else + lazyAssign(derived() + other._expression()); + return derived(); +} + +template<typename Derived> +template<typename Lhs, typename Rhs> +inline Derived& MatrixBase<Derived>::lazyAssign(const Product<Lhs,Rhs,CacheFriendlyProduct>& product) +{ + if (product._useCacheFriendlyProduct()) + { + setZero(); + ei_cache_friendly_product_selector<Product<Lhs,Rhs,CacheFriendlyProduct> >::run(const_cast_derived(), product); + } + else + { + lazyAssign<Product<Lhs,Rhs,CacheFriendlyProduct> >(product); + } + return derived(); +} + +template<typename T> struct ei_product_copy_rhs +{ + typedef typename ei_meta_if< + (ei_traits<T>::Flags & 
RowMajorBit) + || (!(ei_traits<T>::Flags & DirectAccessBit)), + typename ei_plain_matrix_type_column_major<T>::type, + const T& + >::ret type; +}; + +template<typename T> struct ei_product_copy_lhs +{ + typedef typename ei_meta_if< + (!(int(ei_traits<T>::Flags) & DirectAccessBit)), + typename ei_plain_matrix_type<T>::type, + const T& + >::ret type; +}; + +template<typename Lhs, typename Rhs, int ProductMode> +template<typename DestDerived> +inline void Product<Lhs,Rhs,ProductMode>::_cacheFriendlyEvalAndAdd(DestDerived& res) const +{ + typedef typename ei_product_copy_lhs<_LhsNested>::type LhsCopy; + typedef typename ei_unref<LhsCopy>::type _LhsCopy; + typedef typename ei_product_copy_rhs<_RhsNested>::type RhsCopy; + typedef typename ei_unref<RhsCopy>::type _RhsCopy; + LhsCopy lhs(m_lhs); + RhsCopy rhs(m_rhs); + ei_cache_friendly_product<Scalar>( + rows(), cols(), lhs.cols(), + _LhsCopy::Flags&RowMajorBit, (const Scalar*)&(lhs.const_cast_derived().coeffRef(0,0)), lhs.stride(), + _RhsCopy::Flags&RowMajorBit, (const Scalar*)&(rhs.const_cast_derived().coeffRef(0,0)), rhs.stride(), + DestDerived::Flags&RowMajorBit, (Scalar*)&(res.coeffRef(0,0)), res.stride() + ); +} + +#endif // EIGEN_PRODUCT_H |