diff options
author: Martijn Berger <mberger@denc.com> — 2015-12-10 14:37:46 +0300
committer: Martijn Berger <mberger@denc.com> — 2015-12-10 14:37:46 +0300
commit: 9ad13d70faff0c510c313a7d58a58b974e8ce11f (patch)
tree: 3b7847573047815fc474105195d06c24cfefaaad /extern/Eigen3/Eigen/src/Core/products
parent: ffc750a4fafc6fe4e6fb556506becaa0e1ce8c9a (diff)
Update Eigen to version 3.2.7
The main purpose of this update is to pick up the MSVC 2015 compatibility fixes.
Diffstat (limited to 'extern/Eigen3/Eigen/src/Core/products')
4 files changed, 75 insertions, 34 deletions
diff --git a/extern/Eigen3/Eigen/src/Core/products/CoeffBasedProduct.h b/extern/Eigen3/Eigen/src/Core/products/CoeffBasedProduct.h index c06a0df1c21..2a9d65b9473 100644 --- a/extern/Eigen3/Eigen/src/Core/products/CoeffBasedProduct.h +++ b/extern/Eigen3/Eigen/src/Core/products/CoeffBasedProduct.h @@ -90,6 +90,7 @@ struct traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> > | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0), CoeffReadCost = InnerSize == Dynamic ? Dynamic + : InnerSize == 0 ? 0 : InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost) + (InnerSize - 1) * NumTraits<Scalar>::AddCost, @@ -133,7 +134,7 @@ class CoeffBasedProduct }; typedef internal::product_coeff_impl<CanVectorizeInner ? InnerVectorizedTraversal : DefaultTraversal, - Unroll ? InnerSize-1 : Dynamic, + Unroll ? InnerSize : Dynamic, _LhsNested, _RhsNested, Scalar> ScalarCoeffImpl; typedef CoeffBasedProduct<LhsNested,RhsNested,NestByRefBit> LazyCoeffBasedProductType; @@ -184,7 +185,7 @@ class CoeffBasedProduct { PacketScalar res; internal::product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor, - Unroll ? InnerSize-1 : Dynamic, + Unroll ? 
InnerSize : Dynamic, _LhsNested, _RhsNested, PacketScalar, LoadMode> ::run(row, col, m_lhs, m_rhs, res); return res; @@ -242,12 +243,12 @@ struct product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar> static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res) { product_coeff_impl<DefaultTraversal, UnrollingIndex-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, res); - res += lhs.coeff(row, UnrollingIndex) * rhs.coeff(UnrollingIndex, col); + res += lhs.coeff(row, UnrollingIndex-1) * rhs.coeff(UnrollingIndex-1, col); } }; template<typename Lhs, typename Rhs, typename RetScalar> -struct product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar> +struct product_coeff_impl<DefaultTraversal, 1, Lhs, Rhs, RetScalar> { typedef typename Lhs::Index Index; static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res) @@ -257,15 +258,22 @@ struct product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar> }; template<typename Lhs, typename Rhs, typename RetScalar> +struct product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar> +{ + typedef typename Lhs::Index Index; + static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, RetScalar &res) + { + res = RetScalar(0); + } +}; + +template<typename Lhs, typename Rhs, typename RetScalar> struct product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar> { typedef typename Lhs::Index Index; static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar& res) { - eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix"); - res = lhs.coeff(row, 0) * rhs.coeff(0, col); - for(Index i = 1; i < lhs.cols(); ++i) - res += lhs.coeff(row, i) * rhs.coeff(i, col); + res = (lhs.row(row).transpose().cwiseProduct( rhs.col(col) )).sum(); } }; @@ -295,6 +303,16 @@ struct product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet> } }; 
+template<typename Lhs, typename Rhs, typename RetScalar> +struct product_coeff_impl<InnerVectorizedTraversal, 0, Lhs, Rhs, RetScalar> +{ + typedef typename Lhs::Index Index; + static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, RetScalar &res) + { + res = 0; + } +}; + template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar> struct product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, RetScalar> { @@ -304,8 +322,7 @@ struct product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, Re static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res) { Packet pres; - product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres); - product_coeff_impl<DefaultTraversal,UnrollingIndex,Lhs,Rhs,RetScalar>::run(row, col, lhs, rhs, res); + product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres); res = predux(pres); } }; @@ -373,7 +390,7 @@ struct product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode> static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res) { product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res); - res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res); + res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex-1)), rhs.template packet<LoadMode>(UnrollingIndex-1, col), res); } }; @@ -384,12 +401,12 @@ struct product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode> static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res) { product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res); - res = pmadd(lhs.template packet<LoadMode>(row, 
UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res); + res = pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex-1), pset1<Packet>(rhs.coeff(UnrollingIndex-1, col)), res); } }; template<typename Lhs, typename Rhs, typename Packet, int LoadMode> -struct product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode> +struct product_packet_impl<RowMajor, 1, Lhs, Rhs, Packet, LoadMode> { typedef typename Lhs::Index Index; static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res) @@ -399,7 +416,7 @@ struct product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode> }; template<typename Lhs, typename Rhs, typename Packet, int LoadMode> -struct product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode> +struct product_packet_impl<ColMajor, 1, Lhs, Rhs, Packet, LoadMode> { typedef typename Lhs::Index Index; static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res) @@ -409,15 +426,34 @@ struct product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode> }; template<typename Lhs, typename Rhs, typename Packet, int LoadMode> +struct product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode> +{ + typedef typename Lhs::Index Index; + static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Packet &res) + { + res = pset1<Packet>(0); + } +}; + +template<typename Lhs, typename Rhs, typename Packet, int LoadMode> +struct product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode> +{ + typedef typename Lhs::Index Index; + static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Packet &res) + { + res = pset1<Packet>(0); + } +}; + +template<typename Lhs, typename Rhs, typename Packet, int LoadMode> struct product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode> { typedef typename Lhs::Index Index; static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& 
lhs, const Rhs& rhs, Packet& res) { - eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix"); - res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col)); - for(Index i = 1; i < lhs.cols(); ++i) - res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res); + res = pset1<Packet>(0); + for(Index i = 0; i < lhs.cols(); ++i) + res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res); } }; @@ -427,10 +463,9 @@ struct product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode> typedef typename Lhs::Index Index; static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res) { - eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix"); - res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col))); - for(Index i = 1; i < lhs.cols(); ++i) - res = pmadd(lhs.template packet<LoadMode>(row, i), pset1<Packet>(rhs.coeff(i, col)), res); + res = pset1<Packet>(0); + for(Index i = 0; i < lhs.cols(); ++i) + res = pmadd(lhs.template packet<LoadMode>(row, i), pset1<Packet>(rhs.coeff(i, col)), res); } }; diff --git a/extern/Eigen3/Eigen/src/Core/products/Parallelizer.h b/extern/Eigen3/Eigen/src/Core/products/Parallelizer.h index 5c3e9b7ac15..6937ee33284 100644 --- a/extern/Eigen3/Eigen/src/Core/products/Parallelizer.h +++ b/extern/Eigen3/Eigen/src/Core/products/Parallelizer.h @@ -125,19 +125,22 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpos if(transpose) std::swap(rows,cols); - Index blockCols = (cols / threads) & ~Index(0x3); - Index blockRows = (rows / threads) & ~Index(0x7); - GemmParallelInfo<Index>* info = new GemmParallelInfo<Index>[threads]; - #pragma omp parallel for schedule(static,1) num_threads(threads) - for(Index i=0; i<threads; ++i) + #pragma omp parallel num_threads(threads) { + Index i = omp_get_thread_num(); + // Note that the actual number of 
threads might be lower than the number of request ones. + Index actual_threads = omp_get_num_threads(); + + Index blockCols = (cols / actual_threads) & ~Index(0x3); + Index blockRows = (rows / actual_threads) & ~Index(0x7); + Index r0 = i*blockRows; - Index actualBlockRows = (i+1==threads) ? rows-r0 : blockRows; + Index actualBlockRows = (i+1==actual_threads) ? rows-r0 : blockRows; Index c0 = i*blockCols; - Index actualBlockCols = (i+1==threads) ? cols-c0 : blockCols; + Index actualBlockCols = (i+1==actual_threads) ? cols-c0 : blockCols; info[i].rhs_start = c0; info[i].rhs_length = actualBlockCols; diff --git a/extern/Eigen3/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h b/extern/Eigen3/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h index ba41a1c99f6..4cc56a42fd5 100644 --- a/extern/Eigen3/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h +++ b/extern/Eigen3/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h @@ -109,7 +109,7 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \ /* Non-square case - doesn't fit to MKL ?TRMM. Fall to default triangular product or call MKL ?GEMM*/ \ if (rows != depth) { \ \ - int nthr = mkl_domain_get_max_threads(MKL_BLAS); \ + int nthr = mkl_domain_get_max_threads(EIGEN_MKL_DOMAIN_BLAS); \ \ if (((nthr==1) && (((std::max)(rows,depth)-diagSize)/(double)diagSize < 0.5))) { \ /* Most likely no benefit to call TRMM or GEMM from MKL*/ \ @@ -223,7 +223,7 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \ /* Non-square case - doesn't fit to MKL ?TRMM. 
Fall to default triangular product or call MKL ?GEMM*/ \ if (cols != depth) { \ \ - int nthr = mkl_domain_get_max_threads(MKL_BLAS); \ + int nthr = mkl_domain_get_max_threads(EIGEN_MKL_DOMAIN_BLAS); \ \ if ((nthr==1) && (((std::max)(cols,depth)-diagSize)/(double)diagSize < 0.5)) { \ /* Most likely no benefit to call TRMM or GEMM from MKL*/ \ diff --git a/extern/Eigen3/Eigen/src/Core/products/TriangularSolverMatrix.h b/extern/Eigen3/Eigen/src/Core/products/TriangularSolverMatrix.h index f103eae72c0..04240ab5032 100644 --- a/extern/Eigen3/Eigen/src/Core/products/TriangularSolverMatrix.h +++ b/extern/Eigen3/Eigen/src/Core/products/TriangularSolverMatrix.h @@ -302,9 +302,12 @@ EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conj for (Index i=0; i<actual_mc; ++i) r[i] -= a[i] * b; } - Scalar b = (Mode & UnitDiag) ? Scalar(1) : Scalar(1)/conj(rhs(j,j)); - for (Index i=0; i<actual_mc; ++i) - r[i] *= b; + if((Mode & UnitDiag)==0) + { + Scalar b = conj(rhs(j,j)); + for (Index i=0; i<actual_mc; ++i) + r[i] /= b; + } } // pack the just computed part of lhs to A |