diff options
Diffstat (limited to 'extern/Eigen3/Eigen/src/Core/products/TriangularSolverMatrix.h')
-rw-r--r-- | extern/Eigen3/Eigen/src/Core/products/TriangularSolverMatrix.h | 114 |
1 files changed, 56 insertions, 58 deletions
diff --git a/extern/Eigen3/Eigen/src/Core/products/TriangularSolverMatrix.h b/extern/Eigen3/Eigen/src/Core/products/TriangularSolverMatrix.h index 4dced6b0eb9..a49ea318345 100644 --- a/extern/Eigen3/Eigen/src/Core/products/TriangularSolverMatrix.h +++ b/extern/Eigen3/Eigen/src/Core/products/TriangularSolverMatrix.h @@ -3,28 +3,15 @@ // // Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr> // -// Eigen is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 3 of the License, or (at your option) any later version. -// -// Alternatively, you can redistribute it and/or -// modify it under the terms of the GNU General Public License as -// published by the Free Software Foundation; either version 2 of -// the License, or (at your option) any later version. -// -// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License and a copy of the GNU General Public License along with -// Eigen. If not, see <http://www.gnu.org/licenses/>. +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. #ifndef EIGEN_TRIANGULAR_SOLVER_MATRIX_H #define EIGEN_TRIANGULAR_SOLVER_MATRIX_H +namespace Eigen { + namespace internal { // if the rhs is row major, let's transpose the product @@ -34,14 +21,15 @@ struct triangular_solve_matrix<Scalar,Index,Side,Mode,Conjugate,TriStorageOrder, static EIGEN_DONT_INLINE void run( Index size, Index cols, const Scalar* tri, Index triStride, - Scalar* _other, Index otherStride) + Scalar* _other, Index otherStride, + level3_blocking<Scalar,Scalar>& blocking) { triangular_solve_matrix< Scalar, Index, Side==OnTheLeft?OnTheRight:OnTheLeft, (Mode&UnitDiag) | ((Mode&Upper) ? Lower : Upper), NumTraits<Scalar>::IsComplex && Conjugate, TriStorageOrder==RowMajor ? ColMajor : RowMajor, ColMajor> - ::run(size, cols, tri, triStride, _other, otherStride); + ::run(size, cols, tri, triStride, _other, otherStride, blocking); } }; @@ -53,7 +41,8 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO static EIGEN_DONT_INLINE void run( Index size, Index otherSize, const Scalar* _tri, Index triStride, - Scalar* _other, Index otherStride) + Scalar* _other, Index otherStride, + level3_blocking<Scalar,Scalar>& blocking) { Index cols = otherSize; const_blas_data_mapper<Scalar, Index, TriStorageOrder> tri(_tri,triStride); @@ -65,22 +54,29 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO IsLower = (Mode&Lower) == Lower }; - Index kc = size; // cache block size along the K direction - Index mc = size; // cache block size along the M direction - Index nc = cols; // cache block size along the N direction - computeProductBlockingSizes<Scalar,Scalar,4>(kc, mc, nc); + Index kc = blocking.kc(); // cache block size along the K direction + Index mc = (std::min)(size,blocking.mc()); // cache block size along the M direction + std::size_t sizeA = kc*mc; + std::size_t sizeB = kc*cols; std::size_t sizeW = kc*Traits::WorkSpaceFactor; - std::size_t sizeB = sizeW + kc*cols; - ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0); - ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0); - Scalar* blockB = allocatedBlockB + sizeW; + + ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA()); + ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB()); + ei_declare_aligned_stack_constructed_variable(Scalar, blockW, sizeW, blocking.blockW()); conj_if<Conjugate> conj; gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, Conjugate, false> gebp_kernel; gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, TriStorageOrder> pack_lhs; gemm_pack_rhs<Scalar, Index, Traits::nr, ColMajor, false, true> pack_rhs; + // the goal here is to subdivise the Rhs panels such that we keep some cache + // coherence when accessing the rhs elements + std::ptrdiff_t l1, l2; + manage_caching_sizes(GetAction, &l1, &l2); + Index subcols = cols>0 ? l2/(4 * sizeof(Scalar) * otherStride) : 0; + subcols = std::max<Index>((subcols/Traits::nr)*Traits::nr, Traits::nr); + for(Index k2=IsLower ? 0 : size; IsLower ? k2<size : k2>0; IsLower ? k2+=kc : k2-=kc) @@ -92,16 +88,18 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO // A11 (the triangular part) and A21 the remaining rectangular part. // Then the high level algorithm is: // - B = R1 => general block copy (done during the next step) - // - R1 = L1^-1 B => tricky part + // - R1 = A11^-1 B => tricky part // - update B from the new R1 => actually this has to be performed continuously during the above step - // - R2 = L2 * B => GEPP + // - R2 -= A21 * B => GEPP - // The tricky part: compute R1 = L1^-1 B while updating B from R1 - // The idea is to split L1 into multiple small vertical panels. - // Each panel can be split into a small triangular part A1 which is processed without optimization, - // and the remaining small part A2 which is processed using gebp with appropriate block strides + // The tricky part: compute R1 = A11^-1 B while updating B from R1 + // The idea is to split A11 into multiple small vertical panels. + // Each panel can be split into a small triangular part T1k which is processed without optimization, + // and the remaining small part T2k which is processed using gebp with appropriate block strides + for(Index j2=0; j2<cols; j2+=subcols) { - // for each small vertical panels of lhs + Index actual_cols = (std::min)(cols-j2,subcols); + // for each small vertical panels [T1k^T, T2k^T]^T of lhs for (Index k1=0; k1<actual_kc; k1+=SmallPanelWidth) { Index actualPanelWidth = std::min<Index>(actual_kc-k1, SmallPanelWidth); @@ -114,11 +112,11 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO Index rs = actualPanelWidth - k - 1; // remaining size Scalar a = (Mode & UnitDiag) ? Scalar(1) : Scalar(1)/conj(tri(i,i)); - for (Index j=0; j<cols; ++j) + for (Index j=j2; j<j2+actual_cols; ++j) { if (TriStorageOrder==RowMajor) { - Scalar b = 0; + Scalar b(0); const Scalar* l = &tri(i,s); Scalar* r = &other(s,j); for (Index i3=0; i3<k; ++i3) @@ -143,7 +141,7 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO Index blockBOffset = IsLower ? k1 : lengthTarget; // update the respective rows of B from other - pack_rhs(blockB, _other+startBlock, otherStride, actualPanelWidth, cols, actual_kc, blockBOffset); + pack_rhs(blockB+actual_kc*j2, &other(startBlock,j2), otherStride, actualPanelWidth, actual_cols, actual_kc, blockBOffset); // GEBP if (lengthTarget>0) @@ -152,13 +150,13 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO pack_lhs(blockA, &tri(startTarget,startBlock), triStride, actualPanelWidth, lengthTarget); - gebp_kernel(_other+startTarget, otherStride, blockA, blockB, lengthTarget, actualPanelWidth, cols, Scalar(-1), - actualPanelWidth, actual_kc, 0, blockBOffset); + gebp_kernel(&other(startTarget,j2), otherStride, blockA, blockB+actual_kc*j2, lengthTarget, actualPanelWidth, actual_cols, Scalar(-1), + actualPanelWidth, actual_kc, 0, blockBOffset, blockW); } } } - - // R2 = A2 * B => GEPP + + // R2 -= A21 * B => GEPP { Index start = IsLower ? k2+kc : 0; Index end = IsLower ? size : k2-kc; @@ -169,7 +167,7 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO { pack_lhs(blockA, &tri(i2, IsLower ? k2 : k2-kc), triStride, actual_kc, actual_mc); - gebp_kernel(_other+i2, otherStride, blockA, blockB, actual_mc, actual_kc, cols, Scalar(-1)); + gebp_kernel(_other+i2, otherStride, blockA, blockB, actual_mc, actual_kc, cols, Scalar(-1), -1, -1, 0, 0, blockW); } } } @@ -185,7 +183,8 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorage static EIGEN_DONT_INLINE void run( Index size, Index otherSize, const Scalar* _tri, Index triStride, - Scalar* _other, Index otherStride) + Scalar* _other, Index otherStride, + level3_blocking<Scalar,Scalar>& blocking) { Index rows = otherSize; const_blas_data_mapper<Scalar, Index, TriStorageOrder> rhs(_tri,triStride); @@ -198,19 +197,16 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorage IsLower = (Mode&Lower) == Lower }; -// Index kc = std::min<Index>(Traits::Max_kc/4,size); // cache block size along the K direction -// Index mc = std::min<Index>(Traits::Max_mc,size); // cache block size along the M direction - // check that !!!! - Index kc = size; // cache block size along the K direction - Index mc = size; // cache block size along the M direction - Index nc = rows; // cache block size along the N direction - computeProductBlockingSizes<Scalar,Scalar,4>(kc, mc, nc); + Index kc = blocking.kc(); // cache block size along the K direction + Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction + std::size_t sizeA = kc*mc; + std::size_t sizeB = kc*size; std::size_t sizeW = kc*Traits::WorkSpaceFactor; - std::size_t sizeB = sizeW + kc*size; - ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0); - ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0); - Scalar* blockB = allocatedBlockB + sizeW; + + ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA()); + ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB()); + ei_declare_aligned_stack_constructed_variable(Scalar, blockW, sizeW, blocking.blockW()); conj_if<Conjugate> conj; gebp_kernel<Scalar,Scalar, Index, Traits::mr, Traits::nr, false, Conjugate> gebp_kernel; @@ -277,7 +273,7 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorage Scalar(-1), actual_kc, actual_kc, // strides panelOffset, panelOffset, // offsets - allocatedBlockB); // workspace + blockW); // workspace } // unblocked triangular solve @@ -308,7 +304,7 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorage if (rs>0) gebp_kernel(_other+i2+startPanel*otherStride, otherStride, blockA, geb, actual_mc, actual_kc, rs, Scalar(-1), - -1, -1, 0, 0, allocatedBlockB); + -1, -1, 0, 0, blockW); } } } @@ -316,4 +312,6 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorage } // end namespace internal +} // end namespace Eigen + #endif // EIGEN_TRIANGULAR_SOLVER_MATRIX_H |