22 files changed, 1996 insertions, 520 deletions
diff --git a/extern/Eigen3/Eigen/src/Core/products/CoeffBasedProduct.h b/extern/Eigen3/Eigen/src/Core/products/CoeffBasedProduct.h
index dc20f7e1e29..403d25fa9eb 100644
--- a/extern/Eigen3/Eigen/src/Core/products/CoeffBasedProduct.h
+++ b/extern/Eigen3/Eigen/src/Core/products/CoeffBasedProduct.h
@@ -4,28 +4,15 @@
 // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
 // Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
-// Eigen is free software; you can redistribute it and/or
-// modify it under the terms of the GNU Lesser General Public
-// License as published by the Free Software Foundation; either
-// version 3 of the License, or (at your option) any later version.
-//
-// Alternatively, you can redistribute it and/or
-// modify it under the terms of the GNU General Public License as
-// published by the Free Software Foundation; either version 2 of
-// the License, or (at your option) any later version.
-//
-// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
-// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU Lesser General Public
-// License and a copy of the GNU General Public License along with
-// Eigen. If not, see <http://www.gnu.org/licenses/>.
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 #ifndef EIGEN_COEFFBASED_PRODUCT_H
 #define EIGEN_COEFFBASED_PRODUCT_H
 
+namespace Eigen { 
+
 namespace internal {
 
 /*********************************************************************************
@@ -224,8 +211,8 @@ class CoeffBasedProduct
     { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this).diagonal(index); }
 
   protected:
-    const LhsNested m_lhs;
-    const RhsNested m_rhs;
+    typename internal::add_const_on_value_type<LhsNested>::type m_lhs;
+    typename internal::add_const_on_value_type<RhsNested>::type m_rhs;
 
     mutable PlainObject m_result;
 };
@@ -252,7 +239,7 @@ template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
 struct product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
 {
   typedef typename Lhs::Index Index;
-  EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
+  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
   {
     product_coeff_impl<DefaultTraversal, UnrollingIndex-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, res);
     res += lhs.coeff(row, UnrollingIndex) * rhs.coeff(UnrollingIndex, col);
@@ -263,7 +250,7 @@ template<typename Lhs, typename Rhs, typename RetScalar>
 struct product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar>
 {
   typedef typename Lhs::Index Index;
-  EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
+  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
   {
     res = lhs.coeff(row, 0) * rhs.coeff(0, col);
   }
@@ -273,7 +260,7 @@ template<typename Lhs, typename Rhs, typename RetScalar>
 struct product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar>
 {
   typedef typename Lhs::Index Index;
-  EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar& res)
+  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar& res)
   {
     eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
     res = lhs.coeff(row, 0) * rhs.coeff(0, col);
@@ -291,7 +278,7 @@ struct product_coeff_vectorized_unroller
 {
   typedef typename Lhs::Index Index;
   enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
-  EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
+  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
   {
     product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
     pres = padd(pres, pmul( lhs.template packet<Aligned>(row, UnrollingIndex) , rhs.template packet<Aligned>(UnrollingIndex, col) ));
@@ -302,7 +289,7 @@ template<typename Lhs, typename Rhs, typename Packet>
 struct product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet>
 {
   typedef typename Lhs::Index Index;
-  EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
+  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
   {
     pres = pmul(lhs.template packet<Aligned>(row, 0) , rhs.template packet<Aligned>(0, col));
   }
@@ -314,7 +301,7 @@ struct product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, Re
   typedef typename Lhs::PacketScalar Packet;
   typedef typename Lhs::Index Index;
   enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
-  EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
+  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
   {
     Packet pres;
     product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
@@ -327,7 +314,7 @@ template<typename Lhs, typename Rhs, int LhsRows = Lhs::RowsAtCompileTime, int R
 struct product_coeff_vectorized_dyn_selector
 {
   typedef typename Lhs::Index Index;
-  EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
+  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
   {
     res = lhs.row(row).transpose().cwiseProduct(rhs.col(col)).sum();
   }
@@ -339,7 +326,7 @@ template<typename Lhs, typename Rhs, int RhsCols>
 struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols>
 {
   typedef typename Lhs::Index Index;
-  EIGEN_STRONG_INLINE static void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
+  static EIGEN_STRONG_INLINE void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
   {
     res = lhs.transpose().cwiseProduct(rhs.col(col)).sum();
   }
@@ -349,7 +336,7 @@ template<typename Lhs, typename Rhs, int LhsRows>
 struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1>
 {
   typedef typename Lhs::Index Index;
-  EIGEN_STRONG_INLINE static void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
+  static EIGEN_STRONG_INLINE void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
   {
     res = lhs.row(row).transpose().cwiseProduct(rhs).sum();
   }
@@ -359,7 +346,7 @@ template<typename Lhs, typename Rhs>
 struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1>
 {
   typedef typename Lhs::Index Index;
-  EIGEN_STRONG_INLINE static void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
+  static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
   {
     res = lhs.transpose().cwiseProduct(rhs).sum();
   }
@@ -369,7 +356,7 @@ template<typename Lhs, typename Rhs, typename RetScalar>
 struct product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetScalar>
 {
   typedef typename Lhs::Index Index;
-  EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
+  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
   {
     product_coeff_vectorized_dyn_selector<Lhs,Rhs>::run(row, col, lhs, rhs, res);
   }
@@ -383,7 +370,7 @@ template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int Lo
 struct product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
 {
   typedef typename Lhs::Index Index;
-  EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
+  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
   {
     product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
     res =  pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res);
@@ -394,7 +381,7 @@ template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int Lo
 struct product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
 {
   typedef typename Lhs::Index Index;
-  EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
+  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
   {
     product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
     res =  pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res);
@@ -405,7 +392,7 @@ template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
 struct product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
 {
   typedef typename Lhs::Index Index;
-  EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
+  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
   {
     res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
   }
@@ -415,7 +402,7 @@ template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
 struct product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
 {
   typedef typename Lhs::Index Index;
-  EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
+  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
   {
     res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
   }
@@ -425,7 +412,7 @@ template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
 struct product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
 {
   typedef typename Lhs::Index Index;
-  EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
+  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
   {
     eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
     res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
@@ -438,7 +425,7 @@ template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
 struct product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
 {
   typedef typename Lhs::Index Index;
-  EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
+  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
   {
     eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
     res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
@@ -449,4 +436,6 @@ struct product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
 
 } // end namespace internal
 
+} // end namespace Eigen
+
 #endif // EIGEN_COEFFBASED_PRODUCT_H
diff --git a/extern/Eigen3/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/extern/Eigen3/Eigen/src/Core/products/GeneralBlockPanelKernel.h
index cd1c37c780e..5eb03c98ccf 100644
--- a/extern/Eigen3/Eigen/src/Core/products/GeneralBlockPanelKernel.h
+++ b/extern/Eigen3/Eigen/src/Core/products/GeneralBlockPanelKernel.h
@@ -3,34 +3,23 @@
 //
 // Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
-// Eigen is free software; you can redistribute it and/or
-// modify it under the terms of the GNU Lesser General Public
-// License as published by the Free Software Foundation; either
-// version 3 of the License, or (at your option) any later version.
-//
-// Alternatively, you can redistribute it and/or
-// modify it under the terms of the GNU General Public License as
-// published by the Free Software Foundation; either version 2 of
-// the License, or (at your option) any later version.
-//
-// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
-// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU Lesser General Public
-// License and a copy of the GNU General Public License along with
-// Eigen. If not, see <http://www.gnu.org/licenses/>.
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 #ifndef EIGEN_GENERAL_BLOCK_PANEL_H
 #define EIGEN_GENERAL_BLOCK_PANEL_H
 
+namespace Eigen { 
+  
 namespace internal {
 
 template<typename _LhsScalar, typename _RhsScalar, bool _ConjLhs=false, bool _ConjRhs=false>
 class gebp_traits;
 
-inline std::ptrdiff_t manage_caching_sizes_second_if_negative(std::ptrdiff_t a, std::ptrdiff_t b)
+
+/** \internal \returns b if a<=0, and returns a otherwise. */
+inline std::ptrdiff_t manage_caching_sizes_helper(std::ptrdiff_t a, std::ptrdiff_t b)
 {
   return a<=0 ? b : a;
 }
@@ -38,9 +27,14 @@ inline std::ptrdiff_t manage_caching_sizes_second_if_negative(std::ptrdiff_t a,
 /** \internal */
 inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1=0, std::ptrdiff_t* l2=0)
 {
-  static std::ptrdiff_t m_l1CacheSize = manage_caching_sizes_second_if_negative(queryL1CacheSize(),8 * 1024);
-  static std::ptrdiff_t m_l2CacheSize = manage_caching_sizes_second_if_negative(queryTopLevelCacheSize(),1*1024*1024);
-
+  static std::ptrdiff_t m_l1CacheSize = 0;
+  static std::ptrdiff_t m_l2CacheSize = 0;
+  if(m_l2CacheSize==0)
+  {
+    m_l1CacheSize = manage_caching_sizes_helper(queryL1CacheSize(),8 * 1024);
+    m_l2CacheSize = manage_caching_sizes_helper(queryTopLevelCacheSize(),1*1024*1024);
+  }
+  
   if(action==SetAction)
   {
     // set the cpu cache size and cache all block sizes from a global cache size in byte
@@ -533,7 +527,7 @@ struct gebp_kernel
     ResPacketSize = Traits::ResPacketSize
   };
 
-  EIGEN_FLATTEN_ATTRIB
+  EIGEN_DONT_INLINE EIGEN_FLATTEN_ATTRIB
   void operator()(ResScalar* res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index rows, Index depth, Index cols, ResScalar alpha,
                   Index strideA=-1, Index strideB=-1, Index offsetA=0, Index offsetB=0, RhsScalar* unpackedB = 0)
   {
@@ -595,64 +589,64 @@ struct gebp_kernel
           if(nr==2)
           {
             LhsPacket A0, A1;
-            RhsPacket B0;
+            RhsPacket B_0;
             RhsPacket T0;
             
 EIGEN_ASM_COMMENT("mybegin2");
             traits.loadLhs(&blA[0*LhsProgress], A0);
             traits.loadLhs(&blA[1*LhsProgress], A1);
-            traits.loadRhs(&blB[0*RhsProgress], B0);
-            traits.madd(A0,B0,C0,T0);
-            traits.madd(A1,B0,C4,B0);
-            traits.loadRhs(&blB[1*RhsProgress], B0);
-            traits.madd(A0,B0,C1,T0);
-            traits.madd(A1,B0,C5,B0);
+            traits.loadRhs(&blB[0*RhsProgress], B_0);
+            traits.madd(A0,B_0,C0,T0);
+            traits.madd(A1,B_0,C4,B_0);
+            traits.loadRhs(&blB[1*RhsProgress], B_0);
+            traits.madd(A0,B_0,C1,T0);
+            traits.madd(A1,B_0,C5,B_0);
 
             traits.loadLhs(&blA[2*LhsProgress], A0);
             traits.loadLhs(&blA[3*LhsProgress], A1);
-            traits.loadRhs(&blB[2*RhsProgress], B0);
-            traits.madd(A0,B0,C0,T0);
-            traits.madd(A1,B0,C4,B0);
-            traits.loadRhs(&blB[3*RhsProgress], B0);
-            traits.madd(A0,B0,C1,T0);
-            traits.madd(A1,B0,C5,B0);
+            traits.loadRhs(&blB[2*RhsProgress], B_0);
+            traits.madd(A0,B_0,C0,T0);
+            traits.madd(A1,B_0,C4,B_0);
+            traits.loadRhs(&blB[3*RhsProgress], B_0);
+            traits.madd(A0,B_0,C1,T0);
+            traits.madd(A1,B_0,C5,B_0);
 
             traits.loadLhs(&blA[4*LhsProgress], A0);
             traits.loadLhs(&blA[5*LhsProgress], A1);
-            traits.loadRhs(&blB[4*RhsProgress], B0);
-            traits.madd(A0,B0,C0,T0);
-            traits.madd(A1,B0,C4,B0);
-            traits.loadRhs(&blB[5*RhsProgress], B0);
-            traits.madd(A0,B0,C1,T0);
-            traits.madd(A1,B0,C5,B0);
+            traits.loadRhs(&blB[4*RhsProgress], B_0);
+            traits.madd(A0,B_0,C0,T0);
+            traits.madd(A1,B_0,C4,B_0);
+            traits.loadRhs(&blB[5*RhsProgress], B_0);
+            traits.madd(A0,B_0,C1,T0);
+            traits.madd(A1,B_0,C5,B_0);
 
             traits.loadLhs(&blA[6*LhsProgress], A0);
             traits.loadLhs(&blA[7*LhsProgress], A1);
-            traits.loadRhs(&blB[6*RhsProgress], B0);
-            traits.madd(A0,B0,C0,T0);
-            traits.madd(A1,B0,C4,B0);
-            traits.loadRhs(&blB[7*RhsProgress], B0);
-            traits.madd(A0,B0,C1,T0);
-            traits.madd(A1,B0,C5,B0);
+            traits.loadRhs(&blB[6*RhsProgress], B_0);
+            traits.madd(A0,B_0,C0,T0);
+            traits.madd(A1,B_0,C4,B_0);
+            traits.loadRhs(&blB[7*RhsProgress], B_0);
+            traits.madd(A0,B_0,C1,T0);
+            traits.madd(A1,B_0,C5,B_0);
 EIGEN_ASM_COMMENT("myend");
           }
           else
           {
 EIGEN_ASM_COMMENT("mybegin4");
             LhsPacket A0, A1;
-            RhsPacket B0, B1, B2, B3;
+            RhsPacket B_0, B1, B2, B3;
             RhsPacket T0;
             
             traits.loadLhs(&blA[0*LhsProgress], A0);
             traits.loadLhs(&blA[1*LhsProgress], A1);
-            traits.loadRhs(&blB[0*RhsProgress], B0);
+            traits.loadRhs(&blB[0*RhsProgress], B_0);
             traits.loadRhs(&blB[1*RhsProgress], B1);
 
-            traits.madd(A0,B0,C0,T0);
+            traits.madd(A0,B_0,C0,T0);
             traits.loadRhs(&blB[2*RhsProgress], B2);
-            traits.madd(A1,B0,C4,B0);
+            traits.madd(A1,B_0,C4,B_0);
             traits.loadRhs(&blB[3*RhsProgress], B3);
-            traits.loadRhs(&blB[4*RhsProgress], B0);
+            traits.loadRhs(&blB[4*RhsProgress], B_0);
             traits.madd(A0,B1,C1,T0);
             traits.madd(A1,B1,C5,B1);
             traits.loadRhs(&blB[5*RhsProgress], B1);
@@ -664,9 +658,9 @@ EIGEN_ASM_COMMENT("mybegin4");
             traits.madd(A1,B3,C7,B3);
             traits.loadLhs(&blA[3*LhsProgress], A1);
             traits.loadRhs(&blB[7*RhsProgress], B3);
-            traits.madd(A0,B0,C0,T0);
-            traits.madd(A1,B0,C4,B0);
-            traits.loadRhs(&blB[8*RhsProgress], B0);
+            traits.madd(A0,B_0,C0,T0);
+            traits.madd(A1,B_0,C4,B_0);
+            traits.loadRhs(&blB[8*RhsProgress], B_0);
             traits.madd(A0,B1,C1,T0);
             traits.madd(A1,B1,C5,B1);
             traits.loadRhs(&blB[9*RhsProgress], B1);
@@ -679,9 +673,9 @@ EIGEN_ASM_COMMENT("mybegin4");
             traits.loadLhs(&blA[5*LhsProgress], A1);
             traits.loadRhs(&blB[11*RhsProgress], B3);
 
-            traits.madd(A0,B0,C0,T0);
-            traits.madd(A1,B0,C4,B0);
-            traits.loadRhs(&blB[12*RhsProgress], B0);
+            traits.madd(A0,B_0,C0,T0);
+            traits.madd(A1,B_0,C4,B_0);
+            traits.loadRhs(&blB[12*RhsProgress], B_0);
             traits.madd(A0,B1,C1,T0);
             traits.madd(A1,B1,C5,B1);
             traits.loadRhs(&blB[13*RhsProgress], B1);
@@ -693,8 +687,8 @@ EIGEN_ASM_COMMENT("mybegin4");
             traits.madd(A1,B3,C7,B3);
             traits.loadLhs(&blA[7*LhsProgress], A1);
             traits.loadRhs(&blB[15*RhsProgress], B3);
-            traits.madd(A0,B0,C0,T0);
-            traits.madd(A1,B0,C4,B0);
+            traits.madd(A0,B_0,C0,T0);
+            traits.madd(A1,B_0,C4,B_0);
             traits.madd(A0,B1,C1,T0);
             traits.madd(A1,B1,C5,B1);
             traits.madd(A0,B2,C2,T0);
@@ -712,32 +706,32 @@ EIGEN_ASM_COMMENT("mybegin4");
           if(nr==2)
           {
             LhsPacket A0, A1;
-            RhsPacket B0;
+            RhsPacket B_0;
             RhsPacket T0;
 
             traits.loadLhs(&blA[0*LhsProgress], A0);
             traits.loadLhs(&blA[1*LhsProgress], A1);
-            traits.loadRhs(&blB[0*RhsProgress], B0);
-            traits.madd(A0,B0,C0,T0);
-            traits.madd(A1,B0,C4,B0);
-            traits.loadRhs(&blB[1*RhsProgress], B0);
-            traits.madd(A0,B0,C1,T0);
-            traits.madd(A1,B0,C5,B0);
+            traits.loadRhs(&blB[0*RhsProgress], B_0);
+            traits.madd(A0,B_0,C0,T0);
+            traits.madd(A1,B_0,C4,B_0);
+            traits.loadRhs(&blB[1*RhsProgress], B_0);
+            traits.madd(A0,B_0,C1,T0);
+            traits.madd(A1,B_0,C5,B_0);
           }
           else
           {
             LhsPacket A0, A1;
-            RhsPacket B0, B1, B2, B3;
+            RhsPacket B_0, B1, B2, B3;
             RhsPacket T0;
 
             traits.loadLhs(&blA[0*LhsProgress], A0);
             traits.loadLhs(&blA[1*LhsProgress], A1);
-            traits.loadRhs(&blB[0*RhsProgress], B0);
+            traits.loadRhs(&blB[0*RhsProgress], B_0);
             traits.loadRhs(&blB[1*RhsProgress], B1);
 
-            traits.madd(A0,B0,C0,T0);
+            traits.madd(A0,B_0,C0,T0);
             traits.loadRhs(&blB[2*RhsProgress], B2);
-            traits.madd(A1,B0,C4,B0);
+            traits.madd(A1,B_0,C4,B_0);
             traits.loadRhs(&blB[3*RhsProgress], B3);
             traits.madd(A0,B1,C1,T0);
             traits.madd(A1,B1,C5,B1);
@@ -824,42 +818,42 @@ EIGEN_ASM_COMMENT("mybegin4");
           if(nr==2)
           {
             LhsPacket A0;
-            RhsPacket B0, B1;
+            RhsPacket B_0, B1;
 
             traits.loadLhs(&blA[0*LhsProgress], A0);
-            traits.loadRhs(&blB[0*RhsProgress], B0);
+            traits.loadRhs(&blB[0*RhsProgress], B_0);
             traits.loadRhs(&blB[1*RhsProgress], B1);
-            traits.madd(A0,B0,C0,B0);
-            traits.loadRhs(&blB[2*RhsProgress], B0);
+            traits.madd(A0,B_0,C0,B_0);
+            traits.loadRhs(&blB[2*RhsProgress], B_0);
             traits.madd(A0,B1,C1,B1);
             traits.loadLhs(&blA[1*LhsProgress], A0);
             traits.loadRhs(&blB[3*RhsProgress], B1);
-            traits.madd(A0,B0,C0,B0);
-            traits.loadRhs(&blB[4*RhsProgress], B0);
+            traits.madd(A0,B_0,C0,B_0);
+            traits.loadRhs(&blB[4*RhsProgress], B_0);
             traits.madd(A0,B1,C1,B1);
             traits.loadLhs(&blA[2*LhsProgress], A0);
             traits.loadRhs(&blB[5*RhsProgress], B1);
-            traits.madd(A0,B0,C0,B0);
-            traits.loadRhs(&blB[6*RhsProgress], B0);
+            traits.madd(A0,B_0,C0,B_0);
+            traits.loadRhs(&blB[6*RhsProgress], B_0);
             traits.madd(A0,B1,C1,B1);
             traits.loadLhs(&blA[3*LhsProgress], A0);
             traits.loadRhs(&blB[7*RhsProgress], B1);
-            traits.madd(A0,B0,C0,B0);
+            traits.madd(A0,B_0,C0,B_0);
             traits.madd(A0,B1,C1,B1);
           }
           else
           {
             LhsPacket A0;
-            RhsPacket B0, B1, B2, B3;
+            RhsPacket B_0, B1, B2, B3;
 
             traits.loadLhs(&blA[0*LhsProgress], A0);
-            traits.loadRhs(&blB[0*RhsProgress], B0);
+            traits.loadRhs(&blB[0*RhsProgress], B_0);
             traits.loadRhs(&blB[1*RhsProgress], B1);
 
-            traits.madd(A0,B0,C0,B0);
+            traits.madd(A0,B_0,C0,B_0);
             traits.loadRhs(&blB[2*RhsProgress], B2);
             traits.loadRhs(&blB[3*RhsProgress], B3);
-            traits.loadRhs(&blB[4*RhsProgress], B0);
+            traits.loadRhs(&blB[4*RhsProgress], B_0);
             traits.madd(A0,B1,C1,B1);
             traits.loadRhs(&blB[5*RhsProgress], B1);
             traits.madd(A0,B2,C2,B2);
@@ -867,8 +861,8 @@ EIGEN_ASM_COMMENT("mybegin4");
             traits.madd(A0,B3,C3,B3);
             traits.loadLhs(&blA[1*LhsProgress], A0);
             traits.loadRhs(&blB[7*RhsProgress], B3);
-            traits.madd(A0,B0,C0,B0);
-            traits.loadRhs(&blB[8*RhsProgress], B0);
+            traits.madd(A0,B_0,C0,B_0);
+            traits.loadRhs(&blB[8*RhsProgress], B_0);
             traits.madd(A0,B1,C1,B1);
             traits.loadRhs(&blB[9*RhsProgress], B1);
             traits.madd(A0,B2,C2,B2);
@@ -877,8 +871,8 @@ EIGEN_ASM_COMMENT("mybegin4");
             traits.loadLhs(&blA[2*LhsProgress], A0);
             traits.loadRhs(&blB[11*RhsProgress], B3);
 
-            traits.madd(A0,B0,C0,B0);
-            traits.loadRhs(&blB[12*RhsProgress], B0);
+            traits.madd(A0,B_0,C0,B_0);
+            traits.loadRhs(&blB[12*RhsProgress], B_0);
             traits.madd(A0,B1,C1,B1);
             traits.loadRhs(&blB[13*RhsProgress], B1);
             traits.madd(A0,B2,C2,B2);
@@ -887,7 +881,7 @@ EIGEN_ASM_COMMENT("mybegin4");
 
             traits.loadLhs(&blA[3*LhsProgress], A0);
             traits.loadRhs(&blB[15*RhsProgress], B3);
-            traits.madd(A0,B0,C0,B0);
+            traits.madd(A0,B_0,C0,B_0);
             traits.madd(A0,B1,C1,B1);
             traits.madd(A0,B2,C2,B2);
             traits.madd(A0,B3,C3,B3);
@@ -902,26 +896,26 @@ EIGEN_ASM_COMMENT("mybegin4");
           if(nr==2)
           {
             LhsPacket A0;
-            RhsPacket B0, B1;
+            RhsPacket B_0, B1;
 
             traits.loadLhs(&blA[0*LhsProgress], A0);
-            traits.loadRhs(&blB[0*RhsProgress], B0);
+            traits.loadRhs(&blB[0*RhsProgress], B_0);
             traits.loadRhs(&blB[1*RhsProgress], B1);
-            traits.madd(A0,B0,C0,B0);
+            traits.madd(A0,B_0,C0,B_0);
             traits.madd(A0,B1,C1,B1);
           }
           else
           {
             LhsPacket A0;
-            RhsPacket B0, B1, B2, B3;
+            RhsPacket B_0, B1, B2, B3;
 
             traits.loadLhs(&blA[0*LhsProgress], A0);
-            traits.loadRhs(&blB[0*RhsProgress], B0);
+            traits.loadRhs(&blB[0*RhsProgress], B_0);
             traits.loadRhs(&blB[1*RhsProgress], B1);
             traits.loadRhs(&blB[2*RhsProgress], B2);
             traits.loadRhs(&blB[3*RhsProgress], B3);
 
-            traits.madd(A0,B0,C0,B0);
+            traits.madd(A0,B_0,C0,B_0);
             traits.madd(A0,B1,C1,B1);
             traits.madd(A0,B2,C2,B2);
             traits.madd(A0,B3,C3,B3);
@@ -968,26 +962,26 @@ EIGEN_ASM_COMMENT("mybegin4");
           if(nr==2)
           {
             LhsScalar A0;
-            RhsScalar B0, B1;
+            RhsScalar B_0, B1;
 
             A0 = blA[k];
-            B0 = blB[0];
+            B_0 = blB[0];
             B1 = blB[1];
-            MADD(cj,A0,B0,C0,B0);
+            MADD(cj,A0,B_0,C0,B_0);
             MADD(cj,A0,B1,C1,B1);
           }
           else
           {
             LhsScalar A0;
-            RhsScalar B0, B1, B2, B3;
+            RhsScalar B_0, B1, B2, B3;
 
             A0 = blA[k];
-            B0 = blB[0];
+            B_0 = blB[0];
             B1 = blB[1];
             B2 = blB[2];
             B3 = blB[3];
 
-            MADD(cj,A0,B0,C0,B0);
+            MADD(cj,A0,B_0,C0,B_0);
             MADD(cj,A0,B1,C1,B1);
             MADD(cj,A0,B2,C2,B2);
             MADD(cj,A0,B3,C3,B3);
@@ -1024,14 +1018,14 @@ EIGEN_ASM_COMMENT("mybegin4");
         for(Index k=0; k<depth; k++)
         {
           LhsPacket A0, A1;
-          RhsPacket B0;
+          RhsPacket B_0;
           RhsPacket T0;
 
           traits.loadLhs(&blA[0*LhsProgress], A0);
           traits.loadLhs(&blA[1*LhsProgress], A1);
-          traits.loadRhs(&blB[0*RhsProgress], B0);
-          traits.madd(A0,B0,C0,T0);
-          traits.madd(A1,B0,C4,B0);
+          traits.loadRhs(&blB[0*RhsProgress], B_0);
+          traits.madd(A0,B_0,C0,T0);
+          traits.madd(A1,B_0,C4,B_0);
 
           blB += RhsProgress;
           blA += 2*LhsProgress;
@@ -1063,10 +1057,10 @@ EIGEN_ASM_COMMENT("mybegin4");
         for(Index k=0; k<depth; k++)
         {
           LhsPacket A0;
-          RhsPacket B0;
+          RhsPacket B_0;
           traits.loadLhs(blA, A0);
-          traits.loadRhs(blB, B0);
-          traits.madd(A0, B0, C0, B0);
+          traits.loadRhs(blB, B_0);
+          traits.madd(A0, B_0, C0, B_0);
           blB += RhsProgress;
           blA += LhsProgress;
         }
@@ -1088,8 +1082,8 @@ EIGEN_ASM_COMMENT("mybegin4");
         for(Index k=0; k<depth; k++)
         {
           LhsScalar A0 = blA[k];
-          RhsScalar B0 = blB[k];
-          MADD(cj, A0, B0, C0, B0);
+          RhsScalar B_0 = blB[k];
+          MADD(cj, A0, B_0, C0, B_0);
         }
         res[(j2+0)*resStride + i] += alpha*C0;
       }
@@ -1100,7 +1094,7 @@ EIGEN_ASM_COMMENT("mybegin4");
 #undef CJMADD
 
 // pack a block of the lhs
-// The travesal is as follow (mr==4):
+// The traversal is as follow (mr==4):
 //   0  4  8 12 ...
 //   1  5  9 13 ...
 //   2  6 10 14 ...
@@ -1116,11 +1110,15 @@ EIGEN_ASM_COMMENT("mybegin4");
 template<typename Scalar, typename Index, int Pack1, int Pack2, int StorageOrder, bool Conjugate, bool PanelMode>
 struct gemm_pack_lhs
 {
-  void operator()(Scalar* blockA, const Scalar* EIGEN_RESTRICT _lhs, Index lhsStride, Index depth, Index rows,
+  EIGEN_DONT_INLINE void operator()(Scalar* blockA, const Scalar* EIGEN_RESTRICT _lhs, Index lhsStride, Index depth, Index rows,
                   Index stride=0, Index offset=0)
   {
-//     enum { PacketSize = packet_traits<Scalar>::size };
+    typedef typename packet_traits<Scalar>::type Packet;
+    enum { PacketSize = packet_traits<Scalar>::size };
+
+    EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK LHS");
     eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
+    eigen_assert( (StorageOrder==RowMajor) || ((Pack1%PacketSize)==0 && Pack1<=4*PacketSize) );
     conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
     const_blas_data_mapper<Scalar, Index, StorageOrder> lhs(_lhs,lhsStride);
     Index count = 0;
@@ -1128,9 +1126,44 @@ struct gemm_pack_lhs
     for(Index i=0; i<peeled_mc; i+=Pack1)
     {
       if(PanelMode) count += Pack1 * offset;
-      for(Index k=0; k<depth; k++)
-        for(Index w=0; w<Pack1; w++)
-          blockA[count++] = cj(lhs(i+w, k));
+
+      if(StorageOrder==ColMajor)
+      {
+        for(Index k=0; k<depth; k++)
+        {
+          Packet A, B, C, D;
+          if(Pack1>=1*PacketSize) A = ploadu<Packet>(&lhs(i+0*PacketSize, k));
+          if(Pack1>=2*PacketSize) B = ploadu<Packet>(&lhs(i+1*PacketSize, k));
+          if(Pack1>=3*PacketSize) C = ploadu<Packet>(&lhs(i+2*PacketSize, k));
+          if(Pack1>=4*PacketSize) D = ploadu<Packet>(&lhs(i+3*PacketSize, k));
+          if(Pack1>=1*PacketSize) { pstore(blockA+count, cj.pconj(A)); count+=PacketSize; }
+          if(Pack1>=2*PacketSize) { pstore(blockA+count, cj.pconj(B)); count+=PacketSize; }
+          if(Pack1>=3*PacketSize) { pstore(blockA+count, cj.pconj(C)); count+=PacketSize; }
+          if(Pack1>=4*PacketSize) { pstore(blockA+count, cj.pconj(D)); count+=PacketSize; }
+        }
+      }
+      else
+      {
+        for(Index k=0; k<depth; k++)
+        {
+          // TODO add a vectorized transpose here
+          Index w=0;
+          for(; w<Pack1-3; w+=4)
+          {
+            Scalar a(cj(lhs(i+w+0, k))),
+                   b(cj(lhs(i+w+1, k))),
+                   c(cj(lhs(i+w+2, k))),
+                   d(cj(lhs(i+w+3, k)));
+            blockA[count++] = a;
+            blockA[count++] = b;
+            blockA[count++] = c;
+            blockA[count++] = d;
+          }
+          if(Pack1%4)
+            for(;w<Pack1;++w)
+              blockA[count++] = cj(lhs(i+w, k));
+        }
+      }
       if(PanelMode) count += Pack1 * (stride-offset-depth);
     }
     if(rows-peeled_mc>=Pack2)
@@ -1164,9 +1197,10 @@ struct gemm_pack_rhs<Scalar, Index, nr, ColMajor, Conjugate, PanelMode>
 {
   typedef typename packet_traits<Scalar>::type Packet;
   enum { PacketSize = packet_traits<Scalar>::size };
-  void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols,
+  EIGEN_DONT_INLINE void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols,
                   Index stride=0, Index offset=0)
   {
+    EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS COLMAJOR");
     eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
     conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
     Index packet_cols = (cols/nr) * nr;
@@ -1211,9 +1245,10 @@ template<typename Scalar, typename Index, int nr, bool Conjugate, bool PanelMode
 struct gemm_pack_rhs<Scalar, Index, nr, RowMajor, Conjugate, PanelMode>
 {
   enum { PacketSize = packet_traits<Scalar>::size };
-  void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols,
+  EIGEN_DONT_INLINE void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols,
                   Index stride=0, Index offset=0)
   {
+    EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS ROWMAJOR");
     eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
     conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
     Index packet_cols = (cols/nr) * nr;
@@ -1279,4 +1314,6 @@ inline void setCpuCacheSizes(std::ptrdiff_t l1, std::ptrdiff_t l2)
   internal::manage_caching_sizes(SetAction, &l1, &l2);
 }
 
+} // end namespace Eigen
+
 #endif // EIGEN_GENERAL_BLOCK_PANEL_H
diff --git a/extern/Eigen3/Eigen/src/Core/products/GeneralMatrixMatrix.h b/extern/Eigen3/Eigen/src/Core/products/GeneralMatrixMatrix.h
index ae94a27953b..73a465ec5ee 100644
--- a/extern/Eigen3/Eigen/src/Core/products/GeneralMatrixMatrix.h
+++ b/extern/Eigen3/Eigen/src/Core/products/GeneralMatrixMatrix.h
@@ -3,28 +3,15 @@
 //
 // Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
-// Eigen is free software; you can redistribute it and/or
-// modify it under the terms of the GNU Lesser General Public
-// License as published by the Free Software Foundation; either
-// version 3 of the License, or (at your option) any later version.
-//
-// Alternatively, you can redistribute it and/or
-// modify it under the terms of the GNU General Public License as
-// published by the Free Software Foundation; either version 2 of
-// the License, or (at your option) any later version.
-//
-// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
-// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU Lesser General Public
-// License and a copy of the GNU General Public License along with
-// Eigen. If not, see <http://www.gnu.org/licenses/>.
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 #ifndef EIGEN_GENERAL_MATRIX_MATRIX_H
 #define EIGEN_GENERAL_MATRIX_MATRIX_H
 
+namespace Eigen { 
+
 namespace internal {
 
 template<typename _LhsScalar, typename _RhsScalar> class level3_blocking;
@@ -77,7 +64,7 @@ static void run(Index rows, Index cols, Index depth,
 
   typedef gebp_traits<LhsScalar,RhsScalar> Traits;
 
-  Index kc = blocking.kc();                 // cache block size along the K direction
+  Index kc = blocking.kc();                   // cache block size along the K direction
   Index mc = (std::min)(rows,blocking.mc());  // cache block size along the M direction
   //Index nc = blocking.nc(); // cache block size along the N direction
 
@@ -247,7 +234,7 @@ struct gemm_functor
     BlockingType& m_blocking;
 };
 
-template<int StorageOrder, typename LhsScalar, typename RhsScalar, int MaxRows, int MaxCols, int MaxDepth,
+template<int StorageOrder, typename LhsScalar, typename RhsScalar, int MaxRows, int MaxCols, int MaxDepth, int KcFactor=1,
 bool FiniteAtCompileTime = MaxRows!=Dynamic && MaxCols!=Dynamic && MaxDepth != Dynamic> class gemm_blocking_space;
 
 template<typename _LhsScalar, typename _RhsScalar>
@@ -280,8 +267,8 @@ class level3_blocking
     inline RhsScalar* blockW() { return m_blockW; }
 };
 
-template<int StorageOrder, typename _LhsScalar, typename _RhsScalar, int MaxRows, int MaxCols, int MaxDepth>
-class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, true>
+template<int StorageOrder, typename _LhsScalar, typename _RhsScalar, int MaxRows, int MaxCols, int MaxDepth, int KcFactor>
+class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, KcFactor, true>
   : public level3_blocking<
       typename conditional<StorageOrder==RowMajor,_RhsScalar,_LhsScalar>::type,
       typename conditional<StorageOrder==RowMajor,_LhsScalar,_RhsScalar>::type>
@@ -322,8 +309,8 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
     inline void allocateAll() {}
 };
 
-template<int StorageOrder, typename _LhsScalar, typename _RhsScalar, int MaxRows, int MaxCols, int MaxDepth>
-class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, false>
+template<int StorageOrder, typename _LhsScalar, typename _RhsScalar, int MaxRows, int MaxCols, int MaxDepth, int KcFactor>
+class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, KcFactor, false>
   : public level3_blocking<
       typename conditional<StorageOrder==RowMajor,_RhsScalar,_LhsScalar>::type,
       typename conditional<StorageOrder==RowMajor,_LhsScalar,_RhsScalar>::type>
@@ -347,7 +334,7 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
       this->m_nc = Transpose ? rows : cols;
       this->m_kc = depth;
 
-      computeProductBlockingSizes<LhsScalar,RhsScalar>(this->m_kc, this->m_mc, this->m_nc);
+      computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, this->m_mc, this->m_nc);
       m_sizeA = this->m_mc * this->m_kc;
       m_sizeB = this->m_kc * this->m_nc;
       m_sizeW = this->m_kc*Traits::WorkSpaceFactor;
@@ -412,8 +399,8 @@ class GeneralProduct<Lhs, Rhs, GemmProduct>
     {
       eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
 
-      const ActualLhsType lhs = LhsBlasTraits::extract(m_lhs);
-      const ActualRhsType rhs = RhsBlasTraits::extract(m_rhs);
+      typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
+      typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);
 
       Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
                                  * RhsBlasTraits::extractScalarFactor(m_rhs);
@@ -436,4 +423,6 @@ class GeneralProduct<Lhs, Rhs, GemmProduct>
     }
 };
 
+} // end namespace Eigen
+
 #endif // EIGEN_GENERAL_MATRIX_MATRIX_H
diff --git a/extern/Eigen3/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h b/extern/Eigen3/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h
index 5043b64fe2e..432d3a9dc84 100644
--- a/extern/Eigen3/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h
+++ b/extern/Eigen3/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h
@@ -3,28 +3,15 @@
 //
 // Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
-// Eigen is free software; you can redistribute it and/or
-// modify it under the terms of the GNU Lesser General Public
-// License as published by the Free Software Foundation; either
-// version 3 of the License, or (at your option) any later version.
-//
-// Alternatively, you can redistribute it and/or
-// modify it under the terms of the GNU General Public License as
-// published by the Free Software Foundation; either version 2 of
-// the License, or (at your option) any later version.
-//
-// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
-// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU Lesser General Public
-// License and a copy of the GNU General Public License along with
-// Eigen. If not, see <http://www.gnu.org/licenses/>.
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 #ifndef EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_H
 #define EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_H
 
+namespace Eigen { 
+
 namespace internal {
 
 /**********************************************************************
@@ -42,14 +29,14 @@ struct tribb_kernel;
 template <typename Index,
           typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
           typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs,
-                              int ResStorageOrder, int  UpLo>
+                              int ResStorageOrder, int  UpLo, int Version = Specialized>
 struct general_matrix_matrix_triangular_product;
 
 // as usual if the result is row major => we transpose the product
 template <typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
-                          typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int  UpLo>
-struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor,UpLo>
-{  
+                          typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int  UpLo, int Version>
+struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor,UpLo,Version>
+{
   typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
   static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* lhs, Index lhsStride,
                                       const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride, ResScalar alpha)
@@ -63,8 +50,8 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
 };
 
 template <typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
-                          typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int  UpLo>
-struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor,UpLo>
+                          typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int  UpLo, int Version>
+struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor,UpLo,Version>
 {
   typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
   static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* _lhs, Index lhsStride,
@@ -201,13 +188,13 @@ TriangularView<MatrixType,UpLo>& TriangularView<MatrixType,UpLo>::assignProduct(
   typedef internal::blas_traits<Lhs> LhsBlasTraits;
   typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhs;
   typedef typename internal::remove_all<ActualLhs>::type _ActualLhs;
-  const ActualLhs actualLhs = LhsBlasTraits::extract(prod.lhs());
+  typename internal::add_const_on_value_type<ActualLhs>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
   
   typedef typename internal::remove_all<typename ProductDerived::RhsNested>::type Rhs;
   typedef internal::blas_traits<Rhs> RhsBlasTraits;
   typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhs;
   typedef typename internal::remove_all<ActualRhs>::type _ActualRhs;
-  const ActualRhs actualRhs = RhsBlasTraits::extract(prod.rhs());
+  typename internal::add_const_on_value_type<ActualRhs>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
 
   typename ProductDerived::Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived());
 
@@ -222,4 +209,6 @@ TriangularView<MatrixType,UpLo>& TriangularView<MatrixType,UpLo>::assignProduct(
   return *this;
 }
 
+} // end namespace Eigen
+
 #endif // EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_H
diff --git a/extern/Eigen3/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_MKL.h b/extern/Eigen3/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_MKL.h
new file mode 100644
index 00000000000..3deed068e39
--- /dev/null
+++ b/extern/Eigen3/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_MKL.h
@@ -0,0 +1,146 @@
+/*
+ Copyright (c) 2011, Intel Corporation. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its contributors may
+   be used to endorse or promote products derived from this software without
+   specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ ********************************************************************************
+ *   Content : Eigen bindings to Intel(R) MKL
+ *   Level 3 BLAS SYRK/HERK implementation.
+ ********************************************************************************
+*/
+
+#ifndef EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_MKL_H
+#define EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_MKL_H
+
+namespace Eigen { 
+
+namespace internal {
+
+template <typename Index, typename Scalar, int AStorageOrder, bool ConjugateA, int ResStorageOrder, int  UpLo>
+struct general_matrix_matrix_rankupdate :
+       general_matrix_matrix_triangular_product<
+         Index,Scalar,AStorageOrder,ConjugateA,Scalar,AStorageOrder,ConjugateA,ResStorageOrder,UpLo,BuiltIn> {};
+
+
+// try to go to BLAS specialization
+#define EIGEN_MKL_RANKUPDATE_SPECIALIZE(Scalar) \
+template <typename Index, int LhsStorageOrder, bool ConjugateLhs, \
+                          int RhsStorageOrder, bool ConjugateRhs, int  UpLo> \
+struct general_matrix_matrix_triangular_product<Index,Scalar,LhsStorageOrder,ConjugateLhs, \
+               Scalar,RhsStorageOrder,ConjugateRhs,ColMajor,UpLo,Specialized> { \
+  static EIGEN_STRONG_INLINE void run(Index size, Index depth,const Scalar* lhs, Index lhsStride, \
+                          const Scalar* rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha) \
+  { \
+    if (lhs==rhs) { \
+      general_matrix_matrix_rankupdate<Index,Scalar,LhsStorageOrder,ConjugateLhs,ColMajor,UpLo> \
+      ::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha); \
+    } else { \
+      general_matrix_matrix_triangular_product<Index, \
+        Scalar, LhsStorageOrder, ConjugateLhs, \
+        Scalar, RhsStorageOrder, ConjugateRhs, \
+        ColMajor, UpLo, BuiltIn> \
+      ::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha); \
+    } \
+  } \
+};
+
+EIGEN_MKL_RANKUPDATE_SPECIALIZE(double)
+//EIGEN_MKL_RANKUPDATE_SPECIALIZE(dcomplex)
+EIGEN_MKL_RANKUPDATE_SPECIALIZE(float)
+//EIGEN_MKL_RANKUPDATE_SPECIALIZE(scomplex)
+
+// SYRK for float/double
+#define EIGEN_MKL_RANKUPDATE_R(EIGTYPE, MKLTYPE, MKLFUNC) \
+template <typename Index, int AStorageOrder, bool ConjugateA, int  UpLo> \
+struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,ColMajor,UpLo> { \
+  enum { \
+    IsLower = (UpLo&Lower) == Lower, \
+    LowUp = IsLower ? Lower : Upper, \
+    conjA = ((AStorageOrder==ColMajor) && ConjugateA) ? 1 : 0 \
+  }; \
+  static EIGEN_STRONG_INLINE void run(Index size, Index depth,const EIGTYPE* lhs, Index lhsStride, \
+                          const EIGTYPE* rhs, Index rhsStride, EIGTYPE* res, Index resStride, EIGTYPE alpha) \
+  { \
+  /* typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs;*/ \
+\
+   MKL_INT lda=lhsStride, ldc=resStride, n=size, k=depth; \
+   char uplo=(IsLower) ? 'L' : 'U', trans=(AStorageOrder==RowMajor) ? 'T':'N'; \
+   MKLTYPE alpha_, beta_; \
+\
+/* Set alpha_ & beta_ */ \
+   assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \
+   assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(beta_, EIGTYPE(1)); \
+   MKLFUNC(&uplo, &trans, &n, &k, &alpha_, lhs, &lda, &beta_, res, &ldc); \
+  } \
+};
+
+// HERK for complex data
+#define EIGEN_MKL_RANKUPDATE_C(EIGTYPE, MKLTYPE, RTYPE, MKLFUNC) \
+template <typename Index, int AStorageOrder, bool ConjugateA, int  UpLo> \
+struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,ColMajor,UpLo> { \
+  enum { \
+    IsLower = (UpLo&Lower) == Lower, \
+    LowUp = IsLower ? Lower : Upper, \
+    conjA = (((AStorageOrder==ColMajor) && ConjugateA) || ((AStorageOrder==RowMajor) && !ConjugateA)) ? 1 : 0 \
+  }; \
+  static EIGEN_STRONG_INLINE void run(Index size, Index depth,const EIGTYPE* lhs, Index lhsStride, \
+                          const EIGTYPE* rhs, Index rhsStride, EIGTYPE* res, Index resStride, EIGTYPE alpha) \
+  { \
+   typedef Matrix<EIGTYPE, Dynamic, Dynamic, AStorageOrder> MatrixType; \
+\
+   MKL_INT lda=lhsStride, ldc=resStride, n=size, k=depth; \
+   char uplo=(IsLower) ? 'L' : 'U', trans=(AStorageOrder==RowMajor) ? 'C':'N'; \
+   RTYPE alpha_, beta_; \
+   const EIGTYPE* a_ptr; \
+\
+/* Set alpha_ & beta_ */ \
+/*   assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); */\
+/*   assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(beta_, EIGTYPE(1));*/ \
+   alpha_ = alpha.real(); \
+   beta_ = 1.0; \
+/* Copy with conjugation in some cases*/ \
+   MatrixType a; \
+   if (conjA) { \
+     Map<const MatrixType, 0, OuterStride<> > mapA(lhs,n,k,OuterStride<>(lhsStride)); \
+     a = mapA.conjugate(); \
+     lda = a.outerStride(); \
+     a_ptr = a.data(); \
+   } else a_ptr=lhs; \
+   MKLFUNC(&uplo, &trans, &n, &k, &alpha_, (MKLTYPE*)a_ptr, &lda, &beta_, (MKLTYPE*)res, &ldc); \
+  } \
+};
+
+
+EIGEN_MKL_RANKUPDATE_R(double, double, dsyrk)
+EIGEN_MKL_RANKUPDATE_R(float,  float,  ssyrk)
+
+//EIGEN_MKL_RANKUPDATE_C(dcomplex, MKL_Complex16, double, zherk)
+//EIGEN_MKL_RANKUPDATE_C(scomplex, MKL_Complex8,  double, cherk)
+
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_MKL_H
diff --git a/extern/Eigen3/Eigen/src/Core/products/GeneralMatrixMatrix_MKL.h b/extern/Eigen3/Eigen/src/Core/products/GeneralMatrixMatrix_MKL.h
new file mode 100644
index 00000000000..060af328ebe
--- /dev/null
+++ b/extern/Eigen3/Eigen/src/Core/products/GeneralMatrixMatrix_MKL.h
@@ -0,0 +1,118 @@
+/*
+ Copyright (c) 2011, Intel Corporation. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its contributors may
+   be used to endorse or promote products derived from this software without
+   specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ ********************************************************************************
+ *   Content : Eigen bindings to Intel(R) MKL
+ *   General matrix-matrix product functionality based on ?GEMM.
+ ********************************************************************************
+*/
+
+#ifndef EIGEN_GENERAL_MATRIX_MATRIX_MKL_H
+#define EIGEN_GENERAL_MATRIX_MATRIX_MKL_H
+
+namespace Eigen { 
+
+namespace internal {
+
+/**********************************************************************
+* This file implements general matrix-matrix multiplication using BLAS
+* gemm function via partial specialization of
+* general_matrix_matrix_product::run(..) method for float, double,
+* std::complex<float> and std::complex<double> types
+**********************************************************************/
+
+// gemm specialization
+
+#define GEMM_SPECIALIZATION(EIGTYPE, EIGPREFIX, MKLTYPE, MKLPREFIX) \
+template< \
+  typename Index, \
+  int LhsStorageOrder, bool ConjugateLhs, \
+  int RhsStorageOrder, bool ConjugateRhs> \
+struct general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor> \
+{ \
+static void run(Index rows, Index cols, Index depth, \
+  const EIGTYPE* _lhs, Index lhsStride, \
+  const EIGTYPE* _rhs, Index rhsStride, \
+  EIGTYPE* res, Index resStride, \
+  EIGTYPE alpha, \
+  level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/, \
+  GemmParallelInfo<Index>* /*info = 0*/) \
+{ \
+  using std::conj; \
+\
+  char transa, transb; \
+  MKL_INT m, n, k, lda, ldb, ldc; \
+  const EIGTYPE *a, *b; \
+  MKLTYPE alpha_, beta_; \
+  MatrixX##EIGPREFIX a_tmp, b_tmp; \
+  EIGTYPE myone(1);\
+\
+/* Set transpose options */ \
+  transa = (LhsStorageOrder==RowMajor) ? ((ConjugateLhs) ? 'C' : 'T') : 'N'; \
+  transb = (RhsStorageOrder==RowMajor) ? ((ConjugateRhs) ? 'C' : 'T') : 'N'; \
+\
+/* Set m, n, k */ \
+  m = (MKL_INT)rows;  \
+  n = (MKL_INT)cols;  \
+  k = (MKL_INT)depth; \
+\
+/* Set alpha_ & beta_ */ \
+  assign_scalar_eig2mkl(alpha_, alpha); \
+  assign_scalar_eig2mkl(beta_, myone); \
+\
+/* Set lda, ldb, ldc */ \
+  lda = (MKL_INT)lhsStride; \
+  ldb = (MKL_INT)rhsStride; \
+  ldc = (MKL_INT)resStride; \
+\
+/* Set a, b, c */ \
+  if ((LhsStorageOrder==ColMajor) && (ConjugateLhs)) { \
+    Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,m,k,OuterStride<>(lhsStride)); \
+    a_tmp = lhs.conjugate(); \
+    a = a_tmp.data(); \
+    lda = a_tmp.outerStride(); \
+  } else a = _lhs; \
+\
+  if ((RhsStorageOrder==ColMajor) && (ConjugateRhs)) { \
+    Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,k,n,OuterStride<>(rhsStride)); \
+    b_tmp = rhs.conjugate(); \
+    b = b_tmp.data(); \
+    ldb = b_tmp.outerStride(); \
+  } else b = _rhs; \
+\
+  MKLPREFIX##gemm(&transa, &transb, &m, &n, &k, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \
+}};
+
+GEMM_SPECIALIZATION(double,   d,  double,        d)
+GEMM_SPECIALIZATION(float,    f,  float,         s)
+GEMM_SPECIALIZATION(dcomplex, cd, MKL_Complex16, z)
+GEMM_SPECIALIZATION(scomplex, cf, MKL_Complex8,  c)
+
+} // end namespase internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_GENERAL_MATRIX_MATRIX_MKL_H
diff --git a/extern/Eigen3/Eigen/src/Core/products/GeneralMatrixVector.h b/extern/Eigen3/Eigen/src/Core/products/GeneralMatrixVector.h
index e0e2cbf8f62..ba1f73957db 100644
--- a/extern/Eigen3/Eigen/src/Core/products/GeneralMatrixVector.h
+++ b/extern/Eigen3/Eigen/src/Core/products/GeneralMatrixVector.h
@@ -3,28 +3,15 @@
 //
 // Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
-// Eigen is free software; you can redistribute it and/or
-// modify it under the terms of the GNU Lesser General Public
-// License as published by the Free Software Foundation; either
-// version 3 of the License, or (at your option) any later version.
-//
-// Alternatively, you can redistribute it and/or
-// modify it under the terms of the GNU General Public License as
-// published by the Free Software Foundation; either version 2 of
-// the License, or (at your option) any later version.
-//
-// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
-// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU Lesser General Public
-// License and a copy of the GNU General Public License along with
-// Eigen. If not, see <http://www.gnu.org/licenses/>.
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 #ifndef EIGEN_GENERAL_MATRIX_VECTOR_H
 #define EIGEN_GENERAL_MATRIX_VECTOR_H
 
+namespace Eigen { 
+
 namespace internal {
 
 /* Optimized col-major matrix * vector product:
@@ -40,8 +27,8 @@ namespace internal {
  *  |cplx |real |cplx | invalid, the caller has to do tmp: = A * B; C += alpha*tmp
  *  |cplx |real |real | optimal case, vectorization possible via real-cplx mul
  */
-template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs>
-struct general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjugateLhs,RhsScalar,ConjugateRhs>
+template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs, int Version>
+struct general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjugateLhs,RhsScalar,ConjugateRhs,Version>
 {
 typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
 
@@ -99,7 +86,7 @@ EIGEN_DONT_INLINE static void run(
   
   // How many coeffs of the result do we have to skip to be aligned.
   // Here we assume data are at least aligned on the base scalar type.
-  Index alignedStart = first_aligned(res,size);
+  Index alignedStart = internal::first_aligned(res,size);
   Index alignedSize = ResPacketSize>1 ? alignedStart + ((size-alignedStart) & ~ResPacketAlignedMask) : 0;
   const Index peeledSize  = peels>1 ? alignedStart + ((alignedSize-alignedStart) & ~PeelAlignedMask) : alignedStart;
 
@@ -109,7 +96,7 @@ EIGEN_DONT_INLINE static void run(
                        : FirstAligned;
 
   // we cannot assume the first element is aligned because of sub-matrices
-  const Index lhsAlignmentOffset = first_aligned(lhs,size);
+  const Index lhsAlignmentOffset = internal::first_aligned(lhs,size);
 
   // find how many columns do we have to skip to be aligned with the result (if possible)
   Index skipColumns = 0;
@@ -296,8 +283,8 @@ EIGEN_DONT_INLINE static void run(
  *  - alpha is always a complex (or converted to a complex)
  *  - no vectorization
  */
-template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs>
-struct general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjugateLhs,RhsScalar,ConjugateRhs>
+template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs, int Version>
+struct general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjugateLhs,RhsScalar,ConjugateRhs,Version>
 {
 typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
 
@@ -351,7 +338,7 @@ EIGEN_DONT_INLINE static void run(
   // How many coeffs of the result do we have to skip to be aligned.
   // Here we assume data are at least aligned on the base scalar type
   // if that's not the case then vectorization is discarded, see below.
-  Index alignedStart = first_aligned(rhs, depth);
+  Index alignedStart = internal::first_aligned(rhs, depth);
   Index alignedSize = RhsPacketSize>1 ? alignedStart + ((depth-alignedStart) & ~RhsPacketAlignedMask) : 0;
   const Index peeledSize  = peels>1 ? alignedStart + ((alignedSize-alignedStart) & ~PeelAlignedMask) : alignedStart;
 
@@ -361,7 +348,7 @@ EIGEN_DONT_INLINE static void run(
                          : FirstAligned;
 
   // we cannot assume the first element is aligned because of sub-matrices
-  const Index lhsAlignmentOffset = first_aligned(lhs,depth);
+  const Index lhsAlignmentOffset = internal::first_aligned(lhs,depth);
 
   // find how many rows do we have to skip to be aligned with rhs (if possible)
   Index skipRows = 0;
@@ -556,4 +543,6 @@ EIGEN_DONT_INLINE static void run(
 
 } // end namespace internal
 
+} // end namespace Eigen
+
 #endif // EIGEN_GENERAL_MATRIX_VECTOR_H
diff --git a/extern/Eigen3/Eigen/src/Core/products/GeneralMatrixVector_MKL.h b/extern/Eigen3/Eigen/src/Core/products/GeneralMatrixVector_MKL.h
new file mode 100644
index 00000000000..e9de6af3ed1
--- /dev/null
+++ b/extern/Eigen3/Eigen/src/Core/products/GeneralMatrixVector_MKL.h
@@ -0,0 +1,131 @@
+/*
+ Copyright (c) 2011, Intel Corporation. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its contributors may
+   be used to endorse or promote products derived from this software without
+   specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ ********************************************************************************
+ *   Content : Eigen bindings to Intel(R) MKL
+ *   General matrix-vector product functionality based on ?GEMV.
+ ********************************************************************************
+*/
+
+#ifndef EIGEN_GENERAL_MATRIX_VECTOR_MKL_H
+#define EIGEN_GENERAL_MATRIX_VECTOR_MKL_H
+
+namespace Eigen { 
+
+namespace internal {
+
+/**********************************************************************
+* This file implements general matrix-vector multiplication using BLAS
+* gemv function via partial specialization of
+* general_matrix_vector_product::run(..) method for float, double,
+* std::complex<float> and std::complex<double> types
+**********************************************************************/
+
+// gemv specialization
+
+template<typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs>
+struct general_matrix_vector_product_gemv :
+  general_matrix_vector_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,ConjugateRhs,BuiltIn> {};
+
+#define EIGEN_MKL_GEMV_SPECIALIZE(Scalar) \
+template<typename Index, bool ConjugateLhs, bool ConjugateRhs> \
+struct general_matrix_vector_product<Index,Scalar,ColMajor,ConjugateLhs,Scalar,ConjugateRhs,Specialized> { \
+static EIGEN_DONT_INLINE void run( \
+  Index rows, Index cols, \
+  const Scalar* lhs, Index lhsStride, \
+  const Scalar* rhs, Index rhsIncr, \
+  Scalar* res, Index resIncr, Scalar alpha) \
+{ \
+  if (ConjugateLhs) { \
+    general_matrix_vector_product<Index,Scalar,ColMajor,ConjugateLhs,Scalar,ConjugateRhs,BuiltIn>::run( \
+      rows, cols, lhs, lhsStride, rhs, rhsIncr, res, resIncr, alpha); \
+  } else { \
+    general_matrix_vector_product_gemv<Index,Scalar,ColMajor,ConjugateLhs,Scalar,ConjugateRhs>::run( \
+      rows, cols, lhs, lhsStride, rhs, rhsIncr, res, resIncr, alpha); \
+  } \
+} \
+}; \
+template<typename Index, bool ConjugateLhs, bool ConjugateRhs> \
+struct general_matrix_vector_product<Index,Scalar,RowMajor,ConjugateLhs,Scalar,ConjugateRhs,Specialized> { \
+static EIGEN_DONT_INLINE void run( \
+  Index rows, Index cols, \
+  const Scalar* lhs, Index lhsStride, \
+  const Scalar* rhs, Index rhsIncr, \
+  Scalar* res, Index resIncr, Scalar alpha) \
+{ \
+    general_matrix_vector_product_gemv<Index,Scalar,RowMajor,ConjugateLhs,Scalar,ConjugateRhs>::run( \
+      rows, cols, lhs, lhsStride, rhs, rhsIncr, res, resIncr, alpha); \
+} \
+}; \
+
+EIGEN_MKL_GEMV_SPECIALIZE(double)
+EIGEN_MKL_GEMV_SPECIALIZE(float)
+EIGEN_MKL_GEMV_SPECIALIZE(dcomplex)
+EIGEN_MKL_GEMV_SPECIALIZE(scomplex)
+
+#define EIGEN_MKL_GEMV_SPECIALIZATION(EIGTYPE,MKLTYPE,MKLPREFIX) \
+template<typename Index, int LhsStorageOrder, bool ConjugateLhs, bool ConjugateRhs> \
+struct general_matrix_vector_product_gemv<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,ConjugateRhs> \
+{ \
+typedef Matrix<EIGTYPE,Dynamic,1,ColMajor> GEMVVector;\
+\
+static EIGEN_DONT_INLINE void run( \
+  Index rows, Index cols, \
+  const EIGTYPE* lhs, Index lhsStride, \
+  const EIGTYPE* rhs, Index rhsIncr, \
+  EIGTYPE* res, Index resIncr, EIGTYPE alpha) \
+{ \
+  MKL_INT m=rows, n=cols, lda=lhsStride, incx=rhsIncr, incy=resIncr; \
+  MKLTYPE alpha_, beta_; \
+  const EIGTYPE *x_ptr, myone(1); \
+  char trans=(LhsStorageOrder==ColMajor) ? 'N' : (ConjugateLhs) ? 'C' : 'T'; \
+  if (LhsStorageOrder==RowMajor) { \
+    m=cols; \
+    n=rows; \
+  }\
+  assign_scalar_eig2mkl(alpha_, alpha); \
+  assign_scalar_eig2mkl(beta_, myone); \
+  GEMVVector x_tmp; \
+  if (ConjugateRhs) { \
+    Map<const GEMVVector, 0, InnerStride<> > map_x(rhs,cols,1,InnerStride<>(incx)); \
+    x_tmp=map_x.conjugate(); \
+    x_ptr=x_tmp.data(); \
+    incx=1; \
+  } else x_ptr=rhs; \
+  MKLPREFIX##gemv(&trans, &m, &n, &alpha_, (const MKLTYPE*)lhs, &lda, (const MKLTYPE*)x_ptr, &incx, &beta_, (MKLTYPE*)res, &incy); \
+}\
+};
+
+EIGEN_MKL_GEMV_SPECIALIZATION(double,   double,        d)
+EIGEN_MKL_GEMV_SPECIALIZATION(float,    float,         s)
+EIGEN_MKL_GEMV_SPECIALIZATION(dcomplex, MKL_Complex16, z)
+EIGEN_MKL_GEMV_SPECIALIZATION(scomplex, MKL_Complex8,  c)
+
+} // end namespase internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_GENERAL_MATRIX_VECTOR_MKL_H
diff --git a/extern/Eigen3/Eigen/src/Core/products/Parallelizer.h b/extern/Eigen3/Eigen/src/Core/products/Parallelizer.h
index ecdedc363ce..5c3e9b7ac15 100644
--- a/extern/Eigen3/Eigen/src/Core/products/Parallelizer.h
+++ b/extern/Eigen3/Eigen/src/Core/products/Parallelizer.h
@@ -3,28 +3,15 @@
 //
 // Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
-// Eigen is free software; you can redistribute it and/or
-// modify it under the terms of the GNU Lesser General Public
-// License as published by the Free Software Foundation; either
-// version 3 of the License, or (at your option) any later version.
-//
-// Alternatively, you can redistribute it and/or
-// modify it under the terms of the GNU General Public License as
-// published by the Free Software Foundation; either version 2 of
-// the License, or (at your option) any later version.
-//
-// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
-// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU Lesser General Public
-// License and a copy of the GNU General Public License along with
-// Eigen. If not, see <http://www.gnu.org/licenses/>.
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 #ifndef EIGEN_PARALLELIZER_H
 #define EIGEN_PARALLELIZER_H
 
+namespace Eigen { 
+
 namespace internal {
 
 /** \internal */
@@ -55,12 +42,23 @@ inline void manage_multi_threading(Action action, int* v)
   }
 }
 
+}
+
+/** Must be call first when calling Eigen from multiple threads */
+inline void initParallel()
+{
+  int nbt;
+  internal::manage_multi_threading(GetAction, &nbt);
+  std::ptrdiff_t l1, l2;
+  internal::manage_caching_sizes(GetAction, &l1, &l2);
+}
+
 /** \returns the max number of threads reserved for Eigen
   * \sa setNbThreads */
 inline int nbThreads()
 {
   int ret;
-  manage_multi_threading(GetAction, &ret);
+  internal::manage_multi_threading(GetAction, &ret);
   return ret;
 }
 
@@ -68,9 +66,11 @@ inline int nbThreads()
   * \sa nbThreads */
 inline void setNbThreads(int v)
 {
-  manage_multi_threading(SetAction, &v);
+  internal::manage_multi_threading(SetAction, &v);
 }
 
+namespace internal {
+
 template<typename Index> struct GemmParallelInfo
 {
   GemmParallelInfo() : sync(-1), users(0), rhs_start(0), rhs_length(0) {}
@@ -85,7 +85,9 @@ template<typename Index> struct GemmParallelInfo
 template<bool Condition, typename Functor, typename Index>
 void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpose)
 {
-#ifndef EIGEN_HAS_OPENMP
+  // TODO when EIGEN_USE_BLAS is defined,
+  // we should still enable OMP for other scalar types
+#if !(defined (EIGEN_HAS_OPENMP)) || defined (EIGEN_USE_BLAS)
   // FIXME the transpose variable is only needed to properly split
   // the matrix product when multithreading is enabled. This is a temporary
   // fix to support row-major destination matrices. This whole
@@ -117,6 +119,7 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpos
   if(threads==1)
     return func(0,rows, 0,cols);
 
+  Eigen::initParallel();
   func.initParallelSession();
 
   if(transpose)
@@ -151,4 +154,6 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpos
 
 } // end namespace internal
 
+} // end namespace Eigen
+
 #endif // EIGEN_PARALLELIZER_H
diff --git a/extern/Eigen3/Eigen/src/Core/products/SelfadjointMatrixMatrix.h b/extern/Eigen3/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
index ccd757cfaf8..48209636eed 100644
--- a/extern/Eigen3/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
+++ b/extern/Eigen3/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
@@ -3,28 +3,15 @@
 //
 // Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
-// Eigen is free software; you can redistribute it and/or
-// modify it under the terms of the GNU Lesser General Public
-// License as published by the Free Software Foundation; either
-// version 3 of the License, or (at your option) any later version.
-//
-// Alternatively, you can redistribute it and/or
-// modify it under the terms of the GNU General Public License as
-// published by the Free Software Foundation; either version 2 of
-// the License, or (at your option) any later version.
-//
-// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
-// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU Lesser General Public
-// License and a copy of the GNU General Public License along with
-// Eigen. If not, see <http://www.gnu.org/licenses/>.
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 #ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_H
 #define EIGEN_SELFADJOINT_MATRIX_MATRIX_H
 
+namespace Eigen { 
+
 namespace internal {
 
 // pack a selfadjoint block diagonal for use with the gebp_kernel
@@ -400,8 +387,8 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>
   {
     eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
 
-    const ActualLhsType lhs = LhsBlasTraits::extract(m_lhs);
-    const ActualRhsType rhs = RhsBlasTraits::extract(m_rhs);
+    typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
+    typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);
 
     Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
                                * RhsBlasTraits::extractScalarFactor(m_rhs);
@@ -424,4 +411,6 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>
   }
 };
 
+} // end namespace Eigen
+
 #endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_H
diff --git a/extern/Eigen3/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h b/extern/Eigen3/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h
new file mode 100644
index 00000000000..4e5c4125c01
--- /dev/null
+++ b/extern/Eigen3/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h
@@ -0,0 +1,295 @@
+/*
+ Copyright (c) 2011, Intel Corporation. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its contributors may
+   be used to endorse or promote products derived from this software without
+   specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ ********************************************************************************
+ *   Content : Eigen bindings to Intel(R) MKL
+ *   Self adjoint matrix * matrix product functionality based on ?SYMM/?HEMM.
+ ********************************************************************************
+*/
+
+#ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_MKL_H
+#define EIGEN_SELFADJOINT_MATRIX_MATRIX_MKL_H
+
+namespace Eigen { 
+
+namespace internal {
+
+
+/* Optimized selfadjoint matrix * matrix (?SYMM/?HEMM) product */
+
+#define EIGEN_MKL_SYMM_L(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
+template <typename Index, \
+          int LhsStorageOrder, bool ConjugateLhs, \
+          int RhsStorageOrder, bool ConjugateRhs> \
+struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor> \
+{\
+\
+  static EIGEN_DONT_INLINE void run( \
+    Index rows, Index cols, \
+    const EIGTYPE* _lhs, Index lhsStride, \
+    const EIGTYPE* _rhs, Index rhsStride, \
+    EIGTYPE* res,        Index resStride, \
+    EIGTYPE alpha) \
+  { \
+    char side='L', uplo='L'; \
+    MKL_INT m, n, lda, ldb, ldc; \
+    const EIGTYPE *a, *b; \
+    MKLTYPE alpha_, beta_; \
+    MatrixX##EIGPREFIX b_tmp; \
+    EIGTYPE myone(1);\
+\
+/* Set transpose options */ \
+/* Set m, n, k */ \
+    m = (MKL_INT)rows;  \
+    n = (MKL_INT)cols;  \
+\
+/* Set alpha_ & beta_ */ \
+    assign_scalar_eig2mkl(alpha_, alpha); \
+    assign_scalar_eig2mkl(beta_, myone); \
+\
+/* Set lda, ldb, ldc */ \
+    lda = (MKL_INT)lhsStride; \
+    ldb = (MKL_INT)rhsStride; \
+    ldc = (MKL_INT)resStride; \
+\
+/* Set a, b, c */ \
+    if (LhsStorageOrder==RowMajor) uplo='U'; \
+    a = _lhs; \
+\
+    if (RhsStorageOrder==RowMajor) { \
+      Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \
+      b_tmp = rhs.adjoint(); \
+      b = b_tmp.data(); \
+      ldb = b_tmp.outerStride(); \
+    } else b = _rhs; \
+\
+    MKLPREFIX##symm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \
+\
+  } \
+};
+
+
+#define EIGEN_MKL_HEMM_L(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
+template <typename Index, \
+          int LhsStorageOrder, bool ConjugateLhs, \
+          int RhsStorageOrder, bool ConjugateRhs> \
+struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor> \
+{\
+  static EIGEN_DONT_INLINE void run( \
+    Index rows, Index cols, \
+    const EIGTYPE* _lhs, Index lhsStride, \
+    const EIGTYPE* _rhs, Index rhsStride, \
+    EIGTYPE* res,        Index resStride, \
+    EIGTYPE alpha) \
+  { \
+    char side='L', uplo='L'; \
+    MKL_INT m, n, lda, ldb, ldc; \
+    const EIGTYPE *a, *b; \
+    MKLTYPE alpha_, beta_; \
+    MatrixX##EIGPREFIX b_tmp; \
+    Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> a_tmp; \
+    EIGTYPE myone(1); \
+\
+/* Set transpose options */ \
+/* Set m, n, k */ \
+    m = (MKL_INT)rows; \
+    n = (MKL_INT)cols; \
+\
+/* Set alpha_ & beta_ */ \
+    assign_scalar_eig2mkl(alpha_, alpha); \
+    assign_scalar_eig2mkl(beta_, myone); \
+\
+/* Set lda, ldb, ldc */ \
+    lda = (MKL_INT)lhsStride; \
+    ldb = (MKL_INT)rhsStride; \
+    ldc = (MKL_INT)resStride; \
+\
+/* Set a, b, c */ \
+    if (((LhsStorageOrder==ColMajor) && ConjugateLhs) || ((LhsStorageOrder==RowMajor) && (!ConjugateLhs))) { \
+      Map<const Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder>, 0, OuterStride<> > lhs(_lhs,m,m,OuterStride<>(lhsStride)); \
+      a_tmp = lhs.conjugate(); \
+      a = a_tmp.data(); \
+      lda = a_tmp.outerStride(); \
+    } else a = _lhs; \
+    if (LhsStorageOrder==RowMajor) uplo='U'; \
+\
+    if (RhsStorageOrder==ColMajor && (!ConjugateRhs)) { \
+       b = _rhs; } \
+    else { \
+      if (RhsStorageOrder==ColMajor && ConjugateRhs) { \
+        Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,m,n,OuterStride<>(rhsStride)); \
+        b_tmp = rhs.conjugate(); \
+      } else \
+      if (ConjugateRhs) { \
+        Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \
+        b_tmp = rhs.adjoint(); \
+      } else { \
+        Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \
+        b_tmp = rhs.transpose(); \
+      } \
+      b = b_tmp.data(); \
+      ldb = b_tmp.outerStride(); \
+    } \
+\
+    MKLPREFIX##hemm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \
+\
+  } \
+};
+
+EIGEN_MKL_SYMM_L(double, double, d, d)
+EIGEN_MKL_SYMM_L(float, float, f, s)
+EIGEN_MKL_HEMM_L(dcomplex, MKL_Complex16, cd, z)
+EIGEN_MKL_HEMM_L(scomplex, MKL_Complex8, cf, c)
+
+
+/* Optimized matrix * selfadjoint matrix (?SYMM/?HEMM) product */
+
+#define EIGEN_MKL_SYMM_R(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
+template <typename Index, \
+          int LhsStorageOrder, bool ConjugateLhs, \
+          int RhsStorageOrder, bool ConjugateRhs> \
+struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor> \
+{\
+\
+  static EIGEN_DONT_INLINE void run( \
+    Index rows, Index cols, \
+    const EIGTYPE* _lhs, Index lhsStride, \
+    const EIGTYPE* _rhs, Index rhsStride, \
+    EIGTYPE* res,        Index resStride, \
+    EIGTYPE alpha) \
+  { \
+    char side='R', uplo='L'; \
+    MKL_INT m, n, lda, ldb, ldc; \
+    const EIGTYPE *a, *b; \
+    MKLTYPE alpha_, beta_; \
+    MatrixX##EIGPREFIX b_tmp; \
+    EIGTYPE myone(1);\
+\
+/* Set m, n, k */ \
+    m = (MKL_INT)rows;  \
+    n = (MKL_INT)cols;  \
+\
+/* Set alpha_ & beta_ */ \
+    assign_scalar_eig2mkl(alpha_, alpha); \
+    assign_scalar_eig2mkl(beta_, myone); \
+\
+/* Set lda, ldb, ldc */ \
+    lda = (MKL_INT)rhsStride; \
+    ldb = (MKL_INT)lhsStride; \
+    ldc = (MKL_INT)resStride; \
+\
+/* Set a, b, c */ \
+    if (RhsStorageOrder==RowMajor) uplo='U'; \
+    a = _rhs; \
+\
+    if (LhsStorageOrder==RowMajor) { \
+      Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,n,m,OuterStride<>(rhsStride)); \
+      b_tmp = lhs.adjoint(); \
+      b = b_tmp.data(); \
+      ldb = b_tmp.outerStride(); \
+    } else b = _lhs; \
+\
+    MKLPREFIX##symm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \
+\
+  } \
+};
+
+
+#define EIGEN_MKL_HEMM_R(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
+template <typename Index, \
+          int LhsStorageOrder, bool ConjugateLhs, \
+          int RhsStorageOrder, bool ConjugateRhs> \
+struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor> \
+{\
+  static EIGEN_DONT_INLINE void run( \
+    Index rows, Index cols, \
+    const EIGTYPE* _lhs, Index lhsStride, \
+    const EIGTYPE* _rhs, Index rhsStride, \
+    EIGTYPE* res,        Index resStride, \
+    EIGTYPE alpha) \
+  { \
+    char side='R', uplo='L'; \
+    MKL_INT m, n, lda, ldb, ldc; \
+    const EIGTYPE *a, *b; \
+    MKLTYPE alpha_, beta_; \
+    MatrixX##EIGPREFIX b_tmp; \
+    Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> a_tmp; \
+    EIGTYPE myone(1); \
+\
+/* Set m, n, k */ \
+    m = (MKL_INT)rows; \
+    n = (MKL_INT)cols; \
+\
+/* Set alpha_ & beta_ */ \
+    assign_scalar_eig2mkl(alpha_, alpha); \
+    assign_scalar_eig2mkl(beta_, myone); \
+\
+/* Set lda, ldb, ldc */ \
+    lda = (MKL_INT)rhsStride; \
+    ldb = (MKL_INT)lhsStride; \
+    ldc = (MKL_INT)resStride; \
+\
+/* Set a, b, c */ \
+    if (((RhsStorageOrder==ColMajor) && ConjugateRhs) || ((RhsStorageOrder==RowMajor) && (!ConjugateRhs))) { \
+      Map<const Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder>, 0, OuterStride<> > rhs(_rhs,n,n,OuterStride<>(rhsStride)); \
+      a_tmp = rhs.conjugate(); \
+      a = a_tmp.data(); \
+      lda = a_tmp.outerStride(); \
+    } else a = _rhs; \
+    if (RhsStorageOrder==RowMajor) uplo='U'; \
+\
+    if (LhsStorageOrder==ColMajor && (!ConjugateLhs)) { \
+       b = _lhs; } \
+    else { \
+      if (LhsStorageOrder==ColMajor && ConjugateLhs) { \
+        Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,m,n,OuterStride<>(lhsStride)); \
+        b_tmp = lhs.conjugate(); \
+      } else \
+      if (ConjugateLhs) { \
+        Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,n,m,OuterStride<>(lhsStride)); \
+        b_tmp = lhs.adjoint(); \
+      } else { \
+        Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,n,m,OuterStride<>(lhsStride)); \
+        b_tmp = lhs.transpose(); \
+      } \
+      b = b_tmp.data(); \
+      ldb = b_tmp.outerStride(); \
+    } \
+\
+    MKLPREFIX##hemm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \
+  } \
+};
+
+EIGEN_MKL_SYMM_R(double, double, d, d)
+EIGEN_MKL_SYMM_R(float, float, f, s)
+EIGEN_MKL_HEMM_R(dcomplex, MKL_Complex16, cd, z)
+EIGEN_MKL_HEMM_R(scomplex, MKL_Complex8, cf, c)
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_MKL_H
diff --git a/extern/Eigen3/Eigen/src/Core/products/SelfadjointMatrixVector.h b/extern/Eigen3/Eigen/src/Core/products/SelfadjointMatrixVector.h
index d6121fc07bd..c3145c69a5f 100644
--- a/extern/Eigen3/Eigen/src/Core/products/SelfadjointMatrixVector.h
+++ b/extern/Eigen3/Eigen/src/Core/products/SelfadjointMatrixVector.h
@@ -3,28 +3,15 @@
 //
 // Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
-// Eigen is free software; you can redistribute it and/or
-// modify it under the terms of the GNU Lesser General Public
-// License as published by the Free Software Foundation; either
-// version 3 of the License, or (at your option) any later version.
-//
-// Alternatively, you can redistribute it and/or
-// modify it under the terms of the GNU General Public License as
-// published by the Free Software Foundation; either version 2 of
-// the License, or (at your option) any later version.
-//
-// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
-// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU Lesser General Public
-// License and a copy of the GNU General Public License along with
-// Eigen. If not, see <http://www.gnu.org/licenses/>.
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 #ifndef EIGEN_SELFADJOINT_MATRIX_VECTOR_H
 #define EIGEN_SELFADJOINT_MATRIX_VECTOR_H
 
+namespace Eigen { 
+
 namespace internal {
 
 /* Optimized selfadjoint matrix * vector product:
@@ -32,8 +19,15 @@ namespace internal {
  * the number of load/stores of the result by a factor 2 and to reduce
  * the instruction dependency.
  */
-template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs>
-static EIGEN_DONT_INLINE void product_selfadjoint_vector(
+
+template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs, int Version=Specialized>
+struct selfadjoint_matrix_vector_product;
+
+template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs, int Version>
+struct selfadjoint_matrix_vector_product
+
+{
+static EIGEN_DONT_INLINE void run(
   Index size,
   const Scalar*  lhs, Index lhsStride,
   const Scalar* _rhs, Index rhsIncr,
@@ -85,14 +79,14 @@ static EIGEN_DONT_INLINE void product_selfadjoint_vector(
     Scalar t1 = cjAlpha * rhs[j+1];
     Packet ptmp1 = pset1<Packet>(t1);
 
-    Scalar t2 = 0;
+    Scalar t2(0);
     Packet ptmp2 = pset1<Packet>(t2);
-    Scalar t3 = 0;
+    Scalar t3(0);
     Packet ptmp3 = pset1<Packet>(t3);
 
     size_t starti = FirstTriangular ? 0 : j+2;
     size_t endi   = FirstTriangular ? j : size;
-    size_t alignedStart = (starti) + first_aligned(&res[starti], endi-starti);
+    size_t alignedStart = (starti) + internal::first_aligned(&res[starti], endi-starti);
     size_t alignedEnd = alignedStart + ((endi-alignedStart)/(PacketSize))*(PacketSize);
 
     // TODO make sure this product is a real * complex and that the rhs is properly conjugated if needed
@@ -148,7 +142,7 @@ static EIGEN_DONT_INLINE void product_selfadjoint_vector(
     register const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride;
 
     Scalar t1 = cjAlpha * rhs[j];
-    Scalar t2 = 0;
+    Scalar t2(0);
     // TODO make sure this product is a real * complex and that the rhs is properly conjugated if needed
     res[j] += cjd.pmul(internal::real(A0[j]), t1);
     for (Index i=FirstTriangular ? 0 : j+1; i<(FirstTriangular ? j : size); i++)
@@ -159,6 +153,7 @@ static EIGEN_DONT_INLINE void product_selfadjoint_vector(
     res[j] += alpha * t2;
   }
 }
+};
 
 } // end namespace internal 
 
@@ -193,8 +188,8 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
     
     eigen_assert(dest.rows()==m_lhs.rows() && dest.cols()==m_rhs.cols());
 
-    const ActualLhsType lhs = LhsBlasTraits::extract(m_lhs);
-    const ActualRhsType rhs = RhsBlasTraits::extract(m_rhs);
+    typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
+    typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);
 
     Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
                                * RhsBlasTraits::extractScalarFactor(m_rhs);
@@ -232,7 +227,7 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
     }
       
       
-    internal::product_selfadjoint_vector<Scalar, Index, (internal::traits<_ActualLhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, int(LhsUpLo), bool(LhsBlasTraits::NeedToConjugate), bool(RhsBlasTraits::NeedToConjugate)>
+    internal::selfadjoint_matrix_vector_product<Scalar, Index, (internal::traits<_ActualLhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, int(LhsUpLo), bool(LhsBlasTraits::NeedToConjugate), bool(RhsBlasTraits::NeedToConjugate)>::run
       (
         lhs.rows(),                             // size
         &lhs.coeffRef(0,0),  lhs.outerStride(), // lhs info
@@ -274,5 +269,6 @@ struct SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>
   }
 };
 
+} // end namespace Eigen
 
 #endif // EIGEN_SELFADJOINT_MATRIX_VECTOR_H
diff --git a/extern/Eigen3/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h b/extern/Eigen3/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h
new file mode 100644
index 00000000000..f88d483b653
--- /dev/null
+++ b/extern/Eigen3/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h
@@ -0,0 +1,114 @@
+/*
+ Copyright (c) 2011, Intel Corporation. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its contributors may
+   be used to endorse or promote products derived from this software without
+   specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ ********************************************************************************
+ *   Content : Eigen bindings to Intel(R) MKL
+ *   Selfadjoint matrix-vector product functionality based on ?SYMV/HEMV.
+ ********************************************************************************
+*/
+
+#ifndef EIGEN_SELFADJOINT_MATRIX_VECTOR_MKL_H
+#define EIGEN_SELFADJOINT_MATRIX_VECTOR_MKL_H
+
+namespace Eigen { 
+
+namespace internal {
+
+/**********************************************************************
+* This file implements selfadjoint matrix-vector multiplication using BLAS
+**********************************************************************/
+
+// symv/hemv specialization
+
+template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs>
+struct selfadjoint_matrix_vector_product_symv :
+  selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,BuiltIn> {};
+
+#define EIGEN_MKL_SYMV_SPECIALIZE(Scalar) \
+template<typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs> \
+struct selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,Specialized> { \
+static EIGEN_DONT_INLINE void run( \
+  Index size, const Scalar*  lhs, Index lhsStride, \
+  const Scalar* _rhs, Index rhsIncr, Scalar* res, Scalar alpha) { \
+    enum {\
+      IsColMajor = StorageOrder==ColMajor \
+    }; \
+    if (IsColMajor == ConjugateLhs) {\
+      selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,BuiltIn>::run( \
+        size, lhs, lhsStride, _rhs, rhsIncr, res, alpha);  \
+    } else {\
+      selfadjoint_matrix_vector_product_symv<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs>::run( \
+        size, lhs, lhsStride, _rhs, rhsIncr, res, alpha);  \
+    }\
+  } \
+}; \
+
+EIGEN_MKL_SYMV_SPECIALIZE(double)
+EIGEN_MKL_SYMV_SPECIALIZE(float)
+EIGEN_MKL_SYMV_SPECIALIZE(dcomplex)
+EIGEN_MKL_SYMV_SPECIALIZE(scomplex)
+
+#define EIGEN_MKL_SYMV_SPECIALIZATION(EIGTYPE,MKLTYPE,MKLFUNC) \
+template<typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs> \
+struct selfadjoint_matrix_vector_product_symv<EIGTYPE,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs> \
+{ \
+typedef Matrix<EIGTYPE,Dynamic,1,ColMajor> SYMVVector;\
+\
+static EIGEN_DONT_INLINE void run( \
+Index size, const EIGTYPE*  lhs, Index lhsStride, \
+const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* res, EIGTYPE alpha) \
+{ \
+  enum {\
+    IsRowMajor = StorageOrder==RowMajor ? 1 : 0, \
+    IsLower = UpLo == Lower ? 1 : 0 \
+  }; \
+  MKL_INT n=size, lda=lhsStride, incx=rhsIncr, incy=1; \
+  MKLTYPE alpha_, beta_; \
+  const EIGTYPE *x_ptr, myone(1); \
+  char uplo=(IsRowMajor) ? (IsLower ? 'U' : 'L') : (IsLower ? 'L' : 'U'); \
+  assign_scalar_eig2mkl(alpha_, alpha); \
+  assign_scalar_eig2mkl(beta_, myone); \
+  SYMVVector x_tmp; \
+  if (ConjugateRhs) { \
+    Map<const SYMVVector, 0, InnerStride<> > map_x(_rhs,size,1,InnerStride<>(incx)); \
+    x_tmp=map_x.conjugate(); \
+    x_ptr=x_tmp.data(); \
+    incx=1; \
+  } else x_ptr=_rhs; \
+  MKLFUNC(&uplo, &n, &alpha_, (const MKLTYPE*)lhs, &lda, (const MKLTYPE*)x_ptr, &incx, &beta_, (MKLTYPE*)res, &incy); \
+}\
+};
+
+EIGEN_MKL_SYMV_SPECIALIZATION(double,   double,        dsymv)
+EIGEN_MKL_SYMV_SPECIALIZATION(float,    float,         ssymv)
+EIGEN_MKL_SYMV_SPECIALIZATION(dcomplex, MKL_Complex16, zhemv)
+EIGEN_MKL_SYMV_SPECIALIZATION(scomplex, MKL_Complex8,  chemv)
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_SELFADJOINT_MATRIX_VECTOR_MKL_H
diff --git a/extern/Eigen3/Eigen/src/Core/products/SelfadjointProduct.h b/extern/Eigen3/Eigen/src/Core/products/SelfadjointProduct.h
index 3a4523fa4a9..6a55f3d7715 100644
--- a/extern/Eigen3/Eigen/src/Core/products/SelfadjointProduct.h
+++ b/extern/Eigen3/Eigen/src/Core/products/SelfadjointProduct.h
@@ -3,24 +3,9 @@
 //
 // Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
-// Eigen is free software; you can redistribute it and/or
-// modify it under the terms of the GNU Lesser General Public
-// License as published by the Free Software Foundation; either
-// version 3 of the License, or (at your option) any later version.
-//
-// Alternatively, you can redistribute it and/or
-// modify it under the terms of the GNU General Public License as
-// published by the Free Software Foundation; either version 2 of
-// the License, or (at your option) any later version.
-//
-// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
-// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU Lesser General Public
-// License and a copy of the GNU General Public License along with
-// Eigen. If not, see <http://www.gnu.org/licenses/>.
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 #ifndef EIGEN_SELFADJOINT_PRODUCT_H
 #define EIGEN_SELFADJOINT_PRODUCT_H
@@ -31,6 +16,8 @@
 * It corresponds to the level 3 SYRK and level 2 SYR Blas routines.
 **********************************************************************/
 
+namespace Eigen { 
+
 template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjLhs, bool ConjRhs>
 struct selfadjoint_rank1_update;
 
@@ -72,7 +59,7 @@ struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,true>
     typedef internal::blas_traits<OtherType> OtherBlasTraits;
     typedef typename OtherBlasTraits::DirectLinearAccessType ActualOtherType;
     typedef typename internal::remove_all<ActualOtherType>::type _ActualOtherType;
-    const ActualOtherType actualOther = OtherBlasTraits::extract(other.derived());
+    typename internal::add_const_on_value_type<ActualOtherType>::type actualOther = OtherBlasTraits::extract(other.derived());
 
     Scalar actualAlpha = alpha * OtherBlasTraits::extractScalarFactor(other.derived());
 
@@ -105,12 +92,12 @@ struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,false>
     typedef internal::blas_traits<OtherType> OtherBlasTraits;
     typedef typename OtherBlasTraits::DirectLinearAccessType ActualOtherType;
     typedef typename internal::remove_all<ActualOtherType>::type _ActualOtherType;
-    const ActualOtherType actualOther = OtherBlasTraits::extract(other.derived());
+    typename internal::add_const_on_value_type<ActualOtherType>::type actualOther = OtherBlasTraits::extract(other.derived());
 
     Scalar actualAlpha = alpha * OtherBlasTraits::extractScalarFactor(other.derived());
 
     enum { IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0 };
-    
+
     internal::general_matrix_matrix_triangular_product<Index,
       Scalar, _ActualOtherType::Flags&RowMajorBit ? RowMajor : ColMajor,   OtherBlasTraits::NeedToConjugate  && NumTraits<Scalar>::IsComplex,
       Scalar, _ActualOtherType::Flags&RowMajorBit ? ColMajor : RowMajor, (!OtherBlasTraits::NeedToConjugate) && NumTraits<Scalar>::IsComplex,
@@ -133,4 +120,6 @@ SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>
   return *this;
 }
 
+} // end namespace Eigen
+
 #endif // EIGEN_SELFADJOINT_PRODUCT_H
diff --git a/extern/Eigen3/Eigen/src/Core/products/SelfadjointRank2Update.h b/extern/Eigen3/Eigen/src/Core/products/SelfadjointRank2Update.h
index 9f8b8438a5d..57a98cc2de9 100644
--- a/extern/Eigen3/Eigen/src/Core/products/SelfadjointRank2Update.h
+++ b/extern/Eigen3/Eigen/src/Core/products/SelfadjointRank2Update.h
@@ -3,28 +3,15 @@
 //
 // Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
-// Eigen is free software; you can redistribute it and/or
-// modify it under the terms of the GNU Lesser General Public
-// License as published by the Free Software Foundation; either
-// version 3 of the License, or (at your option) any later version.
-//
-// Alternatively, you can redistribute it and/or
-// modify it under the terms of the GNU General Public License as
-// published by the Free Software Foundation; either version 2 of
-// the License, or (at your option) any later version.
-//
-// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
-// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU Lesser General Public
-// License and a copy of the GNU General Public License along with
-// Eigen. If not, see <http://www.gnu.org/licenses/>.
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 #ifndef EIGEN_SELFADJOINTRANK2UPTADE_H
 #define EIGEN_SELFADJOINTRANK2UPTADE_H
 
+namespace Eigen { 
+
 namespace internal {
 
 /* Optimized selfadjoint matrix += alpha * uv' + conj(alpha)*vu'
@@ -76,12 +63,12 @@ SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>
   typedef internal::blas_traits<DerivedU> UBlasTraits;
   typedef typename UBlasTraits::DirectLinearAccessType ActualUType;
   typedef typename internal::remove_all<ActualUType>::type _ActualUType;
-  const ActualUType actualU = UBlasTraits::extract(u.derived());
+  typename internal::add_const_on_value_type<ActualUType>::type actualU = UBlasTraits::extract(u.derived());
 
   typedef internal::blas_traits<DerivedV> VBlasTraits;
   typedef typename VBlasTraits::DirectLinearAccessType ActualVType;
   typedef typename internal::remove_all<ActualVType>::type _ActualVType;
-  const ActualVType actualV = VBlasTraits::extract(v.derived());
+  typename internal::add_const_on_value_type<ActualVType>::type actualV = VBlasTraits::extract(v.derived());
 
   // If MatrixType is row major, then we use the routine for lower triangular in the upper triangular case and
   // vice versa, and take the complex conjugate of all coefficients and vector entries.
@@ -101,4 +88,6 @@ SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>
   return *this;
 }
 
+} // end namespace Eigen
+
 #endif // EIGEN_SELFADJOINTRANK2UPTADE_H
diff --git a/extern/Eigen3/Eigen/src/Core/products/TriangularMatrixMatrix.h b/extern/Eigen3/Eigen/src/Core/products/TriangularMatrixMatrix.h
index 0c48d2efb75..92cba66f615 100644
--- a/extern/Eigen3/Eigen/src/Core/products/TriangularMatrixMatrix.h
+++ b/extern/Eigen3/Eigen/src/Core/products/TriangularMatrixMatrix.h
@@ -3,28 +3,15 @@
 //
 // Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
-// Eigen is free software; you can redistribute it and/or
-// modify it under the terms of the GNU Lesser General Public
-// License as published by the Free Software Foundation; either
-// version 3 of the License, or (at your option) any later version.
-//
-// Alternatively, you can redistribute it and/or
-// modify it under the terms of the GNU General Public License as
-// published by the Free Software Foundation; either version 2 of
-// the License, or (at your option) any later version.
-//
-// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
-// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU Lesser General Public
-// License and a copy of the GNU General Public License along with
-// Eigen. If not, see <http://www.gnu.org/licenses/>.
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 #ifndef EIGEN_TRIANGULAR_MATRIX_MATRIX_H
 #define EIGEN_TRIANGULAR_MATRIX_MATRIX_H
 
+namespace Eigen { 
+
 namespace internal {
 
 // template<typename Scalar, int mr, int StorageOrder, bool Conjugate, int Mode>
@@ -58,23 +45,23 @@ template <typename Scalar, typename Index,
           int Mode, bool LhsIsTriangular,
           int LhsStorageOrder, bool ConjugateLhs,
           int RhsStorageOrder, bool ConjugateRhs,
-          int ResStorageOrder>
+          int ResStorageOrder, int Version = Specialized>
 struct product_triangular_matrix_matrix;
 
 template <typename Scalar, typename Index,
           int Mode, bool LhsIsTriangular,
           int LhsStorageOrder, bool ConjugateLhs,
-          int RhsStorageOrder, bool ConjugateRhs>
+          int RhsStorageOrder, bool ConjugateRhs, int Version>
 struct product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular,
                                            LhsStorageOrder,ConjugateLhs,
-                                           RhsStorageOrder,ConjugateRhs,RowMajor>
+                                           RhsStorageOrder,ConjugateRhs,RowMajor,Version>
 {
   static EIGEN_STRONG_INLINE void run(
     Index rows, Index cols, Index depth,
     const Scalar* lhs, Index lhsStride,
     const Scalar* rhs, Index rhsStride,
     Scalar* res,       Index resStride,
-    Scalar alpha)
+    Scalar alpha, level3_blocking<Scalar,Scalar>& blocking)
   {
     product_triangular_matrix_matrix<Scalar, Index,
       (Mode&(UnitDiag|ZeroDiag)) | ((Mode&Upper) ? Lower : Upper),
@@ -84,22 +71,22 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular,
       LhsStorageOrder==RowMajor ? ColMajor : RowMajor,
       ConjugateLhs,
       ColMajor>
-      ::run(cols, rows, depth, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha);
+      ::run(cols, rows, depth, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha, blocking);
   }
 };
 
 // implements col-major += alpha * op(triangular) * op(general)
 template <typename Scalar, typename Index, int Mode,
           int LhsStorageOrder, bool ConjugateLhs,
-          int RhsStorageOrder, bool ConjugateRhs>
+          int RhsStorageOrder, bool ConjugateRhs, int Version>
 struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
                                            LhsStorageOrder,ConjugateLhs,
-                                           RhsStorageOrder,ConjugateRhs,ColMajor>
+                                           RhsStorageOrder,ConjugateRhs,ColMajor,Version>
 {
   
   typedef gebp_traits<Scalar,Scalar> Traits;
   enum {
-    SmallPanelWidth   = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),
+    SmallPanelWidth   = 2 * EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),
     IsLower = (Mode&Lower) == Lower,
     SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1
   };
@@ -109,7 +96,7 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
     const Scalar* _lhs, Index lhsStride,
     const Scalar* _rhs, Index rhsStride,
     Scalar* res,        Index resStride,
-    Scalar alpha)
+    Scalar alpha, level3_blocking<Scalar,Scalar>& blocking)
   {
     // strip zeros
     Index diagSize  = (std::min)(_rows,_depth);
@@ -120,15 +107,16 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
     const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
     const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
 
-    Index kc = depth; // cache block size along the K direction
-    Index mc = rows;  // cache block size along the M direction
-    Index nc = cols;  // cache block size along the N direction
-    computeProductBlockingSizes<Scalar,Scalar,4>(kc, mc, nc);
+    Index kc = blocking.kc();                   // cache block size along the K direction
+    Index mc = (std::min)(rows,blocking.mc());  // cache block size along the M direction
+
+    std::size_t sizeA = kc*mc;
+    std::size_t sizeB = kc*cols;
     std::size_t sizeW = kc*Traits::WorkSpaceFactor;
-    std::size_t sizeB = sizeW + kc*cols;
-    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
-    ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);    
-    Scalar* blockB = allocatedBlockB + sizeW;
+
+    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
+    ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
+    ei_declare_aligned_stack_constructed_variable(Scalar, blockW, sizeW, blocking.blockW());
 
     Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,LhsStorageOrder> triangularBuffer;
     triangularBuffer.setZero();
@@ -186,7 +174,7 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
           pack_lhs(blockA, triangularBuffer.data(), triangularBuffer.outerStride(), actualPanelWidth, actualPanelWidth);
 
           gebp_kernel(res+startBlock, resStride, blockA, blockB, actualPanelWidth, actualPanelWidth, cols, alpha,
-                      actualPanelWidth, actual_kc, 0, blockBOffset);
+                      actualPanelWidth, actual_kc, 0, blockBOffset, blockW);
 
           // GEBP with remaining micro panel
           if (lengthTarget>0)
@@ -196,7 +184,7 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
             pack_lhs(blockA, &lhs(startTarget,startBlock), lhsStride, actualPanelWidth, lengthTarget);
 
             gebp_kernel(res+startTarget, resStride, blockA, blockB, lengthTarget, actualPanelWidth, cols, alpha,
-                        actualPanelWidth, actual_kc, 0, blockBOffset);
+                        actualPanelWidth, actual_kc, 0, blockBOffset, blockW);
           }
         }
       }
@@ -210,7 +198,7 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
           gemm_pack_lhs<Scalar, Index, Traits::mr,Traits::LhsProgress, LhsStorageOrder,false>()
             (blockA, &lhs(i2, actual_k2), lhsStride, actual_kc, actual_mc);
 
-          gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
+          gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha, -1, -1, 0, 0, blockW);
         }
       }
     }
@@ -220,10 +208,10 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
 // implements col-major += alpha * op(general) * op(triangular)
 template <typename Scalar, typename Index, int Mode,
           int LhsStorageOrder, bool ConjugateLhs,
-          int RhsStorageOrder, bool ConjugateRhs>
+          int RhsStorageOrder, bool ConjugateRhs, int Version>
 struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
                                            LhsStorageOrder,ConjugateLhs,
-                                           RhsStorageOrder,ConjugateRhs,ColMajor>
+                                           RhsStorageOrder,ConjugateRhs,ColMajor,Version>
 {
   typedef gebp_traits<Scalar,Scalar> Traits;
   enum {
@@ -237,7 +225,7 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
     const Scalar* _lhs, Index lhsStride,
     const Scalar* _rhs, Index rhsStride,
     Scalar* res,        Index resStride,
-    Scalar alpha)
+    Scalar alpha, level3_blocking<Scalar,Scalar>& blocking)
   {
     // strip zeros
     Index diagSize  = (std::min)(_cols,_depth);
@@ -248,16 +236,16 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
     const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
     const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
 
-    Index kc = depth; // cache block size along the K direction
-    Index mc = rows;  // cache block size along the M direction
-    Index nc = cols;  // cache block size along the N direction
-    computeProductBlockingSizes<Scalar,Scalar,4>(kc, mc, nc);
+    Index kc = blocking.kc();                   // cache block size along the K direction
+    Index mc = (std::min)(rows,blocking.mc());  // cache block size along the M direction
 
+    std::size_t sizeA = kc*mc;
+    std::size_t sizeB = kc*cols;
     std::size_t sizeW = kc*Traits::WorkSpaceFactor;
-    std::size_t sizeB = sizeW + kc*cols;
-    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
-    ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
-    Scalar* blockB = allocatedBlockB + sizeW;
+
+    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
+    ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
+    ei_declare_aligned_stack_constructed_variable(Scalar, blockW, sizeW, blocking.blockW());
 
     Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,RhsStorageOrder> triangularBuffer;
     triangularBuffer.setZero();
@@ -345,13 +333,13 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
                         alpha,
                         actual_kc, actual_kc,  // strides
                         blockOffset, blockOffset,// offsets
-                        allocatedBlockB); // workspace
+                        blockW); // workspace
           }
         }
         gebp_kernel(res+i2+(IsLower ? 0 : k2)*resStride, resStride,
                     blockA, geb, actual_mc, actual_kc, rs,
                     alpha,
-                    -1, -1, 0, 0, allocatedBlockB);
+                    -1, -1, 0, 0, blockW);
       }
     }
   }
@@ -378,26 +366,38 @@ struct TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false>
 
   template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
   {
-    const ActualLhsType lhs = LhsBlasTraits::extract(m_lhs);
-    const ActualRhsType rhs = RhsBlasTraits::extract(m_rhs);
+    typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
+    typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);
 
     Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
                                * RhsBlasTraits::extractScalarFactor(m_rhs);
 
+    typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar,
+              Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxColsAtCompileTime,4> BlockingType;
+
+    enum { IsLower = (Mode&Lower) == Lower };
+    Index stripedRows  = ((!LhsIsTriangular) || (IsLower))  ? lhs.rows() : (std::min)(lhs.rows(),lhs.cols());
+    Index stripedCols  = ((LhsIsTriangular)  || (!IsLower)) ? rhs.cols() : (std::min)(rhs.cols(),rhs.rows());
+    Index stripedDepth = LhsIsTriangular ? ((!IsLower) ? lhs.cols() : (std::min)(lhs.cols(),lhs.rows()))
+                                         : ((IsLower)  ? rhs.rows() : (std::min)(rhs.rows(),rhs.cols()));
+
+    BlockingType blocking(stripedRows, stripedCols, stripedDepth);
+
     internal::product_triangular_matrix_matrix<Scalar, Index,
       Mode, LhsIsTriangular,
       (internal::traits<_ActualLhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate,
       (internal::traits<_ActualRhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate,
       (internal::traits<Dest          >::Flags&RowMajorBit) ? RowMajor : ColMajor>
       ::run(
-        lhs.rows(), rhs.cols(), lhs.cols(),// LhsIsTriangular ? rhs.cols() : lhs.rows(),           // sizes
+        stripedRows, stripedCols, stripedDepth,   // sizes
         &lhs.coeffRef(0,0),    lhs.outerStride(), // lhs info
         &rhs.coeffRef(0,0),    rhs.outerStride(), // rhs info
-        &dst.coeffRef(0,0), dst.outerStride(), // result info
-        actualAlpha                            // alpha
+        &dst.coeffRef(0,0), dst.outerStride(),    // result info
+        actualAlpha, blocking
       );
   }
 };
 
+} // end namespace Eigen
 
 #endif // EIGEN_TRIANGULAR_MATRIX_MATRIX_H
diff --git a/extern/Eigen3/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h b/extern/Eigen3/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h
new file mode 100644
index 00000000000..8173da5bb6d
--- /dev/null
+++ b/extern/Eigen3/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h
@@ -0,0 +1,309 @@
+/*
+ Copyright (c) 2011, Intel Corporation. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its contributors may
+   be used to endorse or promote products derived from this software without
+   specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ ********************************************************************************
+ *   Content : Eigen bindings to Intel(R) MKL
+ *   Triangular matrix * matrix product functionality based on ?TRMM.
+ ********************************************************************************
+*/
+
+#ifndef EIGEN_TRIANGULAR_MATRIX_MATRIX_MKL_H
+#define EIGEN_TRIANGULAR_MATRIX_MATRIX_MKL_H
+
+namespace Eigen { 
+
+namespace internal {
+
+
+template <typename Scalar, typename Index,
+          int Mode, bool LhsIsTriangular,
+          int LhsStorageOrder, bool ConjugateLhs,
+          int RhsStorageOrder, bool ConjugateRhs,
+          int ResStorageOrder>
+struct product_triangular_matrix_matrix_trmm :
+       product_triangular_matrix_matrix<Scalar,Index,Mode,
+          LhsIsTriangular,LhsStorageOrder,ConjugateLhs,
+          RhsStorageOrder, ConjugateRhs, ResStorageOrder, BuiltIn> {};
+
+
+// try to go to BLAS specialization
+#define EIGEN_MKL_TRMM_SPECIALIZE(Scalar, LhsIsTriangular) \
+template <typename Index, int Mode, \
+          int LhsStorageOrder, bool ConjugateLhs, \
+          int RhsStorageOrder, bool ConjugateRhs> \
+struct product_triangular_matrix_matrix<Scalar,Index, Mode, LhsIsTriangular, \
+           LhsStorageOrder,ConjugateLhs, RhsStorageOrder,ConjugateRhs,ColMajor,Specialized> { \
+  static inline void run(Index _rows, Index _cols, Index _depth, const Scalar* _lhs, Index lhsStride,\
+    const Scalar* _rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha) { \
+      product_triangular_matrix_matrix_trmm<Scalar,Index,Mode, \
+        LhsIsTriangular,LhsStorageOrder,ConjugateLhs, \
+        RhsStorageOrder, ConjugateRhs, ColMajor>::run( \
+        _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha); \
+  } \
+};
+
+EIGEN_MKL_TRMM_SPECIALIZE(double, true)
+EIGEN_MKL_TRMM_SPECIALIZE(double, false)
+EIGEN_MKL_TRMM_SPECIALIZE(dcomplex, true)
+EIGEN_MKL_TRMM_SPECIALIZE(dcomplex, false)
+EIGEN_MKL_TRMM_SPECIALIZE(float, true)
+EIGEN_MKL_TRMM_SPECIALIZE(float, false)
+EIGEN_MKL_TRMM_SPECIALIZE(scomplex, true)
+EIGEN_MKL_TRMM_SPECIALIZE(scomplex, false)
+
+// implements col-major += alpha * op(triangular) * op(general)
+#define EIGEN_MKL_TRMM_L(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
+template <typename Index, int Mode, \
+          int LhsStorageOrder, bool ConjugateLhs, \
+          int RhsStorageOrder, bool ConjugateRhs> \
+struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
+         LhsStorageOrder,ConjugateLhs,RhsStorageOrder,ConjugateRhs,ColMajor> \
+{ \
+  enum { \
+    IsLower = (Mode&Lower) == Lower, \
+    SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \
+    IsUnitDiag  = (Mode&UnitDiag) ? 1 : 0, \
+    IsZeroDiag  = (Mode&ZeroDiag) ? 1 : 0, \
+    LowUp = IsLower ? Lower : Upper, \
+    conjA = ((LhsStorageOrder==ColMajor) && ConjugateLhs) ? 1 : 0 \
+  }; \
+\
+  static EIGEN_DONT_INLINE void run( \
+    Index _rows, Index _cols, Index _depth, \
+    const EIGTYPE* _lhs, Index lhsStride, \
+    const EIGTYPE* _rhs, Index rhsStride, \
+    EIGTYPE* res,        Index resStride, \
+    EIGTYPE alpha) \
+  { \
+   Index diagSize  = (std::min)(_rows,_depth); \
+   Index rows      = IsLower ? _rows : diagSize; \
+   Index depth     = IsLower ? diagSize : _depth; \
+   Index cols      = _cols; \
+\
+   typedef Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> MatrixLhs; \
+   typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs; \
+\
+/* Non-square case - doesn't fit to MKL ?TRMM. Fall to default triangular product or call MKL ?GEMM*/ \
+   if (rows != depth) { \
+\
+     int nthr = mkl_domain_get_max_threads(MKL_BLAS); \
+\
+     if (((nthr==1) && (((std::max)(rows,depth)-diagSize)/(double)diagSize < 0.5))) { \
+     /* Most likely no benefit to call TRMM or GEMM from MKL*/ \
+       product_triangular_matrix_matrix<EIGTYPE,Index,Mode,true, \
+       LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, BuiltIn>::run( \
+           _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha); \
+     /*std::cout << "TRMM_L: A is not square! Go to Eigen TRMM implementation!\n";*/ \
+     } else { \
+     /* Make sense to call GEMM */ \
+       Map<const MatrixLhs, 0, OuterStride<> > lhsMap(_lhs,rows,depth,OuterStride<>(lhsStride)); \
+       MatrixLhs aa_tmp=lhsMap.template triangularView<Mode>(); \
+       MKL_INT aStride = aa_tmp.outerStride(); \
+       gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> blocking(_rows,_cols,_depth); \
+       general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor>::run( \
+       rows, cols, depth, aa_tmp.data(), aStride, _rhs, rhsStride, res, resStride, alpha, blocking, 0); \
+\
+     /*std::cout << "TRMM_L: A is not square! Go to MKL GEMM implementation! " << nthr<<" \n";*/ \
+     } \
+     return; \
+   } \
+   char side = 'L', transa, uplo, diag = 'N'; \
+   EIGTYPE *b; \
+   const EIGTYPE *a; \
+   MKL_INT m, n, lda, ldb; \
+   MKLTYPE alpha_; \
+\
+/* Set alpha_*/ \
+   assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \
+\
+/* Set m, n */ \
+   m = (MKL_INT)diagSize; \
+   n = (MKL_INT)cols; \
+\
+/* Set trans */ \
+   transa = (LhsStorageOrder==RowMajor) ? ((ConjugateLhs) ? 'C' : 'T') : 'N'; \
+\
+/* Set b, ldb */ \
+   Map<const MatrixRhs, 0, OuterStride<> > rhs(_rhs,depth,cols,OuterStride<>(rhsStride)); \
+   MatrixX##EIGPREFIX b_tmp; \
+\
+   if (ConjugateRhs) b_tmp = rhs.conjugate(); else b_tmp = rhs; \
+   b = b_tmp.data(); \
+   ldb = b_tmp.outerStride(); \
+\
+/* Set uplo */ \
+   uplo = IsLower ? 'L' : 'U'; \
+   if (LhsStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \
+/* Set a, lda */ \
+   Map<const MatrixLhs, 0, OuterStride<> > lhs(_lhs,rows,depth,OuterStride<>(lhsStride)); \
+   MatrixLhs a_tmp; \
+\
+   if ((conjA!=0) || (SetDiag==0)) { \
+     if (conjA) a_tmp = lhs.conjugate(); else a_tmp = lhs; \
+     if (IsZeroDiag) \
+       a_tmp.diagonal().setZero(); \
+     else if (IsUnitDiag) \
+       a_tmp.diagonal().setOnes();\
+     a = a_tmp.data(); \
+     lda = a_tmp.outerStride(); \
+   } else { \
+     a = _lhs; \
+     lda = lhsStride; \
+   } \
+   /*std::cout << "TRMM_L: A is square! Go to MKL TRMM implementation! \n";*/ \
+/* call ?trmm*/ \
+   MKLPREFIX##trmm(&side, &uplo, &transa, &diag, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (MKLTYPE*)b, &ldb); \
+\
+/* Add op(a_triangular)*b into res*/ \
+   Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \
+   res_tmp=res_tmp+b_tmp; \
+  } \
+};
+
+EIGEN_MKL_TRMM_L(double, double, d, d)
+EIGEN_MKL_TRMM_L(dcomplex, MKL_Complex16, cd, z)
+EIGEN_MKL_TRMM_L(float, float, f, s)
+EIGEN_MKL_TRMM_L(scomplex, MKL_Complex8, cf, c)
+
+// implements col-major += alpha * op(general) * op(triangular)
+#define EIGEN_MKL_TRMM_R(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
+template <typename Index, int Mode, \
+          int LhsStorageOrder, bool ConjugateLhs, \
+          int RhsStorageOrder, bool ConjugateRhs> \
+struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
+         LhsStorageOrder,ConjugateLhs,RhsStorageOrder,ConjugateRhs,ColMajor> \
+{ \
+  enum { \
+    IsLower = (Mode&Lower) == Lower, \
+    SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \
+    IsUnitDiag  = (Mode&UnitDiag) ? 1 : 0, \
+    IsZeroDiag  = (Mode&ZeroDiag) ? 1 : 0, \
+    LowUp = IsLower ? Lower : Upper, \
+    conjA = ((RhsStorageOrder==ColMajor) && ConjugateRhs) ? 1 : 0 \
+  }; \
+\
+  static EIGEN_DONT_INLINE void run( \
+    Index _rows, Index _cols, Index _depth, \
+    const EIGTYPE* _lhs, Index lhsStride, \
+    const EIGTYPE* _rhs, Index rhsStride, \
+    EIGTYPE* res,        Index resStride, \
+    EIGTYPE alpha) \
+  { \
+   Index diagSize  = (std::min)(_cols,_depth); \
+   Index rows      = _rows; \
+   Index depth     = IsLower ? _depth : diagSize; \
+   Index cols      = IsLower ? diagSize : _cols; \
+\
+   typedef Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> MatrixLhs; \
+   typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs; \
+\
+/* Non-square case - doesn't fit to MKL ?TRMM. Fall to default triangular product or call MKL ?GEMM*/ \
+   if (cols != depth) { \
+\
+     int nthr = mkl_domain_get_max_threads(MKL_BLAS); \
+\
+     if ((nthr==1) && (((std::max)(cols,depth)-diagSize)/(double)diagSize < 0.5)) { \
+     /* Most likely no benefit to call TRMM or GEMM from MKL*/ \
+       product_triangular_matrix_matrix<EIGTYPE,Index,Mode,false, \
+       LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, BuiltIn>::run( \
+           _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha); \
+       /*std::cout << "TRMM_R: A is not square! Go to Eigen TRMM implementation!\n";*/ \
+     } else { \
+     /* Make sense to call GEMM */ \
+       Map<const MatrixRhs, 0, OuterStride<> > rhsMap(_rhs,depth,cols, OuterStride<>(rhsStride)); \
+       MatrixRhs aa_tmp=rhsMap.template triangularView<Mode>(); \
+       MKL_INT aStride = aa_tmp.outerStride(); \
+       gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> blocking(_rows,_cols,_depth); \
+       general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor>::run( \
+       rows, cols, depth, _lhs, lhsStride, aa_tmp.data(), aStride, res, resStride, alpha, blocking, 0); \
+\
+     /*std::cout << "TRMM_R: A is not square! Go to MKL GEMM implementation! " << nthr<<" \n";*/ \
+     } \
+     return; \
+   } \
+   char side = 'R', transa, uplo, diag = 'N'; \
+   EIGTYPE *b; \
+   const EIGTYPE *a; \
+   MKL_INT m, n, lda, ldb; \
+   MKLTYPE alpha_; \
+\
+/* Set alpha_*/ \
+   assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \
+\
+/* Set m, n */ \
+   m = (MKL_INT)rows; \
+   n = (MKL_INT)diagSize; \
+\
+/* Set trans */ \
+   transa = (RhsStorageOrder==RowMajor) ? ((ConjugateRhs) ? 'C' : 'T') : 'N'; \
+\
+/* Set b, ldb */ \
+   Map<const MatrixLhs, 0, OuterStride<> > lhs(_lhs,rows,depth,OuterStride<>(lhsStride)); \
+   MatrixX##EIGPREFIX b_tmp; \
+\
+   if (ConjugateLhs) b_tmp = lhs.conjugate(); else b_tmp = lhs; \
+   b = b_tmp.data(); \
+   ldb = b_tmp.outerStride(); \
+\
+/* Set uplo */ \
+   uplo = IsLower ? 'L' : 'U'; \
+   if (RhsStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \
+/* Set a, lda */ \
+   Map<const MatrixRhs, 0, OuterStride<> > rhs(_rhs,depth,cols, OuterStride<>(rhsStride)); \
+   MatrixRhs a_tmp; \
+\
+   if ((conjA!=0) || (SetDiag==0)) { \
+     if (conjA) a_tmp = rhs.conjugate(); else a_tmp = rhs; \
+     if (IsZeroDiag) \
+       a_tmp.diagonal().setZero(); \
+     else if (IsUnitDiag) \
+       a_tmp.diagonal().setOnes();\
+     a = a_tmp.data(); \
+     lda = a_tmp.outerStride(); \
+   } else { \
+     a = _rhs; \
+     lda = rhsStride; \
+   } \
+   /*std::cout << "TRMM_R: A is square! Go to MKL TRMM implementation! \n";*/ \
+/* call ?trmm*/ \
+   MKLPREFIX##trmm(&side, &uplo, &transa, &diag, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (MKLTYPE*)b, &ldb); \
+\
+/* Add op(a_triangular)*b into res*/ \
+   Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \
+   res_tmp=res_tmp+b_tmp; \
+  } \
+};
+
+EIGEN_MKL_TRMM_R(double, double, d, d)
+EIGEN_MKL_TRMM_R(dcomplex, MKL_Complex16, cd, z)
+EIGEN_MKL_TRMM_R(float, float, f, s)
+EIGEN_MKL_TRMM_R(scomplex, MKL_Complex8, cf, c)
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_TRIANGULAR_MATRIX_MATRIX_MKL_H
diff --git a/extern/Eigen3/Eigen/src/Core/products/TriangularMatrixVector.h b/extern/Eigen3/Eigen/src/Core/products/TriangularMatrixVector.h
index 71b4a52ab80..b1c10c201c5 100644
--- a/extern/Eigen3/Eigen/src/Core/products/TriangularMatrixVector.h
+++ b/extern/Eigen3/Eigen/src/Core/products/TriangularMatrixVector.h
@@ -3,45 +3,36 @@
 //
 // Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
-// Eigen is free software; you can redistribute it and/or
-// modify it under the terms of the GNU Lesser General Public
-// License as published by the Free Software Foundation; either
-// version 3 of the License, or (at your option) any later version.
-//
-// Alternatively, you can redistribute it and/or
-// modify it under the terms of the GNU General Public License as
-// published by the Free Software Foundation; either version 2 of
-// the License, or (at your option) any later version.
-//
-// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
-// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU Lesser General Public
-// License and a copy of the GNU General Public License along with
-// Eigen. If not, see <http://www.gnu.org/licenses/>.
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 #ifndef EIGEN_TRIANGULARMATRIXVECTOR_H
 #define EIGEN_TRIANGULARMATRIXVECTOR_H
 
+namespace Eigen { 
+
 namespace internal {
 
-template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int StorageOrder>
-struct product_triangular_matrix_vector;
+template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int StorageOrder, int Version=Specialized>
+struct triangular_matrix_vector_product;
 
-template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs>
-struct product_triangular_matrix_vector<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,ColMajor>
+template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int Version>
+struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,ColMajor,Version>
 {
   typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
   enum {
     IsLower = ((Mode&Lower)==Lower),
-    HasUnitDiag = (Mode & UnitDiag)==UnitDiag
+    HasUnitDiag = (Mode & UnitDiag)==UnitDiag,
+    HasZeroDiag = (Mode & ZeroDiag)==ZeroDiag
   };
-  static EIGEN_DONT_INLINE  void run(Index rows, Index cols, const LhsScalar* _lhs, Index lhsStride,
+  static EIGEN_DONT_INLINE  void run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
                                      const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, ResScalar alpha)
   {
     static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;
+    Index size = (std::min)(_rows,_cols);
+    Index rows = IsLower ? _rows : (std::min)(_rows,_cols);
+    Index cols = IsLower ? (std::min)(_rows,_cols) : _cols;
 
     typedef Map<const Matrix<LhsScalar,Dynamic,Dynamic,ColMajor>, 0, OuterStride<> > LhsMap;
     const LhsMap lhs(_lhs,rows,cols,OuterStride<>(lhsStride));
@@ -54,45 +45,57 @@ struct product_triangular_matrix_vector<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
     typedef Map<Matrix<ResScalar,Dynamic,1> > ResMap;
     ResMap res(_res,rows);
 
-    for (Index pi=0; pi<cols; pi+=PanelWidth)
+    for (Index pi=0; pi<size; pi+=PanelWidth)
     {
-      Index actualPanelWidth = (std::min)(PanelWidth, cols-pi);
+      Index actualPanelWidth = (std::min)(PanelWidth, size-pi);
       for (Index k=0; k<actualPanelWidth; ++k)
       {
         Index i = pi + k;
-        Index s = IsLower ? (HasUnitDiag ? i+1 : i ) : pi;
+        Index s = IsLower ? ((HasUnitDiag||HasZeroDiag) ? i+1 : i ) : pi;
         Index r = IsLower ? actualPanelWidth-k : k+1;
-        if ((!HasUnitDiag) || (--r)>0)
+        if ((!(HasUnitDiag||HasZeroDiag)) || (--r)>0)
           res.segment(s,r) += (alpha * cjRhs.coeff(i)) * cjLhs.col(i).segment(s,r);
         if (HasUnitDiag)
           res.coeffRef(i) += alpha * cjRhs.coeff(i);
       }
-      Index r = IsLower ? cols - pi - actualPanelWidth : pi;
+      Index r = IsLower ? rows - pi - actualPanelWidth : pi;
       if (r>0)
       {
         Index s = IsLower ? pi+actualPanelWidth : 0;
-        general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjLhs,RhsScalar,ConjRhs>::run(
+        general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjLhs,RhsScalar,ConjRhs,BuiltIn>::run(
             r, actualPanelWidth,
             &lhs.coeffRef(s,pi), lhsStride,
             &rhs.coeffRef(pi), rhsIncr,
             &res.coeffRef(s), resIncr, alpha);
       }
     }
+    if((!IsLower) && cols>size)
+    {
+      general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjLhs,RhsScalar,ConjRhs>::run(
+          rows, cols-size,
+          &lhs.coeffRef(0,size), lhsStride,
+          &rhs.coeffRef(size), rhsIncr,
+          _res, resIncr, alpha);
+    }
   }
 };
 
-template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs>
-struct product_triangular_matrix_vector<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,RowMajor>
+template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs,int Version>
+struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,RowMajor,Version>
 {
   typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
   enum {
     IsLower = ((Mode&Lower)==Lower),
-    HasUnitDiag = (Mode & UnitDiag)==UnitDiag
+    HasUnitDiag = (Mode & UnitDiag)==UnitDiag,
+    HasZeroDiag = (Mode & ZeroDiag)==ZeroDiag
   };
-  static void run(Index rows, Index cols, const LhsScalar* _lhs, Index lhsStride,
+  static void run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
                   const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, ResScalar alpha)
   {
     static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;
+    Index diagSize = (std::min)(_rows,_cols);
+    Index rows = IsLower ? _rows : diagSize;
+    Index cols = IsLower ? diagSize : _cols;
 
     typedef Map<const Matrix<LhsScalar,Dynamic,Dynamic,RowMajor>, 0, OuterStride<> > LhsMap;
     const LhsMap lhs(_lhs,rows,cols,OuterStride<>(lhsStride));
@@ -105,15 +108,15 @@ struct product_triangular_matrix_vector<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
     typedef Map<Matrix<ResScalar,Dynamic,1>, 0, InnerStride<> > ResMap;
     ResMap res(_res,rows,InnerStride<>(resIncr));
     
-    for (Index pi=0; pi<cols; pi+=PanelWidth)
+    for (Index pi=0; pi<diagSize; pi+=PanelWidth)
     {
-      Index actualPanelWidth = (std::min)(PanelWidth, cols-pi);
+      Index actualPanelWidth = (std::min)(PanelWidth, diagSize-pi);
       for (Index k=0; k<actualPanelWidth; ++k)
       {
         Index i = pi + k;
-        Index s = IsLower ? pi  : (HasUnitDiag ? i+1 : i);
+        Index s = IsLower ? pi  : ((HasUnitDiag||HasZeroDiag) ? i+1 : i);
         Index r = IsLower ? k+1 : actualPanelWidth-k;
-        if ((!HasUnitDiag) || (--r)>0)
+        if ((!(HasUnitDiag||HasZeroDiag)) || (--r)>0)
           res.coeffRef(i) += alpha * (cjLhs.row(i).segment(s,r).cwiseProduct(cjRhs.segment(s,r).transpose())).sum();
         if (HasUnitDiag)
           res.coeffRef(i) += alpha * cjRhs.coeff(i);
@@ -122,13 +125,21 @@ struct product_triangular_matrix_vector<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
       if (r>0)
       {
         Index s = IsLower ? 0 : pi + actualPanelWidth;
-        general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjLhs,RhsScalar,ConjRhs>::run(
+        general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjLhs,RhsScalar,ConjRhs,BuiltIn>::run(
             actualPanelWidth, r,
             &lhs.coeffRef(pi,s), lhsStride,
             &rhs.coeffRef(s), rhsIncr,
             &res.coeffRef(pi), resIncr, alpha);
       }
     }
+    if(IsLower && rows>diagSize)
+    {
+      general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjLhs,RhsScalar,ConjRhs>::run(
+            rows-diagSize, cols,
+            &lhs.coeffRef(diagSize,0), lhsStride,
+            &rhs.coeffRef(0), rhsIncr,
+            &res.coeffRef(diagSize), resIncr, alpha);
+    }
   }
 };
 
@@ -180,7 +191,7 @@ struct TriangularProduct<Mode,false,Lhs,true,Rhs,false>
   {
     eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
 
-    typedef TriangularProduct<(Mode & UnitDiag) | ((Mode & Lower) ? Upper : Lower),true,Transpose<const Rhs>,false,Transpose<const Lhs>,true> TriangularProductTranspose;
+    typedef TriangularProduct<(Mode & (UnitDiag|ZeroDiag)) | ((Mode & Lower) ? Upper : Lower),true,Transpose<const Rhs>,false,Transpose<const Lhs>,true> TriangularProductTranspose;
     Transpose<Dest> dstT(dst);
     internal::trmv_selector<(int(internal::traits<Rhs>::Flags)&RowMajorBit) ? ColMajor : RowMajor>::run(
       TriangularProductTranspose(m_rhs.transpose(),m_lhs.transpose()), dstT, alpha);
@@ -208,8 +219,8 @@ template<> struct trmv_selector<ColMajor>
     typedef typename ProductType::RhsBlasTraits RhsBlasTraits;
     typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest;
 
-    const ActualLhsType actualLhs = LhsBlasTraits::extract(prod.lhs());
-    const ActualRhsType actualRhs = RhsBlasTraits::extract(prod.rhs());
+    typename internal::add_const_on_value_type<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
+    typename internal::add_const_on_value_type<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
 
     ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
                                   * RhsBlasTraits::extractScalarFactor(prod.rhs());
@@ -247,7 +258,7 @@ template<> struct trmv_selector<ColMajor>
         MappedDest(actualDestPtr, dest.size()) = dest;
     }
     
-    internal::product_triangular_matrix_vector
+    internal::triangular_matrix_vector_product
       <Index,Mode,
        LhsScalar, LhsBlasTraits::NeedToConjugate,
        RhsScalar, RhsBlasTraits::NeedToConjugate,
@@ -307,7 +318,7 @@ template<> struct trmv_selector<RowMajor>
       Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
     }
     
-    internal::product_triangular_matrix_vector
+    internal::triangular_matrix_vector_product
       <Index,Mode,
        LhsScalar, LhsBlasTraits::NeedToConjugate,
        RhsScalar, RhsBlasTraits::NeedToConjugate,
@@ -322,4 +333,6 @@ template<> struct trmv_selector<RowMajor>
 
 } // end namespace internal
 
+} // end namespace Eigen
+
 #endif // EIGEN_TRIANGULARMATRIXVECTOR_H
diff --git a/extern/Eigen3/Eigen/src/Core/products/TriangularMatrixVector_MKL.h b/extern/Eigen3/Eigen/src/Core/products/TriangularMatrixVector_MKL.h
new file mode 100644
index 00000000000..3589b8c5ef6
--- /dev/null
+++ b/extern/Eigen3/Eigen/src/Core/products/TriangularMatrixVector_MKL.h
@@ -0,0 +1,247 @@
+/*
+ Copyright (c) 2011, Intel Corporation. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its contributors may
+   be used to endorse or promote products derived from this software without
+   specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ ********************************************************************************
+ *   Content : Eigen bindings to Intel(R) MKL
+ *   Triangular matrix-vector product functionality based on ?TRMV.
+ ********************************************************************************
+*/
+
+#ifndef EIGEN_TRIANGULAR_MATRIX_VECTOR_MKL_H
+#define EIGEN_TRIANGULAR_MATRIX_VECTOR_MKL_H
+
+namespace Eigen { 
+
+namespace internal {
+
+/**********************************************************************
+* This file implements triangular matrix-vector multiplication using BLAS
+**********************************************************************/
+
+// trmv/hemv specialization
+
+template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int StorageOrder>
+struct triangular_matrix_vector_product_trmv :
+  triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,StorageOrder,BuiltIn> {};
+
+#define EIGEN_MKL_TRMV_SPECIALIZE(Scalar) \
+template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
+struct triangular_matrix_vector_product<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,ColMajor,Specialized> { \
+ static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \
+                                     const Scalar* _rhs, Index rhsIncr, Scalar* _res, Index resIncr, Scalar alpha) { \
+      triangular_matrix_vector_product_trmv<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,ColMajor>::run( \
+        _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
+  } \
+}; \
+template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
+struct triangular_matrix_vector_product<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,RowMajor,Specialized> { \
+ static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \
+                                     const Scalar* _rhs, Index rhsIncr, Scalar* _res, Index resIncr, Scalar alpha) { \
+      triangular_matrix_vector_product_trmv<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,RowMajor>::run( \
+        _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
+  } \
+};
+
+EIGEN_MKL_TRMV_SPECIALIZE(double)
+EIGEN_MKL_TRMV_SPECIALIZE(float)
+EIGEN_MKL_TRMV_SPECIALIZE(dcomplex)
+EIGEN_MKL_TRMV_SPECIALIZE(scomplex)
+
+// implements col-major: res += alpha * op(triangular) * vector
+#define EIGEN_MKL_TRMV_CM(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
+template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
+struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,ColMajor> { \
+  enum { \
+    IsLower = (Mode&Lower) == Lower, \
+    SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \
+    IsUnitDiag  = (Mode&UnitDiag) ? 1 : 0, \
+    IsZeroDiag  = (Mode&ZeroDiag) ? 1 : 0, \
+    LowUp = IsLower ? Lower : Upper \
+  }; \
+ static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \
+                             const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha, level3_blocking<EIGTYPE,EIGTYPE>& blocking) \
+ { \
+   if (ConjLhs || IsZeroDiag) { \
+     triangular_matrix_vector_product<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,ColMajor,BuiltIn>::run( \
+       _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha, blocking); \
+     return; \
+   }\
+   Index size = (std::min)(_rows,_cols); \
+   Index rows = IsLower ? _rows : size; \
+   Index cols = IsLower ? size : _cols; \
+\
+   typedef VectorX##EIGPREFIX VectorRhs; \
+   EIGTYPE *x, *y;\
+\
+/* Set x*/ \
+   Map<const VectorRhs, 0, InnerStride<> > rhs(_rhs,cols,InnerStride<>(rhsIncr)); \
+   VectorRhs x_tmp; \
+   if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
+   x = x_tmp.data(); \
+\
+/* Square part handling */\
+\
+   char trans, uplo, diag; \
+   MKL_INT m, n, lda, incx, incy; \
+   EIGTYPE const *a; \
+   MKLTYPE alpha_, beta_; \
+   assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \
+   assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(beta_, EIGTYPE(1)); \
+\
+/* Set m, n */ \
+   n = (MKL_INT)size; \
+   lda = lhsStride; \
+   incx = 1; \
+   incy = resIncr; \
+\
+/* Set uplo, trans and diag*/ \
+   trans = 'N'; \
+   uplo = IsLower ? 'L' : 'U'; \
+   diag = IsUnitDiag ? 'U' : 'N'; \
+\
+/* call ?TRMV*/ \
+   MKLPREFIX##trmv(&uplo, &trans, &diag, &n, (const MKLTYPE*)_lhs, &lda, (MKLTYPE*)x, &incx); \
+\
+/* Add op(a_tr)rhs into res*/ \
+   MKLPREFIX##axpy(&n, &alpha_,(const MKLTYPE*)x, &incx, (MKLTYPE*)_res, &incy); \
+/* Non-square case - doesn't fit to MKL ?TRMV. Fall to default triangular product*/ \
+   if (size<(std::max)(rows,cols)) { \
+     typedef Matrix<EIGTYPE, Dynamic, Dynamic> MatrixLhs; \
+     if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
+     x = x_tmp.data(); \
+     if (size<rows) { \
+       y = _res + size*resIncr; \
+       a = _lhs + size; \
+       m = rows-size; \
+       n = size; \
+     } \
+     else { \
+       x += size; \
+       y = _res; \
+       a = _lhs + size*lda; \
+       m = size; \
+       n = cols-size; \
+     } \
+     MKLPREFIX##gemv(&trans, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)x, &incx, &beta_, (MKLTYPE*)y, &incy); \
+   } \
+  } \
+};
+
+EIGEN_MKL_TRMV_CM(double, double, d, d)
+EIGEN_MKL_TRMV_CM(dcomplex, MKL_Complex16, cd, z)
+EIGEN_MKL_TRMV_CM(float, float, f, s)
+EIGEN_MKL_TRMV_CM(scomplex, MKL_Complex8, cf, c)
+
+// implements row-major: res += alpha * op(triangular) * vector
+#define EIGEN_MKL_TRMV_RM(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
+template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
+struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,RowMajor> { \
+  enum { \
+    IsLower = (Mode&Lower) == Lower, \
+    SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \
+    IsUnitDiag  = (Mode&UnitDiag) ? 1 : 0, \
+    IsZeroDiag  = (Mode&ZeroDiag) ? 1 : 0, \
+    LowUp = IsLower ? Lower : Upper \
+  }; \
+ static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \
+                             const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha, level3_blocking<EIGTYPE,EIGTYPE>& blocking) \
+ { \
+   if (IsZeroDiag) { \
+     triangular_matrix_vector_product<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,RowMajor,BuiltIn>::run( \
+       _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha, blocking); \
+     return; \
+   }\
+   Index size = (std::min)(_rows,_cols); \
+   Index rows = IsLower ? _rows : size; \
+   Index cols = IsLower ? size : _cols; \
+\
+   typedef VectorX##EIGPREFIX VectorRhs; \
+   EIGTYPE *x, *y;\
+\
+/* Set x*/ \
+   Map<const VectorRhs, 0, InnerStride<> > rhs(_rhs,cols,InnerStride<>(rhsIncr)); \
+   VectorRhs x_tmp; \
+   if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
+   x = x_tmp.data(); \
+\
+/* Square part handling */\
+\
+   char trans, uplo, diag; \
+   MKL_INT m, n, lda, incx, incy; \
+   EIGTYPE const *a; \
+   MKLTYPE alpha_, beta_; \
+   assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \
+   assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(beta_, EIGTYPE(1)); \
+\
+/* Set m, n */ \
+   n = (MKL_INT)size; \
+   lda = lhsStride; \
+   incx = 1; \
+   incy = resIncr; \
+\
+/* Set uplo, trans and diag*/ \
+   trans = ConjLhs ? 'C' : 'T'; \
+   uplo = IsLower ? 'U' : 'L'; \
+   diag = IsUnitDiag ? 'U' : 'N'; \
+\
+/* call ?TRMV*/ \
+   MKLPREFIX##trmv(&uplo, &trans, &diag, &n, (const MKLTYPE*)_lhs, &lda, (MKLTYPE*)x, &incx); \
+\
+/* Add op(a_tr)rhs into res*/ \
+   MKLPREFIX##axpy(&n, &alpha_,(const MKLTYPE*)x, &incx, (MKLTYPE*)_res, &incy); \
+/* Non-square case - doesn't fit to MKL ?TRMV. Fall to default triangular product*/ \
+   if (size<(std::max)(rows,cols)) { \
+     typedef Matrix<EIGTYPE, Dynamic, Dynamic> MatrixLhs; \
+     if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
+     x = x_tmp.data(); \
+     if (size<rows) { \
+       y = _res + size*resIncr; \
+       a = _lhs + size*lda; \
+       m = rows-size; \
+       n = size; \
+     } \
+     else { \
+       x += size; \
+       y = _res; \
+       a = _lhs + size; \
+       m = size; \
+       n = cols-size; \
+     } \
+     MKLPREFIX##gemv(&trans, &n, &m, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)x, &incx, &beta_, (MKLTYPE*)y, &incy); \
+   } \
+  } \
+};
+
+EIGEN_MKL_TRMV_RM(double, double, d, d)
+EIGEN_MKL_TRMV_RM(dcomplex, MKL_Complex16, cd, z)
+EIGEN_MKL_TRMV_RM(float, float, f, s)
+EIGEN_MKL_TRMV_RM(scomplex, MKL_Complex8, cf, c)
+
+} // end namespase internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_TRIANGULAR_MATRIX_VECTOR_MKL_H
diff --git a/extern/Eigen3/Eigen/src/Core/products/TriangularSolverMatrix.h b/extern/Eigen3/Eigen/src/Core/products/TriangularSolverMatrix.h
index 4dced6b0eb9..a49ea318345 100644
--- a/extern/Eigen3/Eigen/src/Core/products/TriangularSolverMatrix.h
+++ b/extern/Eigen3/Eigen/src/Core/products/TriangularSolverMatrix.h
@@ -3,28 +3,15 @@
 //
 // Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
-// Eigen is free software; you can redistribute it and/or
-// modify it under the terms of the GNU Lesser General Public
-// License as published by the Free Software Foundation; either
-// version 3 of the License, or (at your option) any later version.
-//
-// Alternatively, you can redistribute it and/or
-// modify it under the terms of the GNU General Public License as
-// published by the Free Software Foundation; either version 2 of
-// the License, or (at your option) any later version.
-//
-// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
-// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU Lesser General Public
-// License and a copy of the GNU General Public License along with
-// Eigen. If not, see <http://www.gnu.org/licenses/>.
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 #ifndef EIGEN_TRIANGULAR_SOLVER_MATRIX_H
 #define EIGEN_TRIANGULAR_SOLVER_MATRIX_H
 
+namespace Eigen { 
+
 namespace internal {
 
 // if the rhs is row major, let's transpose the product
@@ -34,14 +21,15 @@ struct triangular_solve_matrix<Scalar,Index,Side,Mode,Conjugate,TriStorageOrder,
   static EIGEN_DONT_INLINE void run(
     Index size, Index cols,
     const Scalar*  tri, Index triStride,
-    Scalar* _other, Index otherStride)
+    Scalar* _other, Index otherStride,
+    level3_blocking<Scalar,Scalar>& blocking)
   {
     triangular_solve_matrix<
       Scalar, Index, Side==OnTheLeft?OnTheRight:OnTheLeft,
       (Mode&UnitDiag) | ((Mode&Upper) ? Lower : Upper),
       NumTraits<Scalar>::IsComplex && Conjugate,
       TriStorageOrder==RowMajor ? ColMajor : RowMajor, ColMajor>
-      ::run(size, cols, tri, triStride, _other, otherStride);
+      ::run(size, cols, tri, triStride, _other, otherStride, blocking);
   }
 };
 
@@ -53,7 +41,8 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO
   static EIGEN_DONT_INLINE void run(
     Index size, Index otherSize,
     const Scalar* _tri, Index triStride,
-    Scalar* _other, Index otherStride)
+    Scalar* _other, Index otherStride,
+    level3_blocking<Scalar,Scalar>& blocking)
   {
     Index cols = otherSize;
     const_blas_data_mapper<Scalar, Index, TriStorageOrder> tri(_tri,triStride);
@@ -65,22 +54,29 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO
       IsLower = (Mode&Lower) == Lower
     };
 
-    Index kc = size; // cache block size along the K direction
-    Index mc = size;  // cache block size along the M direction
-    Index nc = cols;  // cache block size along the N direction
-    computeProductBlockingSizes<Scalar,Scalar,4>(kc, mc, nc);
+    Index kc = blocking.kc();                   // cache block size along the K direction
+    Index mc = (std::min)(size,blocking.mc());  // cache block size along the M direction
 
+    std::size_t sizeA = kc*mc;
+    std::size_t sizeB = kc*cols;
     std::size_t sizeW = kc*Traits::WorkSpaceFactor;
-    std::size_t sizeB = sizeW + kc*cols;
-    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
-    ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
-    Scalar* blockB = allocatedBlockB + sizeW;
+
+    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
+    ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
+    ei_declare_aligned_stack_constructed_variable(Scalar, blockW, sizeW, blocking.blockW());
 
     conj_if<Conjugate> conj;
     gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, Conjugate, false> gebp_kernel;
     gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, TriStorageOrder> pack_lhs;
     gemm_pack_rhs<Scalar, Index, Traits::nr, ColMajor, false, true> pack_rhs;
 
+    // the goal here is to subdivise the Rhs panels such that we keep some cache
+    // coherence when accessing the rhs elements
+    std::ptrdiff_t l1, l2;
+    manage_caching_sizes(GetAction, &l1, &l2);
+    Index subcols = cols>0 ? l2/(4 * sizeof(Scalar) * otherStride) : 0;
+    subcols = std::max<Index>((subcols/Traits::nr)*Traits::nr, Traits::nr);
+
     for(Index k2=IsLower ? 0 : size;
         IsLower ? k2<size : k2>0;
         IsLower ? k2+=kc : k2-=kc)
@@ -92,16 +88,18 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO
       // A11 (the triangular part) and A21 the remaining rectangular part.
       // Then the high level algorithm is:
       //  - B = R1                    => general block copy (done during the next step)
-      //  - R1 = L1^-1 B              => tricky part
+      //  - R1 = A11^-1 B             => tricky part
       //  - update B from the new R1  => actually this has to be performed continuously during the above step
-      //  - R2 = L2 * B               => GEPP
+      //  - R2 -= A21 * B             => GEPP
 
-      // The tricky part: compute R1 = L1^-1 B while updating B from R1
-      // The idea is to split L1 into multiple small vertical panels.
-      // Each panel can be split into a small triangular part A1 which is processed without optimization,
-      // and the remaining small part A2 which is processed using gebp with appropriate block strides
+      // The tricky part: compute R1 = A11^-1 B while updating B from R1
+      // The idea is to split A11 into multiple small vertical panels.
+      // Each panel can be split into a small triangular part T1k which is processed without optimization,
+      // and the remaining small part T2k which is processed using gebp with appropriate block strides
+      for(Index j2=0; j2<cols; j2+=subcols)
       {
-        // for each small vertical panels of lhs
+        Index actual_cols = (std::min)(cols-j2,subcols);
+        // for each small vertical panels [T1k^T, T2k^T]^T of lhs
         for (Index k1=0; k1<actual_kc; k1+=SmallPanelWidth)
         {
           Index actualPanelWidth = std::min<Index>(actual_kc-k1, SmallPanelWidth);
@@ -114,11 +112,11 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO
             Index rs = actualPanelWidth - k - 1; // remaining size
 
             Scalar a = (Mode & UnitDiag) ? Scalar(1) : Scalar(1)/conj(tri(i,i));
-            for (Index j=0; j<cols; ++j)
+            for (Index j=j2; j<j2+actual_cols; ++j)
             {
               if (TriStorageOrder==RowMajor)
               {
-                Scalar b = 0;
+                Scalar b(0);
                 const Scalar* l = &tri(i,s);
                 Scalar* r = &other(s,j);
                 for (Index i3=0; i3<k; ++i3)
@@ -143,7 +141,7 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO
           Index blockBOffset = IsLower ? k1 : lengthTarget;
 
           // update the respective rows of B from other
-          pack_rhs(blockB, _other+startBlock, otherStride, actualPanelWidth, cols, actual_kc, blockBOffset);
+          pack_rhs(blockB+actual_kc*j2, &other(startBlock,j2), otherStride, actualPanelWidth, actual_cols, actual_kc, blockBOffset);
 
           // GEBP
           if (lengthTarget>0)
@@ -152,13 +150,13 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO
 
             pack_lhs(blockA, &tri(startTarget,startBlock), triStride, actualPanelWidth, lengthTarget);
 
-            gebp_kernel(_other+startTarget, otherStride, blockA, blockB, lengthTarget, actualPanelWidth, cols, Scalar(-1),
-                        actualPanelWidth, actual_kc, 0, blockBOffset);
+            gebp_kernel(&other(startTarget,j2), otherStride, blockA, blockB+actual_kc*j2, lengthTarget, actualPanelWidth, actual_cols, Scalar(-1),
+                        actualPanelWidth, actual_kc, 0, blockBOffset, blockW);
           }
         }
       }
-
-      // R2 = A2 * B => GEPP
+      
+      // R2 -= A21 * B => GEPP
       {
         Index start = IsLower ? k2+kc : 0;
         Index end   = IsLower ? size : k2-kc;
@@ -169,7 +167,7 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO
           {
             pack_lhs(blockA, &tri(i2, IsLower ? k2 : k2-kc), triStride, actual_kc, actual_mc);
 
-            gebp_kernel(_other+i2, otherStride, blockA, blockB, actual_mc, actual_kc, cols, Scalar(-1));
+            gebp_kernel(_other+i2, otherStride, blockA, blockB, actual_mc, actual_kc, cols, Scalar(-1), -1, -1, 0, 0, blockW);
           }
         }
       }
@@ -185,7 +183,8 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorage
   static EIGEN_DONT_INLINE void run(
     Index size, Index otherSize,
     const Scalar* _tri, Index triStride,
-    Scalar* _other, Index otherStride)
+    Scalar* _other, Index otherStride,
+    level3_blocking<Scalar,Scalar>& blocking)
   {
     Index rows = otherSize;
     const_blas_data_mapper<Scalar, Index, TriStorageOrder> rhs(_tri,triStride);
@@ -198,19 +197,16 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorage
       IsLower = (Mode&Lower) == Lower
     };
 
-//     Index kc = std::min<Index>(Traits::Max_kc/4,size); // cache block size along the K direction
-//     Index mc = std::min<Index>(Traits::Max_mc,size);   // cache block size along the M direction
-    // check that !!!!
-    Index kc = size; // cache block size along the K direction
-    Index mc = size;  // cache block size along the M direction
-    Index nc = rows;  // cache block size along the N direction
-    computeProductBlockingSizes<Scalar,Scalar,4>(kc, mc, nc);
+    Index kc = blocking.kc();                   // cache block size along the K direction
+    Index mc = (std::min)(rows,blocking.mc());  // cache block size along the M direction
 
+    std::size_t sizeA = kc*mc;
+    std::size_t sizeB = kc*size;
     std::size_t sizeW = kc*Traits::WorkSpaceFactor;
-    std::size_t sizeB = sizeW + kc*size;
-    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
-    ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
-    Scalar* blockB = allocatedBlockB + sizeW;
+
+    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
+    ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
+    ei_declare_aligned_stack_constructed_variable(Scalar, blockW, sizeW, blocking.blockW());
 
     conj_if<Conjugate> conj;
     gebp_kernel<Scalar,Scalar, Index, Traits::mr, Traits::nr, false, Conjugate> gebp_kernel;
@@ -277,7 +273,7 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorage
                           Scalar(-1),
                           actual_kc, actual_kc, // strides
                           panelOffset, panelOffset, // offsets
-                          allocatedBlockB);  // workspace
+                          blockW);  // workspace
             }
 
             // unblocked triangular solve
@@ -308,7 +304,7 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorage
         if (rs>0)
           gebp_kernel(_other+i2+startPanel*otherStride, otherStride, blockA, geb,
                       actual_mc, actual_kc, rs, Scalar(-1),
-                      -1, -1, 0, 0, allocatedBlockB);
+                      -1, -1, 0, 0, blockW);
       }
     }
   }
@@ -316,4 +312,6 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorage
 
 } // end namespace internal
 
+} // end namespace Eigen
+
 #endif // EIGEN_TRIANGULAR_SOLVER_MATRIX_H
diff --git a/extern/Eigen3/Eigen/src/Core/products/TriangularSolverMatrix_MKL.h b/extern/Eigen3/Eigen/src/Core/products/TriangularSolverMatrix_MKL.h
new file mode 100644
index 00000000000..a4f508b2e83
--- /dev/null
+++ b/extern/Eigen3/Eigen/src/Core/products/TriangularSolverMatrix_MKL.h
@@ -0,0 +1,155 @@
+/*
+ Copyright (c) 2011, Intel Corporation. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its contributors may
+   be used to endorse or promote products derived from this software without
+   specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ ********************************************************************************
+ *   Content : Eigen bindings to Intel(R) MKL
+ *   Triangular matrix * matrix product functionality based on ?TRMM.
+ ********************************************************************************
+*/
+
+#ifndef EIGEN_TRIANGULAR_SOLVER_MATRIX_MKL_H
+#define EIGEN_TRIANGULAR_SOLVER_MATRIX_MKL_H
+
+namespace Eigen {
+
+namespace internal {
+
+// implements LeftSide op(triangular)^-1 * general
+#define EIGEN_MKL_TRSM_L(EIGTYPE, MKLTYPE, MKLPREFIX) \
+template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \
+struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor> \
+{ \
+  enum { \
+    IsLower = (Mode&Lower) == Lower, \
+    IsUnitDiag  = (Mode&UnitDiag) ? 1 : 0, \
+    IsZeroDiag  = (Mode&ZeroDiag) ? 1 : 0, \
+    conjA = ((TriStorageOrder==ColMajor) && Conjugate) ? 1 : 0 \
+  }; \
+  static EIGEN_DONT_INLINE void run( \
+      Index size, Index otherSize, \
+      const EIGTYPE* _tri, Index triStride, \
+      EIGTYPE* _other, Index otherStride, level3_blocking<EIGTYPE,EIGTYPE>& /*blocking*/) \
+  { \
+   MKL_INT m = size, n = otherSize, lda, ldb; \
+   char side = 'L', uplo, diag='N', transa; \
+   /* Set alpha_ */ \
+   MKLTYPE alpha; \
+   EIGTYPE myone(1); \
+   assign_scalar_eig2mkl(alpha, myone); \
+   ldb = otherStride;\
+\
+   const EIGTYPE *a; \
+/* Set trans */ \
+   transa = (TriStorageOrder==RowMajor) ? ((Conjugate) ? 'C' : 'T') : 'N'; \
+/* Set uplo */ \
+   uplo = IsLower ? 'L' : 'U'; \
+   if (TriStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \
+/* Set a, lda */ \
+   typedef Matrix<EIGTYPE, Dynamic, Dynamic, TriStorageOrder> MatrixTri; \
+   Map<const MatrixTri, 0, OuterStride<> > tri(_tri,size,size,OuterStride<>(triStride)); \
+   MatrixTri a_tmp; \
+\
+   if (conjA) { \
+     a_tmp = tri.conjugate(); \
+     a = a_tmp.data(); \
+     lda = a_tmp.outerStride(); \
+   } else { \
+     a = _tri; \
+     lda = triStride; \
+   } \
+   if (IsUnitDiag) diag='U'; \
+/* call ?trsm*/ \
+   MKLPREFIX##trsm(&side, &uplo, &transa, &diag, &m, &n, &alpha, (const MKLTYPE*)a, &lda, (MKLTYPE*)_other, &ldb); \
+ } \
+};
+
+EIGEN_MKL_TRSM_L(double, double, d)
+EIGEN_MKL_TRSM_L(dcomplex, MKL_Complex16, z)
+EIGEN_MKL_TRSM_L(float, float, s)
+EIGEN_MKL_TRSM_L(scomplex, MKL_Complex8, c)
+
+
+// implements RightSide general * op(triangular)^-1
+#define EIGEN_MKL_TRSM_R(EIGTYPE, MKLTYPE, MKLPREFIX) \
+template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \
+struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor> \
+{ \
+  enum { \
+    IsLower = (Mode&Lower) == Lower, \
+    IsUnitDiag  = (Mode&UnitDiag) ? 1 : 0, \
+    IsZeroDiag  = (Mode&ZeroDiag) ? 1 : 0, \
+    conjA = ((TriStorageOrder==ColMajor) && Conjugate) ? 1 : 0 \
+  }; \
+  static EIGEN_DONT_INLINE void run( \
+      Index size, Index otherSize, \
+      const EIGTYPE* _tri, Index triStride, \
+      EIGTYPE* _other, Index otherStride, level3_blocking<EIGTYPE,EIGTYPE>& /*blocking*/) \
+  { \
+   MKL_INT m = otherSize, n = size, lda, ldb; \
+   char side = 'R', uplo, diag='N', transa; \
+   /* Set alpha_ */ \
+   MKLTYPE alpha; \
+   EIGTYPE myone(1); \
+   assign_scalar_eig2mkl(alpha, myone); \
+   ldb = otherStride;\
+\
+   const EIGTYPE *a; \
+/* Set trans */ \
+   transa = (TriStorageOrder==RowMajor) ? ((Conjugate) ? 'C' : 'T') : 'N'; \
+/* Set uplo */ \
+   uplo = IsLower ? 'L' : 'U'; \
+   if (TriStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \
+/* Set a, lda */ \
+   typedef Matrix<EIGTYPE, Dynamic, Dynamic, TriStorageOrder> MatrixTri; \
+   Map<const MatrixTri, 0, OuterStride<> > tri(_tri,size,size,OuterStride<>(triStride)); \
+   MatrixTri a_tmp; \
+\
+   if (conjA) { \
+     a_tmp = tri.conjugate(); \
+     a = a_tmp.data(); \
+     lda = a_tmp.outerStride(); \
+   } else { \
+     a = _tri; \
+     lda = triStride; \
+   } \
+   if (IsUnitDiag) diag='U'; \
+/* call ?trsm*/ \
+   MKLPREFIX##trsm(&side, &uplo, &transa, &diag, &m, &n, &alpha, (const MKLTYPE*)a, &lda, (MKLTYPE*)_other, &ldb); \
+   /*std::cout << "TRMS_L specialization!\n";*/ \
+ } \
+};
+
+EIGEN_MKL_TRSM_R(double, double, d)
+EIGEN_MKL_TRSM_R(dcomplex, MKL_Complex16, z)
+EIGEN_MKL_TRSM_R(float, float, s)
+EIGEN_MKL_TRSM_R(scomplex, MKL_Complex8, c)
+
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_TRIANGULAR_SOLVER_MATRIX_MKL_H
diff --git a/extern/Eigen3/Eigen/src/Core/products/TriangularSolverVector.h b/extern/Eigen3/Eigen/src/Core/products/TriangularSolverVector.h
index 639d4a5b476..ce4d1008801 100644
--- a/extern/Eigen3/Eigen/src/Core/products/TriangularSolverVector.h
+++ b/extern/Eigen3/Eigen/src/Core/products/TriangularSolverVector.h
@@ -3,28 +3,15 @@
 //
 // Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
-// Eigen is free software; you can redistribute it and/or
-// modify it under the terms of the GNU Lesser General Public
-// License as published by the Free Software Foundation; either
-// version 3 of the License, or (at your option) any later version.
-//
-// Alternatively, you can redistribute it and/or
-// modify it under the terms of the GNU General Public License as
-// published by the Free Software Foundation; either version 2 of
-// the License, or (at your option) any later version.
-//
-// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
-// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU Lesser General Public
-// License and a copy of the GNU General Public License along with
-// Eigen. If not, see <http://www.gnu.org/licenses/>.
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 #ifndef EIGEN_TRIANGULAR_SOLVER_VECTOR_H
 #define EIGEN_TRIANGULAR_SOLVER_VECTOR_H
 
+namespace Eigen { 
+
 namespace internal {
 
 template<typename LhsScalar, typename RhsScalar, typename Index, int Mode, bool Conjugate, int StorageOrder>
@@ -147,4 +134,6 @@ struct triangular_solve_vector<LhsScalar, RhsScalar, Index, OnTheLeft, Mode, Con
 
 } // end namespace internal
 
+} // end namespace Eigen
+
 #endif // EIGEN_TRIANGULAR_SOLVER_VECTOR_H