Home | History | Annotate | Download | only in SparseLU
      1 // This file is part of Eigen, a lightweight C++ template library
      2 // for linear algebra.
      3 //
      4 // Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam (at) inria.fr>
      5 // Copyright (C) 2012-2014 Gael Guennebaud <gael.guennebaud (at) inria.fr>
      6 //
      7 // This Source Code Form is subject to the terms of the Mozilla
      8 // Public License v. 2.0. If a copy of the MPL was not distributed
      9 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
     10 
     11 
     12 #ifndef EIGEN_SPARSE_LU_H
     13 #define EIGEN_SPARSE_LU_H
     14 
     15 namespace Eigen {
     16 
     17 template <typename _MatrixType, typename _OrderingType = COLAMDOrdering<typename _MatrixType::StorageIndex> > class SparseLU;
     18 template <typename MappedSparseMatrixType> struct SparseLUMatrixLReturnType;
     19 template <typename MatrixLType, typename MatrixUType> struct SparseLUMatrixUReturnType;
     20 
     21 /** \ingroup SparseLU_Module
     22   * \class SparseLU
     23   *
     24   * \brief Sparse supernodal LU factorization for general matrices
     25   *
     26   * This class implements the supernodal LU factorization for general matrices.
     27   * It uses the main techniques from the sequential SuperLU package
     28   * (http://crd-legacy.lbl.gov/~xiaoye/SuperLU/). It handles transparently real
     29   * and complex arithmetics with single and double precision, depending on the
     30   * scalar type of your input matrix.
     31   * The code has been optimized to provide BLAS-3 operations during supernode-panel updates.
     32   * It benefits directly from the built-in high-performant Eigen BLAS routines.
     33   * Moreover, when the size of a supernode is very small, the BLAS calls are avoided to
     34   * enable a better optimization from the compiler. For best performance,
     35   * you should compile it with NDEBUG flag to avoid the numerous bounds checking on vectors.
     36   *
     37   * An important parameter of this class is the ordering method. It is used to reorder the columns
     38   * (and possibly the rows) of the matrix to reduce the number of new elements that are created during
     39   * numerical factorization. The cheapest method available is COLAMD.
     40   * See  \link OrderingMethods_Module the OrderingMethods module \endlink for the list of
     41   * built-in and external ordering methods.
     42   *
     43   * Simple example with key steps
     44   * \code
     45   * VectorXd x(n), b(n);
     46   * SparseMatrix<double, ColMajor> A;
     47   * SparseLU<SparseMatrix<double, ColMajor>, COLAMDOrdering<int> >   solver;
     48   * // fill A and b;
     49   * // Compute the ordering permutation vector from the structural pattern of A
     50   * solver.analyzePattern(A);
     51   * // Compute the numerical factorization
     52   * solver.factorize(A);
     53   * //Use the factors to solve the linear system
     54   * x = solver.solve(b);
     55   * \endcode
     56   *
     57   * \warning The input matrix A should be in a \b compressed and \b column-major form.
     58   * Otherwise an expensive copy will be made. You can call the inexpensive makeCompressed() to get a compressed matrix.
     59   *
     60   * \note Unlike the initial SuperLU implementation, there is no step to equilibrate the matrix.
     61   * For badly scaled matrices, this step can be useful to reduce the pivoting during factorization.
     62   * If this is the case for your matrices, you can try the basic scaling method at
     63   *  "unsupported/Eigen/src/IterativeSolvers/Scaling.h"
     64   *
     65   * \tparam _MatrixType The type of the sparse matrix. It must be a column-major SparseMatrix<>
     66   * \tparam _OrderingType The ordering method to use, either AMD, COLAMD or METIS. Default is COLAMD
     67   *
     68   * \implsparsesolverconcept
     69   *
     70   * \sa \ref TutorialSparseSolverConcept
     71   * \sa \ref OrderingMethods_Module
     72   */
     73 template <typename _MatrixType, typename _OrderingType>
     74 class SparseLU : public SparseSolverBase<SparseLU<_MatrixType,_OrderingType> >, public internal::SparseLUImpl<typename _MatrixType::Scalar, typename _MatrixType::StorageIndex>
     75 {
     76   protected:
     77     typedef SparseSolverBase<SparseLU<_MatrixType,_OrderingType> > APIBase;
     78     using APIBase::m_isInitialized;
     79   public:
     80     using APIBase::_solve_impl;
     81 
     82     typedef _MatrixType MatrixType;
     83     typedef _OrderingType OrderingType;
     84     typedef typename MatrixType::Scalar Scalar;
     85     typedef typename MatrixType::RealScalar RealScalar;
     86     typedef typename MatrixType::StorageIndex StorageIndex;
     87     typedef SparseMatrix<Scalar,ColMajor,StorageIndex> NCMatrix;
     88     typedef internal::MappedSuperNodalMatrix<Scalar, StorageIndex> SCMatrix;
     89     typedef Matrix<Scalar,Dynamic,1> ScalarVector;
     90     typedef Matrix<StorageIndex,Dynamic,1> IndexVector;
     91     typedef PermutationMatrix<Dynamic, Dynamic, StorageIndex> PermutationType;
     92     typedef internal::SparseLUImpl<Scalar, StorageIndex> Base;
     93 
     94     enum {
     95       ColsAtCompileTime = MatrixType::ColsAtCompileTime,
     96       MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
     97     };
     98 
     99   public:
    100     SparseLU():m_lastError(""),m_Ustore(0,0,0,0,0,0),m_symmetricmode(false),m_diagpivotthresh(1.0),m_detPermR(1)
    101     {
    102       initperfvalues();
    103     }
    104     explicit SparseLU(const MatrixType& matrix)
    105       : m_lastError(""),m_Ustore(0,0,0,0,0,0),m_symmetricmode(false),m_diagpivotthresh(1.0),m_detPermR(1)
    106     {
    107       initperfvalues();
    108       compute(matrix);
    109     }
    110 
    111     ~SparseLU()
    112     {
    113       // Free all explicit dynamic pointers
    114     }
    115 
    116     void analyzePattern (const MatrixType& matrix);
    117     void factorize (const MatrixType& matrix);
    118     void simplicialfactorize(const MatrixType& matrix);
    119 
    120     /**
    121       * Compute the symbolic and numeric factorization of the input sparse matrix.
    122       * The input matrix should be in column-major storage.
    123       */
    124     void compute (const MatrixType& matrix)
    125     {
    126       // Analyze
    127       analyzePattern(matrix);
    128       //Factorize
    129       factorize(matrix);
    130     }
    131 
    132     inline Index rows() const { return m_mat.rows(); }
    133     inline Index cols() const { return m_mat.cols(); }
    134     /** Indicate that the pattern of the input matrix is symmetric */
    135     void isSymmetric(bool sym)
    136     {
    137       m_symmetricmode = sym;
    138     }
    139 
    140     /** \returns an expression of the matrix L, internally stored as supernodes
    141       * The only operation available with this expression is the triangular solve
    142       * \code
    143       * y = b; matrixL().solveInPlace(y);
    144       * \endcode
    145       */
    146     SparseLUMatrixLReturnType<SCMatrix> matrixL() const
    147     {
    148       return SparseLUMatrixLReturnType<SCMatrix>(m_Lstore);
    149     }
    150     /** \returns an expression of the matrix U,
    151       * The only operation available with this expression is the triangular solve
    152       * \code
    153       * y = b; matrixU().solveInPlace(y);
    154       * \endcode
    155       */
    156     SparseLUMatrixUReturnType<SCMatrix,MappedSparseMatrix<Scalar,ColMajor,StorageIndex> > matrixU() const
    157     {
    158       return SparseLUMatrixUReturnType<SCMatrix, MappedSparseMatrix<Scalar,ColMajor,StorageIndex> >(m_Lstore, m_Ustore);
    159     }
    160 
    161     /**
    162       * \returns a reference to the row matrix permutation \f$ P_r \f$ such that \f$P_r A P_c^T = L U\f$
    163       * \sa colsPermutation()
    164       */
    165     inline const PermutationType& rowsPermutation() const
    166     {
    167       return m_perm_r;
    168     }
    169     /**
    170       * \returns a reference to the column matrix permutation\f$ P_c^T \f$ such that \f$P_r A P_c^T = L U\f$
    171       * \sa rowsPermutation()
    172       */
    173     inline const PermutationType& colsPermutation() const
    174     {
    175       return m_perm_c;
    176     }
    177     /** Set the threshold used for a diagonal entry to be an acceptable pivot. */
    178     void setPivotThreshold(const RealScalar& thresh)
    179     {
    180       m_diagpivotthresh = thresh;
    181     }
    182 
    183 #ifdef EIGEN_PARSED_BY_DOXYGEN
    184     /** \returns the solution X of \f$ A X = B \f$ using the current decomposition of A.
    185       *
    186       * \warning the destination matrix X in X = this->solve(B) must be colmun-major.
    187       *
    188       * \sa compute()
    189       */
    190     template<typename Rhs>
    191     inline const Solve<SparseLU, Rhs> solve(const MatrixBase<Rhs>& B) const;
    192 #endif // EIGEN_PARSED_BY_DOXYGEN
    193 
    194     /** \brief Reports whether previous computation was successful.
    195       *
    196       * \returns \c Success if computation was succesful,
    197       *          \c NumericalIssue if the LU factorization reports a problem, zero diagonal for instance
    198       *          \c InvalidInput if the input matrix is invalid
    199       *
    200       * \sa iparm()
    201       */
    202     ComputationInfo info() const
    203     {
    204       eigen_assert(m_isInitialized && "Decomposition is not initialized.");
    205       return m_info;
    206     }
    207 
    208     /**
    209       * \returns A string describing the type of error
    210       */
    211     std::string lastErrorMessage() const
    212     {
    213       return m_lastError;
    214     }
    215 
    216     template<typename Rhs, typename Dest>
    217     bool _solve_impl(const MatrixBase<Rhs> &B, MatrixBase<Dest> &X_base) const
    218     {
    219       Dest& X(X_base.derived());
    220       eigen_assert(m_factorizationIsOk && "The matrix should be factorized first");
    221       EIGEN_STATIC_ASSERT((Dest::Flags&RowMajorBit)==0,
    222                         THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES);
    223 
    224       // Permute the right hand side to form X = Pr*B
    225       // on return, X is overwritten by the computed solution
    226       X.resize(B.rows(),B.cols());
    227 
    228       // this ugly const_cast_derived() helps to detect aliasing when applying the permutations
    229       for(Index j = 0; j < B.cols(); ++j)
    230         X.col(j) = rowsPermutation() * B.const_cast_derived().col(j);
    231 
    232       //Forward substitution with L
    233       this->matrixL().solveInPlace(X);
    234       this->matrixU().solveInPlace(X);
    235 
    236       // Permute back the solution
    237       for (Index j = 0; j < B.cols(); ++j)
    238         X.col(j) = colsPermutation().inverse() * X.col(j);
    239 
    240       return true;
    241     }
    242 
    243     /**
    244       * \returns the absolute value of the determinant of the matrix of which
    245       * *this is the QR decomposition.
    246       *
    247       * \warning a determinant can be very big or small, so for matrices
    248       * of large enough dimension, there is a risk of overflow/underflow.
    249       * One way to work around that is to use logAbsDeterminant() instead.
    250       *
    251       * \sa logAbsDeterminant(), signDeterminant()
    252       */
    253     Scalar absDeterminant()
    254     {
    255       using std::abs;
    256       eigen_assert(m_factorizationIsOk && "The matrix should be factorized first.");
    257       // Initialize with the determinant of the row matrix
    258       Scalar det = Scalar(1.);
    259       // Note that the diagonal blocks of U are stored in supernodes,
    260       // which are available in the  L part :)
    261       for (Index j = 0; j < this->cols(); ++j)
    262       {
    263         for (typename SCMatrix::InnerIterator it(m_Lstore, j); it; ++it)
    264         {
    265           if(it.index() == j)
    266           {
    267             det *= abs(it.value());
    268             break;
    269           }
    270         }
    271       }
    272       return det;
    273     }
    274 
    275     /** \returns the natural log of the absolute value of the determinant of the matrix
    276       * of which **this is the QR decomposition
    277       *
    278       * \note This method is useful to work around the risk of overflow/underflow that's
    279       * inherent to the determinant computation.
    280       *
    281       * \sa absDeterminant(), signDeterminant()
    282       */
    283     Scalar logAbsDeterminant() const
    284     {
    285       using std::log;
    286       using std::abs;
    287 
    288       eigen_assert(m_factorizationIsOk && "The matrix should be factorized first.");
    289       Scalar det = Scalar(0.);
    290       for (Index j = 0; j < this->cols(); ++j)
    291       {
    292         for (typename SCMatrix::InnerIterator it(m_Lstore, j); it; ++it)
    293         {
    294           if(it.row() < j) continue;
    295           if(it.row() == j)
    296           {
    297             det += log(abs(it.value()));
    298             break;
    299           }
    300         }
    301       }
    302       return det;
    303     }
    304 
    305     /** \returns A number representing the sign of the determinant
    306       *
    307       * \sa absDeterminant(), logAbsDeterminant()
    308       */
    309     Scalar signDeterminant()
    310     {
    311       eigen_assert(m_factorizationIsOk && "The matrix should be factorized first.");
    312       // Initialize with the determinant of the row matrix
    313       Index det = 1;
    314       // Note that the diagonal blocks of U are stored in supernodes,
    315       // which are available in the  L part :)
    316       for (Index j = 0; j < this->cols(); ++j)
    317       {
    318         for (typename SCMatrix::InnerIterator it(m_Lstore, j); it; ++it)
    319         {
    320           if(it.index() == j)
    321           {
    322             if(it.value()<0)
    323               det = -det;
    324             else if(it.value()==0)
    325               return 0;
    326             break;
    327           }
    328         }
    329       }
    330       return det * m_detPermR * m_detPermC;
    331     }
    332 
    333     /** \returns The determinant of the matrix.
    334       *
    335       * \sa absDeterminant(), logAbsDeterminant()
    336       */
    337     Scalar determinant()
    338     {
    339       eigen_assert(m_factorizationIsOk && "The matrix should be factorized first.");
    340       // Initialize with the determinant of the row matrix
    341       Scalar det = Scalar(1.);
    342       // Note that the diagonal blocks of U are stored in supernodes,
    343       // which are available in the  L part :)
    344       for (Index j = 0; j < this->cols(); ++j)
    345       {
    346         for (typename SCMatrix::InnerIterator it(m_Lstore, j); it; ++it)
    347         {
    348           if(it.index() == j)
    349           {
    350             det *= it.value();
    351             break;
    352           }
    353         }
    354       }
    355       return (m_detPermR * m_detPermC) > 0 ? det : -det;
    356     }
    357 
    358   protected:
    359     // Functions
    360     void initperfvalues()
    361     {
    362       m_perfv.panel_size = 16;
    363       m_perfv.relax = 1;
    364       m_perfv.maxsuper = 128;
    365       m_perfv.rowblk = 16;
    366       m_perfv.colblk = 8;
    367       m_perfv.fillfactor = 20;
    368     }
    369 
    370     // Variables
    371     mutable ComputationInfo m_info;
    372     bool m_factorizationIsOk;
    373     bool m_analysisIsOk;
    374     std::string m_lastError;
    375     NCMatrix m_mat; // The input (permuted ) matrix
    376     SCMatrix m_Lstore; // The lower triangular matrix (supernodal)
    377     MappedSparseMatrix<Scalar,ColMajor,StorageIndex> m_Ustore; // The upper triangular matrix
    378     PermutationType m_perm_c; // Column permutation
    379     PermutationType m_perm_r ; // Row permutation
    380     IndexVector m_etree; // Column elimination tree
    381 
    382     typename Base::GlobalLU_t m_glu;
    383 
    384     // SparseLU options
    385     bool m_symmetricmode;
    386     // values for performance
    387     internal::perfvalues m_perfv;
    388     RealScalar m_diagpivotthresh; // Specifies the threshold used for a diagonal entry to be an acceptable pivot
    389     Index m_nnzL, m_nnzU; // Nonzeros in L and U factors
    390     Index m_detPermR, m_detPermC; // Determinants of the permutation matrices
    391   private:
    392     // Disable copy constructor
    393     SparseLU (const SparseLU& );
    394 
    395 }; // End class SparseLU
    396 
    397 
    398 
    399 // Functions needed by the analysis phase
    400 /**
    401   * Compute the column permutation to minimize the fill-in
    402   *
    403   *  - Apply this permutation to the input matrix -
    404   *
    405   *  - Compute the column elimination tree on the permuted matrix
    406   *
    407   *  - Postorder the elimination tree and the column permutation
    408   *
    409   */
    410 template <typename MatrixType, typename OrderingType>
    411 void SparseLU<MatrixType, OrderingType>::analyzePattern(const MatrixType& mat)
    412 {
    413 
    414   //TODO  It is possible as in SuperLU to compute row and columns scaling vectors to equilibrate the matrix mat.
    415 
    416   // Firstly, copy the whole input matrix.
    417   m_mat = mat;
    418 
    419   // Compute fill-in ordering
    420   OrderingType ord;
    421   ord(m_mat,m_perm_c);
    422 
    423   // Apply the permutation to the column of the input  matrix
    424   if (m_perm_c.size())
    425   {
    426     m_mat.uncompress(); //NOTE: The effect of this command is only to create the InnerNonzeros pointers. FIXME : This vector is filled but not subsequently used.
    427     // Then, permute only the column pointers
    428     ei_declare_aligned_stack_constructed_variable(StorageIndex,outerIndexPtr,mat.cols()+1,mat.isCompressed()?const_cast<StorageIndex*>(mat.outerIndexPtr()):0);
    429 
    430     // If the input matrix 'mat' is uncompressed, then the outer-indices do not match the ones of m_mat, and a copy is thus needed.
    431     if(!mat.isCompressed())
    432       IndexVector::Map(outerIndexPtr, mat.cols()+1) = IndexVector::Map(m_mat.outerIndexPtr(),mat.cols()+1);
    433 
    434     // Apply the permutation and compute the nnz per column.
    435     for (Index i = 0; i < mat.cols(); i++)
    436     {
    437       m_mat.outerIndexPtr()[m_perm_c.indices()(i)] = outerIndexPtr[i];
    438       m_mat.innerNonZeroPtr()[m_perm_c.indices()(i)] = outerIndexPtr[i+1] - outerIndexPtr[i];
    439     }
    440   }
    441 
    442   // Compute the column elimination tree of the permuted matrix
    443   IndexVector firstRowElt;
    444   internal::coletree(m_mat, m_etree,firstRowElt);
    445 
    446   // In symmetric mode, do not do postorder here
    447   if (!m_symmetricmode) {
    448     IndexVector post, iwork;
    449     // Post order etree
    450     internal::treePostorder(StorageIndex(m_mat.cols()), m_etree, post);
    451 
    452 
    453     // Renumber etree in postorder
    454     Index m = m_mat.cols();
    455     iwork.resize(m+1);
    456     for (Index i = 0; i < m; ++i) iwork(post(i)) = post(m_etree(i));
    457     m_etree = iwork;
    458 
    459     // Postmultiply A*Pc by post, i.e reorder the matrix according to the postorder of the etree
    460     PermutationType post_perm(m);
    461     for (Index i = 0; i < m; i++)
    462       post_perm.indices()(i) = post(i);
    463 
    464     // Combine the two permutations : postorder the permutation for future use
    465     if(m_perm_c.size()) {
    466       m_perm_c = post_perm * m_perm_c;
    467     }
    468 
    469   } // end postordering
    470 
    471   m_analysisIsOk = true;
    472 }
    473 
    474 // Functions needed by the numerical factorization phase
    475 
    476 
    477 /**
    478   *  - Numerical factorization
    479   *  - Interleaved with the symbolic factorization
    480   * On exit,  info is
    481   *
    482   *    = 0: successful factorization
    483   *
    484   *    > 0: if info = i, and i is
    485   *
    486   *       <= A->ncol: U(i,i) is exactly zero. The factorization has
    487   *          been completed, but the factor U is exactly singular,
    488   *          and division by zero will occur if it is used to solve a
    489   *          system of equations.
    490   *
    491   *       > A->ncol: number of bytes allocated when memory allocation
    492   *         failure occurred, plus A->ncol. If lwork = -1, it is
    493   *         the estimated amount of space needed, plus A->ncol.
    494   */
    495 template <typename MatrixType, typename OrderingType>
    496 void SparseLU<MatrixType, OrderingType>::factorize(const MatrixType& matrix)
    497 {
    498   using internal::emptyIdxLU;
    499   eigen_assert(m_analysisIsOk && "analyzePattern() should be called first");
    500   eigen_assert((matrix.rows() == matrix.cols()) && "Only for squared matrices");
    501 
    502   typedef typename IndexVector::Scalar StorageIndex;
    503 
    504   m_isInitialized = true;
    505 
    506 
    507   // Apply the column permutation computed in analyzepattern()
    508   //   m_mat = matrix * m_perm_c.inverse();
    509   m_mat = matrix;
    510   if (m_perm_c.size())
    511   {
    512     m_mat.uncompress(); //NOTE: The effect of this command is only to create the InnerNonzeros pointers.
    513     //Then, permute only the column pointers
    514     const StorageIndex * outerIndexPtr;
    515     if (matrix.isCompressed()) outerIndexPtr = matrix.outerIndexPtr();
    516     else
    517     {
    518       StorageIndex* outerIndexPtr_t = new StorageIndex[matrix.cols()+1];
    519       for(Index i = 0; i <= matrix.cols(); i++) outerIndexPtr_t[i] = m_mat.outerIndexPtr()[i];
    520       outerIndexPtr = outerIndexPtr_t;
    521     }
    522     for (Index i = 0; i < matrix.cols(); i++)
    523     {
    524       m_mat.outerIndexPtr()[m_perm_c.indices()(i)] = outerIndexPtr[i];
    525       m_mat.innerNonZeroPtr()[m_perm_c.indices()(i)] = outerIndexPtr[i+1] - outerIndexPtr[i];
    526     }
    527     if(!matrix.isCompressed()) delete[] outerIndexPtr;
    528   }
    529   else
    530   { //FIXME This should not be needed if the empty permutation is handled transparently
    531     m_perm_c.resize(matrix.cols());
    532     for(StorageIndex i = 0; i < matrix.cols(); ++i) m_perm_c.indices()(i) = i;
    533   }
    534 
    535   Index m = m_mat.rows();
    536   Index n = m_mat.cols();
    537   Index nnz = m_mat.nonZeros();
    538   Index maxpanel = m_perfv.panel_size * m;
    539   // Allocate working storage common to the factor routines
    540   Index lwork = 0;
    541   Index info = Base::memInit(m, n, nnz, lwork, m_perfv.fillfactor, m_perfv.panel_size, m_glu);
    542   if (info)
    543   {
    544     m_lastError = "UNABLE TO ALLOCATE WORKING MEMORY\n\n" ;
    545     m_factorizationIsOk = false;
    546     return ;
    547   }
    548 
    549   // Set up pointers for integer working arrays
    550   IndexVector segrep(m); segrep.setZero();
    551   IndexVector parent(m); parent.setZero();
    552   IndexVector xplore(m); xplore.setZero();
    553   IndexVector repfnz(maxpanel);
    554   IndexVector panel_lsub(maxpanel);
    555   IndexVector xprune(n); xprune.setZero();
    556   IndexVector marker(m*internal::LUNoMarker); marker.setZero();
    557 
    558   repfnz.setConstant(-1);
    559   panel_lsub.setConstant(-1);
    560 
    561   // Set up pointers for scalar working arrays
    562   ScalarVector dense;
    563   dense.setZero(maxpanel);
    564   ScalarVector tempv;
    565   tempv.setZero(internal::LUnumTempV(m, m_perfv.panel_size, m_perfv.maxsuper, /*m_perfv.rowblk*/m) );
    566 
    567   // Compute the inverse of perm_c
    568   PermutationType iperm_c(m_perm_c.inverse());
    569 
    570   // Identify initial relaxed snodes
    571   IndexVector relax_end(n);
    572   if ( m_symmetricmode == true )
    573     Base::heap_relax_snode(n, m_etree, m_perfv.relax, marker, relax_end);
    574   else
    575     Base::relax_snode(n, m_etree, m_perfv.relax, marker, relax_end);
    576 
    577 
    578   m_perm_r.resize(m);
    579   m_perm_r.indices().setConstant(-1);
    580   marker.setConstant(-1);
    581   m_detPermR = 1; // Record the determinant of the row permutation
    582 
    583   m_glu.supno(0) = emptyIdxLU; m_glu.xsup.setConstant(0);
    584   m_glu.xsup(0) = m_glu.xlsub(0) = m_glu.xusub(0) = m_glu.xlusup(0) = Index(0);
    585 
    586   // Work on one 'panel' at a time. A panel is one of the following :
    587   //  (a) a relaxed supernode at the bottom of the etree, or
    588   //  (b) panel_size contiguous columns, <panel_size> defined by the user
    589   Index jcol;
    590   IndexVector panel_histo(n);
    591   Index pivrow; // Pivotal row number in the original row matrix
    592   Index nseg1; // Number of segments in U-column above panel row jcol
    593   Index nseg; // Number of segments in each U-column
    594   Index irep;
    595   Index i, k, jj;
    596   for (jcol = 0; jcol < n; )
    597   {
    598     // Adjust panel size so that a panel won't overlap with the next relaxed snode.
    599     Index panel_size = m_perfv.panel_size; // upper bound on panel width
    600     for (k = jcol + 1; k < (std::min)(jcol+panel_size, n); k++)
    601     {
    602       if (relax_end(k) != emptyIdxLU)
    603       {
    604         panel_size = k - jcol;
    605         break;
    606       }
    607     }
    608     if (k == n)
    609       panel_size = n - jcol;
    610 
    611     // Symbolic outer factorization on a panel of columns
    612     Base::panel_dfs(m, panel_size, jcol, m_mat, m_perm_r.indices(), nseg1, dense, panel_lsub, segrep, repfnz, xprune, marker, parent, xplore, m_glu);
    613 
    614     // Numeric sup-panel updates in topological order
    615     Base::panel_bmod(m, panel_size, jcol, nseg1, dense, tempv, segrep, repfnz, m_glu);
    616 
    617     // Sparse LU within the panel, and below the panel diagonal
    618     for ( jj = jcol; jj< jcol + panel_size; jj++)
    619     {
    620       k = (jj - jcol) * m; // Column index for w-wide arrays
    621 
    622       nseg = nseg1; // begin after all the panel segments
    623       //Depth-first-search for the current column
    624       VectorBlock<IndexVector> panel_lsubk(panel_lsub, k, m);
    625       VectorBlock<IndexVector> repfnz_k(repfnz, k, m);
    626       info = Base::column_dfs(m, jj, m_perm_r.indices(), m_perfv.maxsuper, nseg, panel_lsubk, segrep, repfnz_k, xprune, marker, parent, xplore, m_glu);
    627       if ( info )
    628       {
    629         m_lastError =  "UNABLE TO EXPAND MEMORY IN COLUMN_DFS() ";
    630         m_info = NumericalIssue;
    631         m_factorizationIsOk = false;
    632         return;
    633       }
    634       // Numeric updates to this column
    635       VectorBlock<ScalarVector> dense_k(dense, k, m);
    636       VectorBlock<IndexVector> segrep_k(segrep, nseg1, m-nseg1);
    637       info = Base::column_bmod(jj, (nseg - nseg1), dense_k, tempv, segrep_k, repfnz_k, jcol, m_glu);
    638       if ( info )
    639       {
    640         m_lastError = "UNABLE TO EXPAND MEMORY IN COLUMN_BMOD() ";
    641         m_info = NumericalIssue;
    642         m_factorizationIsOk = false;
    643         return;
    644       }
    645 
    646       // Copy the U-segments to ucol(*)
    647       info = Base::copy_to_ucol(jj, nseg, segrep, repfnz_k ,m_perm_r.indices(), dense_k, m_glu);
    648       if ( info )
    649       {
    650         m_lastError = "UNABLE TO EXPAND MEMORY IN COPY_TO_UCOL() ";
    651         m_info = NumericalIssue;
    652         m_factorizationIsOk = false;
    653         return;
    654       }
    655 
    656       // Form the L-segment
    657       info = Base::pivotL(jj, m_diagpivotthresh, m_perm_r.indices(), iperm_c.indices(), pivrow, m_glu);
    658       if ( info )
    659       {
    660         m_lastError = "THE MATRIX IS STRUCTURALLY SINGULAR ... ZERO COLUMN AT ";
    661         std::ostringstream returnInfo;
    662         returnInfo << info;
    663         m_lastError += returnInfo.str();
    664         m_info = NumericalIssue;
    665         m_factorizationIsOk = false;
    666         return;
    667       }
    668 
    669       // Update the determinant of the row permutation matrix
    670       // FIXME: the following test is not correct, we should probably take iperm_c into account and pivrow is not directly the row pivot.
    671       if (pivrow != jj) m_detPermR = -m_detPermR;
    672 
    673       // Prune columns (0:jj-1) using column jj
    674       Base::pruneL(jj, m_perm_r.indices(), pivrow, nseg, segrep, repfnz_k, xprune, m_glu);
    675 
    676       // Reset repfnz for this column
    677       for (i = 0; i < nseg; i++)
    678       {
    679         irep = segrep(i);
    680         repfnz_k(irep) = emptyIdxLU;
    681       }
    682     } // end SparseLU within the panel
    683     jcol += panel_size;  // Move to the next panel
    684   } // end for -- end elimination
    685 
    686   m_detPermR = m_perm_r.determinant();
    687   m_detPermC = m_perm_c.determinant();
    688 
    689   // Count the number of nonzeros in factors
    690   Base::countnz(n, m_nnzL, m_nnzU, m_glu);
    691   // Apply permutation  to the L subscripts
    692   Base::fixupL(n, m_perm_r.indices(), m_glu);
    693 
    694   // Create supernode matrix L
    695   m_Lstore.setInfos(m, n, m_glu.lusup, m_glu.xlusup, m_glu.lsub, m_glu.xlsub, m_glu.supno, m_glu.xsup);
    696   // Create the column major upper sparse matrix  U;
    697   new (&m_Ustore) MappedSparseMatrix<Scalar, ColMajor, StorageIndex> ( m, n, m_nnzU, m_glu.xusub.data(), m_glu.usub.data(), m_glu.ucol.data() );
    698 
    699   m_info = Success;
    700   m_factorizationIsOk = true;
    701 }
    702 
    703 template<typename MappedSupernodalType>
    704 struct SparseLUMatrixLReturnType : internal::no_assignment_operator
    705 {
    706   typedef typename MappedSupernodalType::Scalar Scalar;
    707   explicit SparseLUMatrixLReturnType(const MappedSupernodalType& mapL) : m_mapL(mapL)
    708   { }
    709   Index rows() { return m_mapL.rows(); }
    710   Index cols() { return m_mapL.cols(); }
    711   template<typename Dest>
    712   void solveInPlace( MatrixBase<Dest> &X) const
    713   {
    714     m_mapL.solveInPlace(X);
    715   }
    716   const MappedSupernodalType& m_mapL;
    717 };
    718 
    719 template<typename MatrixLType, typename MatrixUType>
    720 struct SparseLUMatrixUReturnType : internal::no_assignment_operator
    721 {
    722   typedef typename MatrixLType::Scalar Scalar;
    723   SparseLUMatrixUReturnType(const MatrixLType& mapL, const MatrixUType& mapU)
    724   : m_mapL(mapL),m_mapU(mapU)
    725   { }
    726   Index rows() { return m_mapL.rows(); }
    727   Index cols() { return m_mapL.cols(); }
    728 
    729   template<typename Dest>   void solveInPlace(MatrixBase<Dest> &X) const
    730   {
    731     Index nrhs = X.cols();
    732     Index n    = X.rows();
    733     // Backward solve with U
    734     for (Index k = m_mapL.nsuper(); k >= 0; k--)
    735     {
    736       Index fsupc = m_mapL.supToCol()[k];
    737       Index lda = m_mapL.colIndexPtr()[fsupc+1] - m_mapL.colIndexPtr()[fsupc]; // leading dimension
    738       Index nsupc = m_mapL.supToCol()[k+1] - fsupc;
    739       Index luptr = m_mapL.colIndexPtr()[fsupc];
    740 
    741       if (nsupc == 1)
    742       {
    743         for (Index j = 0; j < nrhs; j++)
    744         {
    745           X(fsupc, j) /= m_mapL.valuePtr()[luptr];
    746         }
    747       }
    748       else
    749       {
    750         Map<const Matrix<Scalar,Dynamic,Dynamic, ColMajor>, 0, OuterStride<> > A( &(m_mapL.valuePtr()[luptr]), nsupc, nsupc, OuterStride<>(lda) );
    751         Map< Matrix<Scalar,Dynamic,Dest::ColsAtCompileTime, ColMajor>, 0, OuterStride<> > U (&(X(fsupc,0)), nsupc, nrhs, OuterStride<>(n) );
    752         U = A.template triangularView<Upper>().solve(U);
    753       }
    754 
    755       for (Index j = 0; j < nrhs; ++j)
    756       {
    757         for (Index jcol = fsupc; jcol < fsupc + nsupc; jcol++)
    758         {
    759           typename MatrixUType::InnerIterator it(m_mapU, jcol);
    760           for ( ; it; ++it)
    761           {
    762             Index irow = it.index();
    763             X(irow, j) -= X(jcol, j) * it.value();
    764           }
    765         }
    766       }
    767     } // End For U-solve
    768   }
    769   const MatrixLType& m_mapL;
    770   const MatrixUType& m_mapU;
    771 };
    772 
    773 } // End namespace Eigen
    774 
    775 #endif
    776