Home | History | Annotate | Download | only in Core
      1 // This file is part of Eigen, a lightweight C++ template library
      2 // for linear algebra.
      3 //
      4 // Copyright (C) 2009 Gael Guennebaud <gael.guennebaud (at) inria.fr>
      5 //
      6 // This Source Code Form is subject to the terms of the Mozilla
      7 // Public License v. 2.0. If a copy of the MPL was not distributed
      8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
      9 
     10 #ifndef EIGEN_STABLENORM_H
     11 #define EIGEN_STABLENORM_H
     12 
     13 namespace Eigen {
     14 
     15 namespace internal {
     16 
     17 template<typename ExpressionType, typename Scalar>
     18 inline void stable_norm_kernel(const ExpressionType& bl, Scalar& ssq, Scalar& scale, Scalar& invScale)
     19 {
     20   Scalar maxCoeff = bl.cwiseAbs().maxCoeff();
     21 
     22   if(maxCoeff>scale)
     23   {
     24     ssq = ssq * numext::abs2(scale/maxCoeff);
     25     Scalar tmp = Scalar(1)/maxCoeff;
     26     if(tmp > NumTraits<Scalar>::highest())
     27     {
     28       invScale = NumTraits<Scalar>::highest();
     29       scale = Scalar(1)/invScale;
     30     }
     31     else if(maxCoeff>NumTraits<Scalar>::highest()) // we got a INF
     32     {
     33       invScale = Scalar(1);
     34       scale = maxCoeff;
     35     }
     36     else
     37     {
     38       scale = maxCoeff;
     39       invScale = tmp;
     40     }
     41   }
     42   else if(maxCoeff!=maxCoeff) // we got a NaN
     43   {
     44     scale = maxCoeff;
     45   }
     46 
     47   // TODO if the maxCoeff is much much smaller than the current scale,
     48   // then we can neglect this sub vector
     49   if(scale>Scalar(0)) // if scale==0, then bl is 0
     50     ssq += (bl*invScale).squaredNorm();
     51 }
     52 
     53 template<typename Derived>
     54 inline typename NumTraits<typename traits<Derived>::Scalar>::Real
     55 blueNorm_impl(const EigenBase<Derived>& _vec)
     56 {
     57   typedef typename Derived::RealScalar RealScalar;
     58   using std::pow;
     59   using std::sqrt;
     60   using std::abs;
     61   const Derived& vec(_vec.derived());
     62   static bool initialized = false;
     63   static RealScalar b1, b2, s1m, s2m, rbig, relerr;
     64   if(!initialized)
     65   {
     66     int ibeta, it, iemin, iemax, iexp;
     67     RealScalar eps;
     68     // This program calculates the machine-dependent constants
     69     // bl, b2, slm, s2m, relerr overfl
     70     // from the "basic" machine-dependent numbers
     71     // nbig, ibeta, it, iemin, iemax, rbig.
     72     // The following define the basic machine-dependent constants.
     73     // For portability, the PORT subprograms "ilmaeh" and "rlmach"
     74     // are used. For any specific computer, each of the assignment
     75     // statements can be replaced
     76     ibeta = std::numeric_limits<RealScalar>::radix;                 // base for floating-point numbers
     77     it    = std::numeric_limits<RealScalar>::digits;                // number of base-beta digits in mantissa
     78     iemin = std::numeric_limits<RealScalar>::min_exponent;          // minimum exponent
     79     iemax = std::numeric_limits<RealScalar>::max_exponent;          // maximum exponent
     80     rbig  = (std::numeric_limits<RealScalar>::max)();               // largest floating-point number
     81 
     82     iexp  = -((1-iemin)/2);
     83     b1    = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp)));    // lower boundary of midrange
     84     iexp  = (iemax + 1 - it)/2;
     85     b2    = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp)));    // upper boundary of midrange
     86 
     87     iexp  = (2-iemin)/2;
     88     s1m   = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp)));    // scaling factor for lower range
     89     iexp  = - ((iemax+it)/2);
     90     s2m   = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp)));    // scaling factor for upper range
     91 
     92     eps     = RealScalar(pow(double(ibeta), 1-it));
     93     relerr  = sqrt(eps);                                            // tolerance for neglecting asml
     94     initialized = true;
     95   }
     96   Index n = vec.size();
     97   RealScalar ab2 = b2 / RealScalar(n);
     98   RealScalar asml = RealScalar(0);
     99   RealScalar amed = RealScalar(0);
    100   RealScalar abig = RealScalar(0);
    101   for(typename Derived::InnerIterator it(vec, 0); it; ++it)
    102   {
    103     RealScalar ax = abs(it.value());
    104     if(ax > ab2)     abig += numext::abs2(ax*s2m);
    105     else if(ax < b1) asml += numext::abs2(ax*s1m);
    106     else             amed += numext::abs2(ax);
    107   }
    108   if(amed!=amed)
    109     return amed;  // we got a NaN
    110   if(abig > RealScalar(0))
    111   {
    112     abig = sqrt(abig);
    113     if(abig > rbig) // overflow, or *this contains INF values
    114       return abig;  // return INF
    115     if(amed > RealScalar(0))
    116     {
    117       abig = abig/s2m;
    118       amed = sqrt(amed);
    119     }
    120     else
    121       return abig/s2m;
    122   }
    123   else if(asml > RealScalar(0))
    124   {
    125     if (amed > RealScalar(0))
    126     {
    127       abig = sqrt(amed);
    128       amed = sqrt(asml) / s1m;
    129     }
    130     else
    131       return sqrt(asml)/s1m;
    132   }
    133   else
    134     return sqrt(amed);
    135   asml = numext::mini(abig, amed);
    136   abig = numext::maxi(abig, amed);
    137   if(asml <= abig*relerr)
    138     return abig;
    139   else
    140     return abig * sqrt(RealScalar(1) + numext::abs2(asml/abig));
    141 }
    142 
    143 } // end namespace internal
    144 
    145 /** \returns the \em l2 norm of \c *this avoiding underflow and overflow.
    146   * This version use a blockwise two passes algorithm:
    147   *  1 - find the absolute largest coefficient \c s
    148   *  2 - compute \f$ s \Vert \frac{*this}{s} \Vert \f$ in a standard way
    149   *
    150   * For architecture/scalar types supporting vectorization, this version
    151   * is faster than blueNorm(). Otherwise the blueNorm() is much faster.
    152   *
    153   * \sa norm(), blueNorm(), hypotNorm()
    154   */
    155 template<typename Derived>
    156 inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
    157 MatrixBase<Derived>::stableNorm() const
    158 {
    159   using std::sqrt;
    160   using std::abs;
    161   const Index blockSize = 4096;
    162   RealScalar scale(0);
    163   RealScalar invScale(1);
    164   RealScalar ssq(0); // sum of square
    165 
    166   typedef typename internal::nested_eval<Derived,2>::type DerivedCopy;
    167   typedef typename internal::remove_all<DerivedCopy>::type DerivedCopyClean;
    168   DerivedCopy copy(derived());
    169 
    170   enum {
    171     CanAlign = (   (int(DerivedCopyClean::Flags)&DirectAccessBit)
    172                 || (int(internal::evaluator<DerivedCopyClean>::Alignment)>0) // FIXME Alignment)>0 might not be enough
    173                ) && (blockSize*sizeof(Scalar)*2<EIGEN_STACK_ALLOCATION_LIMIT)
    174                  && (EIGEN_MAX_STATIC_ALIGN_BYTES>0) // if we cannot allocate on the stack, then let's not bother about this optimization
    175   };
    176   typedef typename internal::conditional<CanAlign, Ref<const Matrix<Scalar,Dynamic,1,0,blockSize,1>, internal::evaluator<DerivedCopyClean>::Alignment>,
    177                                                    typename DerivedCopyClean::ConstSegmentReturnType>::type SegmentWrapper;
    178   Index n = size();
    179 
    180   if(n==1)
    181     return abs(this->coeff(0));
    182 
    183   Index bi = internal::first_default_aligned(copy);
    184   if (bi>0)
    185     internal::stable_norm_kernel(copy.head(bi), ssq, scale, invScale);
    186   for (; bi<n; bi+=blockSize)
    187     internal::stable_norm_kernel(SegmentWrapper(copy.segment(bi,numext::mini(blockSize, n - bi))), ssq, scale, invScale);
    188   return scale * sqrt(ssq);
    189 }
    190 
    191 /** \returns the \em l2 norm of \c *this using the Blue's algorithm.
    192   * A Portable Fortran Program to Find the Euclidean Norm of a Vector,
    193   * ACM TOMS, Vol 4, Issue 1, 1978.
    194   *
    195   * For architecture/scalar types without vectorization, this version
    196   * is much faster than stableNorm(). Otherwise the stableNorm() is faster.
    197   *
    198   * \sa norm(), stableNorm(), hypotNorm()
    199   */
    200 template<typename Derived>
    201 inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
    202 MatrixBase<Derived>::blueNorm() const
    203 {
    204   return internal::blueNorm_impl(*this);
    205 }
    206 
    207 /** \returns the \em l2 norm of \c *this avoiding undeflow and overflow.
    208   * This version use a concatenation of hypot() calls, and it is very slow.
    209   *
    210   * \sa norm(), stableNorm()
    211   */
    212 template<typename Derived>
    213 inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
    214 MatrixBase<Derived>::hypotNorm() const
    215 {
    216   return this->cwiseAbs().redux(internal::scalar_hypot_op<RealScalar>());
    217 }
    218 
    219 } // end namespace Eigen
    220 
    221 #endif // EIGEN_STABLENORM_H
    222