Home | History | Annotate | Download | only in src
      1 /*
      2   IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
      3 
      4   By downloading, copying, installing or using the software you agree to this license.
      5   If you do not agree to this license, do not download, install,
      6   copy or use the software.
      7 
      8 
      9                           BSD 3-Clause License
     10 
     11  Copyright (C) 2014, Olexa Bilaniuk, Hamid Bazargani & Robert Laganiere, all rights reserved.
     12 
     13  Redistribution and use in source and binary forms, with or without modification,
     14  are permitted provided that the following conditions are met:
     15 
     16    * Redistribution's of source code must retain the above copyright notice,
     17      this list of conditions and the following disclaimer.
     18 
     19    * Redistribution's in binary form must reproduce the above copyright notice,
     20      this list of conditions and the following disclaimer in the documentation
     21      and/or other materials provided with the distribution.
     22 
     23    * The name of the copyright holders may not be used to endorse or promote products
     24      derived from this software without specific prior written permission.
     25 
     26  This software is provided by the copyright holders and contributors "as is" and
     27  any express or implied warranties, including, but not limited to, the implied
     28  warranties of merchantability and fitness for a particular purpose are disclaimed.
     29  In no event shall the Intel Corporation or contributors be liable for any direct,
     30  indirect, incidental, special, exemplary, or consequential damages
     31  (including, but not limited to, procurement of substitute goods or services;
     32  loss of use, data, or profits; or business interruption) however caused
     33  and on any theory of liability, whether in contract, strict liability,
     34  or tort (including negligence or otherwise) arising in any way out of
     35  the use of this software, even if advised of the possibility of such damage.
     36 */
     37 
     38 /**
     39  * Bilaniuk, Olexa, Hamid Bazargani, and Robert Laganiere. "Fast Target
     40  * Recognition on Mobile Devices: Revisiting Gaussian Elimination for the
     41  * Estimation of Planar Homographies." In Computer Vision and Pattern
     42  * Recognition Workshops (CVPRW), 2014 IEEE Conference on, pp. 119-125.
     43  * IEEE, 2014.
     44  */
     45 
     46 /* Includes */
     47 #include <precomp.hpp>
     48 #include <opencv2/core.hpp>
     49 #include <stdlib.h>
     50 #include <stdio.h>
     51 #include <stdint.h>
     52 #include <string.h>
     53 #include <stddef.h>
     54 #include <limits.h>
     55 #include <float.h>
     56 #include <math.h>
     57 #include <vector>
     58 #include "rho.h"
     59 
     60 
     61 
     62 
     63 
     64 /* For the sake of cv:: namespace ONLY: */
     65 namespace cv{/* For C support, replace with extern "C" { */
     66 
     67 
     68 /* Constants */
     69 const int    MEM_ALIGN              = 32;
     70 const size_t HSIZE                  = (3*3*sizeof(float));
     71 const double MIN_DELTA_CHNG         = 0.1;
     72 // const double CHI_STAT               = 2.706;
     73 const double CHI_SQ                 = 1.645;
     74 // const double RLO                    = 0.25;
     75 // const double RHI                    = 0.75;
     76 const int    MAXLEVMARQITERS        = 100;
     77 const int    SMPL_SIZE              = 4;      /* 4 points required per model */
     78 const int    SPRT_T_M               = 25;     /* Guessing 25 match evlauations / 1 model generation */
     79 const int    SPRT_M_S               = 1;      /* 1 model per sample */
     80 const double SPRT_EPSILON           = 0.1;    /* No explanation */
     81 const double SPRT_DELTA             = 0.01;   /* No explanation */
     82 const double LM_GAIN_LO             = 0.25;   /* See sacLMGain(). */
     83 const double LM_GAIN_HI             = 0.75;   /* See sacLMGain(). */
     84 
     85 
     86 /* Data Structures */
     87 
     88 /**
     89  * Base Struct for RHO algorithm.
     90  *
     91  * A RHO estimator has initialization, finalization, capacity, seeding and
     92  * homography-estimation APIs that must be implemented.
     93  */
     94 
     95 struct RHO_HEST{
     96     /* This is a virtual base class; It should have a virtual destructor. */
     97     virtual ~RHO_HEST(){}
     98 
     99     /* External Interface Methods */
    100 
    101     /**
    102      * Initialization work.
    103      *
    104      * @return 0 if initialization is unsuccessful; non-zero otherwise.
    105      */
    106 
    107     virtual inline int    initialize(void){return 1;}
    108 
    109 
    110     /**
    111      * Finalization work.
    112      */
    113 
    114     virtual inline void   finalize(void){}
    115 
    116     /**
    117      * Ensure that the estimator context's internal table for the non-randomness
    118      * criterion is at least of the given size, and uses the given beta. The table
    119      * should be larger than the maximum number of matches fed into the estimator.
    120      *
    121      * A value of N of 0 requests deallocation of the table.
    122      *
    123      * @param [in] N     If 0, deallocate internal table. If > 0, ensure that the
    124      *                   internal table is of at least this size, reallocating if
    125      *                   necessary.
    126      * @param [in] beta  The beta-factor to use within the table.
    127      * @return 0 if unsuccessful; non-zero otherwise.
    128      */
    129 
    130     virtual inline int    ensureCapacity(unsigned N, double beta){
    131         (void)N;
    132         (void)beta;
    133 
    134         return 1;
    135     }
    136 
    137 
    138     /**
    139      * Generates a random double uniformly distributed in the range [0, 1).
    140      *
    141      * The default implementation uses the xorshift128+ algorithm from
    142      * Sebastiano Vigna. Further scramblings of Marsaglia's xorshift generators.
    143      * CoRR, abs/1402.6246, 2014.
    144      * http://vigna.di.unimi.it/ftp/papers/xorshiftplus.pdf
    145      *
    146      * Source roughly as given in
    147      * http://en.wikipedia.org/wiki/Xorshift#Xorshift.2B
    148      */
    149 
    150     virtual inline double fastRandom(void){
    151         uint64_t x = prng.s[0];
    152         uint64_t y = prng.s[1];
    153         x ^= x << 23; // a
    154         x ^= x >> 17; // b
    155         x ^= y ^ (y >> 26); // c
    156         prng.s[0] = y;
    157         prng.s[1] = x;
    158         uint64_t s = x + y;
    159 
    160         return s * 5.421010862427522e-20;/* 2^-64 */
    161     }
    162 
    163 
    164     /**
    165      * Seeds the context's PRNG.
    166      *
    167      * @param [in] seed  A 64-bit unsigned integer seed.
    168      */
    169 
    170     virtual inline void   fastSeed(uint64_t seed){
    171         int i;
    172 
    173         prng.s[0] =  seed;
    174         prng.s[1] = ~seed;/* Guarantees one of the elements will be non-zero. */
    175 
    176         /**
    177          * Escape from zero-land (see xorshift128+ paper). Approximately 20
    178          * iterations required according to the graph.
    179          */
    180 
    181         for(i=0;i<20;i++){
    182             fastRandom();
    183         }
    184     }
    185 
    186 
    187     /**
    188      * Estimates the homography using the given context, matches and parameters to
    189      * PROSAC.
    190      *
    191      * @param [in]     src     The pointer to the source points of the matches.
    192      *                             Cannot be NULL.
    193      * @param [in]     dst     The pointer to the destination points of the matches.
    194      *                             Cannot be NULL.
    195      * @param [out]    inl     The pointer to the output mask of inlier matches.
    196      *                             May be NULL.
    197      * @param [in]     N       The number of matches.
    198      * @param [in]     maxD    The maximum distance.
    199      * @param [in]     maxI    The maximum number of PROSAC iterations.
    200      * @param [in]     rConvg  The RANSAC convergence parameter.
    201      * @param [in]     cfd     The required confidence in the solution.
    202      * @param [in]     minInl  The minimum required number of inliers.
    203      * @param [in]     beta    The beta-parameter for the non-randomness criterion.
    204      * @param [in]     flags   A union of flags to control the estimation.
    205      * @param [in]     guessH  An extrinsic guess at the solution H, or NULL if
    206      *                         none provided.
    207      * @param [out]    finalH  The final estimation of H, or the zero matrix if
    208      *                         the minimum number of inliers was not met.
    209      *                         Cannot be NULL.
    210      * @return                 The number of inliers if the minimum number of
    211      *                         inliers for acceptance was reached; 0 otherwise.
    212      */
    213 
    214     virtual unsigned      rhoHest(const float*   src,     /* Source points */
    215                                   const float*   dst,     /* Destination points */
    216                                   char*          inl,     /* Inlier mask */
    217                                   unsigned       N,       /*  = src.length = dst.length = inl.length */
    218                                   float          maxD,    /* Works:     3.0 */
    219                                   unsigned       maxI,    /* Works:    2000 */
    220                                   unsigned       rConvg,  /* Works:    2000 */
    221                                   double         cfd,     /* Works:   0.995 */
    222                                   unsigned       minInl,  /* Minimum:     4 */
    223                                   double         beta,    /* Works:    0.35 */
    224                                   unsigned       flags,   /* Works:       0 */
    225                                   const float*   guessH,  /* Extrinsic guess, NULL if none provided */
    226                                   float*         finalH) = 0; /* Final result. */
    227 
    228 
    229 
    230     /* PRNG XORshift128+ */
    231     struct{
    232         uint64_t  s[2];            /* PRNG state */
    233     } prng;
    234 };
    235 
    236 
    237 
    238 /**
    239  * Generic C implementation of RHO algorithm.
    240  */
    241 
    242 struct RHO_HEST_REFC : RHO_HEST{
    243     /**
    244      * Virtual Arguments.
    245      *
    246      * Exactly the same as at function call, except:
    247      * - minInl is enforced to be >= 4.
    248      */
    249 
    250     struct{
    251         const float* src;
    252         const float* dst;
    253         char*        inl;
    254         unsigned     N;
    255         float        maxD;
    256         unsigned     maxI;
    257         unsigned     rConvg;
    258         double       cfd;
    259         unsigned     minInl;
    260         double       beta;
    261         unsigned     flags;
    262         const float* guessH;
    263         float*       finalH;
    264     } arg;
    265 
    266     /* PROSAC Control */
    267     struct{
    268         unsigned  i;               /* Iteration Number */
    269         unsigned  phNum;           /* Phase Number */
    270         unsigned  phEndI;          /* Phase End Iteration */
    271         double    phEndFpI;        /* Phase floating-point End Iteration */
    272         unsigned  phMax;           /* Termination phase number */
    273         unsigned  phNumInl;        /* Number of inliers for termination phase */
    274         unsigned  numModels;       /* Number of models tested */
    275         unsigned* smpl;            /* Sample of match indexes */
    276     } ctrl;
    277 
    278     /* Current model being tested */
    279     struct{
    280         float*    pkdPts;          /* Packed points */
    281         float*    H;               /* Homography */
    282         char*     inl;             /* Mask of inliers */
    283         unsigned  numInl;          /* Number of inliers */
    284     } curr;
    285 
    286     /* Best model (so far) */
    287     struct{
    288         float*    H;               /* Homography */
    289         char*     inl;             /* Mask of inliers */
    290         unsigned  numInl;          /* Number of inliers */
    291     } best;
    292 
    293     /* Non-randomness criterion */
    294     struct{
    295         std::vector<unsigned> tbl; /* Non-Randomness: Table */
    296         unsigned  size;            /* Non-Randomness: Size */
    297         double    beta;            /* Non-Randomness: Beta */
    298     } nr;
    299 
    300     /* SPRT Evaluator */
    301     struct{
    302         double    t_M;             /* t_M */
    303         double    m_S;             /* m_S */
    304         double    epsilon;         /* Epsilon */
    305         double    delta;           /* delta */
    306         double    A;               /* SPRT Threshold */
    307         unsigned  Ntested;         /* Number of points tested */
    308         unsigned  Ntestedtotal;    /* Number of points tested in total */
    309         int       good;            /* Good/bad flag */
    310         double    lambdaAccept;    /* Accept multiplier */
    311         double    lambdaReject;    /* Reject multiplier */
    312     } eval;
    313 
    314     /* Levenberg-Marquardt Refinement */
    315     struct{
    316         float  (* JtJ)[8];         /* JtJ matrix */
    317         float  (* tmp1)[8];        /* Temporary 1 */
    318         float*    Jte;             /* Jte vector */
    319     } lm;
    320 
    321     /* Memory Management */
    322     struct{
    323         cv::Mat perObj;
    324         cv::Mat perRun;
    325     } mem;
    326 
    327     /* Initialized? */
    328     int initialized;
    329 
    330 
    331     /* Empty constructors and destructors */
    332     public:
    333     RHO_HEST_REFC();
    334     private: /* Forbid copying. */
    335     RHO_HEST_REFC(const RHO_HEST_REFC&);
    336     public:
    337     ~RHO_HEST_REFC();
    338 
    339     /* Methods to implement external interface */
    340     inline int    initialize(void);
    341     inline void   finalize(void);
    342     inline int    ensureCapacity(unsigned N, double beta);
    343     unsigned      rhoHest(const float*   src,     /* Source points */
    344                           const float*   dst,     /* Destination points */
    345                           char*          inl,     /* Inlier mask */
    346                           unsigned       N,       /*  = src.length = dst.length = inl.length */
    347                           float          maxD,    /* Works:     3.0 */
    348                           unsigned       maxI,    /* Works:    2000 */
    349                           unsigned       rConvg,  /* Works:    2000 */
    350                           double         cfd,     /* Works:   0.995 */
    351                           unsigned       minInl,  /* Minimum:     4 */
    352                           double         beta,    /* Works:    0.35 */
    353                           unsigned       flags,   /* Works:       0 */
    354                           const float*   guessH,  /* Extrinsic guess, NULL if none provided */
    355                           float*         finalH); /* Final result. */
    356 
    357 
    358 
    359     /* Methods to implement internals */
    360     inline void   allocatePerObj(void);
    361     inline void   allocatePerRun(void);
    362     inline void   deallocatePerRun(void);
    363     inline void   deallocatePerObj(void);
    364     inline int    initRun(void);
    365     inline void   finiRun(void);
    366     inline int    haveExtrinsicGuess(void);
    367     inline int    hypothesize(void);
    368     inline int    verify(void);
    369     inline int    isNREnabled(void);
    370     inline int    isRefineEnabled(void);
    371     inline int    isFinalRefineEnabled(void);
    372     inline int    PROSACPhaseEndReached(void);
    373     inline void   PROSACGoToNextPhase(void);
    374     inline void   getPROSACSample(void);
    375     inline void   rndSmpl(unsigned  sampleSize,
    376                           unsigned* currentSample,
    377                           unsigned  dataSetSize);
    378     inline int    isSampleDegenerate(void);
    379     inline void   generateModel(void);
    380     inline int    isModelDegenerate(void);
    381     inline void   evaluateModelSPRT(void);
    382     inline void   updateSPRT(void);
    383     inline void   designSPRTTest(void);
    384     inline int    isBestModel(void);
    385     inline int    isBestModelGoodEnough(void);
    386     inline void   saveBestModel(void);
    387     inline void   nStarOptimize(void);
    388     inline void   updateBounds(void);
    389     inline void   outputModel(void);
    390     inline void   outputZeroH(void);
    391     inline int    canRefine(void);
    392     inline void   refine(void);
    393 };
    394 
    395 
    396 
    397 
    398 /**
    399  * Prototypes for purely-computational code.
    400  */
    401 
    402 static inline void   sacInitNonRand       (double    beta,
    403                                            unsigned  start,
    404                                            unsigned  N,
    405                                            unsigned* nonRandMinInl);
    406 static inline double sacInitPEndFpI       (const unsigned ransacConvg,
    407                                            const unsigned n,
    408                                            const unsigned s);
    409 static inline unsigned sacCalcIterBound   (double   confidence,
    410                                            double   inlierRate,
    411                                            unsigned sampleSize,
    412                                            unsigned maxIterBound);
    413 static inline void   hFuncRefC            (float* packedPoints, float* H);
    414 static inline void   sacCalcJacobianErrors(const float* H,
    415                                            const float* src,
    416                                            const float* dst,
    417                                            const char*  inl,
    418                                            unsigned     N,
    419                                            float     (* JtJ)[8],
    420                                            float*       Jte,
    421                                            float*       Sp);
    422 static inline float  sacLMGain            (const float*  dH,
    423                                            const float*  Jte,
    424                                            const float   S,
    425                                            const float   newS,
    426                                            const float   lambda);
    427 static inline int    sacChol8x8Damped     (const float (*A)[8],
    428                                            float         lambda,
    429                                            float       (*L)[8]);
    430 static inline void   sacTRInv8x8          (const float (*L)[8],
    431                                            float       (*M)[8]);
    432 static inline void   sacTRISolve8x8       (const float (*L)[8],
    433                                            const float*  Jte,
    434                                            float*        dH);
    435 static inline void   sacSub8x1            (float*       Hout,
    436                                            const float* H,
    437                                            const float* dH);
    438 
    439 
    440 
    441 /* Functions */
    442 
    443 /**
    444  * External access to context constructor.
    445  *
    446  * @return A pointer to the context if successful; NULL if an error occured.
    447  */
    448 
    449 Ptr<RHO_HEST> rhoInit(void){
    450     /* Select an optimized implementation of RHO here. */
    451 
    452 #if 1
    453     /**
    454      * For now, only the generic C implementation is available. In the future,
    455      * SSE2/AVX/AVX2/FMA/NEON versions may be added, and they will be selected
    456      * depending on cv::checkHardwareSupport()'s return values.
    457      */
    458 
    459     Ptr<RHO_HEST> p = Ptr<RHO_HEST>(new RHO_HEST_REFC);
    460 #endif
    461 
    462     /* Initialize it. */
    463     if(p){
    464         if(!p->initialize()){
    465             p.release();
    466         }
    467     }
    468 
    469     /* Return it. */
    470     return p;
    471 }
    472 
    473 
    474 /**
    475  * External access to non-randomness table resize.
    476  */
    477 
    478 int  rhoEnsureCapacity(Ptr<RHO_HEST> p, unsigned N, double beta){
    479     return p->ensureCapacity(N, beta);
    480 }
    481 
    482 
    483 /**
    484  * Seeds the internal PRNG with the given seed.
    485  */
    486 
    487 void rhoSeed(Ptr<RHO_HEST> p, uint64_t seed){
    488     p->fastSeed(seed);
    489 }
    490 
    491 
    492 /**
    493  * Estimates the homography using the given context, matches and parameters to
    494  * PROSAC.
    495  *
    496  * @param [in/out] p       The context to use for homography estimation. Must
    497  *                             be already initialized. Cannot be NULL.
    498  * @param [in]     src     The pointer to the source points of the matches.
    499  *                             Must be aligned to 4 bytes. Cannot be NULL.
    500  * @param [in]     dst     The pointer to the destination points of the matches.
    501  *                             Must be aligned to 16 bytes. Cannot be NULL.
    502  * @param [out]    inl     The pointer to the output mask of inlier matches.
    503  *                             Must be aligned to 16 bytes. May be NULL.
    504  * @param [in]     N       The number of matches.
    505  * @param [in]     maxD    The maximum distance.
    506  * @param [in]     maxI    The maximum number of PROSAC iterations.
    507  * @param [in]     rConvg  The RANSAC convergence parameter.
    508  * @param [in]     cfd     The required confidence in the solution.
    509  * @param [in]     minInl  The minimum required number of inliers.
    510  * @param [in]     beta    The beta-parameter for the non-randomness criterion.
    511  * @param [in]     flags   A union of flags to control the estimation.
    512  * @param [in]     guessH  An extrinsic guess at the solution H, or NULL if
    513  *                         none provided.
    514  * @param [out]    finalH  The final estimation of H, or the zero matrix if
    515  *                         the minimum number of inliers was not met.
    516  *                         Cannot be NULL.
    517  * @return                 The number of inliers if the minimum number of
    518  *                         inliers for acceptance was reached; 0 otherwise.
    519  */
    520 
    521 unsigned rhoHest(Ptr<RHO_HEST> p,       /* Homography estimation context. */
    522                  const float*  src,     /* Source points */
    523                  const float*  dst,     /* Destination points */
    524                  char*         inl,     /* Inlier mask */
    525                  unsigned      N,       /*  = src.length = dst.length = inl.length */
    526                  float         maxD,    /* Works:     3.0 */
    527                  unsigned      maxI,    /* Works:    2000 */
    528                  unsigned      rConvg,  /* Works:    2000 */
    529                  double        cfd,     /* Works:   0.995 */
    530                  unsigned      minInl,  /* Minimum:     4 */
    531                  double        beta,    /* Works:    0.35 */
    532                  unsigned      flags,   /* Works:       0 */
    533                  const float*  guessH,  /* Extrinsic guess, NULL if none provided */
    534                  float*        finalH){ /* Final result. */
    535     return p->rhoHest(src, dst, inl, N, maxD, maxI, rConvg, cfd, minInl, beta,
    536                       flags, guessH, finalH);
    537 }
    538 
    539 
    540 
    541 
    542 
    543 
    544 
    545 
    546 
    547 
    548 
    549 
    550 /*********************** RHO_HEST_REFC implementation **********************/
    551 
    552 /**
    553  * Constructor for RHO_HEST_REFC.
    554  *
    555  * Does nothing. True initialization is done by initialize().
    556  */
    557 
    558 RHO_HEST_REFC::RHO_HEST_REFC() : initialized(0){
    559 
    560 }
    561 
    562 /**
    563  * Private copy constructor for RHO_HEST_REFC. Disabled.
    564  */
    565 
    566 RHO_HEST_REFC::RHO_HEST_REFC(const RHO_HEST_REFC&) : initialized(0){
    567 
    568 }
    569 
    570 /**
    571  * Destructor for RHO_HEST_REFC.
    572  */
    573 
    574 RHO_HEST_REFC::~RHO_HEST_REFC(){
    575     if(initialized){
    576         finalize();
    577     }
    578 }
    579 
    580 
    581 
    582 /**
    583  * Initialize the estimator context, by allocating the aligned buffers
    584  * internally needed.
    585  *
    586  * Currently there are 5 per-estimator buffers:
    587  * - The buffer of m indexes representing a sample
    588  * - The buffer of 16 floats representing m matches (x,y) -> (X,Y).
    589  * - The buffer for the current homography
    590  * - The buffer for the best-so-far homography
    591  * - Optionally, the non-randomness criterion table
    592  *
    593  * Returns 0 if unsuccessful and non-0 otherwise.
    594  */
    595 
    596 inline int    RHO_HEST_REFC::initialize(void){
    597     initialized = 0;
    598 
    599 
    600     allocatePerObj();
    601 
    602     curr.inl    = NULL;
    603     curr.numInl = 0;
    604 
    605     best.inl    = NULL;
    606     best.numInl = 0;
    607 
    608     nr.size     = 0;
    609     nr.beta     = 0.0;
    610 
    611 
    612     fastSeed((uint64_t)~0);
    613 
    614 
    615     int areAllAllocsSuccessful = !mem.perObj.empty();
    616 
    617     if(!areAllAllocsSuccessful){
    618         finalize();
    619     }else{
    620         initialized = 1;
    621     }
    622 
    623     return areAllAllocsSuccessful;
    624 }
    625 
    626 /**
    627  * Finalize.
    628  *
    629  * Finalize the estimator context, by freeing the aligned buffers used
    630  * internally.
    631  */
    632 
    633 inline void   RHO_HEST_REFC::finalize(void){
    634     if(initialized){
    635         deallocatePerObj();
    636 
    637         initialized = 0;
    638     }
    639 }
    640 
    641 /**
    642  * Ensure that the estimator context's internal table for non-randomness
    643  * criterion is at least of the given size, and uses the given beta. The table
    644  * should be larger than the maximum number of matches fed into the estimator.
    645  *
    646  * A value of N of 0 requests deallocation of the table.
    647  *
    648  * @param [in] N     If 0, deallocate internal table. If > 0, ensure that the
    649  *                   internal table is of at least this size, reallocating if
    650  *                   necessary.
    651  * @param [in] beta  The beta-factor to use within the table.
    652  * @return 0 if unsuccessful; non-zero otherwise.
    653  *
    654  * Reads:  nr.*
    655  * Writes: nr.*
    656  */
    657 
    658 inline int    RHO_HEST_REFC::ensureCapacity(unsigned N, double beta){
    659     if(N == 0){
    660         /* Clear. */
    661         nr.tbl.clear();
    662         nr.size = 0;
    663     }else if(nr.beta != beta){
    664         /* Beta changed. Redo all the work. */
    665         nr.tbl.resize(N);
    666         nr.beta = beta;
    667         sacInitNonRand(nr.beta, 0, N, &nr.tbl[0]);
    668         nr.size = N;
    669     }else if(N > nr.size){
    670         /* Work is partially done. Do rest of it. */
    671         nr.tbl.resize(N);
    672         sacInitNonRand(nr.beta, nr.size, N, &nr.tbl[nr.size]);
    673         nr.size = N;
    674     }else{
    675         /* Work is already done. Do nothing. */
    676     }
    677 
    678     return 1;
    679 }
    680 
    681 
    682 /**
    683  * Estimates the homography using the given context, matches and parameters to
    684  * PROSAC.
    685  *
    686  * @param [in]     src     The pointer to the source points of the matches.
    687  *                             Must be aligned to 4 bytes. Cannot be NULL.
    688  * @param [in]     dst     The pointer to the destination points of the matches.
    689  *                             Must be aligned to 4 bytes. Cannot be NULL.
    690  * @param [out]    inl     The pointer to the output mask of inlier matches.
    691  *                             Must be aligned to 4 bytes. May be NULL.
    692  * @param [in]     N       The number of matches.
    693  * @param [in]     maxD    The maximum distance.
    694  * @param [in]     maxI    The maximum number of PROSAC iterations.
    695  * @param [in]     rConvg  The RANSAC convergence parameter.
    696  * @param [in]     cfd     The required confidence in the solution.
    697  * @param [in]     minInl  The minimum required number of inliers.
    698  * @param [in]     beta    The beta-parameter for the non-randomness criterion.
    699  * @param [in]     flags   A union of flags to control the estimation.
    700  * @param [in]     guessH  An extrinsic guess at the solution H, or NULL if
    701  *                         none provided.
    702  * @param [out]    finalH  The final estimation of H, or the zero matrix if
    703  *                         the minimum number of inliers was not met.
    704  *                         Cannot be NULL.
    705  * @return                 The number of inliers if the minimum number of
    706  *                         inliers for acceptance was reached; 0 otherwise.
    707  */
    708 
    709 unsigned RHO_HEST_REFC::rhoHest(const float*   src,     /* Source points */
    710                                 const float*   dst,     /* Destination points */
    711                                 char*          inl,     /* Inlier mask */
    712                                 unsigned       N,       /*  = src.length = dst.length = inl.length */
    713                                 float          maxD,    /* Works:     3.0 */
    714                                 unsigned       maxI,    /* Works:    2000 */
    715                                 unsigned       rConvg,  /* Works:    2000 */
    716                                 double         cfd,     /* Works:   0.995 */
    717                                 unsigned       minInl,  /* Minimum:     4 */
    718                                 double         beta,    /* Works:    0.35 */
    719                                 unsigned       flags,   /* Works:       0 */
    720                                 const float*   guessH,  /* Extrinsic guess, NULL if none provided */
    721                                 float*         finalH){ /* Final result. */
    722 
    723     /**
    724      * Setup
    725      */
    726 
    727     arg.src     = src;
    728     arg.dst     = dst;
    729     arg.inl     = inl;
    730     arg.N       = N;
    731     arg.maxD    = maxD;
    732     arg.maxI    = maxI;
    733     arg.rConvg  = rConvg;
    734     arg.cfd     = cfd;
    735     arg.minInl  = minInl;
    736     arg.beta    = beta;
    737     arg.flags   = flags;
    738     arg.guessH  = guessH;
    739     arg.finalH  = finalH;
    740     if(!initRun()){
    741         outputZeroH();
    742         finiRun();
    743         return 0;
    744     }
    745 
    746     /**
    747      * Extrinsic Guess
    748      */
    749 
    750     if(haveExtrinsicGuess()){
    751         verify();
    752     }
    753 
    754 
    755     /**
    756      * PROSAC Loop
    757      */
    758 
    759     for(ctrl.i=0; ctrl.i < arg.maxI || ctrl.i < 100; ctrl.i++){
    760         hypothesize() && verify();
    761     }
    762 
    763 
    764     /**
    765      * Teardown
    766      */
    767 
    768     if(isFinalRefineEnabled() && canRefine()){
    769         refine();
    770     }
    771 
    772     outputModel();
    773     finiRun();
    774     return isBestModelGoodEnough() ? best.numInl : 0;
    775 }
    776 
    777 
    778 /**
    779  * Allocate per-object dynamic storage.
    780  *
    781  * This includes aligned, fixed-size internal buffers, but excludes any buffers
    782  * whose size cannot be determined ahead-of-time (before the number of matches
    783  * is known).
    784  *
    785  * All buffer memory is allocated in one single shot, and all pointers are
    786  * initialized.
    787  */
    788 
    789 inline void   RHO_HEST_REFC::allocatePerObj(void){
    790     /* We have known sizes */
    791     size_t ctrl_smpl_sz   = SMPL_SIZE*sizeof(*ctrl.smpl);
    792     size_t curr_pkdPts_sz = SMPL_SIZE*2*2*sizeof(*curr.pkdPts);
    793     size_t curr_H_sz      = HSIZE;
    794     size_t best_H_sz      = HSIZE;
    795     size_t lm_JtJ_sz      = 8*8*sizeof(float);
    796     size_t lm_tmp1_sz     = 8*8*sizeof(float);
    797     size_t lm_Jte_sz      = 1*8*sizeof(float);
    798 
    799     /* We compute offsets */
    800     size_t total = 0;
    801 #define MK_OFFSET(v)                                     \
    802     size_t v ## _of = total;                             \
    803     total = alignSize(v ## _of  +  v ## _sz, MEM_ALIGN)
    804 
    805     MK_OFFSET(ctrl_smpl);
    806     MK_OFFSET(curr_pkdPts);
    807     MK_OFFSET(curr_H);
    808     MK_OFFSET(best_H);
    809     MK_OFFSET(lm_JtJ);
    810     MK_OFFSET(lm_tmp1);
    811     MK_OFFSET(lm_Jte);
    812 
    813 #undef MK_OFFSET
    814 
    815     /* Allocate dynamic memory managed by cv::Mat */
    816     mem.perObj.create(1, (int)(total + MEM_ALIGN), CV_8UC1);
    817 
    818     /* Extract aligned pointer */
    819     unsigned char* ptr = alignPtr(mem.perObj.data, MEM_ALIGN);
    820 
    821     /* Assign pointers */
    822     ctrl.smpl   = (unsigned*)  (ptr + ctrl_smpl_of);
    823     curr.pkdPts = (float*)     (ptr + curr_pkdPts_of);
    824     curr.H      = (float*)     (ptr + curr_H_of);
    825     best.H      = (float*)     (ptr + best_H_of);
    826     lm.JtJ      = (float(*)[8])(ptr + lm_JtJ_of);
    827     lm.tmp1     = (float(*)[8])(ptr + lm_tmp1_of);
    828     lm.Jte      = (float*)     (ptr + lm_Jte_of);
    829 }
    830 
    831 
    832 /**
    833  * Allocate per-run dynamic storage.
    834  *
    835  * This includes storage that is proportional to the number of points, such as
    836  * the inlier mask.
    837  */
    838 
    839 inline void   RHO_HEST_REFC::allocatePerRun(void){
    840     /* We have known sizes */
    841     size_t best_inl_sz = arg.N;
    842     size_t curr_inl_sz = arg.N;
    843 
    844     /* We compute offsets */
    845     size_t total = 0;
    846 #define MK_OFFSET(v)                                     \
    847     size_t v ## _of = total;                             \
    848     total = alignSize(v ## _of  +  v ## _sz, MEM_ALIGN)
    849 
    850     MK_OFFSET(best_inl);
    851     MK_OFFSET(curr_inl);
    852 
    853 #undef MK_OFFSET
    854 
    855     /* Allocate dynamic memory managed by cv::Mat */
    856     mem.perRun.create(1, (int)(total + MEM_ALIGN), CV_8UC1);
    857 
    858     /* Extract aligned pointer */
    859     unsigned char* ptr = alignPtr(mem.perRun.data, MEM_ALIGN);
    860 
    861     /* Assign pointers */
    862     best.inl  = (char*)(ptr + best_inl_of);
    863     curr.inl  = (char*)(ptr + curr_inl_of);
    864 }
    865 
    866 
    867 /**
    868  * Deallocate per-run dynamic storage.
    869  *
    870  * Undoes the work by allocatePerRun().
    871  */
    872 
    873 inline void   RHO_HEST_REFC::deallocatePerRun(void){
    874     best.inl  = NULL;
    875     curr.inl  = NULL;
    876 
    877     mem.perRun.release();
    878 }
    879 
    880 
    881 /**
    882  * Deallocate per-object dynamic storage.
    883  *
    884  * Undoes the work by allocatePerObj().
    885  */
    886 
    887 inline void   RHO_HEST_REFC::deallocatePerObj(void){
    888     ctrl.smpl   = NULL;
    889     curr.pkdPts = NULL;
    890     curr.H      = NULL;
    891     best.H      = NULL;
    892     lm.JtJ      = NULL;
    893     lm.tmp1     = NULL;
    894     lm.Jte      = NULL;
    895 
    896     mem.perObj.release();
    897 }
    898 
    899 
    900 /**
    901  * Initialize SAC for a run given its arguments.
    902  *
    903  * Performs sanity-checks and memory allocations. Also initializes the state.
    904  *
    905  * @returns 0 if per-run initialization failed at any point; non-zero
    906  *          otherwise.
    907  *
    908  * Reads:  arg.*, nr.*
    909  * Writes: curr.*, best.*, ctrl.*, eval.*
    910  */
    911 
    912 inline int    RHO_HEST_REFC::initRun(void){
    913     /**
    914      * Sanitize arguments.
    915      *
    916      * Runs zeroth because these are easy-to-check errors and unambiguously
    917      * mean something or other.
    918      */
    919 
    920     if(!arg.src || !arg.dst){
    921         /* Arguments src or dst are insane, must be != NULL */
    922         return 0;
    923     }
    924     if(arg.N < (unsigned)SMPL_SIZE){
    925         /* Argument N is insane, must be >= 4. */
    926         return 0;
    927     }
    928     if(arg.maxD < 0){
    929         /* Argument maxD is insane, must be >= 0. */
    930         return 0;
    931     }
    932     if(arg.cfd < 0 || arg.cfd > 1){
    933         /* Argument cfd is insane, must be in [0, 1]. */
    934         return 0;
    935     }
    936     /* Clamp minInl to 4 or higher. */
    937     arg.minInl = arg.minInl < (unsigned)SMPL_SIZE ? SMPL_SIZE : arg.minInl;
    938     if(isNREnabled() && (arg.beta <= 0 || arg.beta >= 1)){
    939         /* Argument beta is insane, must be in (0, 1). */
    940         return 0;
    941     }
    942     if(!arg.finalH){
    943         /* Argument finalH is insane, must be != NULL */
    944         return 0;
    945     }
    946 
    947     /**
    948      * Optional NR setup.
    949      *
    950      * Runs first because it is decoupled from most other things (*) and if it
    951      * fails, it is easy to recover from.
    952      *
    953      * (*) The only things this code depends on is the flags argument, the nr.*
    954      *     substruct and the sanity-checked N and beta arguments from above.
    955      */
    956 
    957     if(isNREnabled() && !ensureCapacity(arg.N, arg.beta)){
    958         return 0;
    959     }
    960 
    961     /**
    962      * Inlier mask alloc.
    963      *
    964      * Runs second because we want to quit as fast as possible if we can't even
    965      * allocate the two masks.
    966      */
    967 
    968     allocatePerRun();
    969 
    970     memset(best.inl, 0, arg.N);
    971     memset(curr.inl, 0, arg.N);
    972 
    973     /**
    974      * Reset scalar per-run state.
    975      *
    976      * Runs third because there's no point in resetting/calculating a large
    977      * number of fields if something in the above junk failed.
    978      */
    979 
    980     ctrl.i            = 0;
    981     ctrl.phNum        = SMPL_SIZE;
    982     ctrl.phEndI       = 1;
    983     ctrl.phEndFpI     = sacInitPEndFpI(arg.rConvg, arg.N, SMPL_SIZE);
    984     ctrl.phMax        = arg.N;
    985     ctrl.phNumInl     = 0;
    986     ctrl.numModels    = 0;
    987 
    988     if(haveExtrinsicGuess()){
    989         memcpy(curr.H, arg.guessH, HSIZE);
    990     }else{
    991         memset(curr.H, 0, HSIZE);
    992     }
    993     curr.numInl       = 0;
    994 
    995     memset(best.H, 0, HSIZE);
    996     best.numInl       = 0;
    997 
    998     eval.Ntested      = 0;
    999     eval.Ntestedtotal = 0;
   1000     eval.good         = 1;
   1001     eval.t_M          = SPRT_T_M;
   1002     eval.m_S          = SPRT_M_S;
   1003     eval.epsilon      = SPRT_EPSILON;
   1004     eval.delta        = SPRT_DELTA;
   1005     designSPRTTest();
   1006 
   1007     return 1;
   1008 }
   1009 
   1010 /**
   1011  * Finalize SAC run.
   1012  *
   1013  * Deallocates per-run allocatable resources. Currently this consists only of
   1014  * the best and current inlier masks, which are equal in size to p->arg.N
   1015  * bytes.
   1016  *
   1017  * Reads:  arg.bestInl, curr.inl, best.inl
   1018  * Writes: curr.inl, best.inl
   1019  */
   1020 
   1021 inline void   RHO_HEST_REFC::finiRun(void){
   1022     deallocatePerRun();
   1023 }
   1024 
   1025 /**
   1026  * Hypothesize a model.
   1027  *
   1028  * Selects randomly a sample (within the rules of PROSAC) and generates a
   1029  * new current model, and applies degeneracy tests to it.
   1030  *
   1031  * @returns 0 if hypothesized model could be rejected early as degenerate, and
   1032  * non-zero otherwise.
   1033  */
   1034 
   1035 inline int    RHO_HEST_REFC::hypothesize(void){
   1036     if(PROSACPhaseEndReached()){
   1037         PROSACGoToNextPhase();
   1038     }
   1039 
   1040     getPROSACSample();
   1041     if(isSampleDegenerate()){
   1042         return 0;
   1043     }
   1044 
   1045     generateModel();
   1046     if(isModelDegenerate()){
   1047         return 0;
   1048     }
   1049 
   1050     return 1;
   1051 }
   1052 
   1053 /**
   1054  * Verify the hypothesized model.
   1055  *
   1056  * Given the current model, evaluate its quality. If it is better than
   1057  * everything before, save as new best model (and possibly refine it).
   1058  *
   1059  * Returns 1.
   1060  */
   1061 
   1062 inline int    RHO_HEST_REFC::verify(void){
   1063     evaluateModelSPRT();
   1064     updateSPRT();
   1065 
   1066     if(isBestModel()){
   1067         saveBestModel();
   1068 
   1069         if(isRefineEnabled() && canRefine()){
   1070             refine();
   1071         }
   1072 
   1073         updateBounds();
   1074 
   1075         if(isNREnabled()){
   1076             nStarOptimize();
   1077         }
   1078     }
   1079 
   1080     return 1;
   1081 }
   1082 
   1083 /**
   1084  * Check whether extrinsic guess was provided or not.
   1085  *
   1086  * @return Zero if no extrinsic guess was provided; non-zero otherwiseEE.
   1087  */
   1088 
   1089 inline int    RHO_HEST_REFC::haveExtrinsicGuess(void){
   1090     return !!arg.guessH;
   1091 }
   1092 
   1093 /**
   1094  * Check whether non-randomness criterion is enabled.
   1095  *
   1096  * @return Zero if non-randomness criterion disabled; non-zero if not.
   1097  */
   1098 
   1099 inline int    RHO_HEST_REFC::isNREnabled(void){
   1100     return arg.flags & RHO_FLAG_ENABLE_NR;
   1101 }
   1102 
   1103 /**
   1104  * Check whether best-model-so-far refinement is enabled.
   1105  *
   1106  * @return Zero if best-model-so-far refinement disabled; non-zero if not.
   1107  */
   1108 
   1109 inline int    RHO_HEST_REFC::isRefineEnabled(void){
   1110     return arg.flags & RHO_FLAG_ENABLE_REFINEMENT;
   1111 }
   1112 
   1113 /**
   1114  * Check whether final-model refinement is enabled.
   1115  *
   1116  * @return Zero if final-model refinement disabled; non-zero if not.
   1117  */
   1118 
   1119 inline int    RHO_HEST_REFC::isFinalRefineEnabled(void){
   1120     return arg.flags & RHO_FLAG_ENABLE_FINAL_REFINEMENT;
   1121 }
   1122 
   1123 /**
   1124  * Computes whether the end of the current PROSAC phase has been reached. At
   1125  * PROSAC phase phNum, only matches [0, phNum) are sampled from.
   1126  *
   1127  * Reads    (direct): ctrl.i, ctrl.phEndI, ctrl.phNum, ctrl.phMax
   1128  * Reads   (callees): None.
   1129  * Writes   (direct): None.
   1130  * Writes  (callees): None.
   1131  */
   1132 
   1133 inline int    RHO_HEST_REFC::PROSACPhaseEndReached(void){
   1134     return ctrl.i >= ctrl.phEndI && ctrl.phNum < ctrl.phMax;
   1135 }
   1136 
   1137 /**
   1138  * Updates unconditionally the necessary fields to move to the next PROSAC
   1139  * stage.
   1140  *
   1141  * Not idempotent.
   1142  *
   1143  * Reads    (direct): ctrl.phNum, ctrl.phEndFpI, ctrl.phEndI
   1144  * Reads   (callees): None.
   1145  * Writes   (direct): ctrl.phNum, ctrl.phEndFpI, ctrl.phEndI
   1146  * Writes  (callees): None.
   1147  */
   1148 
   1149 inline void   RHO_HEST_REFC::PROSACGoToNextPhase(void){
   1150     double next;
   1151 
   1152     ctrl.phNum++;
   1153     next = (ctrl.phEndFpI * ctrl.phNum)/(ctrl.phNum - SMPL_SIZE);
   1154     ctrl.phEndI  += (unsigned)ceil(next - ctrl.phEndFpI);
   1155     ctrl.phEndFpI = next;
   1156 }
   1157 
   1158 /**
   1159  * Get a sample according to PROSAC rules. Namely:
   1160  * - If we're past the phase end interation, select randomly 4 out of the first
   1161  *   phNum matches.
   1162  * - Otherwise, select match phNum-1 and select randomly the 3 others out of
   1163  *   the first phNum-1 matches.
   1164  *
   1165  * Reads    (direct): ctrl.i, ctrl.phEndI, ctrl.phNum
   1166  * Reads   (callees): prng.s
   1167  * Writes   (direct): ctrl.smpl
   1168  * Writes  (callees): prng.s
   1169  */
   1170 
   1171 inline void   RHO_HEST_REFC::getPROSACSample(void){
   1172     if(ctrl.i > ctrl.phEndI){
   1173         /* FIXME: Dubious. Review. */
   1174         rndSmpl(4, ctrl.smpl, ctrl.phNum);/* Used to be phMax */
   1175     }else{
   1176         rndSmpl(3, ctrl.smpl, ctrl.phNum-1);
   1177         ctrl.smpl[3] = ctrl.phNum-1;
   1178     }
   1179 }
   1180 
   1181 /**
   1182  * Choose, without repetition, sampleSize integers in the range [0, numDataPoints).
   1183  *
   1184  * Reads    (direct): None.
   1185  * Reads   (callees): prng.s
   1186  * Writes   (direct): None.
   1187  * Writes  (callees): prng.s
   1188  */
   1189 
   1190 inline void   RHO_HEST_REFC::rndSmpl(unsigned  sampleSize,
   1191                                      unsigned* currentSample,
   1192                                      unsigned  dataSetSize){
   1193     /**
   1194      * If sampleSize is very close to dataSetSize, we use selection sampling.
   1195      * Otherwise we use the naive sampling technique wherein we select random
   1196      * indexes until sampleSize of them are distinct.
   1197      */
   1198 
   1199     if(sampleSize*2>dataSetSize){
   1200         /**
   1201          * Selection Sampling:
   1202          *
   1203          * Algorithm S (Selection sampling technique). To select n records at random from a set of N, where 0 < n  N.
   1204          * S1. [Initialize.] Set t  0, m  0. (During this algorithm, m represents the number of records selected so far,
   1205          *                                      and t is the total number of input records that we have dealt with.)
   1206          * S2. [Generate U.] Generate a random number U, uniformly distributed between zero and one.
   1207          * S3. [Test.] If (N  t)U  n  m, go to step S5.
   1208          * S4. [Select.] Select the next record for the sample, and increase m and t by 1. If m < n, go to step S2;
   1209          *               otherwise the sample is complete and the algorithm terminates.
   1210          * S5. [Skip.] Skip the next record (do not include it in the sample), increase t by 1, and go back to step S2.
   1211          *
   1212          * Replaced m with i and t with j in the below code.
   1213          */
   1214 
   1215         unsigned i=0,j=0;
   1216 
   1217         for(i=0;i<sampleSize;j++){
   1218             double U=fastRandom();
   1219             if((dataSetSize-j)*U < (sampleSize-i)){
   1220                 currentSample[i++]=j;
   1221             }
   1222         }
   1223     }else{
   1224         /**
   1225          * Naive sampling technique. Generate indexes until sampleSize of them are distinct.
   1226          */
   1227 
   1228         unsigned i, j;
   1229         for(i=0;i<sampleSize;i++){
   1230             int inList;
   1231 
   1232             do{
   1233                 currentSample[i] = (unsigned)(dataSetSize*fastRandom());
   1234 
   1235                 inList=0;
   1236                 for(j=0;j<i;j++){
   1237                     if(currentSample[i] == currentSample[j]){
   1238                         inList=1;
   1239                         break;
   1240                     }
   1241                 }
   1242             }while(inList);
   1243         }
   1244     }
   1245 }
   1246 
   1247 /**
   1248  * Checks whether the *sample* is degenerate prior to model generation.
   1249  * - First, the extremely cheap numerical degeneracy test is run, which weeds
   1250  *   out bad samples to the optimized GE implementation.
   1251  * - Second, the geometrical degeneracy test is run, which weeds out most other
   1252  *   bad samples.
   1253  *
   1254  * Reads    (direct): ctrl.smpl, arg.src, arg.dst
   1255  * Reads   (callees): None.
   1256  * Writes   (direct): curr.pkdPts
   1257  * Writes  (callees): None.
   1258  */
   1259 
   1260 inline int    RHO_HEST_REFC::isSampleDegenerate(void){
   1261     unsigned i0 = ctrl.smpl[0], i1 = ctrl.smpl[1], i2 = ctrl.smpl[2], i3 = ctrl.smpl[3];
   1262     typedef struct{float x,y;} MyPt2f;
   1263     MyPt2f* pkdPts = (MyPt2f*)curr.pkdPts, *src = (MyPt2f*)arg.src, *dst = (MyPt2f*)arg.dst;
   1264 
   1265     /**
   1266      * Pack the matches selected by the SAC algorithm.
   1267      * Must be packed  points[0:7]  = {srcx0, srcy0, srcx1, srcy1, srcx2, srcy2, srcx3, srcy3}
   1268      *                 points[8:15] = {dstx0, dsty0, dstx1, dsty1, dstx2, dsty2, dstx3, dsty3}
   1269      * Gather 4 points into the vector
   1270      */
   1271 
   1272     pkdPts[0] = src[i0];
   1273     pkdPts[1] = src[i1];
   1274     pkdPts[2] = src[i2];
   1275     pkdPts[3] = src[i3];
   1276     pkdPts[4] = dst[i0];
   1277     pkdPts[5] = dst[i1];
   1278     pkdPts[6] = dst[i2];
   1279     pkdPts[7] = dst[i3];
   1280 
   1281     /**
   1282      * If the matches' source points have common x and y coordinates, abort.
   1283      */
   1284 
   1285     if(pkdPts[0].x == pkdPts[1].x || pkdPts[1].x == pkdPts[2].x ||
   1286        pkdPts[2].x == pkdPts[3].x || pkdPts[0].x == pkdPts[2].x ||
   1287        pkdPts[1].x == pkdPts[3].x || pkdPts[0].x == pkdPts[3].x ||
   1288        pkdPts[0].y == pkdPts[1].y || pkdPts[1].y == pkdPts[2].y ||
   1289        pkdPts[2].y == pkdPts[3].y || pkdPts[0].y == pkdPts[2].y ||
   1290        pkdPts[1].y == pkdPts[3].y || pkdPts[0].y == pkdPts[3].y){
   1291         return 1;
   1292     }
   1293 
   1294     /* If the matches do not satisfy the strong geometric constraint, abort. */
   1295     /* (0 x 1) * 2 */
   1296     float cross0s0 = pkdPts[0].y-pkdPts[1].y;
   1297     float cross0s1 = pkdPts[1].x-pkdPts[0].x;
   1298     float cross0s2 = pkdPts[0].x*pkdPts[1].y-pkdPts[0].y*pkdPts[1].x;
   1299     float dots0    = cross0s0*pkdPts[2].x + cross0s1*pkdPts[2].y + cross0s2;
   1300     float cross0d0 = pkdPts[4].y-pkdPts[5].y;
   1301     float cross0d1 = pkdPts[5].x-pkdPts[4].x;
   1302     float cross0d2 = pkdPts[4].x*pkdPts[5].y-pkdPts[4].y*pkdPts[5].x;
   1303     float dotd0    = cross0d0*pkdPts[6].x + cross0d1*pkdPts[6].y + cross0d2;
   1304     if(((int)dots0^(int)dotd0) < 0){
   1305         return 1;
   1306     }
   1307     /* (0 x 1) * 3 */
   1308     float cross1s0 = cross0s0;
   1309     float cross1s1 = cross0s1;
   1310     float cross1s2 = cross0s2;
   1311     float dots1    = cross1s0*pkdPts[3].x + cross1s1*pkdPts[3].y + cross1s2;
   1312     float cross1d0 = cross0d0;
   1313     float cross1d1 = cross0d1;
   1314     float cross1d2 = cross0d2;
   1315     float dotd1    = cross1d0*pkdPts[7].x + cross1d1*pkdPts[7].y + cross1d2;
   1316     if(((int)dots1^(int)dotd1) < 0){
   1317         return 1;
   1318     }
   1319     /* (2 x 3) * 0 */
   1320     float cross2s0 = pkdPts[2].y-pkdPts[3].y;
   1321     float cross2s1 = pkdPts[3].x-pkdPts[2].x;
   1322     float cross2s2 = pkdPts[2].x*pkdPts[3].y-pkdPts[2].y*pkdPts[3].x;
   1323     float dots2    = cross2s0*pkdPts[0].x + cross2s1*pkdPts[0].y + cross2s2;
   1324     float cross2d0 = pkdPts[6].y-pkdPts[7].y;
   1325     float cross2d1 = pkdPts[7].x-pkdPts[6].x;
   1326     float cross2d2 = pkdPts[6].x*pkdPts[7].y-pkdPts[6].y*pkdPts[7].x;
   1327     float dotd2    = cross2d0*pkdPts[4].x + cross2d1*pkdPts[4].y + cross2d2;
   1328     if(((int)dots2^(int)dotd2) < 0){
   1329         return 1;
   1330     }
   1331     /* (2 x 3) * 1 */
   1332     float cross3s0 = cross2s0;
   1333     float cross3s1 = cross2s1;
   1334     float cross3s2 = cross2s2;
   1335     float dots3    = cross3s0*pkdPts[1].x + cross3s1*pkdPts[1].y + cross3s2;
   1336     float cross3d0 = cross2d0;
   1337     float cross3d1 = cross2d1;
   1338     float cross3d2 = cross2d2;
   1339     float dotd3    = cross3d0*pkdPts[5].x + cross3d1*pkdPts[5].y + cross3d2;
   1340     if(((int)dots3^(int)dotd3) < 0){
   1341         return 1;
   1342     }
   1343 
   1344     /* Otherwise, accept */
   1345     return 0;
   1346 }
   1347 
   1348 /**
   1349  * Compute homography of matches in gathered, packed sample and output the
   1350  * current homography.
   1351  *
   1352  * Reads    (direct): None.
   1353  * Reads   (callees): curr.pkdPts
   1354  * Writes   (direct): None.
   1355  * Writes  (callees): curr.H
   1356  */
   1357 
   1358 inline void   RHO_HEST_REFC::generateModel(void){
   1359     hFuncRefC(curr.pkdPts, curr.H);
   1360 }
   1361 
   1362 /**
   1363  * Checks whether the model is itself degenerate.
   1364  * - One test: All elements of the homography are added, and if the result is
   1365  *   NaN the homography is rejected.
   1366  *
   1367  * Reads    (direct): curr.H
   1368  * Reads   (callees): None.
   1369  * Writes   (direct): None.
   1370  * Writes  (callees): None.
   1371  */
   1372 
   1373 inline int    RHO_HEST_REFC::isModelDegenerate(void){
   1374     int degenerate;
   1375     float* H = curr.H;
   1376     float f=H[0]+H[1]+H[2]+H[3]+H[4]+H[5]+H[6]+H[7];
   1377 
   1378     /* degenerate = isnan(f); */
   1379     /* degenerate = f!=f;// Only NaN is not equal to itself. */
   1380     degenerate = cvIsNaN(f);
   1381     /* degenerate = 0; */
   1382 
   1383 
   1384     return degenerate;
   1385 }
   1386 
   1387 /**
   1388  * Evaluates the current model using SPRT for early exiting.
   1389  *
   1390  * Reads    (direct): arg.maxD, arg.src, arg.dst, arg.N, curr.inl, curr.H,
   1391  *                    ctrl.numModels, eval.Ntestedtotal, eval.lambdaAccept,
   1392  *                    eval.lambdaReject, eval.A
   1393  * Reads   (callees): None.
   1394  * Writes   (direct): ctrl.numModels, curr.numInl, eval.Ntested, eval.good,
   1395  *                    eval.Ntestedtotal
   1396  * Writes  (callees): None.
   1397  */
   1398 
   1399 inline void   RHO_HEST_REFC::evaluateModelSPRT(void){
   1400     unsigned i;
   1401     unsigned isInlier;
   1402     double   lambda  = 1.0;
   1403     float    distSq  = arg.maxD*arg.maxD;
   1404     const float* src = arg.src;
   1405     const float* dst = arg.dst;
   1406     char*    inl     = curr.inl;
   1407     const float*   H = curr.H;
   1408 
   1409 
   1410     ctrl.numModels++;
   1411 
   1412     curr.numInl   = 0;
   1413     eval.Ntested  = 0;
   1414     eval.good     = 1;
   1415 
   1416 
   1417     /* SCALAR */
   1418     for(i=0;i<arg.N && eval.good;i++){
   1419         /* Backproject */
   1420         float x=src[i*2],y=src[i*2+1];
   1421         float X=dst[i*2],Y=dst[i*2+1];
   1422 
   1423         float reprojX=H[0]*x+H[1]*y+H[2]; /*  ( X_1 )     ( H_11 H_12    H_13  ) (x_1)       */
   1424         float reprojY=H[3]*x+H[4]*y+H[5]; /*  ( X_2 )  =  ( H_21 H_22    H_23  ) (x_2)       */
   1425         float reprojZ=H[6]*x+H[7]*y+1.0f; /*  ( X_3 )     ( H_31 H_32 H_33=1.0 ) (x_3 = 1.0) */
   1426 
   1427         /* reproj is in homogeneous coordinates. To bring back to "regular" coordinates, divide by Z. */
   1428         reprojX/=reprojZ;
   1429         reprojY/=reprojZ;
   1430 
   1431         /* Compute distance */
   1432         reprojX-=X;
   1433         reprojY-=Y;
   1434         reprojX*=reprojX;
   1435         reprojY*=reprojY;
   1436         float reprojDist = reprojX+reprojY;
   1437 
   1438         /* ... */
   1439         isInlier   = reprojDist <= distSq;
   1440         curr.numInl += isInlier;
   1441         *inl++     = (char)isInlier;
   1442 
   1443 
   1444         /* SPRT */
   1445         lambda *= isInlier ? eval.lambdaAccept : eval.lambdaReject;
   1446         eval.good = lambda <= eval.A;
   1447         /* If !good, the threshold A was exceeded, so we're rejecting */
   1448     }
   1449 
   1450 
   1451     eval.Ntested       = i;
   1452     eval.Ntestedtotal += i;
   1453 }
   1454 
   1455 /**
   1456  * Update either the delta or epsilon SPRT parameters, depending on the events
   1457  * that transpired in the previous evaluation.
   1458  *
   1459  * Reads    (direct): eval.good, curr.numInl, arg.N, eval.Ntested, eval.delta
   1460  * Reads   (callees): eval.delta, eval.epsilon, eval.t_M, eval.m_S
   1461  * Writes   (direct): eval.epsilon, eval.delta
   1462  * Writes  (callees): eval.A, eval.lambdaReject, eval.lambdaAccept
   1463  */
   1464 
   1465 inline void   RHO_HEST_REFC::updateSPRT(void){
   1466     if(eval.good){
   1467         if(isBestModel()){
   1468             eval.epsilon = (double)curr.numInl/arg.N;
   1469             designSPRTTest();
   1470         }
   1471     }else{
   1472         double newDelta = (double)curr.numInl/eval.Ntested;
   1473 
   1474         if(newDelta > 0){
   1475             double relChange = fabs(eval.delta - newDelta)/ eval.delta;
   1476             if(relChange > MIN_DELTA_CHNG){
   1477                 eval.delta = newDelta;
   1478                 designSPRTTest();
   1479             }
   1480         }
   1481     }
   1482 }
   1483 
   1484 /**
   1485  * Numerically compute threshold A from the estimated delta, epsilon, t_M and
   1486  * m_S values.
   1487  *
   1488  * Epsilon:  Denotes the probability that a randomly chosen data point is
   1489  *           consistent with a good model.
   1490  * Delta:    Denotes the probability that a randomly chosen data point is
   1491  *           consistent with a bad model.
   1492  * t_M:      Time needed to instantiate a model hypotheses given a sample.
   1493  *           (Computing model parameters from a sample takes the same time
   1494  *            as verification of t_M data points)
   1495  * m_S:      The number of models that are verified per sample.
   1496  */
   1497 
   1498 static inline double sacDesignSPRTTest(double delta, double epsilon, double t_M, double m_S){
   1499     double An, C, K, prevAn;
   1500     unsigned i;
   1501 
   1502     /**
   1503      * Randomized RANSAC with Sequential Probability Ratio Test, ICCV 2005
   1504      * Eq (2)
   1505      */
   1506 
   1507     C = (1-delta)  *  log((1-delta)/(1-epsilon)) +
   1508         delta      *  log(  delta  /  epsilon  );
   1509 
   1510     /**
   1511      * Randomized RANSAC with Sequential Probability Ratio Test, ICCV 2005
   1512      * Eq (6)
   1513      * K = K_1/K_2 + 1 = (t_M*C)/m_S + 1
   1514      */
   1515 
   1516     K = t_M*C/m_S + 1;
   1517 
   1518     /**
   1519      * Randomized RANSAC with Sequential Probability Ratio Test, ICCV 2005
   1520      * Paragraph below Eq (6)
   1521      *
   1522      * A* = lim_{n -> infty} A_n, where
   1523      *     A_0     = K1/K2 + 1             and
   1524      *     A_{n+1} = K1/K2 + 1 + log(A_n)
   1525      * The series converges fast, typically within four iterations.
   1526      */
   1527 
   1528     An = K;
   1529     i  = 0;
   1530 
   1531     do{
   1532         prevAn = An;
   1533         An = K + log(An);
   1534     }while((An-prevAn > 1.5e-8)  &&  (++i < 10));
   1535 
   1536     /**
   1537      * Return A = An_stopping, with n_stopping < 10
   1538      */
   1539 
   1540     return An;
   1541 }
   1542 
   1543 /**
   1544  * Design the SPRT test. Shorthand for
   1545  *     A = sprt(delta, epsilon, t_M, m_S);
   1546  *
   1547  * Idempotent.
   1548  *
   1549  * Reads    (direct): eval.delta, eval.epsilon, eval.t_M, eval.m_S
   1550  * Reads   (callees): None.
   1551  * Writes   (direct): eval.A, eval.lambdaReject, eval.lambdaAccept.
   1552  * Writes  (callees): None.
   1553  */
   1554 
   1555 inline void   RHO_HEST_REFC::designSPRTTest(void){
   1556     eval.A = sacDesignSPRTTest(eval.delta, eval.epsilon, eval.t_M, eval.m_S);
   1557     eval.lambdaReject = ((1.0 - eval.delta) / (1.0 - eval.epsilon));
   1558     eval.lambdaAccept = ((   eval.delta   ) / (    eval.epsilon  ));
   1559 }
   1560 
   1561 /**
   1562  * Return whether the current model is the best model so far.
   1563  *
   1564  * @return Non-zero if this is the model with the most inliers seen so far;
   1565  *         0 otherwise.
   1566  *
   1567  * Reads    (direct): curr.numInl, best.numInl
   1568  * Reads   (callees): None.
   1569  * Writes   (direct): None.
   1570  * Writes  (callees): None.
   1571  */
   1572 
   1573 inline int    RHO_HEST_REFC::isBestModel(void){
   1574     return curr.numInl > best.numInl;
   1575 }
   1576 
   1577 /**
   1578  * Returns whether the current-best model is good enough to be an
   1579  * acceptable best model, by checking whether it meets the minimum
   1580  * number of inliers.
   1581  *
   1582  * @return Non-zero if the current model is "good enough" to save;
   1583  *         0 otherwise.
   1584  *
   1585  * Reads    (direct): best.numInl, arg.minInl
   1586  * Reads   (callees): None.
   1587  * Writes   (direct): None.
   1588  * Writes  (callees): None.
   1589  */
   1590 
   1591 inline int    RHO_HEST_REFC::isBestModelGoodEnough(void){
   1592     return best.numInl >= arg.minInl;
   1593 }
   1594 
   1595 /**
   1596  * Make current model new best model by swapping the homography, inlier mask
   1597  * and count of inliers between the current and best models.
   1598  *
   1599  * Reads    (direct): curr.H, curr.inl, curr.numInl,
   1600  *                    best.H, best.inl, best.numInl
   1601  * Reads   (callees): None.
   1602  * Writes   (direct): curr.H, curr.inl, curr.numInl,
   1603  *                    best.H, best.inl, best.numInl
   1604  * Writes  (callees): None.
   1605  */
   1606 
   1607 inline void   RHO_HEST_REFC::saveBestModel(void){
   1608     float*   H      = curr.H;
   1609     char*    inl    = curr.inl;
   1610     unsigned numInl = curr.numInl;
   1611 
   1612     curr.H       = best.H;
   1613     curr.inl     = best.inl;
   1614     curr.numInl  = best.numInl;
   1615 
   1616     best.H       = H;
   1617     best.inl     = inl;
   1618     best.numInl  = numInl;
   1619 }
   1620 
   1621 /**
   1622  * Compute NR table entries [start, N) for given beta.
   1623  */
   1624 
   1625 static inline void   sacInitNonRand(double    beta,
   1626                                     unsigned  start,
   1627                                     unsigned  N,
   1628                                     unsigned* nonRandMinInl){
   1629     unsigned n = SMPL_SIZE+1 > start ? SMPL_SIZE+1 : start;
   1630     double   beta_beta1_sq_chi = sqrt(beta*(1.0-beta)) * CHI_SQ;
   1631 
   1632     for(; n < N; n++){
   1633         double   mu      = n * beta;
   1634         double   sigma   = sqrt((double)n)* beta_beta1_sq_chi;
   1635         unsigned i_min   = (unsigned)ceil(SMPL_SIZE + mu + sigma);
   1636 
   1637         nonRandMinInl[n] = i_min;
   1638     }
   1639 }
   1640 
   1641 /**
   1642  * Optimize the stopping criterion to account for the non-randomness criterion
   1643  * of PROSAC.
   1644  *
   1645  * Reads    (direct): arg.N, best.numInl, nr.tbl, arg.inl, ctrl.phMax,
   1646  *                    ctrl.phNumInl, arg.cfd, arg.maxI
   1647  * Reads   (callees): None.
   1648  * Writes   (direct): arg.maxI, ctrl.phMax, ctrl.phNumInl
   1649  * Writes  (callees): None.
   1650  */
   1651 
   1652 inline void   RHO_HEST_REFC::nStarOptimize(void){
   1653     unsigned min_sample_length = 10*2; /*(N * INLIERS_RATIO) */
   1654     unsigned best_n       = arg.N;
   1655     unsigned test_n       = best_n;
   1656     unsigned bestNumInl   = best.numInl;
   1657     unsigned testNumInl   = bestNumInl;
   1658 
   1659     for(;test_n > min_sample_length && testNumInl;test_n--){
   1660         if(testNumInl*best_n > bestNumInl*test_n){
   1661             if(testNumInl < nr.tbl[test_n]){
   1662                 break;
   1663             }
   1664             best_n      = test_n;
   1665             bestNumInl  = testNumInl;
   1666         }
   1667         testNumInl -= !!best.inl[test_n-1];
   1668     }
   1669 
   1670     if(bestNumInl*ctrl.phMax > ctrl.phNumInl*best_n){
   1671         ctrl.phMax    = best_n;
   1672         ctrl.phNumInl = bestNumInl;
   1673         arg.maxI      = sacCalcIterBound(arg.cfd,
   1674                                          (double)ctrl.phNumInl/ctrl.phMax,
   1675                                          SMPL_SIZE,
   1676                                          arg.maxI);
   1677     }
   1678 }
   1679 
   1680 /**
   1681  * Classic RANSAC iteration bound based on largest # of inliers.
   1682  *
   1683  * Reads    (direct): arg.maxI, arg.cfd, best.numInl, arg.N
   1684  * Reads   (callees): None.
   1685  * Writes   (direct): arg.maxI
   1686  * Writes  (callees): None.
   1687  */
   1688 
   1689 inline void   RHO_HEST_REFC::updateBounds(void){
   1690     arg.maxI = sacCalcIterBound(arg.cfd,
   1691                                 (double)best.numInl/arg.N,
   1692                                 SMPL_SIZE,
   1693                                 arg.maxI);
   1694 }
   1695 
   1696 /**
   1697  * Ouput the best model so far to the output argument.
   1698  *
   1699  * Reads    (direct): arg.finalH, best.H, arg.inl, best.inl, arg.N
   1700  * Reads   (callees): arg.finalH, arg.inl, arg.N
   1701  * Writes   (direct): arg.finalH, arg.inl
   1702  * Writes  (callees): arg.finalH, arg.inl
   1703  */
   1704 
   1705 inline void   RHO_HEST_REFC::outputModel(void){
   1706     if(isBestModelGoodEnough()){
   1707         memcpy(arg.finalH, best.H, HSIZE);
   1708         if(arg.inl){
   1709             memcpy(arg.inl, best.inl, arg.N);
   1710         }
   1711     }else{
   1712         outputZeroH();
   1713     }
   1714 }
   1715 
   1716 /**
   1717  * Ouput a zeroed H to the output argument.
   1718  *
   1719  * Reads    (direct): arg.finalH, arg.inl, arg.N
   1720  * Reads   (callees): None.
   1721  * Writes   (direct): arg.finalH, arg.inl
   1722  * Writes  (callees): None.
   1723  */
   1724 
   1725 inline void   RHO_HEST_REFC::outputZeroH(void){
   1726     if(arg.finalH){
   1727         memset(arg.finalH, 0, HSIZE);
   1728     }
   1729     if(arg.inl){
   1730         memset(arg.inl,    0, arg.N);
   1731     }
   1732 }
   1733 
   1734 /**
   1735  * Compute the real-valued number of samples per phase, given the RANSAC convergence speed,
   1736  * data set size and sample size.
   1737  */
   1738 
   1739 static inline double sacInitPEndFpI(const unsigned ransacConvg,
   1740                                     const unsigned n,
   1741                                     const unsigned s){
   1742     double numer=1, denom=1;
   1743 
   1744     unsigned i;
   1745     for(i=0;i<s;i++){
   1746         numer *= s-i;
   1747         denom *= n-i;
   1748     }
   1749 
   1750     return ransacConvg*numer/denom;
   1751 }
   1752 
   1753 /**
   1754  * Estimate the number of iterations required based on the requested confidence,
   1755  * proportion of inliers in the best model so far and sample size.
   1756  *
   1757  * Clamp return value at maxIterationBound.
   1758  */
   1759 
   1760 static inline unsigned sacCalcIterBound(double   confidence,
   1761                                         double   inlierRate,
   1762                                         unsigned sampleSize,
   1763                                         unsigned maxIterBound){
   1764     unsigned retVal;
   1765 
   1766     /**
   1767      * Formula chosen from http://en.wikipedia.org/wiki/RANSAC#The_parameters :
   1768      *
   1769      * \[ k = \frac{\log{(1-confidence)}}{\log{(1-inlierRate**sampleSize)}} \]
   1770      */
   1771 
   1772     double atLeastOneOutlierProbability = 1.-pow(inlierRate, (double)sampleSize);
   1773 
   1774     /**
   1775      * There are two special cases: When argument to log() is 0 and when it is 1.
   1776      * Each has a special meaning.
   1777      */
   1778 
   1779     if(atLeastOneOutlierProbability>=1.){
   1780         /**
   1781          * A certainty of picking at least one outlier means that we will need
   1782          * an infinite amount of iterations in order to find a correct solution.
   1783          */
   1784 
   1785         retVal = maxIterBound;
   1786     }else if(atLeastOneOutlierProbability<=0.){
   1787         /**
   1788          * The certainty of NOT picking an outlier means that only 1 iteration
   1789          * is needed to find a solution.
   1790          */
   1791 
   1792         retVal = 1;
   1793     }else{
   1794         /**
   1795          * Since 1-confidence (the probability of the model being based on at
   1796          * least one outlier in the data) is equal to
   1797          * (1-inlierRate**sampleSize)**numIterations (the probability of picking
   1798          * at least one outlier in numIterations samples), we can isolate
   1799          * numIterations (the return value) into
   1800          */
   1801 
   1802         retVal = (unsigned)ceil(log(1.-confidence)/log(atLeastOneOutlierProbability));
   1803     }
   1804 
   1805     /**
   1806      * Clamp to maxIterationBound.
   1807      */
   1808 
   1809     return retVal <= maxIterBound ? retVal : maxIterBound;
   1810 }
   1811 
   1812 
   1813 /**
   1814  * Given 4 matches, computes the homography that relates them using Gaussian
   1815  * Elimination. The row operations are as given in the paper.
   1816  *
   1817  * TODO: Clean this up. The code is hideous, and might even conceal sign bugs
   1818  *       (specifically relating to whether the last column should be negated,
   1819  *        or not).
   1820  */
   1821 
   1822 static void hFuncRefC(float* packedPoints,/* Source (four x,y float coordinates) points followed by
   1823                                              destination (four x,y float coordinates) points, aligned by 32 bytes */
   1824                       float* H){          /* Homography (three 16-byte aligned rows of 3 floats) */
   1825     float x0=*packedPoints++;
   1826     float y0=*packedPoints++;
   1827     float x1=*packedPoints++;
   1828     float y1=*packedPoints++;
   1829     float x2=*packedPoints++;
   1830     float y2=*packedPoints++;
   1831     float x3=*packedPoints++;
   1832     float y3=*packedPoints++;
   1833     float X0=*packedPoints++;
   1834     float Y0=*packedPoints++;
   1835     float X1=*packedPoints++;
   1836     float Y1=*packedPoints++;
   1837     float X2=*packedPoints++;
   1838     float Y2=*packedPoints++;
   1839     float X3=*packedPoints++;
   1840     float Y3=*packedPoints++;
   1841 
   1842     float x0X0=x0*X0, x1X1=x1*X1, x2X2=x2*X2, x3X3=x3*X3;
   1843     float x0Y0=x0*Y0, x1Y1=x1*Y1, x2Y2=x2*Y2, x3Y3=x3*Y3;
   1844     float y0X0=y0*X0, y1X1=y1*X1, y2X2=y2*X2, y3X3=y3*X3;
   1845     float y0Y0=y0*Y0, y1Y1=y1*Y1, y2Y2=y2*Y2, y3Y3=y3*Y3;
   1846 
   1847 
   1848     /**
   1849      *  [0]   [1] Hidden   Prec
   1850      *  x0    y0    1       x1
   1851      *  x1    y1    1       x1
   1852      *  x2    y2    1       x1
   1853      *  x3    y3    1       x1
   1854      *
   1855      * Eliminate ones in column 2 and 5.
   1856      * R(0)-=R(2)
   1857      * R(1)-=R(2)
   1858      * R(3)-=R(2)
   1859      *
   1860      *  [0]   [1] Hidden   Prec
   1861      * x0-x2 y0-y2  0       x1+1
   1862      * x1-x2 y1-y2  0       x1+1
   1863      *  x2    y2    1       x1
   1864      * x3-x2 y3-y2  0       x1+1
   1865      *
   1866      * Eliminate column 0 of rows 1 and 3
   1867      * R(1)=(x0-x2)*R(1)-(x1-x2)*R(0),     y1'=(y1-y2)(x0-x2)-(x1-x2)(y0-y2)
   1868      * R(3)=(x0-x2)*R(3)-(x3-x2)*R(0),     y3'=(y3-y2)(x0-x2)-(x3-x2)(y0-y2)
   1869      *
   1870      *  [0]   [1] Hidden   Prec
   1871      * x0-x2 y0-y2  0      x1+1
   1872      *   0    y1'   0      x2+3
   1873      *  x2    y2    1       x1
   1874      *   0    y3'   0      x2+3
   1875      *
   1876      * Eliminate column 1 of rows 0 and 3
   1877      * R(3)=y1'*R(3)-y3'*R(1)
   1878      * R(0)=y1'*R(0)-(y0-y2)*R(1)
   1879      *
   1880      *  [0]   [1] Hidden   Prec
   1881      *  x0'    0    0      x3+5
   1882      *   0    y1'   0      x2+3
   1883      *  x2    y2    1       x1
   1884      *   0     0    0      x4+7
   1885      *
   1886      * Eliminate columns 0 and 1 of row 2
   1887      * R(0)/=x0'
   1888      * R(1)/=y1'
   1889      * R(2)-= (x2*R(0) + y2*R(1))
   1890      *
   1891      *  [0]   [1] Hidden   Prec
   1892      *   1     0    0      x6+10
   1893      *   0     1    0      x4+6
   1894      *   0     0    1      x4+7
   1895      *   0     0    0      x4+7
   1896      */
   1897 
   1898     /**
   1899      * Eliminate ones in column 2 and 5.
   1900      * R(0)-=R(2)
   1901      * R(1)-=R(2)
   1902      * R(3)-=R(2)
   1903      */
   1904 
   1905     /*float minor[4][2] = {{x0-x2,y0-y2},
   1906                          {x1-x2,y1-y2},
   1907                          {x2   ,y2   },
   1908                          {x3-x2,y3-y2}};*/
   1909     /*float major[8][3] = {{x2X2-x0X0,y2X2-y0X0,(X0-X2)},
   1910                          {x2X2-x1X1,y2X2-y1X1,(X1-X2)},
   1911                          {-x2X2    ,-y2X2    ,(X2   )},
   1912                          {x2X2-x3X3,y2X2-y3X3,(X3-X2)},
   1913                          {x2Y2-x0Y0,y2Y2-y0Y0,(Y0-Y2)},
   1914                          {x2Y2-x1Y1,y2Y2-y1Y1,(Y1-Y2)},
   1915                          {-x2Y2    ,-y2Y2    ,(Y2   )},
   1916                          {x2Y2-x3Y3,y2Y2-y3Y3,(Y3-Y2)}};*/
   1917     float minor[2][4] = {{x0-x2,x1-x2,x2   ,x3-x2},
   1918                          {y0-y2,y1-y2,y2   ,y3-y2}};
   1919     float major[3][8] = {{x2X2-x0X0,x2X2-x1X1,-x2X2    ,x2X2-x3X3,x2Y2-x0Y0,x2Y2-x1Y1,-x2Y2    ,x2Y2-x3Y3},
   1920                          {y2X2-y0X0,y2X2-y1X1,-y2X2    ,y2X2-y3X3,y2Y2-y0Y0,y2Y2-y1Y1,-y2Y2    ,y2Y2-y3Y3},
   1921                          { (X0-X2) , (X1-X2) , (X2   ) , (X3-X2) , (Y0-Y2) , (Y1-Y2) , (Y2   ) , (Y3-Y2) }};
   1922 
   1923     /**
   1924      * int i;
   1925      * for(i=0;i<8;i++) major[2][i]=-major[2][i];
   1926      * Eliminate column 0 of rows 1 and 3
   1927      * R(1)=(x0-x2)*R(1)-(x1-x2)*R(0),     y1'=(y1-y2)(x0-x2)-(x1-x2)(y0-y2)
   1928      * R(3)=(x0-x2)*R(3)-(x3-x2)*R(0),     y3'=(y3-y2)(x0-x2)-(x3-x2)(y0-y2)
   1929      */
   1930 
   1931     float scalar1=minor[0][0], scalar2=minor[0][1];
   1932     minor[1][1]=minor[1][1]*scalar1-minor[1][0]*scalar2;
   1933 
   1934     major[0][1]=major[0][1]*scalar1-major[0][0]*scalar2;
   1935     major[1][1]=major[1][1]*scalar1-major[1][0]*scalar2;
   1936     major[2][1]=major[2][1]*scalar1-major[2][0]*scalar2;
   1937 
   1938     major[0][5]=major[0][5]*scalar1-major[0][4]*scalar2;
   1939     major[1][5]=major[1][5]*scalar1-major[1][4]*scalar2;
   1940     major[2][5]=major[2][5]*scalar1-major[2][4]*scalar2;
   1941 
   1942     scalar2=minor[0][3];
   1943     minor[1][3]=minor[1][3]*scalar1-minor[1][0]*scalar2;
   1944 
   1945     major[0][3]=major[0][3]*scalar1-major[0][0]*scalar2;
   1946     major[1][3]=major[1][3]*scalar1-major[1][0]*scalar2;
   1947     major[2][3]=major[2][3]*scalar1-major[2][0]*scalar2;
   1948 
   1949     major[0][7]=major[0][7]*scalar1-major[0][4]*scalar2;
   1950     major[1][7]=major[1][7]*scalar1-major[1][4]*scalar2;
   1951     major[2][7]=major[2][7]*scalar1-major[2][4]*scalar2;
   1952 
   1953     /**
   1954      * Eliminate column 1 of rows 0 and 3
   1955      * R(3)=y1'*R(3)-y3'*R(1)
   1956      * R(0)=y1'*R(0)-(y0-y2)*R(1)
   1957      */
   1958 
   1959     scalar1=minor[1][1];scalar2=minor[1][3];
   1960     major[0][3]=major[0][3]*scalar1-major[0][1]*scalar2;
   1961     major[1][3]=major[1][3]*scalar1-major[1][1]*scalar2;
   1962     major[2][3]=major[2][3]*scalar1-major[2][1]*scalar2;
   1963 
   1964     major[0][7]=major[0][7]*scalar1-major[0][5]*scalar2;
   1965     major[1][7]=major[1][7]*scalar1-major[1][5]*scalar2;
   1966     major[2][7]=major[2][7]*scalar1-major[2][5]*scalar2;
   1967 
   1968     scalar2=minor[1][0];
   1969     minor[0][0]=minor[0][0]*scalar1-minor[0][1]*scalar2;
   1970 
   1971     major[0][0]=major[0][0]*scalar1-major[0][1]*scalar2;
   1972     major[1][0]=major[1][0]*scalar1-major[1][1]*scalar2;
   1973     major[2][0]=major[2][0]*scalar1-major[2][1]*scalar2;
   1974 
   1975     major[0][4]=major[0][4]*scalar1-major[0][5]*scalar2;
   1976     major[1][4]=major[1][4]*scalar1-major[1][5]*scalar2;
   1977     major[2][4]=major[2][4]*scalar1-major[2][5]*scalar2;
   1978 
   1979     /**
   1980      * Eliminate columns 0 and 1 of row 2
   1981      * R(0)/=x0'
   1982      * R(1)/=y1'
   1983      * R(2)-= (x2*R(0) + y2*R(1))
   1984      */
   1985 
   1986     scalar1=1.0f/minor[0][0];
   1987     major[0][0]*=scalar1;
   1988     major[1][0]*=scalar1;
   1989     major[2][0]*=scalar1;
   1990     major[0][4]*=scalar1;
   1991     major[1][4]*=scalar1;
   1992     major[2][4]*=scalar1;
   1993 
   1994     scalar1=1.0f/minor[1][1];
   1995     major[0][1]*=scalar1;
   1996     major[1][1]*=scalar1;
   1997     major[2][1]*=scalar1;
   1998     major[0][5]*=scalar1;
   1999     major[1][5]*=scalar1;
   2000     major[2][5]*=scalar1;
   2001 
   2002 
   2003     scalar1=minor[0][2];scalar2=minor[1][2];
   2004     major[0][2]-=major[0][0]*scalar1+major[0][1]*scalar2;
   2005     major[1][2]-=major[1][0]*scalar1+major[1][1]*scalar2;
   2006     major[2][2]-=major[2][0]*scalar1+major[2][1]*scalar2;
   2007 
   2008     major[0][6]-=major[0][4]*scalar1+major[0][5]*scalar2;
   2009     major[1][6]-=major[1][4]*scalar1+major[1][5]*scalar2;
   2010     major[2][6]-=major[2][4]*scalar1+major[2][5]*scalar2;
   2011 
   2012     /* Only major matters now. R(3) and R(7) correspond to the hollowed-out rows. */
   2013     scalar1=major[0][7];
   2014     major[1][7]/=scalar1;
   2015     major[2][7]/=scalar1;
   2016 
   2017     scalar1=major[0][0];major[1][0]-=scalar1*major[1][7];major[2][0]-=scalar1*major[2][7];
   2018     scalar1=major[0][1];major[1][1]-=scalar1*major[1][7];major[2][1]-=scalar1*major[2][7];
   2019     scalar1=major[0][2];major[1][2]-=scalar1*major[1][7];major[2][2]-=scalar1*major[2][7];
   2020     scalar1=major[0][3];major[1][3]-=scalar1*major[1][7];major[2][3]-=scalar1*major[2][7];
   2021     scalar1=major[0][4];major[1][4]-=scalar1*major[1][7];major[2][4]-=scalar1*major[2][7];
   2022     scalar1=major[0][5];major[1][5]-=scalar1*major[1][7];major[2][5]-=scalar1*major[2][7];
   2023     scalar1=major[0][6];major[1][6]-=scalar1*major[1][7];major[2][6]-=scalar1*major[2][7];
   2024 
   2025 
   2026     /* One column left (Two in fact, but the last one is the homography) */
   2027     scalar1=major[1][3];
   2028 
   2029     major[2][3]/=scalar1;
   2030     scalar1=major[1][0];major[2][0]-=scalar1*major[2][3];
   2031     scalar1=major[1][1];major[2][1]-=scalar1*major[2][3];
   2032     scalar1=major[1][2];major[2][2]-=scalar1*major[2][3];
   2033     scalar1=major[1][4];major[2][4]-=scalar1*major[2][3];
   2034     scalar1=major[1][5];major[2][5]-=scalar1*major[2][3];
   2035     scalar1=major[1][6];major[2][6]-=scalar1*major[2][3];
   2036     scalar1=major[1][7];major[2][7]-=scalar1*major[2][3];
   2037 
   2038 
   2039     /* Homography is done. */
   2040     H[0]=major[2][0];
   2041     H[1]=major[2][1];
   2042     H[2]=major[2][2];
   2043 
   2044     H[3]=major[2][4];
   2045     H[4]=major[2][5];
   2046     H[5]=major[2][6];
   2047 
   2048     H[6]=major[2][7];
   2049     H[7]=major[2][3];
   2050     H[8]=1.0;
   2051 }
   2052 
   2053 
   2054 /**
   2055  * Returns whether refinement is possible.
   2056  *
   2057  * NB This is separate from whether it is *enabled*.
   2058  *
   2059  * @return 0 if refinement isn't possible, non-zero otherwise.
   2060  *
   2061  * Reads    (direct): best.numInl
   2062  * Reads   (callees): None.
   2063  * Writes   (direct): None.
   2064  * Writes  (callees): None.
   2065  */
   2066 
   2067 inline int    RHO_HEST_REFC::canRefine(void){
   2068     /**
   2069      * If we only have 4 matches, GE's result is already optimal and cannot
   2070      * be refined any further.
   2071      */
   2072 
   2073     return best.numInl > (unsigned)SMPL_SIZE;
   2074 }
   2075 
   2076 
   2077 /**
   2078  * Refines the best-so-far homography (p->best.H).
   2079  *
   2080  * Reads    (direct): best.H, arg.src, arg.dst, best.inl, arg.N, lm.JtJ,
   2081  *                    lm.Jte, lm.tmp1
   2082  * Reads   (callees): None.
   2083  * Writes   (direct): best.H, lm.JtJ, lm.Jte, lm.tmp1
   2084  * Writes  (callees): None.
   2085  */
   2086 
   2087 inline void   RHO_HEST_REFC::refine(void){
   2088     int         i;
   2089     float       S, newS;  /* Sum of squared errors */
   2090     float       gain;     /* Gain-parameter. */
   2091     float       L  = 100.0f;/* Lambda of LevMarq */
   2092     float dH[8], newH[8];
   2093 
   2094     /**
   2095      * Iteratively refine the homography.
   2096      */
   2097     /* Find initial conditions */
   2098     sacCalcJacobianErrors(best.H, arg.src, arg.dst, best.inl, arg.N,
   2099                           lm.JtJ, lm.Jte,  &S);
   2100 
   2101     /*Levenberg-Marquardt Loop.*/
   2102     for(i=0;i<MAXLEVMARQITERS;i++){
   2103         /**
   2104          * Attempt a step given current state
   2105          *   - Jacobian-x-Jacobian   (JtJ)
   2106          *   - Jacobian-x-error      (Jte)
   2107          *   - Sum of squared errors (S)
   2108          * and current parameter
   2109          *   - Lambda (L)
   2110          * .
   2111          *
   2112          * This is done by solving the system of equations
   2113          *     Ax = b
   2114          * where A (JtJ) and b (Jte) are sourced from our current state, and
   2115          * the solution x becomes a step (dH) that is applied to best.H in
   2116          * order to compute a candidate homography (newH).
   2117          *
   2118          * The system above is solved by Cholesky decomposition of a
   2119          * sufficently-damped JtJ into a lower-triangular matrix (and its
   2120          * transpose), whose inverse is then computed. This inverse (and its
   2121          * transpose) then multiply Jte in order to find dH.
   2122          */
   2123 
   2124         while(!sacChol8x8Damped(lm.JtJ, L, lm.tmp1)){
   2125             L *= 2.0f;
   2126         }
   2127         sacTRInv8x8   (lm.tmp1, lm.tmp1);
   2128         sacTRISolve8x8(lm.tmp1, lm.Jte,  dH);
   2129         sacSub8x1     (newH,       best.H,  dH);
   2130         sacCalcJacobianErrors(newH, arg.src, arg.dst, best.inl, arg.N,
   2131                               NULL, NULL, &newS);
   2132         gain = sacLMGain(dH, lm.Jte, S, newS, L);
   2133         /*printf("Lambda: %12.6f  S: %12.6f  newS: %12.6f  Gain: %12.6f\n",
   2134                  L, S, newS, gain);*/
   2135 
   2136         /**
   2137          * If the gain is positive (i.e., the new Sum of Square Errors (newS)
   2138          * corresponding to newH is lower than the previous one (S) ), save
   2139          * the current state and accept the new step dH.
   2140          *
   2141          * If the gain is below LM_GAIN_LO, damp more (increase L), even if the
   2142          * gain was positive. If the gain is above LM_GAIN_HI, damp less
   2143          * (decrease L). Otherwise the gain is left unchanged.
   2144          */
   2145 
   2146         if(gain < LM_GAIN_LO){
   2147             L *= 8;
   2148             if(L>1000.0f/FLT_EPSILON){
   2149                 break;/* FIXME: Most naive termination criterion imaginable. */
   2150             }
   2151         }else if(gain > LM_GAIN_HI){
   2152             L *= 0.5;
   2153         }
   2154 
   2155         if(gain > 0){
   2156             S = newS;
   2157             memcpy(best.H, newH, sizeof(newH));
   2158             sacCalcJacobianErrors(best.H, arg.src, arg.dst, best.inl, arg.N,
   2159                                   lm.JtJ, lm.Jte,  &S);
   2160         }
   2161     }
   2162 }
   2163 
   2164 
   2165 /**
   2166  * Compute directly the JtJ, Jte and sum-of-squared-error for a given
   2167  * homography and set of inliers.
   2168  *
   2169  * This is possible because the product of J and its transpose as well as with
   2170  * the error and the sum-of-squared-error can all be computed additively
   2171  * (match-by-match), as one would intuitively expect; All matches make
   2172  * contributions to the error independently of each other.
   2173  *
   2174  * What this allows is a constant-space implementation of Lev-Marq that can
   2175  * nevertheless be vectorized if need be.
   2176  */
   2177 
   2178 static inline void   sacCalcJacobianErrors(const float* H,
   2179                                            const float* src,
   2180                                            const float* dst,
   2181                                            const char*  inl,
   2182                                            unsigned     N,
   2183                                            float     (* JtJ)[8],
   2184                                            float*       Jte,
   2185                                            float*       Sp){
   2186     unsigned i;
   2187     float    S;
   2188 
   2189     /* Zero out JtJ, Jte and S */
   2190     if(JtJ){memset(JtJ, 0, 8*8*sizeof(float));}
   2191     if(Jte){memset(Jte, 0, 8*1*sizeof(float));}
   2192     S = 0.0f;
   2193 
   2194     /* Additively compute JtJ and Jte */
   2195     for(i=0;i<N;i++){
   2196         /* Skip outliers */
   2197         if(!inl[i]){
   2198             continue;
   2199         }
   2200 
   2201         /**
   2202          * Otherwise, compute additively the upper triangular matrix JtJ and
   2203          * the Jtd vector within the following formula:
   2204          *
   2205          *     LaTeX:
   2206          *     (J^{T}J + \lambda \diag( J^{T}J )) \beta = J^{T}[ y - f(\Beta) ]
   2207          *     Simplified ASCII:
   2208          *     (JtJ + L*diag(JtJ)) beta = Jt e, where e (error) is y-f(Beta).
   2209          *
   2210          * For this we need to calculate
   2211          *     1) The 2D error (e) of the homography on the current point i
   2212          *        using the current parameters Beta.
   2213          *     2) The derivatives (J) of the error on the current point i under
   2214          *        perturbations of the current parameters Beta.
   2215          * Accumulate products of the error times the derivative to Jte, and
   2216          * products of the derivatives to JtJ.
   2217          */
   2218 
   2219         /* Compute Squared Error */
   2220         float x       = src[2*i+0];
   2221         float y       = src[2*i+1];
   2222         float X       = dst[2*i+0];
   2223         float Y       = dst[2*i+1];
   2224         float W       = (H[6]*x + H[7]*y + 1.0f);
   2225         float iW      = fabs(W) > FLT_EPSILON ? 1.0f/W : 0;
   2226 
   2227         float reprojX = (H[0]*x + H[1]*y + H[2]) * iW;
   2228         float reprojY = (H[3]*x + H[4]*y + H[5]) * iW;
   2229 
   2230         float eX      = reprojX - X;
   2231         float eY      = reprojY - Y;
   2232         float e       = eX*eX + eY*eY;
   2233         S            += e;
   2234 
   2235         /* Compute Jacobian */
   2236         if(JtJ || Jte){
   2237             float dxh11 = x          * iW;
   2238             float dxh12 = y          * iW;
   2239             float dxh13 =              iW;
   2240           /*float dxh21 = 0.0f;*/
   2241           /*float dxh22 = 0.0f;*/
   2242           /*float dxh23 = 0.0f;*/
   2243             float dxh31 = -reprojX*x * iW;
   2244             float dxh32 = -reprojX*y * iW;
   2245 
   2246           /*float dyh11 = 0.0f;*/
   2247           /*float dyh12 = 0.0f;*/
   2248           /*float dyh13 = 0.0f;*/
   2249             float dyh21 = x          * iW;
   2250             float dyh22 = y          * iW;
   2251             float dyh23 =              iW;
   2252             float dyh31 = -reprojY*x * iW;
   2253             float dyh32 = -reprojY*y * iW;
   2254 
   2255             /* Update Jte:          X             Y   */
   2256             if(Jte){
   2257                 Jte[0]    += eX   *dxh11              ;/*  +0 */
   2258                 Jte[1]    += eX   *dxh12              ;/*  +0 */
   2259                 Jte[2]    += eX   *dxh13              ;/*  +0 */
   2260                 Jte[3]    +=               eY   *dyh21;/* 0+  */
   2261                 Jte[4]    +=               eY   *dyh22;/* 0+  */
   2262                 Jte[5]    +=               eY   *dyh23;/* 0+  */
   2263                 Jte[6]    += eX   *dxh31 + eY   *dyh31;/*  +  */
   2264                 Jte[7]    += eX   *dxh32 + eY   *dyh32;/*  +  */
   2265             }
   2266 
   2267             /* Update JtJ:          X             Y    */
   2268             if(JtJ){
   2269                 JtJ[0][0] += dxh11*dxh11              ;/*  +0 */
   2270 
   2271                 JtJ[1][0] += dxh11*dxh12              ;/*  +0 */
   2272                 JtJ[1][1] += dxh12*dxh12              ;/*  +0 */
   2273 
   2274                 JtJ[2][0] += dxh11*dxh13              ;/*  +0 */
   2275                 JtJ[2][1] += dxh12*dxh13              ;/*  +0 */
   2276                 JtJ[2][2] += dxh13*dxh13              ;/*  +0 */
   2277 
   2278               /*JtJ[3][0] +=                          ;   0+0 */
   2279               /*JtJ[3][1] +=                          ;   0+0 */
   2280               /*JtJ[3][2] +=                          ;   0+0 */
   2281                 JtJ[3][3] +=               dyh21*dyh21;/* 0+  */
   2282 
   2283               /*JtJ[4][0] +=                          ;   0+0 */
   2284               /*JtJ[4][1] +=                          ;   0+0 */
   2285               /*JtJ[4][2] +=                          ;   0+0 */
   2286                 JtJ[4][3] +=               dyh21*dyh22;/* 0+  */
   2287                 JtJ[4][4] +=               dyh22*dyh22;/* 0+  */
   2288 
   2289               /*JtJ[5][0] +=                          ;   0+0 */
   2290               /*JtJ[5][1] +=                          ;   0+0 */
   2291               /*JtJ[5][2] +=                          ;   0+0 */
   2292                 JtJ[5][3] +=               dyh21*dyh23;/* 0+  */
   2293                 JtJ[5][4] +=               dyh22*dyh23;/* 0+  */
   2294                 JtJ[5][5] +=               dyh23*dyh23;/* 0+  */
   2295 
   2296                 JtJ[6][0] += dxh11*dxh31              ;/*  +0 */
   2297                 JtJ[6][1] += dxh12*dxh31              ;/*  +0 */
   2298                 JtJ[6][2] += dxh13*dxh31              ;/*  +0 */
   2299                 JtJ[6][3] +=               dyh21*dyh31;/* 0+  */
   2300                 JtJ[6][4] +=               dyh22*dyh31;/* 0+  */
   2301                 JtJ[6][5] +=               dyh23*dyh31;/* 0+  */
   2302                 JtJ[6][6] += dxh31*dxh31 + dyh31*dyh31;/*  +  */
   2303 
   2304                 JtJ[7][0] += dxh11*dxh32              ;/*  +0 */
   2305                 JtJ[7][1] += dxh12*dxh32              ;/*  +0 */
   2306                 JtJ[7][2] += dxh13*dxh32              ;/*  +0 */
   2307                 JtJ[7][3] +=               dyh21*dyh32;/* 0+  */
   2308                 JtJ[7][4] +=               dyh22*dyh32;/* 0+  */
   2309                 JtJ[7][5] +=               dyh23*dyh32;/* 0+  */
   2310                 JtJ[7][6] += dxh31*dxh32 + dyh31*dyh32;/*  +  */
   2311                 JtJ[7][7] += dxh32*dxh32 + dyh32*dyh32;/*  +  */
   2312             }
   2313         }
   2314     }
   2315 
   2316     if(Sp){*Sp = S;}
   2317 }
   2318 
   2319 
   2320 /**
   2321  * Compute the Levenberg-Marquardt "gain" obtained by the given step dH.
   2322  *
   2323  * Drawn from http://www2.imm.dtu.dk/documents/ftp/tr99/tr05_99.pdf.
   2324  */
   2325 
   2326 static inline float  sacLMGain(const float*  dH,
   2327                                const float*  Jte,
   2328                                const float   S,
   2329                                const float   newS,
   2330                                const float   lambda){
   2331     float dS = S-newS;
   2332     float dL = 0;
   2333     int i;
   2334 
   2335     /* Compute h^t h... */
   2336     for(i=0;i<8;i++){
   2337         dL += dH[i]*dH[i];
   2338     }
   2339     /* Compute mu * h^t h... */
   2340     dL *= lambda;
   2341     /* Subtract h^t F'... */
   2342     for(i=0;i<8;i++){
   2343         dL += dH[i]*Jte[i];/* += as opposed to -=, since dH we compute is
   2344                               opposite sign. */
   2345     }
   2346     /* Multiply by 1/2... */
   2347     dL *= 0.5;
   2348 
   2349     /* Return gain as S-newS / L0 - LH. */
   2350     return fabs(dL) < FLT_EPSILON ? dS : dS / dL;
   2351 }
   2352 
   2353 
   2354 /**
   2355  * Cholesky decomposition on 8x8 real positive-definite matrix defined by its
   2356  * lower-triangular half. Outputs L, the lower triangular part of the
   2357  * decomposition.
   2358  *
   2359  * A and L can overlap fully (in-place) or not at all, but may not partially
   2360  * overlap.
   2361  *
   2362  * For damping, the diagonal elements are scaled by 1.0 + lambda.
   2363  *
   2364  * Returns zero if decomposition unsuccessful, and non-zero otherwise.
   2365  *
   2366  * Source: http://en.wikipedia.org/wiki/Cholesky_decomposition#
   2367  * The_Cholesky.E2.80.93Banachiewicz_and_Cholesky.E2.80.93Crout_algorithms
   2368  */
   2369 
   2370 static inline int    sacChol8x8Damped(const float (*A)[8],
   2371                                       float         lambda,
   2372                                       float       (*L)[8]){
   2373     const register int N = 8;
   2374     int i, j, k;
   2375     float  lambdap1 = lambda + 1.0f;
   2376     float  x;
   2377 
   2378     for(i=0;i<N;i++){/* Row */
   2379         /* Pre-diagonal elements */
   2380         for(j=0;j<i;j++){
   2381             x = A[i][j];               /* Aij */
   2382             for(k=0;k<j;k++){
   2383                 x -= L[i][k] * L[j][k];/* - Sum_{k=0..j-1} Lik*Ljk */
   2384             }
   2385             L[i][j] = x / L[j][j];     /* Lij = ... / Ljj */
   2386         }
   2387 
   2388         /* Diagonal element */
   2389         {j = i;
   2390             x = A[j][j] * lambdap1;    /* Ajj */
   2391             for(k=0;k<j;k++){
   2392                 x -= L[j][k] * L[j][k];/* - Sum_{k=0..j-1} Ljk^2 */
   2393             }
   2394             if(x<0){
   2395                 return 0;
   2396             }
   2397             L[j][j] = sqrtf(x);        /* Ljj = sqrt( ... ) */
   2398         }
   2399     }
   2400 
   2401     return 1;
   2402 }
   2403 
   2404 
   2405 /**
   2406  * Invert lower-triangular 8x8 matrix L into lower-triangular matrix M.
   2407  *
   2408  * L and M can overlap fully (in-place) or not at all, but may not partially
   2409  * overlap.
   2410  *
   2411  * Uses formulation from
   2412  * http://www.cs.berkeley.edu/~knight/knight_math221_poster.pdf
   2413  * , adjusted for the fact that A^T^-1 = A^-1^T. Thus:
   2414  *
   2415  * U11    U12                   U11^-1   -U11^-1*U12*U22^-1
   2416  *                ->
   2417  *  0     U22                     0            U22^-1
   2418  *
   2419  * Becomes
   2420  *
   2421  * L11     0                    L11^-1           0
   2422  *                ->
   2423  * L21    L22            -L22^-1*L21*L11^-1    L22^-1
   2424  *
   2425  * Since
   2426  *
   2427  * ( -L11^T^-1*L21^T*L22^T^-1 )^T = -L22^T^-1^T*L21^T^T*L11^T^-1^T
   2428  *                                = -L22^T^T^-1*L21^T^T*L11^T^T^-1
   2429  *                                = -L22^-1*L21*L11^-1
   2430  */
   2431 
   2432 static inline void   sacTRInv8x8(const float (*L)[8],
   2433                                  float       (*M)[8]){
   2434     float s[2][2], t[2][2];
   2435     float u[4][4], v[4][4];
   2436 
   2437     /*
   2438         L00  0   0   0   0   0   0   0
   2439         L10 L11  0   0   0   0   0   0
   2440         L20 L21 L22  0   0   0   0   0
   2441         L30 L31 L32 L33  0   0   0   0
   2442         L40 L41 L42 L43 L44  0   0   0
   2443         L50 L51 L52 L53 L54 L55  0   0
   2444         L60 L61 L62 L63 L64 L65 L66  0
   2445         L70 L71 L72 L73 L74 L75 L76 L77
   2446     */
   2447 
   2448     /* Invert 4*2 1x1 matrices; Starts recursion. */
   2449     M[0][0] = 1.0f/L[0][0];
   2450     M[1][1] = 1.0f/L[1][1];
   2451     M[2][2] = 1.0f/L[2][2];
   2452     M[3][3] = 1.0f/L[3][3];
   2453     M[4][4] = 1.0f/L[4][4];
   2454     M[5][5] = 1.0f/L[5][5];
   2455     M[6][6] = 1.0f/L[6][6];
   2456     M[7][7] = 1.0f/L[7][7];
   2457 
   2458     /*
   2459         M00  0   0   0   0   0   0   0
   2460         L10 M11  0   0   0   0   0   0
   2461         L20 L21 M22  0   0   0   0   0
   2462         L30 L31 L32 M33  0   0   0   0
   2463         L40 L41 L42 L43 M44  0   0   0
   2464         L50 L51 L52 L53 L54 M55  0   0
   2465         L60 L61 L62 L63 L64 L65 M66  0
   2466         L70 L71 L72 L73 L74 L75 L76 M77
   2467     */
   2468 
   2469     /* 4*2 Matrix products of 1x1 matrices */
   2470     M[1][0] = -M[1][1]*L[1][0]*M[0][0];
   2471     M[3][2] = -M[3][3]*L[3][2]*M[2][2];
   2472     M[5][4] = -M[5][5]*L[5][4]*M[4][4];
   2473     M[7][6] = -M[7][7]*L[7][6]*M[6][6];
   2474 
   2475     /*
   2476         M00  0   0   0   0   0   0   0
   2477         M10 M11  0   0   0   0   0   0
   2478         L20 L21 M22  0   0   0   0   0
   2479         L30 L31 M32 M33  0   0   0   0
   2480         L40 L41 L42 L43 M44  0   0   0
   2481         L50 L51 L52 L53 M54 M55  0   0
   2482         L60 L61 L62 L63 L64 L65 M66  0
   2483         L70 L71 L72 L73 L74 L75 M76 M77
   2484     */
   2485 
   2486     /* 2*2 Matrix products of 2x2 matrices */
   2487 
   2488     /*
   2489        (M22  0 )   (L20 L21)   (M00  0 )
   2490      - (M32 M33) x (L30 L31) x (M10 M11)
   2491     */
   2492 
   2493     s[0][0] = M[2][2]*L[2][0];
   2494     s[0][1] = M[2][2]*L[2][1];
   2495     s[1][0] = M[3][2]*L[2][0]+M[3][3]*L[3][0];
   2496     s[1][1] = M[3][2]*L[2][1]+M[3][3]*L[3][1];
   2497 
   2498     t[0][0] = s[0][0]*M[0][0]+s[0][1]*M[1][0];
   2499     t[0][1] =                 s[0][1]*M[1][1];
   2500     t[1][0] = s[1][0]*M[0][0]+s[1][1]*M[1][0];
   2501     t[1][1] =                 s[1][1]*M[1][1];
   2502 
   2503     M[2][0] = -t[0][0];
   2504     M[2][1] = -t[0][1];
   2505     M[3][0] = -t[1][0];
   2506     M[3][1] = -t[1][1];
   2507 
   2508     /*
   2509        (M66  0 )   (L64 L65)   (M44  0 )
   2510      - (L76 M77) x (L74 L75) x (M54 M55)
   2511     */
   2512 
   2513     s[0][0] = M[6][6]*L[6][4];
   2514     s[0][1] = M[6][6]*L[6][5];
   2515     s[1][0] = M[7][6]*L[6][4]+M[7][7]*L[7][4];
   2516     s[1][1] = M[7][6]*L[6][5]+M[7][7]*L[7][5];
   2517 
   2518     t[0][0] = s[0][0]*M[4][4]+s[0][1]*M[5][4];
   2519     t[0][1] =                 s[0][1]*M[5][5];
   2520     t[1][0] = s[1][0]*M[4][4]+s[1][1]*M[5][4];
   2521     t[1][1] =                 s[1][1]*M[5][5];
   2522 
   2523     M[6][4] = -t[0][0];
   2524     M[6][5] = -t[0][1];
   2525     M[7][4] = -t[1][0];
   2526     M[7][5] = -t[1][1];
   2527 
   2528     /*
   2529         M00  0   0   0   0   0   0   0
   2530         M10 M11  0   0   0   0   0   0
   2531         M20 M21 M22  0   0   0   0   0
   2532         M30 M31 M32 M33  0   0   0   0
   2533         L40 L41 L42 L43 M44  0   0   0
   2534         L50 L51 L52 L53 M54 M55  0   0
   2535         L60 L61 L62 L63 M64 M65 M66  0
   2536         L70 L71 L72 L73 M74 M75 M76 M77
   2537     */
   2538 
   2539     /* 1*2 Matrix products of 4x4 matrices */
   2540 
   2541     /*
   2542        (M44  0   0   0 )   (L40 L41 L42 L43)   (M00  0   0   0 )
   2543        (M54 M55  0   0 )   (L50 L51 L52 L53)   (M10 M11  0   0 )
   2544        (M64 M65 M66  0 )   (L60 L61 L62 L63)   (M20 M21 M22  0 )
   2545      - (M74 M75 M76 M77) x (L70 L71 L72 L73) x (M30 M31 M32 M33)
   2546     */
   2547 
   2548     u[0][0] = M[4][4]*L[4][0];
   2549     u[0][1] = M[4][4]*L[4][1];
   2550     u[0][2] = M[4][4]*L[4][2];
   2551     u[0][3] = M[4][4]*L[4][3];
   2552     u[1][0] = M[5][4]*L[4][0]+M[5][5]*L[5][0];
   2553     u[1][1] = M[5][4]*L[4][1]+M[5][5]*L[5][1];
   2554     u[1][2] = M[5][4]*L[4][2]+M[5][5]*L[5][2];
   2555     u[1][3] = M[5][4]*L[4][3]+M[5][5]*L[5][3];
   2556     u[2][0] = M[6][4]*L[4][0]+M[6][5]*L[5][0]+M[6][6]*L[6][0];
   2557     u[2][1] = M[6][4]*L[4][1]+M[6][5]*L[5][1]+M[6][6]*L[6][1];
   2558     u[2][2] = M[6][4]*L[4][2]+M[6][5]*L[5][2]+M[6][6]*L[6][2];
   2559     u[2][3] = M[6][4]*L[4][3]+M[6][5]*L[5][3]+M[6][6]*L[6][3];
   2560     u[3][0] = M[7][4]*L[4][0]+M[7][5]*L[5][0]+M[7][6]*L[6][0]+M[7][7]*L[7][0];
   2561     u[3][1] = M[7][4]*L[4][1]+M[7][5]*L[5][1]+M[7][6]*L[6][1]+M[7][7]*L[7][1];
   2562     u[3][2] = M[7][4]*L[4][2]+M[7][5]*L[5][2]+M[7][6]*L[6][2]+M[7][7]*L[7][2];
   2563     u[3][3] = M[7][4]*L[4][3]+M[7][5]*L[5][3]+M[7][6]*L[6][3]+M[7][7]*L[7][3];
   2564 
   2565     v[0][0] = u[0][0]*M[0][0]+u[0][1]*M[1][0]+u[0][2]*M[2][0]+u[0][3]*M[3][0];
   2566     v[0][1] =                 u[0][1]*M[1][1]+u[0][2]*M[2][1]+u[0][3]*M[3][1];
   2567     v[0][2] =                                 u[0][2]*M[2][2]+u[0][3]*M[3][2];
   2568     v[0][3] =                                                 u[0][3]*M[3][3];
   2569     v[1][0] = u[1][0]*M[0][0]+u[1][1]*M[1][0]+u[1][2]*M[2][0]+u[1][3]*M[3][0];
   2570     v[1][1] =                 u[1][1]*M[1][1]+u[1][2]*M[2][1]+u[1][3]*M[3][1];
   2571     v[1][2] =                                 u[1][2]*M[2][2]+u[1][3]*M[3][2];
   2572     v[1][3] =                                                 u[1][3]*M[3][3];
   2573     v[2][0] = u[2][0]*M[0][0]+u[2][1]*M[1][0]+u[2][2]*M[2][0]+u[2][3]*M[3][0];
   2574     v[2][1] =                 u[2][1]*M[1][1]+u[2][2]*M[2][1]+u[2][3]*M[3][1];
   2575     v[2][2] =                                 u[2][2]*M[2][2]+u[2][3]*M[3][2];
   2576     v[2][3] =                                                 u[2][3]*M[3][3];
   2577     v[3][0] = u[3][0]*M[0][0]+u[3][1]*M[1][0]+u[3][2]*M[2][0]+u[3][3]*M[3][0];
   2578     v[3][1] =                 u[3][1]*M[1][1]+u[3][2]*M[2][1]+u[3][3]*M[3][1];
   2579     v[3][2] =                                 u[3][2]*M[2][2]+u[3][3]*M[3][2];
   2580     v[3][3] =                                                 u[3][3]*M[3][3];
   2581 
   2582     M[4][0] = -v[0][0];
   2583     M[4][1] = -v[0][1];
   2584     M[4][2] = -v[0][2];
   2585     M[4][3] = -v[0][3];
   2586     M[5][0] = -v[1][0];
   2587     M[5][1] = -v[1][1];
   2588     M[5][2] = -v[1][2];
   2589     M[5][3] = -v[1][3];
   2590     M[6][0] = -v[2][0];
   2591     M[6][1] = -v[2][1];
   2592     M[6][2] = -v[2][2];
   2593     M[6][3] = -v[2][3];
   2594     M[7][0] = -v[3][0];
   2595     M[7][1] = -v[3][1];
   2596     M[7][2] = -v[3][2];
   2597     M[7][3] = -v[3][3];
   2598 
   2599     /*
   2600         M00  0   0   0   0   0   0   0
   2601         M10 M11  0   0   0   0   0   0
   2602         M20 M21 M22  0   0   0   0   0
   2603         M30 M31 M32 M33  0   0   0   0
   2604         M40 M41 M42 M43 M44  0   0   0
   2605         M50 M51 M52 M53 M54 M55  0   0
   2606         M60 M61 M62 M63 M64 M65 M66  0
   2607         M70 M71 M72 M73 M74 M75 M76 M77
   2608     */
   2609 }
   2610 
   2611 
   2612 /**
   2613  * Solves dH = inv(JtJ) Jte. The argument lower-triangular matrix is the
   2614  * inverse of L as produced by the Cholesky decomposition LL^T of the matrix
   2615  * JtJ; Thus the operation performed here is a left-multiplication of a vector
   2616  * by two triangular matrices. The math is below:
   2617  *
   2618  * JtJ      = LL^T
   2619  * Linv     = L^-1
   2620  * (JtJ)^-1 = (LL^T)^-1
   2621  *          = (L^T^-1)(Linv)
   2622  *          = (Linv^T)(Linv)
   2623  * dH       = ((JtJ)^-1) (Jte)
   2624  *          = (Linv^T)(Linv) (Jte)
   2625  *
   2626  * where J is nx8, Jt is 8xn, JtJ is 8x8 PD, e is nx1, Jte is 8x1, L is lower
   2627  * triangular 8x8 and dH is 8x1.
   2628  */
   2629 
   2630 static inline void   sacTRISolve8x8(const float (*L)[8],
   2631                                     const float*  Jte,
   2632                                     float*        dH){
   2633     float t[8];
   2634 
   2635     t[0]  = L[0][0]*Jte[0];
   2636     t[1]  = L[1][0]*Jte[0]+L[1][1]*Jte[1];
   2637     t[2]  = L[2][0]*Jte[0]+L[2][1]*Jte[1]+L[2][2]*Jte[2];
   2638     t[3]  = L[3][0]*Jte[0]+L[3][1]*Jte[1]+L[3][2]*Jte[2]+L[3][3]*Jte[3];
   2639     t[4]  = L[4][0]*Jte[0]+L[4][1]*Jte[1]+L[4][2]*Jte[2]+L[4][3]*Jte[3]+L[4][4]*Jte[4];
   2640     t[5]  = L[5][0]*Jte[0]+L[5][1]*Jte[1]+L[5][2]*Jte[2]+L[5][3]*Jte[3]+L[5][4]*Jte[4]+L[5][5]*Jte[5];
   2641     t[6]  = L[6][0]*Jte[0]+L[6][1]*Jte[1]+L[6][2]*Jte[2]+L[6][3]*Jte[3]+L[6][4]*Jte[4]+L[6][5]*Jte[5]+L[6][6]*Jte[6];
   2642     t[7]  = L[7][0]*Jte[0]+L[7][1]*Jte[1]+L[7][2]*Jte[2]+L[7][3]*Jte[3]+L[7][4]*Jte[4]+L[7][5]*Jte[5]+L[7][6]*Jte[6]+L[7][7]*Jte[7];
   2643 
   2644 
   2645     dH[0] = L[0][0]*t[0]+L[1][0]*t[1]+L[2][0]*t[2]+L[3][0]*t[3]+L[4][0]*t[4]+L[5][0]*t[5]+L[6][0]*t[6]+L[7][0]*t[7];
   2646     dH[1] =              L[1][1]*t[1]+L[2][1]*t[2]+L[3][1]*t[3]+L[4][1]*t[4]+L[5][1]*t[5]+L[6][1]*t[6]+L[7][1]*t[7];
   2647     dH[2] =                           L[2][2]*t[2]+L[3][2]*t[3]+L[4][2]*t[4]+L[5][2]*t[5]+L[6][2]*t[6]+L[7][2]*t[7];
   2648     dH[3] =                                        L[3][3]*t[3]+L[4][3]*t[4]+L[5][3]*t[5]+L[6][3]*t[6]+L[7][3]*t[7];
   2649     dH[4] =                                                     L[4][4]*t[4]+L[5][4]*t[5]+L[6][4]*t[6]+L[7][4]*t[7];
   2650     dH[5] =                                                                  L[5][5]*t[5]+L[6][5]*t[6]+L[7][5]*t[7];
   2651     dH[6] =                                                                               L[6][6]*t[6]+L[7][6]*t[7];
   2652     dH[7] =                                                                                            L[7][7]*t[7];
   2653 }
   2654 
   2655 
   2656 /**
   2657  * Subtract dH from H.
   2658  */
   2659 
   2660 static inline void   sacSub8x1(float* Hout, const float* H, const float* dH){
   2661     Hout[0] = H[0] - dH[0];
   2662     Hout[1] = H[1] - dH[1];
   2663     Hout[2] = H[2] - dH[2];
   2664     Hout[3] = H[3] - dH[3];
   2665     Hout[4] = H[4] - dH[4];
   2666     Hout[5] = H[5] - dH[5];
   2667     Hout[6] = H[6] - dH[6];
   2668     Hout[7] = H[7] - dH[7];
   2669 }
   2670 
   2671 
   2672 /* End namespace cv */
   2673 }
   2674