Home | History | Annotate | Download | only in dbreg
      1 /*
      2  * Copyright (C) 2011 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 
     18 #pragma once
     19 
     20 #ifdef _WIN32
     21 #ifdef DBREG_EXPORTS
     22 #define DBREG_API __declspec(dllexport)
     23 #else
     24 #define DBREG_API __declspec(dllimport)
     25 #endif
     26 #else
     27 #define DBREG_API
     28 #endif
     29 
     30 // @jke - the next few lines are for extracting timing data.  TODO: Remove after test
     31 #define PROFILE 0
     32 
     33 #include "dbstabsmooth.h"
     34 
     35 #include <db_feature_detection.h>
     36 #include <db_feature_matching.h>
     37 #include <db_rob_image_homography.h>
     38 
     39 #if PROFILE
     40     #include <sys/time.h>
     41 #endif
     42 
     43 /*! \mainpage db_FrameToReferenceRegistration
     44 
     45  \section intro Introduction
     46 
     47  db_FrameToReferenceRegistration provides a simple interface to a set of sophisticated algorithms for stabilizing
     48  video sequences.  As its name suggests, the class is used to compute parameters that will allow us to warp incoming video
     49  frames and register them with respect to a so-called <i>reference</i> frame.  The reference frame is simply the first
     50  frame of a sequence; the registration process is that of estimating the parameters of a warp that can be applied to
     51  subsequent frames to make those frames align with the reference.  A video made up of these warped frames will be more
     52  stable than the input video.
     53 
     54  For more technical information on the internal structure of the algorithms used within the db_FrameToRegistration class,
     55  please follow this <a href="../Sarnoff image registration.docx">link</a>.
     56 
     57  \section usage Usage
     58  In addition to the class constructor, there are two main functions of db_FrameToReferenceRegistration that are of
     59  interest to the programmer.  db_FrameToReferenceRegistration::Init(...) is used to initialize the parameters of the
     60  registration algorithm. db_FrameToReferenceRegistration::AddFrame(...) is the method by which each new video frame
     61  is introduced to the registration algorithm, and produces the estimated registration warp parameters.
     62 
     63  The following example illustrates how the major methods of the class db_FrameToReferenceRegistration can be used together
     64  to calculate the registration parameters for an image sequence.  In the example, the calls to the methods of
     65  db_FrameToReferenceRegistration match those found in the API, but supporting code should be considered pseudo-code.
     66  For a more complete example, please consult the source code for dbregtest.
     67 
     68 
     69     \code
     70     // feature-based image registration class:
     71     db_FrameToReferenceRegistration reg;
     72 
     73     // Image data
     74     const unsigned char * const * image_storage;
     75 
     76     // The 3x3 frame to reference registration parameters
     77     double frame_to_ref_homography[9];
     78 
     79     // a counter to count the number of frames processed.
     80     unsigned long frame_counter;
     81     // ...
     82 
     83     // main loop - keep going while there are images to process.
     84     while (ImagesAreAvailable)
     85     {
     86         // Call functions to place latest data into image_storage
     87         // ...
     88 
     89         // if the registration object is not yet initialized, then do so
     90         // The arguments to this function are explained in the accompanying
     91         // html API documentation
     92         if (!reg.Initialized())
     93         {
     94             reg.Init(w,h,motion_model_type,25,linear_polish,quarter_resolution,
     95                    DB_POINT_STANDARDDEV,reference_update_period,
     96                    do_motion_smoothing,motion_smoothing_gain,
     97                    DB_DEFAULT_NR_SAMPLES,DB_DEFAULT_CHUNK_SIZE,
     98                    nr_corners,max_disparity);
     99         }
    100 
    101         // Present the new image data to the registration algorithm,
    102         // with the result being stored in the frame_to_ref_homography
    103         // variable.
    104         reg.AddFrame(image_storage,frame_to_ref_homography);
    105 
    106         // frame_to_ref_homography now contains the stabilizing transform
    107         // use this to warp the latest image for display, etc.
    108 
    109         // if this is the first frame, we need to tell the registration
    110         // class to store the image as its reference.  Otherwise, AddFrame
    111         // takes care of that.
    112         if (frame_counter == 0)
    113         {
    114             reg.UpdateReference(image_storage);
    115         }
    116 
    117         // increment the frame counter
    118         frame_counter++;
    119     }
    120 
    121     \endcode
    122 
    123  */
    124 
    125 /*!
    126  * Performs feature-based frame to reference image registration.
    127  */
    128 class DBREG_API db_FrameToReferenceRegistration
    129 {
    130 public:
    131     db_FrameToReferenceRegistration(void);
    132     ~db_FrameToReferenceRegistration();
    133 
    134     /*!
    135      * Set parameters and allocate memory. Note: The default values of these parameters have been set to the values used for the android implementation (i.e. the demo APK).
    136      * \param width         image width
    137      * \param height        image height
    138      * \param homography_type see definitions in \ref LMRobImageHomography
    139      * \param max_iterations    max number of polishing steps
    140      * \param linear_polish     whether to perform a linear polishing step after RANSAC
    141      * \param quarter_resolution    whether to process input images at quarter resolution (for computational efficiency)
    142      * \param scale         Cauchy scale coefficient (see db_ExpCauchyReprojectionError() )
    143      * \param reference_update_period   how often to update the alignment reference (in units of number of frames)
    144      * \param do_motion_smoothing   whether to perform display reference smoothing
    145      * \param motion_smoothing_gain weight factor to reflect how fast the display reference must follow the current frame if motion smoothing is enabled
    146      * \param nr_samples        number of times to compute a hypothesis
    147      * \param chunk_size        size of cost chunks
    148      * \param cd_target_nr_corners  target number of corners for corner detector
    149      * \param cm_max_disparity      maximum disparity search range for corner matcher (in units of ratio of image width)
    150      * \param cm_use_smaller_matching_window    if set to true, uses a correlation window of 5x5 instead of the default 11x11
    151      * \param cd_nr_horz_blocks     the number of horizontal blocks for the corner detector to partition the image
    152      * \param cd_nr_vert_blocks     the number of vertical blocks for the corner detector to partition the image
    153     */
    154     void Init(int width, int height,
    155           int       homography_type = DB_HOMOGRAPHY_TYPE_DEFAULT,
    156           int       max_iterations = DB_DEFAULT_MAX_ITERATIONS,
    157           bool      linear_polish = false,
    158           bool   quarter_resolution = true,
    159           double  scale = DB_POINT_STANDARDDEV,
    160           unsigned int reference_update_period = 3,
    161           bool   do_motion_smoothing = false,
    162           double motion_smoothing_gain = 0.75,
    163           int   nr_samples = DB_DEFAULT_NR_SAMPLES,
    164           int   chunk_size = DB_DEFAULT_CHUNK_SIZE,
    165           int    cd_target_nr_corners = 500,
    166           double cm_max_disparity = 0.2,
    167           bool   cm_use_smaller_matching_window = false,
    168           int    cd_nr_horz_blocks = 5,
    169           int    cd_nr_vert_blocks = 5);
    170 
    171     /*!
    172      * Reset the transformation type that is being use to perform alignment. Use this to change the alignment type at run time.
    173      * \param homography_type   the type of transformation to use for performing alignment (see definitions in \ref LMRobImageHomography)
    174     */
    175     void ResetHomographyType(int homography_type) { m_homography_type = homography_type; }
    176 
    177     /*!
    178      * Enable/Disable motion smoothing. Use this to turn motion smoothing on/off at run time.
    179      * \param enable    flag indicating whether to turn the motion smoothing on or off.
    180     */
    181     void ResetSmoothing(bool enable) { m_do_motion_smoothing = enable; }
    182 
    183     /*!
    184      * Align an inspection image to an existing reference image, update the reference image if due and perform motion smoothing if enabled.
    185      * \param im                new inspection image
    186      * \param H             computed transformation from reference to inspection coordinate frame. Identity is returned if no reference frame was set.
    187      * \param force_reference   make this the new reference image
    188      */
    189     int AddFrame(const unsigned char * const * im, double H[9], bool force_reference=false, bool prewarp=false);
    190 
    191     /*!
    192      * Returns true if Init() was run.
    193      */
    194     bool Initialized() const { return m_initialized; }
    195 
    196     /*!
    197      * Returns true if the current frame is being used as the alignment reference.
    198     */
    199     bool IsCurrentReference() const { return m_current_is_reference; }
    200 
    201     /*!
    202      * Returns true if we need to call UpdateReference now.
    203      */
    204     bool NeedReferenceUpdate();
    205 
    206     /*!
    207      * Returns the pointer reference to the alignment reference image data
    208     */
    209     unsigned char ** GetReferenceImage() { return m_reference_image; }
    210 
    211     /*!
    212      * Returns the pointer reference to the double array containing the homogeneous coordinates for the matched reference image corners.
    213     */
    214     double * GetRefCorners() { return m_corners_ref; }
    215     /*!
    216      * Returns the pointer reference to the double array containing the homogeneous coordinates for the matched inspection image corners.
    217     */
    218     double * GetInsCorners() { return m_corners_ins; }
    219     /*!
    220      * Returns the number of correspondences between the reference and inspection images.
    221     */
    222     int GetNrMatches() { return m_nr_matches; }
    223 
    224     /*!
    225      * Returns the number of corners detected in the current reference image.
    226     */
    227     int GetNrRefCorners() { return m_nr_corners_ref; }
    228 
    229     /*!
    230      * Returns the pointer to an array of indices that were found to be RANSAC inliers from the matched corner lists.
    231     */
    232     int* GetInliers() { return m_inlier_indices; }
    233 
    234     /*!
    235      * Returns the number of inliers from the RANSAC matching step.
    236     */
    237     int  GetNrInliers() { return m_num_inlier_indices; }
    238 
    239     //std::vector<int>& GetInliers();
    240     //void Polish(std::vector<int> &inlier_indices);
    241 
    242     /*!
    243      * Perform a linear polishing step by re-estimating the alignment transformation using the RANSAC inliers.
    244      * \param inlier_indices    pointer to an array of indices that were found to be RANSAC inliers from the matched corner lists.
    245      * \param num_inlier_indices    number of inliers i.e. the length of the array passed as the first argument.
    246     */
    247     void Polish(int *inlier_indices, int &num_inlier_indices);
    248 
    249     /*!
    250      * Reset the motion smoothing parameters to their initial values.
    251     */
    252     void ResetMotionSmoothingParameters() { m_stab_smoother.Init(); }
    253 
    254     /*!
    255      * Update the alignment reference image to the specified image.
    256      * \param im    pointer to the image data to be used as the new alignment reference.
    257      * \param subsample boolean flag to control whether the function should internally subsample the provided image to the size provided in the Init() function.
    258     */
    259     int UpdateReference(const unsigned char * const * im, bool subsample = true, bool detect_corners = true);
    260 
    261     /*!
    262      * Returns the transformation from the display reference to the alignment reference frame
    263     */
    264     void Get_H_dref_to_ref(double H[9]);
    265     /*!
    266      * Returns the transformation from the display reference to the inspection reference frame
    267     */
    268     void Get_H_dref_to_ins(double H[9]);
    269     /*!
    270      * Set the transformation from the display reference to the inspection reference frame
    271      * \param H the transformation to set
    272     */
    273     void Set_H_dref_to_ins(double H[9]);
    274 
    275     /*!
    276      * Reset the display reference to the current frame.
    277     */
    278     void ResetDisplayReference();
    279 
    280     /*!
    281      * Estimate a secondary motion model starting from the specified transformation.
    282      * \param H the primary motion model to start from
    283     */
    284     void EstimateSecondaryModel(double H[9]);
    285 
    286     /*!
    287      *
    288     */
    289     void SelectOutliers();
    290 
    291     char *profile_string;
    292 
    293 protected:
    294     void Clean();
    295     void GenerateQuarterResImage(const unsigned char* const * im);
    296 
    297     int     m_im_width;
    298     int     m_im_height;
    299 
    300     // RANSAC and refinement parameters:
    301     int m_homography_type;
    302     int     m_max_iterations;
    303     double  m_scale;
    304     int     m_nr_samples;
    305     int     m_chunk_size;
    306     double  m_outlier_t2;
    307 
    308     // Whether to fit a linear model to just the inliers at the end
    309     bool   m_linear_polish;
    310     double m_polish_C[36];
    311     double m_polish_D[6];
    312 
    313     // local state
    314     bool m_current_is_reference;
    315     bool m_initialized;
    316 
    317     // inspection to reference homography:
    318     double m_H_ref_to_ins[9];
    319     double m_H_dref_to_ref[9];
    320 
    321     // feature extraction and matching:
    322     db_CornerDetector_u m_cd;
    323     db_Matcher_u        m_cm;
    324 
    325     // length of corner arrays:
    326     unsigned long m_max_nr_corners;
    327 
    328     // corner locations of reference image features:
    329     double * m_x_corners_ref;
    330     double * m_y_corners_ref;
    331     int  m_nr_corners_ref;
    332 
    333     // corner locations of inspection image features:
    334     double * m_x_corners_ins;
    335     double * m_y_corners_ins;
    336     int      m_nr_corners_ins;
    337 
    338     // length of match index arrays:
    339     unsigned long m_max_nr_matches;
    340 
    341     // match indices:
    342     int * m_match_index_ref;
    343     int * m_match_index_ins;
    344     int   m_nr_matches;
    345 
    346     // pointer to internal copy of the reference image:
    347     unsigned char ** m_reference_image;
    348 
    349     // pointer to internal copy of last aligned inspection image:
    350     unsigned char ** m_aligned_ins_image;
    351 
    352     // pointer to quarter resolution image, if used.
    353     unsigned char** m_quarter_res_image;
    354 
    355     // temporary storage for the quarter resolution image processing
    356     unsigned char** m_horz_smooth_subsample_image;
    357 
    358     // temporary space for homography computation:
    359     double * m_temp_double;
    360     int * m_temp_int;
    361 
    362     // homogenous image point arrays:
    363     double * m_corners_ref;
    364     double * m_corners_ins;
    365 
    366     // Indices of the points within the match lists
    367     int * m_inlier_indices;
    368     int m_num_inlier_indices;
    369 
    370     //void ComputeInliers(double H[9], std::vector<int> &inlier_indices);
    371     void ComputeInliers(double H[9]);
    372 
    373     // cost arrays:
    374     void ComputeCostArray();
    375     bool m_sq_cost_computed;
    376     double * m_sq_cost;
    377 
    378     // cost histogram:
    379     void ComputeCostHistogram();
    380     int *m_cost_histogram;
    381 
    382     void SetOutlierThreshold();
    383 
    384     // utility function for smoothing the motion parameters.
    385     void SmoothMotion(void);
    386 
    387 private:
    388     double m_K[9];
    389     const int m_over_allocation;
    390 
    391     bool m_reference_set;
    392 
    393     // Maximum number of inliers seen until now w.r.t the current reference frame
    394     int m_max_inlier_count;
    395 
    396     // Number of cost histogram bins:
    397     int m_nr_bins;
    398     // All costs above this threshold get put into the last bin:
    399     int m_max_cost_pix;
    400 
    401     // whether to quarter the image resolution for processing, or not
    402     bool m_quarter_resolution;
    403 
    404     // the period (in number of frames) for reference update.
    405     unsigned int m_reference_update_period;
    406 
    407     // the number of frames processed so far.
    408     unsigned int m_nr_frames_processed;
    409 
    410     // smoother for motion transformations
    411     db_StabilizationSmoother m_stab_smoother;
    412 
    413     // boolean to control whether motion smoothing occurs (or not)
    414     bool m_do_motion_smoothing;
    415 
    416     // double to set the gain for motion smoothing
    417     double m_motion_smoothing_gain;
    418 };
    419 /*!
    420  Create look-up tables to undistort images. Only Bougeut (Matlab toolkit)
    421  is currently supported. Can be used with db_WarpImageLut_u().
    422  \code
    423     xd = H*xs;
    424     xd = xd/xd(3);
    425  \endcode
    426  \param lut_x   pre-allocated float image
    427  \param lut_y   pre-allocated float image
    428  \param w       width
    429  \param h       height
    430  \param H       image homography from source to destination
    431  */
    432 inline void db_GenerateHomographyLut(float ** lut_x,float ** lut_y,int w,int h,const double H[9])
    433 {
    434     assert(lut_x && lut_y);
    435     double x[3] = {0.0,0.0,1.0};
    436     double xb[3];
    437 
    438 /*
    439     double xl[3];
    440 
    441     // Determine the output coordinate system ROI
    442     double Hinv[9];
    443     db_InvertAffineTransform(Hinv,H);
    444     db_Multiply3x3_3x1(xl, Hinv, x);
    445     xl[0] = db_SafeDivision(xl[0],xl[2]);
    446     xl[1] = db_SafeDivision(xl[1],xl[2]);
    447 */
    448 
    449     for ( int i = 0; i < w; ++i )
    450         for ( int j = 0; j < h; ++j )
    451         {
    452             x[0] = double(i);
    453             x[1] = double(j);
    454             db_Multiply3x3_3x1(xb, H, x);
    455             xb[0] = db_SafeDivision(xb[0],xb[2]);
    456             xb[1] = db_SafeDivision(xb[1],xb[2]);
    457 
    458             lut_x[j][i] = float(xb[0]);
    459             lut_y[j][i] = float(xb[1]);
    460         }
    461 }
    462 
    463 /*!
    464  * Perform a look-up table warp for packed RGB ([rgbrgbrgb...]) images.
    465  * The LUTs must be float images of the same size as source image.
    466  * The source value x_s is determined from destination (x_d,y_d) through lut_x
    467  * and y_s is determined from lut_y:
    468    \code
    469    x_s = lut_x[y_d][x_d];
    470    y_s = lut_y[y_d][x_d];
    471    \endcode
    472 
    473  * \param src   source image (w*3 by h)
    474  * \param dst   destination image (w*3 by h)
    475  * \param w     width
    476  * \param h     height
    477  * \param lut_x LUT for x
    478  * \param lut_y LUT for y
    479  */
    480 inline void db_WarpImageLutFast_rgb(const unsigned char * const * src, unsigned char ** dst, int w, int h,
    481                                   const float * const * lut_x, const float * const * lut_y)
    482 {
    483     assert(src && dst);
    484     int xd=0, yd=0;
    485 
    486     for ( int i = 0; i < w; ++i )
    487         for ( int j = 0; j < h; ++j )
    488         {
    489             xd = static_cast<unsigned int>(lut_x[j][i]);
    490             yd = static_cast<unsigned int>(lut_y[j][i]);
    491             if ( xd >= w || yd >= h ||
    492                  xd < 0 || yd < 0)
    493             {
    494                 dst[j][3*i  ] = 0;
    495                 dst[j][3*i+1] = 0;
    496                 dst[j][3*i+2] = 0;
    497             }
    498             else
    499             {
    500                 dst[j][3*i  ] = src[yd][3*xd  ];
    501                 dst[j][3*i+1] = src[yd][3*xd+1];
    502                 dst[j][3*i+2] = src[yd][3*xd+2];
    503             }
    504         }
    505 }
    506 
    507 inline unsigned char db_BilinearInterpolationRGB(double y, double x, const unsigned char * const * v, int offset)
    508 {
    509          int floor_x=(int) x;
    510          int floor_y=(int) y;
    511 
    512          int ceil_x=floor_x+1;
    513          int ceil_y=floor_y+1;
    514 
    515          unsigned char f00 = v[floor_y][3*floor_x+offset];
    516          unsigned char f01 = v[floor_y][3*ceil_x+offset];
    517          unsigned char f10 = v[ceil_y][3*floor_x+offset];
    518          unsigned char f11 = v[ceil_y][3*ceil_x+offset];
    519 
    520          double xl = x-floor_x;
    521          double yl = y-floor_y;
    522 
    523          return (unsigned char)(f00*(1-yl)*(1-xl) + f10*yl*(1-xl) + f01*(1-yl)*xl + f11*yl*xl);
    524 }
    525 
    526 inline void db_WarpImageLutBilinear_rgb(const unsigned char * const * src, unsigned char ** dst, int w, int h,
    527                                   const float * const * lut_x, const float * const * lut_y)
    528 {
    529     assert(src && dst);
    530     double xd=0.0, yd=0.0;
    531 
    532     for ( int i = 0; i < w; ++i )
    533         for ( int j = 0; j < h; ++j )
    534         {
    535             xd = static_cast<double>(lut_x[j][i]);
    536             yd = static_cast<double>(lut_y[j][i]);
    537             if ( xd > w-2 || yd > h-2 ||
    538                  xd < 0.0 || yd < 0.0)
    539             {
    540                 dst[j][3*i  ] = 0;
    541                 dst[j][3*i+1] = 0;
    542                 dst[j][3*i+2] = 0;
    543             }
    544             else
    545             {
    546                 dst[j][3*i  ] = db_BilinearInterpolationRGB(yd,xd,src,0);
    547                 dst[j][3*i+1] = db_BilinearInterpolationRGB(yd,xd,src,1);
    548                 dst[j][3*i+2] = db_BilinearInterpolationRGB(yd,xd,src,2);
    549             }
    550         }
    551 }
    552 
    553 inline double SquaredInhomogenousHomographyError(double y[3],double H[9],double x[3]){
    554     double x0,x1,x2,mult;
    555     double sd;
    556 
    557     x0=H[0]*x[0]+H[1]*x[1]+H[2];
    558     x1=H[3]*x[0]+H[4]*x[1]+H[5];
    559     x2=H[6]*x[0]+H[7]*x[1]+H[8];
    560     mult=1.0/((x2!=0.0)?x2:1.0);
    561     sd=(y[0]-x0*mult)*(y[0]-x0*mult)+(y[1]-x1*mult)*(y[1]-x1*mult);
    562 
    563     return(sd);
    564 }
    565 
    566 
    567 // functions related to profiling
    568 #if PROFILE
    569 
    570 /* return current time in milliseconds */
    571 static double
    572 now_ms(void)
    573 {
    574     //struct timespec res;
    575     struct timeval res;
    576     //clock_gettime(CLOCK_REALTIME, &res);
    577     gettimeofday(&res, NULL);
    578     return 1000.0*res.tv_sec + (double)res.tv_usec/1e3;
    579 }
    580 
    581 #endif
    582