Home | History | Annotate | Download | only in object_tracking
      1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 
     16 #include <math.h>
     17 
     18 #include "tensorflow/examples/android/jni/object_tracking/geom.h"
     19 #include "tensorflow/examples/android/jni/object_tracking/image-inl.h"
     20 #include "tensorflow/examples/android/jni/object_tracking/image.h"
     21 #include "tensorflow/examples/android/jni/object_tracking/time_log.h"
     22 #include "tensorflow/examples/android/jni/object_tracking/utils.h"
     23 
     24 #include "tensorflow/examples/android/jni/object_tracking/config.h"
     25 #include "tensorflow/examples/android/jni/object_tracking/flow_cache.h"
     26 #include "tensorflow/examples/android/jni/object_tracking/frame_pair.h"
     27 #include "tensorflow/examples/android/jni/object_tracking/image_data.h"
     28 #include "tensorflow/examples/android/jni/object_tracking/keypoint.h"
     29 #include "tensorflow/examples/android/jni/object_tracking/keypoint_detector.h"
     30 #include "tensorflow/examples/android/jni/object_tracking/optical_flow.h"
     31 
     32 namespace tf_tracking {
     33 
// Binds the flow computation to its configuration (which supplies the
// working image size). No frames are held initially; they are provided
// incrementally via NextFrame().
OpticalFlow::OpticalFlow(const OpticalFlowConfig* const config)
    : config_(config),
      frame1_(NULL),
      frame2_(NULL),
      working_size_(config->image_size) {}
     39 
     40 
     41 void OpticalFlow::NextFrame(const ImageData* const image_data) {
     42   // Special case for the first frame: make sure the image ends up in
     43   // frame1_ so that keypoint detection can be done on it if desired.
     44   frame1_ = (frame1_ == NULL) ? image_data : frame2_;
     45   frame2_ = image_data;
     46 }
     47 
     48 
// Static heart of the optical flow computation.
// Lucas Kanade algorithm.
//
// Refines the flow estimate (*out_g_x, *out_g_y) for the point (p_x, p_y)
// between image I and image J at a single pyramid level:
//   1. Sample the patch around the point in I together with its precomputed
//      spatial gradients I_x / I_y (fixed across iterations).
//   2. Build the 2x2 spatial gradient matrix G and invert it once.
//   3. For up to kNumIterations: sample the displaced patch in J, form the
//      image mismatch vector b, solve n = G^-1 * b for the residual
//      displacement, and accumulate n into the flow guess.
// Returns false only when G is not invertible (a textureless patch);
// otherwise returns true with the refined flow written back to the outputs.
// Sampling uses either 16:16 fixed-point or floating-point bilinear
// interpolation depending on USE_FIXED_POINT_FLOW; NORMALIZE additionally
// compensates for brightness/contrast differences between the frames.
bool OpticalFlow::FindFlowAtPoint_LK(const Image<uint8_t>& img_I,
                                     const Image<uint8_t>& img_J,
                                     const Image<int32_t>& I_x,
                                     const Image<int32_t>& I_y, const float p_x,
                                     const float p_y, float* out_g_x,
                                     float* out_g_y) {
  // Start from the caller-provided initial guess.
  float g_x = *out_g_x;
  float g_y = *out_g_y;
  // Get values for frame 1.  They remain constant through the inner
  // iteration loop.
  float vals_I[kFlowArraySize];
  float vals_I_x[kFlowArraySize];
  float vals_I_y[kFlowArraySize];

  // Patch is a square window of radius kFlowIntegrationWindowSize.
  const int kPatchSize = 2 * kFlowIntegrationWindowSize + 1;
  const float kWindowSizeFloat = static_cast<float>(kFlowIntegrationWindowSize);

#if USE_FIXED_POINT_FLOW
  // Largest sample coordinates (16:16 fixed point) that still permit
  // bilinear interpolation without reading past the image edge.
  const int fixed_x_max = RealToFixed1616(img_I.width_less_one_) - 1;
  const int fixed_y_max = RealToFixed1616(img_I.height_less_one_) - 1;
#else
  const float real_x_max = I_x.width_less_one_ - EPSILON;
  const float real_y_max = I_x.height_less_one_ - EPSILON;
#endif

  // Get the window around the original point.
  const float src_left_real = p_x - kWindowSizeFloat;
  const float src_top_real = p_y - kWindowSizeFloat;
  float* vals_I_ptr = vals_I;
  float* vals_I_x_ptr = vals_I_x;
  float* vals_I_y_ptr = vals_I_y;
#if USE_FIXED_POINT_FLOW
  // Source integer coordinates.
  const int src_left_fixed = RealToFixed1616(src_left_real);
  const int src_top_fixed = RealToFixed1616(src_top_real);

  // Sample intensity and both gradients for every pixel of the source patch.
  // Coordinates are clipped, so patches partially outside the image repeat
  // the border samples.
  for (int y = 0; y < kPatchSize; ++y) {
    const int fp_y = Clip(src_top_fixed + (y << 16), 0, fixed_y_max);

    for (int x = 0; x < kPatchSize; ++x) {
      const int fp_x = Clip(src_left_fixed + (x << 16), 0, fixed_x_max);

      *vals_I_ptr++ = img_I.GetPixelInterpFixed1616(fp_x, fp_y);
      *vals_I_x_ptr++ = I_x.GetPixelInterpFixed1616(fp_x, fp_y);
      *vals_I_y_ptr++ = I_y.GetPixelInterpFixed1616(fp_x, fp_y);
    }
  }
#else
  for (int y = 0; y < kPatchSize; ++y) {
    const float y_pos = Clip(src_top_real + y, 0.0f, real_y_max);

    for (int x = 0; x < kPatchSize; ++x) {
      const float x_pos = Clip(src_left_real + x, 0.0f, real_x_max);

      *vals_I_ptr++ = img_I.GetPixelInterp(x_pos, y_pos);
      *vals_I_x_ptr++ = I_x.GetPixelInterp(x_pos, y_pos);
      *vals_I_y_ptr++ = I_y.GetPixelInterp(x_pos, y_pos);
    }
  }
#endif

  // Compute the spatial gradient matrix about point p.
  float G[] = { 0, 0, 0, 0 };
  CalculateG(vals_I_x, vals_I_y, kFlowArraySize, G);

  // Find the inverse of G.
  float G_inv[4];
  if (!Invert2x2(G, G_inv)) {
    // Singular gradient matrix: the patch has no trackable texture.
    return false;
  }

#if NORMALIZE
  // Source-patch statistics, used below to cancel brightness/contrast
  // differences between the two frames.
  const float mean_I = ComputeMean(vals_I, kFlowArraySize);
  const float std_dev_I = ComputeStdDev(vals_I, kFlowArraySize, mean_I);
#endif

  // Iterate kNumIterations times or until we converge.
  for (int iteration = 0; iteration < kNumIterations; ++iteration) {
    // Get values for frame 2.
    float vals_J[kFlowArraySize];

    // Get the window around the destination point.
    const float left_real = p_x + g_x - kWindowSizeFloat;
    const float top_real  = p_y + g_y - kWindowSizeFloat;
    float* vals_J_ptr = vals_J;
#if USE_FIXED_POINT_FLOW
    // The top-left sub-pixel is set for the current iteration (in 16:16
    // fixed). This is constant over one iteration.
    const int left_fixed = RealToFixed1616(left_real);
    const int top_fixed  = RealToFixed1616(top_real);

    for (int win_y = 0; win_y < kPatchSize; ++win_y) {
      const int fp_y = Clip(top_fixed + (win_y << 16), 0, fixed_y_max);
      for (int win_x = 0; win_x < kPatchSize; ++win_x) {
        const int fp_x = Clip(left_fixed + (win_x << 16), 0, fixed_x_max);
        *vals_J_ptr++ = img_J.GetPixelInterpFixed1616(fp_x, fp_y);
      }
    }
#else
    for (int win_y = 0; win_y < kPatchSize; ++win_y) {
      const float y_pos = Clip(top_real + win_y, 0.0f, real_y_max);
      for (int win_x = 0; win_x < kPatchSize; ++win_x) {
        const float x_pos = Clip(left_real + win_x, 0.0f, real_x_max);
        *vals_J_ptr++ = img_J.GetPixelInterp(x_pos, y_pos);
      }
    }
#endif

#if NORMALIZE
    const float mean_J = ComputeMean(vals_J, kFlowArraySize);
    const float std_dev_J = ComputeStdDev(vals_J, kFlowArraySize, mean_J);

    // TODO(andrewharp): Probably better to completely detect and handle the
    // "corner case" where the patch is fully outside the image diagonally.
    // Guard against division by zero on a flat destination patch.
    const float std_dev_ratio = std_dev_J > 0.0f ? std_dev_I / std_dev_J : 1.0f;
#endif

    // Compute image mismatch vector.
    float b_x = 0.0f;
    float b_y = 0.0f;

    // Walk all four patch arrays in lockstep.
    vals_I_ptr = vals_I;
    vals_J_ptr = vals_J;
    vals_I_x_ptr = vals_I_x;
    vals_I_y_ptr = vals_I_y;

    for (int win_y = 0; win_y < kPatchSize; ++win_y) {
      for (int win_x = 0; win_x < kPatchSize; ++win_x) {
#if NORMALIZE
        // Normalized Image difference.
        const float dI =
            (*vals_I_ptr++ - mean_I) - (*vals_J_ptr++ - mean_J) * std_dev_ratio;
#else
        const float dI = *vals_I_ptr++ - *vals_J_ptr++;
#endif
        // Accumulate gradient-weighted difference into the mismatch vector.
        b_x += dI * *vals_I_x_ptr++;
        b_y += dI * *vals_I_y_ptr++;
      }
    }

    // Optical flow... solve n = G^-1 * b
    const float n_x = (G_inv[0] * b_x) + (G_inv[1] * b_y);
    const float n_y = (G_inv[2] * b_x) + (G_inv[3] * b_y);

    // Update best guess with residual displacement from this level and
    // iteration.
    g_x += n_x;
    g_y += n_y;

    // LOGV("Iteration %d: delta (%.3f, %.3f)", iteration, n_x, n_y);

    // Abort early if we're already below the threshold.
    if (Square(n_x) + Square(n_y) < Square(kTrackingAbortThreshold)) {
      break;
    }
  }  // Iteration.

  // Copy value back into output.
  *out_g_x = g_x;
  *out_g_y = g_y;
  return true;
}
    213 
    214 
// Pointwise flow using translational 2dof ESM.
//
// Efficient Second-order Minimization: like Lucas-Kanade, but the Jacobian
// used at each step is the average of the source-patch gradient and the
// gradient sampled from the target image at the current estimate, which
// typically needs fewer iterations to converge. If kDoBrightnessNormalize
// is set, a constant brightness offset between the patches is estimated and
// subtracted from the residual.
// Returns false only when the initial source patch (or its gradients)
// cannot be extracted; drifting out of the image during iteration merely
// stops refinement early, and the best estimate so far is still written to
// out_g_x / out_g_y with a true return.
bool OpticalFlow::FindFlowAtPoint_ESM(
    const Image<uint8_t>& img_I, const Image<uint8_t>& img_J,
    const Image<int32_t>& I_x, const Image<int32_t>& I_y,
    const Image<int32_t>& J_x, const Image<int32_t>& J_y, const float p_x,
    const float p_y, float* out_g_x, float* out_g_y) {
  // Start from the caller-provided initial guess.
  float g_x = *out_g_x;
  float g_y = *out_g_y;
  // 1 / (number of pixels in the patch), for averaging sums over the patch.
  const float area_inv = 1.0f / static_cast<float>(kFlowArraySize);

  // Get values for frame 1. They remain constant through the inner
  // iteration loop.
  uint8_t vals_I[kFlowArraySize];
  uint8_t vals_J[kFlowArraySize];
  int16_t src_gradient_x[kFlowArraySize];
  int16_t src_gradient_y[kFlowArraySize];

  // TODO(rspring): try out the IntegerPatchAlign() method once
  // the code for that is in ../common.
  const float wsize_float = static_cast<float>(kFlowIntegrationWindowSize);
  const int src_left_fixed = RealToFixed1616(p_x - wsize_float);
  const int src_top_fixed = RealToFixed1616(p_y - wsize_float);
  const int patch_size = 2 * kFlowIntegrationWindowSize + 1;

  // Create the keypoint template patch from a subpixel location.
  // Unlike the LK path, extraction fails (rather than clamping) when the
  // patch does not fit inside the image.
  if (!img_I.ExtractPatchAtSubpixelFixed1616(src_left_fixed, src_top_fixed,
                                             patch_size, patch_size, vals_I) ||
      !I_x.ExtractPatchAtSubpixelFixed1616(src_left_fixed, src_top_fixed,
                                           patch_size, patch_size,
                                           src_gradient_x) ||
      !I_y.ExtractPatchAtSubpixelFixed1616(src_left_fixed, src_top_fixed,
                                           patch_size, patch_size,
                                           src_gradient_y)) {
    return false;
  }

  int bright_offset = 0;
  int sum_diff = 0;

  // The top-left sub-pixel is set for the current iteration (in 16:16 fixed).
  // This is constant over one iteration.
  int left_fixed = RealToFixed1616(p_x + g_x - wsize_float);
  int top_fixed  = RealToFixed1616(p_y + g_y - wsize_float);

  // The truncated version gives the most top-left pixel that is used.
  int left_trunc = left_fixed >> 16;
  int top_trunc = top_fixed >> 16;

  // Compute an initial brightness offset.
  // Only done when the whole destination patch lies strictly inside img_J,
  // so the fast row-pointer walk below is safe.
  if (kDoBrightnessNormalize &&
      left_trunc >= 0 && top_trunc >= 0 &&
      (left_trunc + patch_size) < img_J.width_less_one_ &&
      (top_trunc + patch_size) < img_J.height_less_one_) {
    int templ_index = 0;
    const uint8_t* j_row = img_J[top_trunc] + left_trunc;

    const int j_stride = img_J.stride();

    // Mean intensity difference (destination minus template) over the patch.
    for (int y = 0; y < patch_size; ++y, j_row += j_stride) {
      for (int x = 0; x < patch_size; ++x) {
        sum_diff += static_cast<int>(j_row[x]) - vals_I[templ_index++];
      }
    }

    bright_offset = static_cast<int>(static_cast<float>(sum_diff) * area_inv);
  }

  // Iterate kNumIterations times or until we go out of image.
  for (int iteration = 0; iteration < kNumIterations; ++iteration) {
    // jtj = upper triangle of the symmetric 2x2 J^T*J; jtr = J^T * residual.
    int jtj[3] = { 0, 0, 0 };
    int jtr[2] = { 0, 0 };
    sum_diff = 0;

    // Extract the target image values.
    // Extract the gradient from the target image patch and accumulate to
    // the gradient of the source image patch.
    if (!img_J.ExtractPatchAtSubpixelFixed1616(left_fixed, top_fixed,
                                               patch_size, patch_size,
                                               vals_J)) {
      break;
    }

    const uint8_t* templ_row = vals_I;
    const uint8_t* extract_row = vals_J;
    const int16_t* src_dx_row = src_gradient_x;
    const int16_t* src_dy_row = src_gradient_y;

    for (int y = 0; y < patch_size; ++y, templ_row += patch_size,
         src_dx_row += patch_size, src_dy_row += patch_size,
         extract_row += patch_size) {
      const int fp_y = top_fixed + (y << 16);
      for (int x = 0; x < patch_size; ++x) {
        const int fp_x = left_fixed + (x << 16);
        // NOTE(review): unlike the brightness-offset block above, these
        // interpolated gradient reads are not bounds-checked here —
        // presumably GetPixelInterpFixed1616 tolerates or the early-break
        // below prevents out-of-range coordinates; verify in image-inl.h.
        int32_t target_dx = J_x.GetPixelInterpFixed1616(fp_x, fp_y);
        int32_t target_dy = J_y.GetPixelInterpFixed1616(fp_x, fp_y);

        // Combine the two Jacobians.
        // Right-shift by one to account for the fact that we add
        // two Jacobians.
        int32_t dx = (src_dx_row[x] + target_dx) >> 1;
        int32_t dy = (src_dy_row[x] + target_dy) >> 1;

        // The current residual b - h(q) == extracted - (template + offset)
        int32_t diff = static_cast<int32_t>(extract_row[x]) -
                       static_cast<int32_t>(templ_row[x]) - bright_offset;

        jtj[0] += dx * dx;
        jtj[1] += dx * dy;
        jtj[2] += dy * dy;

        jtr[0] += dx * diff;
        jtr[1] += dy * diff;

        sum_diff += diff;
      }
    }

    const float jtr1_float = static_cast<float>(jtr[0]);
    const float jtr2_float = static_cast<float>(jtr[1]);

    // Add some baseline stability to the system.
    // By Cauchy-Schwarz, jtj[1]^2 <= jtj[0]*jtj[2] before regularization,
    // so adding kEsmRegularizer to the diagonal makes the determinant
    // strictly positive (barring integer overflow of the accumulators).
    jtj[0] += kEsmRegularizer;
    jtj[2] += kEsmRegularizer;

    // 64-bit products to avoid overflow when forming the determinant.
    const int64_t prod1 = static_cast<int64_t>(jtj[0]) * jtj[2];
    const int64_t prod2 = static_cast<int64_t>(jtj[1]) * jtj[1];

    // One ESM step.
    // jtj_1 is the adjugate of J^T*J; multiplied by det_inv it gives the
    // inverse, so the update below is -(J^T*J)^-1 * J^T*r.
    const float jtj_1[4] = { static_cast<float>(jtj[2]),
                             static_cast<float>(-jtj[1]),
                             static_cast<float>(-jtj[1]),
                             static_cast<float>(jtj[0]) };
    const double det_inv = 1.0 / static_cast<double>(prod1 - prod2);

    g_x -= det_inv * (jtj_1[0] * jtr1_float + jtj_1[1] * jtr2_float);
    g_y -= det_inv * (jtj_1[2] * jtr1_float + jtj_1[3] * jtr2_float);

    if (kDoBrightnessNormalize) {
      // Refine the brightness offset with the mean residual (rounded).
      bright_offset +=
          static_cast<int>(area_inv * static_cast<float>(sum_diff) + 0.5f);
    }

    // Update top left position.
    left_fixed = RealToFixed1616(p_x + g_x - wsize_float);
    top_fixed  = RealToFixed1616(p_y + g_y - wsize_float);

    left_trunc = left_fixed >> 16;
    top_trunc = top_fixed >> 16;

    // Abort iterations if we go out of borders.
    if (left_trunc < 0 || top_trunc < 0 ||
        (left_trunc + patch_size) >= J_x.width_less_one_ ||
        (top_trunc + patch_size) >= J_y.height_less_one_) {
      break;
    }
  }  // Iteration.

  // Copy value back into output.
  *out_g_x = g_x;
  *out_g_y = g_y;
  return true;
}
    377 
    378 
    379 bool OpticalFlow::FindFlowAtPointReversible(
    380     const int level, const float u_x, const float u_y,
    381     const bool reverse_flow,
    382     float* flow_x, float* flow_y) const {
    383   const ImageData& frame_a = reverse_flow ? *frame2_ : *frame1_;
    384   const ImageData& frame_b = reverse_flow ? *frame1_ : *frame2_;
    385 
    386   // Images I (prev) and J (next).
    387   const Image<uint8_t>& img_I = *frame_a.GetPyramidSqrt2Level(level * 2);
    388   const Image<uint8_t>& img_J = *frame_b.GetPyramidSqrt2Level(level * 2);
    389 
    390   // Computed gradients.
    391   const Image<int32_t>& I_x = *frame_a.GetSpatialX(level);
    392   const Image<int32_t>& I_y = *frame_a.GetSpatialY(level);
    393   const Image<int32_t>& J_x = *frame_b.GetSpatialX(level);
    394   const Image<int32_t>& J_y = *frame_b.GetSpatialY(level);
    395 
    396   // Shrink factor from original.
    397   const float shrink_factor = (1 << level);
    398 
    399   // Image position vector (p := u^l), scaled for this level.
    400   const float scaled_p_x = u_x / shrink_factor;
    401   const float scaled_p_y = u_y / shrink_factor;
    402 
    403   float scaled_flow_x = *flow_x / shrink_factor;
    404   float scaled_flow_y = *flow_y / shrink_factor;
    405 
    406   // LOGE("FindFlowAtPoint level %d: %5.2f, %5.2f (%5.2f, %5.2f)", level,
    407   //     scaled_p_x, scaled_p_y, &scaled_flow_x, &scaled_flow_y);
    408 
    409   const bool success = kUseEsm ?
    410     FindFlowAtPoint_ESM(img_I, img_J, I_x, I_y, J_x, J_y,
    411                         scaled_p_x, scaled_p_y,
    412                         &scaled_flow_x, &scaled_flow_y) :
    413     FindFlowAtPoint_LK(img_I, img_J, I_x, I_y,
    414                        scaled_p_x, scaled_p_y,
    415                        &scaled_flow_x, &scaled_flow_y);
    416 
    417   *flow_x = scaled_flow_x * shrink_factor;
    418   *flow_y = scaled_flow_y * shrink_factor;
    419 
    420   return success;
    421 }
    422 
    423 
    424 bool OpticalFlow::FindFlowAtPointSingleLevel(
    425     const int level,
    426     const float u_x, const float u_y,
    427     const bool filter_by_fb_error,
    428     float* flow_x, float* flow_y) const {
    429   if (!FindFlowAtPointReversible(level, u_x, u_y, false, flow_x, flow_y)) {
    430     return false;
    431   }
    432 
    433   if (filter_by_fb_error) {
    434     const float new_position_x = u_x + *flow_x;
    435     const float new_position_y = u_y + *flow_y;
    436 
    437     float reverse_flow_x = 0.0f;
    438     float reverse_flow_y = 0.0f;
    439 
    440     // Now find the backwards flow and confirm it lines up with the original
    441     // starting point.
    442     if (!FindFlowAtPointReversible(level, new_position_x, new_position_y,
    443                                    true,
    444                                    &reverse_flow_x, &reverse_flow_y)) {
    445       LOGE("Backward error!");
    446       return false;
    447     }
    448 
    449     const float discrepancy_length =
    450         sqrtf(Square(*flow_x + reverse_flow_x) +
    451               Square(*flow_y + reverse_flow_y));
    452 
    453     const float flow_length = sqrtf(Square(*flow_x) + Square(*flow_y));
    454 
    455     return discrepancy_length <
    456         (kMaxForwardBackwardErrorAllowed * flow_length);
    457   }
    458 
    459   return true;
    460 }
    461 
    462 
    463 // An implementation of the Pyramidal Lucas-Kanade Optical Flow algorithm.
    464 // See http://robots.stanford.edu/cs223b04/algo_tracking.pdf for details.
    465 bool OpticalFlow::FindFlowAtPointPyramidal(const float u_x, const float u_y,
    466                                            const bool filter_by_fb_error,
    467                                            float* flow_x, float* flow_y) const {
    468   const int max_level = MAX(kMinNumPyramidLevelsToUseForAdjustment,
    469                             kNumPyramidLevels - kNumCacheLevels);
    470 
    471   // For every level in the pyramid, update the coordinates of the best match.
    472   for (int l = max_level - 1; l >= 0; --l) {
    473     if (!FindFlowAtPointSingleLevel(l, u_x, u_y,
    474                                     filter_by_fb_error, flow_x, flow_y)) {
    475       return false;
    476     }
    477   }
    478 
    479   return true;
    480 }
    481 
    482 }  // namespace tf_tracking
    483