1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #include <math.h> 17 18 #include "tensorflow/examples/android/jni/object_tracking/geom.h" 19 #include "tensorflow/examples/android/jni/object_tracking/image-inl.h" 20 #include "tensorflow/examples/android/jni/object_tracking/image.h" 21 #include "tensorflow/examples/android/jni/object_tracking/time_log.h" 22 #include "tensorflow/examples/android/jni/object_tracking/utils.h" 23 24 #include "tensorflow/examples/android/jni/object_tracking/config.h" 25 #include "tensorflow/examples/android/jni/object_tracking/flow_cache.h" 26 #include "tensorflow/examples/android/jni/object_tracking/frame_pair.h" 27 #include "tensorflow/examples/android/jni/object_tracking/image_data.h" 28 #include "tensorflow/examples/android/jni/object_tracking/keypoint.h" 29 #include "tensorflow/examples/android/jni/object_tracking/keypoint_detector.h" 30 #include "tensorflow/examples/android/jni/object_tracking/optical_flow.h" 31 32 namespace tf_tracking { 33 34 OpticalFlow::OpticalFlow(const OpticalFlowConfig* const config) 35 : config_(config), 36 frame1_(NULL), 37 frame2_(NULL), 38 working_size_(config->image_size) {} 39 40 41 void OpticalFlow::NextFrame(const ImageData* const image_data) { 42 // Special case for the first frame: make sure the image ends up in 43 // frame1_ so that keypoint detection can be done on it if desired. 44 frame1_ = (frame1_ == NULL) ? image_data : frame2_; 45 frame2_ = image_data; 46 } 47 48 49 // Static heart of the optical flow computation. 50 // Lucas Kanade algorithm. 51 bool OpticalFlow::FindFlowAtPoint_LK(const Image<uint8_t>& img_I, 52 const Image<uint8_t>& img_J, 53 const Image<int32_t>& I_x, 54 const Image<int32_t>& I_y, const float p_x, 55 const float p_y, float* out_g_x, 56 float* out_g_y) { 57 float g_x = *out_g_x; 58 float g_y = *out_g_y; 59 // Get values for frame 1. They remain constant through the inner 60 // iteration loop. 61 float vals_I[kFlowArraySize]; 62 float vals_I_x[kFlowArraySize]; 63 float vals_I_y[kFlowArraySize]; 64 65 const int kPatchSize = 2 * kFlowIntegrationWindowSize + 1; 66 const float kWindowSizeFloat = static_cast<float>(kFlowIntegrationWindowSize); 67 68 #if USE_FIXED_POINT_FLOW 69 const int fixed_x_max = RealToFixed1616(img_I.width_less_one_) - 1; 70 const int fixed_y_max = RealToFixed1616(img_I.height_less_one_) - 1; 71 #else 72 const float real_x_max = I_x.width_less_one_ - EPSILON; 73 const float real_y_max = I_x.height_less_one_ - EPSILON; 74 #endif 75 76 // Get the window around the original point. 77 const float src_left_real = p_x - kWindowSizeFloat; 78 const float src_top_real = p_y - kWindowSizeFloat; 79 float* vals_I_ptr = vals_I; 80 float* vals_I_x_ptr = vals_I_x; 81 float* vals_I_y_ptr = vals_I_y; 82 #if USE_FIXED_POINT_FLOW 83 // Source integer coordinates. 84 const int src_left_fixed = RealToFixed1616(src_left_real); 85 const int src_top_fixed = RealToFixed1616(src_top_real); 86 87 for (int y = 0; y < kPatchSize; ++y) { 88 const int fp_y = Clip(src_top_fixed + (y << 16), 0, fixed_y_max); 89 90 for (int x = 0; x < kPatchSize; ++x) { 91 const int fp_x = Clip(src_left_fixed + (x << 16), 0, fixed_x_max); 92 93 *vals_I_ptr++ = img_I.GetPixelInterpFixed1616(fp_x, fp_y); 94 *vals_I_x_ptr++ = I_x.GetPixelInterpFixed1616(fp_x, fp_y); 95 *vals_I_y_ptr++ = I_y.GetPixelInterpFixed1616(fp_x, fp_y); 96 } 97 } 98 #else 99 for (int y = 0; y < kPatchSize; ++y) { 100 const float y_pos = Clip(src_top_real + y, 0.0f, real_y_max); 101 102 for (int x = 0; x < kPatchSize; ++x) { 103 const float x_pos = Clip(src_left_real + x, 0.0f, real_x_max); 104 105 *vals_I_ptr++ = img_I.GetPixelInterp(x_pos, y_pos); 106 *vals_I_x_ptr++ = I_x.GetPixelInterp(x_pos, y_pos); 107 *vals_I_y_ptr++ = I_y.GetPixelInterp(x_pos, y_pos); 108 } 109 } 110 #endif 111 112 // Compute the spatial gradient matrix about point p. 113 float G[] = { 0, 0, 0, 0 }; 114 CalculateG(vals_I_x, vals_I_y, kFlowArraySize, G); 115 116 // Find the inverse of G. 117 float G_inv[4]; 118 if (!Invert2x2(G, G_inv)) { 119 return false; 120 } 121 122 #if NORMALIZE 123 const float mean_I = ComputeMean(vals_I, kFlowArraySize); 124 const float std_dev_I = ComputeStdDev(vals_I, kFlowArraySize, mean_I); 125 #endif 126 127 // Iterate kNumIterations times or until we converge. 128 for (int iteration = 0; iteration < kNumIterations; ++iteration) { 129 // Get values for frame 2. 130 float vals_J[kFlowArraySize]; 131 132 // Get the window around the destination point. 133 const float left_real = p_x + g_x - kWindowSizeFloat; 134 const float top_real = p_y + g_y - kWindowSizeFloat; 135 float* vals_J_ptr = vals_J; 136 #if USE_FIXED_POINT_FLOW 137 // The top-left sub-pixel is set for the current iteration (in 16:16 138 // fixed). This is constant over one iteration. 139 const int left_fixed = RealToFixed1616(left_real); 140 const int top_fixed = RealToFixed1616(top_real); 141 142 for (int win_y = 0; win_y < kPatchSize; ++win_y) { 143 const int fp_y = Clip(top_fixed + (win_y << 16), 0, fixed_y_max); 144 for (int win_x = 0; win_x < kPatchSize; ++win_x) { 145 const int fp_x = Clip(left_fixed + (win_x << 16), 0, fixed_x_max); 146 *vals_J_ptr++ = img_J.GetPixelInterpFixed1616(fp_x, fp_y); 147 } 148 } 149 #else 150 for (int win_y = 0; win_y < kPatchSize; ++win_y) { 151 const float y_pos = Clip(top_real + win_y, 0.0f, real_y_max); 152 for (int win_x = 0; win_x < kPatchSize; ++win_x) { 153 const float x_pos = Clip(left_real + win_x, 0.0f, real_x_max); 154 *vals_J_ptr++ = img_J.GetPixelInterp(x_pos, y_pos); 155 } 156 } 157 #endif 158 159 #if NORMALIZE 160 const float mean_J = ComputeMean(vals_J, kFlowArraySize); 161 const float std_dev_J = ComputeStdDev(vals_J, kFlowArraySize, mean_J); 162 163 // TODO(andrewharp): Probably better to completely detect and handle the 164 // "corner case" where the patch is fully outside the image diagonally. 165 const float std_dev_ratio = std_dev_J > 0.0f ? std_dev_I / std_dev_J : 1.0f; 166 #endif 167 168 // Compute image mismatch vector. 169 float b_x = 0.0f; 170 float b_y = 0.0f; 171 172 vals_I_ptr = vals_I; 173 vals_J_ptr = vals_J; 174 vals_I_x_ptr = vals_I_x; 175 vals_I_y_ptr = vals_I_y; 176 177 for (int win_y = 0; win_y < kPatchSize; ++win_y) { 178 for (int win_x = 0; win_x < kPatchSize; ++win_x) { 179 #if NORMALIZE 180 // Normalized Image difference. 181 const float dI = 182 (*vals_I_ptr++ - mean_I) - (*vals_J_ptr++ - mean_J) * std_dev_ratio; 183 #else 184 const float dI = *vals_I_ptr++ - *vals_J_ptr++; 185 #endif 186 b_x += dI * *vals_I_x_ptr++; 187 b_y += dI * *vals_I_y_ptr++; 188 } 189 } 190 191 // Optical flow... solve n = G^-1 * b 192 const float n_x = (G_inv[0] * b_x) + (G_inv[1] * b_y); 193 const float n_y = (G_inv[2] * b_x) + (G_inv[3] * b_y); 194 195 // Update best guess with residual displacement from this level and 196 // iteration. 197 g_x += n_x; 198 g_y += n_y; 199 200 // LOGV("Iteration %d: delta (%.3f, %.3f)", iteration, n_x, n_y); 201 202 // Abort early if we're already below the threshold. 203 if (Square(n_x) + Square(n_y) < Square(kTrackingAbortThreshold)) { 204 break; 205 } 206 } // Iteration. 207 208 // Copy value back into output. 209 *out_g_x = g_x; 210 *out_g_y = g_y; 211 return true; 212 } 213 214 215 // Pointwise flow using translational 2dof ESM. 216 bool OpticalFlow::FindFlowAtPoint_ESM( 217 const Image<uint8_t>& img_I, const Image<uint8_t>& img_J, 218 const Image<int32_t>& I_x, const Image<int32_t>& I_y, 219 const Image<int32_t>& J_x, const Image<int32_t>& J_y, const float p_x, 220 const float p_y, float* out_g_x, float* out_g_y) { 221 float g_x = *out_g_x; 222 float g_y = *out_g_y; 223 const float area_inv = 1.0f / static_cast<float>(kFlowArraySize); 224 225 // Get values for frame 1. They remain constant through the inner 226 // iteration loop. 227 uint8_t vals_I[kFlowArraySize]; 228 uint8_t vals_J[kFlowArraySize]; 229 int16_t src_gradient_x[kFlowArraySize]; 230 int16_t src_gradient_y[kFlowArraySize]; 231 232 // TODO(rspring): try out the IntegerPatchAlign() method once 233 // the code for that is in ../common. 234 const float wsize_float = static_cast<float>(kFlowIntegrationWindowSize); 235 const int src_left_fixed = RealToFixed1616(p_x - wsize_float); 236 const int src_top_fixed = RealToFixed1616(p_y - wsize_float); 237 const int patch_size = 2 * kFlowIntegrationWindowSize + 1; 238 239 // Create the keypoint template patch from a subpixel location. 240 if (!img_I.ExtractPatchAtSubpixelFixed1616(src_left_fixed, src_top_fixed, 241 patch_size, patch_size, vals_I) || 242 !I_x.ExtractPatchAtSubpixelFixed1616(src_left_fixed, src_top_fixed, 243 patch_size, patch_size, 244 src_gradient_x) || 245 !I_y.ExtractPatchAtSubpixelFixed1616(src_left_fixed, src_top_fixed, 246 patch_size, patch_size, 247 src_gradient_y)) { 248 return false; 249 } 250 251 int bright_offset = 0; 252 int sum_diff = 0; 253 254 // The top-left sub-pixel is set for the current iteration (in 16:16 fixed). 255 // This is constant over one iteration. 256 int left_fixed = RealToFixed1616(p_x + g_x - wsize_float); 257 int top_fixed = RealToFixed1616(p_y + g_y - wsize_float); 258 259 // The truncated version gives the most top-left pixel that is used. 260 int left_trunc = left_fixed >> 16; 261 int top_trunc = top_fixed >> 16; 262 263 // Compute an initial brightness offset. 264 if (kDoBrightnessNormalize && 265 left_trunc >= 0 && top_trunc >= 0 && 266 (left_trunc + patch_size) < img_J.width_less_one_ && 267 (top_trunc + patch_size) < img_J.height_less_one_) { 268 int templ_index = 0; 269 const uint8_t* j_row = img_J[top_trunc] + left_trunc; 270 271 const int j_stride = img_J.stride(); 272 273 for (int y = 0; y < patch_size; ++y, j_row += j_stride) { 274 for (int x = 0; x < patch_size; ++x) { 275 sum_diff += static_cast<int>(j_row[x]) - vals_I[templ_index++]; 276 } 277 } 278 279 bright_offset = static_cast<int>(static_cast<float>(sum_diff) * area_inv); 280 } 281 282 // Iterate kNumIterations times or until we go out of image. 283 for (int iteration = 0; iteration < kNumIterations; ++iteration) { 284 int jtj[3] = { 0, 0, 0 }; 285 int jtr[2] = { 0, 0 }; 286 sum_diff = 0; 287 288 // Extract the target image values. 289 // Extract the gradient from the target image patch and accumulate to 290 // the gradient of the source image patch. 291 if (!img_J.ExtractPatchAtSubpixelFixed1616(left_fixed, top_fixed, 292 patch_size, patch_size, 293 vals_J)) { 294 break; 295 } 296 297 const uint8_t* templ_row = vals_I; 298 const uint8_t* extract_row = vals_J; 299 const int16_t* src_dx_row = src_gradient_x; 300 const int16_t* src_dy_row = src_gradient_y; 301 302 for (int y = 0; y < patch_size; ++y, templ_row += patch_size, 303 src_dx_row += patch_size, src_dy_row += patch_size, 304 extract_row += patch_size) { 305 const int fp_y = top_fixed + (y << 16); 306 for (int x = 0; x < patch_size; ++x) { 307 const int fp_x = left_fixed + (x << 16); 308 int32_t target_dx = J_x.GetPixelInterpFixed1616(fp_x, fp_y); 309 int32_t target_dy = J_y.GetPixelInterpFixed1616(fp_x, fp_y); 310 311 // Combine the two Jacobians. 312 // Right-shift by one to account for the fact that we add 313 // two Jacobians. 314 int32_t dx = (src_dx_row[x] + target_dx) >> 1; 315 int32_t dy = (src_dy_row[x] + target_dy) >> 1; 316 317 // The current residual b - h(q) == extracted - (template + offset) 318 int32_t diff = static_cast<int32_t>(extract_row[x]) - 319 static_cast<int32_t>(templ_row[x]) - bright_offset; 320 321 jtj[0] += dx * dx; 322 jtj[1] += dx * dy; 323 jtj[2] += dy * dy; 324 325 jtr[0] += dx * diff; 326 jtr[1] += dy * diff; 327 328 sum_diff += diff; 329 } 330 } 331 332 const float jtr1_float = static_cast<float>(jtr[0]); 333 const float jtr2_float = static_cast<float>(jtr[1]); 334 335 // Add some baseline stability to the system. 336 jtj[0] += kEsmRegularizer; 337 jtj[2] += kEsmRegularizer; 338 339 const int64_t prod1 = static_cast<int64_t>(jtj[0]) * jtj[2]; 340 const int64_t prod2 = static_cast<int64_t>(jtj[1]) * jtj[1]; 341 342 // One ESM step. 343 const float jtj_1[4] = { static_cast<float>(jtj[2]), 344 static_cast<float>(-jtj[1]), 345 static_cast<float>(-jtj[1]), 346 static_cast<float>(jtj[0]) }; 347 const double det_inv = 1.0 / static_cast<double>(prod1 - prod2); 348 349 g_x -= det_inv * (jtj_1[0] * jtr1_float + jtj_1[1] * jtr2_float); 350 g_y -= det_inv * (jtj_1[2] * jtr1_float + jtj_1[3] * jtr2_float); 351 352 if (kDoBrightnessNormalize) { 353 bright_offset += 354 static_cast<int>(area_inv * static_cast<float>(sum_diff) + 0.5f); 355 } 356 357 // Update top left position. 358 left_fixed = RealToFixed1616(p_x + g_x - wsize_float); 359 top_fixed = RealToFixed1616(p_y + g_y - wsize_float); 360 361 left_trunc = left_fixed >> 16; 362 top_trunc = top_fixed >> 16; 363 364 // Abort iterations if we go out of borders. 365 if (left_trunc < 0 || top_trunc < 0 || 366 (left_trunc + patch_size) >= J_x.width_less_one_ || 367 (top_trunc + patch_size) >= J_y.height_less_one_) { 368 break; 369 } 370 } // Iteration. 371 372 // Copy value back into output. 373 *out_g_x = g_x; 374 *out_g_y = g_y; 375 return true; 376 } 377 378 379 bool OpticalFlow::FindFlowAtPointReversible( 380 const int level, const float u_x, const float u_y, 381 const bool reverse_flow, 382 float* flow_x, float* flow_y) const { 383 const ImageData& frame_a = reverse_flow ? *frame2_ : *frame1_; 384 const ImageData& frame_b = reverse_flow ? *frame1_ : *frame2_; 385 386 // Images I (prev) and J (next). 387 const Image<uint8_t>& img_I = *frame_a.GetPyramidSqrt2Level(level * 2); 388 const Image<uint8_t>& img_J = *frame_b.GetPyramidSqrt2Level(level * 2); 389 390 // Computed gradients. 391 const Image<int32_t>& I_x = *frame_a.GetSpatialX(level); 392 const Image<int32_t>& I_y = *frame_a.GetSpatialY(level); 393 const Image<int32_t>& J_x = *frame_b.GetSpatialX(level); 394 const Image<int32_t>& J_y = *frame_b.GetSpatialY(level); 395 396 // Shrink factor from original. 397 const float shrink_factor = (1 << level); 398 399 // Image position vector (p := u^l), scaled for this level. 400 const float scaled_p_x = u_x / shrink_factor; 401 const float scaled_p_y = u_y / shrink_factor; 402 403 float scaled_flow_x = *flow_x / shrink_factor; 404 float scaled_flow_y = *flow_y / shrink_factor; 405 406 // LOGE("FindFlowAtPoint level %d: %5.2f, %5.2f (%5.2f, %5.2f)", level, 407 // scaled_p_x, scaled_p_y, &scaled_flow_x, &scaled_flow_y); 408 409 const bool success = kUseEsm ? 410 FindFlowAtPoint_ESM(img_I, img_J, I_x, I_y, J_x, J_y, 411 scaled_p_x, scaled_p_y, 412 &scaled_flow_x, &scaled_flow_y) : 413 FindFlowAtPoint_LK(img_I, img_J, I_x, I_y, 414 scaled_p_x, scaled_p_y, 415 &scaled_flow_x, &scaled_flow_y); 416 417 *flow_x = scaled_flow_x * shrink_factor; 418 *flow_y = scaled_flow_y * shrink_factor; 419 420 return success; 421 } 422 423 424 bool OpticalFlow::FindFlowAtPointSingleLevel( 425 const int level, 426 const float u_x, const float u_y, 427 const bool filter_by_fb_error, 428 float* flow_x, float* flow_y) const { 429 if (!FindFlowAtPointReversible(level, u_x, u_y, false, flow_x, flow_y)) { 430 return false; 431 } 432 433 if (filter_by_fb_error) { 434 const float new_position_x = u_x + *flow_x; 435 const float new_position_y = u_y + *flow_y; 436 437 float reverse_flow_x = 0.0f; 438 float reverse_flow_y = 0.0f; 439 440 // Now find the backwards flow and confirm it lines up with the original 441 // starting point. 442 if (!FindFlowAtPointReversible(level, new_position_x, new_position_y, 443 true, 444 &reverse_flow_x, &reverse_flow_y)) { 445 LOGE("Backward error!"); 446 return false; 447 } 448 449 const float discrepancy_length = 450 sqrtf(Square(*flow_x + reverse_flow_x) + 451 Square(*flow_y + reverse_flow_y)); 452 453 const float flow_length = sqrtf(Square(*flow_x) + Square(*flow_y)); 454 455 return discrepancy_length < 456 (kMaxForwardBackwardErrorAllowed * flow_length); 457 } 458 459 return true; 460 } 461 462 463 // An implementation of the Pyramidal Lucas-Kanade Optical Flow algorithm. 464 // See http://robots.stanford.edu/cs223b04/algo_tracking.pdf for details. 465 bool OpticalFlow::FindFlowAtPointPyramidal(const float u_x, const float u_y, 466 const bool filter_by_fb_error, 467 float* flow_x, float* flow_y) const { 468 const int max_level = MAX(kMinNumPyramidLevelsToUseForAdjustment, 469 kNumPyramidLevels - kNumCacheLevels); 470 471 // For every level in the pyramid, update the coordinates of the best match. 472 for (int l = max_level - 1; l >= 0; --l) { 473 if (!FindFlowAtPointSingleLevel(l, u_x, u_y, 474 filter_by_fb_error, flow_x, flow_y)) { 475 return false; 476 } 477 } 478 479 return true; 480 } 481 482 } // namespace tf_tracking 483