1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_IMAGE_INL_H_ 17 #define TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_IMAGE_INL_H_ 18 19 #include <stdint.h> 20 21 #include "tensorflow/examples/android/jni/object_tracking/geom.h" 22 #include "tensorflow/examples/android/jni/object_tracking/image.h" 23 #include "tensorflow/examples/android/jni/object_tracking/utils.h" 24 25 namespace tf_tracking { 26 27 template <typename T> 28 Image<T>::Image(const int width, const int height) 29 : width_less_one_(width - 1), 30 height_less_one_(height - 1), 31 data_size_(width * height), 32 own_data_(true), 33 width_(width), 34 height_(height), 35 stride_(width) { 36 Allocate(); 37 } 38 39 template <typename T> 40 Image<T>::Image(const Size& size) 41 : width_less_one_(size.width - 1), 42 height_less_one_(size.height - 1), 43 data_size_(size.width * size.height), 44 own_data_(true), 45 width_(size.width), 46 height_(size.height), 47 stride_(size.width) { 48 Allocate(); 49 } 50 51 // Constructor that creates an image from preallocated data. 52 // Note: The image takes ownership of the data lifecycle, unless own_data is 53 // set to false. 54 template <typename T> 55 Image<T>::Image(const int width, const int height, T* const image_data, 56 const bool own_data) : 57 width_less_one_(width - 1), 58 height_less_one_(height - 1), 59 data_size_(width * height), 60 own_data_(own_data), 61 width_(width), 62 height_(height), 63 stride_(width) { 64 image_data_ = image_data; 65 SCHECK(image_data_ != NULL, "Can't create image with NULL data!"); 66 } 67 68 template <typename T> 69 Image<T>::~Image() { 70 if (own_data_) { 71 delete[] image_data_; 72 } 73 image_data_ = NULL; 74 } 75 76 template<typename T> 77 template<class DstType> 78 bool Image<T>::ExtractPatchAtSubpixelFixed1616(const int fp_x, 79 const int fp_y, 80 const int patchwidth, 81 const int patchheight, 82 DstType* to_data) const { 83 // Calculate weights. 84 const int trunc_x = fp_x >> 16; 85 const int trunc_y = fp_y >> 16; 86 87 if (trunc_x < 0 || trunc_y < 0 || 88 (trunc_x + patchwidth) >= width_less_one_ || 89 (trunc_y + patchheight) >= height_less_one_) { 90 return false; 91 } 92 93 // Now walk over destination patch and fill from interpolated source image. 94 for (int y = 0; y < patchheight; ++y, to_data += patchwidth) { 95 for (int x = 0; x < patchwidth; ++x) { 96 to_data[x] = 97 static_cast<DstType>(GetPixelInterpFixed1616(fp_x + (x << 16), 98 fp_y + (y << 16))); 99 } 100 } 101 102 return true; 103 } 104 105 template <typename T> 106 Image<T>* Image<T>::Crop( 107 const int left, const int top, const int right, const int bottom) const { 108 SCHECK(left >= 0 && left < width_, "out of bounds at %d!", left); 109 SCHECK(right >= 0 && right < width_, "out of bounds at %d!", right); 110 SCHECK(top >= 0 && top < height_, "out of bounds at %d!", top); 111 SCHECK(bottom >= 0 && bottom < height_, "out of bounds at %d!", bottom); 112 113 SCHECK(left <= right, "mismatch!"); 114 SCHECK(top <= bottom, "mismatch!"); 115 116 const int new_width = right - left + 1; 117 const int new_height = bottom - top + 1; 118 119 Image<T>* const cropped_image = new Image(new_width, new_height); 120 121 for (int y = 0; y < new_height; ++y) { 122 memcpy((*cropped_image)[y], ((*this)[y + top] + left), 123 new_width * sizeof(T)); 124 } 125 126 return cropped_image; 127 } 128 129 template <typename T> 130 inline float Image<T>::GetPixelInterp(const float x, const float y) const { 131 // Do int conversion one time. 132 const int floored_x = static_cast<int>(x); 133 const int floored_y = static_cast<int>(y); 134 135 // Note: it might be the case that the *_[min|max] values are clipped, and 136 // these (the a b c d vals) aren't (for speed purposes), but that doesn't 137 // matter. We'll just be blending the pixel with itself in that case anyway. 138 const float b = x - floored_x; 139 const float a = 1.0f - b; 140 141 const float d = y - floored_y; 142 const float c = 1.0f - d; 143 144 SCHECK(ValidInterpPixel(x, y), 145 "x or y out of bounds! %.2f [0 - %d), %.2f [0 - %d)", 146 x, width_less_one_, y, height_less_one_); 147 148 const T* const pix_ptr = (*this)[floored_y] + floored_x; 149 150 // Get the pixel values surrounding this point. 151 const T& p1 = pix_ptr[0]; 152 const T& p2 = pix_ptr[1]; 153 const T& p3 = pix_ptr[width_]; 154 const T& p4 = pix_ptr[width_ + 1]; 155 156 // Simple bilinear interpolation between four reference pixels. 157 // If x is the value requested: 158 // a b 159 // ------- 160 // c |p1 p2| 161 // | x | 162 // d |p3 p4| 163 // ------- 164 return c * ((a * p1) + (b * p2)) + 165 d * ((a * p3) + (b * p4)); 166 } 167 168 169 template <typename T> 170 inline T Image<T>::GetPixelInterpFixed1616( 171 const int fp_x_whole, const int fp_y_whole) const { 172 static const int kFixedPointOne = 0x00010000; 173 static const int kFixedPointHalf = 0x00008000; 174 static const int kFixedPointTruncateMask = 0xFFFF0000; 175 176 int trunc_x = fp_x_whole & kFixedPointTruncateMask; 177 int trunc_y = fp_y_whole & kFixedPointTruncateMask; 178 const int fp_x = fp_x_whole - trunc_x; 179 const int fp_y = fp_y_whole - trunc_y; 180 181 // Scale the truncated values back to regular ints. 182 trunc_x >>= 16; 183 trunc_y >>= 16; 184 185 const int one_minus_fp_x = kFixedPointOne - fp_x; 186 const int one_minus_fp_y = kFixedPointOne - fp_y; 187 188 const T* trunc_start = (*this)[trunc_y] + trunc_x; 189 190 const T a = trunc_start[0]; 191 const T b = trunc_start[1]; 192 const T c = trunc_start[stride_]; 193 const T d = trunc_start[stride_ + 1]; 194 195 return ( 196 (one_minus_fp_y * static_cast<int64_t>(one_minus_fp_x * a + fp_x * b) + 197 fp_y * static_cast<int64_t>(one_minus_fp_x * c + fp_x * d) + 198 kFixedPointHalf) >> 199 32); 200 } 201 202 template <typename T> 203 inline bool Image<T>::ValidPixel(const int x, const int y) const { 204 return InRange(x, ZERO, width_less_one_) && 205 InRange(y, ZERO, height_less_one_); 206 } 207 208 template <typename T> 209 inline BoundingBox Image<T>::GetContainingBox() const { 210 return BoundingBox( 211 0, 0, width_less_one_ - EPSILON, height_less_one_ - EPSILON); 212 } 213 214 template <typename T> 215 inline bool Image<T>::Contains(const BoundingBox& bounding_box) const { 216 // TODO(andrewharp): Come up with a more elegant way of ensuring that bounds 217 // are ok. 218 return GetContainingBox().Contains(bounding_box); 219 } 220 221 template <typename T> 222 inline bool Image<T>::ValidInterpPixel(const float x, const float y) const { 223 // Exclusive of max because we can be more efficient if we don't handle 224 // interpolating on or past the last pixel. 225 return (x >= ZERO) && (x < width_less_one_) && 226 (y >= ZERO) && (y < height_less_one_); 227 } 228 229 template <typename T> 230 void Image<T>::DownsampleAveraged(const T* const original, const int stride, 231 const int factor) { 232 #ifdef __ARM_NEON 233 if (factor == 4 || factor == 2) { 234 DownsampleAveragedNeon(original, stride, factor); 235 return; 236 } 237 #endif 238 239 // TODO(andrewharp): delete or enable this for non-uint8_t downsamples. 240 const int pixels_per_block = factor * factor; 241 242 // For every pixel in resulting image. 243 for (int y = 0; y < height_; ++y) { 244 const int orig_y = y * factor; 245 const int y_bound = orig_y + factor; 246 247 // Sum up the original pixels. 248 for (int x = 0; x < width_; ++x) { 249 const int orig_x = x * factor; 250 const int x_bound = orig_x + factor; 251 252 // Making this int32_t because type U or T might overflow. 253 int32_t pixel_sum = 0; 254 255 // Grab all the pixels that make up this pixel. 256 for (int curr_y = orig_y; curr_y < y_bound; ++curr_y) { 257 const T* p = original + curr_y * stride + orig_x; 258 259 for (int curr_x = orig_x; curr_x < x_bound; ++curr_x) { 260 pixel_sum += *p++; 261 } 262 } 263 264 (*this)[y][x] = pixel_sum / pixels_per_block; 265 } 266 } 267 } 268 269 template <typename T> 270 void Image<T>::DownsampleInterpolateNearest(const Image<T>& original) { 271 // Calculating the scaling factors based on target image size. 272 const float factor_x = static_cast<float>(original.GetWidth()) / 273 static_cast<float>(width_); 274 const float factor_y = static_cast<float>(original.GetHeight()) / 275 static_cast<float>(height_); 276 277 // Calculating initial offset in x-axis. 278 const float offset_x = 0.5f * (original.GetWidth() - width_) / width_; 279 280 // Calculating initial offset in y-axis. 281 const float offset_y = 0.5f * (original.GetHeight() - height_) / height_; 282 283 float orig_y = offset_y; 284 285 // For every pixel in resulting image. 286 for (int y = 0; y < height_; ++y) { 287 float orig_x = offset_x; 288 289 // Finding nearest pixel on y-axis. 290 const int nearest_y = static_cast<int>(orig_y + 0.5f); 291 const T* row_data = original[nearest_y]; 292 293 T* pixel_ptr = (*this)[y]; 294 295 for (int x = 0; x < width_; ++x) { 296 // Finding nearest pixel on x-axis. 297 const int nearest_x = static_cast<int>(orig_x + 0.5f); 298 299 *pixel_ptr++ = row_data[nearest_x]; 300 301 orig_x += factor_x; 302 } 303 304 orig_y += factor_y; 305 } 306 } 307 308 template <typename T> 309 void Image<T>::DownsampleInterpolateLinear(const Image<T>& original) { 310 // TODO(andrewharp): Turn this into a general compare sizes/bulk 311 // copy method. 312 if (original.GetWidth() == GetWidth() && 313 original.GetHeight() == GetHeight() && 314 original.stride() == stride()) { 315 memcpy(image_data_, original.data(), data_size_ * sizeof(T)); 316 return; 317 } 318 319 // Calculating the scaling factors based on target image size. 320 const float factor_x = static_cast<float>(original.GetWidth()) / 321 static_cast<float>(width_); 322 const float factor_y = static_cast<float>(original.GetHeight()) / 323 static_cast<float>(height_); 324 325 // Calculating initial offset in x-axis. 326 const float offset_x = 0; 327 const int offset_x_fp = RealToFixed1616(offset_x); 328 329 // Calculating initial offset in y-axis. 330 const float offset_y = 0; 331 const int offset_y_fp = RealToFixed1616(offset_y); 332 333 // Get the fixed point scaling factor value. 334 // Shift by 8 so we can fit everything into a 4 byte int later for speed 335 // reasons. This means the precision is limited to 1 / 256th of a pixel, 336 // but this should be good enough. 337 const int factor_x_fp = RealToFixed1616(factor_x) >> 8; 338 const int factor_y_fp = RealToFixed1616(factor_y) >> 8; 339 340 int src_y_fp = offset_y_fp >> 8; 341 342 static const int kFixedPointOne8 = 0x00000100; 343 static const int kFixedPointHalf8 = 0x00000080; 344 static const int kFixedPointTruncateMask8 = 0xFFFFFF00; 345 346 // For every pixel in resulting image. 347 for (int y = 0; y < height_; ++y) { 348 int src_x_fp = offset_x_fp >> 8; 349 350 int trunc_y = src_y_fp & kFixedPointTruncateMask8; 351 const int fp_y = src_y_fp - trunc_y; 352 353 // Scale the truncated values back to regular ints. 354 trunc_y >>= 8; 355 356 const int one_minus_fp_y = kFixedPointOne8 - fp_y; 357 358 T* pixel_ptr = (*this)[y]; 359 360 // Make sure not to read from an invalid row. 361 const int trunc_y_b = MIN(original.height_less_one_, trunc_y + 1); 362 const T* other_top_ptr = original[trunc_y]; 363 const T* other_bot_ptr = original[trunc_y_b]; 364 365 int last_trunc_x = -1; 366 int trunc_x = -1; 367 368 T a = 0; 369 T b = 0; 370 T c = 0; 371 T d = 0; 372 373 for (int x = 0; x < width_; ++x) { 374 trunc_x = src_x_fp & kFixedPointTruncateMask8; 375 376 const int fp_x = (src_x_fp - trunc_x) >> 8; 377 378 // Scale the truncated values back to regular ints. 379 trunc_x >>= 8; 380 381 // It's possible we're reading from the same pixels 382 if (trunc_x != last_trunc_x) { 383 // Make sure not to read from an invalid column. 384 const int trunc_x_b = MIN(original.width_less_one_, trunc_x + 1); 385 a = other_top_ptr[trunc_x]; 386 b = other_top_ptr[trunc_x_b]; 387 c = other_bot_ptr[trunc_x]; 388 d = other_bot_ptr[trunc_x_b]; 389 last_trunc_x = trunc_x; 390 } 391 392 const int one_minus_fp_x = kFixedPointOne8 - fp_x; 393 394 const int32_t value = 395 ((one_minus_fp_y * one_minus_fp_x * a + fp_x * b) + 396 (fp_y * one_minus_fp_x * c + fp_x * d) + kFixedPointHalf8) >> 397 16; 398 399 *pixel_ptr++ = value; 400 401 src_x_fp += factor_x_fp; 402 } 403 src_y_fp += factor_y_fp; 404 } 405 } 406 407 template <typename T> 408 void Image<T>::DownsampleSmoothed3x3(const Image<T>& original) { 409 for (int y = 0; y < height_; ++y) { 410 const int orig_y = Clip(2 * y, ZERO, original.height_less_one_); 411 const int min_y = Clip(orig_y - 1, ZERO, original.height_less_one_); 412 const int max_y = Clip(orig_y + 1, ZERO, original.height_less_one_); 413 414 for (int x = 0; x < width_; ++x) { 415 const int orig_x = Clip(2 * x, ZERO, original.width_less_one_); 416 const int min_x = Clip(orig_x - 1, ZERO, original.width_less_one_); 417 const int max_x = Clip(orig_x + 1, ZERO, original.width_less_one_); 418 419 // Center. 420 int32_t pixel_sum = original[orig_y][orig_x] * 4; 421 422 // Sides. 423 pixel_sum += (original[orig_y][max_x] + 424 original[orig_y][min_x] + 425 original[max_y][orig_x] + 426 original[min_y][orig_x]) * 2; 427 428 // Diagonals. 429 pixel_sum += (original[min_y][max_x] + 430 original[min_y][min_x] + 431 original[max_y][max_x] + 432 original[max_y][min_x]); 433 434 (*this)[y][x] = pixel_sum >> 4; // 16 435 } 436 } 437 } 438 439 template <typename T> 440 void Image<T>::DownsampleSmoothed5x5(const Image<T>& original) { 441 const int max_x = original.width_less_one_; 442 const int max_y = original.height_less_one_; 443 444 // The JY Bouget paper on Lucas-Kanade recommends a 445 // [1/16 1/4 3/8 1/4 1/16]^2 filter. 446 // This works out to a [1 4 6 4 1]^2 / 256 array, precomputed below. 447 static const int window_radius = 2; 448 static const int window_size = window_radius*2 + 1; 449 static const int window_weights[] = {1, 4, 6, 4, 1, // 16 + 450 4, 16, 24, 16, 4, // 64 + 451 6, 24, 36, 24, 6, // 96 + 452 4, 16, 24, 16, 4, // 64 + 453 1, 4, 6, 4, 1}; // 16 = 256 454 455 // We'll multiply and sum with the whole numbers first, then divide by 456 // the total weight to normalize at the last moment. 457 for (int y = 0; y < height_; ++y) { 458 for (int x = 0; x < width_; ++x) { 459 int32_t pixel_sum = 0; 460 461 const int* w = window_weights; 462 const int start_x = Clip((x << 1) - window_radius, ZERO, max_x); 463 464 // Clip the boundaries to the size of the image. 465 for (int window_y = 0; window_y < window_size; ++window_y) { 466 const int start_y = 467 Clip((y << 1) - window_radius + window_y, ZERO, max_y); 468 469 const T* p = original[start_y] + start_x; 470 471 for (int window_x = 0; window_x < window_size; ++window_x) { 472 pixel_sum += *p++ * *w++; 473 } 474 } 475 476 // Conversion to type T will happen here after shifting right 8 bits to 477 // divide by 256. 478 (*this)[y][x] = pixel_sum >> 8; 479 } 480 } 481 } 482 483 template <typename T> 484 template <typename U> 485 inline T Image<T>::ScharrPixelX(const Image<U>& original, 486 const int center_x, const int center_y) const { 487 const int min_x = Clip(center_x - 1, ZERO, original.width_less_one_); 488 const int max_x = Clip(center_x + 1, ZERO, original.width_less_one_); 489 const int min_y = Clip(center_y - 1, ZERO, original.height_less_one_); 490 const int max_y = Clip(center_y + 1, ZERO, original.height_less_one_); 491 492 // Convolution loop unrolled for performance... 493 return (3 * (original[min_y][max_x] 494 + original[max_y][max_x] 495 - original[min_y][min_x] 496 - original[max_y][min_x]) 497 + 10 * (original[center_y][max_x] 498 - original[center_y][min_x])) / 32; 499 } 500 501 template <typename T> 502 template <typename U> 503 inline T Image<T>::ScharrPixelY(const Image<U>& original, 504 const int center_x, const int center_y) const { 505 const int min_x = Clip(center_x - 1, 0, original.width_less_one_); 506 const int max_x = Clip(center_x + 1, 0, original.width_less_one_); 507 const int min_y = Clip(center_y - 1, 0, original.height_less_one_); 508 const int max_y = Clip(center_y + 1, 0, original.height_less_one_); 509 510 // Convolution loop unrolled for performance... 511 return (3 * (original[max_y][min_x] 512 + original[max_y][max_x] 513 - original[min_y][min_x] 514 - original[min_y][max_x]) 515 + 10 * (original[max_y][center_x] 516 - original[min_y][center_x])) / 32; 517 } 518 519 template <typename T> 520 template <typename U> 521 inline void Image<T>::ScharrX(const Image<U>& original) { 522 for (int y = 0; y < height_; ++y) { 523 for (int x = 0; x < width_; ++x) { 524 SetPixel(x, y, ScharrPixelX(original, x, y)); 525 } 526 } 527 } 528 529 template <typename T> 530 template <typename U> 531 inline void Image<T>::ScharrY(const Image<U>& original) { 532 for (int y = 0; y < height_; ++y) { 533 for (int x = 0; x < width_; ++x) { 534 SetPixel(x, y, ScharrPixelY(original, x, y)); 535 } 536 } 537 } 538 539 template <typename T> 540 template <typename U> 541 void Image<T>::DerivativeX(const Image<U>& original) { 542 for (int y = 0; y < height_; ++y) { 543 const U* const source_row = original[y]; 544 T* const dest_row = (*this)[y]; 545 546 // Compute first pixel. Approximated with forward difference. 547 dest_row[0] = source_row[1] - source_row[0]; 548 549 // All the pixels in between. Central difference method. 550 const U* source_prev_pixel = source_row; 551 T* dest_pixel = dest_row + 1; 552 const U* source_next_pixel = source_row + 2; 553 for (int x = 1; x < width_less_one_; ++x) { 554 *dest_pixel++ = HalfDiff(*source_prev_pixel++, *source_next_pixel++); 555 } 556 557 // Last pixel. Approximated with backward difference. 558 dest_row[width_less_one_] = 559 source_row[width_less_one_] - source_row[width_less_one_ - 1]; 560 } 561 } 562 563 template <typename T> 564 template <typename U> 565 void Image<T>::DerivativeY(const Image<U>& original) { 566 const int src_stride = original.stride(); 567 568 // Compute 1st row. Approximated with forward difference. 569 { 570 const U* const src_row = original[0]; 571 T* dest_row = (*this)[0]; 572 for (int x = 0; x < width_; ++x) { 573 dest_row[x] = src_row[x + src_stride] - src_row[x]; 574 } 575 } 576 577 // Compute all rows in between using central difference. 578 for (int y = 1; y < height_less_one_; ++y) { 579 T* dest_row = (*this)[y]; 580 581 const U* source_prev_pixel = original[y - 1]; 582 const U* source_next_pixel = original[y + 1]; 583 for (int x = 0; x < width_; ++x) { 584 *dest_row++ = HalfDiff(*source_prev_pixel++, *source_next_pixel++); 585 } 586 } 587 588 // Compute last row. Approximated with backward difference. 589 { 590 const U* const src_row = original[height_less_one_]; 591 T* dest_row = (*this)[height_less_one_]; 592 for (int x = 0; x < width_; ++x) { 593 dest_row[x] = src_row[x] - src_row[x - src_stride]; 594 } 595 } 596 } 597 598 template <typename T> 599 template <typename U> 600 inline T Image<T>::ConvolvePixel3x3(const Image<U>& original, 601 const int* const filter, 602 const int center_x, const int center_y, 603 const int total) const { 604 int32_t sum = 0; 605 for (int filter_y = 0; filter_y < 3; ++filter_y) { 606 const int y = Clip(center_y - 1 + filter_y, 0, original.GetHeight()); 607 for (int filter_x = 0; filter_x < 3; ++filter_x) { 608 const int x = Clip(center_x - 1 + filter_x, 0, original.GetWidth()); 609 sum += original[y][x] * filter[filter_y * 3 + filter_x]; 610 } 611 } 612 return sum / total; 613 } 614 615 template <typename T> 616 template <typename U> 617 inline void Image<T>::Convolve3x3(const Image<U>& original, 618 const int32_t* const filter) { 619 int32_t sum = 0; 620 for (int i = 0; i < 9; ++i) { 621 sum += abs(filter[i]); 622 } 623 for (int y = 0; y < height_; ++y) { 624 for (int x = 0; x < width_; ++x) { 625 SetPixel(x, y, ConvolvePixel3x3(original, filter, x, y, sum)); 626 } 627 } 628 } 629 630 template <typename T> 631 inline void Image<T>::FromArray(const T* const pixels, const int stride, 632 const int factor) { 633 if (factor == 1 && stride == width_) { 634 // If not subsampling, memcpy per line should be faster. 635 memcpy(this->image_data_, pixels, data_size_ * sizeof(T)); 636 return; 637 } 638 639 DownsampleAveraged(pixels, stride, factor); 640 } 641 642 } // namespace tf_tracking 643 644 #endif // TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_IMAGE_INL_H_ 645