Home | History | Annotate | Download | only in object_tracking
      1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 
     16 #ifndef TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_CONFIG_H_
     17 #define TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_CONFIG_H_
     18 
     19 #include <math.h>
     20 
     21 #include "tensorflow/examples/android/jni/object_tracking/geom.h"
     22 
     23 namespace tf_tracking {
     24 
     25 // Arbitrary keypoint type ids for labeling the origin of tracked keypoints.
     26 enum KeypointType {
     27   KEYPOINT_TYPE_DEFAULT = 0,
     28   KEYPOINT_TYPE_FAST = 1,
     29   KEYPOINT_TYPE_INTEREST = 2
     30 };
     31 
     32 // Struct that can be used to more richly store the results of a detection
     33 // than a single number, while still maintaining comparability.
     34 struct MatchScore {
     35   explicit MatchScore(double val) : value(val) {}
     36   MatchScore() { value = 0.0; }
     37 
     38   double value;
     39 
     40   MatchScore& operator+(const MatchScore& rhs) {
     41     value += rhs.value;
     42     return *this;
     43   }
     44 
     45   friend std::ostream& operator<<(std::ostream& stream,
     46                                   const MatchScore& detection) {
     47     stream << detection.value;
     48     return stream;
     49   }
     50 };
     51 inline bool operator< (const MatchScore& cC1, const MatchScore& cC2) {
     52     return cC1.value < cC2.value;
     53 }
     54 inline bool operator> (const MatchScore& cC1, const MatchScore& cC2) {
     55     return cC1.value > cC2.value;
     56 }
     57 inline bool operator>= (const MatchScore& cC1, const MatchScore& cC2) {
     58     return cC1.value >= cC2.value;
     59 }
     60 inline bool operator<= (const MatchScore& cC1, const MatchScore& cC2) {
     61     return cC1.value <= cC2.value;
     62 }
     63 
     64 // Fixed seed used for all random number generators.
     65 static const int kRandomNumberSeed = 11111;
     66 
     67 // TODO(andrewharp): Move as many of these settings as possible into a settings
     68 // object which can be passed in from Java at runtime.
     69 
     70 // Whether or not to use ESM instead of LK flow.
     71 static const bool kUseEsm = false;
     72 
     73 // This constant gets added to the diagonal of the Hessian
     74 // before solving for translation in 2dof ESM.
     75 // It ensures better behavior especially in the absence of
     76 // strong texture.
     77 static const int kEsmRegularizer = 20;
     78 
     79 // Do we want to brightness-normalize each keypoint patch when we compute
     80 // its flow using ESM?
     81 static const bool kDoBrightnessNormalize = true;
     82 
     83 // Whether or not to use fixed-point interpolated pixel lookups in optical flow.
     84 #define USE_FIXED_POINT_FLOW 1
     85 
     86 // Whether to normalize keypoint windows for intensity in LK optical flow.
     87 // This is a define for now because it helps keep the code streamlined.
     88 #define NORMALIZE 1
     89 
     90 // Number of keypoints to store per frame.
     91 static const int kMaxKeypoints = 76;
     92 
     93 // Keypoint detection.
     94 static const int kMaxTempKeypoints = 1024;
     95 
     96 // Number of floats each keypoint takes up when exporting to an array.
     97 static const int kKeypointStep = 7;
     98 
     99 // Number of frame deltas to keep around in the circular queue.
    100 static const int kNumFrames = 512;
    101 
    102 // Number of iterations to do tracking on each keypoint at each pyramid level.
    103 static const int kNumIterations = 3;
    104 
    105 // The number of bins (on a side) to divide each bin from the previous
    106 // cache level into.  Higher numbers will decrease performance by increasing
    107 // cache misses, but mean that cache hits are more locally relevant.
    108 static const int kCacheBranchFactor = 2;
    109 
    110 // Number of levels to put in the cache.
    111 // Each level of the cache is a square grid of bins, length:
    112 // branch_factor^(level - 1) on each side.
    113 //
    114 // This may be greater than kNumPyramidLevels. Setting it to 0 means no
    115 // caching is enabled.
    116 static const int kNumCacheLevels = 3;
    117 
    118 // The level at which the cache pyramid gets cut off and replaced by a matrix
    119 // transform if such a matrix has been provided to the cache.
    120 static const int kCacheCutoff = 1;
    121 
    122 static const int kNumPyramidLevels = 4;
    123 
    124 // The minimum number of keypoints needed in an object's area.
    125 static const int kMaxKeypointsForObject = 16;
    126 
    127 // Minimum number of pyramid levels to use after getting cached value.
    128 // This allows fine-scale adjustment from the cached value, which is taken
    129 // from the center of the corresponding top cache level box.
    130 // Can be [0, kNumPyramidLevels).
    131 static const int kMinNumPyramidLevelsToUseForAdjustment = 1;
    132 
    133 // Window size to integrate over to find local image derivative.
    134 static const int kFlowIntegrationWindowSize = 3;
    135 
    136 // Total area of integration windows.
    137 static const int kFlowArraySize =
    138     (2 * kFlowIntegrationWindowSize + 1) * (2 * kFlowIntegrationWindowSize + 1);
    139 
    140 // Error that's considered good enough to early abort tracking.
    141 static const float kTrackingAbortThreshold = 0.03f;
    142 
    143 // Maximum number of deviations a keypoint-correspondence delta can be from the
    144 // weighted average before being thrown out for region-based queries.
    145 static const float kNumDeviations = 2.0f;
    146 
    147 // The length of the allowed delta between the forward and the backward
    148 // flow deltas in terms of the length of the forward flow vector.
    149 static const float kMaxForwardBackwardErrorAllowed = 0.5f;
    150 
    151 // Threshold for pixels to be considered different.
    152 static const int kFastDiffAmount = 10;
    153 
    154 // How far from edge of frame to stop looking for FAST keypoints.
    155 static const int kFastBorderBuffer = 10;
    156 
    157 // Determines if non-detected arbitrary keypoints should be added to regions.
    158 // This will help if no keypoints have been detected in the region yet.
    159 static const bool kAddArbitraryKeypoints = true;
    160 
    161 // How many arbitrary keypoints to add along each axis as candidates for each
    162 // region?
    163 static const int kNumToAddAsCandidates = 1;
    164 
    165 // In terms of region dimensions, how closely can we place keypoints
    166 // next to each other?
    167 static const float kClosestPercent = 0.6f;
    168 
    169 // How many FAST qualifying pixels must be connected to a pixel for it to be
    170 // considered a candidate keypoint for Harris filtering.
    171 static const int kMinNumConnectedForFastKeypoint = 8;
    172 
    173 // Size of the window to integrate over for Harris filtering.
    174 // Compare to kFlowIntegrationWindowSize.
    175 static const int kHarrisWindowSize = 2;
    176 
    177 
    178 // DETECTOR PARAMETERS
    179 
    180 // Before relocalizing, make sure the new proposed position is better than
    181 // the existing position by a small amount to prevent thrashing.
    182 static const MatchScore kMatchScoreBuffer(0.01f);
    183 
    184 // Minimum score a tracked object can have and still be considered a match.
    185 // TODO(andrewharp): Make this a per detector thing.
    186 static const MatchScore kMinimumMatchScore(0.5f);
    187 
    188 static const float kMinimumCorrelationForTracking = 0.4f;
    189 
    190 static const MatchScore kMatchScoreForImmediateTermination(0.0f);
    191 
    192 // Run the detector every N frames.
    193 static const int kDetectEveryNFrames = 4;
    194 
    195 // How many features does each feature_set contain?
    196 static const int kFeaturesPerFeatureSet = 10;
    197 
    198 // The number of FeatureSets managed by the object detector.
    199 // More FeatureSets can increase recall at the cost of performance.
    200 static const int kNumFeatureSets = 7;
    201 
    202 // How many FeatureSets must respond affirmatively for a candidate descriptor
    203 // and position to be given more thorough attention?
    204 static const int kNumFeatureSetsForCandidate = 2;
    205 
    206 // How large the thumbnails used for correlation validation are.  Used for both
    207 // width and height.
    208 static const int kNormalizedThumbnailSize = 11;
    209 
    210 // The area of intersection divided by union for the bounding boxes that tells
    211 // if this tracking has slipped enough to invalidate all unlocked examples.
    212 static const float kPositionOverlapThreshold = 0.6f;
    213 
    214 // The number of detection failures allowed before an object goes invisible.
    215 // Tracking will still occur, so if it is actually still being tracked and
    216 // comes back into a detectable position, it's likely to be found.
    217 static const int kMaxNumDetectionFailures = 4;
    218 
    219 
    220 // Minimum square size to scan with sliding window.
    221 static const float kScanMinSquareSize = 16.0f;
    222 
    223 // Minimum square size to scan with sliding window.
    224 static const float kScanMaxSquareSize = 64.0f;
    225 
    226 // Scale difference for consecutive scans of the sliding window.
    227 static const float kScanScaleFactor = sqrtf(2.0f);
    228 
    229 // Step size for sliding window.
    230 static const int kScanStepSize = 10;
    231 
    232 
    233 // How tightly to pack the descriptor boxes for confirmed exemplars.
    234 static const float kLockedScaleFactor = 1 / sqrtf(2.0f);
    235 
    236 // How tightly to pack the descriptor boxes for unconfirmed exemplars.
    237 static const float kUnlockedScaleFactor = 1 / 2.0f;
    238 
    239 // How tightly the boxes to scan centered at the last known position will be
    240 // packed.
    241 static const float kLastKnownPositionScaleFactor = 1.0f / sqrtf(2.0f);
    242 
    243 // The bounds on how close a new object example must be to existing object
    244 // examples for detection to be valid.
    245 static const float kMinCorrelationForNewExample = 0.75f;
    246 static const float kMaxCorrelationForNewExample = 0.99f;
    247 
    248 
    249 // The number of safe tries an exemplar has after being created before
    250 // missed detections count against it.
    251 static const int kFreeTries = 5;
    252 
    253 // A false positive is worth this many missed detections.
    254 static const int kFalsePositivePenalty = 5;
    255 
    256 struct ObjectDetectorConfig {
    257   const Size image_size;
    258 
    259   explicit ObjectDetectorConfig(const Size& image_size)
    260       : image_size(image_size) {}
    261   virtual ~ObjectDetectorConfig() = default;
    262 };
    263 
    264 struct KeypointDetectorConfig {
    265   const Size image_size;
    266 
    267   bool detect_skin;
    268 
    269   explicit KeypointDetectorConfig(const Size& image_size)
    270       : image_size(image_size),
    271         detect_skin(false) {}
    272 };
    273 
    274 
    275 struct OpticalFlowConfig {
    276   const Size image_size;
    277 
    278   explicit OpticalFlowConfig(const Size& image_size)
    279       : image_size(image_size) {}
    280 };
    281 
    282 struct TrackerConfig {
    283   const Size image_size;
    284   KeypointDetectorConfig keypoint_detector_config;
    285   OpticalFlowConfig flow_config;
    286   bool always_track;
    287 
    288   float object_box_scale_factor_for_features;
    289 
    290   explicit TrackerConfig(const Size& image_size)
    291       : image_size(image_size),
    292         keypoint_detector_config(image_size),
    293         flow_config(image_size),
    294         always_track(false),
    295         object_box_scale_factor_for_features(1.0f) {}
    296 };
    297 
    298 }  // namespace tf_tracking
    299 
    300 #endif  // TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_CONFIG_H_
    301