Home | History | Annotate | Download | only in video_processing
      1 /*
      2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 #include "webrtc/modules/video_processing/content_analysis.h"
     11 
     12 #include <math.h>
     13 #include <stdlib.h>
     14 
     15 #include "webrtc/system_wrappers/include/cpu_features_wrapper.h"
     16 #include "webrtc/system_wrappers/include/tick_util.h"
     17 
     18 namespace webrtc {
     19 
     20 VPMContentAnalysis::VPMContentAnalysis(bool runtime_cpu_detection)
     21     : orig_frame_(NULL),
     22       prev_frame_(NULL),
     23       width_(0),
     24       height_(0),
     25       skip_num_(1),
     26       border_(8),
     27       motion_magnitude_(0.0f),
     28       spatial_pred_err_(0.0f),
     29       spatial_pred_err_h_(0.0f),
     30       spatial_pred_err_v_(0.0f),
     31       first_frame_(true),
     32       ca_Init_(false),
     33       content_metrics_(NULL) {
     34   ComputeSpatialMetrics = &VPMContentAnalysis::ComputeSpatialMetrics_C;
     35   TemporalDiffMetric = &VPMContentAnalysis::TemporalDiffMetric_C;
     36 
     37   if (runtime_cpu_detection) {
     38 #if defined(WEBRTC_ARCH_X86_FAMILY)
     39     if (WebRtc_GetCPUInfo(kSSE2)) {
     40       ComputeSpatialMetrics = &VPMContentAnalysis::ComputeSpatialMetrics_SSE2;
     41       TemporalDiffMetric = &VPMContentAnalysis::TemporalDiffMetric_SSE2;
     42     }
     43 #endif
     44   }
     45   Release();
     46 }
     47 
     48 VPMContentAnalysis::~VPMContentAnalysis() {
     49   Release();
     50 }
     51 
     52 VideoContentMetrics* VPMContentAnalysis::ComputeContentMetrics(
     53     const VideoFrame& inputFrame) {
     54   if (inputFrame.IsZeroSize())
     55     return NULL;
     56 
     57   // Init if needed (native dimension change).
     58   if (width_ != inputFrame.width() || height_ != inputFrame.height()) {
     59     if (VPM_OK != Initialize(inputFrame.width(), inputFrame.height()))
     60       return NULL;
     61   }
     62   // Only interested in the Y plane.
     63   orig_frame_ = inputFrame.buffer(kYPlane);
     64 
     65   // Compute spatial metrics: 3 spatial prediction errors.
     66   (this->*ComputeSpatialMetrics)();
     67 
     68   // Compute motion metrics
     69   if (first_frame_ == false)
     70     ComputeMotionMetrics();
     71 
     72   // Saving current frame as previous one: Y only.
     73   memcpy(prev_frame_, orig_frame_, width_ * height_);
     74 
     75   first_frame_ = false;
     76   ca_Init_ = true;
     77 
     78   return ContentMetrics();
     79 }
     80 
     81 int32_t VPMContentAnalysis::Release() {
     82   if (content_metrics_ != NULL) {
     83     delete content_metrics_;
     84     content_metrics_ = NULL;
     85   }
     86 
     87   if (prev_frame_ != NULL) {
     88     delete[] prev_frame_;
     89     prev_frame_ = NULL;
     90   }
     91 
     92   width_ = 0;
     93   height_ = 0;
     94   first_frame_ = true;
     95 
     96   return VPM_OK;
     97 }
     98 
     99 int32_t VPMContentAnalysis::Initialize(int width, int height) {
    100   width_ = width;
    101   height_ = height;
    102   first_frame_ = true;
    103 
    104   // skip parameter: # of skipped rows: for complexity reduction
    105   //  temporal also currently uses it for column reduction.
    106   skip_num_ = 1;
    107 
    108   // use skipNum = 2 for 4CIF, WHD
    109   if ((height_ >= 576) && (width_ >= 704)) {
    110     skip_num_ = 2;
    111   }
    112   // use skipNum = 4 for FULLL_HD images
    113   if ((height_ >= 1080) && (width_ >= 1920)) {
    114     skip_num_ = 4;
    115   }
    116 
    117   if (content_metrics_ != NULL) {
    118     delete content_metrics_;
    119   }
    120 
    121   if (prev_frame_ != NULL) {
    122     delete[] prev_frame_;
    123   }
    124 
    125   // Spatial Metrics don't work on a border of 8. Minimum processing
    126   // block size is 16 pixels.  So make sure the width and height support this.
    127   if (width_ <= 32 || height_ <= 32) {
    128     ca_Init_ = false;
    129     return VPM_PARAMETER_ERROR;
    130   }
    131 
    132   content_metrics_ = new VideoContentMetrics();
    133   if (content_metrics_ == NULL) {
    134     return VPM_MEMORY;
    135   }
    136 
    137   prev_frame_ = new uint8_t[width_ * height_];  // Y only.
    138   if (prev_frame_ == NULL)
    139     return VPM_MEMORY;
    140 
    141   return VPM_OK;
    142 }
    143 
    144 // Compute motion metrics: magnitude over non-zero motion vectors,
    145 //  and size of zero cluster
    146 int32_t VPMContentAnalysis::ComputeMotionMetrics() {
    147   // Motion metrics: only one is derived from normalized
    148   //  (MAD) temporal difference
    149   (this->*TemporalDiffMetric)();
    150   return VPM_OK;
    151 }
    152 
    153 // Normalized temporal difference (MAD): used as a motion level metric
    154 // Normalize MAD by spatial contrast: images with more contrast
    155 //  (pixel variance) likely have larger temporal difference
    156 // To reduce complexity, we compute the metric for a reduced set of points.
    157 int32_t VPMContentAnalysis::TemporalDiffMetric_C() {
    158   // size of original frame
    159   int sizei = height_;
    160   int sizej = width_;
    161   uint32_t tempDiffSum = 0;
    162   uint32_t pixelSum = 0;
    163   uint64_t pixelSqSum = 0;
    164 
    165   uint32_t num_pixels = 0;  // Counter for # of pixels.
    166   const int width_end = ((width_ - 2 * border_) & -16) + border_;
    167 
    168   for (int i = border_; i < sizei - border_; i += skip_num_) {
    169     for (int j = border_; j < width_end; j++) {
    170       num_pixels += 1;
    171       int ssn = i * sizej + j;
    172 
    173       uint8_t currPixel = orig_frame_[ssn];
    174       uint8_t prevPixel = prev_frame_[ssn];
    175 
    176       tempDiffSum +=
    177           static_cast<uint32_t>(abs((int16_t)(currPixel - prevPixel)));
    178       pixelSum += static_cast<uint32_t>(currPixel);
    179       pixelSqSum += static_cast<uint64_t>(currPixel * currPixel);
    180     }
    181   }
    182 
    183   // Default.
    184   motion_magnitude_ = 0.0f;
    185 
    186   if (tempDiffSum == 0)
    187     return VPM_OK;
    188 
    189   // Normalize over all pixels.
    190   float const tempDiffAvg =
    191       static_cast<float>(tempDiffSum) / static_cast<float>(num_pixels);
    192   float const pixelSumAvg =
    193       static_cast<float>(pixelSum) / static_cast<float>(num_pixels);
    194   float const pixelSqSumAvg =
    195       static_cast<float>(pixelSqSum) / static_cast<float>(num_pixels);
    196   float contrast = pixelSqSumAvg - (pixelSumAvg * pixelSumAvg);
    197 
    198   if (contrast > 0.0) {
    199     contrast = sqrt(contrast);
    200     motion_magnitude_ = tempDiffAvg / contrast;
    201   }
    202   return VPM_OK;
    203 }
    204 
    205 // Compute spatial metrics:
    206 // To reduce complexity, we compute the metric for a reduced set of points.
    207 // The spatial metrics are rough estimates of the prediction error cost for
    208 //  each QM spatial mode: 2x2,1x2,2x1
    209 // The metrics are a simple estimate of the up-sampling prediction error,
    210 // estimated assuming sub-sampling for decimation (no filtering),
    211 // and up-sampling back up with simple bilinear interpolation.
    212 int32_t VPMContentAnalysis::ComputeSpatialMetrics_C() {
    213   const int sizei = height_;
    214   const int sizej = width_;
    215 
    216   // Pixel mean square average: used to normalize the spatial metrics.
    217   uint32_t pixelMSA = 0;
    218 
    219   uint32_t spatialErrSum = 0;
    220   uint32_t spatialErrVSum = 0;
    221   uint32_t spatialErrHSum = 0;
    222 
    223   // make sure work section is a multiple of 16
    224   const int width_end = ((sizej - 2 * border_) & -16) + border_;
    225 
    226   for (int i = border_; i < sizei - border_; i += skip_num_) {
    227     for (int j = border_; j < width_end; j++) {
    228       int ssn1 = i * sizej + j;
    229       int ssn2 = (i + 1) * sizej + j;  // bottom
    230       int ssn3 = (i - 1) * sizej + j;  // top
    231       int ssn4 = i * sizej + j + 1;    // right
    232       int ssn5 = i * sizej + j - 1;    // left
    233 
    234       uint16_t refPixel1 = orig_frame_[ssn1] << 1;
    235       uint16_t refPixel2 = orig_frame_[ssn1] << 2;
    236 
    237       uint8_t bottPixel = orig_frame_[ssn2];
    238       uint8_t topPixel = orig_frame_[ssn3];
    239       uint8_t rightPixel = orig_frame_[ssn4];
    240       uint8_t leftPixel = orig_frame_[ssn5];
    241 
    242       spatialErrSum += static_cast<uint32_t>(abs(static_cast<int16_t>(
    243           refPixel2 - static_cast<uint16_t>(bottPixel + topPixel + leftPixel +
    244                                             rightPixel))));
    245       spatialErrVSum += static_cast<uint32_t>(abs(static_cast<int16_t>(
    246           refPixel1 - static_cast<uint16_t>(bottPixel + topPixel))));
    247       spatialErrHSum += static_cast<uint32_t>(abs(static_cast<int16_t>(
    248           refPixel1 - static_cast<uint16_t>(leftPixel + rightPixel))));
    249       pixelMSA += orig_frame_[ssn1];
    250     }
    251   }
    252 
    253   // Normalize over all pixels.
    254   const float spatialErr = static_cast<float>(spatialErrSum >> 2);
    255   const float spatialErrH = static_cast<float>(spatialErrHSum >> 1);
    256   const float spatialErrV = static_cast<float>(spatialErrVSum >> 1);
    257   const float norm = static_cast<float>(pixelMSA);
    258 
    259   // 2X2:
    260   spatial_pred_err_ = spatialErr / norm;
    261   // 1X2:
    262   spatial_pred_err_h_ = spatialErrH / norm;
    263   // 2X1:
    264   spatial_pred_err_v_ = spatialErrV / norm;
    265   return VPM_OK;
    266 }
    267 
    268 VideoContentMetrics* VPMContentAnalysis::ContentMetrics() {
    269   if (ca_Init_ == false)
    270     return NULL;
    271 
    272   content_metrics_->spatial_pred_err = spatial_pred_err_;
    273   content_metrics_->spatial_pred_err_h = spatial_pred_err_h_;
    274   content_metrics_->spatial_pred_err_v = spatial_pred_err_v_;
    275   // Motion metric: normalized temporal difference (MAD).
    276   content_metrics_->motion_magnitude = motion_magnitude_;
    277 
    278   return content_metrics_;
    279 }
    280 
    281 }  // namespace webrtc
    282