1 /* 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 #include "webrtc/modules/video_processing/content_analysis.h" 11 12 #include <math.h> 13 #include <stdlib.h> 14 15 #include "webrtc/system_wrappers/include/cpu_features_wrapper.h" 16 #include "webrtc/system_wrappers/include/tick_util.h" 17 18 namespace webrtc { 19 20 VPMContentAnalysis::VPMContentAnalysis(bool runtime_cpu_detection) 21 : orig_frame_(NULL), 22 prev_frame_(NULL), 23 width_(0), 24 height_(0), 25 skip_num_(1), 26 border_(8), 27 motion_magnitude_(0.0f), 28 spatial_pred_err_(0.0f), 29 spatial_pred_err_h_(0.0f), 30 spatial_pred_err_v_(0.0f), 31 first_frame_(true), 32 ca_Init_(false), 33 content_metrics_(NULL) { 34 ComputeSpatialMetrics = &VPMContentAnalysis::ComputeSpatialMetrics_C; 35 TemporalDiffMetric = &VPMContentAnalysis::TemporalDiffMetric_C; 36 37 if (runtime_cpu_detection) { 38 #if defined(WEBRTC_ARCH_X86_FAMILY) 39 if (WebRtc_GetCPUInfo(kSSE2)) { 40 ComputeSpatialMetrics = &VPMContentAnalysis::ComputeSpatialMetrics_SSE2; 41 TemporalDiffMetric = &VPMContentAnalysis::TemporalDiffMetric_SSE2; 42 } 43 #endif 44 } 45 Release(); 46 } 47 48 VPMContentAnalysis::~VPMContentAnalysis() { 49 Release(); 50 } 51 52 VideoContentMetrics* VPMContentAnalysis::ComputeContentMetrics( 53 const VideoFrame& inputFrame) { 54 if (inputFrame.IsZeroSize()) 55 return NULL; 56 57 // Init if needed (native dimension change). 58 if (width_ != inputFrame.width() || height_ != inputFrame.height()) { 59 if (VPM_OK != Initialize(inputFrame.width(), inputFrame.height())) 60 return NULL; 61 } 62 // Only interested in the Y plane. 63 orig_frame_ = inputFrame.buffer(kYPlane); 64 65 // Compute spatial metrics: 3 spatial prediction errors. 66 (this->*ComputeSpatialMetrics)(); 67 68 // Compute motion metrics 69 if (first_frame_ == false) 70 ComputeMotionMetrics(); 71 72 // Saving current frame as previous one: Y only. 73 memcpy(prev_frame_, orig_frame_, width_ * height_); 74 75 first_frame_ = false; 76 ca_Init_ = true; 77 78 return ContentMetrics(); 79 } 80 81 int32_t VPMContentAnalysis::Release() { 82 if (content_metrics_ != NULL) { 83 delete content_metrics_; 84 content_metrics_ = NULL; 85 } 86 87 if (prev_frame_ != NULL) { 88 delete[] prev_frame_; 89 prev_frame_ = NULL; 90 } 91 92 width_ = 0; 93 height_ = 0; 94 first_frame_ = true; 95 96 return VPM_OK; 97 } 98 99 int32_t VPMContentAnalysis::Initialize(int width, int height) { 100 width_ = width; 101 height_ = height; 102 first_frame_ = true; 103 104 // skip parameter: # of skipped rows: for complexity reduction 105 // temporal also currently uses it for column reduction. 106 skip_num_ = 1; 107 108 // use skipNum = 2 for 4CIF, WHD 109 if ((height_ >= 576) && (width_ >= 704)) { 110 skip_num_ = 2; 111 } 112 // use skipNum = 4 for FULLL_HD images 113 if ((height_ >= 1080) && (width_ >= 1920)) { 114 skip_num_ = 4; 115 } 116 117 if (content_metrics_ != NULL) { 118 delete content_metrics_; 119 } 120 121 if (prev_frame_ != NULL) { 122 delete[] prev_frame_; 123 } 124 125 // Spatial Metrics don't work on a border of 8. Minimum processing 126 // block size is 16 pixels. So make sure the width and height support this. 127 if (width_ <= 32 || height_ <= 32) { 128 ca_Init_ = false; 129 return VPM_PARAMETER_ERROR; 130 } 131 132 content_metrics_ = new VideoContentMetrics(); 133 if (content_metrics_ == NULL) { 134 return VPM_MEMORY; 135 } 136 137 prev_frame_ = new uint8_t[width_ * height_]; // Y only. 138 if (prev_frame_ == NULL) 139 return VPM_MEMORY; 140 141 return VPM_OK; 142 } 143 144 // Compute motion metrics: magnitude over non-zero motion vectors, 145 // and size of zero cluster 146 int32_t VPMContentAnalysis::ComputeMotionMetrics() { 147 // Motion metrics: only one is derived from normalized 148 // (MAD) temporal difference 149 (this->*TemporalDiffMetric)(); 150 return VPM_OK; 151 } 152 153 // Normalized temporal difference (MAD): used as a motion level metric 154 // Normalize MAD by spatial contrast: images with more contrast 155 // (pixel variance) likely have larger temporal difference 156 // To reduce complexity, we compute the metric for a reduced set of points. 157 int32_t VPMContentAnalysis::TemporalDiffMetric_C() { 158 // size of original frame 159 int sizei = height_; 160 int sizej = width_; 161 uint32_t tempDiffSum = 0; 162 uint32_t pixelSum = 0; 163 uint64_t pixelSqSum = 0; 164 165 uint32_t num_pixels = 0; // Counter for # of pixels. 166 const int width_end = ((width_ - 2 * border_) & -16) + border_; 167 168 for (int i = border_; i < sizei - border_; i += skip_num_) { 169 for (int j = border_; j < width_end; j++) { 170 num_pixels += 1; 171 int ssn = i * sizej + j; 172 173 uint8_t currPixel = orig_frame_[ssn]; 174 uint8_t prevPixel = prev_frame_[ssn]; 175 176 tempDiffSum += 177 static_cast<uint32_t>(abs((int16_t)(currPixel - prevPixel))); 178 pixelSum += static_cast<uint32_t>(currPixel); 179 pixelSqSum += static_cast<uint64_t>(currPixel * currPixel); 180 } 181 } 182 183 // Default. 184 motion_magnitude_ = 0.0f; 185 186 if (tempDiffSum == 0) 187 return VPM_OK; 188 189 // Normalize over all pixels. 190 float const tempDiffAvg = 191 static_cast<float>(tempDiffSum) / static_cast<float>(num_pixels); 192 float const pixelSumAvg = 193 static_cast<float>(pixelSum) / static_cast<float>(num_pixels); 194 float const pixelSqSumAvg = 195 static_cast<float>(pixelSqSum) / static_cast<float>(num_pixels); 196 float contrast = pixelSqSumAvg - (pixelSumAvg * pixelSumAvg); 197 198 if (contrast > 0.0) { 199 contrast = sqrt(contrast); 200 motion_magnitude_ = tempDiffAvg / contrast; 201 } 202 return VPM_OK; 203 } 204 205 // Compute spatial metrics: 206 // To reduce complexity, we compute the metric for a reduced set of points. 207 // The spatial metrics are rough estimates of the prediction error cost for 208 // each QM spatial mode: 2x2,1x2,2x1 209 // The metrics are a simple estimate of the up-sampling prediction error, 210 // estimated assuming sub-sampling for decimation (no filtering), 211 // and up-sampling back up with simple bilinear interpolation. 212 int32_t VPMContentAnalysis::ComputeSpatialMetrics_C() { 213 const int sizei = height_; 214 const int sizej = width_; 215 216 // Pixel mean square average: used to normalize the spatial metrics. 217 uint32_t pixelMSA = 0; 218 219 uint32_t spatialErrSum = 0; 220 uint32_t spatialErrVSum = 0; 221 uint32_t spatialErrHSum = 0; 222 223 // make sure work section is a multiple of 16 224 const int width_end = ((sizej - 2 * border_) & -16) + border_; 225 226 for (int i = border_; i < sizei - border_; i += skip_num_) { 227 for (int j = border_; j < width_end; j++) { 228 int ssn1 = i * sizej + j; 229 int ssn2 = (i + 1) * sizej + j; // bottom 230 int ssn3 = (i - 1) * sizej + j; // top 231 int ssn4 = i * sizej + j + 1; // right 232 int ssn5 = i * sizej + j - 1; // left 233 234 uint16_t refPixel1 = orig_frame_[ssn1] << 1; 235 uint16_t refPixel2 = orig_frame_[ssn1] << 2; 236 237 uint8_t bottPixel = orig_frame_[ssn2]; 238 uint8_t topPixel = orig_frame_[ssn3]; 239 uint8_t rightPixel = orig_frame_[ssn4]; 240 uint8_t leftPixel = orig_frame_[ssn5]; 241 242 spatialErrSum += static_cast<uint32_t>(abs(static_cast<int16_t>( 243 refPixel2 - static_cast<uint16_t>(bottPixel + topPixel + leftPixel + 244 rightPixel)))); 245 spatialErrVSum += static_cast<uint32_t>(abs(static_cast<int16_t>( 246 refPixel1 - static_cast<uint16_t>(bottPixel + topPixel)))); 247 spatialErrHSum += static_cast<uint32_t>(abs(static_cast<int16_t>( 248 refPixel1 - static_cast<uint16_t>(leftPixel + rightPixel)))); 249 pixelMSA += orig_frame_[ssn1]; 250 } 251 } 252 253 // Normalize over all pixels. 254 const float spatialErr = static_cast<float>(spatialErrSum >> 2); 255 const float spatialErrH = static_cast<float>(spatialErrHSum >> 1); 256 const float spatialErrV = static_cast<float>(spatialErrVSum >> 1); 257 const float norm = static_cast<float>(pixelMSA); 258 259 // 2X2: 260 spatial_pred_err_ = spatialErr / norm; 261 // 1X2: 262 spatial_pred_err_h_ = spatialErrH / norm; 263 // 2X1: 264 spatial_pred_err_v_ = spatialErrV / norm; 265 return VPM_OK; 266 } 267 268 VideoContentMetrics* VPMContentAnalysis::ContentMetrics() { 269 if (ca_Init_ == false) 270 return NULL; 271 272 content_metrics_->spatial_pred_err = spatial_pred_err_; 273 content_metrics_->spatial_pred_err_h = spatial_pred_err_h_; 274 content_metrics_->spatial_pred_err_v = spatial_pred_err_v_; 275 // Motion metric: normalized temporal difference (MAD). 276 content_metrics_->motion_magnitude = motion_magnitude_; 277 278 return content_metrics_; 279 } 280 281 } // namespace webrtc 282