Home | History | Annotate | Download | only in source
      1 /*
      2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #ifndef WEBRTC_MODULES_VIDEO_CODING_QM_SELECT_H_
     12 #define WEBRTC_MODULES_VIDEO_CODING_QM_SELECT_H_
     13 
     14 #include "webrtc/common_types.h"
     15 #include "webrtc/typedefs.h"
     16 
     17 /******************************************************/
     18 /* Quality Modes: Resolution and Robustness settings  */
     19 /******************************************************/
     20 
     21 namespace webrtc {
     22 struct VideoContentMetrics;
     23 
     // Resolution scaling state handed back through
     // VCMQmResolution::SelectResolution().
     // A default-constructed instance holds a 640x480 frame size, a frame rate of
     // 30, unit scale factors, and both change flags cleared.
     // NOTE(review): units (pixels, frames per second) are assumed from the member
     // names and the default values -- confirm against the implementation.
     struct VCMResolutionScale {
       VCMResolutionScale()
           : codec_width(640),
             codec_height(480),
             frame_rate(30.0f),
             spatial_width_fact(1.0f),
             spatial_height_fact(1.0f),
             temporal_fact(1.0f),
             change_resolution_spatial(false),
             change_resolution_temporal(false) {
       }

       // Codec frame size and frame rate.
       uint16_t codec_width;
       uint16_t codec_height;
       float frame_rate;

       // Scale factors for width, height and frame rate; 1.0 by default.
       float spatial_width_fact;
       float spatial_height_fact;
       float temporal_fact;

       // Flags for a spatial / temporal resolution change; false by default.
       bool change_resolution_spatial;
       bool change_resolution_temporal;
     };
     44 
     // Frame formats, listed in order of increasing pixel count.
     // kNumImageTypes is the number of formats; it also sizes kSizeOfImageType.
     enum ImageType {
       kQCIF = 0,            // 176x144
       kHCIF,                // 264x216 = half(~3/4x3/4) CIF.
       kQVGA,                // 320x240 = quarter VGA.
       kCIF,                 // 352x288
       kHVGA,                // 480x360 = half(~3/4x3/4) VGA.
       kVGA,                 // 640x480
       kQFULLHD,             // 960x540 = quarter FULLHD, and half(~3/4x3/4) WHD.
       kWHD,                 // 1280x720
       kFULLHD,              // 1920x1080
       kNumImageTypes
     };

     // Pixel count (width * height) of each format, indexed by ImageType.
     // Written as products so every entry can be checked against the dimensions
     // documented on the matching enumerator.
     const uint32_t kSizeOfImageType[kNumImageTypes] = {
       176 * 144,    // kQCIF    =   25344
       264 * 216,    // kHCIF    =   57024
       320 * 240,    // kQVGA    =   76800
       352 * 288,    // kCIF     =  101376
       480 * 360,    // kHVGA    =  172800
       640 * 480,    // kVGA     =  307200
       960 * 540,    // kQFULLHD =  518400
       1280 * 720,   // kWHD     =  921600
       1920 * 1080   // kFULLHD  = 2073600
     };
     60 
     // Classification of a frame rate into one of four levels; this is the
     // return type of VCMQmMethod::FrameRateLevel(). The thresholds that separate
     // the levels are not defined in this header.
     enum FrameRateLevelClass {
       kFrameRateLow,
       kFrameRateMiddle1,
       kFrameRateMiddle2,
       kFrameRateHigh
     };
     67 
     // Level assigned to a content feature. kDefault is the value a
     // VCMContFeature holds after construction and after Reset().
     enum ContentLevelClass {
       kLow,
       kHigh,
       kDefault
     };

     // A content feature: a magnitude (|value|) together with its level class.
     // Construction and Reset() both produce the same state: value 0 and level
     // kDefault.
     struct VCMContFeature {
       VCMContFeature()
           : value(0.0f),
             level(kDefault) {
       }

       // Restore the default-constructed state.
       void Reset() {
         value = 0.0f;
         level = kDefault;
       }

       float value;
       ContentLevelClass level;
     };
     86 
     // Direction of a resolution change; this is the argument type of
     // VCMQmResolution::UpdateDownsamplingState().
     enum UpDownAction {
       kUpResolution,
       kDownResolution
     };
     91 
     // Spatial down-sampling modes. kNumModesSpatial is the number of modes and
     // sizes the spatial factor tables below.
     enum SpatialAction {
       kNoChangeSpatial,
       kOneHalfSpatialUniform,        // 3/4 x 3/4: pixel count reduced to 9/16 (~1/2).
       kOneQuarterSpatialUniform,     // 1/2 x 1/2: pixel count reduced to 1/4.
       kNumModesSpatial
     };

     // Temporal down-sampling modes. kNumModesTemporal is the number of modes and
     // sizes the temporal factor table below.
     enum TemporalAction {
       kNoChangeTemporal,
       kTwoThirdsTemporal,     // 2/3 frame rate reduction (factor 1.5).
       kOneHalfTemporal,       // 1/2 frame rate reduction (factor 2.0).
       kNumModesTemporal
     };

     // A combined spatial and temporal action. Default-constructs to "no change"
     // in both dimensions.
     struct ResolutionAction {
       ResolutionAction()
           : spatial(kNoChangeSpatial),
             temporal(kNoChangeTemporal) {
       }
       SpatialAction spatial;
       TemporalAction temporal;
     };

     // Down-sampling factors for spatial (width and height), and temporal.
     // Each table is indexed by the corresponding action enumerator. A factor is
     // the reciprocal of the per-dimension scale named on that enumerator, e.g.
     // 4/3 for the 3/4 scale of kOneHalfSpatialUniform.
     const float kFactorWidthSpatial[kNumModesSpatial] =
         { 1.0f, 4.0f / 3.0f, 2.0f };

     const float kFactorHeightSpatial[kNumModesSpatial] =
         { 1.0f, 4.0f / 3.0f, 2.0f };

     const float kFactorTemporal[kNumModesTemporal] =
         { 1.0f, 1.5f, 2.0f };
    124 
    // State of the encoder with respect to its target rate; this is the return
    // type of VCMQmResolution::GetEncoderState().
    enum EncoderState {
      kStableEncoding,    // Low rate mis-match, stable buffer levels.
      kStressedEncoding,  // Significant over-shooting of target rate,
                          // Buffer under-flow, etc.
      kEasyEncoding       // Significant under-shooting of target rate.
    };
    131 
    132 // QmMethod class: main class for resolution and robustness settings
    133 
    134 class VCMQmMethod {
    135  public:
    136   VCMQmMethod();
    137   virtual ~VCMQmMethod();
    138 
    139   // Reset values
    140   void ResetQM();
    141   virtual void Reset() = 0;
    142 
    143   // Compute content class.
    144   uint8_t ComputeContentClass();
    145 
    146   // Update with the content metrics.
    147   void UpdateContent(const VideoContentMetrics* content_metrics);
    148 
    149   // Compute spatial texture magnitude and level.
    150   // Spatial texture is a spatial prediction error measure.
    151   void ComputeSpatial();
    152 
    153   // Compute motion magnitude and level for NFD metric.
    154   // NFD is normalized frame difference (normalized by spatial variance).
    155   void ComputeMotionNFD();
    156 
    157   // Get the imageType (CIF, VGA, HD, etc) for the system width/height.
    158   ImageType GetImageType(uint16_t width, uint16_t height);
    159 
    160   // Return the closest image type.
    161   ImageType FindClosestImageType(uint16_t width, uint16_t height);
    162 
    163   // Get the frame rate level.
    164   FrameRateLevelClass FrameRateLevel(float frame_rate);
    165 
    166  protected:
    167   // Content Data.
    168   const VideoContentMetrics* content_metrics_;
    169 
    170   // Encoder frame sizes and native frame sizes.
    171   uint16_t width_;
    172   uint16_t height_;
    173   float user_frame_rate_;
    174   uint16_t native_width_;
    175   uint16_t native_height_;
    176   float native_frame_rate_;
    177   float aspect_ratio_;
    178   // Image type and frame rate leve, for the current encoder resolution.
    179   ImageType image_type_;
    180   FrameRateLevelClass framerate_level_;
    181   // Content class data.
    182   VCMContFeature motion_;
    183   VCMContFeature spatial_;
    184   uint8_t content_class_;
    185   bool init_;
    186 };
    187 
    188 // Resolution settings class
    189 
    190 class VCMQmResolution : public VCMQmMethod {
    191  public:
    192   VCMQmResolution();
    193   virtual ~VCMQmResolution();
    194 
    195   // Reset all quantities.
    196   virtual void Reset();
    197 
    198   // Reset rate quantities and counters after every SelectResolution() call.
    199   void ResetRates();
    200 
    201   // Reset down-sampling state.
    202   void ResetDownSamplingState();
    203 
    204   // Get the encoder state.
    205   EncoderState GetEncoderState();
    206 
    207   // Initialize after SetEncodingData in media_opt.
    208   int Initialize(float bitrate,
    209                  float user_framerate,
    210                  uint16_t width,
    211                  uint16_t height,
    212                  int num_layers);
    213 
    214   // Update the encoder frame size.
    215   void UpdateCodecParameters(float frame_rate, uint16_t width, uint16_t height);
    216 
    217   // Update with actual bit rate (size of the latest encoded frame)
    218   // and frame type, after every encoded frame.
    219   void UpdateEncodedSize(int encoded_size,
    220                          FrameType encoded_frame_type);
    221 
    222   // Update with new target bitrate, actual encoder sent rate, frame_rate,
    223   // loss rate: every ~1 sec from SetTargetRates in media_opt.
    224   void UpdateRates(float target_bitrate,
    225                    float encoder_sent_rate,
    226                    float incoming_framerate,
    227                    uint8_t packet_loss);
    228 
    229   // Extract ST (spatio-temporal) resolution action.
    230   // Inputs: qm: Reference to the quality modes pointer.
    231   // Output: the spatial and/or temporal scale change.
    232   int SelectResolution(VCMResolutionScale** qm);
    233 
    234  private:
    235   // Set the default resolution action.
    236   void SetDefaultAction();
    237 
    238   // Compute rates for the selection of down-sampling action.
    239   void ComputeRatesForSelection();
    240 
    241   // Compute the encoder state.
    242   void ComputeEncoderState();
    243 
    244   // Return true if the action is to go back up in resolution.
    245   bool GoingUpResolution();
    246 
    247   // Return true if the action is to go down in resolution.
    248   bool GoingDownResolution();
    249 
    250   // Check the condition for going up in resolution by the scale factors:
    251   // |facWidth|, |facHeight|, |facTemp|.
    252   // |scaleFac| is a scale factor for the transition rate.
    253   bool ConditionForGoingUp(float fac_width,
    254                            float fac_height,
    255                            float fac_temp,
    256                            float scale_fac);
    257 
    258   // Get the bitrate threshold for the resolution action.
    259   // The case |facWidth|=|facHeight|=|facTemp|==1 is for down-sampling action.
    260   // |scaleFac| is a scale factor for the transition rate.
    261   float GetTransitionRate(float fac_width,
    262                           float fac_height,
    263                           float fac_temp,
    264                           float scale_fac);
    265 
    266   // Update the down-sampling state.
    267   void UpdateDownsamplingState(UpDownAction up_down);
    268 
    269   // Update the codec frame size and frame rate.
    270   void UpdateCodecResolution();
    271 
    272   // Return a state based on average target rate relative transition rate.
    273   uint8_t RateClass(float transition_rate);
    274 
    275   // Adjust the action selected from the table.
    276   void AdjustAction();
    277 
    278   // Covert 2 stages of 3/4 (=9/16) spatial decimation to 1/2.
    279   void ConvertSpatialFractionalToWhole();
    280 
    281   // Returns true if the new frame sizes, under the selected spatial action,
    282   // are of even size.
    283   bool EvenFrameSize();
    284 
    285   // Insert latest down-sampling action into the history list.
    286   void InsertLatestDownAction();
    287 
    288   // Remove the last (first element) down-sampling action from the list.
    289   void RemoveLastDownAction();
    290 
    291   // Check constraints on the amount of down-sampling allowed.
    292   void ConstrainAmountOfDownSampling();
    293 
    294   // For going up in resolution: pick spatial or temporal action,
    295   // if both actions were separately selected.
    296   void PickSpatialOrTemporal();
    297 
    298   // Select the directional (1x2 or 2x1) spatial down-sampling action.
    299   void SelectSpatialDirectionMode(float transition_rate);
    300 
    301   enum { kDownActionHistorySize = 10};
    302 
    303   VCMResolutionScale* qm_;
    304   // Encoder rate control parameters.
    305   float target_bitrate_;
    306   float incoming_framerate_;
    307   float per_frame_bandwidth_;
    308   float buffer_level_;
    309 
    310   // Data accumulated every ~1sec from MediaOpt.
    311   float sum_target_rate_;
    312   float sum_incoming_framerate_;
    313   float sum_rate_MM_;
    314   float sum_rate_MM_sgn_;
    315   float sum_packet_loss_;
    316   // Counters.
    317   uint32_t frame_cnt_;
    318   uint32_t frame_cnt_delta_;
    319   uint32_t update_rate_cnt_;
    320   uint32_t low_buffer_cnt_;
    321 
    322   // Resolution state parameters.
    323   float state_dec_factor_spatial_;
    324   float state_dec_factor_temporal_;
    325 
    326   // Quantities used for selection.
    327   float avg_target_rate_;
    328   float avg_incoming_framerate_;
    329   float avg_ratio_buffer_low_;
    330   float avg_rate_mismatch_;
    331   float avg_rate_mismatch_sgn_;
    332   float avg_packet_loss_;
    333   EncoderState encoder_state_;
    334   ResolutionAction action_;
    335   // Short history of the down-sampling actions from the Initialize() state.
    336   // This is needed for going up in resolution. Since the total amount of
    337   // down-sampling actions are constrained, the length of the list need not be
    338   // large: i.e., (4/3) ^{kDownActionHistorySize} <= kMaxDownSample.
    339   ResolutionAction down_action_history_[kDownActionHistorySize];
    340   int num_layers_;
    341 };
    342 
    343 // Robustness settings class.
    344 
    345 class VCMQmRobustness : public VCMQmMethod {
    346  public:
    347   VCMQmRobustness();
    348   ~VCMQmRobustness();
    349 
    350   virtual void Reset();
    351 
    352   // Adjust FEC rate based on content: every ~1 sec from SetTargetRates.
    353   // Returns an adjustment factor.
    354   float AdjustFecFactor(uint8_t code_rate_delta,
    355                         float total_rate,
    356                         float framerate,
    357                         uint32_t rtt_time,
    358                         uint8_t packet_loss);
    359 
    360   // Set the UEP protection on/off.
    361   bool SetUepProtection(uint8_t code_rate_delta,
    362                         float total_rate,
    363                         uint8_t packet_loss,
    364                         bool frame_type);
    365 
    366  private:
    367   // Previous state of network parameters.
    368   float prev_total_rate_;
    369   uint32_t prev_rtt_time_;
    370   uint8_t prev_packet_loss_;
    371   uint8_t prev_code_rate_delta_;
    372 };
    373 }  // namespace webrtc
    374 #endif  // WEBRTC_MODULES_VIDEO_CODING_QM_SELECT_H_
    375