Home | History | Annotate | Download | only in video_coding
      1 /*
      2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #ifndef WEBRTC_MODULES_VIDEO_CODING_QM_SELECT_H_
     12 #define WEBRTC_MODULES_VIDEO_CODING_QM_SELECT_H_
     13 
     14 #include "webrtc/common_types.h"
     15 #include "webrtc/typedefs.h"
     16 
     17 /******************************************************/
     18 /* Quality Modes: Resolution and Robustness settings  */
     19 /******************************************************/
     20 
     21 namespace webrtc {
     22 struct VideoContentMetrics;
     23 
     24 struct VCMResolutionScale {
     25   VCMResolutionScale()
     26       : codec_width(640),
     27         codec_height(480),
     28         frame_rate(30.0f),
     29         spatial_width_fact(1.0f),
     30         spatial_height_fact(1.0f),
     31         temporal_fact(1.0f),
     32         change_resolution_spatial(false),
     33         change_resolution_temporal(false) {}
     34   uint16_t codec_width;
     35   uint16_t codec_height;
     36   float frame_rate;
     37   float spatial_width_fact;
     38   float spatial_height_fact;
     39   float temporal_fact;
     40   bool change_resolution_spatial;
     41   bool change_resolution_temporal;
     42 };
     43 
     44 enum ImageType {
     45   kQCIF = 0,  // 176x144
     46   kHCIF,      // 264x216 = half(~3/4x3/4) CIF.
     47   kQVGA,      // 320x240 = quarter VGA.
     48   kCIF,       // 352x288
     49   kHVGA,      // 480x360 = half(~3/4x3/4) VGA.
     50   kVGA,       // 640x480
     51   kQFULLHD,   // 960x540 = quarter FULLHD, and half(~3/4x3/4) WHD.
     52   kWHD,       // 1280x720
     53   kFULLHD,    // 1920x1080
     54   kNumImageTypes
     55 };
     56 
     57 const uint32_t kSizeOfImageType[kNumImageTypes] = {
     58     25344, 57024, 76800, 101376, 172800, 307200, 518400, 921600, 2073600};
     59 
     60 enum FrameRateLevelClass {
     61   kFrameRateLow,
     62   kFrameRateMiddle1,
     63   kFrameRateMiddle2,
     64   kFrameRateHigh
     65 };
     66 
     67 enum ContentLevelClass { kLow, kHigh, kDefault };
     68 
     69 struct VCMContFeature {
     70   VCMContFeature() : value(0.0f), level(kDefault) {}
     71   void Reset() {
     72     value = 0.0f;
     73     level = kDefault;
     74   }
     75   float value;
     76   ContentLevelClass level;
     77 };
     78 
     79 enum UpDownAction { kUpResolution, kDownResolution };
     80 
     81 enum SpatialAction {
     82   kNoChangeSpatial,
     83   kOneHalfSpatialUniform,     // 3/4 x 3/4: 9/6 ~1/2 pixel reduction.
     84   kOneQuarterSpatialUniform,  // 1/2 x 1/2: 1/4 pixel reduction.
     85   kNumModesSpatial
     86 };
     87 
     88 enum TemporalAction {
     89   kNoChangeTemporal,
     90   kTwoThirdsTemporal,  // 2/3 frame rate reduction
     91   kOneHalfTemporal,    // 1/2 frame rate reduction
     92   kNumModesTemporal
     93 };
     94 
     95 struct ResolutionAction {
     96   ResolutionAction() : spatial(kNoChangeSpatial), temporal(kNoChangeTemporal) {}
     97   SpatialAction spatial;
     98   TemporalAction temporal;
     99 };
    100 
    101 // Down-sampling factors for spatial (width and height), and temporal.
    102 const float kFactorWidthSpatial[kNumModesSpatial] = {1.0f, 4.0f / 3.0f, 2.0f};
    103 
    104 const float kFactorHeightSpatial[kNumModesSpatial] = {1.0f, 4.0f / 3.0f, 2.0f};
    105 
    106 const float kFactorTemporal[kNumModesTemporal] = {1.0f, 1.5f, 2.0f};
    107 
    108 enum EncoderState {
    109   kStableEncoding,    // Low rate mis-match, stable buffer levels.
    110   kStressedEncoding,  // Significant over-shooting of target rate,
    111                       // Buffer under-flow, etc.
    112   kEasyEncoding       // Significant under-shooting of target rate.
    113 };
    114 
    115 // QmMethod class: main class for resolution and robustness settings
    116 
    117 class VCMQmMethod {
    118  public:
    119   VCMQmMethod();
    120   virtual ~VCMQmMethod();
    121 
    122   // Reset values
    123   void ResetQM();
    124   virtual void Reset() = 0;
    125 
    126   // Compute content class.
    127   uint8_t ComputeContentClass();
    128 
    129   // Update with the content metrics.
    130   void UpdateContent(const VideoContentMetrics* content_metrics);
    131 
    132   // Compute spatial texture magnitude and level.
    133   // Spatial texture is a spatial prediction error measure.
    134   void ComputeSpatial();
    135 
    136   // Compute motion magnitude and level for NFD metric.
    137   // NFD is normalized frame difference (normalized by spatial variance).
    138   void ComputeMotionNFD();
    139 
    140   // Get the imageType (CIF, VGA, HD, etc) for the system width/height.
    141   ImageType GetImageType(uint16_t width, uint16_t height);
    142 
    143   // Return the closest image type.
    144   ImageType FindClosestImageType(uint16_t width, uint16_t height);
    145 
    146   // Get the frame rate level.
    147   FrameRateLevelClass FrameRateLevel(float frame_rate);
    148 
    149  protected:
    150   // Content Data.
    151   const VideoContentMetrics* content_metrics_;
    152 
    153   // Encoder frame sizes and native frame sizes.
    154   uint16_t width_;
    155   uint16_t height_;
    156   float user_frame_rate_;
    157   uint16_t native_width_;
    158   uint16_t native_height_;
    159   float native_frame_rate_;
    160   float aspect_ratio_;
    161   // Image type and frame rate leve, for the current encoder resolution.
    162   ImageType image_type_;
    163   FrameRateLevelClass framerate_level_;
    164   // Content class data.
    165   VCMContFeature motion_;
    166   VCMContFeature spatial_;
    167   uint8_t content_class_;
    168   bool init_;
    169 };
    170 
    171 // Resolution settings class
    172 
    173 class VCMQmResolution : public VCMQmMethod {
    174  public:
    175   VCMQmResolution();
    176   virtual ~VCMQmResolution();
    177 
    178   // Reset all quantities.
    179   virtual void Reset();
    180 
    181   // Reset rate quantities and counters after every SelectResolution() call.
    182   void ResetRates();
    183 
    184   // Reset down-sampling state.
    185   void ResetDownSamplingState();
    186 
    187   // Get the encoder state.
    188   EncoderState GetEncoderState();
    189 
    190   // Initialize after SetEncodingData in media_opt.
    191   int Initialize(float bitrate,
    192                  float user_framerate,
    193                  uint16_t width,
    194                  uint16_t height,
    195                  int num_layers);
    196 
    197   // Update the encoder frame size.
    198   void UpdateCodecParameters(float frame_rate, uint16_t width, uint16_t height);
    199 
    200   // Update with actual bit rate (size of the latest encoded frame)
    201   // and frame type, after every encoded frame.
    202   void UpdateEncodedSize(size_t encoded_size);
    203 
    204   // Update with new target bitrate, actual encoder sent rate, frame_rate,
    205   // loss rate: every ~1 sec from SetTargetRates in media_opt.
    206   void UpdateRates(float target_bitrate,
    207                    float encoder_sent_rate,
    208                    float incoming_framerate,
    209                    uint8_t packet_loss);
    210 
    211   // Extract ST (spatio-temporal) resolution action.
    212   // Inputs: qm: Reference to the quality modes pointer.
    213   // Output: the spatial and/or temporal scale change.
    214   int SelectResolution(VCMResolutionScale** qm);
    215 
    216  private:
    217   // Set the default resolution action.
    218   void SetDefaultAction();
    219 
    220   // Compute rates for the selection of down-sampling action.
    221   void ComputeRatesForSelection();
    222 
    223   // Compute the encoder state.
    224   void ComputeEncoderState();
    225 
    226   // Return true if the action is to go back up in resolution.
    227   bool GoingUpResolution();
    228 
    229   // Return true if the action is to go down in resolution.
    230   bool GoingDownResolution();
    231 
    232   // Check the condition for going up in resolution by the scale factors:
    233   // |facWidth|, |facHeight|, |facTemp|.
    234   // |scaleFac| is a scale factor for the transition rate.
    235   bool ConditionForGoingUp(float fac_width,
    236                            float fac_height,
    237                            float fac_temp,
    238                            float scale_fac);
    239 
    240   // Get the bitrate threshold for the resolution action.
    241   // The case |facWidth|=|facHeight|=|facTemp|==1 is for down-sampling action.
    242   // |scaleFac| is a scale factor for the transition rate.
    243   float GetTransitionRate(float fac_width,
    244                           float fac_height,
    245                           float fac_temp,
    246                           float scale_fac);
    247 
    248   // Update the down-sampling state.
    249   void UpdateDownsamplingState(UpDownAction up_down);
    250 
    251   // Update the codec frame size and frame rate.
    252   void UpdateCodecResolution();
    253 
    254   // Return a state based on average target rate relative transition rate.
    255   uint8_t RateClass(float transition_rate);
    256 
    257   // Adjust the action selected from the table.
    258   void AdjustAction();
    259 
    260   // Covert 2 stages of 3/4 (=9/16) spatial decimation to 1/2.
    261   void ConvertSpatialFractionalToWhole();
    262 
    263   // Returns true if the new frame sizes, under the selected spatial action,
    264   // are of even size.
    265   bool EvenFrameSize();
    266 
    267   // Insert latest down-sampling action into the history list.
    268   void InsertLatestDownAction();
    269 
    270   // Remove the last (first element) down-sampling action from the list.
    271   void RemoveLastDownAction();
    272 
    273   // Check constraints on the amount of down-sampling allowed.
    274   void ConstrainAmountOfDownSampling();
    275 
    276   // For going up in resolution: pick spatial or temporal action,
    277   // if both actions were separately selected.
    278   void PickSpatialOrTemporal();
    279 
    280   // Select the directional (1x2 or 2x1) spatial down-sampling action.
    281   void SelectSpatialDirectionMode(float transition_rate);
    282 
    283   enum { kDownActionHistorySize = 10 };
    284 
    285   VCMResolutionScale* qm_;
    286   // Encoder rate control parameters.
    287   float target_bitrate_;
    288   float incoming_framerate_;
    289   float per_frame_bandwidth_;
    290   float buffer_level_;
    291 
    292   // Data accumulated every ~1sec from MediaOpt.
    293   float sum_target_rate_;
    294   float sum_incoming_framerate_;
    295   float sum_rate_MM_;
    296   float sum_rate_MM_sgn_;
    297   float sum_packet_loss_;
    298   // Counters.
    299   uint32_t frame_cnt_;
    300   uint32_t frame_cnt_delta_;
    301   uint32_t update_rate_cnt_;
    302   uint32_t low_buffer_cnt_;
    303 
    304   // Resolution state parameters.
    305   float state_dec_factor_spatial_;
    306   float state_dec_factor_temporal_;
    307 
    308   // Quantities used for selection.
    309   float avg_target_rate_;
    310   float avg_incoming_framerate_;
    311   float avg_ratio_buffer_low_;
    312   float avg_rate_mismatch_;
    313   float avg_rate_mismatch_sgn_;
    314   float avg_packet_loss_;
    315   EncoderState encoder_state_;
    316   ResolutionAction action_;
    317   // Short history of the down-sampling actions from the Initialize() state.
    318   // This is needed for going up in resolution. Since the total amount of
    319   // down-sampling actions are constrained, the length of the list need not be
    320   // large: i.e., (4/3) ^{kDownActionHistorySize} <= kMaxDownSample.
    321   ResolutionAction down_action_history_[kDownActionHistorySize];
    322   int num_layers_;
    323 };
    324 
    325 // Robustness settings class.
    326 
    327 class VCMQmRobustness : public VCMQmMethod {
    328  public:
    329   VCMQmRobustness();
    330   ~VCMQmRobustness();
    331 
    332   virtual void Reset();
    333 
    334   // Adjust FEC rate based on content: every ~1 sec from SetTargetRates.
    335   // Returns an adjustment factor.
    336   float AdjustFecFactor(uint8_t code_rate_delta,
    337                         float total_rate,
    338                         float framerate,
    339                         int64_t rtt_time,
    340                         uint8_t packet_loss);
    341 
    342   // Set the UEP protection on/off.
    343   bool SetUepProtection(uint8_t code_rate_delta,
    344                         float total_rate,
    345                         uint8_t packet_loss,
    346                         bool frame_type);
    347 
    348  private:
    349   // Previous state of network parameters.
    350   float prev_total_rate_;
    351   int64_t prev_rtt_time_;
    352   uint8_t prev_packet_loss_;
    353   uint8_t prev_code_rate_delta_;
    354 };
    355 }  // namespace webrtc
    356 #endif  // WEBRTC_MODULES_VIDEO_CODING_QM_SELECT_H_
    357