Home | History | Annotate | Download | only in vda
      1 // Copyright 2015 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 //
      5 // This file contains an implementation of a VP9 bitstream parser. The main
      6 // purpose of this parser is to support hardware decode acceleration. Some
      7 // accelerators, e.g. libva which implements VA-API, require the caller
      8 // (chrome) to feed them parsed VP9 frame header.
      9 //
     10 // See media::VP9Decoder for example usage.
     11 //
     12 #ifndef VP9_PARSER_H_
     13 #define VP9_PARSER_H_
     14 
     15 #include <stddef.h>
     16 #include <stdint.h>
     17 #include <sys/types.h>
     18 
     19 #include <deque>
     20 #include <memory>
     21 
     22 #include "base/callback.h"
     23 #include "base/macros.h"
     24 #include "base/memory/weak_ptr.h"
     25 
     26 namespace media {
     27 
     28 const int kVp9MaxProfile = 4;
     29 const int kVp9NumRefFramesLog2 = 3;
     30 const size_t kVp9NumRefFrames = 1 << kVp9NumRefFramesLog2;
     31 const uint8_t kVp9MaxProb = 255;
     32 const size_t kVp9NumRefsPerFrame = 3;
     33 const size_t kVp9NumFrameContextsLog2 = 2;
     34 const size_t kVp9NumFrameContexts = 1 << kVp9NumFrameContextsLog2;
     35 
     36 using Vp9Prob = uint8_t;
     37 
     38 enum class Vp9ColorSpace {
     39   UNKNOWN = 0,
     40   BT_601 = 1,
     41   BT_709 = 2,
     42   SMPTE_170 = 3,
     43   SMPTE_240 = 4,
     44   BT_2020 = 5,
     45   RESERVED = 6,
     46   SRGB = 7,
     47 };
     48 
     49 enum Vp9InterpolationFilter {
     50   EIGHTTAP = 0,
     51   EIGHTTAP_SMOOTH = 1,
     52   EIGHTTAP_SHARP = 2,
     53   BILINEAR = 3,
     54   SWITCHABLE = 4,
     55 };
     56 
     57 enum Vp9RefType {
     58   VP9_FRAME_INTRA = 0,
     59   VP9_FRAME_LAST = 1,
     60   VP9_FRAME_GOLDEN = 2,
     61   VP9_FRAME_ALTREF = 3,
     62   VP9_FRAME_MAX = 4,
     63 };
     64 
     65 enum Vp9ReferenceMode {
     66   SINGLE_REFERENCE = 0,
     67   COMPOUND_REFERENCE = 1,
     68   REFERENCE_MODE_SELECT = 2,
     69 };
     70 
     71 struct Vp9SegmentationParams {
     72   static const size_t kNumSegments = 8;
     73   static const size_t kNumTreeProbs = kNumSegments - 1;
     74   static const size_t kNumPredictionProbs = 3;
     75   enum SegmentLevelFeature {
     76     SEG_LVL_ALT_Q = 0,
     77     SEG_LVL_ALT_LF = 1,
     78     SEG_LVL_REF_FRAME = 2,
     79     SEG_LVL_SKIP = 3,
     80     SEG_LVL_MAX
     81   };
     82 
     83   bool enabled;
     84 
     85   bool update_map;
     86   uint8_t tree_probs[kNumTreeProbs];
     87   bool temporal_update;
     88   uint8_t pred_probs[kNumPredictionProbs];
     89 
     90   bool update_data;
     91   bool abs_or_delta_update;
     92   bool feature_enabled[kNumSegments][SEG_LVL_MAX];
     93   int16_t feature_data[kNumSegments][SEG_LVL_MAX];
     94 
     95   int16_t y_dequant[kNumSegments][2];
     96   int16_t uv_dequant[kNumSegments][2];
     97 
     98   bool FeatureEnabled(size_t seg_id, SegmentLevelFeature feature) const {
     99     return feature_enabled[seg_id][feature];
    100   }
    101 
    102   int16_t FeatureData(size_t seg_id, SegmentLevelFeature feature) const {
    103     return feature_data[seg_id][feature];
    104   }
    105 };
    106 
    107 struct Vp9LoopFilterParams {
    108   static const size_t kNumModeDeltas = 2;
    109 
    110   uint8_t level;
    111   uint8_t sharpness;
    112 
    113   bool delta_enabled;
    114   bool delta_update;
    115   bool update_ref_deltas[VP9_FRAME_MAX];
    116   int8_t ref_deltas[VP9_FRAME_MAX];
    117   bool update_mode_deltas[kNumModeDeltas];
    118   int8_t mode_deltas[kNumModeDeltas];
    119 
    120   // Calculated from above fields.
    121   uint8_t lvl[Vp9SegmentationParams::kNumSegments][VP9_FRAME_MAX]
    122              [kNumModeDeltas];
    123 };
    124 
    125 // Members of Vp9FrameHeader will be 0-initialized by Vp9Parser::ParseNextFrame.
    126 struct Vp9QuantizationParams {
    127   bool IsLossless() const {
    128     return base_q_idx == 0 && delta_q_y_dc == 0 && delta_q_uv_dc == 0 &&
    129            delta_q_uv_ac == 0;
    130   }
    131 
    132   uint8_t base_q_idx;
    133   int8_t delta_q_y_dc;
    134   int8_t delta_q_uv_dc;
    135   int8_t delta_q_uv_ac;
    136 };
    137 
    138 // Entropy context for frame parsing
    139 struct Vp9FrameContext {
    140   bool IsValid() const;
    141 
    142   Vp9Prob tx_probs_8x8[2][1];
    143   Vp9Prob tx_probs_16x16[2][2];
    144   Vp9Prob tx_probs_32x32[2][3];
    145 
    146   Vp9Prob coef_probs[4][2][2][6][6][3];
    147   Vp9Prob skip_prob[3];
    148   Vp9Prob inter_mode_probs[7][3];
    149   Vp9Prob interp_filter_probs[4][2];
    150   Vp9Prob is_inter_prob[4];
    151 
    152   Vp9Prob comp_mode_prob[5];
    153   Vp9Prob single_ref_prob[5][2];
    154   Vp9Prob comp_ref_prob[5];
    155 
    156   Vp9Prob y_mode_probs[4][9];
    157   Vp9Prob uv_mode_probs[10][9];
    158   Vp9Prob partition_probs[16][3];
    159 
    160   Vp9Prob mv_joint_probs[3];
    161   Vp9Prob mv_sign_prob[2];
    162   Vp9Prob mv_class_probs[2][10];
    163   Vp9Prob mv_class0_bit_prob[2];
    164   Vp9Prob mv_bits_prob[2][10];
    165   Vp9Prob mv_class0_fr_probs[2][2][3];
    166   Vp9Prob mv_fr_probs[2][3];
    167   Vp9Prob mv_class0_hp_prob[2];
    168   Vp9Prob mv_hp_prob[2];
    169 };
    170 
    171 struct Vp9CompressedHeader {
    172   enum Vp9TxMode {
    173     ONLY_4X4 = 0,
    174     ALLOW_8X8 = 1,
    175     ALLOW_16X16 = 2,
    176     ALLOW_32X32 = 3,
    177     TX_MODE_SELECT = 4,
    178     TX_MODES = 5,
    179   };
    180 
    181   Vp9TxMode tx_mode;
    182   Vp9ReferenceMode reference_mode;
    183 };
    184 
    185 // VP9 frame header.
    186 struct Vp9FrameHeader {
    187   enum FrameType {
    188     KEYFRAME = 0,
    189     INTERFRAME = 1,
    190   };
    191 
    192   bool IsKeyframe() const;
    193   bool IsIntra() const;
    194   bool RefreshFlag(size_t i) const {
    195     return !!(refresh_frame_flags & (1u << i));
    196   }
    197 
    198   uint8_t profile;
    199 
    200   bool show_existing_frame;
    201   uint8_t frame_to_show_map_idx;
    202 
    203   FrameType frame_type;
    204 
    205   bool show_frame;
    206   bool error_resilient_mode;
    207 
    208   uint8_t bit_depth;
    209   Vp9ColorSpace color_space;
    210   bool color_range;
    211   uint8_t subsampling_x;
    212   uint8_t subsampling_y;
    213 
    214   // The range of frame_width and frame_height is 1..2^16.
    215   uint32_t frame_width;
    216   uint32_t frame_height;
    217   uint32_t render_width;
    218   uint32_t render_height;
    219 
    220   bool intra_only;
    221   uint8_t reset_frame_context;
    222   uint8_t refresh_frame_flags;
    223   uint8_t ref_frame_idx[kVp9NumRefsPerFrame];
    224   bool ref_frame_sign_bias[Vp9RefType::VP9_FRAME_MAX];
    225   bool allow_high_precision_mv;
    226   Vp9InterpolationFilter interpolation_filter;
    227 
    228   bool refresh_frame_context;
    229   bool frame_parallel_decoding_mode;
    230   uint8_t frame_context_idx;
    231   // |frame_context_idx_to_save_probs| is to be used by save_probs() only, and
    232   // |frame_context_idx| otherwise.
    233   uint8_t frame_context_idx_to_save_probs;
    234 
    235   Vp9QuantizationParams quant_params;
    236 
    237   uint8_t tile_cols_log2;
    238   uint8_t tile_rows_log2;
    239 
    240   // Pointer to the beginning of frame data. It is a responsibility of the
    241   // client of the Vp9Parser to maintain validity of this data while it is
    242   // being used outside of that class.
    243   const uint8_t* data;
    244 
    245   // Size of |data| in bytes.
    246   size_t frame_size;
    247 
    248   // Size of compressed header in bytes.
    249   size_t header_size_in_bytes;
    250 
    251   // Size of uncompressed header in bytes.
    252   size_t uncompressed_header_size;
    253 
    254   Vp9CompressedHeader compressed_header;
    255   // Initial frame entropy context after load_probs2(frame_context_idx).
    256   Vp9FrameContext initial_frame_context;
    257   // Current frame entropy context after header parsing.
    258   Vp9FrameContext frame_context;
    259 };
    260 
    261 // A parser for VP9 bitstream.
    262 class Vp9Parser {
    263  public:
    264   // If context update is needed after decoding a frame, the client must
    265   // execute this callback, passing the updated context state.
    266   using ContextRefreshCallback = base::Callback<void(const Vp9FrameContext&)>;
    267 
    268   // ParseNextFrame() return values. See documentation for ParseNextFrame().
    269   enum Result {
    270     kOk,
    271     kInvalidStream,
    272     kEOStream,
    273     kAwaitingRefresh,
    274   };
    275 
    276   // The parsing context to keep track of references.
    277   struct ReferenceSlot {
    278     bool initialized;
    279     uint32_t frame_width;
    280     uint32_t frame_height;
    281     uint8_t subsampling_x;
    282     uint8_t subsampling_y;
    283     uint8_t bit_depth;
    284 
    285     // More fields for consistency checking.
    286     uint8_t profile;
    287     Vp9ColorSpace color_space;
    288   };
    289 
    290   // The parsing context that persists across frames.
    291   class Context {
    292    public:
    293     class Vp9FrameContextManager {
    294      public:
    295       Vp9FrameContextManager();
    296       ~Vp9FrameContextManager();
    297       bool initialized() const { return initialized_; }
    298       bool needs_client_update() const { return needs_client_update_; }
    299       const Vp9FrameContext& frame_context() const;
    300 
    301       // Resets to uninitialized state.
    302       void Reset();
    303 
    304       // Marks this context as requiring an update from parser's client.
    305       void SetNeedsClientUpdate();
    306 
    307       // Updates frame context.
    308       void Update(const Vp9FrameContext& frame_context);
    309 
    310       // Returns a callback to update frame context at a later time with.
    311       ContextRefreshCallback GetUpdateCb();
    312 
    313      private:
    314       // Updates frame context from parser's client.
    315       void UpdateFromClient(const Vp9FrameContext& frame_context);
    316 
    317       bool initialized_ = false;
    318       bool needs_client_update_ = false;
    319       Vp9FrameContext frame_context_;
    320 
    321       base::WeakPtrFactory<Vp9FrameContextManager> weak_ptr_factory_;
    322     };
    323 
    324     void Reset();
    325 
    326     // Mark |frame_context_idx| as requiring update from the client.
    327     void MarkFrameContextForUpdate(size_t frame_context_idx);
    328 
    329     // Update frame context at |frame_context_idx| with the contents of
    330     // |frame_context|.
    331     void UpdateFrameContext(size_t frame_context_idx,
    332                             const Vp9FrameContext& frame_context);
    333 
    334     // Return ReferenceSlot for frame at |ref_idx|.
    335     const ReferenceSlot& GetRefSlot(size_t ref_idx) const;
    336 
    337     // Update contents of ReferenceSlot at |ref_idx| with the contents of
    338     // |ref_slot|.
    339     void UpdateRefSlot(size_t ref_idx, const ReferenceSlot& ref_slot);
    340 
    341     const Vp9SegmentationParams& segmentation() const { return segmentation_; }
    342 
    343     const Vp9LoopFilterParams& loop_filter() const { return loop_filter_; }
    344 
    345    private:
    346     friend class Vp9UncompressedHeaderParser;
    347     friend class Vp9Parser;
    348 
    349     // Segmentation and loop filter state.
    350     Vp9SegmentationParams segmentation_;
    351     Vp9LoopFilterParams loop_filter_;
    352 
    353     // Frame references.
    354     ReferenceSlot ref_slots_[kVp9NumRefFrames];
    355 
    356     Vp9FrameContextManager frame_context_managers_[kVp9NumFrameContexts];
    357   };
    358 
    359   // The constructor. See ParseNextFrame() for comments for
    360   // |parsing_compressed_header|.
    361   explicit Vp9Parser(bool parsing_compressed_header);
    362   ~Vp9Parser();
    363 
    364   // Set a new stream buffer to read from, starting at |stream| and of size
    365   // |stream_size| in bytes. |stream| must point to the beginning of a single
    366   // frame or a single superframe, is owned by caller and must remain valid
    367   // until the next call to SetStream().
    368   void SetStream(const uint8_t* stream, off_t stream_size);
    369 
    370   // Parse the next frame in the current stream buffer, filling |fhdr| with
    371   // the parsed frame header and updating current segmentation and loop filter
    372   // state.
    373   // Return kOk if a frame has successfully been parsed,
    374   //        kEOStream if there is no more data in the current stream buffer,
    375   //        kAwaitingRefresh if this frame awaiting frame context update, or
    376   //        kInvalidStream on error.
    377   Result ParseNextFrame(Vp9FrameHeader* fhdr);
    378 
    379   // Return current parsing context.
    380   const Context& context() const { return context_; }
    381 
    382   // Return a ContextRefreshCallback, which, if not null, has to be called with
    383   // the new context state after the frame associated with |frame_context_idx|
    384   // is decoded.
    385   ContextRefreshCallback GetContextRefreshCb(size_t frame_context_idx);
    386 
    387   // Clear parser state and return to an initialized state.
    388   void Reset();
    389 
    390  private:
    391   // Stores start pointer and size of each frame within the current superframe.
    392   struct FrameInfo {
    393     FrameInfo() = default;
    394     FrameInfo(const uint8_t* ptr, off_t size);
    395     bool IsValid() const { return ptr != nullptr; }
    396     void Reset() { ptr = nullptr; }
    397 
    398     // Starting address of the frame.
    399     const uint8_t* ptr = nullptr;
    400 
    401     // Size of the frame in bytes.
    402     off_t size = 0;
    403   };
    404 
    405   std::deque<FrameInfo> ParseSuperframe();
    406 
    407   size_t GetQIndex(const Vp9QuantizationParams& quant, size_t segid) const;
    408   void SetupSegmentationDequant();
    409   void SetupLoopFilter();
    410   void UpdateSlots();
    411 
    412   // Current address in the bitstream buffer.
    413   const uint8_t* stream_;
    414 
    415   // Remaining bytes in stream_.
    416   off_t bytes_left_;
    417 
    418   bool parsing_compressed_header_;
    419 
    420   // FrameInfo for the remaining frames in the current superframe to be parsed.
    421   std::deque<FrameInfo> frames_;
    422 
    423   Context context_;
    424 
    425   FrameInfo curr_frame_info_;
    426   Vp9FrameHeader curr_frame_header_;
    427 
    428   DISALLOW_COPY_AND_ASSIGN(Vp9Parser);
    429 };
    430 
    431 }  // namespace media
    432 
    433 #endif  // VP9_PARSER_H_
    434