Home | History | Annotate | Download | only in vda
      1 // Copyright 2015 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef V4L2_SLICE_VIDEO_DECODE_ACCELERATOR_H_
      6 #define V4L2_SLICE_VIDEO_DECODE_ACCELERATOR_H_
      7 
      8 #include <stddef.h>
      9 #include <stdint.h>
     10 
        #include <list>
        #include <map>
     11 #include <memory>
     12 #include <queue>
     13 #include <utility>
     14 #include <vector>
     15 
     16 #include "base/macros.h"
     17 #include "base/memory/linked_ptr.h"
     18 #include "base/memory/ref_counted.h"
     19 #include "base/memory/weak_ptr.h"
     20 #include "base/synchronization/waitable_event.h"
     21 #include "base/threading/thread.h"
     22 #include "h264_decoder.h"
     23 #include "v4l2_device.h"
     24 #include "video_decode_accelerator.h"
     25 #include "videodev2.h"
     26 #include "vp8_decoder.h"
     27 #include "vp9_decoder.h"
     28 
     29 namespace media {
     30 
     31 // An implementation of VideoDecodeAccelerator that utilizes the V4L2 slice
     32 // level codec API for decoding. The slice level API provides only a low-level
     33 // decoding functionality and requires userspace to provide support for parsing
     34 // the input stream and managing decoder state across frames.
        //
        // Threads referenced by the declarations below:
        //  - the child thread (child_task_runner_), on which client callbacks must
        //    be executed;
        //  - decoder_thread_, used to communicate with the device;
        //  - device_poll_thread_, used to poll the device for events.
     35 class V4L2SliceVideoDecodeAccelerator
     36     : public VideoDecodeAccelerator {
     37  public:
     38   class V4L2DecodeSurface;
     39 
          // Creates an accelerator on top of the V4L2 |device|. Marked explicit so
          // that a scoped_refptr<V4L2Device> never silently converts into an
          // accelerator instance.
     40   explicit V4L2SliceVideoDecodeAccelerator(
     41       const scoped_refptr<V4L2Device>& device);
     42   ~V4L2SliceVideoDecodeAccelerator() override;
     43 
     44   // VideoDecodeAccelerator implementation.
     45   bool Initialize(const Config& config, Client* client) override;
     46   void Decode(const BitstreamBuffer& bitstream_buffer) override;
     47   void AssignPictureBuffers(const std::vector<PictureBuffer>& buffers) override;
     48   void ImportBufferForPicture(
     49       int32_t picture_buffer_id,
     50       const std::vector<base::FileDescriptor>& dmabuf_fds) override;
     51   void ReusePictureBuffer(int32_t picture_buffer_id) override;
     52   void Flush() override;
     53   void Reset() override;
     54   void Destroy() override;
     55   bool TryToSetupDecodeOnSeparateThread(
     56       const base::WeakPtr<Client>& decode_client,
     57       const scoped_refptr<base::SingleThreadTaskRunner>& decode_task_runner)
     58       override;
     59 
     60   static VideoDecodeAccelerator::SupportedProfiles GetSupportedProfiles();
     61 
     62  private:
          // Codec-specific accelerator implementations; only forward-declared here.
     63   class V4L2H264Accelerator;
     64   class V4L2VP8Accelerator;
     65   class V4L2VP9Accelerator;
     66 
     67   // Record for input buffers.
     68   struct InputRecord {
     69     InputRecord();
     70     int32_t input_id;
     71     void* address;
     72     size_t length;
     73     size_t bytes_used;
     74     bool at_device;  // Presumably true while queued to the device - confirm.
     75   };
     76 
     77   // Record for output buffers.
     78   struct OutputRecord {
     79     OutputRecord();
     80     bool at_device;  // Presumably true while queued to the device - confirm.
     81     bool at_client;  // Presumably true while held by the client - confirm.
     82     int32_t picture_id;
     83     std::vector<base::ScopedFD> dmabuf_fds;
     84     bool cleared;
     85   };
     86 
          // NOTE: the three constants below are per-instance const data members (not
          // static). kInputBufferMaxSizeFor4k is initialized from
          // kInputBufferMaxSizeFor1080p, so their declaration order must be kept.
     87   // See http://crbug.com/255116.
     88   // Input bitstream buffer size for up to 1080p streams.
     89   const size_t kInputBufferMaxSizeFor1080p = 1024 * 1024;
     90   // Input bitstream buffer size for up to 4k streams.
     91   const size_t kInputBufferMaxSizeFor4k = 4 * kInputBufferMaxSizeFor1080p;
          // Presumably the number of V4L2 input buffers to allocate - confirm in the
          // implementation file.
     92   const size_t kNumInputBuffers = 16;
     93 
     94   // Input format V4L2 fourccs this class supports.
     95   static const uint32_t supported_input_fourccs_[];
     96 
     97   //
     98   // Below methods are used by accelerator implementations.
     99   //
    100   // Append slice data in |data| of size |size| to pending hardware
    101   // input buffer with |index|. This buffer will be submitted for decode
    102   // on the next DecodeSurface(). Return true on success.
    103   bool SubmitSlice(int index, const uint8_t* data, size_t size);
    104 
    105   // Submit controls in |ext_ctrls| to hardware. Return true on success.
    106   bool SubmitExtControls(struct v4l2_ext_controls* ext_ctrls);
    107 
    108   // Gets current control values for controls in |ext_ctrls| from the driver.
    109   // Return true on success.
    110   bool GetExtControls(struct v4l2_ext_controls* ext_ctrls);
    111 
    112   // Return true if the driver exposes V4L2 control |ctrl_id|, false otherwise.
    113   bool IsCtrlExposed(uint32_t ctrl_id);
    114 
    115   // Decode of |dec_surface| is ready to be submitted and all codec-specific
    116   // settings are set in hardware.
    117   void DecodeSurface(const scoped_refptr<V4L2DecodeSurface>& dec_surface);
    118 
    119   // |dec_surface| is ready to be outputted once decode is finished.
    120   // This can be called before decode is actually done in hardware, and this
    121   // method is responsible for maintaining the ordering, i.e. the surfaces will
    122   // be outputted in the same order as SurfaceReady calls. To do so, the
    123   // surfaces are put on decoder_display_queue_ and sent to output in that
    124   // order once all preceding surfaces are sent.
    125   void SurfaceReady(const scoped_refptr<V4L2DecodeSurface>& dec_surface);
    126 
    127   //
    128   // Internal methods of this class.
    129   //
    130   // Recycle a V4L2 input buffer with |index| after dequeuing from device.
    131   void ReuseInputBuffer(int index);
    132 
    133   // Recycle V4L2 output buffer with |index|. Used as surface release callback.
    134   void ReuseOutputBuffer(int index);
    135 
    136   // Queue a |dec_surface| to device for decoding.
    137   void Enqueue(const scoped_refptr<V4L2DecodeSurface>& dec_surface);
    138 
    139   // Dequeue any V4L2 buffers available and process.
    140   void Dequeue();
    141 
    142   // V4L2 QBUF helpers.
    143   bool EnqueueInputRecord(int index, uint32_t config_store);
    144   bool EnqueueOutputRecord(int index);
    145 
    146   // Set input and output formats in hardware.
    147   bool SetupFormats();
    148 
    149   // Create input and output buffers.
    150   bool CreateInputBuffers();
    151   bool CreateOutputBuffers();
    152 
    153   // Destroy input buffers.
    154   void DestroyInputBuffers();
    155 
    156   // Destroy output buffers. If |dismiss| is true, also dismiss the
    157   // associated PictureBuffers.
    158   bool DestroyOutputs(bool dismiss);
    159 
    160   // Used by DestroyOutputs.
    161   bool DestroyOutputBuffers();
    162 
    163   // Dismiss all |picture_buffer_ids| via Client::DismissPictureBuffer()
    164   // and signal |done| after finishing.
    165   void DismissPictures(const std::vector<int32_t>& picture_buffer_ids,
    166                        base::WaitableEvent* done);
    167 
    168   // Task to finish initialization on decoder_thread_.
    169   void InitializeTask();
    170 
    171   void NotifyError(Error error);
    172   void DestroyTask();
    173 
    174   // Sets the state to kError and notifies client if needed.
    175   void SetErrorState(Error error);
    176 
    177   // Event handling. Events include flush, reset and resolution change and are
    178   // processed while in kIdle state.
    179 
    180   // Surface set change (resolution change) flow.
    181   // If we have no surfaces allocated, start it immediately, otherwise mark
    182   // ourselves as pending for surface set change.
    183   void InitiateSurfaceSetChange();
    184   // If a surface set change is pending and we are ready, stop the device,
    185   // destroy outputs, releasing resources and dismissing pictures as required,
    186   // followed by starting the flow to allocate a new set for the current
    187   // resolution/DPB size, as provided by decoder.
    188   bool FinishSurfaceSetChange();
    189 
    190   // Flush flow when requested by client.
    191   // When Flush() is called, it posts a FlushTask, which checks the input queue.
    192   // If nothing is pending for decode on decoder_input_queue_, we call
    193   // InitiateFlush() directly. Otherwise, we push a dummy BitstreamBufferRef
    194   // onto the decoder_input_queue_ to schedule a flush. When we reach it later
    195   // on, we call InitiateFlush() to perform it at the correct time.
    196   void FlushTask();
    197   // Tell the decoder to flush all frames, reset it and mark us as scheduled
    198   // for flush, so that we can finish it once all pending decodes are finished.
    199   void InitiateFlush();
    200   // To be called if decoder_flushing_ is true. If not all pending frames are
    201   // decoded, return false, requesting the caller to try again later.
    202   // Otherwise perform flush by sending all pending pictures to the client,
    203   // notify it that flush is finished and return true, informing the caller
    204   // that further progress can be made.
    205   bool FinishFlush();
    206 
    207   // Reset flow when requested by client.
    208   // Drop all inputs, reset the decoder and mark us as pending for reset.
    209   void ResetTask();
    210   // To be called if decoder_resetting_ is true. If not all pending frames are
    211   // decoded, return false, requesting the caller to try again later.
    212   // Otherwise perform reset by dropping all pending outputs (client is not
    213   // interested anymore), notifying it that reset is finished, and return true,
    214   // informing the caller that further progress can be made.
    215   bool FinishReset();
    216 
    217   // Called when a new event is pended. Transitions us into kIdle state (if not
    218   // already in it), if possible. Also starts processing events.
    219   void NewEventPending();
    220 
    221   // Called after all events are processed successfully (i.e. all Finish*()
    222   // methods return true) to return to decoding state.
    223   bool FinishEventProcessing();
    224 
    225   // Process pending events, if any.
    226   void ProcessPendingEventsIfNeeded();
    227 
    228   // Allocate V4L2 buffers and assign them to |buffers| provided by the client
    229   // via AssignPictureBuffers() on decoder thread.
    230   void AssignPictureBuffersTask(const std::vector<PictureBuffer>& buffers);
    231 
    232   // Use buffer backed by dmabuf file descriptors in |passed_dmabuf_fds| for the
    233   // OutputRecord associated with |picture_buffer_id|, taking ownership of the
    234   // file descriptors.
    235   void ImportBufferForPictureTask(
    236       int32_t picture_buffer_id,
    237       // TODO(posciak): (crbug.com/561749) we should normally be able to pass
    238       // the vector by itself via std::move, but it's not possible to do this
    239       // if this method is used as a callback.
    240       std::unique_ptr<std::vector<base::ScopedFD>> passed_dmabuf_fds);
    241 
    242   // Performed on decoder_thread_ as a consequence of poll() on
    243   // device_poll_thread_ returning an event.
    244   void ServiceDeviceTask();
    245 
    246   // Schedule poll if we have any buffers queued and the poll thread
    247   // is not stopped (on surface set change).
    248   void SchedulePollIfNeeded();
    249 
    250   // Attempt to start/stop device_poll_thread_.
    251   bool StartDevicePoll();
    252   bool StopDevicePoll(bool keep_input_state);
    253 
    254   // Runs on device_poll_thread_ to wait for device events.
    255   void DevicePollTask(bool poll_device);
    256 
    257   enum State {
    258     // We are in this state until Initialize() returns successfully.
    259     // We can't post errors to the client in this state yet.
    260     kUninitialized,
    261     // Initialize() returned successfully.
    262     kInitialized,
    263     // This state allows making progress decoding more input stream.
    264     kDecoding,
    265     // Transitional state when we are not decoding any more stream, but are
    266     // performing flush, reset, resolution change or are destroying ourselves.
    267     kIdle,
    268     // Requested new PictureBuffers via ProvidePictureBuffers(), awaiting
    269     // AssignPictureBuffers().
    270     kAwaitingPictureBuffers,
    271     // Error state, set when sending NotifyError to client.
    272     kError,
    273   };
    274 
    275   // Buffer id for flush buffer, queued by FlushTask().
    276   const int kFlushBufferId = -2;
    277 
    278   // Handler for Decode() on decoder_thread_.
    279   void DecodeTask(const BitstreamBuffer& bitstream_buffer);
    280 
    281   // Schedule a new DecodeBufferTask if we are decoding.
    282   void ScheduleDecodeBufferTaskIfNeeded();
    283 
    284   // Main decoder loop. Keep decoding the current buffer in decoder_, asking
    285   // for more stream via TrySetNewBistreamBuffer() if decoder_ requests so,
    286   // and handle other returns from it appropriately.
    287   void DecodeBufferTask();
    288 
    289   // Check decoder_input_queue_ for any available buffers to decode and
    290   // set the decoder_current_bitstream_buffer_ to the next buffer if one is
    291   // available, taking it off the queue. Also set the current stream pointer
    292   // in decoder_, and return true.
    293   // Return false if no buffers are pending on decoder_input_queue_.
    294   bool TrySetNewBistreamBuffer();
    295 
    296   // Task counterpart of ReusePictureBuffer() for |picture_buffer_id|.
          // Presumably run on decoder_thread_ like DecodeTask() - confirm in the
          // implementation file.
    297   void ReusePictureBufferTask(int32_t picture_buffer_id);
    298 
    299   // Called to actually send |dec_surface| to the client, after it is decoded
    300   // preserving the order in which it was scheduled via SurfaceReady().
    301   void OutputSurface(const scoped_refptr<V4L2DecodeSurface>& dec_surface);
    302 
    303   // Goes over the |decoder_display_queue_| and sends all buffers from the
    304   // front of the queue that are already decoded to the client, in order.
    305   void TryOutputSurfaces();
    306 
    307   // Creates a new decode surface or returns nullptr if one is not available.
    308   scoped_refptr<V4L2DecodeSurface> CreateSurface();
    309 
    310   // Send decoded pictures to PictureReady.
    311   void SendPictureReady();
    312 
    313   // Callback that indicates a picture has been cleared.
    314   void PictureCleared();
    315 
    316   size_t input_planes_count_;
    317   size_t output_planes_count_;
    318 
    319   // GPU Child thread task runner.
    320   const scoped_refptr<base::SingleThreadTaskRunner> child_task_runner_;
    321 
    322   // Task runner Decode() and PictureReady() run on.
    323   scoped_refptr<base::SingleThreadTaskRunner> decode_task_runner_;
    324 
    325   // WeakPtr<> pointing to |this| for use in posting tasks from the decoder or
    326   // device worker threads back to the child thread.
    327   base::WeakPtr<V4L2SliceVideoDecodeAccelerator> weak_this_;
    328 
    329   // To expose client callbacks from VideoDecodeAccelerator.
    330   // NOTE: all calls to these objects *MUST* be executed on
    331   // child_task_runner_.
    332   std::unique_ptr<base::WeakPtrFactory<VideoDecodeAccelerator::Client>>
    333       client_ptr_factory_;
    334   base::WeakPtr<VideoDecodeAccelerator::Client> client_;
    335   // Callbacks to |decode_client_| must be executed on |decode_task_runner_|.
    336   base::WeakPtr<Client> decode_client_;
    337 
    338   // V4L2 device in use.
    339   scoped_refptr<V4L2Device> device_;
    340 
    341   // Thread to communicate with the device on.
    342   base::Thread decoder_thread_;
    343   scoped_refptr<base::SingleThreadTaskRunner> decoder_thread_task_runner_;
    344 
    345   // Thread used to poll the device for events.
    346   base::Thread device_poll_thread_;
    347 
    348   // Input queue state.
    349   bool input_streamon_;
    350   // Number of input buffers enqueued to the device.
    351   int input_buffer_queued_count_;
    352   // Input buffers ready to use; LIFO since we don't care about ordering.
    353   std::list<int> free_input_buffers_;
    354   // Mapping of int index to an input buffer record.
    355   std::vector<InputRecord> input_buffer_map_;
    356 
    357   // Output queue state.
    358   bool output_streamon_;
    359   // Number of output buffers enqueued to the device.
    360   int output_buffer_queued_count_;
    361   // Output buffers ready to use.
    362   std::list<int> free_output_buffers_;
    363   // Mapping of int index to an output buffer record.
    364   std::vector<OutputRecord> output_buffer_map_;
    365 
    366   VideoCodecProfile video_profile_;
    367   uint32_t input_format_fourcc_;
    368   uint32_t output_format_fourcc_;
    369   Size visible_size_;
    370   Size coded_size_;
    371 
    372   struct BitstreamBufferRef;
    373   // Input queue of stream buffers coming from the client.
    374   std::queue<linked_ptr<BitstreamBufferRef>> decoder_input_queue_;
    375   // BitstreamBuffer currently being processed.
    376   std::unique_ptr<BitstreamBufferRef> decoder_current_bitstream_buffer_;
    377 
    378   // Queue storing decode surfaces ready to be output as soon as they are
    379   // decoded. The surfaces must be output in order they are queued.
    380   std::queue<scoped_refptr<V4L2DecodeSurface>> decoder_display_queue_;
    381 
    382   // Decoder state.
    383   State state_;
    384 
    385   Config::OutputMode output_mode_;
    386 
    387   // If any of these are true, we are waiting for the device to finish decoding
    388   // all previously-queued frames, so we can finish the flush/reset/surface
    389   // change flows. These can stack.
    390   bool decoder_flushing_;
    391   bool decoder_resetting_;
    392   bool surface_set_change_pending_;
    393 
    394   // Hardware accelerators.
    395   // TODO(posciak): Try to have a superclass here if possible.
    396   std::unique_ptr<V4L2H264Accelerator> h264_accelerator_;
    397   std::unique_ptr<V4L2VP8Accelerator> vp8_accelerator_;
    398   std::unique_ptr<V4L2VP9Accelerator> vp9_accelerator_;
    399 
    400   // Codec-specific software decoder in use.
    401   std::unique_ptr<AcceleratedVideoDecoder> decoder_;
    402 
    403   // Surfaces queued to device to keep references to them while decoded.
    404   using V4L2DecodeSurfaceByOutputId =
    405       std::map<int, scoped_refptr<V4L2DecodeSurface>>;
    406   V4L2DecodeSurfaceByOutputId surfaces_at_device_;
    407 
    408   // Surfaces sent to client to keep references to them while displayed.
    409   using V4L2DecodeSurfaceByPictureBufferId =
    410       std::map<int32_t, scoped_refptr<V4L2DecodeSurface>>;
    411   V4L2DecodeSurfaceByPictureBufferId surfaces_at_display_;
    412 
    413   // Record for decoded pictures that can be sent to PictureReady.
    414   struct PictureRecord {
    415     PictureRecord(bool cleared, const Picture& picture);
    416     ~PictureRecord();
    417     bool cleared;  // Whether the texture is cleared and safe to render from.
    418     Picture picture;  // The decoded picture.
    419   };
    420   // Pictures that are ready but not sent to PictureReady yet.
    421   std::queue<PictureRecord> pending_picture_ready_;
    422 
    423   // The number of pictures that are sent to PictureReady and will be cleared.
    424   int picture_clearing_count_;
    425 
    426   // The WeakPtrFactory for |weak_this_|.
    427   base::WeakPtrFactory<V4L2SliceVideoDecodeAccelerator> weak_this_factory_;
    428 
    429   DISALLOW_COPY_AND_ASSIGN(V4L2SliceVideoDecodeAccelerator);
    430 };
    431 
    432 }  // namespace media
    433 
    434 #endif  // V4L2_SLICE_VIDEO_DECODE_ACCELERATOR_H_
    435