Home | History | Annotate | Download | only in vda
      1 // Copyright 2015 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 // Note: ported from Chromium commit head: 85fdf90
      5 
      6 #ifndef V4L2_SLICE_VIDEO_DECODE_ACCELERATOR_H_
      7 #define V4L2_SLICE_VIDEO_DECODE_ACCELERATOR_H_
      8 
#include <stddef.h>
#include <stdint.h>

#include <list>
#include <map>
#include <memory>
#include <queue>
#include <utility>
#include <vector>

#include "base/macros.h"
#include "base/memory/linked_ptr.h"
#include "base/memory/ref_counted.h"
#include "base/memory/weak_ptr.h"
#include "base/synchronization/waitable_event.h"
#include "base/threading/thread.h"
#include "h264_decoder.h"
#include "v4l2_device.h"
#include "video_decode_accelerator.h"
#include "videodev2.h"
#include "vp8_decoder.h"
#include "vp9_decoder.h"
     29 
     30 namespace media {
     31 
     32 // An implementation of VideoDecodeAccelerator that utilizes the V4L2 slice
     33 // level codec API for decoding. The slice level API provides only a low-level
     34 // decoding functionality and requires userspace to provide support for parsing
     35 // the input stream and managing decoder state across frames.
     36 class V4L2SliceVideoDecodeAccelerator
     37     : public VideoDecodeAccelerator {
     38  public:
     39   class V4L2DecodeSurface;
     40 
     41   V4L2SliceVideoDecodeAccelerator(
     42       const scoped_refptr<V4L2Device>& device);
     43   ~V4L2SliceVideoDecodeAccelerator() override;
     44 
     45   // VideoDecodeAccelerator implementation.
     46   bool Initialize(const Config& config, Client* client) override;
     47   void Decode(const BitstreamBuffer& bitstream_buffer) override;
     48   void AssignPictureBuffers(const std::vector<PictureBuffer>& buffers) override;
     49   void ImportBufferForPicture(
     50       int32_t picture_buffer_id,
     51       VideoPixelFormat pixel_format,
     52       const NativePixmapHandle& native_pixmap_handle) override;
     53   void ReusePictureBuffer(int32_t picture_buffer_id) override;
     54   void Flush() override;
     55   void Reset() override;
     56   void Destroy() override;
     57   bool TryToSetupDecodeOnSeparateThread(
     58       const base::WeakPtr<Client>& decode_client,
     59       const scoped_refptr<base::SingleThreadTaskRunner>& decode_task_runner)
     60       override;
     61 
     62   static VideoDecodeAccelerator::SupportedProfiles GetSupportedProfiles();
     63 
     64  private:
     65   class V4L2H264Accelerator;
     66   class V4L2VP8Accelerator;
     67   class V4L2VP9Accelerator;
     68 
     69   // Record for input buffers.
     70   struct InputRecord {
     71     InputRecord();
     72     int32_t input_id;
     73     void* address;
     74     size_t length;
     75     size_t bytes_used;
     76     bool at_device;
     77   };
     78 
     79   // Record for output buffers.
     80   struct OutputRecord {
     81     OutputRecord();
     82     OutputRecord(OutputRecord&&) = default;
     83     bool at_device;
     84     bool at_client;
     85     int32_t picture_id;
     86     std::vector<base::ScopedFD> dmabuf_fds;
     87     bool cleared;
     88   };
     89 
     90   // See http://crbug.com/255116.
     91   // Input bitstream buffer size for up to 1080p streams.
     92   const size_t kInputBufferMaxSizeFor1080p = 1024 * 1024;
     93   // Input bitstream buffer size for up to 4k streams.
     94   const size_t kInputBufferMaxSizeFor4k = 4 * kInputBufferMaxSizeFor1080p;
     95   const size_t kNumInputBuffers = 16;
     96 
     97   // Input format V4L2 fourccs this class supports.
     98   static const uint32_t supported_input_fourccs_[];
     99 
    100   //
    101   // Below methods are used by accelerator implementations.
    102   //
    103   // Append slice data in |data| of size |size| to pending hardware
    104   // input buffer with |index|. This buffer will be submitted for decode
    105   // on the next DecodeSurface(). Return true on success.
    106   bool SubmitSlice(int index, const uint8_t* data, size_t size);
    107 
    108   // Submit controls in |ext_ctrls| to hardware. Return true on success.
    109   bool SubmitExtControls(struct v4l2_ext_controls* ext_ctrls);
    110 
    111   // Gets current control values for controls in |ext_ctrls| from the driver.
    112   // Return true on success.
    113   bool GetExtControls(struct v4l2_ext_controls* ext_ctrls);
    114 
    115   // Return true if the driver exposes V4L2 control |ctrl_id|, false otherwise.
    116   bool IsCtrlExposed(uint32_t ctrl_id);
    117 
    118   // Decode of |dec_surface| is ready to be submitted and all codec-specific
    119   // settings are set in hardware.
    120   void DecodeSurface(const scoped_refptr<V4L2DecodeSurface>& dec_surface);
    121 
    122   // |dec_surface| is ready to be outputted once decode is finished.
    123   // This can be called before decode is actually done in hardware, and this
    124   // method is responsible for maintaining the ordering, i.e. the surfaces will
    125   // be outputted in the same order as SurfaceReady calls. To do so, the
    126   // surfaces are put on decoder_display_queue_ and sent to output in that
    127   // order once all preceding surfaces are sent.
    128   void SurfaceReady(const scoped_refptr<V4L2DecodeSurface>& dec_surface);
    129 
    130   //
    131   // Internal methods of this class.
    132   //
    133   // Recycle a V4L2 input buffer with |index| after dequeuing from device.
    134   void ReuseInputBuffer(int index);
    135 
    136   // Recycle V4L2 output buffer with |index|. Used as surface release callback.
    137   void ReuseOutputBuffer(int index);
    138 
    139   // Queue a |dec_surface| to device for decoding.
    140   void Enqueue(const scoped_refptr<V4L2DecodeSurface>& dec_surface);
    141 
    142   // Dequeue any V4L2 buffers available and process.
    143   void Dequeue();
    144 
    145   // V4L2 QBUF helpers.
    146   bool EnqueueInputRecord(int index, uint32_t config_store);
    147   bool EnqueueOutputRecord(int index);
    148 
    149   // Set input and output formats in hardware.
    150   bool SetupFormats();
    151 
    152   // Create input and output buffers.
    153   bool CreateInputBuffers();
    154   bool CreateOutputBuffers();
    155 
    156   // Destroy input buffers.
    157   void DestroyInputBuffers();
    158 
    159   // Destroy output buffers. If |dismiss| is true, also dismissing the
    160   // associated PictureBuffers.
    161   bool DestroyOutputs(bool dismiss);
    162 
    163   // Used by DestroyOutputs.
    164   bool DestroyOutputBuffers();
    165 
    166   // Dismiss all |picture_buffer_ids| via Client::DismissPictureBuffer()
    167   // and signal |done| after finishing.
    168   void DismissPictures(const std::vector<int32_t>& picture_buffer_ids,
    169                        base::WaitableEvent* done);
    170 
    171   // Task to finish initialization on decoder_thread_.
    172   void InitializeTask();
    173 
    174   void NotifyError(Error error);
    175   void DestroyTask();
    176 
    177   // Sets the state to kError and notifies client if needed.
    178   void SetErrorState(Error error);
    179 
    180   // Event handling. Events include flush, reset and resolution change and are
    181   // processed while in kIdle state.
    182 
    183   // Surface set change (resolution change) flow.
    184   // If we have no surfaces allocated, start it immediately, otherwise mark
    185   // ourselves as pending for surface set change.
    186   void InitiateSurfaceSetChange();
    187   // If a surface set change is pending and we are ready, stop the device,
    188   // destroy outputs, releasing resources and dismissing pictures as required,
    189   // followed by starting the flow to allocate a new set for the current
    190   // resolution/DPB size, as provided by decoder.
    191   bool FinishSurfaceSetChange();
    192 
    193   // Flush flow when requested by client.
    194   // When Flush() is called, it posts a FlushTask, which checks the input queue.
    195   // If nothing is pending for decode on decoder_input_queue_, we call
    196   // InitiateFlush() directly. Otherwise, we push a dummy BitstreamBufferRef
    197   // onto the decoder_input_queue_ to schedule a flush. When we reach it later
    198   // on, we call InitiateFlush() to perform it at the correct time.
    199   void FlushTask();
    200   // Tell the decoder to flush all frames, reset it and mark us as scheduled
    201   // for flush, so that we can finish it once all pending decodes are finished.
    202   void InitiateFlush();
    203   // To be called if decoder_flushing_ is true. If not all pending frames are
    204   // decoded, return false, requesting the caller to try again later.
    205   // Otherwise perform flush by sending all pending pictures to the client,
    206   // notify it that flush is finished and return true, informing the caller
    207   // that further progress can be made.
    208   bool FinishFlush();
    209 
    210   // Reset flow when requested by client.
    211   // Drop all inputs, reset the decoder and mark us as pending for reset.
    212   void ResetTask();
    213   // To be called if decoder_resetting_ is true. If not all pending frames are
    214   // decoded, return false, requesting the caller to try again later.
    215   // Otherwise perform reset by dropping all pending outputs (client is not
    216   // interested anymore), notifying it that reset is finished, and return true,
    217   // informing the caller that further progress can be made.
    218   bool FinishReset();
    219 
    220   // Called when a new event is pended. Transitions us into kIdle state (if not
    221   // already in it), if possible. Also starts processing events.
    222   void NewEventPending();
    223 
    224   // Called after all events are processed successfully (i.e. all Finish*()
    225   // methods return true) to return to decoding state.
    226   bool FinishEventProcessing();
    227 
    228   // Process pending events, if any.
    229   void ProcessPendingEventsIfNeeded();
    230 
    231   // Allocate V4L2 buffers and assign them to |buffers| provided by the client
    232   // via AssignPictureBuffers() on decoder thread.
    233   void AssignPictureBuffersTask(const std::vector<PictureBuffer>& buffers);
    234 
    235   // Use buffer backed by dmabuf file descriptors in |passed_dmabuf_fds| for the
    236   // OutputRecord associated with |picture_buffer_id|, taking ownership of the
    237   // file descriptors.
    238   void ImportBufferForPictureTask(
    239       int32_t picture_buffer_id,
    240       // TODO(posciak): (https://crbug.com/561749) we should normally be able to
    241       // pass the vector by itself via std::move, but it's not possible to do
    242       // this if this method is used as a callback.
    243       std::unique_ptr<std::vector<base::ScopedFD>> passed_dmabuf_fds);
    244 
    245   // Performed on decoder_thread_ as a consequence of poll() on decoder_thread_
    246   // returning an event.
    247   void ServiceDeviceTask();
    248 
    249   // Schedule poll if we have any buffers queued and the poll thread
    250   // is not stopped (on surface set change).
    251   void SchedulePollIfNeeded();
    252 
    253   // Attempt to start/stop device_poll_thread_.
    254   bool StartDevicePoll();
    255   bool StopDevicePoll(bool keep_input_state);
    256 
    257   // Ran on device_poll_thread_ to wait for device events.
    258   void DevicePollTask(bool poll_device);
    259 
    260   enum State {
    261     // We are in this state until Initialize() returns successfully.
    262     // We can't post errors to the client in this state yet.
    263     kUninitialized,
    264     // Initialize() returned successfully.
    265     kInitialized,
    266     // This state allows making progress decoding more input stream.
    267     kDecoding,
    268     // Transitional state when we are not decoding any more stream, but are
    269     // performing flush, reset, resolution change or are destroying ourselves.
    270     kIdle,
    271     // Requested new PictureBuffers via ProvidePictureBuffers(), awaiting
    272     // AssignPictureBuffers().
    273     kAwaitingPictureBuffers,
    274     // Error state, set when sending NotifyError to client.
    275     kError,
    276   };
    277 
    278   // Buffer id for flush buffer, queued by FlushTask().
    279   const int kFlushBufferId = -2;
    280 
    281   // Handler for Decode() on decoder_thread_.
    282   void DecodeTask(const BitstreamBuffer& bitstream_buffer);
    283 
    284   // Schedule a new DecodeBufferTask if we are decoding.
    285   void ScheduleDecodeBufferTaskIfNeeded();
    286 
    287   // Main decoder loop. Keep decoding the current buffer in decoder_, asking
    288   // for more stream via TrySetNewBistreamBuffer() if decoder_ requests so,
    289   // and handle other returns from it appropriately.
    290   void DecodeBufferTask();
    291 
    292   // Check decoder_input_queue_ for any available buffers to decode and
    293   // set the decoder_current_bitstream_buffer_ to the next buffer if one is
    294   // available, taking it off the queue. Also set the current stream pointer
    295   // in decoder_, and return true.
    296   // Return false if no buffers are pending on decoder_input_queue_.
    297   bool TrySetNewBistreamBuffer();
    298 
    299   // Auto-destruction reference for EGLSync (for message-passing).
    300   void ReusePictureBufferTask(int32_t picture_buffer_id);
    301 
    302   // Called to actually send |dec_surface| to the client, after it is decoded
    303   // preserving the order in which it was scheduled via SurfaceReady().
    304   void OutputSurface(const scoped_refptr<V4L2DecodeSurface>& dec_surface);
    305 
    306   // Goes over the |decoder_display_queue_| and sends all buffers from the
    307   // front of the queue that are already decoded to the client, in order.
    308   void TryOutputSurfaces();
    309 
    310   // Creates a new decode surface or returns nullptr if one is not available.
    311   scoped_refptr<V4L2DecodeSurface> CreateSurface();
    312 
    313   // Send decoded pictures to PictureReady.
    314   void SendPictureReady();
    315 
    316   // Callback that indicates a picture has been cleared.
    317   void PictureCleared();
    318 
    319   size_t input_planes_count_;
    320   size_t output_planes_count_;
    321 
    322   // GPU Child thread task runner.
    323   const scoped_refptr<base::SingleThreadTaskRunner> child_task_runner_;
    324 
    325   // Task runner Decode() and PictureReady() run on.
    326   scoped_refptr<base::SingleThreadTaskRunner> decode_task_runner_;
    327 
    328   // WeakPtr<> pointing to |this| for use in posting tasks from the decoder or
    329   // device worker threads back to the child thread.
    330   base::WeakPtr<V4L2SliceVideoDecodeAccelerator> weak_this_;
    331 
    332   // To expose client callbacks from VideoDecodeAccelerator.
    333   // NOTE: all calls to these objects *MUST* be executed on
    334   // child_task_runner_.
    335   std::unique_ptr<base::WeakPtrFactory<VideoDecodeAccelerator::Client>>
    336       client_ptr_factory_;
    337   base::WeakPtr<VideoDecodeAccelerator::Client> client_;
    338   // Callbacks to |decode_client_| must be executed on |decode_task_runner_|.
    339   base::WeakPtr<Client> decode_client_;
    340 
    341   // V4L2 device in use.
    342   scoped_refptr<V4L2Device> device_;
    343 
    344   // Thread to communicate with the device on.
    345   base::Thread decoder_thread_;
    346   scoped_refptr<base::SingleThreadTaskRunner> decoder_thread_task_runner_;
    347 
    348   // Thread used to poll the device for events.
    349   base::Thread device_poll_thread_;
    350 
    351   // Input queue state.
    352   bool input_streamon_;
    353   // Number of input buffers enqueued to the device.
    354   int input_buffer_queued_count_;
    355   // Input buffers ready to use; LIFO since we don't care about ordering.
    356   std::list<int> free_input_buffers_;
    357   // Mapping of int index to an input buffer record.
    358   std::vector<InputRecord> input_buffer_map_;
    359 
    360   // Output queue state.
    361   bool output_streamon_;
    362   // Number of output buffers enqueued to the device.
    363   int output_buffer_queued_count_;
    364   // Output buffers ready to use.
    365   std::list<int> free_output_buffers_;
    366   // Mapping of int index to an output buffer record.
    367   std::vector<OutputRecord> output_buffer_map_;
    368 
    369   VideoCodecProfile video_profile_;
    370   uint32_t input_format_fourcc_;
    371   uint32_t output_format_fourcc_;
    372   Size coded_size_;
    373 
    374   struct BitstreamBufferRef;
    375   // Input queue of stream buffers coming from the client.
    376   std::queue<linked_ptr<BitstreamBufferRef>> decoder_input_queue_;
    377   // BitstreamBuffer currently being processed.
    378   std::unique_ptr<BitstreamBufferRef> decoder_current_bitstream_buffer_;
    379 
    380   // Queue storing decode surfaces ready to be output as soon as they are
    381   // decoded. The surfaces must be output in order they are queued.
    382   std::queue<scoped_refptr<V4L2DecodeSurface>> decoder_display_queue_;
    383 
    384   // Decoder state.
    385   State state_;
    386 
    387   Config::OutputMode output_mode_;
    388 
    389   // If any of these are true, we are waiting for the device to finish decoding
    390   // all previously-queued frames, so we can finish the flush/reset/surface
    391   // change flows. These can stack.
    392   bool decoder_flushing_;
    393   bool decoder_resetting_;
    394   bool surface_set_change_pending_;
    395 
    396   // Hardware accelerators.
    397   // TODO(posciak): Try to have a superclass here if possible.
    398   std::unique_ptr<V4L2H264Accelerator> h264_accelerator_;
    399   std::unique_ptr<V4L2VP8Accelerator> vp8_accelerator_;
    400   std::unique_ptr<V4L2VP9Accelerator> vp9_accelerator_;
    401 
    402   // Codec-specific software decoder in use.
    403   std::unique_ptr<AcceleratedVideoDecoder> decoder_;
    404 
    405   // Surfaces queued to device to keep references to them while decoded.
    406   using V4L2DecodeSurfaceByOutputId =
    407       std::map<int, scoped_refptr<V4L2DecodeSurface>>;
    408   V4L2DecodeSurfaceByOutputId surfaces_at_device_;
    409 
    410   // Surfaces sent to client to keep references to them while displayed.
    411   using V4L2DecodeSurfaceByPictureBufferId =
    412       std::map<int32_t, scoped_refptr<V4L2DecodeSurface>>;
    413   V4L2DecodeSurfaceByPictureBufferId surfaces_at_display_;
    414 
    415   // Record for decoded pictures that can be sent to PictureReady.
    416   struct PictureRecord {
    417     PictureRecord(bool cleared, const Picture& picture);
    418     ~PictureRecord();
    419     bool cleared;  // Whether the texture is cleared and safe to render from.
    420     Picture picture;  // The decoded picture.
    421   };
    422 
    423   // Pictures that are ready but not sent to PictureReady yet.
    424   std::queue<PictureRecord> pending_picture_ready_;
    425 
    426   // The number of pictures that are sent to PictureReady and will be cleared.
    427   int picture_clearing_count_;
    428 
    429   // The WeakPtrFactory for |weak_this_|.
    430   base::WeakPtrFactory<V4L2SliceVideoDecodeAccelerator> weak_this_factory_;
    431 
    432   DISALLOW_COPY_AND_ASSIGN(V4L2SliceVideoDecodeAccelerator);
    433 };
    434 
    435 }  // namespace media
    436 
    437 #endif  // V4L2_SLICE_VIDEO_DECODE_ACCELERATOR_H_
    438