Home | History | Annotate | Download | only in media
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 //
      5 // This file contains an implementation of VideoDecodeAccelerator
      6 // that utilizes the hardware video decoder present on the Exynos SoC.
      7 
      8 #ifndef CONTENT_COMMON_GPU_MEDIA_EXYNOS_VIDEO_DECODE_ACCELERATOR_H_
      9 #define CONTENT_COMMON_GPU_MEDIA_EXYNOS_VIDEO_DECODE_ACCELERATOR_H_
     10 
     11 #include <list>
     12 #include <vector>
     13 
     14 #include "base/callback_forward.h"
     15 #include "base/memory/linked_ptr.h"
     16 #include "base/memory/scoped_ptr.h"
     17 #include "base/threading/thread.h"
     18 #include "content/common/content_export.h"
     19 #include "media/base/video_decoder_config.h"
     20 #include "media/video/video_decode_accelerator.h"
     21 #include "ui/gfx/size.h"
     22 #include "ui/gl/gl_bindings.h"
     23 
     24 namespace base {
        // Forward declaration only: this header refers to the type solely through
        // scoped_refptr<base::MessageLoopProxy> (see child_message_loop_proxy_).
     25 class MessageLoopProxy;
     26 }
     27 
     28 namespace content {
     29 class H264Parser;  // Forward declaration; held via scoped_ptr<> in decoder_h264_parser_.
     30 
     31 // This class handles Exynos video acceleration directly through the V4L2
     32 // devices exported by the Multi Format Codec and GScaler hardware blocks.
     33 //
     34 // The threading model of this class is driven by the fact that it needs to
     35 // interface two fundamentally different event queues -- the one Chromium
     36 // provides through MessageLoop, and the one driven by the V4L2 devices which
     37 // is waited on with epoll().  There are three threads involved in this class:
     38 //
     39 // * The child thread, which is the main GPU process thread which calls the
     40 //   media::VideoDecodeAccelerator entry points.  Calls from this thread
     41 //   generally do not block (with the exception of Initialize() and Destroy()).
     42 //   They post tasks to the decoder_thread_, which actually services the task
     43 //   and calls back when complete through the
     44 //   media::VideoDecodeAccelerator::Client interface.
     45 // * The decoder_thread_, owned by this class.  It services API tasks, through
     46 //   the *Task() routines, as well as V4L2 device events, through
     47 //   ServiceDeviceTask().  Almost all state modification is done on this thread.
     48 // * The device_poll_thread_, owned by this class.  All it does is epoll() on
     49 //   the V4L2 devices in DevicePollTask() and schedule a ServiceDeviceTask() on
     50 //   the decoder_thread_ when something interesting happens.
     51 //   TODO(sheu): replace this thread with a TYPE_IO decoder_thread_.
     52 //
     53 // Note that this class has no locks!  Everything's serviced on the
     54 // decoder_thread_, so there are no synchronization issues.
     55 // ... well, there are, but it's a matter of getting messages posted in the
     56 // right order, not fiddling with locks.
     57 class CONTENT_EXPORT ExynosVideoDecodeAccelerator :
     58     public media::VideoDecodeAccelerator {
     59  public:
          // NOTE(review): the constructor is only declared here.  Presumably
          // |egl_display|, |egl_context| and |make_context_current| are stored in the
          // correspondingly named members below, and |client| is what
          // client_ptr_factory_ / client_ wrap -- confirm against the definition.
     60   ExynosVideoDecodeAccelerator(
     61       EGLDisplay egl_display,
     62       EGLContext egl_context,
     63       Client* client,
     64       const base::Callback<bool(void)>& make_context_current);
     65   virtual ~ExynosVideoDecodeAccelerator();
     66 
     67   // media::VideoDecodeAccelerator implementation.
     68   // Note: Initialize() and Destroy() are synchronous.
     69   virtual bool Initialize(media::VideoCodecProfile profile) OVERRIDE;
     70   virtual void Decode(const media::BitstreamBuffer& bitstream_buffer) OVERRIDE;
     71   virtual void AssignPictureBuffers(
     72       const std::vector<media::PictureBuffer>& buffers) OVERRIDE;
     73   virtual void ReusePictureBuffer(int32 picture_buffer_id) OVERRIDE;
     74   virtual void Flush() OVERRIDE;
     75   virtual void Reset() OVERRIDE;
     76   virtual void Destroy() OVERRIDE;
     77 
     78   // Do any necessary initialization before the sandbox is enabled.
     79   static void PreSandboxInitialization();
     80 
     81   // Lazily initialize static data after sandbox is enabled.  Return false on
     82   // init failure.
     83   static bool PostSandboxInitialization();
     84 
     85  private:
     86   // These are rather subjectively tuned.
     87   enum {
     88     kMfcInputBufferCount = 8,
     89     // TODO(posciak): determine MFC input buffer size based on level limits.
     90     // See http://crbug.com/255116.
            // 1024 * 1024 == 1 MiB; presumably a per-buffer size in bytes -- confirm.
     91     kMfcInputBufferMaxSize = 1024 * 1024,
     92     kGscInputBufferCount = 4,
     93     // Number of output buffers to use for each VDA stage above what's required
     94     // by the decoder (e.g. DPB size, in H264).
     95     kDpbOutputBufferExtraCount = 3,
     96   };
     97 
     98   // Internal state of the decoder.
     99   enum State {
    100     kUninitialized,      // Initialize() not yet called.
    101     kInitialized,        // Initialize() returned true; ready to start decoding.
    102     kDecoding,           // DecodeBufferInitial() successful; decoding frames.
    103     kResetting,          // Presently resetting.
    104     kAfterReset,         // After Reset(), ready to start decoding again.
    105     kChangingResolution, // Performing resolution change, all remaining
    106                          // pre-change frames decoded and processed.
    107     kError,              // Error in kDecoding state.
    108   };
    109 
    110   enum BufferId {
    111     kFlushBufferId = -2  // Buffer id for flush buffer, queued by FlushTask().
    112   };
    113 
    114   // File descriptors we need to poll.
          // Each enumerator is a distinct bit, so values can be OR-ed into a mask;
          // presumably this is what DevicePollTask() receives as |poll_fds| -- confirm.
    115   enum PollFds {
    116     kPollMfc = (1 << 0),
    117     kPollGsc = (1 << 1),
    118   };
    119 
    120   // Auto-destruction reference for BitstreamBuffer, for message-passing from
    121   // Decode() to DecodeTask().
    122   struct BitstreamBufferRef;
    123 
    124   // Auto-destruction reference for an array of PictureBuffer, for
    125   // message-passing from AssignPictureBuffers() to AssignPictureBuffersTask().
    126   struct PictureBufferArrayRef;
    127 
    128   // Auto-destruction reference for EGLSync (for message-passing).
    129   struct EGLSyncKHRRef;
    130 
    131   // Record for MFC input buffers.
          // NOTE(review): bytes_used is off_t here but size_t in MfcOutputRecord;
          // confirm whether the signed type is intentional.
    132   struct MfcInputRecord {
    133     MfcInputRecord();
    134     ~MfcInputRecord();
    135     bool at_device;        // held by device.
    136     void* address;         // mmap() address.
    137     size_t length;         // mmap() length.
    138     off_t bytes_used;      // bytes filled in the mmap() segment.
    139     int32 input_id;        // triggering input_id as given to Decode().
    140   };
    141 
    142   // Record for MFC output buffers.
          // The two-element arrays hold one entry per plane (see
          // mfc_output_buffer_size_, which documents two planes).
    143   struct MfcOutputRecord {
    144     MfcOutputRecord();
    145     ~MfcOutputRecord();
    146     bool at_device;        // held by device.
    147     size_t bytes_used[2];  // bytes used in each dmabuf.
    148     void* address[2];      // mmap() address for each plane.
    149     size_t length[2];      // mmap() length for each plane.
    150     int32 input_id;        // triggering input_id as given to Decode().
    151   };
    152 
    153   // Record for GSC input buffers.
    154   struct GscInputRecord {
    155     GscInputRecord();
    156     ~GscInputRecord();
    157     bool at_device;        // held by device.
    158     int mfc_output;        // MFC output buffer index to recycle when this input
    159                            // is complete.
    160   };
    161 
    162   // Record for GSC output buffers.
    163   struct GscOutputRecord {
    164     GscOutputRecord();
    165     ~GscOutputRecord();
    166     bool at_device;        // held by device.
    167     bool at_client;        // held by client.
    168     int fd;                // file descriptor from backing EGLImage.
    169     EGLImageKHR egl_image; // backing EGLImage.
    170     EGLSyncKHR egl_sync;   // sync the compositor's use of the EGLImage.
    171     int32 picture_id;      // picture buffer id as returned to PictureReady().
    172   };
    173 
    174   //
    175   // Decoding tasks, to be run on decoder_thread_.
    176   //
    177 
    178   // Enqueue a BitstreamBuffer to decode.  This will enqueue a buffer to the
    179   // decoder_input_queue_, then queue a DecodeBufferTask() to actually decode
    180   // the buffer.
    181   void DecodeTask(scoped_ptr<BitstreamBufferRef> bitstream_record);
    182 
    183   // Decode from the buffers queued in decoder_input_queue_.  Calls
    184   // DecodeBufferInitial() or DecodeBufferContinue() as appropriate.
    185   void DecodeBufferTask();
    186   // Advance to the next fragment that begins a frame.
          // NOTE(review): presumably |endpos| receives the offset within |data| where
          // the fragment ends and the return value signals success -- confirm.
    187   bool AdvanceFrameFragment(const uint8* data, size_t size, size_t* endpos);
    188   // Schedule another DecodeBufferTask() if we're behind.
    189   void ScheduleDecodeBufferTaskIfNeeded();
    190 
    191   // Return true if we should continue to schedule DecodeBufferTask()s after
    192   // completion.  Store the amount of input actually consumed in |endpos|.
    193   bool DecodeBufferInitial(const void* data, size_t size, size_t* endpos);
    194   bool DecodeBufferContinue(const void* data, size_t size);
    195 
    196   // Accumulate data for the next frame to decode.  May return false in
    197   // non-error conditions; for example when pipeline is full and should be
    198   // retried later.
    199   bool AppendToInputFrame(const void* data, size_t size);
    200   // Flush data for one decoded frame.
    201   bool FlushInputFrame();
    202 
    203   // Process an AssignPictureBuffers() API call.  After this, the
    204   // device_poll_thread_ can be started safely, since we have all our
    205   // buffers.
    206   void AssignPictureBuffersTask(scoped_ptr<PictureBufferArrayRef> pic_buffers);
    207 
    208   // Service I/O on the V4L2 devices.  This task should only be scheduled from
    209   // DevicePollTask().  If |mfc_event_pending| is true, one or more events
    210   // on MFC file descriptor are pending.
    211   void ServiceDeviceTask(bool mfc_event_pending);
    212   // Handle the various device queues.
    213   void EnqueueMfc();
    214   void DequeueMfc();
    215   void EnqueueGsc();
    216   void DequeueGsc();
    217   // Handle incoming MFC events.
    218   void DequeueMfcEvents();
    219   // Enqueue a buffer on the corresponding queue.
    220   bool EnqueueMfcInputRecord();
    221   bool EnqueueMfcOutputRecord();
    222   bool EnqueueGscInputRecord();
    223   bool EnqueueGscOutputRecord();
    224 
    225   // Process a ReusePictureBuffer() API call.  The API call creates an EGLSync
    226   // object on the main (GPU process) thread; we will record this object so we
    227   // can wait on it before reusing the buffer.
    228   void ReusePictureBufferTask(int32 picture_buffer_id,
    229                               scoped_ptr<EGLSyncKHRRef> egl_sync_ref);
    230 
    231   // Flush() task.  Child thread should not submit any more buffers until it
    232   // receives the NotifyFlushDone callback.  This task will schedule an empty
    233   // BitstreamBufferRef (with input_id == kFlushBufferId) to perform the flush.
    234   void FlushTask();
    235   // Notify the client of a flush completion, if required.  This should be
    236   // called any time a relevant queue could potentially be emptied: see
    237   // function definition.
    238   void NotifyFlushDoneIfNeeded();
    239 
    240   // Reset() task.  This task will schedule a ResetDoneTask() that will send
    241   // the NotifyResetDone callback, then set the decoder state to kResetting so
    242   // that all intervening tasks will drain.
    243   void ResetTask();
    244   // ResetDoneTask() will set the decoder state back to kAfterReset, so
    245   // subsequent decoding can continue.
    246   void ResetDoneTask();
    247 
    248   // Device destruction task.
    249   void DestroyTask();
    250 
    251   // Attempt to start/stop device_poll_thread_.
    252   bool StartDevicePoll();
    253   // If |keep_mfc_input_state| is true, don't reset MFC input state; used during
    254   // resolution change.
    255   bool StopDevicePoll(bool keep_mfc_input_state);
    256   // Set/clear the device poll interrupt (using device_poll_interrupt_fd_).
    257   bool SetDevicePollInterrupt();
    258   bool ClearDevicePollInterrupt();
    259 
          // Resolution change handling.  NOTE(review): the exact sequencing lives in
          // the definitions; these presumably drive the kChangingResolution state
          // together with resolution_change_pending_ and
          // resolution_change_reset_pending_ -- confirm.
    260   void StartResolutionChangeIfNeeded();
    261   void FinishResolutionChange();
    262   void ResumeAfterResolutionChange();
    263 
    264   // Try to get output format from MFC, detected after parsing the beginning
    265   // of the stream. Sets |again| to true if more parsing is needed.
    266   bool GetFormatInfo(struct v4l2_format* format, bool* again);
    267   // Create MFC output and GSC input and output buffers for the given |format|.
    268   bool CreateBuffersForFormat(const struct v4l2_format& format);
    269 
    270   //
    271   // Device tasks, to be run on device_poll_thread_.
    272   //
    273 
    274   // The device task.
    275   void DevicePollTask(unsigned int poll_fds);
    276 
    277   //
    278   // Safe from any thread.
    279   //
    280 
    281   // Error notification (using PostTask() to child thread, if necessary).
    282   void NotifyError(Error error);
    283 
    284   // Set the decoder_thread_ state (using PostTask to decoder thread, if
    285   // necessary).
    286   void SetDecoderState(State state);
    287 
    288   //
    289   // Other utility functions.  Called on decoder_thread_, unless
    290   // decoder_thread_ is not yet started, in which case the child thread can call
    291   // these (e.g. in Initialize() or Destroy()).
    292   //
    293 
    294   // Create the buffers we need.
    295   bool CreateMfcInputBuffers();
    296   bool CreateMfcOutputBuffers();
    297   bool CreateGscInputBuffers();
    298   bool CreateGscOutputBuffers();
    299 
    300   //
    301   // Methods run on child thread.
    302   //
    303 
    304   // Destroy buffers.
    305   void DestroyMfcInputBuffers();
    306   void DestroyMfcOutputBuffers();
    307   void DestroyGscInputBuffers();
    308   void DestroyGscOutputBuffers();
    309   void ResolutionChangeDestroyBuffers();
    310 
    311   // Our original calling message loop for the child thread.
    312   scoped_refptr<base::MessageLoopProxy> child_message_loop_proxy_;
    313 
    314   // WeakPtr<> pointing to |this| for use in posting tasks from the decoder or
    315   // device worker threads back to the child thread.  Because the worker threads
    316   // are members of this class, any task running on those threads is guaranteed
    317   // that this object is still alive.  As a result, tasks posted from the child
    318   // thread to the decoder or device thread should use base::Unretained(this),
    319   // and tasks posted the other way should use |weak_this_|.
          // NOTE(review): this header does not directly include
          // base/memory/weak_ptr.h; presumably it arrives transitively -- confirm.
    320   base::WeakPtr<ExynosVideoDecodeAccelerator> weak_this_;
    321 
    322   // To expose client callbacks from VideoDecodeAccelerator.
    323   // NOTE: all calls to these objects *MUST* be executed on
    324   // child_message_loop_proxy_.
    325   base::WeakPtrFactory<Client> client_ptr_factory_;
    326   base::WeakPtr<Client> client_;
    327 
    328   //
    329   // Decoder state, owned and operated by decoder_thread_.
    330   // Before decoder_thread_ has started, the decoder state is managed by
    331   // the child (main) thread.  After decoder_thread_ has started, the decoder
    332   // thread should be the only one managing these.
    333   //
    334 
    335   // This thread services tasks posted from the VDA API entry points by the
    336   // child thread and device service callbacks posted from the device thread.
    337   base::Thread decoder_thread_;
    338   // Decoder state machine state.
    339   State decoder_state_;
    340   // BitstreamBuffer we're presently reading.
    341   scoped_ptr<BitstreamBufferRef> decoder_current_bitstream_buffer_;
    342   // FlushTask() and ResetTask() should not affect buffers that have been
    343   // queued afterwards.  For flushing or resetting the pipeline then, we will
    344   // delay these buffers until after the flush or reset completes.
    345   int decoder_delay_bitstream_buffer_id_;
    346   // MFC input buffer we're presently filling.
    347   int decoder_current_input_buffer_;
    348   // We track the number of buffer decode tasks we have scheduled, since each
    349   // task execution should complete one buffer.  If we fall behind (due to
    350   // resource backpressure, etc.), we'll have to schedule more to catch up.
    351   int decoder_decode_buffer_tasks_scheduled_;
    352   // Picture buffers held by the client.
    353   int decoder_frames_at_client_;
    354   // Are we flushing?
    355   bool decoder_flushing_;
    356   // Got a notification from driver that it reached resolution change point
    357   // in the stream.
    358   bool resolution_change_pending_;
    359   // Got a reset request while we were performing resolution change.
    360   bool resolution_change_reset_pending_;
    361   // Input queue for decoder_thread_: BitstreamBuffers in.
    362   std::list<linked_ptr<BitstreamBufferRef> > decoder_input_queue_;
    363   // For H264 decode, hardware requires that we send it frame-sized chunks.
    364   // We'll need to parse the stream.
    365   scoped_ptr<content::H264Parser> decoder_h264_parser_;
    366   // Set if the decoder has a pending incomplete frame in an input buffer.
    367   bool decoder_partial_frame_pending_;
    368 
    369   //
    370   // Hardware state and associated queues.  Since decoder_thread_ services
    371   // the hardware, decoder_thread_ owns these too.
    372   //
    373 
    374   // Completed decode buffers, waiting for MFC.
    375   std::list<int> mfc_input_ready_queue_;
    376 
    377   // MFC decode device.
    378   int mfc_fd_;
    379 
    380   // MFC input buffer state.
    381   bool mfc_input_streamon_;
    382   // MFC input buffers enqueued to device.
    383   int mfc_input_buffer_queued_count_;
    384   // Input buffers ready to use, as a LIFO since we don't care about ordering.
    385   std::vector<int> mfc_free_input_buffers_;
    386   // Mapping of int index to MFC input buffer record.
    387   std::vector<MfcInputRecord> mfc_input_buffer_map_;
    388 
    389   // MFC output buffer state.
    390   bool mfc_output_streamon_;
    391   // MFC output buffers enqueued to device.
    392   int mfc_output_buffer_queued_count_;
    393   // Output buffers ready to use, as a LIFO since we don't care about ordering.
    394   std::vector<int> mfc_free_output_buffers_;
    395   // Mapping of int index to MFC output buffer record.
    396   std::vector<MfcOutputRecord> mfc_output_buffer_map_;
    397   // Required size of MFC output buffers.  Two sizes for two planes.
    398   size_t mfc_output_buffer_size_[2];
          // Pixel format of the MFC output buffers (presumably a V4L2 fourcc value --
          // confirm).
    399   uint32 mfc_output_buffer_pixelformat_;
    400   // Required size of DPB for decoding.
    401   int mfc_output_dpb_size_;
    402 
    403   // Completed MFC outputs, waiting for GSC.
    404   std::list<int> mfc_output_gsc_input_queue_;
    405 
    406   // GSC device (presumably the GScaler block named in the class comment; the
          // earlier wording "GSC decode device" looked copied from mfc_fd_ -- confirm).
    407   int gsc_fd_;
    408 
    409   // GSC input buffer state.
    410   bool gsc_input_streamon_;
    411   // GSC input buffers enqueued to device.
    412   int gsc_input_buffer_queued_count_;
    413   // Input buffers ready to use, as a LIFO since we don't care about ordering.
    414   std::vector<int> gsc_free_input_buffers_;
    415   // Mapping of int index to GSC input buffer record.
    416   std::vector<GscInputRecord> gsc_input_buffer_map_;
    417 
    418   // GSC output buffer state.
    419   bool gsc_output_streamon_;
    420   // GSC output buffers enqueued to device.
    421   int gsc_output_buffer_queued_count_;
    422   // Output buffers ready to use.  We need a FIFO here.
    423   std::list<int> gsc_free_output_buffers_;
    424   // Mapping of int index to GSC output buffer record.
    425   std::vector<GscOutputRecord> gsc_output_buffer_map_;
    426 
    427   // Output picture size.
    428   gfx::Size frame_buffer_size_;
    429 
    430   //
    431   // The device polling thread handles notifications of V4L2 device changes.
    432   //
    433 
    434   // The thread.
    435   base::Thread device_poll_thread_;
    436   // eventfd fd to signal device poll thread when its poll() should be
    437   // interrupted.
    438   int device_poll_interrupt_fd_;
    439 
    440   //
    441   // Other state, held by the child (main) thread.
    442   //
    443 
    444   // Make our context current before running any EGL entry points.
    445   base::Callback<bool(void)> make_context_current_;
    446 
    447   // EGL state
    448   EGLDisplay egl_display_;
    449   EGLContext egl_context_;
    450 
    451   // The codec we'll be decoding for.
    452   media::VideoCodecProfile video_profile_;
    453 
    454   DISALLOW_COPY_AND_ASSIGN(ExynosVideoDecodeAccelerator);
    455 };
    456 
    457 }  // namespace content
    458 
    459 #endif  // CONTENT_COMMON_GPU_MEDIA_EXYNOS_VIDEO_DECODE_ACCELERATOR_H_
    460