1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 // 5 // This file contains an implementation of VideoDecoderAccelerator 6 // that utilizes the hardware video decoder present on the Exynos SoC. 7 8 #ifndef CONTENT_COMMON_GPU_MEDIA_EXYNOS_VIDEO_DECODE_ACCELERATOR_H_ 9 #define CONTENT_COMMON_GPU_MEDIA_EXYNOS_VIDEO_DECODE_ACCELERATOR_H_ 10 11 #include <queue> 12 #include <vector> 13 14 #include "base/callback_forward.h" 15 #include "base/memory/linked_ptr.h" 16 #include "base/memory/scoped_ptr.h" 17 #include "base/synchronization/waitable_event.h" 18 #include "base/threading/thread.h" 19 #include "content/common/content_export.h" 20 #include "content/common/gpu/media/video_decode_accelerator_impl.h" 21 #include "media/base/limits.h" 22 #include "media/base/video_decoder_config.h" 23 #include "media/video/picture.h" 24 #include "ui/gfx/size.h" 25 #include "ui/gl/gl_bindings.h" 26 27 namespace base { 28 class MessageLoopProxy; 29 } 30 31 namespace content { 32 class H264Parser; 33 34 // This class handles Exynos video acceleration directly through the V4L2 35 // device exported by the Multi Format Codec hardware block. 36 // 37 // The threading model of this class is driven by the fact that it needs to 38 // interface two fundamentally different event queues -- the one Chromium 39 // provides through MessageLoop, and the one driven by the V4L2 devices which 40 // is waited on with epoll(). There are three threads involved in this class: 41 // 42 // * The child thread, which is the main GPU process thread which calls the 43 // media::VideoDecodeAccelerator entry points. Calls from this thread 44 // generally do not block (with the exception of Initialize() and Destroy()). 
//   They post tasks to the decoder_thread_, which actually services the task
//   and calls back when complete through the
//   media::VideoDecodeAccelerator::Client interface.
// * The decoder_thread_, owned by this class. It services API tasks, through
//   the *Task() routines, as well as V4L2 device events, through
//   ServiceDeviceTask(). Almost all state modification is done on this thread
//   (this doesn't include buffer (re)allocation sequence, see below).
// * The device_poll_thread_, owned by this class. All it does is epoll() on
//   the V4L2 device in DevicePollTask() and schedule a ServiceDeviceTask() on
//   the decoder_thread_ when something interesting happens.
//   TODO(sheu): replace this thread with a TYPE_IO decoder_thread_.
//
// Note that this class has (almost) no locks, apart from the pictures_assigned_
// WaitableEvent. Everything (apart from buffer (re)allocation) is serviced on
// the decoder_thread_, so there are no synchronization issues.
// ... well, there are, but it's a matter of getting messages posted in the
// right order, not fiddling with locks.
// Buffer creation is a two-step process that is serviced partially on the
// child thread, because we need to wait for the client to provide textures
// for the buffers we allocate. We cannot keep the decoder thread running while
// the client allocates Pictures for us, because we need to REQBUFS first to get
// the required number of output buffers from the device and that cannot be done
// unless we free the previous set of buffers, leaving decoding in an
// inoperable state for the duration of the wait for Pictures. So to prevent
// subtle races (esp. if we get Reset() in the meantime), we block the decoder
// thread while we wait for AssignPictureBuffers from the client.
class CONTENT_EXPORT ExynosVideoDecodeAccelerator
    : public VideoDecodeAcceleratorImpl {
 public:
  // Constructs the accelerator.
  // NOTE(review): the arguments presumably initialize the same-named data
  // members declared at the bottom of this class (egl_display_, egl_context_,
  // client_, io_client_, make_context_current_, io_message_loop_proxy_) --
  // confirm against the constructor definition.
  // NOTE(review): the |io_client_| parameter carries a member-style trailing
  // underscore and is spelled exactly like the |io_client_| data member;
  // consider renaming the parameter to |io_client| in both the declaration and
  // the definition.
  ExynosVideoDecodeAccelerator(
      EGLDisplay egl_display,
      EGLContext egl_context,
      Client* client,
      const base::WeakPtr<Client>& io_client_,
      const base::Callback<bool(void)>& make_context_current,
      const scoped_refptr<base::MessageLoopProxy>& io_message_loop_proxy);
  virtual ~ExynosVideoDecodeAccelerator();

  // media::VideoDecodeAccelerator implementation.
  // Note: Initialize() and Destroy() are synchronous.
  virtual bool Initialize(media::VideoCodecProfile profile) OVERRIDE;
  virtual void Decode(const media::BitstreamBuffer& bitstream_buffer) OVERRIDE;
  virtual void AssignPictureBuffers(
      const std::vector<media::PictureBuffer>& buffers) OVERRIDE;
  virtual void ReusePictureBuffer(int32 picture_buffer_id) OVERRIDE;
  virtual void Flush() OVERRIDE;
  virtual void Reset() OVERRIDE;
  virtual void Destroy() OVERRIDE;

  // VideoDecodeAcceleratorImpl implementation.
  virtual bool CanDecodeOnIOThread() OVERRIDE;

 private:
  // Buffer-count and buffer-size tuning constants.
  // These are rather subjectively tuned.
  enum {
    // Number of MFC input buffers.
    kMfcInputBufferCount = 8,
    // Maximum size, in bytes, of one MFC input buffer.
    // TODO(posciak): determine MFC input buffer size based on level limits.
    // See http://crbug.com/255116.
    kMfcInputBufferMaxSize = 1024 * 1024,
    // Number of output buffers to use for each VDA stage above what's required
    // by the decoder (e.g. DPB size, in H264).  We need
    // media::limits::kMaxVideoFrames to fill up the GpuVideoDecode pipeline,
    // and +1 for a frame in transit.
    kDpbOutputBufferExtraCount = media::limits::kMaxVideoFrames + 1,
  };

  // Internal state of the decoder.
  enum State {
    kUninitialized,      // Initialize() not yet called.
    kInitialized,        // Initialize() returned true; ready to start decoding.
    kDecoding,           // DecodeBufferInitial() successful; decoding frames.
    kResetting,          // Presently resetting.
    kAfterReset,         // After Reset(), ready to start decoding again.
    kChangingResolution, // Performing resolution change, all remaining
                         // pre-change frames decoded and processed.
    kError,              // Error in kDecoding state.
  };

  // Reserved bitstream buffer ids used internally by the decoder.
  enum BufferId {
    kFlushBufferId = -2  // Buffer id for flush buffer, queued by FlushTask().
  };

  // File descriptors we need to poll.  Values are single-bit flags; see the
  // |poll_fds| argument of DevicePollTask().
  enum PollFds {
    kPollMfc = (1 << 0),
  };

  // Auto-destruction reference for BitstreamBuffer, for message-passing from
  // Decode() to DecodeTask().
  struct BitstreamBufferRef;

  // Auto-destruction reference for an array of PictureBuffer, for
  // simpler EGLImage cleanup if any calls fail in AssignPictureBuffers().
  struct PictureBufferArrayRef;

  // Auto-destruction reference for EGLSync (for message-passing).
  struct EGLSyncKHRRef;

  // Record for decoded pictures that can be sent to PictureReady.
  struct PictureRecord;

  // Record for MFC input buffers.
  struct MfcInputRecord {
    MfcInputRecord();
    ~MfcInputRecord();
    bool at_device;         // held by device.
    void* address;          // mmap() address.
    size_t length;          // mmap() length.
    off_t bytes_used;       // bytes filled in the mmap() segment.
    int32 input_id;         // triggering input_id as given to Decode().
  };

  // Record for MFC output buffers.
  struct MfcOutputRecord {
    MfcOutputRecord();
    ~MfcOutputRecord();
    bool at_device;         // held by device.
    bool at_client;         // held by client.
    int fds[2];             // file descriptors for each plane.
    EGLImageKHR egl_image;  // EGLImageKHR for the output buffer.
    EGLSyncKHR egl_sync;    // sync the compositor's use of the EGLImage.
    int32 picture_id;       // picture buffer id as returned to PictureReady().
    bool cleared;           // Whether the texture is cleared and safe to render
                            // from. See TextureManager for details.
  };

  //
  // Decoding tasks, to be run on decoder_thread_.
  //

  // Enqueue a BitstreamBuffer to decode.  This will enqueue a buffer to the
  // decoder_input_queue_, then queue a DecodeBufferTask() to actually decode
  // the buffer.
  void DecodeTask(const media::BitstreamBuffer& bitstream_buffer);

  // Decode from the buffers queued in decoder_input_queue_.  Calls
  // DecodeBufferInitial() or DecodeBufferContinue() as appropriate.
  void DecodeBufferTask();
  // Advance to the next fragment that begins a frame.
  // NOTE(review): |endpos| is presumably an out-parameter receiving the end
  // offset of the fragment within |data| -- confirm against the definition.
  bool AdvanceFrameFragment(const uint8* data, size_t size, size_t* endpos);
  // Schedule another DecodeBufferTask() if we're behind.
  void ScheduleDecodeBufferTaskIfNeeded();

  // Return true if we should continue to schedule DecodeBufferTask()s after
  // completion.  Store the amount of input actually consumed in |endpos|.
  bool DecodeBufferInitial(const void* data, size_t size, size_t* endpos);
  bool DecodeBufferContinue(const void* data, size_t size);

  // Accumulate data for the next frame to decode.  May return false in
  // non-error conditions; for example when pipeline is full and should be
  // retried later.
  bool AppendToInputFrame(const void* data, size_t size);
  // Flush data for one decoded frame.
  bool FlushInputFrame();

  // Service I/O on the V4L2 devices.  This task should only be scheduled from
  // DevicePollTask().  If |mfc_event_pending| is true, one or more events
  // on MFC file descriptor are pending.
  void ServiceDeviceTask(bool mfc_event_pending);
  // Handle the various device queues.
  void EnqueueMfc();
  void DequeueMfc();
  // Handle incoming MFC events.
  void DequeueMfcEvents();
  // Enqueue a buffer on the corresponding queue.
  bool EnqueueMfcInputRecord();
  bool EnqueueMfcOutputRecord();

  // Process a ReusePictureBuffer() API call.  The API call creates an EGLSync
  // object on the main (GPU process) thread; we will record this object so we
  // can wait on it before reusing the buffer.
  void ReusePictureBufferTask(int32 picture_buffer_id,
                              scoped_ptr<EGLSyncKHRRef> egl_sync_ref);

  // Flush() task.  Child thread should not submit any more buffers until it
  // receives the NotifyFlushDone callback.  This task will schedule an empty
  // BitstreamBufferRef (with input_id == kFlushBufferId) to perform the flush.
  void FlushTask();
  // Notify the client of a flush completion, if required.  This should be
  // called any time a relevant queue could potentially be emptied: see
  // function definition.
  void NotifyFlushDoneIfNeeded();

  // Reset() task.  This task will schedule a ResetDoneTask() that will send
  // the NotifyResetDone callback, then set the decoder state to kResetting so
  // that all intervening tasks will drain.
  void ResetTask();
  // ResetDoneTask() will set the decoder state back to kAfterReset, so
  // subsequent decoding can continue.
  void ResetDoneTask();

  // Device destruction task.
  void DestroyTask();

  // Attempt to start/stop device_poll_thread_.
  bool StartDevicePoll();
  // If |keep_mfc_input_state| is true, don't reset MFC input state; used during
  // resolution change.
  bool StopDevicePoll(bool keep_mfc_input_state);
  // Set/clear the device poll interrupt (using device_poll_interrupt_fd_).
  bool SetDevicePollInterrupt();
  bool ClearDevicePollInterrupt();

  // Resolution change handling.
  // NOTE(review): presumably these run in the order declared once the driver
  // has signalled a resolution change (see resolution_change_pending_) --
  // confirm the exact sequencing against the definitions.
  void StartResolutionChangeIfNeeded();
  void FinishResolutionChange();
  void ResumeAfterResolutionChange();

  // Try to get output format from MFC, detected after parsing the beginning
  // of the stream.  Sets |again| to true if more parsing is needed.
  bool GetFormatInfo(struct v4l2_format* format, bool* again);
  // Create MFC output buffers for the given |format|.
  bool CreateBuffersForFormat(const struct v4l2_format& format);

  //
  // Device tasks, to be run on device_poll_thread_.
  //

  // The device task.
  // NOTE(review): |poll_fds| is presumably a bitmask of PollFds values
  // selecting which descriptors to poll -- confirm against the definition.
  void DevicePollTask(unsigned int poll_fds);

  //
  // Safe from any thread.
  //

  // Error notification (using PostTask() to child thread, if necessary).
  void NotifyError(Error error);

  // Set the decoder_thread_ state (using PostTask to decoder thread, if
  // necessary).
  void SetDecoderState(State state);

  //
  // Other utility functions.  Called on decoder_thread_, unless
  // decoder_thread_ is not yet started, in which case the child thread can call
  // these (e.g. in Initialize() or Destroy()).
  //

  // Create the buffers we need.
  bool CreateMfcInputBuffers();
  bool CreateMfcOutputBuffers();

  //
  // Methods run on child thread.
  //

  // Destroy buffers.
  void DestroyMfcInputBuffers();
  void DestroyMfcOutputBuffers();
  void ResolutionChangeDestroyBuffers();

  // Send decoded pictures to PictureReady.
  void SendPictureReady();

  // Callback that indicates a picture has been cleared.
  void PictureCleared();

  // Our original calling message loop for the child thread.
  scoped_refptr<base::MessageLoopProxy> child_message_loop_proxy_;

  // Message loop of the IO thread.
  scoped_refptr<base::MessageLoopProxy> io_message_loop_proxy_;

  // WeakPtr<> pointing to |this| for use in posting tasks from the decoder or
  // device worker threads back to the child thread.  Because the worker threads
  // are members of this class, any task running on those threads is guaranteed
  // that this object is still alive.  As a result, tasks posted from the child
  // thread to the decoder or device thread should use base::Unretained(this),
  // and tasks posted the other way should use |weak_this_|.
  base::WeakPtr<ExynosVideoDecodeAccelerator> weak_this_;

  // To expose client callbacks from VideoDecodeAccelerator.
  // NOTE: all calls to these objects *MUST* be executed on
  // child_message_loop_proxy_.
  base::WeakPtrFactory<Client> client_ptr_factory_;
  base::WeakPtr<Client> client_;
  // Callbacks to |io_client_| must be executed on |io_message_loop_proxy_|.
  base::WeakPtr<Client> io_client_;

  //
  // Decoder state, owned and operated by decoder_thread_.
  // Before decoder_thread_ has started, the decoder state is managed by
  // the child (main) thread.  After decoder_thread_ has started, the decoder
  // thread should be the only one managing these.
  //

  // This thread services tasks posted from the VDA API entry points by the
  // child thread and device service callbacks posted from the device thread.
  base::Thread decoder_thread_;
  // Decoder state machine state.
  State decoder_state_;
  // BitstreamBuffer we're presently reading.
  scoped_ptr<BitstreamBufferRef> decoder_current_bitstream_buffer_;
  // FlushTask() and ResetTask() should not affect buffers that have been
  // queued afterwards.  For flushing or resetting the pipeline then, we will
  // delay these buffers until after the flush or reset completes.
  int decoder_delay_bitstream_buffer_id_;
  // MFC input buffer we're presently filling.
  int decoder_current_input_buffer_;
  // We track the number of buffer decode tasks we have scheduled, since each
  // task execution should complete one buffer.  If we fall behind (due to
  // resource backpressure, etc.), we'll have to schedule more to catch up.
  int decoder_decode_buffer_tasks_scheduled_;
  // Picture buffers held by the client.
  int decoder_frames_at_client_;
  // Are we flushing?
  bool decoder_flushing_;
  // Got a notification from driver that it reached resolution change point
  // in the stream.
  bool resolution_change_pending_;
  // Got a reset request while we were performing resolution change.
  bool resolution_change_reset_pending_;
  // Input queue for decoder_thread_: BitstreamBuffers in.
  std::queue<linked_ptr<BitstreamBufferRef> > decoder_input_queue_;
  // For H264 decode, hardware requires that we send it frame-sized chunks.
  // We'll need to parse the stream.
  scoped_ptr<content::H264Parser> decoder_h264_parser_;
  // Set if the decoder has a pending incomplete frame in an input buffer.
  bool decoder_partial_frame_pending_;

  //
  // Hardware state and associated queues.  Since decoder_thread_ services
  // the hardware, decoder_thread_ owns these too.
  // mfc_output_buffer_map_ and free_output_buffers_ are an exception during the
  // buffer (re)allocation sequence, when the decoder_thread_ is blocked briefly
  // while the Child thread manipulates them.
  //

  // Completed decode buffers, waiting for MFC.
  std::queue<int> mfc_input_ready_queue_;

  // MFC decode device.
  int mfc_fd_;

  // MFC input buffer state.
  bool mfc_input_streamon_;
  // MFC input buffers enqueued to device.
  int mfc_input_buffer_queued_count_;
  // Input buffers ready to use, as a LIFO since we don't care about ordering.
  std::vector<int> mfc_free_input_buffers_;
  // Mapping of int index to MFC input buffer record.
  std::vector<MfcInputRecord> mfc_input_buffer_map_;

  // MFC output buffer state.
  bool mfc_output_streamon_;
  // MFC output buffers enqueued to device.
  int mfc_output_buffer_queued_count_;
  // Output buffers ready to use, as a FIFO since we want oldest-first to hide
  // synchronization latency with GL.
  std::queue<int> mfc_free_output_buffers_;
  // Mapping of int index to MFC output buffer record.
  std::vector<MfcOutputRecord> mfc_output_buffer_map_;
  // MFC output pixel format.
  uint32 mfc_output_buffer_pixelformat_;
  // Required size of DPB for decoding.
  int mfc_output_dpb_size_;

  // Pictures that are ready but not sent to PictureReady yet.
  std::queue<PictureRecord> pending_picture_ready_;

  // The number of pictures that are sent to PictureReady and will be cleared.
  int picture_clearing_count_;

  // Used by the decoder thread to wait for AssignPictureBuffers to arrive
  // to avoid races with potential Reset requests.
  base::WaitableEvent pictures_assigned_;

  // Output picture size.
  // NOTE(review): presumably in pixels -- confirm where this is assigned.
  gfx::Size frame_buffer_size_;

  //
  // The device polling thread handles notifications of V4L2 device changes.
  //

  // The thread.
  base::Thread device_poll_thread_;
  // eventfd fd to signal device poll thread when its poll() should be
  // interrupted.
  int device_poll_interrupt_fd_;

  //
  // Other state, held by the child (main) thread.
  //

  // Make our context current before running any EGL entry points.
  base::Callback<bool(void)> make_context_current_;

  // EGL state
  EGLDisplay egl_display_;
  EGLContext egl_context_;

  // The codec we'll be decoding for.
  media::VideoCodecProfile video_profile_;

  DISALLOW_COPY_AND_ASSIGN(ExynosVideoDecodeAccelerator);
};

}  // namespace content

#endif  // CONTENT_COMMON_GPU_MEDIA_EXYNOS_VIDEO_DECODE_ACCELERATOR_H_