1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 // 5 // This file contains an implementation of VideoDecoderAccelerator 6 // that utilizes the hardware video decoder present on the Exynos SoC. 7 8 #ifndef CONTENT_COMMON_GPU_MEDIA_EXYNOS_VIDEO_DECODE_ACCELERATOR_H_ 9 #define CONTENT_COMMON_GPU_MEDIA_EXYNOS_VIDEO_DECODE_ACCELERATOR_H_ 10 11 #include <list> 12 #include <vector> 13 14 #include "base/callback_forward.h" 15 #include "base/memory/linked_ptr.h" 16 #include "base/memory/scoped_ptr.h" 17 #include "base/threading/thread.h" 18 #include "content/common/content_export.h" 19 #include "media/base/video_decoder_config.h" 20 #include "media/video/video_decode_accelerator.h" 21 #include "ui/gfx/size.h" 22 #include "ui/gl/gl_bindings.h" 23 24 namespace base { 25 class MessageLoopProxy; 26 } 27 28 namespace content { 29 class H264Parser; 30 31 // This class handles Exynos video acceleration directly through the V4L2 32 // devices exported by the Multi Format Codec and GScaler hardware blocks. 33 // 34 // The threading model of this class is driven by the fact that it needs to 35 // interface two fundamentally different event queues -- the one Chromium 36 // provides through MessageLoop, and the one driven by the V4L2 devices which 37 // is waited on with epoll(). There are three threads involved in this class: 38 // 39 // * The child thread, which is the main GPU process thread which calls the 40 // media::VideoDecodeAccelerator entry points. Calls from this thread 41 // generally do not block (with the exception of Initialize() and Destroy()). 42 // They post tasks to the decoder_thread_, which actually services the task 43 // and calls back when complete through the 44 // media::VideoDecodeAccelerator::Client interface. 45 // * The decoder_thread_, owned by this class. It services API tasks, through 46 // the *Task() routines, as well as V4L2 device events, through 47 // ServiceDeviceTask(). Almost all state modification is done on this thread. 48 // * The device_poll_thread_, owned by this class. All it does is epoll() on 49 // the V4L2 in DevicePollTask() and schedule a ServiceDeviceTask() on the 50 // decoder_thread_ when something interesting happens. 51 // TODO(sheu): replace this thread with an TYPE_IO decoder_thread_. 52 // 53 // Note that this class has no locks! Everything's serviced on the 54 // decoder_thread_, so there are no synchronization issues. 55 // ... well, there are, but it's a matter of getting messages posted in the 56 // right order, not fiddling with locks. 57 class CONTENT_EXPORT ExynosVideoDecodeAccelerator : 58 public media::VideoDecodeAccelerator { 59 public: 60 ExynosVideoDecodeAccelerator( 61 EGLDisplay egl_display, 62 EGLContext egl_context, 63 Client* client, 64 const base::Callback<bool(void)>& make_context_current); 65 virtual ~ExynosVideoDecodeAccelerator(); 66 67 // media::VideoDecodeAccelerator implementation. 68 // Note: Initialize() and Destroy() are synchronous. 69 virtual bool Initialize(media::VideoCodecProfile profile) OVERRIDE; 70 virtual void Decode(const media::BitstreamBuffer& bitstream_buffer) OVERRIDE; 71 virtual void AssignPictureBuffers( 72 const std::vector<media::PictureBuffer>& buffers) OVERRIDE; 73 virtual void ReusePictureBuffer(int32 picture_buffer_id) OVERRIDE; 74 virtual void Flush() OVERRIDE; 75 virtual void Reset() OVERRIDE; 76 virtual void Destroy() OVERRIDE; 77 78 // Do any necessary initialization before the sandbox is enabled. 79 static void PreSandboxInitialization(); 80 81 // Lazily initialize static data after sandbox is enabled. Return false on 82 // init failure. 83 static bool PostSandboxInitialization(); 84 85 private: 86 // These are rather subjectively tuned. 87 enum { 88 kMfcInputBufferCount = 8, 89 // TODO(posciak): determine MFC input buffer size based on level limits. 90 // See http://crbug.com/255116. 91 kMfcInputBufferMaxSize = 1024 * 1024, 92 kGscInputBufferCount = 4, 93 // Number of output buffers to use for each VDA stage above what's required 94 // by the decoder (e.g. DPB size, in H264). 95 kDpbOutputBufferExtraCount = 3, 96 }; 97 98 // Internal state of the decoder. 99 enum State { 100 kUninitialized, // Initialize() not yet called. 101 kInitialized, // Initialize() returned true; ready to start decoding. 102 kDecoding, // DecodeBufferInitial() successful; decoding frames. 103 kResetting, // Presently resetting. 104 kAfterReset, // After Reset(), ready to start decoding again. 105 kChangingResolution, // Performing resolution change, all remaining 106 // pre-change frames decoded and processed. 107 kError, // Error in kDecoding state. 108 }; 109 110 enum BufferId { 111 kFlushBufferId = -2 // Buffer id for flush buffer, queued by FlushTask(). 112 }; 113 114 // File descriptors we need to poll. 115 enum PollFds { 116 kPollMfc = (1 << 0), 117 kPollGsc = (1 << 1), 118 }; 119 120 // Auto-destruction reference for BitstreamBuffer, for message-passing from 121 // Decode() to DecodeTask(). 122 struct BitstreamBufferRef; 123 124 // Auto-destruction reference for an array of PictureBuffer, for 125 // message-passing from AssignPictureBuffers() to AssignPictureBuffersTask(). 126 struct PictureBufferArrayRef; 127 128 // Auto-destruction reference for EGLSync (for message-passing). 129 struct EGLSyncKHRRef; 130 131 // Record for MFC input buffers. 132 struct MfcInputRecord { 133 MfcInputRecord(); 134 ~MfcInputRecord(); 135 bool at_device; // held by device. 136 void* address; // mmap() address. 137 size_t length; // mmap() length. 138 off_t bytes_used; // bytes filled in the mmap() segment. 139 int32 input_id; // triggering input_id as given to Decode(). 140 }; 141 142 // Record for MFC output buffers. 143 struct MfcOutputRecord { 144 MfcOutputRecord(); 145 ~MfcOutputRecord(); 146 bool at_device; // held by device. 147 size_t bytes_used[2]; // bytes used in each dmabuf. 148 void* address[2]; // mmap() address for each plane. 149 size_t length[2]; // mmap() length for each plane. 150 int32 input_id; // triggering input_id as given to Decode(). 151 }; 152 153 // Record for GSC input buffers. 154 struct GscInputRecord { 155 GscInputRecord(); 156 ~GscInputRecord(); 157 bool at_device; // held by device. 158 int mfc_output; // MFC output buffer index to recycle when this input 159 // is complete. 160 }; 161 162 // Record for GSC output buffers. 163 struct GscOutputRecord { 164 GscOutputRecord(); 165 ~GscOutputRecord(); 166 bool at_device; // held by device. 167 bool at_client; // held by client. 168 int fd; // file descriptor from backing EGLImage. 169 EGLImageKHR egl_image; // backing EGLImage. 170 EGLSyncKHR egl_sync; // sync the compositor's use of the EGLImage. 171 int32 picture_id; // picture buffer id as returned to PictureReady(). 172 }; 173 174 // 175 // Decoding tasks, to be run on decode_thread_. 176 // 177 178 // Enqueue a BitstreamBuffer to decode. This will enqueue a buffer to the 179 // decoder_input_queue_, then queue a DecodeBufferTask() to actually decode 180 // the buffer. 181 void DecodeTask(scoped_ptr<BitstreamBufferRef> bitstream_record); 182 183 // Decode from the buffers queued in decoder_input_queue_. Calls 184 // DecodeBufferInitial() or DecodeBufferContinue() as appropriate. 185 void DecodeBufferTask(); 186 // Advance to the next fragment that begins a frame. 187 bool AdvanceFrameFragment(const uint8* data, size_t size, size_t* endpos); 188 // Schedule another DecodeBufferTask() if we're behind. 189 void ScheduleDecodeBufferTaskIfNeeded(); 190 191 // Return true if we should continue to schedule DecodeBufferTask()s after 192 // completion. Store the amount of input actually consumed in |endpos|. 193 bool DecodeBufferInitial(const void* data, size_t size, size_t* endpos); 194 bool DecodeBufferContinue(const void* data, size_t size); 195 196 // Accumulate data for the next frame to decode. May return false in 197 // non-error conditions; for example when pipeline is full and should be 198 // retried later. 199 bool AppendToInputFrame(const void* data, size_t size); 200 // Flush data for one decoded frame. 201 bool FlushInputFrame(); 202 203 // Process an AssignPictureBuffers() API call. After this, the 204 // device_poll_thread_ can be started safely, since we have all our 205 // buffers. 206 void AssignPictureBuffersTask(scoped_ptr<PictureBufferArrayRef> pic_buffers); 207 208 // Service I/O on the V4L2 devices. This task should only be scheduled from 209 // DevicePollTask(). If |mfc_event_pending| is true, one or more events 210 // on MFC file descriptor are pending. 211 void ServiceDeviceTask(bool mfc_event_pending); 212 // Handle the various device queues. 213 void EnqueueMfc(); 214 void DequeueMfc(); 215 void EnqueueGsc(); 216 void DequeueGsc(); 217 // Handle incoming MFC events. 218 void DequeueMfcEvents(); 219 // Enqueue a buffer on the corresponding queue. 220 bool EnqueueMfcInputRecord(); 221 bool EnqueueMfcOutputRecord(); 222 bool EnqueueGscInputRecord(); 223 bool EnqueueGscOutputRecord(); 224 225 // Process a ReusePictureBuffer() API call. The API call create an EGLSync 226 // object on the main (GPU process) thread; we will record this object so we 227 // can wait on it before reusing the buffer. 228 void ReusePictureBufferTask(int32 picture_buffer_id, 229 scoped_ptr<EGLSyncKHRRef> egl_sync_ref); 230 231 // Flush() task. Child thread should not submit any more buffers until it 232 // receives the NotifyFlushDone callback. This task will schedule an empty 233 // BitstreamBufferRef (with input_id == kFlushBufferId) to perform the flush. 234 void FlushTask(); 235 // Notify the client of a flush completion, if required. This should be 236 // called any time a relevant queue could potentially be emptied: see 237 // function definition. 238 void NotifyFlushDoneIfNeeded(); 239 240 // Reset() task. This task will schedule a ResetDoneTask() that will send 241 // the NotifyResetDone callback, then set the decoder state to kResetting so 242 // that all intervening tasks will drain. 243 void ResetTask(); 244 // ResetDoneTask() will set the decoder state back to kAfterReset, so 245 // subsequent decoding can continue. 246 void ResetDoneTask(); 247 248 // Device destruction task. 249 void DestroyTask(); 250 251 // Attempt to start/stop device_poll_thread_. 252 bool StartDevicePoll(); 253 // If |keep_mfc_input_state| is true, don't reset MFC input state; used during 254 // resolution change. 255 bool StopDevicePoll(bool keep_mfc_input_state); 256 // Set/clear the device poll interrupt (using device_poll_interrupt_fd_). 257 bool SetDevicePollInterrupt(); 258 bool ClearDevicePollInterrupt(); 259 260 void StartResolutionChangeIfNeeded(); 261 void FinishResolutionChange(); 262 void ResumeAfterResolutionChange(); 263 264 // Try to get output format from MFC, detected after parsing the beginning 265 // of the stream. Sets |again| to true if more parsing is needed. 266 bool GetFormatInfo(struct v4l2_format* format, bool* again); 267 // Create MFC output and GSC input and output buffers for the given |format|. 268 bool CreateBuffersForFormat(const struct v4l2_format& format); 269 270 // 271 // Device tasks, to be run on device_poll_thread_. 272 // 273 274 // The device task. 275 void DevicePollTask(unsigned int poll_fds); 276 277 // 278 // Safe from any thread. 279 // 280 281 // Error notification (using PostTask() to child thread, if necessary). 282 void NotifyError(Error error); 283 284 // Set the decoder_thread_ state (using PostTask to decoder thread, if 285 // necessary). 286 void SetDecoderState(State state); 287 288 // 289 // Other utility functions. Called on decoder_thread_, unless 290 // decoder_thread_ is not yet started, in which case the child thread can call 291 // these (e.g. in Initialize() or Destroy()). 292 // 293 294 // Create the buffers we need. 295 bool CreateMfcInputBuffers(); 296 bool CreateMfcOutputBuffers(); 297 bool CreateGscInputBuffers(); 298 bool CreateGscOutputBuffers(); 299 300 // 301 // Methods run on child thread. 302 // 303 304 // Destroy buffers. 305 void DestroyMfcInputBuffers(); 306 void DestroyMfcOutputBuffers(); 307 void DestroyGscInputBuffers(); 308 void DestroyGscOutputBuffers(); 309 void ResolutionChangeDestroyBuffers(); 310 311 // Our original calling message loop for the child thread. 312 scoped_refptr<base::MessageLoopProxy> child_message_loop_proxy_; 313 314 // WeakPtr<> pointing to |this| for use in posting tasks from the decoder or 315 // device worker threads back to the child thread. Because the worker threads 316 // are members of this class, any task running on those threads is guaranteed 317 // that this object is still alive. As a result, tasks posted from the child 318 // thread to the decoder or device thread should use base::Unretained(this), 319 // and tasks posted the other way should use |weak_this_|. 320 base::WeakPtr<ExynosVideoDecodeAccelerator> weak_this_; 321 322 // To expose client callbacks from VideoDecodeAccelerator. 323 // NOTE: all calls to these objects *MUST* be executed on 324 // child_message_loop_proxy_. 325 base::WeakPtrFactory<Client> client_ptr_factory_; 326 base::WeakPtr<Client> client_; 327 328 // 329 // Decoder state, owned and operated by decoder_thread_. 330 // Before decoder_thread_ has started, the decoder state is managed by 331 // the child (main) thread. After decoder_thread_ has started, the decoder 332 // thread should be the only one managing these. 333 // 334 335 // This thread services tasks posted from the VDA API entry points by the 336 // child thread and device service callbacks posted from the device thread. 337 base::Thread decoder_thread_; 338 // Decoder state machine state. 339 State decoder_state_; 340 // BitstreamBuffer we're presently reading. 341 scoped_ptr<BitstreamBufferRef> decoder_current_bitstream_buffer_; 342 // FlushTask() and ResetTask() should not affect buffers that have been 343 // queued afterwards. For flushing or resetting the pipeline then, we will 344 // delay these buffers until after the flush or reset completes. 345 int decoder_delay_bitstream_buffer_id_; 346 // MFC input buffer we're presently filling. 347 int decoder_current_input_buffer_; 348 // We track the number of buffer decode tasks we have scheduled, since each 349 // task execution should complete one buffer. If we fall behind (due to 350 // resource backpressure, etc.), we'll have to schedule more to catch up. 351 int decoder_decode_buffer_tasks_scheduled_; 352 // Picture buffers held by the client. 353 int decoder_frames_at_client_; 354 // Are we flushing? 355 bool decoder_flushing_; 356 // Got a notification from driver that it reached resolution change point 357 // in the stream. 358 bool resolution_change_pending_; 359 // Got a reset request while we were performing resolution change. 360 bool resolution_change_reset_pending_; 361 // Input queue for decoder_thread_: BitstreamBuffers in. 362 std::list<linked_ptr<BitstreamBufferRef> > decoder_input_queue_; 363 // For H264 decode, hardware requires that we send it frame-sized chunks. 364 // We'll need to parse the stream. 365 scoped_ptr<content::H264Parser> decoder_h264_parser_; 366 // Set if the decoder has a pending incomplete frame in an input buffer. 367 bool decoder_partial_frame_pending_; 368 369 // 370 // Hardware state and associated queues. Since decoder_thread_ services 371 // the hardware, decoder_thread_ owns these too. 372 // 373 374 // Completed decode buffers, waiting for MFC. 375 std::list<int> mfc_input_ready_queue_; 376 377 // MFC decode device. 378 int mfc_fd_; 379 380 // MFC input buffer state. 381 bool mfc_input_streamon_; 382 // MFC input buffers enqueued to device. 383 int mfc_input_buffer_queued_count_; 384 // Input buffers ready to use, as a LIFO since we don't care about ordering. 385 std::vector<int> mfc_free_input_buffers_; 386 // Mapping of int index to MFC input buffer record. 387 std::vector<MfcInputRecord> mfc_input_buffer_map_; 388 389 // MFC output buffer state. 390 bool mfc_output_streamon_; 391 // MFC output buffers enqueued to device. 392 int mfc_output_buffer_queued_count_; 393 // Output buffers ready to use, as a LIFO since we don't care about ordering. 394 std::vector<int> mfc_free_output_buffers_; 395 // Mapping of int index to MFC output buffer record. 396 std::vector<MfcOutputRecord> mfc_output_buffer_map_; 397 // Required size of MFC output buffers. Two sizes for two planes. 398 size_t mfc_output_buffer_size_[2]; 399 uint32 mfc_output_buffer_pixelformat_; 400 // Required size of DPB for decoding. 401 int mfc_output_dpb_size_; 402 403 // Completed MFC outputs, waiting for GSC. 404 std::list<int> mfc_output_gsc_input_queue_; 405 406 // GSC decode device. 407 int gsc_fd_; 408 409 // GSC input buffer state. 410 bool gsc_input_streamon_; 411 // GSC input buffers enqueued to device. 412 int gsc_input_buffer_queued_count_; 413 // Input buffers ready to use, as a LIFO since we don't care about ordering. 414 std::vector<int> gsc_free_input_buffers_; 415 // Mapping of int index to GSC input buffer record. 416 std::vector<GscInputRecord> gsc_input_buffer_map_; 417 418 // GSC output buffer state. 419 bool gsc_output_streamon_; 420 // GSC output buffers enqueued to device. 421 int gsc_output_buffer_queued_count_; 422 // Output buffers ready to use. We need a FIFO here. 423 std::list<int> gsc_free_output_buffers_; 424 // Mapping of int index to GSC output buffer record. 425 std::vector<GscOutputRecord> gsc_output_buffer_map_; 426 427 // Output picture size. 428 gfx::Size frame_buffer_size_; 429 430 // 431 // The device polling thread handles notifications of V4L2 device changes. 432 // 433 434 // The thread. 435 base::Thread device_poll_thread_; 436 // eventfd fd to signal device poll thread when its poll() should be 437 // interrupted. 438 int device_poll_interrupt_fd_; 439 440 // 441 // Other state, held by the child (main) thread. 442 // 443 444 // Make our context current before running any EGL entry points. 445 base::Callback<bool(void)> make_context_current_; 446 447 // EGL state 448 EGLDisplay egl_display_; 449 EGLContext egl_context_; 450 451 // The codec we'll be decoding for. 452 media::VideoCodecProfile video_profile_; 453 454 DISALLOW_COPY_AND_ASSIGN(ExynosVideoDecodeAccelerator); 455 }; 456 457 } // namespace content 458 459 #endif // CONTENT_COMMON_GPU_MEDIA_EXYNOS_VIDEO_DECODE_ACCELERATOR_H_ 460