// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include <CoreVideo/CoreVideo.h>
#include <OpenGL/CGLIOSurface.h>
#include <OpenGL/gl.h>

#include "base/bind.h"
#include "base/command_line.h"
#include "base/sys_byteorder.h"
#include "base/thread_task_runner_handle.h"
#include "content/common/gpu/media/vt_video_decode_accelerator.h"
#include "content/public/common/content_switches.h"
#include "media/filters/h264_parser.h"
#include "ui/gl/scoped_binders.h"
#include "ui/gl/scoped_cgl.h"

using content_common_gpu_media::kModuleVt;
using content_common_gpu_media::InitializeStubs;
using content_common_gpu_media::IsVtInitialized;
using content_common_gpu_media::StubPathMap;

namespace content {

// Size of NALU length headers in AVCC/MPEG-4 format (can be 1, 2, or 4).
// This value is both passed to VideoToolbox when creating the format
// description and used when rewriting Annex B data into length-prefixed
// form in DecodeTask(), so the two always agree.
static const int kNALUHeaderLength = 4;

// We only request 5 picture buffers from the client which are used to hold the
// decoded samples. These buffers are then reused when the client tells us that
// it is done with the buffer.
static const int kNumPictureBuffers = 5;

// Route decoded frame callbacks back into the VTVideoDecodeAccelerator.
35 static void OutputThunk( 36 void* decompression_output_refcon, 37 void* source_frame_refcon, 38 OSStatus status, 39 VTDecodeInfoFlags info_flags, 40 CVImageBufferRef image_buffer, 41 CMTime presentation_time_stamp, 42 CMTime presentation_duration) { 43 VTVideoDecodeAccelerator* vda = 44 reinterpret_cast<VTVideoDecodeAccelerator*>(decompression_output_refcon); 45 int32_t bitstream_id = reinterpret_cast<intptr_t>(source_frame_refcon); 46 vda->Output(bitstream_id, status, image_buffer); 47 } 48 49 VTVideoDecodeAccelerator::DecodedFrame::DecodedFrame( 50 int32_t bitstream_id, 51 CVImageBufferRef image_buffer) 52 : bitstream_id(bitstream_id), 53 image_buffer(image_buffer) { 54 } 55 56 VTVideoDecodeAccelerator::DecodedFrame::~DecodedFrame() { 57 } 58 59 VTVideoDecodeAccelerator::PendingAction::PendingAction( 60 Action action, 61 int32_t bitstream_id) 62 : action(action), 63 bitstream_id(bitstream_id) { 64 } 65 66 VTVideoDecodeAccelerator::PendingAction::~PendingAction() { 67 } 68 69 VTVideoDecodeAccelerator::VTVideoDecodeAccelerator(CGLContextObj cgl_context) 70 : cgl_context_(cgl_context), 71 client_(NULL), 72 format_(NULL), 73 session_(NULL), 74 gpu_task_runner_(base::ThreadTaskRunnerHandle::Get()), 75 weak_this_factory_(this), 76 decoder_thread_("VTDecoderThread") { 77 callback_.decompressionOutputCallback = OutputThunk; 78 callback_.decompressionOutputRefCon = this; 79 } 80 81 VTVideoDecodeAccelerator::~VTVideoDecodeAccelerator() { 82 } 83 84 bool VTVideoDecodeAccelerator::Initialize( 85 media::VideoCodecProfile profile, 86 Client* client) { 87 DCHECK(CalledOnValidThread()); 88 client_ = client; 89 90 // Only H.264 is supported. 91 if (profile < media::H264PROFILE_MIN || profile > media::H264PROFILE_MAX) 92 return false; 93 94 // Require --no-sandbox until VideoToolbox library loading is part of sandbox 95 // startup (and this VDA is ready for regular users). 
96 if (!base::CommandLine::ForCurrentProcess()->HasSwitch(switches::kNoSandbox)) 97 return false; 98 99 if (!IsVtInitialized()) { 100 // CoreVideo is also required, but the loader stops after the first 101 // path is loaded. Instead we rely on the transitive dependency from 102 // VideoToolbox to CoreVideo. 103 // TODO(sandersd): Fallback to PrivateFrameworks for VideoToolbox. 104 StubPathMap paths; 105 paths[kModuleVt].push_back(FILE_PATH_LITERAL( 106 "/System/Library/Frameworks/VideoToolbox.framework/VideoToolbox")); 107 if (!InitializeStubs(paths)) 108 return false; 109 } 110 111 // Spawn a thread to handle parsing and calling VideoToolbox. 112 if (!decoder_thread_.Start()) 113 return false; 114 115 return true; 116 } 117 118 // TODO(sandersd): Proper error reporting instead of CHECKs. 119 void VTVideoDecodeAccelerator::ConfigureDecoder( 120 const std::vector<const uint8_t*>& nalu_data_ptrs, 121 const std::vector<size_t>& nalu_data_sizes) { 122 DCHECK(decoder_thread_.message_loop_proxy()->BelongsToCurrentThread()); 123 // Construct a new format description from the parameter sets. 124 // TODO(sandersd): Replace this with custom code to support OS X < 10.9. 125 format_.reset(); 126 CHECK(!CMVideoFormatDescriptionCreateFromH264ParameterSets( 127 kCFAllocatorDefault, 128 nalu_data_ptrs.size(), // parameter_set_count 129 &nalu_data_ptrs.front(), // ¶meter_set_pointers 130 &nalu_data_sizes.front(), // ¶meter_set_sizes 131 kNALUHeaderLength, // nal_unit_header_length 132 format_.InitializeInto())); 133 CMVideoDimensions coded_dimensions = 134 CMVideoFormatDescriptionGetDimensions(format_); 135 136 // Prepare VideoToolbox configuration dictionaries. 
137 base::ScopedCFTypeRef<CFMutableDictionaryRef> decoder_config( 138 CFDictionaryCreateMutable( 139 kCFAllocatorDefault, 140 1, // capacity 141 &kCFTypeDictionaryKeyCallBacks, 142 &kCFTypeDictionaryValueCallBacks)); 143 144 CFDictionarySetValue( 145 decoder_config, 146 // kVTVideoDecoderSpecification_EnableHardwareAcceleratedVideoDecoder 147 CFSTR("EnableHardwareAcceleratedVideoDecoder"), 148 kCFBooleanTrue); 149 150 base::ScopedCFTypeRef<CFMutableDictionaryRef> image_config( 151 CFDictionaryCreateMutable( 152 kCFAllocatorDefault, 153 4, // capacity 154 &kCFTypeDictionaryKeyCallBacks, 155 &kCFTypeDictionaryValueCallBacks)); 156 157 #define CFINT(i) CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &i) 158 // TODO(sandersd): RGBA option for 4:4:4 video. 159 int32_t pixel_format = kCVPixelFormatType_422YpCbCr8; 160 base::ScopedCFTypeRef<CFNumberRef> cf_pixel_format(CFINT(pixel_format)); 161 base::ScopedCFTypeRef<CFNumberRef> cf_width(CFINT(coded_dimensions.width)); 162 base::ScopedCFTypeRef<CFNumberRef> cf_height(CFINT(coded_dimensions.height)); 163 #undef CFINT 164 CFDictionarySetValue( 165 image_config, kCVPixelBufferPixelFormatTypeKey, cf_pixel_format); 166 CFDictionarySetValue(image_config, kCVPixelBufferWidthKey, cf_width); 167 CFDictionarySetValue(image_config, kCVPixelBufferHeightKey, cf_height); 168 CFDictionarySetValue( 169 image_config, kCVPixelBufferOpenGLCompatibilityKey, kCFBooleanTrue); 170 171 // TODO(sandersd): Check if the session is already compatible. 172 session_.reset(); 173 CHECK(!VTDecompressionSessionCreate( 174 kCFAllocatorDefault, 175 format_, // video_format_description 176 decoder_config, // video_decoder_specification 177 image_config, // destination_image_buffer_attributes 178 &callback_, // output_callback 179 session_.InitializeInto())); 180 181 // If the size has changed, trigger a request for new picture buffers. 182 // TODO(sandersd): Move to SendPictures(), and use this just as a hint for an 183 // upcoming size change. 
184 gfx::Size new_coded_size(coded_dimensions.width, coded_dimensions.height); 185 if (coded_size_ != new_coded_size) { 186 coded_size_ = new_coded_size; 187 gpu_task_runner_->PostTask(FROM_HERE, base::Bind( 188 &VTVideoDecodeAccelerator::SizeChangedTask, 189 weak_this_factory_.GetWeakPtr(), 190 coded_size_));; 191 } 192 } 193 194 void VTVideoDecodeAccelerator::Decode(const media::BitstreamBuffer& bitstream) { 195 DCHECK(CalledOnValidThread()); 196 CHECK_GE(bitstream.id(), 0) << "Negative bitstream_id"; 197 pending_bitstream_ids_.push(bitstream.id()); 198 decoder_thread_.message_loop_proxy()->PostTask(FROM_HERE, base::Bind( 199 &VTVideoDecodeAccelerator::DecodeTask, base::Unretained(this), 200 bitstream)); 201 } 202 203 // TODO(sandersd): Proper error reporting instead of CHECKs. 204 void VTVideoDecodeAccelerator::DecodeTask( 205 const media::BitstreamBuffer bitstream) { 206 DCHECK(decoder_thread_.message_loop_proxy()->BelongsToCurrentThread()); 207 208 // Map the bitstream buffer. 209 base::SharedMemory memory(bitstream.handle(), true); 210 size_t size = bitstream.size(); 211 CHECK(memory.Map(size)); 212 const uint8_t* buf = static_cast<uint8_t*>(memory.memory()); 213 214 // NALUs are stored with Annex B format in the bitstream buffer (start codes), 215 // but VideoToolbox expects AVCC/MPEG-4 format (length headers), so we must 216 // rewrite the data. 217 // 218 // 1. Locate relevant NALUs and compute the size of the translated data. 219 // Also record any parameter sets for VideoToolbox initialization. 
220 size_t data_size = 0; 221 std::vector<media::H264NALU> nalus; 222 std::vector<const uint8_t*> config_nalu_data_ptrs; 223 std::vector<size_t> config_nalu_data_sizes; 224 parser_.SetStream(buf, size); 225 media::H264NALU nalu; 226 while (true) { 227 media::H264Parser::Result result = parser_.AdvanceToNextNALU(&nalu); 228 if (result == media::H264Parser::kEOStream) 229 break; 230 CHECK_EQ(result, media::H264Parser::kOk); 231 // TODO(sandersd): Check that these are only at the start. 232 if (nalu.nal_unit_type == media::H264NALU::kSPS || 233 nalu.nal_unit_type == media::H264NALU::kPPS || 234 nalu.nal_unit_type == media::H264NALU::kSPSExt) { 235 DVLOG(2) << "Parameter set " << nalu.nal_unit_type; 236 config_nalu_data_ptrs.push_back(nalu.data); 237 config_nalu_data_sizes.push_back(nalu.size); 238 } else { 239 nalus.push_back(nalu); 240 data_size += kNALUHeaderLength + nalu.size; 241 } 242 } 243 244 // 2. Initialize VideoToolbox. 245 // TODO(sandersd): Reinitialize when there are new parameter sets. 246 if (!session_) 247 ConfigureDecoder(config_nalu_data_ptrs, config_nalu_data_sizes); 248 249 // If there are no non-configuration units, immediately return an empty 250 // (ie. dropped) frame. It is an error to create a MemoryBlock with zero 251 // size. 252 if (!data_size) { 253 gpu_task_runner_->PostTask(FROM_HERE, base::Bind( 254 &VTVideoDecodeAccelerator::OutputTask, 255 weak_this_factory_.GetWeakPtr(), 256 DecodedFrame(bitstream.id(), NULL))); 257 return; 258 } 259 260 // 3. Allocate a memory-backed CMBlockBuffer for the translated data. 261 base::ScopedCFTypeRef<CMBlockBufferRef> data; 262 CHECK(!CMBlockBufferCreateWithMemoryBlock( 263 kCFAllocatorDefault, 264 NULL, // &memory_block 265 data_size, // block_length 266 kCFAllocatorDefault, // block_allocator 267 NULL, // &custom_block_source 268 0, // offset_to_data 269 data_size, // data_length 270 0, // flags 271 data.InitializeInto())); 272 273 // 4. Copy NALU data, inserting length headers. 
274 size_t offset = 0; 275 for (size_t i = 0; i < nalus.size(); i++) { 276 media::H264NALU& nalu = nalus[i]; 277 uint32_t header = base::HostToNet32(static_cast<uint32_t>(nalu.size)); 278 CHECK(!CMBlockBufferReplaceDataBytes( 279 &header, data, offset, kNALUHeaderLength)); 280 offset += kNALUHeaderLength; 281 CHECK(!CMBlockBufferReplaceDataBytes(nalu.data, data, offset, nalu.size)); 282 offset += nalu.size; 283 } 284 285 // 5. Package the data for VideoToolbox and request decoding. 286 base::ScopedCFTypeRef<CMSampleBufferRef> frame; 287 CHECK(!CMSampleBufferCreate( 288 kCFAllocatorDefault, 289 data, // data_buffer 290 true, // data_ready 291 NULL, // make_data_ready_callback 292 NULL, // make_data_ready_refcon 293 format_, // format_description 294 1, // num_samples 295 0, // num_sample_timing_entries 296 NULL, // &sample_timing_array 297 0, // num_sample_size_entries 298 NULL, // &sample_size_array 299 frame.InitializeInto())); 300 301 // Asynchronous Decompression allows for parallel submission of frames 302 // (without it, DecodeFrame() does not return until the frame has been 303 // decoded). We don't enable Temporal Processing so that frames are always 304 // returned in decode order; this makes it easier to avoid deadlock. 305 VTDecodeFrameFlags decode_flags = 306 kVTDecodeFrame_EnableAsynchronousDecompression; 307 308 intptr_t bitstream_id = bitstream.id(); 309 CHECK(!VTDecompressionSessionDecodeFrame( 310 session_, 311 frame, // sample_buffer 312 decode_flags, // decode_flags 313 reinterpret_cast<void*>(bitstream_id), // source_frame_refcon 314 NULL)); // &info_flags_out 315 } 316 317 // This method may be called on any VideoToolbox thread. 318 // TODO(sandersd): Proper error reporting instead of CHECKs. 
void VTVideoDecodeAccelerator::Output(
    int32_t bitstream_id,
    OSStatus status,
    CVImageBufferRef image_buffer) {
  CHECK(!status);
  CHECK_EQ(CFGetTypeID(image_buffer), CVPixelBufferGetTypeID());
  // Take a reference so |image_buffer| stays alive across the hop to the GPU
  // thread. NOTE(review): presumably balanced by DecodedFrame's scoped
  // reference releasing it later — confirm against the header.
  CFRetain(image_buffer);
  gpu_task_runner_->PostTask(FROM_HERE, base::Bind(
      &VTVideoDecodeAccelerator::OutputTask,
      weak_this_factory_.GetWeakPtr(),
      DecodedFrame(bitstream_id, image_buffer)));
}

// GPU-thread side of Output(): queues the decoded frame and tries to make
// progress on sending pictures / completing pending actions.
void VTVideoDecodeAccelerator::OutputTask(DecodedFrame frame) {
  DCHECK(CalledOnValidThread());
  decoded_frames_.push(frame);
  ProcessDecodedFrames();
}

// Requests a fresh set of picture buffers from the client; posted from
// ConfigureDecoder() whenever the coded size changes.
void VTVideoDecodeAccelerator::SizeChangedTask(gfx::Size coded_size) {
  DCHECK(CalledOnValidThread());
  texture_size_ = coded_size;
  // TODO(sandersd): Dismiss existing picture buffers.
  client_->ProvidePictureBuffers(
      kNumPictureBuffers, texture_size_, GL_TEXTURE_RECTANGLE_ARB);
}

// Registers the client-provided picture buffers (picture id -> GL texture id)
// and marks them all as available.
void VTVideoDecodeAccelerator::AssignPictureBuffers(
    const std::vector<media::PictureBuffer>& pictures) {
  DCHECK(CalledOnValidThread());

  for (size_t i = 0; i < pictures.size(); i++) {
    // Picture ids must be new; reusing an id would clobber a texture mapping.
    CHECK(!texture_ids_.count(pictures[i].id()));
    available_picture_ids_.push(pictures[i].id());
    texture_ids_[pictures[i].id()] = pictures[i].texture_id();
  }

  // Pictures are not marked as uncleared until after this method returns, and
  // they will be broken if they are used before that happens. So, schedule
  // future work after that happens.
  gpu_task_runner_->PostTask(FROM_HERE, base::Bind(
      &VTVideoDecodeAccelerator::ProcessDecodedFrames,
      weak_this_factory_.GetWeakPtr()));
}

// The client is done with |picture_id|; drop the IOSurface binding holding
// the image buffer alive and return the picture to the available pool.
void VTVideoDecodeAccelerator::ReusePictureBuffer(int32_t picture_id) {
  DCHECK(CalledOnValidThread());
  // At this point the binding map should hold the only remaining reference to
  // the image buffer.
  DCHECK_EQ(CFGetRetainCount(picture_bindings_[picture_id]), 1);
  picture_bindings_.erase(picture_id);
  available_picture_ids_.push(picture_id);
  ProcessDecodedFrames();
}

// Notifies the client that |action| finished. ACTION_DESTROY deletes |this|,
// so callers must not touch |this| after calling with that action.
void VTVideoDecodeAccelerator::CompleteAction(Action action) {
  DCHECK(CalledOnValidThread());
  switch (action) {
    case ACTION_FLUSH:
      client_->NotifyFlushDone();
      break;
    case ACTION_RESET:
      client_->NotifyResetDone();
      break;
    case ACTION_DESTROY:
      delete this;
      break;
  }
}

// Completes every queued action that was waiting on |bitstream_id|.
void VTVideoDecodeAccelerator::CompleteActions(int32_t bitstream_id) {
  DCHECK(CalledOnValidThread());
  while (!pending_actions_.empty() &&
         pending_actions_.front().bitstream_id == bitstream_id) {
    CompleteAction(pending_actions_.front().action);
    pending_actions_.pop();
  }
}

// Core state machine: drains |decoded_frames_|, sending or dropping frames
// depending on the action at the head of |pending_actions_|, and completes
// actions as their target bitstream ids are reached. Returns when no more
// progress can be made (out of frames, out of pictures, or destroyed).
void VTVideoDecodeAccelerator::ProcessDecodedFrames() {
  DCHECK(CalledOnValidThread());

  while (!decoded_frames_.empty()) {
    if (pending_actions_.empty()) {
      // No pending actions; send frames normally.
      SendPictures(pending_bitstream_ids_.back());
      return;
    }

    int32_t next_action_bitstream_id = pending_actions_.front().bitstream_id;
    int32_t last_sent_bitstream_id = -1;
    switch (pending_actions_.front().action) {
      case ACTION_FLUSH:
        // Send frames normally.
        last_sent_bitstream_id = SendPictures(next_action_bitstream_id);
        break;

      case ACTION_RESET:
        // Drop decoded frames. The bitstream buffers are still returned to
        // the client, in order, as each frame is discarded.
        while (!decoded_frames_.empty() &&
               last_sent_bitstream_id != next_action_bitstream_id) {
          last_sent_bitstream_id = decoded_frames_.front().bitstream_id;
          decoded_frames_.pop();
          DCHECK_EQ(pending_bitstream_ids_.front(), last_sent_bitstream_id);
          pending_bitstream_ids_.pop();
          client_->NotifyEndOfBitstreamBuffer(last_sent_bitstream_id);
        }
        break;

      case ACTION_DESTROY:
        // Drop decoded frames, without bookkeeping.
        while (!decoded_frames_.empty()) {
          last_sent_bitstream_id = decoded_frames_.front().bitstream_id;
          decoded_frames_.pop();
        }

        // Handle completing the action specially, as it is important not to
        // access |this| after calling CompleteAction().
        if (last_sent_bitstream_id == next_action_bitstream_id)
          CompleteAction(ACTION_DESTROY);

        // Either |this| was deleted or no more progress can be made.
        return;
    }

    // If we ran out of buffers (or pictures), no more progress can be made
    // until more frames are decoded.
    if (last_sent_bitstream_id != next_action_bitstream_id)
      return;

    // Complete all actions pending for this |bitstream_id|, then loop to see
    // if progress can be made on the next action.
    CompleteActions(next_action_bitstream_id);
  }
}

// Sends decoded frames to the client in decode order, stopping at
// |up_to_bitstream_id| or when frames/pictures run out. Returns the last
// bitstream id sent, or -1 if none was sent.
int32_t VTVideoDecodeAccelerator::SendPictures(int32_t up_to_bitstream_id) {
  DCHECK(CalledOnValidThread());
  DCHECK(!decoded_frames_.empty());

  if (available_picture_ids_.empty())
    return -1;

  gfx::ScopedCGLSetCurrentContext scoped_set_current_context(cgl_context_);
  glEnable(GL_TEXTURE_RECTANGLE_ARB);

  int32_t last_sent_bitstream_id = -1;
  while (!available_picture_ids_.empty() &&
         !decoded_frames_.empty() &&
         last_sent_bitstream_id != up_to_bitstream_id) {
    DecodedFrame frame = decoded_frames_.front();
    decoded_frames_.pop();
    DCHECK_EQ(pending_bitstream_ids_.front(), frame.bitstream_id);
    pending_bitstream_ids_.pop();
    int32_t picture_id = available_picture_ids_.front();
    available_picture_ids_.pop();

    // A NULL image buffer marks a dropped (configuration-only) frame; in that
    // case only the bitstream buffer is returned, no picture.
    CVImageBufferRef image_buffer = frame.image_buffer.get();
    if (image_buffer) {
      IOSurfaceRef surface = CVPixelBufferGetIOSurface(image_buffer);

      // TODO(sandersd): Find out why this sometimes fails due to no GL context.
      // Bind the IOSurface backing the pixel buffer directly to the client's
      // rectangle texture (no copy of the pixel data).
      gfx::ScopedTextureBinder
          texture_binder(GL_TEXTURE_RECTANGLE_ARB, texture_ids_[picture_id]);
      CHECK(!CGLTexImageIOSurface2D(
          cgl_context_,                 // ctx
          GL_TEXTURE_RECTANGLE_ARB,     // target
          GL_RGB,                       // internal_format
          texture_size_.width(),        // width
          texture_size_.height(),       // height
          GL_YCBCR_422_APPLE,           // format
          GL_UNSIGNED_SHORT_8_8_APPLE,  // type
          surface,                      // io_surface
          0));                          // plane

      // Keep the image buffer alive for as long as the texture references it;
      // released in ReusePictureBuffer().
      picture_bindings_[picture_id] = frame.image_buffer;
      client_->PictureReady(media::Picture(
          picture_id, frame.bitstream_id, gfx::Rect(texture_size_)));
    }

    client_->NotifyEndOfBitstreamBuffer(frame.bitstream_id);
    last_sent_bitstream_id = frame.bitstream_id;
  }

  glDisable(GL_TEXTURE_RECTANGLE_ARB);
  return last_sent_bitstream_id;
}

// Asks VideoToolbox to emit all frames it is holding. Runs on the decoder
// thread, so it is ordered after any already-posted DecodeTask()s.
void VTVideoDecodeAccelerator::FlushTask() {
  DCHECK(decoder_thread_.message_loop_proxy()->BelongsToCurrentThread());
  CHECK(!VTDecompressionSessionFinishDelayedFrames(session_));
}

// Queues |action| to complete after the most recently submitted bitstream
// buffer, or completes it immediately if nothing is pending.
void VTVideoDecodeAccelerator::QueueAction(Action action) {
  DCHECK(CalledOnValidThread());
  if (pending_bitstream_ids_.empty()) {
    // If there are no pending frames, all actions complete immediately.
    CompleteAction(action);
  } else {
    // Otherwise, queue the action.
    pending_actions_.push(PendingAction(action, pending_bitstream_ids_.back()));

    // Request a flush to make sure the action will eventually complete.
    // NOTE(review): base::Unretained assumes |decoder_thread_| drains its
    // tasks before |this| is destroyed — verify the shutdown ordering.
    decoder_thread_.message_loop_proxy()->PostTask(FROM_HERE, base::Bind(
        &VTVideoDecodeAccelerator::FlushTask, base::Unretained(this)));

    // See if we can make progress now that there is a new pending action.
    ProcessDecodedFrames();
  }
}

void VTVideoDecodeAccelerator::Flush() {
  DCHECK(CalledOnValidThread());
  QueueAction(ACTION_FLUSH);
}

void VTVideoDecodeAccelerator::Reset() {
  DCHECK(CalledOnValidThread());
  QueueAction(ACTION_RESET);
}

// Tears down the decoder. Pending work is abandoned: queued actions are
// dropped, all outstanding bitstream buffers are returned, and ACTION_DESTROY
// is queued (which eventually deletes |this| via CompleteAction()).
void VTVideoDecodeAccelerator::Destroy() {
  DCHECK(CalledOnValidThread());
  // Drop any other pending actions.
  while (!pending_actions_.empty())
    pending_actions_.pop();
  // Return all bitstream buffers.
  while (!pending_bitstream_ids_.empty()) {
    client_->NotifyEndOfBitstreamBuffer(pending_bitstream_ids_.front());
    pending_bitstream_ids_.pop();
  }
  QueueAction(ACTION_DESTROY);
}

// Decode() must be called on the GPU thread; no IO-thread fast path.
bool VTVideoDecodeAccelerator::CanDecodeOnIOThread() {
  return false;
}

}  // namespace content