/*
 *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 *
 */

#include "webrtc/modules/video_coding/codecs/h264/h264_video_toolbox_encoder.h"

#if defined(WEBRTC_VIDEO_TOOLBOX_SUPPORTED)

#include <string>
#include <vector>

#include "libyuv/convert_from.h"
#include "webrtc/base/checks.h"
#include "webrtc/base/logging.h"
#include "webrtc/base/scoped_ptr.h"
#include "webrtc/modules/video_coding/codecs/h264/h264_video_toolbox_nalu.h"

namespace internal {

// Convenience function for creating a dictionary.
inline CFDictionaryRef CreateCFDictionary(CFTypeRef* keys,
                                          CFTypeRef* values,
                                          size_t size) {
  return CFDictionaryCreate(kCFAllocatorDefault, keys, values, size,
                            &kCFTypeDictionaryKeyCallBacks,
                            &kCFTypeDictionaryValueCallBacks);
}

// Copies characters from a CFStringRef into a std::string.
std::string CFStringToString(const CFStringRef cf_string) {
  RTC_DCHECK(cf_string);
  std::string std_string;
  // Get the size needed for UTF8 plus terminating character.
  size_t buffer_size =
      CFStringGetMaximumSizeForEncoding(CFStringGetLength(cf_string),
                                        kCFStringEncodingUTF8) +
      1;
  rtc::scoped_ptr<char[]> buffer(new char[buffer_size]);
  if (CFStringGetCString(cf_string, buffer.get(), buffer_size,
                         kCFStringEncodingUTF8)) {
    // Copy over the characters.
    std_string.assign(buffer.get());
  }
  return std_string;
}

// Convenience function for setting a VT property.
void SetVTSessionProperty(VTSessionRef session,
                          CFStringRef key,
                          int32_t value) {
  CFNumberRef cfNum =
      CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &value);
  OSStatus status = VTSessionSetProperty(session, key, cfNum);
  CFRelease(cfNum);
  if (status != noErr) {
    std::string key_string = CFStringToString(key);
    LOG(LS_ERROR) << "VTSessionSetProperty failed to set: " << key_string
                  << " to " << value << ": " << status;
  }
}

// Convenience function for setting a VT property.
void SetVTSessionProperty(VTSessionRef session, CFStringRef key, bool value) {
  CFBooleanRef cf_bool = (value) ? kCFBooleanTrue : kCFBooleanFalse;
  OSStatus status = VTSessionSetProperty(session, key, cf_bool);
  if (status != noErr) {
    std::string key_string = CFStringToString(key);
    LOG(LS_ERROR) << "VTSessionSetProperty failed to set: " << key_string
                  << " to " << value << ": " << status;
  }
}

// Convenience function for setting a VT property.
void SetVTSessionProperty(VTSessionRef session,
                          CFStringRef key,
                          CFStringRef value) {
  OSStatus status = VTSessionSetProperty(session, key, value);
  if (status != noErr) {
    std::string key_string = CFStringToString(key);
    std::string val_string = CFStringToString(value);
    LOG(LS_ERROR) << "VTSessionSetProperty failed to set: " << key_string
                  << " to " << val_string << ": " << status;
  }
}

// Struct that we pass to the encoder per frame to encode. We receive it again
// in the encoder callback.
struct FrameEncodeParams {
  FrameEncodeParams(webrtc::EncodedImageCallback* cb,
                    const webrtc::CodecSpecificInfo* csi,
                    int32_t w,
                    int32_t h,
                    int64_t rtms,
                    uint32_t ts)
      : callback(cb),
        width(w),
        height(h),
        render_time_ms(rtms),
        timestamp(ts) {
    if (csi) {
      codec_specific_info = *csi;
    } else {
      codec_specific_info.codecType = webrtc::kVideoCodecH264;
    }
  }
  webrtc::EncodedImageCallback* callback;
  webrtc::CodecSpecificInfo codec_specific_info;
  int32_t width;
  int32_t height;
  int64_t render_time_ms;
  uint32_t timestamp;
};

// We receive I420Frames as input, but we need to feed CVPixelBuffers into the
// encoder. This performs the copy and format conversion.
// TODO(tkchin): See if encoder will accept i420 frames and compare
// performance.
bool CopyVideoFrameToPixelBuffer(const webrtc::VideoFrame& frame,
                                 CVPixelBufferRef pixel_buffer) {
  RTC_DCHECK(pixel_buffer);
  RTC_DCHECK(CVPixelBufferGetPixelFormatType(pixel_buffer) ==
             kCVPixelFormatType_420YpCbCr8BiPlanarFullRange);
  RTC_DCHECK(CVPixelBufferGetHeightOfPlane(pixel_buffer, 0) ==
             static_cast<size_t>(frame.height()));
  RTC_DCHECK(CVPixelBufferGetWidthOfPlane(pixel_buffer, 0) ==
             static_cast<size_t>(frame.width()));

  CVReturn cvRet = CVPixelBufferLockBaseAddress(pixel_buffer, 0);
  if (cvRet != kCVReturnSuccess) {
    LOG(LS_ERROR) << "Failed to lock base address: " << cvRet;
    return false;
  }
  uint8_t* dst_y = reinterpret_cast<uint8_t*>(
      CVPixelBufferGetBaseAddressOfPlane(pixel_buffer, 0));
  int dst_stride_y = CVPixelBufferGetBytesPerRowOfPlane(pixel_buffer, 0);
  uint8_t* dst_uv = reinterpret_cast<uint8_t*>(
      CVPixelBufferGetBaseAddressOfPlane(pixel_buffer, 1));
  int dst_stride_uv = CVPixelBufferGetBytesPerRowOfPlane(pixel_buffer, 1);
  // Convert I420 to NV12.
  int ret = libyuv::I420ToNV12(
      frame.buffer(webrtc::kYPlane), frame.stride(webrtc::kYPlane),
      frame.buffer(webrtc::kUPlane), frame.stride(webrtc::kUPlane),
      frame.buffer(webrtc::kVPlane), frame.stride(webrtc::kVPlane), dst_y,
      dst_stride_y, dst_uv, dst_stride_uv, frame.width(), frame.height());
  CVPixelBufferUnlockBaseAddress(pixel_buffer, 0);
  if (ret) {
    LOG(LS_ERROR) << "Error converting I420 VideoFrame to NV12: " << ret;
    return false;
  }
  return true;
}

// This is the callback function that VideoToolbox calls when encode is
// complete.
void VTCompressionOutputCallback(void* encoder,
                                 void* params,
                                 OSStatus status,
                                 VTEncodeInfoFlags info_flags,
                                 CMSampleBufferRef sample_buffer) {
  rtc::scoped_ptr<FrameEncodeParams> encode_params(
      reinterpret_cast<FrameEncodeParams*>(params));
  if (status != noErr) {
    LOG(LS_ERROR) << "H264 encoding failed.";
    return;
  }
  if (info_flags & kVTEncodeInfo_FrameDropped) {
    LOG(LS_INFO) << "H264 encode dropped frame.";
  }

  bool is_keyframe = false;
  CFArrayRef attachments =
      CMSampleBufferGetSampleAttachmentsArray(sample_buffer, 0);
  if (attachments != nullptr && CFArrayGetCount(attachments)) {
    CFDictionaryRef attachment =
        static_cast<CFDictionaryRef>(CFArrayGetValueAtIndex(attachments, 0));
    is_keyframe =
        !CFDictionaryContainsKey(attachment, kCMSampleAttachmentKey_NotSync);
  }

  // Convert the sample buffer into a buffer suitable for RTP packetization.
  // TODO(tkchin): Allocate buffers through a pool.
  rtc::scoped_ptr<rtc::Buffer> buffer(new rtc::Buffer());
  rtc::scoped_ptr<webrtc::RTPFragmentationHeader> header;
  if (!H264CMSampleBufferToAnnexBBuffer(sample_buffer, is_keyframe,
                                        buffer.get(), header.accept())) {
    return;
  }
  webrtc::EncodedImage frame(buffer->data(), buffer->size(), buffer->size());
  frame._encodedWidth = encode_params->width;
  frame._encodedHeight = encode_params->height;
  frame._completeFrame = true;
  frame._frameType =
      is_keyframe ? webrtc::kVideoFrameKey : webrtc::kVideoFrameDelta;
  frame.capture_time_ms_ = encode_params->render_time_ms;
  frame._timeStamp = encode_params->timestamp;

  int result = encode_params->callback->Encoded(
      frame, &(encode_params->codec_specific_info), header.get());
  if (result != 0) {
    LOG(LS_ERROR) << "Encoded callback failed: " << result;
  }
}

}  // namespace internal

namespace webrtc {

H264VideoToolboxEncoder::H264VideoToolboxEncoder()
    : callback_(nullptr), compression_session_(nullptr) {}

H264VideoToolboxEncoder::~H264VideoToolboxEncoder() {
  DestroyCompressionSession();
}

int H264VideoToolboxEncoder::InitEncode(const VideoCodec* codec_settings,
                                        int number_of_cores,
                                        size_t max_payload_size) {
  RTC_DCHECK(codec_settings);
  RTC_DCHECK_EQ(codec_settings->codecType, kVideoCodecH264);
  // TODO(tkchin): We may need to enforce width/height dimension restrictions
  // to match what the encoder supports.
  width_ = codec_settings->width;
  height_ = codec_settings->height;
  // We can only set average bitrate on the HW encoder.
  bitrate_ = codec_settings->startBitrate * 1000;

  // TODO(tkchin): Try setting payload size via
  // kVTCompressionPropertyKey_MaxH264SliceBytes.

  return ResetCompressionSession();
}

int H264VideoToolboxEncoder::Encode(
    const VideoFrame& input_image,
    const CodecSpecificInfo* codec_specific_info,
    const std::vector<FrameType>* frame_types) {
  if (input_image.IsZeroSize()) {
    // It's possible to get zero sizes as a signal to produce keyframes (this
    // happens for internal sources). But this shouldn't happen in
    // webrtcvideoengine2.
    RTC_NOTREACHED();
    return WEBRTC_VIDEO_CODEC_OK;
  }
  if (!callback_ || !compression_session_) {
    return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
  }

  // Get a pixel buffer from the pool and copy frame data over.
  CVPixelBufferPoolRef pixel_buffer_pool =
      VTCompressionSessionGetPixelBufferPool(compression_session_);
  CVPixelBufferRef pixel_buffer = nullptr;
  CVReturn ret = CVPixelBufferPoolCreatePixelBuffer(nullptr, pixel_buffer_pool,
                                                    &pixel_buffer);
  if (ret != kCVReturnSuccess) {
    LOG(LS_ERROR) << "Failed to create pixel buffer: " << ret;
    // We probably want to drop frames here, since failure probably means
    // that the pool is empty.
    return WEBRTC_VIDEO_CODEC_ERROR;
  }
  RTC_DCHECK(pixel_buffer);
  if (!internal::CopyVideoFrameToPixelBuffer(input_image, pixel_buffer)) {
    LOG(LS_ERROR) << "Failed to copy frame data.";
    CVBufferRelease(pixel_buffer);
    return WEBRTC_VIDEO_CODEC_ERROR;
  }

  // Check if we need a keyframe.
  bool is_keyframe_required = false;
  if (frame_types) {
    for (auto frame_type : *frame_types) {
      if (frame_type == kVideoFrameKey) {
        is_keyframe_required = true;
        break;
      }
    }
  }

  CMTime presentation_time_stamp =
      CMTimeMake(input_image.render_time_ms(), 1000);
  CFDictionaryRef frame_properties = nullptr;
  if (is_keyframe_required) {
    CFTypeRef keys[] = {kVTEncodeFrameOptionKey_ForceKeyFrame};
    CFTypeRef values[] = {kCFBooleanTrue};
    frame_properties = internal::CreateCFDictionary(keys, values, 1);
  }
  rtc::scoped_ptr<internal::FrameEncodeParams> encode_params;
  encode_params.reset(new internal::FrameEncodeParams(
      callback_, codec_specific_info, width_, height_,
      input_image.render_time_ms(), input_image.timestamp()));
  VTCompressionSessionEncodeFrame(
      compression_session_, pixel_buffer, presentation_time_stamp,
      kCMTimeInvalid, frame_properties, encode_params.release(), nullptr);
  if (frame_properties) {
    CFRelease(frame_properties);
  }
  if (pixel_buffer) {
    CVBufferRelease(pixel_buffer);
  }
  return WEBRTC_VIDEO_CODEC_OK;
}

int H264VideoToolboxEncoder::RegisterEncodeCompleteCallback(
    EncodedImageCallback* callback) {
  callback_ = callback;
  return WEBRTC_VIDEO_CODEC_OK;
}

int H264VideoToolboxEncoder::SetChannelParameters(uint32_t packet_loss,
                                                  int64_t rtt) {
  // Encoder doesn't know anything about packet loss or rtt, so just return.
  return WEBRTC_VIDEO_CODEC_OK;
}

int H264VideoToolboxEncoder::SetRates(uint32_t new_bitrate_kbit,
                                      uint32_t frame_rate) {
  bitrate_ = new_bitrate_kbit * 1000;
  if (compression_session_) {
    internal::SetVTSessionProperty(compression_session_,
                                   kVTCompressionPropertyKey_AverageBitRate,
                                   bitrate_);
  }
  return WEBRTC_VIDEO_CODEC_OK;
}

int H264VideoToolboxEncoder::Release() {
  callback_ = nullptr;
  // Need to reset so that the session is invalidated and won't use the
  // callback anymore.
  return ResetCompressionSession();
}

int H264VideoToolboxEncoder::ResetCompressionSession() {
  DestroyCompressionSession();

  // Set source image buffer attributes. These attributes will be present on
  // buffers retrieved from the encoder's pixel buffer pool.
  const size_t attributes_size = 3;
  CFTypeRef keys[attributes_size] = {
#if defined(WEBRTC_IOS)
    kCVPixelBufferOpenGLESCompatibilityKey,
#elif defined(WEBRTC_MAC)
    kCVPixelBufferOpenGLCompatibilityKey,
#endif
    kCVPixelBufferIOSurfacePropertiesKey,
    kCVPixelBufferPixelFormatTypeKey
  };
  CFDictionaryRef io_surface_value =
      internal::CreateCFDictionary(nullptr, nullptr, 0);
  int64_t nv12type = kCVPixelFormatType_420YpCbCr8BiPlanarFullRange;
  CFNumberRef pixel_format =
      CFNumberCreate(nullptr, kCFNumberLongType, &nv12type);
  CFTypeRef values[attributes_size] = {kCFBooleanTrue, io_surface_value,
                                       pixel_format};
  CFDictionaryRef source_attributes =
      internal::CreateCFDictionary(keys, values, attributes_size);
  if (io_surface_value) {
    CFRelease(io_surface_value);
    io_surface_value = nullptr;
  }
  if (pixel_format) {
    CFRelease(pixel_format);
    pixel_format = nullptr;
  }
  OSStatus status = VTCompressionSessionCreate(
      nullptr,  // use default allocator
      width_, height_, kCMVideoCodecType_H264,
      nullptr,  // use default encoder
      source_attributes,
      nullptr,  // use default compressed data allocator
      internal::VTCompressionOutputCallback, this, &compression_session_);
  if (source_attributes) {
    CFRelease(source_attributes);
    source_attributes = nullptr;
  }
  if (status != noErr) {
    LOG(LS_ERROR) << "Failed to create compression session: " << status;
    return WEBRTC_VIDEO_CODEC_ERROR;
  }
  ConfigureCompressionSession();
  return WEBRTC_VIDEO_CODEC_OK;
}

void H264VideoToolboxEncoder::ConfigureCompressionSession() {
  RTC_DCHECK(compression_session_);
  internal::SetVTSessionProperty(compression_session_,
                                 kVTCompressionPropertyKey_RealTime, true);
  internal::SetVTSessionProperty(compression_session_,
                                 kVTCompressionPropertyKey_ProfileLevel,
                                 kVTProfileLevel_H264_Baseline_AutoLevel);
  internal::SetVTSessionProperty(compression_session_,
                                 kVTCompressionPropertyKey_AverageBitRate,
                                 bitrate_);
  internal::SetVTSessionProperty(compression_session_,
                                 kVTCompressionPropertyKey_AllowFrameReordering,
                                 false);
  // TODO(tkchin): Look at entropy mode and colorspace matrices.
  // TODO(tkchin): Investigate to see if there's any way to make this work.
  // May need it to interop with Android. Currently this call just fails.
  // On inspecting encoder output on iOS8, this value is set to 6.
  // internal::SetVTSessionProperty(compression_session_,
  //     kVTCompressionPropertyKey_MaxFrameDelayCount,
  //     1);
  // TODO(tkchin): See if enforcing keyframe frequency is beneficial in any
  // way.
  // internal::SetVTSessionProperty(
  //     compression_session_,
  //     kVTCompressionPropertyKey_MaxKeyFrameInterval, 240);
  // internal::SetVTSessionProperty(
  //     compression_session_,
  //     kVTCompressionPropertyKey_MaxKeyFrameIntervalDuration, 240);
}

void H264VideoToolboxEncoder::DestroyCompressionSession() {
  if (compression_session_) {
    VTCompressionSessionInvalidate(compression_session_);
    CFRelease(compression_session_);
    compression_session_ = nullptr;
  }
}

const char* H264VideoToolboxEncoder::ImplementationName() const {
  return "VideoToolbox";
}

}  // namespace webrtc

#endif  // defined(WEBRTC_VIDEO_TOOLBOX_SUPPORTED)