/*
 *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 *
 */

#include "webrtc/modules/video_coding/codecs/h264/h264_video_toolbox_encoder.h"

#if defined(WEBRTC_VIDEO_TOOLBOX_SUPPORTED)

#include <string>
#include <vector>

#include "libyuv/convert_from.h"
#include "webrtc/base/checks.h"
#include "webrtc/base/logging.h"
#include "webrtc/base/scoped_ptr.h"
#include "webrtc/modules/video_coding/codecs/h264/h264_video_toolbox_nalu.h"

namespace internal {

// Convenience function for creating a dictionary.
inline CFDictionaryRef CreateCFDictionary(CFTypeRef* keys,
                                          CFTypeRef* values,
                                          size_t size) {
  return CFDictionaryCreate(kCFAllocatorDefault, keys, values, size,
                            &kCFTypeDictionaryKeyCallBacks,
                            &kCFTypeDictionaryValueCallBacks);
}

// Copies characters from a CFStringRef into a std::string.
std::string CFStringToString(const CFStringRef cf_string) {
  RTC_DCHECK(cf_string);
  std::string std_string;
  // Get the size needed for UTF8 plus terminating character.
  size_t buffer_size =
      CFStringGetMaximumSizeForEncoding(CFStringGetLength(cf_string),
                                        kCFStringEncodingUTF8) +
      1;
  rtc::scoped_ptr<char[]> buffer(new char[buffer_size]);
  if (CFStringGetCString(cf_string, buffer.get(), buffer_size,
                         kCFStringEncodingUTF8)) {
    // Copy over the characters.
    std_string.assign(buffer.get());
  }
  return std_string;
}

// Convenience function for setting a VT property.
void SetVTSessionProperty(VTSessionRef session,
                          CFStringRef key,
                          int32_t value) {
  CFNumberRef cf_num =
      CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &value);
  OSStatus status = VTSessionSetProperty(session, key, cf_num);
  CFRelease(cf_num);
  if (status != noErr) {
    std::string key_string = CFStringToString(key);
    LOG(LS_ERROR) << "VTSessionSetProperty failed to set: " << key_string
                  << " to " << value << ": " << status;
  }
}

// Convenience function for setting a VT property.
void SetVTSessionProperty(VTSessionRef session, CFStringRef key, bool value) {
  CFBooleanRef cf_bool = (value) ? kCFBooleanTrue : kCFBooleanFalse;
  OSStatus status = VTSessionSetProperty(session, key, cf_bool);
  if (status != noErr) {
    std::string key_string = CFStringToString(key);
    LOG(LS_ERROR) << "VTSessionSetProperty failed to set: " << key_string
                  << " to " << value << ": " << status;
  }
}

// Convenience function for setting a VT property.
void SetVTSessionProperty(VTSessionRef session,
                          CFStringRef key,
                          CFStringRef value) {
  OSStatus status = VTSessionSetProperty(session, key, value);
  if (status != noErr) {
    std::string key_string = CFStringToString(key);
    std::string val_string = CFStringToString(value);
    LOG(LS_ERROR) << "VTSessionSetProperty failed to set: " << key_string
                  << " to " << val_string << ": " << status;
  }
}

// Struct that we pass to the encoder for each frame to encode. We receive it
// again in the encoder callback.
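// The struct is allocated in Encode(), handed to VideoToolbox as an opaque
// pointer, and reclaimed (and freed) by VTCompressionOutputCallback.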
struct FrameEncodeParams {
  FrameEncodeParams(webrtc::EncodedImageCallback* cb,
                    const webrtc::CodecSpecificInfo* csi,
                    int32_t w,
                    int32_t h,
                    int64_t rtms,
                    uint32_t ts)
      : callback(cb), width(w), height(h), render_time_ms(rtms), timestamp(ts) {
    if (csi) {
      codec_specific_info = *csi;
    } else {
      codec_specific_info.codecType = webrtc::kVideoCodecH264;
    }
  }
  webrtc::EncodedImageCallback* callback;
  webrtc::CodecSpecificInfo codec_specific_info;
  int32_t width;
  int32_t height;
  int64_t render_time_ms;
  uint32_t timestamp;
};

// We receive I420 VideoFrames as input, but we need to feed CVPixelBuffers
// into the encoder. This performs the copy and format conversion.
// TODO(tkchin): See if encoder will accept i420 frames and compare performance.
bool CopyVideoFrameToPixelBuffer(const webrtc::VideoFrame& frame,
                                 CVPixelBufferRef pixel_buffer) {
  RTC_DCHECK(pixel_buffer);
  RTC_DCHECK(CVPixelBufferGetPixelFormatType(pixel_buffer) ==
             kCVPixelFormatType_420YpCbCr8BiPlanarFullRange);
  RTC_DCHECK(CVPixelBufferGetHeightOfPlane(pixel_buffer, 0) ==
             static_cast<size_t>(frame.height()));
  RTC_DCHECK(CVPixelBufferGetWidthOfPlane(pixel_buffer, 0) ==
             static_cast<size_t>(frame.width()));

  CVReturn cv_ret = CVPixelBufferLockBaseAddress(pixel_buffer, 0);
  if (cv_ret != kCVReturnSuccess) {
    LOG(LS_ERROR) << "Failed to lock base address: " << cv_ret;
    return false;
  }
  uint8_t* dst_y = reinterpret_cast<uint8_t*>(
      CVPixelBufferGetBaseAddressOfPlane(pixel_buffer, 0));
  int dst_stride_y = CVPixelBufferGetBytesPerRowOfPlane(pixel_buffer, 0);
  uint8_t* dst_uv = reinterpret_cast<uint8_t*>(
      CVPixelBufferGetBaseAddressOfPlane(pixel_buffer, 1));
  int dst_stride_uv = CVPixelBufferGetBytesPerRowOfPlane(pixel_buffer, 1);
  // Convert I420 to NV12.
  int ret = libyuv::I420ToNV12(
      frame.buffer(webrtc::kYPlane), frame.stride(webrtc::kYPlane),
      frame.buffer(webrtc::kUPlane), frame.stride(webrtc::kUPlane),
      frame.buffer(webrtc::kVPlane), frame.stride(webrtc::kVPlane), dst_y,
      dst_stride_y, dst_uv, dst_stride_uv, frame.width(), frame.height());
  CVPixelBufferUnlockBaseAddress(pixel_buffer, 0);
  if (ret) {
    LOG(LS_ERROR) << "Error converting I420 VideoFrame to NV12: " << ret;
    return false;
  }
  return true;
}

// This is the callback function that VideoToolbox calls when encode is
// complete.
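// It takes ownership of the FrameEncodeParams passed via |params| and forwards
// the encoded frame to the registered EncodedImageCallback.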
void VTCompressionOutputCallback(void* encoder,
                                 void* params,
                                 OSStatus status,
                                 VTEncodeInfoFlags info_flags,
                                 CMSampleBufferRef sample_buffer) {
  rtc::scoped_ptr<FrameEncodeParams> encode_params(
      reinterpret_cast<FrameEncodeParams*>(params));
  if (status != noErr) {
    LOG(LS_ERROR) << "H264 encoding failed.";
    return;
  }
  if (info_flags & kVTEncodeInfo_FrameDropped) {
    LOG(LS_INFO) << "H264 encode dropped frame.";
  }

  bool is_keyframe = false;
  CFArrayRef attachments =
      CMSampleBufferGetSampleAttachmentsArray(sample_buffer, 0);
  if (attachments != nullptr && CFArrayGetCount(attachments)) {
    CFDictionaryRef attachment =
        static_cast<CFDictionaryRef>(CFArrayGetValueAtIndex(attachments, 0));
    is_keyframe =
        !CFDictionaryContainsKey(attachment, kCMSampleAttachmentKey_NotSync);
  }

  // Convert the sample buffer into a buffer suitable for RTP packetization.
  // TODO(tkchin): Allocate buffers through a pool.
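  // H264CMSampleBufferToAnnexBBuffer rewrites the AVCC (length-prefixed)
  // sample into Annex B (start-code delimited) NALUs and fills in the
  // fragmentation header with the NALU offsets.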
  rtc::scoped_ptr<rtc::Buffer> buffer(new rtc::Buffer());
  rtc::scoped_ptr<webrtc::RTPFragmentationHeader> header;
  if (!H264CMSampleBufferToAnnexBBuffer(sample_buffer, is_keyframe,
                                        buffer.get(), header.accept())) {
    return;
  }
  webrtc::EncodedImage frame(buffer->data(), buffer->size(), buffer->size());
  frame._encodedWidth = encode_params->width;
  frame._encodedHeight = encode_params->height;
  frame._completeFrame = true;
  frame._frameType =
      is_keyframe ? webrtc::kVideoFrameKey : webrtc::kVideoFrameDelta;
  frame.capture_time_ms_ = encode_params->render_time_ms;
  frame._timeStamp = encode_params->timestamp;

  int result = encode_params->callback->Encoded(
      frame, &(encode_params->codec_specific_info), header.get());
  if (result != 0) {
    LOG(LS_ERROR) << "Encoded callback failed: " << result;
  }
}

}  // namespace internal

namespace webrtc {

H264VideoToolboxEncoder::H264VideoToolboxEncoder()
    : callback_(nullptr), compression_session_(nullptr) {}

H264VideoToolboxEncoder::~H264VideoToolboxEncoder() {
  DestroyCompressionSession();
}

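// Illustrative call sequence (a sketch, not code from this file; |codec|,
// |sink| and |frame| are hypothetical caller-side variables):
//   H264VideoToolboxEncoder encoder;
//   encoder.InitEncode(&codec, 1 /* number_of_cores */,
//                      1200 /* max_payload_size */);
//   encoder.RegisterEncodeCompleteCallback(&sink);
//   encoder.Encode(frame, nullptr, nullptr);  // Output arrives on |sink|.
//   encoder.Release();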
int H264VideoToolboxEncoder::InitEncode(const VideoCodec* codec_settings,
                                        int number_of_cores,
                                        size_t max_payload_size) {
  RTC_DCHECK(codec_settings);
  RTC_DCHECK_EQ(codec_settings->codecType, kVideoCodecH264);
  // TODO(tkchin): We may need to enforce width/height dimension restrictions
  // to match what the encoder supports.
  width_ = codec_settings->width;
  height_ = codec_settings->height;
  // We can only set average bitrate on the HW encoder.
  bitrate_ = codec_settings->startBitrate * 1000;

  // TODO(tkchin): Try setting payload size via
  // kVTCompressionPropertyKey_MaxH264SliceBytes.

  return ResetCompressionSession();
}

int H264VideoToolboxEncoder::Encode(
    const VideoFrame& input_image,
    const CodecSpecificInfo* codec_specific_info,
    const std::vector<FrameType>* frame_types) {
  if (input_image.IsZeroSize()) {
    // It's possible to get zero sizes as a signal to produce keyframes (this
    // happens for internal sources). But this shouldn't happen in
    // webrtcvideoengine2.
    RTC_NOTREACHED();
    return WEBRTC_VIDEO_CODEC_OK;
  }
  if (!callback_ || !compression_session_) {
    return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
  }

  // Get a pixel buffer from the pool and copy frame data over.
  CVPixelBufferPoolRef pixel_buffer_pool =
      VTCompressionSessionGetPixelBufferPool(compression_session_);
  CVPixelBufferRef pixel_buffer = nullptr;
  CVReturn ret = CVPixelBufferPoolCreatePixelBuffer(nullptr, pixel_buffer_pool,
                                                    &pixel_buffer);
  if (ret != kCVReturnSuccess) {
    LOG(LS_ERROR) << "Failed to create pixel buffer: " << ret;
    // We may want to drop frames here instead, since failure most likely means
    // that the pool is empty.
    return WEBRTC_VIDEO_CODEC_ERROR;
  }
  RTC_DCHECK(pixel_buffer);
  if (!internal::CopyVideoFrameToPixelBuffer(input_image, pixel_buffer)) {
    LOG(LS_ERROR) << "Failed to copy frame data.";
    CVBufferRelease(pixel_buffer);
    return WEBRTC_VIDEO_CODEC_ERROR;
  }

  // Check if we need a keyframe.
  bool is_keyframe_required = false;
  if (frame_types) {
    for (auto frame_type : *frame_types) {
      if (frame_type == kVideoFrameKey) {
        is_keyframe_required = true;
        break;
      }
    }
  }

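  // CMTimeMake(value, 1000) expresses the presentation timestamp with a
  // millisecond timescale, matching render_time_ms().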
  CMTime presentation_time_stamp =
      CMTimeMake(input_image.render_time_ms(), 1000);
  CFDictionaryRef frame_properties = nullptr;
  if (is_keyframe_required) {
    CFTypeRef keys[] = {kVTEncodeFrameOptionKey_ForceKeyFrame};
    CFTypeRef values[] = {kCFBooleanTrue};
    frame_properties = internal::CreateCFDictionary(keys, values, 1);
  }
  rtc::scoped_ptr<internal::FrameEncodeParams> encode_params;
  encode_params.reset(new internal::FrameEncodeParams(
      callback_, codec_specific_info, width_, height_,
      input_image.render_time_ms(), input_image.timestamp()));
  VTCompressionSessionEncodeFrame(
      compression_session_, pixel_buffer, presentation_time_stamp,
      kCMTimeInvalid, frame_properties, encode_params.release(), nullptr);
  if (frame_properties) {
    CFRelease(frame_properties);
  }
  if (pixel_buffer) {
    CVBufferRelease(pixel_buffer);
  }
  return WEBRTC_VIDEO_CODEC_OK;
}

int H264VideoToolboxEncoder::RegisterEncodeCompleteCallback(
    EncodedImageCallback* callback) {
  callback_ = callback;
  return WEBRTC_VIDEO_CODEC_OK;
}

int H264VideoToolboxEncoder::SetChannelParameters(uint32_t packet_loss,
                                                  int64_t rtt) {
  // The encoder doesn't know anything about packet loss or RTT, so just
  // return.
  return WEBRTC_VIDEO_CODEC_OK;
}

int H264VideoToolboxEncoder::SetRates(uint32_t new_bitrate_kbit,
                                      uint32_t frame_rate) {
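  // kVTCompressionPropertyKey_AverageBitRate is expressed in bits per second,
  // so convert from kbit/s.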
  bitrate_ = new_bitrate_kbit * 1000;
  if (compression_session_) {
    internal::SetVTSessionProperty(compression_session_,
                                   kVTCompressionPropertyKey_AverageBitRate,
                                   bitrate_);
  }
  return WEBRTC_VIDEO_CODEC_OK;
}

int H264VideoToolboxEncoder::Release() {
  callback_ = nullptr;
  // Need to reset so that the session is invalidated and won't use the
  // callback anymore.
  return ResetCompressionSession();
}

int H264VideoToolboxEncoder::ResetCompressionSession() {
  DestroyCompressionSession();

  // Set source image buffer attributes. These attributes will be present on
  // buffers retrieved from the encoder's pixel buffer pool.
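  // The pixel format requested here is NV12 (biplanar full-range 4:2:0), which
  // is the layout that CopyVideoFrameToPixelBuffer() writes into.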
  const size_t attributes_size = 3;
  CFTypeRef keys[attributes_size] = {
#if defined(WEBRTC_IOS)
    kCVPixelBufferOpenGLESCompatibilityKey,
#elif defined(WEBRTC_MAC)
    kCVPixelBufferOpenGLCompatibilityKey,
#endif
    kCVPixelBufferIOSurfacePropertiesKey,
    kCVPixelBufferPixelFormatTypeKey
  };
  CFDictionaryRef io_surface_value =
      internal::CreateCFDictionary(nullptr, nullptr, 0);
  int64_t nv12type = kCVPixelFormatType_420YpCbCr8BiPlanarFullRange;
  CFNumberRef pixel_format =
      CFNumberCreate(nullptr, kCFNumberLongType, &nv12type);
  CFTypeRef values[attributes_size] = {kCFBooleanTrue, io_surface_value,
                                       pixel_format};
  CFDictionaryRef source_attributes =
      internal::CreateCFDictionary(keys, values, attributes_size);
  if (io_surface_value) {
    CFRelease(io_surface_value);
    io_surface_value = nullptr;
  }
  if (pixel_format) {
    CFRelease(pixel_format);
    pixel_format = nullptr;
  }
  OSStatus status = VTCompressionSessionCreate(
      nullptr,  // use default allocator
      width_, height_, kCMVideoCodecType_H264,
      nullptr,  // use default encoder
      source_attributes,
      nullptr,  // use default compressed data allocator
      internal::VTCompressionOutputCallback, this, &compression_session_);
  if (source_attributes) {
    CFRelease(source_attributes);
    source_attributes = nullptr;
  }
  if (status != noErr) {
    LOG(LS_ERROR) << "Failed to create compression session: " << status;
    return WEBRTC_VIDEO_CODEC_ERROR;
  }
  ConfigureCompressionSession();
  return WEBRTC_VIDEO_CODEC_OK;
}

void H264VideoToolboxEncoder::ConfigureCompressionSession() {
  RTC_DCHECK(compression_session_);
  internal::SetVTSessionProperty(compression_session_,
                                 kVTCompressionPropertyKey_RealTime, true);
  internal::SetVTSessionProperty(compression_session_,
                                 kVTCompressionPropertyKey_ProfileLevel,
                                 kVTProfileLevel_H264_Baseline_AutoLevel);
  internal::SetVTSessionProperty(
      compression_session_, kVTCompressionPropertyKey_AverageBitRate, bitrate_);
  internal::SetVTSessionProperty(compression_session_,
                                 kVTCompressionPropertyKey_AllowFrameReordering,
                                 false);
  // TODO(tkchin): Look at entropy mode and colorspace matrices.
  // TODO(tkchin): Investigate to see if there's any way to make this work.
  // May need it to interop with Android. Currently this call just fails.
  // On inspecting encoder output on iOS8, this value is set to 6.
  // internal::SetVTSessionProperty(compression_session_,
  //     kVTCompressionPropertyKey_MaxFrameDelayCount,
  //     1);
  // TODO(tkchin): See if enforcing keyframe frequency is beneficial in any
  // way.
  // internal::SetVTSessionProperty(
  //     compression_session_,
  //     kVTCompressionPropertyKey_MaxKeyFrameInterval, 240);
  // internal::SetVTSessionProperty(
  //     compression_session_,
  //     kVTCompressionPropertyKey_MaxKeyFrameIntervalDuration, 240);
}

void H264VideoToolboxEncoder::DestroyCompressionSession() {
  if (compression_session_) {
    VTCompressionSessionInvalidate(compression_session_);
    CFRelease(compression_session_);
    compression_session_ = nullptr;
  }
}

const char* H264VideoToolboxEncoder::ImplementationName() const {
  return "VideoToolbox";
}

}  // namespace webrtc

#endif  // defined(WEBRTC_VIDEO_TOOLBOX_SUPPORTED)