Home | History | Annotate | Download | only in source
      1 /*
      2  *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
#include "webrtc/modules/rtp_rtcp/source/h264_sps_parser.h"

#include <limits>

#include "webrtc/base/bitbuffer.h"
#include "webrtc/base/bytebuffer.h"
#include "webrtc/base/logging.h"
     16 
     17 #define RETURN_FALSE_ON_FAIL(x) \
     18   if (!(x)) {                   \
     19     return false;               \
     20   }
     21 
     22 namespace webrtc {
     23 
     24 H264SpsParser::H264SpsParser(const uint8_t* sps, size_t byte_length)
     25     : sps_(sps), byte_length_(byte_length), width_(), height_() {
     26 }
     27 
     28 bool H264SpsParser::Parse() {
     29   // General note: this is based off the 02/2014 version of the H.264 standard.
     30   // You can find it on this page:
     31   // http://www.itu.int/rec/T-REC-H.264
     32 
     33   const char* sps_bytes = reinterpret_cast<const char*>(sps_);
     34   // First, parse out rbsp, which is basically the source buffer minus emulation
     35   // bytes (the last byte of a 0x00 0x00 0x03 sequence). RBSP is defined in
     36   // section 7.3.1 of the H.264 standard.
     37   rtc::ByteBuffer rbsp_buffer;
     38   for (size_t i = 0; i < byte_length_;) {
     39     // Be careful about over/underflow here. byte_length_ - 3 can underflow, and
     40     // i + 3 can overflow, but byte_length_ - i can't, because i < byte_length_
     41     // above, and that expression will produce the number of bytes left in
     42     // the stream including the byte at i.
     43     if (byte_length_ - i >= 3 && sps_[i] == 0 && sps_[i + 1] == 0 &&
     44         sps_[i + 2] == 3) {
     45       // Two rbsp bytes + the emulation byte.
     46       rbsp_buffer.WriteBytes(sps_bytes + i, 2);
     47       i += 3;
     48     } else {
     49       // Single rbsp byte.
     50       rbsp_buffer.WriteBytes(sps_bytes + i, 1);
     51       i++;
     52     }
     53   }
     54 
     55   // Now, we need to use a bit buffer to parse through the actual AVC SPS
     56   // format. See Section 7.3.2.1.1 ("Sequence parameter set data syntax") of the
     57   // H.264 standard for a complete description.
     58   // Since we only care about resolution, we ignore the majority of fields, but
     59   // we still have to actively parse through a lot of the data, since many of
     60   // the fields have variable size.
     61   // We're particularly interested in:
     62   // chroma_format_idc -> affects crop units
     63   // pic_{width,height}_* -> resolution of the frame in macroblocks (16x16).
     64   // frame_crop_*_offset -> crop information
     65   rtc::BitBuffer parser(reinterpret_cast<const uint8_t*>(rbsp_buffer.Data()),
     66                         rbsp_buffer.Length());
     67 
     68   // The golomb values we have to read, not just consume.
     69   uint32_t golomb_ignored;
     70 
     71   // separate_colour_plane_flag is optional (assumed 0), but has implications
     72   // about the ChromaArrayType, which modifies how we treat crop coordinates.
     73   uint32_t separate_colour_plane_flag = 0;
     74   // chroma_format_idc will be ChromaArrayType if separate_colour_plane_flag is
     75   // 0. It defaults to 1, when not specified.
     76   uint32_t chroma_format_idc = 1;
     77 
     78   // profile_idc: u(8). We need it to determine if we need to read/skip chroma
     79   // formats.
     80   uint8_t profile_idc;
     81   RETURN_FALSE_ON_FAIL(parser.ReadUInt8(&profile_idc));
     82   // constraint_set0_flag through constraint_set5_flag + reserved_zero_2bits
     83   // 1 bit each for the flags + 2 bits = 8 bits = 1 byte.
     84   RETURN_FALSE_ON_FAIL(parser.ConsumeBytes(1));
     85   // level_idc: u(8)
     86   RETURN_FALSE_ON_FAIL(parser.ConsumeBytes(1));
     87   // seq_parameter_set_id: ue(v)
     88   RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
     89   // See if profile_idc has chroma format information.
     90   if (profile_idc == 100 || profile_idc == 110 || profile_idc == 122 ||
     91       profile_idc == 244 || profile_idc == 44 || profile_idc == 83 ||
     92       profile_idc == 86 || profile_idc == 118 || profile_idc == 128 ||
     93       profile_idc == 138 || profile_idc == 139 || profile_idc == 134) {
     94     // chroma_format_idc: ue(v)
     95     RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&chroma_format_idc));
     96     if (chroma_format_idc == 3) {
     97       // separate_colour_plane_flag: u(1)
     98       RETURN_FALSE_ON_FAIL(parser.ReadBits(&separate_colour_plane_flag, 1));
     99     }
    100     // bit_depth_luma_minus8: ue(v)
    101     RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
    102     // bit_depth_chroma_minus8: ue(v)
    103     RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
    104     // qpprime_y_zero_transform_bypass_flag: u(1)
    105     RETURN_FALSE_ON_FAIL(parser.ConsumeBits(1));
    106     // seq_scaling_matrix_present_flag: u(1)
    107     uint32_t seq_scaling_matrix_present_flag;
    108     RETURN_FALSE_ON_FAIL(parser.ReadBits(&seq_scaling_matrix_present_flag, 1));
    109     if (seq_scaling_matrix_present_flag) {
    110       // seq_scaling_list_present_flags. Either 8 or 12, depending on
    111       // chroma_format_idc.
    112       uint32_t seq_scaling_list_present_flags;
    113       if (chroma_format_idc != 3) {
    114         RETURN_FALSE_ON_FAIL(
    115             parser.ReadBits(&seq_scaling_list_present_flags, 8));
    116       } else {
    117         RETURN_FALSE_ON_FAIL(
    118             parser.ReadBits(&seq_scaling_list_present_flags, 12));
    119       }
    120       // We don't support reading the sequence scaling list, and we don't really
    121       // see/use them in practice, so we'll just reject the full sps if we see
    122       // any provided.
    123       if (seq_scaling_list_present_flags > 0) {
    124         LOG(LS_WARNING) << "SPS contains scaling lists, which are unsupported.";
    125         return false;
    126       }
    127     }
    128   }
    129   // log2_max_frame_num_minus4: ue(v)
    130   RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
    131   // pic_order_cnt_type: ue(v)
    132   uint32_t pic_order_cnt_type;
    133   RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&pic_order_cnt_type));
    134   if (pic_order_cnt_type == 0) {
    135     // log2_max_pic_order_cnt_lsb_minus4: ue(v)
    136     RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
    137   } else if (pic_order_cnt_type == 1) {
    138     // delta_pic_order_always_zero_flag: u(1)
    139     RETURN_FALSE_ON_FAIL(parser.ConsumeBits(1));
    140     // offset_for_non_ref_pic: se(v)
    141     RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
    142     // offset_for_top_to_bottom_field: se(v)
    143     RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
    144     // num_ref_frames_in_pic_order_cnt_cycle: ue(v)
    145     uint32_t num_ref_frames_in_pic_order_cnt_cycle;
    146     RETURN_FALSE_ON_FAIL(
    147         parser.ReadExponentialGolomb(&num_ref_frames_in_pic_order_cnt_cycle));
    148     for (size_t i = 0; i < num_ref_frames_in_pic_order_cnt_cycle; ++i) {
    149       // offset_for_ref_frame[i]: se(v)
    150       RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
    151     }
    152   }
    153   // max_num_ref_frames: ue(v)
    154   RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
    155   // gaps_in_frame_num_value_allowed_flag: u(1)
    156   RETURN_FALSE_ON_FAIL(parser.ConsumeBits(1));
    157   //
    158   // IMPORTANT ONES! Now we're getting to resolution. First we read the pic
    159   // width/height in macroblocks (16x16), which gives us the base resolution,
    160   // and then we continue on until we hit the frame crop offsets, which are used
    161   // to signify resolutions that aren't multiples of 16.
    162   //
    163   // pic_width_in_mbs_minus1: ue(v)
    164   uint32_t pic_width_in_mbs_minus1;
    165   RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&pic_width_in_mbs_minus1));
    166   // pic_height_in_map_units_minus1: ue(v)
    167   uint32_t pic_height_in_map_units_minus1;
    168   RETURN_FALSE_ON_FAIL(
    169       parser.ReadExponentialGolomb(&pic_height_in_map_units_minus1));
    170   // frame_mbs_only_flag: u(1)
    171   uint32_t frame_mbs_only_flag;
    172   RETURN_FALSE_ON_FAIL(parser.ReadBits(&frame_mbs_only_flag, 1));
    173   if (!frame_mbs_only_flag) {
    174     // mb_adaptive_frame_field_flag: u(1)
    175     RETURN_FALSE_ON_FAIL(parser.ConsumeBits(1));
    176   }
    177   // direct_8x8_inference_flag: u(1)
    178   RETURN_FALSE_ON_FAIL(parser.ConsumeBits(1));
    179   //
    180   // MORE IMPORTANT ONES! Now we're at the frame crop information.
    181   //
    182   // frame_cropping_flag: u(1)
    183   uint32_t frame_cropping_flag;
    184   uint32_t frame_crop_left_offset = 0;
    185   uint32_t frame_crop_right_offset = 0;
    186   uint32_t frame_crop_top_offset = 0;
    187   uint32_t frame_crop_bottom_offset = 0;
    188   RETURN_FALSE_ON_FAIL(parser.ReadBits(&frame_cropping_flag, 1));
    189   if (frame_cropping_flag) {
    190     // frame_crop_{left, right, top, bottom}_offset: ue(v)
    191     RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&frame_crop_left_offset));
    192     RETURN_FALSE_ON_FAIL(
    193         parser.ReadExponentialGolomb(&frame_crop_right_offset));
    194     RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&frame_crop_top_offset));
    195     RETURN_FALSE_ON_FAIL(
    196         parser.ReadExponentialGolomb(&frame_crop_bottom_offset));
    197   }
    198 
    199   // Far enough! We don't use the rest of the SPS.
    200 
    201   // Start with the resolution determined by the pic_width/pic_height fields.
    202   int width = 16 * (pic_width_in_mbs_minus1 + 1);
    203   int height =
    204       16 * (2 - frame_mbs_only_flag) * (pic_height_in_map_units_minus1 + 1);
    205 
    206   // Figure out the crop units in pixels. That's based on the chroma format's
    207   // sampling, which is indicated by chroma_format_idc.
    208   if (separate_colour_plane_flag || chroma_format_idc == 0) {
    209     frame_crop_bottom_offset *= (2 - frame_mbs_only_flag);
    210     frame_crop_top_offset *= (2 - frame_mbs_only_flag);
    211   } else if (!separate_colour_plane_flag && chroma_format_idc > 0) {
    212     // Width multipliers for formats 1 (4:2:0) and 2 (4:2:2).
    213     if (chroma_format_idc == 1 || chroma_format_idc == 2) {
    214       frame_crop_left_offset *= 2;
    215       frame_crop_right_offset *= 2;
    216     }
    217     // Height multipliers for format 1 (4:2:0).
    218     if (chroma_format_idc == 1) {
    219       frame_crop_top_offset *= 2;
    220       frame_crop_bottom_offset *= 2;
    221     }
    222   }
    223   // Subtract the crop for each dimension.
    224   width -= (frame_crop_left_offset + frame_crop_right_offset);
    225   height -= (frame_crop_top_offset + frame_crop_bottom_offset);
    226 
    227   width_ = width;
    228   height_ = height;
    229   return true;
    230 }
    231 
    232 }  // namespace webrtc
    233