Home | History | Annotate | Download | only in libstagefright
      1 /*
      2  * Copyright (C) 2010 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 //#define LOG_NDEBUG 0
     18 #define LOG_TAG "avc_utils"
     19 #include <utils/Log.h>
     20 
     21 #include "include/avc_utils.h"
     22 
     23 #include <media/stagefright/foundation/ABitReader.h>
     24 #include <media/stagefright/foundation/ADebug.h>
     25 #include <media/stagefright/MediaDefs.h>
     26 #include <media/stagefright/MediaErrors.h>
     27 #include <media/stagefright/MetaData.h>
     28 
     29 namespace android {
     30 
     31 unsigned parseUE(ABitReader *br) {
     32     unsigned numZeroes = 0;
     33     while (br->getBits(1) == 0) {
     34         ++numZeroes;
     35     }
     36 
     37     unsigned x = br->getBits(numZeroes);
     38 
     39     return x + (1u << numZeroes) - 1;
     40 }
     41 
     42 // Determine video dimensions from the sequence parameterset.
     43 void FindAVCDimensions(
     44         const sp<ABuffer> &seqParamSet, int32_t *width, int32_t *height) {
     45     ABitReader br(seqParamSet->data() + 1, seqParamSet->size() - 1);
     46 
     47     unsigned profile_idc = br.getBits(8);
     48     br.skipBits(16);
     49     parseUE(&br);  // seq_parameter_set_id
     50 
     51     unsigned chroma_format_idc = 1;  // 4:2:0 chroma format
     52 
     53     if (profile_idc == 100 || profile_idc == 110
     54             || profile_idc == 122 || profile_idc == 244
     55             || profile_idc == 44 || profile_idc == 83 || profile_idc == 86) {
     56         chroma_format_idc = parseUE(&br);
     57         if (chroma_format_idc == 3) {
     58             br.skipBits(1);  // residual_colour_transform_flag
     59         }
     60         parseUE(&br);  // bit_depth_luma_minus8
     61         parseUE(&br);  // bit_depth_chroma_minus8
     62         br.skipBits(1);  // qpprime_y_zero_transform_bypass_flag
     63         CHECK_EQ(br.getBits(1), 0u);  // seq_scaling_matrix_present_flag
     64     }
     65 
     66     parseUE(&br);  // log2_max_frame_num_minus4
     67     unsigned pic_order_cnt_type = parseUE(&br);
     68 
     69     if (pic_order_cnt_type == 0) {
     70         parseUE(&br);  // log2_max_pic_order_cnt_lsb_minus4
     71     } else if (pic_order_cnt_type == 1) {
     72         // offset_for_non_ref_pic, offset_for_top_to_bottom_field and
     73         // offset_for_ref_frame are technically se(v), but since we are
     74         // just skipping over them the midpoint does not matter.
     75 
     76         br.getBits(1);  // delta_pic_order_always_zero_flag
     77         parseUE(&br);  // offset_for_non_ref_pic
     78         parseUE(&br);  // offset_for_top_to_bottom_field
     79 
     80         unsigned num_ref_frames_in_pic_order_cnt_cycle = parseUE(&br);
     81         for (unsigned i = 0; i < num_ref_frames_in_pic_order_cnt_cycle; ++i) {
     82             parseUE(&br);  // offset_for_ref_frame
     83         }
     84     }
     85 
     86     parseUE(&br);  // num_ref_frames
     87     br.getBits(1);  // gaps_in_frame_num_value_allowed_flag
     88 
     89     unsigned pic_width_in_mbs_minus1 = parseUE(&br);
     90     unsigned pic_height_in_map_units_minus1 = parseUE(&br);
     91     unsigned frame_mbs_only_flag = br.getBits(1);
     92 
     93     *width = pic_width_in_mbs_minus1 * 16 + 16;
     94 
     95     *height = (2 - frame_mbs_only_flag)
     96         * (pic_height_in_map_units_minus1 * 16 + 16);
     97 
     98     if (!frame_mbs_only_flag) {
     99         br.getBits(1);  // mb_adaptive_frame_field_flag
    100     }
    101 
    102     br.getBits(1);  // direct_8x8_inference_flag
    103 
    104     if (br.getBits(1)) {  // frame_cropping_flag
    105         unsigned frame_crop_left_offset = parseUE(&br);
    106         unsigned frame_crop_right_offset = parseUE(&br);
    107         unsigned frame_crop_top_offset = parseUE(&br);
    108         unsigned frame_crop_bottom_offset = parseUE(&br);
    109 
    110         unsigned cropUnitX, cropUnitY;
    111         if (chroma_format_idc == 0  /* monochrome */) {
    112             cropUnitX = 1;
    113             cropUnitY = 2 - frame_mbs_only_flag;
    114         } else {
    115             unsigned subWidthC = (chroma_format_idc == 3) ? 1 : 2;
    116             unsigned subHeightC = (chroma_format_idc == 1) ? 2 : 1;
    117 
    118             cropUnitX = subWidthC;
    119             cropUnitY = subHeightC * (2 - frame_mbs_only_flag);
    120         }
    121 
    122         LOGV("frame_crop = (%u, %u, %u, %u), cropUnitX = %u, cropUnitY = %u",
    123              frame_crop_left_offset, frame_crop_right_offset,
    124              frame_crop_top_offset, frame_crop_bottom_offset,
    125              cropUnitX, cropUnitY);
    126 
    127         *width -=
    128             (frame_crop_left_offset + frame_crop_right_offset) * cropUnitX;
    129         *height -=
    130             (frame_crop_top_offset + frame_crop_bottom_offset) * cropUnitY;
    131     }
    132 }
    133 
    134 status_t getNextNALUnit(
    135         const uint8_t **_data, size_t *_size,
    136         const uint8_t **nalStart, size_t *nalSize,
    137         bool startCodeFollows) {
    138     const uint8_t *data = *_data;
    139     size_t size = *_size;
    140 
    141     *nalStart = NULL;
    142     *nalSize = 0;
    143 
    144     if (size == 0) {
    145         return -EAGAIN;
    146     }
    147 
    148     // Skip any number of leading 0x00.
    149 
    150     size_t offset = 0;
    151     while (offset < size && data[offset] == 0x00) {
    152         ++offset;
    153     }
    154 
    155     if (offset == size) {
    156         return -EAGAIN;
    157     }
    158 
    159     // A valid startcode consists of at least two 0x00 bytes followed by 0x01.
    160 
    161     if (offset < 2 || data[offset] != 0x01) {
    162         return ERROR_MALFORMED;
    163     }
    164 
    165     ++offset;
    166 
    167     size_t startOffset = offset;
    168 
    169     for (;;) {
    170         while (offset < size && data[offset] != 0x01) {
    171             ++offset;
    172         }
    173 
    174         if (offset == size) {
    175             if (startCodeFollows) {
    176                 offset = size + 2;
    177                 break;
    178             }
    179 
    180             return -EAGAIN;
    181         }
    182 
    183         if (data[offset - 1] == 0x00 && data[offset - 2] == 0x00) {
    184             break;
    185         }
    186 
    187         ++offset;
    188     }
    189 
    190     size_t endOffset = offset - 2;
    191     while (endOffset > startOffset + 1 && data[endOffset - 1] == 0x00) {
    192         --endOffset;
    193     }
    194 
    195     *nalStart = &data[startOffset];
    196     *nalSize = endOffset - startOffset;
    197 
    198     if (offset + 2 < size) {
    199         *_data = &data[offset - 2];
    200         *_size = size - offset + 2;
    201     } else {
    202         *_data = NULL;
    203         *_size = 0;
    204     }
    205 
    206     return OK;
    207 }
    208 
    209 static sp<ABuffer> FindNAL(
    210         const uint8_t *data, size_t size, unsigned nalType,
    211         size_t *stopOffset) {
    212     const uint8_t *nalStart;
    213     size_t nalSize;
    214     while (getNextNALUnit(&data, &size, &nalStart, &nalSize, true) == OK) {
    215         if ((nalStart[0] & 0x1f) == nalType) {
    216             sp<ABuffer> buffer = new ABuffer(nalSize);
    217             memcpy(buffer->data(), nalStart, nalSize);
    218             return buffer;
    219         }
    220     }
    221 
    222     return NULL;
    223 }
    224 
    225 const char *AVCProfileToString(uint8_t profile) {
    226     switch (profile) {
    227         case kAVCProfileBaseline:
    228             return "Baseline";
    229         case kAVCProfileMain:
    230             return "Main";
    231         case kAVCProfileExtended:
    232             return "Extended";
    233         case kAVCProfileHigh:
    234             return "High";
    235         case kAVCProfileHigh10:
    236             return "High 10";
    237         case kAVCProfileHigh422:
    238             return "High 422";
    239         case kAVCProfileHigh444:
    240             return "High 444";
    241         case kAVCProfileCAVLC444Intra:
    242             return "CAVLC 444 Intra";
    243         default:   return "Unknown";
    244     }
    245 }
    246 
    247 sp<MetaData> MakeAVCCodecSpecificData(const sp<ABuffer> &accessUnit) {
    248     const uint8_t *data = accessUnit->data();
    249     size_t size = accessUnit->size();
    250 
    251     sp<ABuffer> seqParamSet = FindNAL(data, size, 7, NULL);
    252     if (seqParamSet == NULL) {
    253         return NULL;
    254     }
    255 
    256     int32_t width, height;
    257     FindAVCDimensions(seqParamSet, &width, &height);
    258 
    259     size_t stopOffset;
    260     sp<ABuffer> picParamSet = FindNAL(data, size, 8, &stopOffset);
    261     CHECK(picParamSet != NULL);
    262 
    263     size_t csdSize =
    264         1 + 3 + 1 + 1
    265         + 2 * 1 + seqParamSet->size()
    266         + 1 + 2 * 1 + picParamSet->size();
    267 
    268     sp<ABuffer> csd = new ABuffer(csdSize);
    269     uint8_t *out = csd->data();
    270 
    271     *out++ = 0x01;  // configurationVersion
    272     memcpy(out, seqParamSet->data() + 1, 3);  // profile/level...
    273 
    274     uint8_t profile = out[0];
    275     uint8_t level = out[2];
    276 
    277     out += 3;
    278     *out++ = (0x3f << 2) | 1;  // lengthSize == 2 bytes
    279     *out++ = 0xe0 | 1;
    280 
    281     *out++ = seqParamSet->size() >> 8;
    282     *out++ = seqParamSet->size() & 0xff;
    283     memcpy(out, seqParamSet->data(), seqParamSet->size());
    284     out += seqParamSet->size();
    285 
    286     *out++ = 1;
    287 
    288     *out++ = picParamSet->size() >> 8;
    289     *out++ = picParamSet->size() & 0xff;
    290     memcpy(out, picParamSet->data(), picParamSet->size());
    291 
    292 #if 0
    293     LOGI("AVC seq param set");
    294     hexdump(seqParamSet->data(), seqParamSet->size());
    295 #endif
    296 
    297     sp<MetaData> meta = new MetaData;
    298     meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_AVC);
    299 
    300     meta->setData(kKeyAVCC, kTypeAVCC, csd->data(), csd->size());
    301     meta->setInt32(kKeyWidth, width);
    302     meta->setInt32(kKeyHeight, height);
    303 
    304     LOGI("found AVC codec config (%d x %d, %s-profile level %d.%d)",
    305          width, height, AVCProfileToString(profile), level / 10, level % 10);
    306 
    307     return meta;
    308 }
    309 
    310 bool IsIDR(const sp<ABuffer> &buffer) {
    311     const uint8_t *data = buffer->data();
    312     size_t size = buffer->size();
    313 
    314     bool foundIDR = false;
    315 
    316     const uint8_t *nalStart;
    317     size_t nalSize;
    318     while (getNextNALUnit(&data, &size, &nalStart, &nalSize, true) == OK) {
    319         CHECK_GT(nalSize, 0u);
    320 
    321         unsigned nalType = nalStart[0] & 0x1f;
    322 
    323         if (nalType == 5) {
    324             foundIDR = true;
    325             break;
    326         }
    327     }
    328 
    329     return foundIDR;
    330 }
    331 
    332 bool IsAVCReferenceFrame(const sp<ABuffer> &accessUnit) {
    333     const uint8_t *data = accessUnit->data();
    334     size_t size = accessUnit->size();
    335 
    336     const uint8_t *nalStart;
    337     size_t nalSize;
    338     while (getNextNALUnit(&data, &size, &nalStart, &nalSize, true) == OK) {
    339         CHECK_GT(nalSize, 0u);
    340 
    341         unsigned nalType = nalStart[0] & 0x1f;
    342 
    343         if (nalType == 5) {
    344             return true;
    345         } else if (nalType == 1) {
    346             unsigned nal_ref_idc = (nalStart[0] >> 5) & 3;
    347             return nal_ref_idc != 0;
    348         }
    349     }
    350 
    351     return true;
    352 }
    353 
    354 sp<MetaData> MakeAACCodecSpecificData(
    355         unsigned profile, unsigned sampling_freq_index,
    356         unsigned channel_configuration) {
    357     sp<MetaData> meta = new MetaData;
    358     meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_AAC);
    359 
    360     CHECK_LE(sampling_freq_index, 11u);
    361     static const int32_t kSamplingFreq[] = {
    362         96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050,
    363         16000, 12000, 11025, 8000
    364     };
    365     meta->setInt32(kKeySampleRate, kSamplingFreq[sampling_freq_index]);
    366     meta->setInt32(kKeyChannelCount, channel_configuration);
    367 
    368     static const uint8_t kStaticESDS[] = {
    369         0x03, 22,
    370         0x00, 0x00,     // ES_ID
    371         0x00,           // streamDependenceFlag, URL_Flag, OCRstreamFlag
    372 
    373         0x04, 17,
    374         0x40,                       // Audio ISO/IEC 14496-3
    375         0x00, 0x00, 0x00, 0x00,
    376         0x00, 0x00, 0x00, 0x00,
    377         0x00, 0x00, 0x00, 0x00,
    378 
    379         0x05, 2,
    380         // AudioSpecificInfo follows
    381 
    382         // oooo offf fccc c000
    383         // o - audioObjectType
    384         // f - samplingFreqIndex
    385         // c - channelConfig
    386     };
    387     sp<ABuffer> csd = new ABuffer(sizeof(kStaticESDS) + 2);
    388     memcpy(csd->data(), kStaticESDS, sizeof(kStaticESDS));
    389 
    390     csd->data()[sizeof(kStaticESDS)] =
    391         ((profile + 1) << 3) | (sampling_freq_index >> 1);
    392 
    393     csd->data()[sizeof(kStaticESDS) + 1] =
    394         ((sampling_freq_index << 7) & 0x80) | (channel_configuration << 3);
    395 
    396     meta->setData(kKeyESDS, 0, csd->data(), csd->size());
    397 
    398     return meta;
    399 }
    400 
    401 bool ExtractDimensionsFromVOLHeader(
    402         const uint8_t *data, size_t size, int32_t *width, int32_t *height) {
    403     ABitReader br(&data[4], size - 4);
    404     br.skipBits(1);  // random_accessible_vol
    405     unsigned video_object_type_indication = br.getBits(8);
    406 
    407     CHECK_NE(video_object_type_indication,
    408              0x21u /* Fine Granularity Scalable */);
    409 
    410     unsigned video_object_layer_verid;
    411     unsigned video_object_layer_priority;
    412     if (br.getBits(1)) {
    413         video_object_layer_verid = br.getBits(4);
    414         video_object_layer_priority = br.getBits(3);
    415     }
    416     unsigned aspect_ratio_info = br.getBits(4);
    417     if (aspect_ratio_info == 0x0f /* extended PAR */) {
    418         br.skipBits(8);  // par_width
    419         br.skipBits(8);  // par_height
    420     }
    421     if (br.getBits(1)) {  // vol_control_parameters
    422         br.skipBits(2);  // chroma_format
    423         br.skipBits(1);  // low_delay
    424         if (br.getBits(1)) {  // vbv_parameters
    425             br.skipBits(15);  // first_half_bit_rate
    426             CHECK(br.getBits(1));  // marker_bit
    427             br.skipBits(15);  // latter_half_bit_rate
    428             CHECK(br.getBits(1));  // marker_bit
    429             br.skipBits(15);  // first_half_vbv_buffer_size
    430             CHECK(br.getBits(1));  // marker_bit
    431             br.skipBits(3);  // latter_half_vbv_buffer_size
    432             br.skipBits(11);  // first_half_vbv_occupancy
    433             CHECK(br.getBits(1));  // marker_bit
    434             br.skipBits(15);  // latter_half_vbv_occupancy
    435             CHECK(br.getBits(1));  // marker_bit
    436         }
    437     }
    438     unsigned video_object_layer_shape = br.getBits(2);
    439     CHECK_EQ(video_object_layer_shape, 0x00u /* rectangular */);
    440 
    441     CHECK(br.getBits(1));  // marker_bit
    442     unsigned vop_time_increment_resolution = br.getBits(16);
    443     CHECK(br.getBits(1));  // marker_bit
    444 
    445     if (br.getBits(1)) {  // fixed_vop_rate
    446         // range [0..vop_time_increment_resolution)
    447 
    448         // vop_time_increment_resolution
    449         // 2 => 0..1, 1 bit
    450         // 3 => 0..2, 2 bits
    451         // 4 => 0..3, 2 bits
    452         // 5 => 0..4, 3 bits
    453         // ...
    454 
    455         CHECK_GT(vop_time_increment_resolution, 0u);
    456         --vop_time_increment_resolution;
    457 
    458         unsigned numBits = 0;
    459         while (vop_time_increment_resolution > 0) {
    460             ++numBits;
    461             vop_time_increment_resolution >>= 1;
    462         }
    463 
    464         br.skipBits(numBits);  // fixed_vop_time_increment
    465     }
    466 
    467     CHECK(br.getBits(1));  // marker_bit
    468     unsigned video_object_layer_width = br.getBits(13);
    469     CHECK(br.getBits(1));  // marker_bit
    470     unsigned video_object_layer_height = br.getBits(13);
    471     CHECK(br.getBits(1));  // marker_bit
    472 
    473     unsigned interlaced = br.getBits(1);
    474 
    475     *width = video_object_layer_width;
    476     *height = video_object_layer_height;
    477 
    478     return true;
    479 }
    480 
    481 bool GetMPEGAudioFrameSize(
    482         uint32_t header, size_t *frame_size,
    483         int *out_sampling_rate, int *out_channels,
    484         int *out_bitrate, int *out_num_samples) {
    485     *frame_size = 0;
    486 
    487     if (out_sampling_rate) {
    488         *out_sampling_rate = 0;
    489     }
    490 
    491     if (out_channels) {
    492         *out_channels = 0;
    493     }
    494 
    495     if (out_bitrate) {
    496         *out_bitrate = 0;
    497     }
    498 
    499     if (out_num_samples) {
    500         *out_num_samples = 1152;
    501     }
    502 
    503     if ((header & 0xffe00000) != 0xffe00000) {
    504         return false;
    505     }
    506 
    507     unsigned version = (header >> 19) & 3;
    508 
    509     if (version == 0x01) {
    510         return false;
    511     }
    512 
    513     unsigned layer = (header >> 17) & 3;
    514 
    515     if (layer == 0x00) {
    516         return false;
    517     }
    518 
    519     unsigned protection = (header >> 16) & 1;
    520 
    521     unsigned bitrate_index = (header >> 12) & 0x0f;
    522 
    523     if (bitrate_index == 0 || bitrate_index == 0x0f) {
    524         // Disallow "free" bitrate.
    525         return false;
    526     }
    527 
    528     unsigned sampling_rate_index = (header >> 10) & 3;
    529 
    530     if (sampling_rate_index == 3) {
    531         return false;
    532     }
    533 
    534     static const int kSamplingRateV1[] = { 44100, 48000, 32000 };
    535     int sampling_rate = kSamplingRateV1[sampling_rate_index];
    536     if (version == 2 /* V2 */) {
    537         sampling_rate /= 2;
    538     } else if (version == 0 /* V2.5 */) {
    539         sampling_rate /= 4;
    540     }
    541 
    542     unsigned padding = (header >> 9) & 1;
    543 
    544     if (layer == 3) {
    545         // layer I
    546 
    547         static const int kBitrateV1[] = {
    548             32, 64, 96, 128, 160, 192, 224, 256,
    549             288, 320, 352, 384, 416, 448
    550         };
    551 
    552         static const int kBitrateV2[] = {
    553             32, 48, 56, 64, 80, 96, 112, 128,
    554             144, 160, 176, 192, 224, 256
    555         };
    556 
    557         int bitrate =
    558             (version == 3 /* V1 */)
    559                 ? kBitrateV1[bitrate_index - 1]
    560                 : kBitrateV2[bitrate_index - 1];
    561 
    562         if (out_bitrate) {
    563             *out_bitrate = bitrate;
    564         }
    565 
    566         *frame_size = (12000 * bitrate / sampling_rate + padding) * 4;
    567 
    568         if (out_num_samples) {
    569             *out_num_samples = 384;
    570         }
    571     } else {
    572         // layer II or III
    573 
    574         static const int kBitrateV1L2[] = {
    575             32, 48, 56, 64, 80, 96, 112, 128,
    576             160, 192, 224, 256, 320, 384
    577         };
    578 
    579         static const int kBitrateV1L3[] = {
    580             32, 40, 48, 56, 64, 80, 96, 112,
    581             128, 160, 192, 224, 256, 320
    582         };
    583 
    584         static const int kBitrateV2[] = {
    585             8, 16, 24, 32, 40, 48, 56, 64,
    586             80, 96, 112, 128, 144, 160
    587         };
    588 
    589         int bitrate;
    590         if (version == 3 /* V1 */) {
    591             bitrate = (layer == 2 /* L2 */)
    592                 ? kBitrateV1L2[bitrate_index - 1]
    593                 : kBitrateV1L3[bitrate_index - 1];
    594 
    595             if (out_num_samples) {
    596                 *out_num_samples = 1152;
    597             }
    598         } else {
    599             // V2 (or 2.5)
    600 
    601             bitrate = kBitrateV2[bitrate_index - 1];
    602             if (out_num_samples) {
    603                 *out_num_samples = 576;
    604             }
    605         }
    606 
    607         if (out_bitrate) {
    608             *out_bitrate = bitrate;
    609         }
    610 
    611         if (version == 3 /* V1 */) {
    612             *frame_size = 144000 * bitrate / sampling_rate + padding;
    613         } else {
    614             // V2 or V2.5
    615             *frame_size = 72000 * bitrate / sampling_rate + padding;
    616         }
    617     }
    618 
    619     if (out_sampling_rate) {
    620         *out_sampling_rate = sampling_rate;
    621     }
    622 
    623     if (out_channels) {
    624         int channel_mode = (header >> 6) & 3;
    625 
    626         *out_channels = (channel_mode == 3) ? 1 : 2;
    627     }
    628 
    629     return true;
    630 }
    631 
    632 }  // namespace android
    633 
    634