Home | History | Annotate | Download | only in mpeg2ts
      1 /*
      2  * Copyright (C) 2010 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 //#define LOG_NDEBUG 0
     18 #define LOG_TAG "ESQueue"
     19 #include <media/stagefright/foundation/ADebug.h>
     20 
     21 #include "ESQueue.h"
     22 
     23 #include <media/stagefright/foundation/hexdump.h>
     24 #include <media/stagefright/foundation/ABitReader.h>
     25 #include <media/stagefright/foundation/ABuffer.h>
     26 #include <media/stagefright/foundation/AMessage.h>
     27 #include <media/stagefright/MediaErrors.h>
     28 #include <media/stagefright/MediaDefs.h>
     29 #include <media/stagefright/MetaData.h>
     30 #include <media/stagefright/Utils.h>
     31 
     32 #include "include/avc_utils.h"
     33 
     34 #include <inttypes.h>
     35 #include <netinet/in.h>
     36 
     37 namespace android {
     38 
     39 ElementaryStreamQueue::ElementaryStreamQueue(Mode mode, uint32_t flags)
     40     : mMode(mode),
     41       mFlags(flags) {
     42 }
     43 
     44 sp<MetaData> ElementaryStreamQueue::getFormat() {
     45     return mFormat;
     46 }
     47 
     48 void ElementaryStreamQueue::clear(bool clearFormat) {
     49     if (mBuffer != NULL) {
     50         mBuffer->setRange(0, 0);
     51     }
     52 
     53     mRangeInfos.clear();
     54 
     55     if (clearFormat) {
     56         mFormat.clear();
     57     }
     58 }
     59 
     60 // Parse AC3 header assuming the current ptr is start position of syncframe,
     61 // update metadata only applicable, and return the payload size
     62 static unsigned parseAC3SyncFrame(
     63         const uint8_t *ptr, size_t size, sp<MetaData> *metaData) {
     64     static const unsigned channelCountTable[] = {2, 1, 2, 3, 3, 4, 4, 5};
     65     static const unsigned samplingRateTable[] = {48000, 44100, 32000};
     66     static const unsigned rates[] = {32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256,
     67             320, 384, 448, 512, 576, 640};
     68 
     69     static const unsigned frameSizeTable[19][3] = {
     70         { 64, 69, 96 },
     71         { 80, 87, 120 },
     72         { 96, 104, 144 },
     73         { 112, 121, 168 },
     74         { 128, 139, 192 },
     75         { 160, 174, 240 },
     76         { 192, 208, 288 },
     77         { 224, 243, 336 },
     78         { 256, 278, 384 },
     79         { 320, 348, 480 },
     80         { 384, 417, 576 },
     81         { 448, 487, 672 },
     82         { 512, 557, 768 },
     83         { 640, 696, 960 },
     84         { 768, 835, 1152 },
     85         { 896, 975, 1344 },
     86         { 1024, 1114, 1536 },
     87         { 1152, 1253, 1728 },
     88         { 1280, 1393, 1920 },
     89     };
     90 
     91     ABitReader bits(ptr, size);
     92     unsigned syncStartPos = 0;  // in bytes
     93     if (bits.numBitsLeft() < 16) {
     94         return 0;
     95     }
     96     if (bits.getBits(16) != 0x0B77) {
     97         return 0;
     98     }
     99 
    100     if (bits.numBitsLeft() < 16 + 2 + 6 + 5 + 3 + 3) {
    101         ALOGV("Not enough bits left for further parsing");
    102         return 0;
    103     }
    104     bits.skipBits(16);  // crc1
    105 
    106     unsigned fscod = bits.getBits(2);
    107     if (fscod == 3) {
    108         ALOGW("Incorrect fscod in AC3 header");
    109         return 0;
    110     }
    111 
    112     unsigned frmsizecod = bits.getBits(6);
    113     if (frmsizecod > 37) {
    114         ALOGW("Incorrect frmsizecod in AC3 header");
    115         return 0;
    116     }
    117 
    118     unsigned bsid = bits.getBits(5);
    119     if (bsid > 8) {
    120         ALOGW("Incorrect bsid in AC3 header. Possibly E-AC-3?");
    121         return 0;
    122     }
    123 
    124     unsigned bsmod = bits.getBits(3);
    125     unsigned acmod = bits.getBits(3);
    126     unsigned cmixlev = 0;
    127     unsigned surmixlev = 0;
    128     unsigned dsurmod = 0;
    129 
    130     if ((acmod & 1) > 0 && acmod != 1) {
    131         if (bits.numBitsLeft() < 2) {
    132             return 0;
    133         }
    134         cmixlev = bits.getBits(2);
    135     }
    136     if ((acmod & 4) > 0) {
    137         if (bits.numBitsLeft() < 2) {
    138             return 0;
    139         }
    140         surmixlev = bits.getBits(2);
    141     }
    142     if (acmod == 2) {
    143         if (bits.numBitsLeft() < 2) {
    144             return 0;
    145         }
    146         dsurmod = bits.getBits(2);
    147     }
    148 
    149     if (bits.numBitsLeft() < 1) {
    150         return 0;
    151     }
    152     unsigned lfeon = bits.getBits(1);
    153 
    154     unsigned samplingRate = samplingRateTable[fscod];
    155     unsigned payloadSize = frameSizeTable[frmsizecod >> 1][fscod];
    156     if (fscod == 1) {
    157         payloadSize += frmsizecod & 1;
    158     }
    159     payloadSize <<= 1;  // convert from 16-bit words to bytes
    160 
    161     unsigned channelCount = channelCountTable[acmod] + lfeon;
    162 
    163     if (metaData != NULL) {
    164         (*metaData)->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_AC3);
    165         (*metaData)->setInt32(kKeyChannelCount, channelCount);
    166         (*metaData)->setInt32(kKeySampleRate, samplingRate);
    167     }
    168 
    169     return payloadSize;
    170 }
    171 
    172 static bool IsSeeminglyValidAC3Header(const uint8_t *ptr, size_t size) {
    173     return parseAC3SyncFrame(ptr, size, NULL) > 0;
    174 }
    175 
    176 static bool IsSeeminglyValidADTSHeader(
    177         const uint8_t *ptr, size_t size, size_t *frameLength) {
    178     if (size < 7) {
    179         // Not enough data to verify header.
    180         return false;
    181     }
    182 
    183     if (ptr[0] != 0xff || (ptr[1] >> 4) != 0x0f) {
    184         return false;
    185     }
    186 
    187     unsigned layer = (ptr[1] >> 1) & 3;
    188 
    189     if (layer != 0) {
    190         return false;
    191     }
    192 
    193     unsigned ID = (ptr[1] >> 3) & 1;
    194     unsigned profile_ObjectType = ptr[2] >> 6;
    195 
    196     if (ID == 1 && profile_ObjectType == 3) {
    197         // MPEG-2 profile 3 is reserved.
    198         return false;
    199     }
    200 
    201     size_t frameLengthInHeader =
    202             ((ptr[3] & 3) << 11) + (ptr[4] << 3) + ((ptr[5] >> 5) & 7);
    203     if (frameLengthInHeader > size) {
    204         return false;
    205     }
    206 
    207     *frameLength = frameLengthInHeader;
    208     return true;
    209 }
    210 
    211 static bool IsSeeminglyValidMPEGAudioHeader(const uint8_t *ptr, size_t size) {
    212     if (size < 3) {
    213         // Not enough data to verify header.
    214         return false;
    215     }
    216 
    217     if (ptr[0] != 0xff || (ptr[1] >> 5) != 0x07) {
    218         return false;
    219     }
    220 
    221     unsigned ID = (ptr[1] >> 3) & 3;
    222 
    223     if (ID == 1) {
    224         return false;  // reserved
    225     }
    226 
    227     unsigned layer = (ptr[1] >> 1) & 3;
    228 
    229     if (layer == 0) {
    230         return false;  // reserved
    231     }
    232 
    233     unsigned bitrateIndex = (ptr[2] >> 4);
    234 
    235     if (bitrateIndex == 0x0f) {
    236         return false;  // reserved
    237     }
    238 
    239     unsigned samplingRateIndex = (ptr[2] >> 2) & 3;
    240 
    241     if (samplingRateIndex == 3) {
    242         return false;  // reserved
    243     }
    244 
    245     return true;
    246 }
    247 
    248 status_t ElementaryStreamQueue::appendData(
    249         const void *data, size_t size, int64_t timeUs) {
    250     if (mBuffer == NULL || mBuffer->size() == 0) {
    251         switch (mMode) {
    252             case H264:
    253             case MPEG_VIDEO:
    254             {
    255 #if 0
    256                 if (size < 4 || memcmp("\x00\x00\x00\x01", data, 4)) {
    257                     return ERROR_MALFORMED;
    258                 }
    259 #else
    260                 uint8_t *ptr = (uint8_t *)data;
    261 
    262                 ssize_t startOffset = -1;
    263                 for (size_t i = 0; i + 3 < size; ++i) {
    264                     if (!memcmp("\x00\x00\x00\x01", &ptr[i], 4)) {
    265                         startOffset = i;
    266                         break;
    267                     }
    268                 }
    269 
    270                 if (startOffset < 0) {
    271                     return ERROR_MALFORMED;
    272                 }
    273 
    274                 if (startOffset > 0) {
    275                     ALOGI("found something resembling an H.264/MPEG syncword "
    276                           "at offset %zd",
    277                           startOffset);
    278                 }
    279 
    280                 data = &ptr[startOffset];
    281                 size -= startOffset;
    282 #endif
    283                 break;
    284             }
    285 
    286             case MPEG4_VIDEO:
    287             {
    288 #if 0
    289                 if (size < 3 || memcmp("\x00\x00\x01", data, 3)) {
    290                     return ERROR_MALFORMED;
    291                 }
    292 #else
    293                 uint8_t *ptr = (uint8_t *)data;
    294 
    295                 ssize_t startOffset = -1;
    296                 for (size_t i = 0; i + 2 < size; ++i) {
    297                     if (!memcmp("\x00\x00\x01", &ptr[i], 3)) {
    298                         startOffset = i;
    299                         break;
    300                     }
    301                 }
    302 
    303                 if (startOffset < 0) {
    304                     return ERROR_MALFORMED;
    305                 }
    306 
    307                 if (startOffset > 0) {
    308                     ALOGI("found something resembling an H.264/MPEG syncword "
    309                           "at offset %zd",
    310                           startOffset);
    311                 }
    312 
    313                 data = &ptr[startOffset];
    314                 size -= startOffset;
    315 #endif
    316                 break;
    317             }
    318 
    319             case AAC:
    320             {
    321                 uint8_t *ptr = (uint8_t *)data;
    322 
    323 #if 0
    324                 if (size < 2 || ptr[0] != 0xff || (ptr[1] >> 4) != 0x0f) {
    325                     return ERROR_MALFORMED;
    326                 }
    327 #else
    328                 ssize_t startOffset = -1;
    329                 size_t frameLength;
    330                 for (size_t i = 0; i < size; ++i) {
    331                     if (IsSeeminglyValidADTSHeader(
    332                             &ptr[i], size - i, &frameLength)) {
    333                         startOffset = i;
    334                         break;
    335                     }
    336                 }
    337 
    338                 if (startOffset < 0) {
    339                     return ERROR_MALFORMED;
    340                 }
    341 
    342                 if (startOffset > 0) {
    343                     ALOGI("found something resembling an AAC syncword at "
    344                           "offset %zd",
    345                           startOffset);
    346                 }
    347 
    348                 if (frameLength != size - startOffset) {
    349                     ALOGV("First ADTS AAC frame length is %zd bytes, "
    350                           "while the buffer size is %zd bytes.",
    351                           frameLength, size - startOffset);
    352                 }
    353 
    354                 data = &ptr[startOffset];
    355                 size -= startOffset;
    356 #endif
    357                 break;
    358             }
    359 
    360             case AC3:
    361             {
    362                 uint8_t *ptr = (uint8_t *)data;
    363 
    364                 ssize_t startOffset = -1;
    365                 for (size_t i = 0; i < size; ++i) {
    366                     if (IsSeeminglyValidAC3Header(&ptr[i], size - i)) {
    367                         startOffset = i;
    368                         break;
    369                     }
    370                 }
    371 
    372                 if (startOffset < 0) {
    373                     return ERROR_MALFORMED;
    374                 }
    375 
    376                 if (startOffset > 0) {
    377                     ALOGI("found something resembling an AC3 syncword at "
    378                           "offset %zd",
    379                           startOffset);
    380                 }
    381 
    382                 data = &ptr[startOffset];
    383                 size -= startOffset;
    384                 break;
    385             }
    386 
    387             case MPEG_AUDIO:
    388             {
    389                 uint8_t *ptr = (uint8_t *)data;
    390 
    391                 ssize_t startOffset = -1;
    392                 for (size_t i = 0; i < size; ++i) {
    393                     if (IsSeeminglyValidMPEGAudioHeader(&ptr[i], size - i)) {
    394                         startOffset = i;
    395                         break;
    396                     }
    397                 }
    398 
    399                 if (startOffset < 0) {
    400                     return ERROR_MALFORMED;
    401                 }
    402 
    403                 if (startOffset > 0) {
    404                     ALOGI("found something resembling an MPEG audio "
    405                           "syncword at offset %zd",
    406                           startOffset);
    407                 }
    408 
    409                 data = &ptr[startOffset];
    410                 size -= startOffset;
    411                 break;
    412             }
    413 
    414             case PCM_AUDIO:
    415             {
    416                 break;
    417             }
    418 
    419             default:
    420                 TRESPASS();
    421                 break;
    422         }
    423     }
    424 
    425     size_t neededSize = (mBuffer == NULL ? 0 : mBuffer->size()) + size;
    426     if (mBuffer == NULL || neededSize > mBuffer->capacity()) {
    427         neededSize = (neededSize + 65535) & ~65535;
    428 
    429         ALOGV("resizing buffer to size %zu", neededSize);
    430 
    431         sp<ABuffer> buffer = new ABuffer(neededSize);
    432         if (mBuffer != NULL) {
    433             memcpy(buffer->data(), mBuffer->data(), mBuffer->size());
    434             buffer->setRange(0, mBuffer->size());
    435         } else {
    436             buffer->setRange(0, 0);
    437         }
    438 
    439         mBuffer = buffer;
    440     }
    441 
    442     memcpy(mBuffer->data() + mBuffer->size(), data, size);
    443     mBuffer->setRange(0, mBuffer->size() + size);
    444 
    445     RangeInfo info;
    446     info.mLength = size;
    447     info.mTimestampUs = timeUs;
    448     mRangeInfos.push_back(info);
    449 
    450 #if 0
    451     if (mMode == AAC) {
    452         ALOGI("size = %zu, timeUs = %.2f secs", size, timeUs / 1E6);
    453         hexdump(data, size);
    454     }
    455 #endif
    456 
    457     return OK;
    458 }
    459 
    460 sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnit() {
    461     if ((mFlags & kFlag_AlignedData) && mMode == H264) {
    462         if (mRangeInfos.empty()) {
    463             return NULL;
    464         }
    465 
    466         RangeInfo info = *mRangeInfos.begin();
    467         mRangeInfos.erase(mRangeInfos.begin());
    468 
    469         sp<ABuffer> accessUnit = new ABuffer(info.mLength);
    470         memcpy(accessUnit->data(), mBuffer->data(), info.mLength);
    471         accessUnit->meta()->setInt64("timeUs", info.mTimestampUs);
    472 
    473         memmove(mBuffer->data(),
    474                 mBuffer->data() + info.mLength,
    475                 mBuffer->size() - info.mLength);
    476 
    477         mBuffer->setRange(0, mBuffer->size() - info.mLength);
    478 
    479         if (mFormat == NULL) {
    480             mFormat = MakeAVCCodecSpecificData(accessUnit);
    481         }
    482 
    483         return accessUnit;
    484     }
    485 
    486     switch (mMode) {
    487         case H264:
    488             return dequeueAccessUnitH264();
    489         case AAC:
    490             return dequeueAccessUnitAAC();
    491         case AC3:
    492             return dequeueAccessUnitAC3();
    493         case MPEG_VIDEO:
    494             return dequeueAccessUnitMPEGVideo();
    495         case MPEG4_VIDEO:
    496             return dequeueAccessUnitMPEG4Video();
    497         case PCM_AUDIO:
    498             return dequeueAccessUnitPCMAudio();
    499         default:
    500             CHECK_EQ((unsigned)mMode, (unsigned)MPEG_AUDIO);
    501             return dequeueAccessUnitMPEGAudio();
    502     }
    503 }
    504 
    505 sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitAC3() {
    506     unsigned syncStartPos = 0;  // in bytes
    507     unsigned payloadSize = 0;
    508     sp<MetaData> format = new MetaData;
    509     while (true) {
    510         if (syncStartPos + 2 >= mBuffer->size()) {
    511             return NULL;
    512         }
    513 
    514         payloadSize = parseAC3SyncFrame(
    515                 mBuffer->data() + syncStartPos,
    516                 mBuffer->size() - syncStartPos,
    517                 &format);
    518         if (payloadSize > 0) {
    519             break;
    520         }
    521         ++syncStartPos;
    522     }
    523 
    524     if (mBuffer->size() < syncStartPos + payloadSize) {
    525         ALOGV("Not enough buffer size for AC3");
    526         return NULL;
    527     }
    528 
    529     if (mFormat == NULL) {
    530         mFormat = format;
    531     }
    532 
    533     sp<ABuffer> accessUnit = new ABuffer(syncStartPos + payloadSize);
    534     memcpy(accessUnit->data(), mBuffer->data(), syncStartPos + payloadSize);
    535 
    536     int64_t timeUs = fetchTimestamp(syncStartPos + payloadSize);
    537     CHECK_GE(timeUs, 0ll);
    538     accessUnit->meta()->setInt64("timeUs", timeUs);
    539 
    540     memmove(
    541             mBuffer->data(),
    542             mBuffer->data() + syncStartPos + payloadSize,
    543             mBuffer->size() - syncStartPos - payloadSize);
    544 
    545     mBuffer->setRange(0, mBuffer->size() - syncStartPos - payloadSize);
    546 
    547     return accessUnit;
    548 }
    549 
    550 sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitPCMAudio() {
    551     if (mBuffer->size() < 4) {
    552         return NULL;
    553     }
    554 
    555     ABitReader bits(mBuffer->data(), 4);
    556     CHECK_EQ(bits.getBits(8), 0xa0);
    557     unsigned numAUs = bits.getBits(8);
    558     bits.skipBits(8);
    559     unsigned quantization_word_length = bits.getBits(2);
    560     unsigned audio_sampling_frequency = bits.getBits(3);
    561     unsigned num_channels = bits.getBits(3);
    562 
    563     CHECK_EQ(audio_sampling_frequency, 2);  // 48kHz
    564     CHECK_EQ(num_channels, 1u);  // stereo!
    565 
    566     if (mFormat == NULL) {
    567         mFormat = new MetaData;
    568         mFormat->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_RAW);
    569         mFormat->setInt32(kKeyChannelCount, 2);
    570         mFormat->setInt32(kKeySampleRate, 48000);
    571     }
    572 
    573     static const size_t kFramesPerAU = 80;
    574     size_t frameSize = 2 /* numChannels */ * sizeof(int16_t);
    575 
    576     size_t payloadSize = numAUs * frameSize * kFramesPerAU;
    577 
    578     if (mBuffer->size() < 4 + payloadSize) {
    579         return NULL;
    580     }
    581 
    582     sp<ABuffer> accessUnit = new ABuffer(payloadSize);
    583     memcpy(accessUnit->data(), mBuffer->data() + 4, payloadSize);
    584 
    585     int64_t timeUs = fetchTimestamp(payloadSize + 4);
    586     CHECK_GE(timeUs, 0ll);
    587     accessUnit->meta()->setInt64("timeUs", timeUs);
    588 
    589     int16_t *ptr = (int16_t *)accessUnit->data();
    590     for (size_t i = 0; i < payloadSize / sizeof(int16_t); ++i) {
    591         ptr[i] = ntohs(ptr[i]);
    592     }
    593 
    594     memmove(
    595             mBuffer->data(),
    596             mBuffer->data() + 4 + payloadSize,
    597             mBuffer->size() - 4 - payloadSize);
    598 
    599     mBuffer->setRange(0, mBuffer->size() - 4 - payloadSize);
    600 
    601     return accessUnit;
    602 }
    603 
    604 sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitAAC() {
    605     if (mBuffer->size() == 0) {
    606         return NULL;
    607     }
    608 
    609     CHECK(!mRangeInfos.empty());
    610 
    611     const RangeInfo &info = *mRangeInfos.begin();
    612     if (mBuffer->size() < info.mLength) {
    613         return NULL;
    614     }
    615 
    616     CHECK_GE(info.mTimestampUs, 0ll);
    617 
    618     // The idea here is consume all AAC frames starting at offsets before
    619     // info.mLength so we can assign a meaningful timestamp without
    620     // having to interpolate.
    621     // The final AAC frame may well extend into the next RangeInfo but
    622     // that's ok.
    623     // TODO: the logic commented above is skipped because codec cannot take
    624     // arbitrary sized input buffers;
    625     size_t offset = 0;
    626     while (offset < info.mLength) {
    627         if (offset + 7 > mBuffer->size()) {
    628             return NULL;
    629         }
    630 
    631         ABitReader bits(mBuffer->data() + offset, mBuffer->size() - offset);
    632 
    633         // adts_fixed_header
    634 
    635         CHECK_EQ(bits.getBits(12), 0xfffu);
    636         bits.skipBits(3);  // ID, layer
    637         bool protection_absent = bits.getBits(1) != 0;
    638 
    639         if (mFormat == NULL) {
    640             unsigned profile = bits.getBits(2);
    641             CHECK_NE(profile, 3u);
    642             unsigned sampling_freq_index = bits.getBits(4);
    643             bits.getBits(1);  // private_bit
    644             unsigned channel_configuration = bits.getBits(3);
    645             CHECK_NE(channel_configuration, 0u);
    646             bits.skipBits(2);  // original_copy, home
    647 
    648             mFormat = MakeAACCodecSpecificData(
    649                     profile, sampling_freq_index, channel_configuration);
    650 
    651             mFormat->setInt32(kKeyIsADTS, true);
    652 
    653             int32_t sampleRate;
    654             int32_t numChannels;
    655             CHECK(mFormat->findInt32(kKeySampleRate, &sampleRate));
    656             CHECK(mFormat->findInt32(kKeyChannelCount, &numChannels));
    657 
    658             ALOGI("found AAC codec config (%d Hz, %d channels)",
    659                  sampleRate, numChannels);
    660         } else {
    661             // profile_ObjectType, sampling_frequency_index, private_bits,
    662             // channel_configuration, original_copy, home
    663             bits.skipBits(12);
    664         }
    665 
    666         // adts_variable_header
    667 
    668         // copyright_identification_bit, copyright_identification_start
    669         bits.skipBits(2);
    670 
    671         unsigned aac_frame_length = bits.getBits(13);
    672 
    673         bits.skipBits(11);  // adts_buffer_fullness
    674 
    675         unsigned number_of_raw_data_blocks_in_frame = bits.getBits(2);
    676 
    677         if (number_of_raw_data_blocks_in_frame != 0) {
    678             // To be implemented.
    679             TRESPASS();
    680         }
    681 
    682         if (offset + aac_frame_length > mBuffer->size()) {
    683             return NULL;
    684         }
    685 
    686         size_t headerSize = protection_absent ? 7 : 9;
    687 
    688         offset += aac_frame_length;
    689         // TODO: move back to concatenation when codec can support arbitrary input buffers.
    690         // For now only queue a single buffer
    691         break;
    692     }
    693 
    694     int64_t timeUs = fetchTimestampAAC(offset);
    695 
    696     sp<ABuffer> accessUnit = new ABuffer(offset);
    697     memcpy(accessUnit->data(), mBuffer->data(), offset);
    698 
    699     memmove(mBuffer->data(), mBuffer->data() + offset,
    700             mBuffer->size() - offset);
    701     mBuffer->setRange(0, mBuffer->size() - offset);
    702 
    703     accessUnit->meta()->setInt64("timeUs", timeUs);
    704 
    705     return accessUnit;
    706 }
    707 
    708 int64_t ElementaryStreamQueue::fetchTimestamp(size_t size) {
    709     int64_t timeUs = -1;
    710     bool first = true;
    711 
    712     while (size > 0) {
    713         CHECK(!mRangeInfos.empty());
    714 
    715         RangeInfo *info = &*mRangeInfos.begin();
    716 
    717         if (first) {
    718             timeUs = info->mTimestampUs;
    719             first = false;
    720         }
    721 
    722         if (info->mLength > size) {
    723             info->mLength -= size;
    724             size = 0;
    725         } else {
    726             size -= info->mLength;
    727 
    728             mRangeInfos.erase(mRangeInfos.begin());
    729             info = NULL;
    730         }
    731 
    732     }
    733 
    734     if (timeUs == 0ll) {
    735         ALOGV("Returning 0 timestamp");
    736     }
    737 
    738     return timeUs;
    739 }
    740 
    741 // TODO: avoid interpolating timestamps once codec supports arbitrary sized input buffers
    742 int64_t ElementaryStreamQueue::fetchTimestampAAC(size_t size) {
    743     int64_t timeUs = -1;
    744     bool first = true;
    745 
    746     size_t samplesize = size;
    747     while (size > 0) {
    748         CHECK(!mRangeInfos.empty());
    749 
    750         RangeInfo *info = &*mRangeInfos.begin();
    751 
    752         if (first) {
    753             timeUs = info->mTimestampUs;
    754             first = false;
    755         }
    756 
    757         if (info->mLength > size) {
    758             int32_t sampleRate;
    759             CHECK(mFormat->findInt32(kKeySampleRate, &sampleRate));
    760             info->mLength -= size;
    761             size_t numSamples = 1024 * size / samplesize;
    762             info->mTimestampUs += numSamples * 1000000ll / sampleRate;
    763             size = 0;
    764         } else {
    765             size -= info->mLength;
    766 
    767             mRangeInfos.erase(mRangeInfos.begin());
    768             info = NULL;
    769         }
    770 
    771     }
    772 
    773     if (timeUs == 0ll) {
    774         ALOGV("Returning 0 timestamp");
    775     }
    776 
    777     return timeUs;
    778 }
    779 
    780 struct NALPosition {
    781     size_t nalOffset;
    782     size_t nalSize;
    783 };
    784 
    785 sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitH264() {
    786     const uint8_t *data = mBuffer->data();
    787 
    788     size_t size = mBuffer->size();
    789     Vector<NALPosition> nals;
    790 
    791     size_t totalSize = 0;
    792 
    793     status_t err;
    794     const uint8_t *nalStart;
    795     size_t nalSize;
    796     bool foundSlice = false;
    797     while ((err = getNextNALUnit(&data, &size, &nalStart, &nalSize)) == OK) {
    798         if (nalSize == 0) continue;
    799 
    800         unsigned nalType = nalStart[0] & 0x1f;
    801         bool flush = false;
    802 
    803         if (nalType == 1 || nalType == 5) {
    804             if (foundSlice) {
    805                 ABitReader br(nalStart + 1, nalSize);
    806                 unsigned first_mb_in_slice = parseUE(&br);
    807 
    808                 if (first_mb_in_slice == 0) {
    809                     // This slice starts a new frame.
    810 
    811                     flush = true;
    812                 }
    813             }
    814 
    815             foundSlice = true;
    816         } else if ((nalType == 9 || nalType == 7) && foundSlice) {
    817             // Access unit delimiter and SPS will be associated with the
    818             // next frame.
    819 
    820             flush = true;
    821         }
    822 
    823         if (flush) {
    824             // The access unit will contain all nal units up to, but excluding
    825             // the current one, separated by 0x00 0x00 0x00 0x01 startcodes.
    826 
    827             size_t auSize = 4 * nals.size() + totalSize;
    828             sp<ABuffer> accessUnit = new ABuffer(auSize);
    829 
    830 #if !LOG_NDEBUG
    831             AString out;
    832 #endif
    833 
    834             size_t dstOffset = 0;
    835             for (size_t i = 0; i < nals.size(); ++i) {
    836                 const NALPosition &pos = nals.itemAt(i);
    837 
    838                 unsigned nalType = mBuffer->data()[pos.nalOffset] & 0x1f;
    839 
    840                 if (nalType == 6) {
    841                     sp<ABuffer> sei = new ABuffer(pos.nalSize);
    842                     memcpy(sei->data(), mBuffer->data() + pos.nalOffset, pos.nalSize);
    843                     accessUnit->meta()->setBuffer("sei", sei);
    844                 }
    845 
    846 #if !LOG_NDEBUG
    847                 char tmp[128];
    848                 sprintf(tmp, "0x%02x", nalType);
    849                 if (i > 0) {
    850                     out.append(", ");
    851                 }
    852                 out.append(tmp);
    853 #endif
    854 
    855                 memcpy(accessUnit->data() + dstOffset, "\x00\x00\x00\x01", 4);
    856 
    857                 memcpy(accessUnit->data() + dstOffset + 4,
    858                        mBuffer->data() + pos.nalOffset,
    859                        pos.nalSize);
    860 
    861                 dstOffset += pos.nalSize + 4;
    862             }
    863 
    864 #if !LOG_NDEBUG
    865             ALOGV("accessUnit contains nal types %s", out.c_str());
    866 #endif
    867 
    868             const NALPosition &pos = nals.itemAt(nals.size() - 1);
    869             size_t nextScan = pos.nalOffset + pos.nalSize;
    870 
    871             memmove(mBuffer->data(),
    872                     mBuffer->data() + nextScan,
    873                     mBuffer->size() - nextScan);
    874 
    875             mBuffer->setRange(0, mBuffer->size() - nextScan);
    876 
    877             int64_t timeUs = fetchTimestamp(nextScan);
    878             CHECK_GE(timeUs, 0ll);
    879 
    880             accessUnit->meta()->setInt64("timeUs", timeUs);
    881 
    882             if (mFormat == NULL) {
    883                 mFormat = MakeAVCCodecSpecificData(accessUnit);
    884             }
    885 
    886             return accessUnit;
    887         }
    888 
    889         NALPosition pos;
    890         pos.nalOffset = nalStart - mBuffer->data();
    891         pos.nalSize = nalSize;
    892 
    893         nals.push(pos);
    894 
    895         totalSize += nalSize;
    896     }
    897     CHECK_EQ(err, (status_t)-EAGAIN);
    898 
    899     return NULL;
    900 }
    901 
    902 sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitMPEGAudio() {
    903     const uint8_t *data = mBuffer->data();
    904     size_t size = mBuffer->size();
    905 
    906     if (size < 4) {
    907         return NULL;
    908     }
    909 
    910     uint32_t header = U32_AT(data);
    911 
    912     size_t frameSize;
    913     int samplingRate, numChannels, bitrate, numSamples;
    914     CHECK(GetMPEGAudioFrameSize(
    915                 header, &frameSize, &samplingRate, &numChannels,
    916                 &bitrate, &numSamples));
    917 
    918     if (size < frameSize) {
    919         return NULL;
    920     }
    921 
    922     unsigned layer = 4 - ((header >> 17) & 3);
    923 
    924     sp<ABuffer> accessUnit = new ABuffer(frameSize);
    925     memcpy(accessUnit->data(), data, frameSize);
    926 
    927     memmove(mBuffer->data(),
    928             mBuffer->data() + frameSize,
    929             mBuffer->size() - frameSize);
    930 
    931     mBuffer->setRange(0, mBuffer->size() - frameSize);
    932 
    933     int64_t timeUs = fetchTimestamp(frameSize);
    934     CHECK_GE(timeUs, 0ll);
    935 
    936     accessUnit->meta()->setInt64("timeUs", timeUs);
    937 
    938     if (mFormat == NULL) {
    939         mFormat = new MetaData;
    940 
    941         switch (layer) {
    942             case 1:
    943                 mFormat->setCString(
    944                         kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MPEG_LAYER_I);
    945                 break;
    946             case 2:
    947                 mFormat->setCString(
    948                         kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MPEG_LAYER_II);
    949                 break;
    950             case 3:
    951                 mFormat->setCString(
    952                         kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MPEG);
    953                 break;
    954             default:
    955                 TRESPASS();
    956         }
    957 
    958         mFormat->setInt32(kKeySampleRate, samplingRate);
    959         mFormat->setInt32(kKeyChannelCount, numChannels);
    960     }
    961 
    962     return accessUnit;
    963 }
    964 
    965 static void EncodeSize14(uint8_t **_ptr, size_t size) {
    966     CHECK_LE(size, 0x3fff);
    967 
    968     uint8_t *ptr = *_ptr;
    969 
    970     *ptr++ = 0x80 | (size >> 7);
    971     *ptr++ = size & 0x7f;
    972 
    973     *_ptr = ptr;
    974 }
    975 
    976 static sp<ABuffer> MakeMPEGVideoESDS(const sp<ABuffer> &csd) {
    977     sp<ABuffer> esds = new ABuffer(csd->size() + 25);
    978 
    979     uint8_t *ptr = esds->data();
    980     *ptr++ = 0x03;
    981     EncodeSize14(&ptr, 22 + csd->size());
    982 
    983     *ptr++ = 0x00;  // ES_ID
    984     *ptr++ = 0x00;
    985 
    986     *ptr++ = 0x00;  // streamDependenceFlag, URL_Flag, OCRstreamFlag
    987 
    988     *ptr++ = 0x04;
    989     EncodeSize14(&ptr, 16 + csd->size());
    990 
    991     *ptr++ = 0x40;  // Audio ISO/IEC 14496-3
    992 
    993     for (size_t i = 0; i < 12; ++i) {
    994         *ptr++ = 0x00;
    995     }
    996 
    997     *ptr++ = 0x05;
    998     EncodeSize14(&ptr, csd->size());
    999 
   1000     memcpy(ptr, csd->data(), csd->size());
   1001 
   1002     return esds;
   1003 }
   1004 
   1005 sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitMPEGVideo() {
   1006     const uint8_t *data = mBuffer->data();
   1007     size_t size = mBuffer->size();
   1008 
   1009     bool sawPictureStart = false;
   1010     int pprevStartCode = -1;
   1011     int prevStartCode = -1;
   1012     int currentStartCode = -1;
   1013 
   1014     size_t offset = 0;
   1015     while (offset + 3 < size) {
   1016         if (memcmp(&data[offset], "\x00\x00\x01", 3)) {
   1017             ++offset;
   1018             continue;
   1019         }
   1020 
   1021         pprevStartCode = prevStartCode;
   1022         prevStartCode = currentStartCode;
   1023         currentStartCode = data[offset + 3];
   1024 
   1025         if (currentStartCode == 0xb3 && mFormat == NULL) {
   1026             memmove(mBuffer->data(), mBuffer->data() + offset, size - offset);
   1027             size -= offset;
   1028             (void)fetchTimestamp(offset);
   1029             offset = 0;
   1030             mBuffer->setRange(0, size);
   1031         }
   1032 
   1033         if ((prevStartCode == 0xb3 && currentStartCode != 0xb5)
   1034                 || (pprevStartCode == 0xb3 && prevStartCode == 0xb5)) {
   1035             // seqHeader without/with extension
   1036 
   1037             if (mFormat == NULL) {
   1038                 CHECK_GE(size, 7u);
   1039 
   1040                 unsigned width =
   1041                     (data[4] << 4) | data[5] >> 4;
   1042 
   1043                 unsigned height =
   1044                     ((data[5] & 0x0f) << 8) | data[6];
   1045 
   1046                 mFormat = new MetaData;
   1047                 mFormat->setCString(kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_MPEG2);
   1048                 mFormat->setInt32(kKeyWidth, width);
   1049                 mFormat->setInt32(kKeyHeight, height);
   1050 
   1051                 ALOGI("found MPEG2 video codec config (%d x %d)", width, height);
   1052 
   1053                 sp<ABuffer> csd = new ABuffer(offset);
   1054                 memcpy(csd->data(), data, offset);
   1055 
   1056                 memmove(mBuffer->data(),
   1057                         mBuffer->data() + offset,
   1058                         mBuffer->size() - offset);
   1059 
   1060                 mBuffer->setRange(0, mBuffer->size() - offset);
   1061                 size -= offset;
   1062                 (void)fetchTimestamp(offset);
   1063                 offset = 0;
   1064 
   1065                 // hexdump(csd->data(), csd->size());
   1066 
   1067                 sp<ABuffer> esds = MakeMPEGVideoESDS(csd);
   1068                 mFormat->setData(
   1069                         kKeyESDS, kTypeESDS, esds->data(), esds->size());
   1070 
   1071                 return NULL;
   1072             }
   1073         }
   1074 
   1075         if (mFormat != NULL && currentStartCode == 0x00) {
   1076             // Picture start
   1077 
   1078             if (!sawPictureStart) {
   1079                 sawPictureStart = true;
   1080             } else {
   1081                 sp<ABuffer> accessUnit = new ABuffer(offset);
   1082                 memcpy(accessUnit->data(), data, offset);
   1083 
   1084                 memmove(mBuffer->data(),
   1085                         mBuffer->data() + offset,
   1086                         mBuffer->size() - offset);
   1087 
   1088                 mBuffer->setRange(0, mBuffer->size() - offset);
   1089 
   1090                 int64_t timeUs = fetchTimestamp(offset);
   1091                 CHECK_GE(timeUs, 0ll);
   1092 
   1093                 offset = 0;
   1094 
   1095                 accessUnit->meta()->setInt64("timeUs", timeUs);
   1096 
   1097                 ALOGV("returning MPEG video access unit at time %" PRId64 " us",
   1098                       timeUs);
   1099 
   1100                 // hexdump(accessUnit->data(), accessUnit->size());
   1101 
   1102                 return accessUnit;
   1103             }
   1104         }
   1105 
   1106         ++offset;
   1107     }
   1108 
   1109     return NULL;
   1110 }
   1111 
   1112 static ssize_t getNextChunkSize(
   1113         const uint8_t *data, size_t size) {
   1114     static const char kStartCode[] = "\x00\x00\x01";
   1115 
   1116     if (size < 3) {
   1117         return -EAGAIN;
   1118     }
   1119 
   1120     if (memcmp(kStartCode, data, 3)) {
   1121         TRESPASS();
   1122     }
   1123 
   1124     size_t offset = 3;
   1125     while (offset + 2 < size) {
   1126         if (!memcmp(&data[offset], kStartCode, 3)) {
   1127             return offset;
   1128         }
   1129 
   1130         ++offset;
   1131     }
   1132 
   1133     return -EAGAIN;
   1134 }
   1135 
   1136 sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitMPEG4Video() {
   1137     uint8_t *data = mBuffer->data();
   1138     size_t size = mBuffer->size();
   1139 
   1140     enum {
   1141         SKIP_TO_VISUAL_OBJECT_SEQ_START,
   1142         EXPECT_VISUAL_OBJECT_START,
   1143         EXPECT_VO_START,
   1144         EXPECT_VOL_START,
   1145         WAIT_FOR_VOP_START,
   1146         SKIP_TO_VOP_START,
   1147 
   1148     } state;
   1149 
   1150     if (mFormat == NULL) {
   1151         state = SKIP_TO_VISUAL_OBJECT_SEQ_START;
   1152     } else {
   1153         state = SKIP_TO_VOP_START;
   1154     }
   1155 
   1156     int32_t width = -1, height = -1;
   1157 
   1158     size_t offset = 0;
   1159     ssize_t chunkSize;
   1160     while ((chunkSize = getNextChunkSize(
   1161                     &data[offset], size - offset)) > 0) {
   1162         bool discard = false;
   1163 
   1164         unsigned chunkType = data[offset + 3];
   1165 
   1166         switch (state) {
   1167             case SKIP_TO_VISUAL_OBJECT_SEQ_START:
   1168             {
   1169                 if (chunkType == 0xb0) {
   1170                     // Discard anything before this marker.
   1171 
   1172                     state = EXPECT_VISUAL_OBJECT_START;
   1173                 } else {
   1174                     discard = true;
   1175                 }
   1176                 break;
   1177             }
   1178 
   1179             case EXPECT_VISUAL_OBJECT_START:
   1180             {
   1181                 CHECK_EQ(chunkType, 0xb5);
   1182                 state = EXPECT_VO_START;
   1183                 break;
   1184             }
   1185 
   1186             case EXPECT_VO_START:
   1187             {
   1188                 CHECK_LE(chunkType, 0x1f);
   1189                 state = EXPECT_VOL_START;
   1190                 break;
   1191             }
   1192 
   1193             case EXPECT_VOL_START:
   1194             {
   1195                 CHECK((chunkType & 0xf0) == 0x20);
   1196 
   1197                 CHECK(ExtractDimensionsFromVOLHeader(
   1198                             &data[offset], chunkSize,
   1199                             &width, &height));
   1200 
   1201                 state = WAIT_FOR_VOP_START;
   1202                 break;
   1203             }
   1204 
   1205             case WAIT_FOR_VOP_START:
   1206             {
   1207                 if (chunkType == 0xb3 || chunkType == 0xb6) {
   1208                     // group of VOP or VOP start.
   1209 
   1210                     mFormat = new MetaData;
   1211                     mFormat->setCString(
   1212                             kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_MPEG4);
   1213 
   1214                     mFormat->setInt32(kKeyWidth, width);
   1215                     mFormat->setInt32(kKeyHeight, height);
   1216 
   1217                     ALOGI("found MPEG4 video codec config (%d x %d)",
   1218                          width, height);
   1219 
   1220                     sp<ABuffer> csd = new ABuffer(offset);
   1221                     memcpy(csd->data(), data, offset);
   1222 
   1223                     // hexdump(csd->data(), csd->size());
   1224 
   1225                     sp<ABuffer> esds = MakeMPEGVideoESDS(csd);
   1226                     mFormat->setData(
   1227                             kKeyESDS, kTypeESDS,
   1228                             esds->data(), esds->size());
   1229 
   1230                     discard = true;
   1231                     state = SKIP_TO_VOP_START;
   1232                 }
   1233 
   1234                 break;
   1235             }
   1236 
   1237             case SKIP_TO_VOP_START:
   1238             {
   1239                 if (chunkType == 0xb6) {
   1240                     offset += chunkSize;
   1241 
   1242                     sp<ABuffer> accessUnit = new ABuffer(offset);
   1243                     memcpy(accessUnit->data(), data, offset);
   1244 
   1245                     memmove(data, &data[offset], size - offset);
   1246                     size -= offset;
   1247                     mBuffer->setRange(0, size);
   1248 
   1249                     int64_t timeUs = fetchTimestamp(offset);
   1250                     CHECK_GE(timeUs, 0ll);
   1251 
   1252                     offset = 0;
   1253 
   1254                     accessUnit->meta()->setInt64("timeUs", timeUs);
   1255 
   1256                     ALOGV("returning MPEG4 video access unit at time %" PRId64 " us",
   1257                          timeUs);
   1258 
   1259                     // hexdump(accessUnit->data(), accessUnit->size());
   1260 
   1261                     return accessUnit;
   1262                 } else if (chunkType != 0xb3) {
   1263                     offset += chunkSize;
   1264                     discard = true;
   1265                 }
   1266 
   1267                 break;
   1268             }
   1269 
   1270             default:
   1271                 TRESPASS();
   1272         }
   1273 
   1274         if (discard) {
   1275             (void)fetchTimestamp(offset);
   1276             memmove(data, &data[offset], size - offset);
   1277             size -= offset;
   1278             offset = 0;
   1279             mBuffer->setRange(0, size);
   1280         } else {
   1281             offset += chunkSize;
   1282         }
   1283     }
   1284 
   1285     return NULL;
   1286 }
   1287 
   1288 }  // namespace android
   1289