Home | History | Annotate | Download | only in libstagefright
      1 /*
      2  * Copyright (C) 2009 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 //#define LOG_NDEBUG 0
     18 #define LOG_TAG "MP3Extractor"
     19 #include <utils/Log.h>
     20 
     21 #include "include/MP3Extractor.h"
     22 
     23 #include "include/avc_utils.h"
     24 #include "include/ID3.h"
     25 #include "include/VBRISeeker.h"
     26 #include "include/XINGSeeker.h"
     27 
     28 #include <media/stagefright/foundation/ADebug.h>
     29 #include <media/stagefright/foundation/AMessage.h>
     30 #include <media/stagefright/DataSource.h>
     31 #include <media/stagefright/MediaBuffer.h>
     32 #include <media/stagefright/MediaBufferGroup.h>
     33 #include <media/stagefright/MediaDefs.h>
     34 #include <media/stagefright/MediaErrors.h>
     35 #include <media/stagefright/MediaSource.h>
     36 #include <media/stagefright/MetaData.h>
     37 #include <media/stagefright/Utils.h>
     38 #include <utils/String8.h>
     39 
     40 namespace android {
     41 
     42 // Everything must match except for
     43 // protection, bitrate, padding, private bits, mode, mode extension,
     44 // copyright bit, original bit and emphasis.
     45 // Yes ... there are things that must indeed match...
     46 static const uint32_t kMask = 0xfffe0c00;
     47 
     48 static bool Resync(
     49         const sp<DataSource> &source, uint32_t match_header,
     50         off64_t *inout_pos, off64_t *post_id3_pos, uint32_t *out_header) {
     51     if (post_id3_pos != NULL) {
     52         *post_id3_pos = 0;
     53     }
     54 
     55     if (*inout_pos == 0) {
     56         // Skip an optional ID3 header if syncing at the very beginning
     57         // of the datasource.
     58 
     59         for (;;) {
     60             uint8_t id3header[10];
     61             if (source->readAt(*inout_pos, id3header, sizeof(id3header))
     62                     < (ssize_t)sizeof(id3header)) {
     63                 // If we can't even read these 10 bytes, we might as well bail
     64                 // out, even if there _were_ 10 bytes of valid mp3 audio data...
     65                 return false;
     66             }
     67 
     68             if (memcmp("ID3", id3header, 3)) {
     69                 break;
     70             }
     71 
     72             // Skip the ID3v2 header.
     73 
     74             size_t len =
     75                 ((id3header[6] & 0x7f) << 21)
     76                 | ((id3header[7] & 0x7f) << 14)
     77                 | ((id3header[8] & 0x7f) << 7)
     78                 | (id3header[9] & 0x7f);
     79 
     80             len += 10;
     81 
     82             *inout_pos += len;
     83 
     84             ALOGV("skipped ID3 tag, new starting offset is %lld (0x%016llx)",
     85                     (long long)*inout_pos, (long long)*inout_pos);
     86         }
     87 
     88         if (post_id3_pos != NULL) {
     89             *post_id3_pos = *inout_pos;
     90         }
     91     }
     92 
     93     off64_t pos = *inout_pos;
     94     bool valid = false;
     95 
     96     const size_t kMaxReadBytes = 1024;
     97     const size_t kMaxBytesChecked = 128 * 1024;
     98     uint8_t buf[kMaxReadBytes];
     99     ssize_t bytesToRead = kMaxReadBytes;
    100     ssize_t totalBytesRead = 0;
    101     ssize_t remainingBytes = 0;
    102     bool reachEOS = false;
    103     uint8_t *tmp = buf;
    104 
    105     do {
    106         if (pos >= (off64_t)(*inout_pos + kMaxBytesChecked)) {
    107             // Don't scan forever.
    108             ALOGV("giving up at offset %lld", (long long)pos);
    109             break;
    110         }
    111 
    112         if (remainingBytes < 4) {
    113             if (reachEOS) {
    114                 break;
    115             } else {
    116                 memcpy(buf, tmp, remainingBytes);
    117                 bytesToRead = kMaxReadBytes - remainingBytes;
    118 
    119                 /*
    120                  * The next read position should start from the end of
    121                  * the last buffer, and thus should include the remaining
    122                  * bytes in the buffer.
    123                  */
    124                 totalBytesRead = source->readAt(pos + remainingBytes,
    125                                                 buf + remainingBytes,
    126                                                 bytesToRead);
    127                 if (totalBytesRead <= 0) {
    128                     break;
    129                 }
    130                 reachEOS = (totalBytesRead != bytesToRead);
    131                 totalBytesRead += remainingBytes;
    132                 remainingBytes = totalBytesRead;
    133                 tmp = buf;
    134                 continue;
    135             }
    136         }
    137 
    138         uint32_t header = U32_AT(tmp);
    139 
    140         if (match_header != 0 && (header & kMask) != (match_header & kMask)) {
    141             ++pos;
    142             ++tmp;
    143             --remainingBytes;
    144             continue;
    145         }
    146 
    147         size_t frame_size;
    148         int sample_rate, num_channels, bitrate;
    149         if (!GetMPEGAudioFrameSize(
    150                     header, &frame_size,
    151                     &sample_rate, &num_channels, &bitrate)) {
    152             ++pos;
    153             ++tmp;
    154             --remainingBytes;
    155             continue;
    156         }
    157 
    158         ALOGV("found possible 1st frame at %lld (header = 0x%08x)", (long long)pos, header);
    159 
    160         // We found what looks like a valid frame,
    161         // now find its successors.
    162 
    163         off64_t test_pos = pos + frame_size;
    164 
    165         valid = true;
    166         for (int j = 0; j < 3; ++j) {
    167             uint8_t tmp[4];
    168             if (source->readAt(test_pos, tmp, 4) < 4) {
    169                 valid = false;
    170                 break;
    171             }
    172 
    173             uint32_t test_header = U32_AT(tmp);
    174 
    175             ALOGV("subsequent header is %08x", test_header);
    176 
    177             if ((test_header & kMask) != (header & kMask)) {
    178                 valid = false;
    179                 break;
    180             }
    181 
    182             size_t test_frame_size;
    183             if (!GetMPEGAudioFrameSize(
    184                         test_header, &test_frame_size)) {
    185                 valid = false;
    186                 break;
    187             }
    188 
    189             ALOGV("found subsequent frame #%d at %lld", j + 2, (long long)test_pos);
    190 
    191             test_pos += test_frame_size;
    192         }
    193 
    194         if (valid) {
    195             *inout_pos = pos;
    196 
    197             if (out_header != NULL) {
    198                 *out_header = header;
    199             }
    200         } else {
    201             ALOGV("no dice, no valid sequence of frames found.");
    202         }
    203 
    204         ++pos;
    205         ++tmp;
    206         --remainingBytes;
    207     } while (!valid);
    208 
    209     return valid;
    210 }
    211 
    212 class MP3Source : public MediaSource {
    213 public:
    214     MP3Source(
    215             const sp<MetaData> &meta, const sp<DataSource> &source,
    216             off64_t first_frame_pos, uint32_t fixed_header,
    217             const sp<MP3Seeker> &seeker);
    218 
    219     virtual status_t start(MetaData *params = NULL);
    220     virtual status_t stop();
    221 
    222     virtual sp<MetaData> getFormat();
    223 
    224     virtual status_t read(
    225             MediaBuffer **buffer, const ReadOptions *options = NULL);
    226 
    227 protected:
    228     virtual ~MP3Source();
    229 
    230 private:
    231     static const size_t kMaxFrameSize;
    232     sp<MetaData> mMeta;
    233     sp<DataSource> mDataSource;
    234     off64_t mFirstFramePos;
    235     uint32_t mFixedHeader;
    236     off64_t mCurrentPos;
    237     int64_t mCurrentTimeUs;
    238     bool mStarted;
    239     sp<MP3Seeker> mSeeker;
    240     MediaBufferGroup *mGroup;
    241 
    242     int64_t mBasisTimeUs;
    243     int64_t mSamplesRead;
    244 
    245     MP3Source(const MP3Source &);
    246     MP3Source &operator=(const MP3Source &);
    247 };
    248 
    249 MP3Extractor::MP3Extractor(
    250         const sp<DataSource> &source, const sp<AMessage> &meta)
    251     : mInitCheck(NO_INIT),
    252       mDataSource(source),
    253       mFirstFramePos(-1),
    254       mFixedHeader(0) {
    255 
    256     off64_t pos = 0;
    257     off64_t post_id3_pos;
    258     uint32_t header;
    259     bool success;
    260 
    261     int64_t meta_offset;
    262     uint32_t meta_header;
    263     int64_t meta_post_id3_offset;
    264     if (meta != NULL
    265             && meta->findInt64("offset", &meta_offset)
    266             && meta->findInt32("header", (int32_t *)&meta_header)
    267             && meta->findInt64("post-id3-offset", &meta_post_id3_offset)) {
    268         // The sniffer has already done all the hard work for us, simply
    269         // accept its judgement.
    270         pos = (off64_t)meta_offset;
    271         header = meta_header;
    272         post_id3_pos = (off64_t)meta_post_id3_offset;
    273 
    274         success = true;
    275     } else {
    276         success = Resync(mDataSource, 0, &pos, &post_id3_pos, &header);
    277     }
    278 
    279     if (!success) {
    280         // mInitCheck will remain NO_INIT
    281         return;
    282     }
    283 
    284     mFirstFramePos = pos;
    285     mFixedHeader = header;
    286     mMeta = new MetaData;
    287     sp<XINGSeeker> seeker = XINGSeeker::CreateFromSource(mDataSource, mFirstFramePos);
    288 
    289     if (seeker == NULL) {
    290         mSeeker = VBRISeeker::CreateFromSource(mDataSource, post_id3_pos);
    291     } else {
    292         mSeeker = seeker;
    293         int encd = seeker->getEncoderDelay();
    294         int encp = seeker->getEncoderPadding();
    295         if (encd != 0 || encp != 0) {
    296             mMeta->setInt32(kKeyEncoderDelay, encd);
    297             mMeta->setInt32(kKeyEncoderPadding, encp);
    298         }
    299     }
    300 
    301     if (mSeeker != NULL) {
    302         // While it is safe to send the XING/VBRI frame to the decoder, this will
    303         // result in an extra 1152 samples being output. In addition, the bitrate
    304         // of the Xing header might not match the rest of the file, which could
    305         // lead to problems when seeking. The real first frame to decode is after
    306         // the XING/VBRI frame, so skip there.
    307         size_t frame_size;
    308         int sample_rate;
    309         int num_channels;
    310         int bitrate;
    311         GetMPEGAudioFrameSize(
    312                 header, &frame_size, &sample_rate, &num_channels, &bitrate);
    313         pos += frame_size;
    314         if (!Resync(mDataSource, 0, &pos, &post_id3_pos, &header)) {
    315             // mInitCheck will remain NO_INIT
    316             return;
    317         }
    318         mFirstFramePos = pos;
    319         mFixedHeader = header;
    320     }
    321 
    322     size_t frame_size;
    323     int sample_rate;
    324     int num_channels;
    325     int bitrate;
    326     GetMPEGAudioFrameSize(
    327             header, &frame_size, &sample_rate, &num_channels, &bitrate);
    328 
    329     unsigned layer = 4 - ((header >> 17) & 3);
    330 
    331     switch (layer) {
    332         case 1:
    333             mMeta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MPEG_LAYER_I);
    334             break;
    335         case 2:
    336             mMeta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MPEG_LAYER_II);
    337             break;
    338         case 3:
    339             mMeta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MPEG);
    340             break;
    341         default:
    342             TRESPASS();
    343     }
    344 
    345     mMeta->setInt32(kKeySampleRate, sample_rate);
    346     mMeta->setInt32(kKeyBitRate, bitrate * 1000);
    347     mMeta->setInt32(kKeyChannelCount, num_channels);
    348 
    349     int64_t durationUs;
    350 
    351     if (mSeeker == NULL || !mSeeker->getDuration(&durationUs)) {
    352         off64_t fileSize;
    353         if (mDataSource->getSize(&fileSize) == OK) {
    354             off64_t dataLength = fileSize - mFirstFramePos;
    355             if (dataLength > INT64_MAX / 8000LL) {
    356                 // duration would overflow
    357                 durationUs = INT64_MAX;
    358             } else {
    359                 durationUs = 8000LL * dataLength / bitrate;
    360             }
    361         } else {
    362             durationUs = -1;
    363         }
    364     }
    365 
    366     if (durationUs >= 0) {
    367         mMeta->setInt64(kKeyDuration, durationUs);
    368     }
    369 
    370     mInitCheck = OK;
    371 
    372     // Get iTunes-style gapless info if present.
    373     // When getting the id3 tag, skip the V1 tags to prevent the source cache
    374     // from being iterated to the end of the file.
    375     ID3 id3(mDataSource, true);
    376     if (id3.isValid()) {
    377         ID3::Iterator *com = new ID3::Iterator(id3, "COM");
    378         if (com->done()) {
    379             delete com;
    380             com = new ID3::Iterator(id3, "COMM");
    381         }
    382         while(!com->done()) {
    383             String8 commentdesc;
    384             String8 commentvalue;
    385             com->getString(&commentdesc, &commentvalue);
    386             const char * desc = commentdesc.string();
    387             const char * value = commentvalue.string();
    388 
    389             // first 3 characters are the language, which we don't care about
    390             if(strlen(desc) > 3 && strcmp(desc + 3, "iTunSMPB") == 0) {
    391 
    392                 int32_t delay, padding;
    393                 if (sscanf(value, " %*x %x %x %*x", &delay, &padding) == 2) {
    394                     mMeta->setInt32(kKeyEncoderDelay, delay);
    395                     mMeta->setInt32(kKeyEncoderPadding, padding);
    396                 }
    397                 break;
    398             }
    399             com->next();
    400         }
    401         delete com;
    402         com = NULL;
    403     }
    404 }
    405 
    406 size_t MP3Extractor::countTracks() {
    407     return mInitCheck != OK ? 0 : 1;
    408 }
    409 
    410 sp<IMediaSource> MP3Extractor::getTrack(size_t index) {
    411     if (mInitCheck != OK || index != 0) {
    412         return NULL;
    413     }
    414 
    415     return new MP3Source(
    416             mMeta, mDataSource, mFirstFramePos, mFixedHeader,
    417             mSeeker);
    418 }
    419 
    420 sp<MetaData> MP3Extractor::getTrackMetaData(
    421         size_t index, uint32_t /* flags */) {
    422     if (mInitCheck != OK || index != 0) {
    423         return NULL;
    424     }
    425 
    426     return mMeta;
    427 }
    428 
    429 ////////////////////////////////////////////////////////////////////////////////
    430 
    431 // The theoretical maximum frame size for an MPEG audio stream should occur
    432 // while playing a Layer 2, MPEGv2.5 audio stream at 160kbps (with padding).
    433 // The size of this frame should be...
    434 // ((1152 samples/frame * 160000 bits/sec) /
    435 //  (8000 samples/sec * 8 bits/byte)) + 1 padding byte/frame = 2881 bytes/frame.
    436 // Set our max frame size to the nearest power of 2 above this size (aka, 4kB)
    437 const size_t MP3Source::kMaxFrameSize = (1 << 12); /* 4096 bytes */
    438 MP3Source::MP3Source(
    439         const sp<MetaData> &meta, const sp<DataSource> &source,
    440         off64_t first_frame_pos, uint32_t fixed_header,
    441         const sp<MP3Seeker> &seeker)
    442     : mMeta(meta),
    443       mDataSource(source),
    444       mFirstFramePos(first_frame_pos),
    445       mFixedHeader(fixed_header),
    446       mCurrentPos(0),
    447       mCurrentTimeUs(0),
    448       mStarted(false),
    449       mSeeker(seeker),
    450       mGroup(NULL),
    451       mBasisTimeUs(0),
    452       mSamplesRead(0) {
    453 }
    454 
    455 MP3Source::~MP3Source() {
    456     if (mStarted) {
    457         stop();
    458     }
    459 }
    460 
    461 status_t MP3Source::start(MetaData *) {
    462     CHECK(!mStarted);
    463 
    464     mGroup = new MediaBufferGroup;
    465 
    466     mGroup->add_buffer(new MediaBuffer(kMaxFrameSize));
    467 
    468     mCurrentPos = mFirstFramePos;
    469     mCurrentTimeUs = 0;
    470 
    471     mBasisTimeUs = mCurrentTimeUs;
    472     mSamplesRead = 0;
    473 
    474     mStarted = true;
    475 
    476     return OK;
    477 }
    478 
    479 status_t MP3Source::stop() {
    480     CHECK(mStarted);
    481 
    482     delete mGroup;
    483     mGroup = NULL;
    484 
    485     mStarted = false;
    486 
    487     return OK;
    488 }
    489 
    490 sp<MetaData> MP3Source::getFormat() {
    491     return mMeta;
    492 }
    493 
    494 status_t MP3Source::read(
    495         MediaBuffer **out, const ReadOptions *options) {
    496     *out = NULL;
    497 
    498     int64_t seekTimeUs;
    499     ReadOptions::SeekMode mode;
    500     bool seekCBR = false;
    501 
    502     if (options != NULL && options->getSeekTo(&seekTimeUs, &mode)) {
    503         int64_t actualSeekTimeUs = seekTimeUs;
    504         if (mSeeker == NULL
    505                 || !mSeeker->getOffsetForTime(&actualSeekTimeUs, &mCurrentPos)) {
    506             int32_t bitrate;
    507             if (!mMeta->findInt32(kKeyBitRate, &bitrate)) {
    508                 // bitrate is in bits/sec.
    509                 ALOGI("no bitrate");
    510 
    511                 return ERROR_UNSUPPORTED;
    512             }
    513 
    514             mCurrentTimeUs = seekTimeUs;
    515             mCurrentPos = mFirstFramePos + seekTimeUs * bitrate / 8000000;
    516             seekCBR = true;
    517         } else {
    518             mCurrentTimeUs = actualSeekTimeUs;
    519         }
    520 
    521         mBasisTimeUs = mCurrentTimeUs;
    522         mSamplesRead = 0;
    523     }
    524 
    525     MediaBuffer *buffer;
    526     status_t err = mGroup->acquire_buffer(&buffer);
    527     if (err != OK) {
    528         return err;
    529     }
    530 
    531     size_t frame_size;
    532     int bitrate;
    533     int num_samples;
    534     int sample_rate;
    535     for (;;) {
    536         ssize_t n = mDataSource->readAt(mCurrentPos, buffer->data(), 4);
    537         if (n < 4) {
    538             buffer->release();
    539             buffer = NULL;
    540 
    541             return (n < 0 ? n : ERROR_END_OF_STREAM);
    542         }
    543 
    544         uint32_t header = U32_AT((const uint8_t *)buffer->data());
    545 
    546         if ((header & kMask) == (mFixedHeader & kMask)
    547             && GetMPEGAudioFrameSize(
    548                 header, &frame_size, &sample_rate, NULL,
    549                 &bitrate, &num_samples)) {
    550 
    551             // re-calculate mCurrentTimeUs because we might have called Resync()
    552             if (seekCBR) {
    553                 mCurrentTimeUs = (mCurrentPos - mFirstFramePos) * 8000 / bitrate;
    554                 mBasisTimeUs = mCurrentTimeUs;
    555             }
    556 
    557             break;
    558         }
    559 
    560         // Lost sync.
    561         ALOGV("lost sync! header = 0x%08x, old header = 0x%08x\n", header, mFixedHeader);
    562 
    563         off64_t pos = mCurrentPos;
    564         if (!Resync(mDataSource, mFixedHeader, &pos, NULL, NULL)) {
    565             ALOGE("Unable to resync. Signalling end of stream.");
    566 
    567             buffer->release();
    568             buffer = NULL;
    569 
    570             return ERROR_END_OF_STREAM;
    571         }
    572 
    573         mCurrentPos = pos;
    574 
    575         // Try again with the new position.
    576     }
    577 
    578     CHECK(frame_size <= buffer->size());
    579 
    580     ssize_t n = mDataSource->readAt(mCurrentPos, buffer->data(), frame_size);
    581     if (n < (ssize_t)frame_size) {
    582         buffer->release();
    583         buffer = NULL;
    584 
    585         return (n < 0 ? n : ERROR_END_OF_STREAM);
    586     }
    587 
    588     buffer->set_range(0, frame_size);
    589 
    590     buffer->meta_data()->setInt64(kKeyTime, mCurrentTimeUs);
    591     buffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
    592 
    593     mCurrentPos += frame_size;
    594 
    595     mSamplesRead += num_samples;
    596     mCurrentTimeUs = mBasisTimeUs + ((mSamplesRead * 1000000) / sample_rate);
    597 
    598     *out = buffer;
    599 
    600     return OK;
    601 }
    602 
    603 sp<MetaData> MP3Extractor::getMetaData() {
    604     sp<MetaData> meta = new MetaData;
    605 
    606     if (mInitCheck != OK) {
    607         return meta;
    608     }
    609 
    610     meta->setCString(kKeyMIMEType, "audio/mpeg");
    611 
    612     ID3 id3(mDataSource);
    613 
    614     if (!id3.isValid()) {
    615         return meta;
    616     }
    617 
    618     struct Map {
    619         int key;
    620         const char *tag1;
    621         const char *tag2;
    622     };
    623     static const Map kMap[] = {
    624         { kKeyAlbum, "TALB", "TAL" },
    625         { kKeyArtist, "TPE1", "TP1" },
    626         { kKeyAlbumArtist, "TPE2", "TP2" },
    627         { kKeyComposer, "TCOM", "TCM" },
    628         { kKeyGenre, "TCON", "TCO" },
    629         { kKeyTitle, "TIT2", "TT2" },
    630         { kKeyYear, "TYE", "TYER" },
    631         { kKeyAuthor, "TXT", "TEXT" },
    632         { kKeyCDTrackNumber, "TRK", "TRCK" },
    633         { kKeyDiscNumber, "TPA", "TPOS" },
    634         { kKeyCompilation, "TCP", "TCMP" },
    635     };
    636     static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]);
    637 
    638     for (size_t i = 0; i < kNumMapEntries; ++i) {
    639         ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1);
    640         if (it->done()) {
    641             delete it;
    642             it = new ID3::Iterator(id3, kMap[i].tag2);
    643         }
    644 
    645         if (it->done()) {
    646             delete it;
    647             continue;
    648         }
    649 
    650         String8 s;
    651         it->getString(&s);
    652         delete it;
    653 
    654         meta->setCString(kMap[i].key, s);
    655     }
    656 
    657     size_t dataSize;
    658     String8 mime;
    659     const void *data = id3.getAlbumArt(&dataSize, &mime);
    660 
    661     if (data) {
    662         meta->setData(kKeyAlbumArt, MetaData::TYPE_NONE, data, dataSize);
    663         meta->setCString(kKeyAlbumArtMIME, mime.string());
    664     }
    665 
    666     return meta;
    667 }
    668 
    669 bool SniffMP3(
    670         const sp<DataSource> &source, String8 *mimeType,
    671         float *confidence, sp<AMessage> *meta) {
    672     off64_t pos = 0;
    673     off64_t post_id3_pos;
    674     uint32_t header;
    675     if (!Resync(source, 0, &pos, &post_id3_pos, &header)) {
    676         return false;
    677     }
    678 
    679     *meta = new AMessage;
    680     (*meta)->setInt64("offset", pos);
    681     (*meta)->setInt32("header", header);
    682     (*meta)->setInt64("post-id3-offset", post_id3_pos);
    683 
    684     *mimeType = MEDIA_MIMETYPE_AUDIO_MPEG;
    685     *confidence = 0.2f;
    686 
    687     return true;
    688 }
    689 
    690 }  // namespace android
    691