1 /* 2 * Copyright (C) 2009 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 //#define LOG_NDEBUG 0 18 #define LOG_TAG "MP3Extractor" 19 #include <utils/Log.h> 20 21 #include "include/MP3Extractor.h" 22 23 #include "include/avc_utils.h" 24 #include "include/ID3.h" 25 #include "include/VBRISeeker.h" 26 #include "include/XINGSeeker.h" 27 28 #include <media/stagefright/foundation/ADebug.h> 29 #include <media/stagefright/foundation/AMessage.h> 30 #include <media/stagefright/DataSource.h> 31 #include <media/stagefright/MediaBuffer.h> 32 #include <media/stagefright/MediaBufferGroup.h> 33 #include <media/stagefright/MediaDefs.h> 34 #include <media/stagefright/MediaErrors.h> 35 #include <media/stagefright/MediaSource.h> 36 #include <media/stagefright/MetaData.h> 37 #include <media/stagefright/Utils.h> 38 #include <utils/String8.h> 39 40 namespace android { 41 42 // Everything must match except for 43 // protection, bitrate, padding, private bits, mode, mode extension, 44 // copyright bit, original bit and emphasis. 45 // Yes ... there are things that must indeed match... 46 static const uint32_t kMask = 0xfffe0c00; 47 48 static bool Resync( 49 const sp<DataSource> &source, uint32_t match_header, 50 off64_t *inout_pos, off64_t *post_id3_pos, uint32_t *out_header) { 51 if (post_id3_pos != NULL) { 52 *post_id3_pos = 0; 53 } 54 55 if (*inout_pos == 0) { 56 // Skip an optional ID3 header if syncing at the very beginning 57 // of the datasource. 58 59 for (;;) { 60 uint8_t id3header[10]; 61 if (source->readAt(*inout_pos, id3header, sizeof(id3header)) 62 < (ssize_t)sizeof(id3header)) { 63 // If we can't even read these 10 bytes, we might as well bail 64 // out, even if there _were_ 10 bytes of valid mp3 audio data... 65 return false; 66 } 67 68 if (memcmp("ID3", id3header, 3)) { 69 break; 70 } 71 72 // Skip the ID3v2 header. 73 74 size_t len = 75 ((id3header[6] & 0x7f) << 21) 76 | ((id3header[7] & 0x7f) << 14) 77 | ((id3header[8] & 0x7f) << 7) 78 | (id3header[9] & 0x7f); 79 80 len += 10; 81 82 *inout_pos += len; 83 84 ALOGV("skipped ID3 tag, new starting offset is %lld (0x%016llx)", 85 (long long)*inout_pos, (long long)*inout_pos); 86 } 87 88 if (post_id3_pos != NULL) { 89 *post_id3_pos = *inout_pos; 90 } 91 } 92 93 off64_t pos = *inout_pos; 94 bool valid = false; 95 96 const size_t kMaxReadBytes = 1024; 97 const size_t kMaxBytesChecked = 128 * 1024; 98 uint8_t buf[kMaxReadBytes]; 99 ssize_t bytesToRead = kMaxReadBytes; 100 ssize_t totalBytesRead = 0; 101 ssize_t remainingBytes = 0; 102 bool reachEOS = false; 103 uint8_t *tmp = buf; 104 105 do { 106 if (pos >= (off64_t)(*inout_pos + kMaxBytesChecked)) { 107 // Don't scan forever. 108 ALOGV("giving up at offset %lld", (long long)pos); 109 break; 110 } 111 112 if (remainingBytes < 4) { 113 if (reachEOS) { 114 break; 115 } else { 116 memcpy(buf, tmp, remainingBytes); 117 bytesToRead = kMaxReadBytes - remainingBytes; 118 119 /* 120 * The next read position should start from the end of 121 * the last buffer, and thus should include the remaining 122 * bytes in the buffer. 123 */ 124 totalBytesRead = source->readAt(pos + remainingBytes, 125 buf + remainingBytes, 126 bytesToRead); 127 if (totalBytesRead <= 0) { 128 break; 129 } 130 reachEOS = (totalBytesRead != bytesToRead); 131 totalBytesRead += remainingBytes; 132 remainingBytes = totalBytesRead; 133 tmp = buf; 134 continue; 135 } 136 } 137 138 uint32_t header = U32_AT(tmp); 139 140 if (match_header != 0 && (header & kMask) != (match_header & kMask)) { 141 ++pos; 142 ++tmp; 143 --remainingBytes; 144 continue; 145 } 146 147 size_t frame_size; 148 int sample_rate, num_channels, bitrate; 149 if (!GetMPEGAudioFrameSize( 150 header, &frame_size, 151 &sample_rate, &num_channels, &bitrate)) { 152 ++pos; 153 ++tmp; 154 --remainingBytes; 155 continue; 156 } 157 158 ALOGV("found possible 1st frame at %lld (header = 0x%08x)", (long long)pos, header); 159 160 // We found what looks like a valid frame, 161 // now find its successors. 162 163 off64_t test_pos = pos + frame_size; 164 165 valid = true; 166 for (int j = 0; j < 3; ++j) { 167 uint8_t tmp[4]; 168 if (source->readAt(test_pos, tmp, 4) < 4) { 169 valid = false; 170 break; 171 } 172 173 uint32_t test_header = U32_AT(tmp); 174 175 ALOGV("subsequent header is %08x", test_header); 176 177 if ((test_header & kMask) != (header & kMask)) { 178 valid = false; 179 break; 180 } 181 182 size_t test_frame_size; 183 if (!GetMPEGAudioFrameSize( 184 test_header, &test_frame_size)) { 185 valid = false; 186 break; 187 } 188 189 ALOGV("found subsequent frame #%d at %lld", j + 2, (long long)test_pos); 190 191 test_pos += test_frame_size; 192 } 193 194 if (valid) { 195 *inout_pos = pos; 196 197 if (out_header != NULL) { 198 *out_header = header; 199 } 200 } else { 201 ALOGV("no dice, no valid sequence of frames found."); 202 } 203 204 ++pos; 205 ++tmp; 206 --remainingBytes; 207 } while (!valid); 208 209 return valid; 210 } 211 212 class MP3Source : public MediaSource { 213 public: 214 MP3Source( 215 const sp<MetaData> &meta, const sp<DataSource> &source, 216 off64_t first_frame_pos, uint32_t fixed_header, 217 const sp<MP3Seeker> &seeker); 218 219 virtual status_t start(MetaData *params = NULL); 220 virtual status_t stop(); 221 222 virtual sp<MetaData> getFormat(); 223 224 virtual status_t read( 225 MediaBuffer **buffer, const ReadOptions *options = NULL); 226 227 protected: 228 virtual ~MP3Source(); 229 230 private: 231 static const size_t kMaxFrameSize; 232 sp<MetaData> mMeta; 233 sp<DataSource> mDataSource; 234 off64_t mFirstFramePos; 235 uint32_t mFixedHeader; 236 off64_t mCurrentPos; 237 int64_t mCurrentTimeUs; 238 bool mStarted; 239 sp<MP3Seeker> mSeeker; 240 MediaBufferGroup *mGroup; 241 242 int64_t mBasisTimeUs; 243 int64_t mSamplesRead; 244 245 MP3Source(const MP3Source &); 246 MP3Source &operator=(const MP3Source &); 247 }; 248 249 MP3Extractor::MP3Extractor( 250 const sp<DataSource> &source, const sp<AMessage> &meta) 251 : mInitCheck(NO_INIT), 252 mDataSource(source), 253 mFirstFramePos(-1), 254 mFixedHeader(0) { 255 256 off64_t pos = 0; 257 off64_t post_id3_pos; 258 uint32_t header; 259 bool success; 260 261 int64_t meta_offset; 262 uint32_t meta_header; 263 int64_t meta_post_id3_offset; 264 if (meta != NULL 265 && meta->findInt64("offset", &meta_offset) 266 && meta->findInt32("header", (int32_t *)&meta_header) 267 && meta->findInt64("post-id3-offset", &meta_post_id3_offset)) { 268 // The sniffer has already done all the hard work for us, simply 269 // accept its judgement. 270 pos = (off64_t)meta_offset; 271 header = meta_header; 272 post_id3_pos = (off64_t)meta_post_id3_offset; 273 274 success = true; 275 } else { 276 success = Resync(mDataSource, 0, &pos, &post_id3_pos, &header); 277 } 278 279 if (!success) { 280 // mInitCheck will remain NO_INIT 281 return; 282 } 283 284 mFirstFramePos = pos; 285 mFixedHeader = header; 286 mMeta = new MetaData; 287 sp<XINGSeeker> seeker = XINGSeeker::CreateFromSource(mDataSource, mFirstFramePos); 288 289 if (seeker == NULL) { 290 mSeeker = VBRISeeker::CreateFromSource(mDataSource, post_id3_pos); 291 } else { 292 mSeeker = seeker; 293 int encd = seeker->getEncoderDelay(); 294 int encp = seeker->getEncoderPadding(); 295 if (encd != 0 || encp != 0) { 296 mMeta->setInt32(kKeyEncoderDelay, encd); 297 mMeta->setInt32(kKeyEncoderPadding, encp); 298 } 299 } 300 301 if (mSeeker != NULL) { 302 // While it is safe to send the XING/VBRI frame to the decoder, this will 303 // result in an extra 1152 samples being output. In addition, the bitrate 304 // of the Xing header might not match the rest of the file, which could 305 // lead to problems when seeking. The real first frame to decode is after 306 // the XING/VBRI frame, so skip there. 307 size_t frame_size; 308 int sample_rate; 309 int num_channels; 310 int bitrate; 311 GetMPEGAudioFrameSize( 312 header, &frame_size, &sample_rate, &num_channels, &bitrate); 313 pos += frame_size; 314 if (!Resync(mDataSource, 0, &pos, &post_id3_pos, &header)) { 315 // mInitCheck will remain NO_INIT 316 return; 317 } 318 mFirstFramePos = pos; 319 mFixedHeader = header; 320 } 321 322 size_t frame_size; 323 int sample_rate; 324 int num_channels; 325 int bitrate; 326 GetMPEGAudioFrameSize( 327 header, &frame_size, &sample_rate, &num_channels, &bitrate); 328 329 unsigned layer = 4 - ((header >> 17) & 3); 330 331 switch (layer) { 332 case 1: 333 mMeta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MPEG_LAYER_I); 334 break; 335 case 2: 336 mMeta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MPEG_LAYER_II); 337 break; 338 case 3: 339 mMeta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MPEG); 340 break; 341 default: 342 TRESPASS(); 343 } 344 345 mMeta->setInt32(kKeySampleRate, sample_rate); 346 mMeta->setInt32(kKeyBitRate, bitrate * 1000); 347 mMeta->setInt32(kKeyChannelCount, num_channels); 348 349 int64_t durationUs; 350 351 if (mSeeker == NULL || !mSeeker->getDuration(&durationUs)) { 352 off64_t fileSize; 353 if (mDataSource->getSize(&fileSize) == OK) { 354 off64_t dataLength = fileSize - mFirstFramePos; 355 if (dataLength > INT64_MAX / 8000LL) { 356 // duration would overflow 357 durationUs = INT64_MAX; 358 } else { 359 durationUs = 8000LL * dataLength / bitrate; 360 } 361 } else { 362 durationUs = -1; 363 } 364 } 365 366 if (durationUs >= 0) { 367 mMeta->setInt64(kKeyDuration, durationUs); 368 } 369 370 mInitCheck = OK; 371 372 // Get iTunes-style gapless info if present. 373 // When getting the id3 tag, skip the V1 tags to prevent the source cache 374 // from being iterated to the end of the file. 375 ID3 id3(mDataSource, true); 376 if (id3.isValid()) { 377 ID3::Iterator *com = new ID3::Iterator(id3, "COM"); 378 if (com->done()) { 379 delete com; 380 com = new ID3::Iterator(id3, "COMM"); 381 } 382 while(!com->done()) { 383 String8 commentdesc; 384 String8 commentvalue; 385 com->getString(&commentdesc, &commentvalue); 386 const char * desc = commentdesc.string(); 387 const char * value = commentvalue.string(); 388 389 // first 3 characters are the language, which we don't care about 390 if(strlen(desc) > 3 && strcmp(desc + 3, "iTunSMPB") == 0) { 391 392 int32_t delay, padding; 393 if (sscanf(value, " %*x %x %x %*x", &delay, &padding) == 2) { 394 mMeta->setInt32(kKeyEncoderDelay, delay); 395 mMeta->setInt32(kKeyEncoderPadding, padding); 396 } 397 break; 398 } 399 com->next(); 400 } 401 delete com; 402 com = NULL; 403 } 404 } 405 406 size_t MP3Extractor::countTracks() { 407 return mInitCheck != OK ? 0 : 1; 408 } 409 410 sp<IMediaSource> MP3Extractor::getTrack(size_t index) { 411 if (mInitCheck != OK || index != 0) { 412 return NULL; 413 } 414 415 return new MP3Source( 416 mMeta, mDataSource, mFirstFramePos, mFixedHeader, 417 mSeeker); 418 } 419 420 sp<MetaData> MP3Extractor::getTrackMetaData( 421 size_t index, uint32_t /* flags */) { 422 if (mInitCheck != OK || index != 0) { 423 return NULL; 424 } 425 426 return mMeta; 427 } 428 429 //////////////////////////////////////////////////////////////////////////////// 430 431 // The theoretical maximum frame size for an MPEG audio stream should occur 432 // while playing a Layer 2, MPEGv2.5 audio stream at 160kbps (with padding). 433 // The size of this frame should be... 434 // ((1152 samples/frame * 160000 bits/sec) / 435 // (8000 samples/sec * 8 bits/byte)) + 1 padding byte/frame = 2881 bytes/frame. 436 // Set our max frame size to the nearest power of 2 above this size (aka, 4kB) 437 const size_t MP3Source::kMaxFrameSize = (1 << 12); /* 4096 bytes */ 438 MP3Source::MP3Source( 439 const sp<MetaData> &meta, const sp<DataSource> &source, 440 off64_t first_frame_pos, uint32_t fixed_header, 441 const sp<MP3Seeker> &seeker) 442 : mMeta(meta), 443 mDataSource(source), 444 mFirstFramePos(first_frame_pos), 445 mFixedHeader(fixed_header), 446 mCurrentPos(0), 447 mCurrentTimeUs(0), 448 mStarted(false), 449 mSeeker(seeker), 450 mGroup(NULL), 451 mBasisTimeUs(0), 452 mSamplesRead(0) { 453 } 454 455 MP3Source::~MP3Source() { 456 if (mStarted) { 457 stop(); 458 } 459 } 460 461 status_t MP3Source::start(MetaData *) { 462 CHECK(!mStarted); 463 464 mGroup = new MediaBufferGroup; 465 466 mGroup->add_buffer(new MediaBuffer(kMaxFrameSize)); 467 468 mCurrentPos = mFirstFramePos; 469 mCurrentTimeUs = 0; 470 471 mBasisTimeUs = mCurrentTimeUs; 472 mSamplesRead = 0; 473 474 mStarted = true; 475 476 return OK; 477 } 478 479 status_t MP3Source::stop() { 480 CHECK(mStarted); 481 482 delete mGroup; 483 mGroup = NULL; 484 485 mStarted = false; 486 487 return OK; 488 } 489 490 sp<MetaData> MP3Source::getFormat() { 491 return mMeta; 492 } 493 494 status_t MP3Source::read( 495 MediaBuffer **out, const ReadOptions *options) { 496 *out = NULL; 497 498 int64_t seekTimeUs; 499 ReadOptions::SeekMode mode; 500 bool seekCBR = false; 501 502 if (options != NULL && options->getSeekTo(&seekTimeUs, &mode)) { 503 int64_t actualSeekTimeUs = seekTimeUs; 504 if (mSeeker == NULL 505 || !mSeeker->getOffsetForTime(&actualSeekTimeUs, &mCurrentPos)) { 506 int32_t bitrate; 507 if (!mMeta->findInt32(kKeyBitRate, &bitrate)) { 508 // bitrate is in bits/sec. 509 ALOGI("no bitrate"); 510 511 return ERROR_UNSUPPORTED; 512 } 513 514 mCurrentTimeUs = seekTimeUs; 515 mCurrentPos = mFirstFramePos + seekTimeUs * bitrate / 8000000; 516 seekCBR = true; 517 } else { 518 mCurrentTimeUs = actualSeekTimeUs; 519 } 520 521 mBasisTimeUs = mCurrentTimeUs; 522 mSamplesRead = 0; 523 } 524 525 MediaBuffer *buffer; 526 status_t err = mGroup->acquire_buffer(&buffer); 527 if (err != OK) { 528 return err; 529 } 530 531 size_t frame_size; 532 int bitrate; 533 int num_samples; 534 int sample_rate; 535 for (;;) { 536 ssize_t n = mDataSource->readAt(mCurrentPos, buffer->data(), 4); 537 if (n < 4) { 538 buffer->release(); 539 buffer = NULL; 540 541 return ERROR_END_OF_STREAM; 542 } 543 544 uint32_t header = U32_AT((const uint8_t *)buffer->data()); 545 546 if ((header & kMask) == (mFixedHeader & kMask) 547 && GetMPEGAudioFrameSize( 548 header, &frame_size, &sample_rate, NULL, 549 &bitrate, &num_samples)) { 550 551 // re-calculate mCurrentTimeUs because we might have called Resync() 552 if (seekCBR) { 553 mCurrentTimeUs = (mCurrentPos - mFirstFramePos) * 8000 / bitrate; 554 mBasisTimeUs = mCurrentTimeUs; 555 } 556 557 break; 558 } 559 560 // Lost sync. 561 ALOGV("lost sync! header = 0x%08x, old header = 0x%08x\n", header, mFixedHeader); 562 563 off64_t pos = mCurrentPos; 564 if (!Resync(mDataSource, mFixedHeader, &pos, NULL, NULL)) { 565 ALOGE("Unable to resync. Signalling end of stream."); 566 567 buffer->release(); 568 buffer = NULL; 569 570 return ERROR_END_OF_STREAM; 571 } 572 573 mCurrentPos = pos; 574 575 // Try again with the new position. 576 } 577 578 CHECK(frame_size <= buffer->size()); 579 580 ssize_t n = mDataSource->readAt(mCurrentPos, buffer->data(), frame_size); 581 if (n < (ssize_t)frame_size) { 582 buffer->release(); 583 buffer = NULL; 584 585 return ERROR_END_OF_STREAM; 586 } 587 588 buffer->set_range(0, frame_size); 589 590 buffer->meta_data()->setInt64(kKeyTime, mCurrentTimeUs); 591 buffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 592 593 mCurrentPos += frame_size; 594 595 mSamplesRead += num_samples; 596 mCurrentTimeUs = mBasisTimeUs + ((mSamplesRead * 1000000) / sample_rate); 597 598 *out = buffer; 599 600 return OK; 601 } 602 603 sp<MetaData> MP3Extractor::getMetaData() { 604 sp<MetaData> meta = new MetaData; 605 606 if (mInitCheck != OK) { 607 return meta; 608 } 609 610 meta->setCString(kKeyMIMEType, "audio/mpeg"); 611 612 ID3 id3(mDataSource); 613 614 if (!id3.isValid()) { 615 return meta; 616 } 617 618 struct Map { 619 int key; 620 const char *tag1; 621 const char *tag2; 622 }; 623 static const Map kMap[] = { 624 { kKeyAlbum, "TALB", "TAL" }, 625 { kKeyArtist, "TPE1", "TP1" }, 626 { kKeyAlbumArtist, "TPE2", "TP2" }, 627 { kKeyComposer, "TCOM", "TCM" }, 628 { kKeyGenre, "TCON", "TCO" }, 629 { kKeyTitle, "TIT2", "TT2" }, 630 { kKeyYear, "TYE", "TYER" }, 631 { kKeyAuthor, "TXT", "TEXT" }, 632 { kKeyCDTrackNumber, "TRK", "TRCK" }, 633 { kKeyDiscNumber, "TPA", "TPOS" }, 634 { kKeyCompilation, "TCP", "TCMP" }, 635 }; 636 static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]); 637 638 for (size_t i = 0; i < kNumMapEntries; ++i) { 639 ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1); 640 if (it->done()) { 641 delete it; 642 it = new ID3::Iterator(id3, kMap[i].tag2); 643 } 644 645 if (it->done()) { 646 delete it; 647 continue; 648 } 649 650 String8 s; 651 it->getString(&s); 652 delete it; 653 654 meta->setCString(kMap[i].key, s); 655 } 656 657 size_t dataSize; 658 String8 mime; 659 const void *data = id3.getAlbumArt(&dataSize, &mime); 660 661 if (data) { 662 meta->setData(kKeyAlbumArt, MetaData::TYPE_NONE, data, dataSize); 663 meta->setCString(kKeyAlbumArtMIME, mime.string()); 664 } 665 666 return meta; 667 } 668 669 bool SniffMP3( 670 const sp<DataSource> &source, String8 *mimeType, 671 float *confidence, sp<AMessage> *meta) { 672 off64_t pos = 0; 673 off64_t post_id3_pos; 674 uint32_t header; 675 if (!Resync(source, 0, &pos, &post_id3_pos, &header)) { 676 return false; 677 } 678 679 *meta = new AMessage; 680 (*meta)->setInt64("offset", pos); 681 (*meta)->setInt32("header", header); 682 (*meta)->setInt64("post-id3-offset", post_id3_pos); 683 684 *mimeType = MEDIA_MIMETYPE_AUDIO_MPEG; 685 *confidence = 0.2f; 686 687 return true; 688 } 689 690 } // namespace android 691