1 /* 2 * Copyright (C) 2009 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 //#define LOG_NDEBUG 0 18 #define LOG_TAG "MP3Extractor" 19 #include <utils/Log.h> 20 21 #include "include/MP3Extractor.h" 22 23 #include "include/ID3.h" 24 25 #include <media/stagefright/foundation/AMessage.h> 26 #include <media/stagefright/DataSource.h> 27 #include <media/stagefright/MediaBuffer.h> 28 #include <media/stagefright/MediaBufferGroup.h> 29 #include <media/stagefright/MediaDebug.h> 30 #include <media/stagefright/MediaDefs.h> 31 #include <media/stagefright/MediaErrors.h> 32 #include <media/stagefright/MediaSource.h> 33 #include <media/stagefright/MetaData.h> 34 #include <media/stagefright/Utils.h> 35 #include <utils/String8.h> 36 37 namespace android { 38 39 // Everything must match except for 40 // protection, bitrate, padding, private bits, mode extension, 41 // copyright bit, original bit and emphasis. 42 // Yes ... there are things that must indeed match... 43 static const uint32_t kMask = 0xfffe0cc0; 44 45 static bool get_mp3_frame_size( 46 uint32_t header, size_t *frame_size, 47 int *out_sampling_rate = NULL, int *out_channels = NULL, 48 int *out_bitrate = NULL) { 49 *frame_size = 0; 50 51 if (out_sampling_rate) { 52 *out_sampling_rate = 0; 53 } 54 55 if (out_channels) { 56 *out_channels = 0; 57 } 58 59 if (out_bitrate) { 60 *out_bitrate = 0; 61 } 62 63 if ((header & 0xffe00000) != 0xffe00000) { 64 return false; 65 } 66 67 unsigned version = (header >> 19) & 3; 68 69 if (version == 0x01) { 70 return false; 71 } 72 73 unsigned layer = (header >> 17) & 3; 74 75 if (layer == 0x00) { 76 return false; 77 } 78 79 unsigned protection = (header >> 16) & 1; 80 81 unsigned bitrate_index = (header >> 12) & 0x0f; 82 83 if (bitrate_index == 0 || bitrate_index == 0x0f) { 84 // Disallow "free" bitrate. 85 return false; 86 } 87 88 unsigned sampling_rate_index = (header >> 10) & 3; 89 90 if (sampling_rate_index == 3) { 91 return false; 92 } 93 94 static const int kSamplingRateV1[] = { 44100, 48000, 32000 }; 95 int sampling_rate = kSamplingRateV1[sampling_rate_index]; 96 if (version == 2 /* V2 */) { 97 sampling_rate /= 2; 98 } else if (version == 0 /* V2.5 */) { 99 sampling_rate /= 4; 100 } 101 102 unsigned padding = (header >> 9) & 1; 103 104 if (layer == 3) { 105 // layer I 106 107 static const int kBitrateV1[] = { 108 32, 64, 96, 128, 160, 192, 224, 256, 109 288, 320, 352, 384, 416, 448 110 }; 111 112 static const int kBitrateV2[] = { 113 32, 48, 56, 64, 80, 96, 112, 128, 114 144, 160, 176, 192, 224, 256 115 }; 116 117 int bitrate = 118 (version == 3 /* V1 */) 119 ? kBitrateV1[bitrate_index - 1] 120 : kBitrateV2[bitrate_index - 1]; 121 122 if (out_bitrate) { 123 *out_bitrate = bitrate; 124 } 125 126 *frame_size = (12000 * bitrate / sampling_rate + padding) * 4; 127 } else { 128 // layer II or III 129 130 static const int kBitrateV1L2[] = { 131 32, 48, 56, 64, 80, 96, 112, 128, 132 160, 192, 224, 256, 320, 384 133 }; 134 135 static const int kBitrateV1L3[] = { 136 32, 40, 48, 56, 64, 80, 96, 112, 137 128, 160, 192, 224, 256, 320 138 }; 139 140 static const int kBitrateV2[] = { 141 8, 16, 24, 32, 40, 48, 56, 64, 142 80, 96, 112, 128, 144, 160 143 }; 144 145 int bitrate; 146 if (version == 3 /* V1 */) { 147 bitrate = (layer == 2 /* L2 */) 148 ? kBitrateV1L2[bitrate_index - 1] 149 : kBitrateV1L3[bitrate_index - 1]; 150 } else { 151 // V2 (or 2.5) 152 153 bitrate = kBitrateV2[bitrate_index - 1]; 154 } 155 156 if (out_bitrate) { 157 *out_bitrate = bitrate; 158 } 159 160 if (version == 3 /* V1 */) { 161 *frame_size = 144000 * bitrate / sampling_rate + padding; 162 } else { 163 // V2 or V2.5 164 *frame_size = 72000 * bitrate / sampling_rate + padding; 165 } 166 } 167 168 if (out_sampling_rate) { 169 *out_sampling_rate = sampling_rate; 170 } 171 172 if (out_channels) { 173 int channel_mode = (header >> 6) & 3; 174 175 *out_channels = (channel_mode == 3) ? 1 : 2; 176 } 177 178 return true; 179 } 180 181 static bool parse_xing_header( 182 const sp<DataSource> &source, off_t first_frame_pos, 183 int32_t *frame_number = NULL, int32_t *byte_number = NULL, 184 char *table_of_contents = NULL, int32_t *quality_indicator = NULL, 185 int64_t *duration = NULL) { 186 187 if (frame_number) { 188 *frame_number = 0; 189 } 190 if (byte_number) { 191 *byte_number = 0; 192 } 193 if (table_of_contents) { 194 table_of_contents[0] = 0; 195 } 196 if (quality_indicator) { 197 *quality_indicator = 0; 198 } 199 if (duration) { 200 *duration = 0; 201 } 202 203 uint8_t buffer[4]; 204 int offset = first_frame_pos; 205 if (source->readAt(offset, &buffer, 4) < 4) { // get header 206 return false; 207 } 208 offset += 4; 209 210 uint8_t id, layer, sr_index, mode; 211 layer = (buffer[1] >> 1) & 3; 212 id = (buffer[1] >> 3) & 3; 213 sr_index = (buffer[2] >> 2) & 3; 214 mode = (buffer[3] >> 6) & 3; 215 if (layer == 0) { 216 return false; 217 } 218 if (id == 1) { 219 return false; 220 } 221 if (sr_index == 3) { 222 return false; 223 } 224 // determine offset of XING header 225 if(id&1) { // mpeg1 226 if (mode != 3) offset += 32; 227 else offset += 17; 228 } else { // mpeg2 229 if (mode != 3) offset += 17; 230 else offset += 9; 231 } 232 233 if (source->readAt(offset, &buffer, 4) < 4) { // XING header ID 234 return false; 235 } 236 offset += 4; 237 // Check XING ID 238 if ((buffer[0] != 'X') || (buffer[1] != 'i') 239 || (buffer[2] != 'n') || (buffer[3] != 'g')) { 240 if ((buffer[0] != 'I') || (buffer[1] != 'n') 241 || (buffer[2] != 'f') || (buffer[3] != 'o')) { 242 return false; 243 } 244 } 245 246 if (source->readAt(offset, &buffer, 4) < 4) { // flags 247 return false; 248 } 249 offset += 4; 250 uint32_t flags = U32_AT(buffer); 251 252 if (flags & 0x0001) { // Frames field is present 253 if (source->readAt(offset, buffer, 4) < 4) { 254 return false; 255 } 256 if (frame_number) { 257 *frame_number = U32_AT(buffer); 258 } 259 int32_t frame = U32_AT(buffer); 260 // Samples per Frame: 1. index = MPEG Version ID, 2. index = Layer 261 const int samplesPerFrames[2][3] = 262 { 263 { 384, 1152, 576 }, // MPEG 2, 2.5: layer1, layer2, layer3 264 { 384, 1152, 1152 }, // MPEG 1: layer1, layer2, layer3 265 }; 266 // sampling rates in hertz: 1. index = MPEG Version ID, 2. index = sampling rate index 267 const int samplingRates[4][3] = 268 { 269 { 11025, 12000, 8000, }, // MPEG 2.5 270 { 0, 0, 0, }, // reserved 271 { 22050, 24000, 16000, }, // MPEG 2 272 { 44100, 48000, 32000, } // MPEG 1 273 }; 274 if (duration) { 275 *duration = (int64_t)frame * samplesPerFrames[id&1][3-layer] * 1000000LL 276 / samplingRates[id][sr_index]; 277 } 278 offset += 4; 279 } 280 if (flags & 0x0002) { // Bytes field is present 281 if (byte_number) { 282 if (source->readAt(offset, buffer, 4) < 4) { 283 return false; 284 } 285 *byte_number = U32_AT(buffer); 286 } 287 offset += 4; 288 } 289 if (flags & 0x0004) { // TOC field is present 290 if (table_of_contents) { 291 if (source->readAt(offset + 1, table_of_contents, 99) < 99) { 292 return false; 293 } 294 } 295 offset += 100; 296 } 297 if (flags & 0x0008) { // Quality indicator field is present 298 if (quality_indicator) { 299 if (source->readAt(offset, buffer, 4) < 4) { 300 return false; 301 } 302 *quality_indicator = U32_AT(buffer); 303 } 304 } 305 return true; 306 } 307 308 static bool Resync( 309 const sp<DataSource> &source, uint32_t match_header, 310 off_t *inout_pos, uint32_t *out_header) { 311 if (*inout_pos == 0) { 312 // Skip an optional ID3 header if syncing at the very beginning 313 // of the datasource. 314 315 for (;;) { 316 uint8_t id3header[10]; 317 if (source->readAt(*inout_pos, id3header, sizeof(id3header)) 318 < (ssize_t)sizeof(id3header)) { 319 // If we can't even read these 10 bytes, we might as well bail 320 // out, even if there _were_ 10 bytes of valid mp3 audio data... 321 return false; 322 } 323 324 if (memcmp("ID3", id3header, 3)) { 325 break; 326 } 327 328 // Skip the ID3v2 header. 329 330 size_t len = 331 ((id3header[6] & 0x7f) << 21) 332 | ((id3header[7] & 0x7f) << 14) 333 | ((id3header[8] & 0x7f) << 7) 334 | (id3header[9] & 0x7f); 335 336 len += 10; 337 338 *inout_pos += len; 339 340 LOGV("skipped ID3 tag, new starting offset is %ld (0x%08lx)", 341 *inout_pos, *inout_pos); 342 } 343 } 344 345 off_t pos = *inout_pos; 346 bool valid = false; 347 do { 348 if (pos >= *inout_pos + 128 * 1024) { 349 // Don't scan forever. 350 LOGV("giving up at offset %ld", pos); 351 break; 352 } 353 354 uint8_t tmp[4]; 355 if (source->readAt(pos, tmp, 4) != 4) { 356 break; 357 } 358 359 uint32_t header = U32_AT(tmp); 360 361 if (match_header != 0 && (header & kMask) != (match_header & kMask)) { 362 ++pos; 363 continue; 364 } 365 366 size_t frame_size; 367 int sample_rate, num_channels, bitrate; 368 if (!get_mp3_frame_size(header, &frame_size, 369 &sample_rate, &num_channels, &bitrate)) { 370 ++pos; 371 continue; 372 } 373 374 LOGV("found possible 1st frame at %ld (header = 0x%08x)", pos, header); 375 376 // We found what looks like a valid frame, 377 // now find its successors. 378 379 off_t test_pos = pos + frame_size; 380 381 valid = true; 382 for (int j = 0; j < 3; ++j) { 383 uint8_t tmp[4]; 384 if (source->readAt(test_pos, tmp, 4) < 4) { 385 valid = false; 386 break; 387 } 388 389 uint32_t test_header = U32_AT(tmp); 390 391 LOGV("subsequent header is %08x", test_header); 392 393 if ((test_header & kMask) != (header & kMask)) { 394 valid = false; 395 break; 396 } 397 398 size_t test_frame_size; 399 if (!get_mp3_frame_size(test_header, &test_frame_size)) { 400 valid = false; 401 break; 402 } 403 404 LOGV("found subsequent frame #%d at %ld", j + 2, test_pos); 405 406 test_pos += test_frame_size; 407 } 408 409 if (valid) { 410 *inout_pos = pos; 411 412 if (out_header != NULL) { 413 *out_header = header; 414 } 415 } else { 416 LOGV("no dice, no valid sequence of frames found."); 417 } 418 419 ++pos; 420 } while (!valid); 421 422 return valid; 423 } 424 425 class MP3Source : public MediaSource { 426 public: 427 MP3Source( 428 const sp<MetaData> &meta, const sp<DataSource> &source, 429 off_t first_frame_pos, uint32_t fixed_header, 430 int32_t byte_number, const char *table_of_contents); 431 432 virtual status_t start(MetaData *params = NULL); 433 virtual status_t stop(); 434 435 virtual sp<MetaData> getFormat(); 436 437 virtual status_t read( 438 MediaBuffer **buffer, const ReadOptions *options = NULL); 439 440 protected: 441 virtual ~MP3Source(); 442 443 private: 444 sp<MetaData> mMeta; 445 sp<DataSource> mDataSource; 446 off_t mFirstFramePos; 447 uint32_t mFixedHeader; 448 off_t mCurrentPos; 449 int64_t mCurrentTimeUs; 450 bool mStarted; 451 int32_t mByteNumber; // total number of bytes in this MP3 452 // TOC entries in XING header. Skip the first one since it's always 0. 453 char mTableOfContents[99]; 454 MediaBufferGroup *mGroup; 455 456 MP3Source(const MP3Source &); 457 MP3Source &operator=(const MP3Source &); 458 }; 459 460 MP3Extractor::MP3Extractor( 461 const sp<DataSource> &source, const sp<AMessage> &meta) 462 : mInitCheck(NO_INIT), 463 mDataSource(source), 464 mFirstFramePos(-1), 465 mFixedHeader(0), 466 mByteNumber(0) { 467 off_t pos = 0; 468 uint32_t header; 469 bool success; 470 471 int64_t meta_offset; 472 uint32_t meta_header; 473 if (meta != NULL 474 && meta->findInt64("offset", &meta_offset) 475 && meta->findInt32("header", (int32_t *)&meta_header)) { 476 // The sniffer has already done all the hard work for us, simply 477 // accept its judgement. 478 pos = (off_t)meta_offset; 479 header = meta_header; 480 481 success = true; 482 } else { 483 success = Resync(mDataSource, 0, &pos, &header); 484 } 485 486 if (!success) { 487 // mInitCheck will remain NO_INIT 488 return; 489 } 490 491 mFirstFramePos = pos; 492 mFixedHeader = header; 493 494 size_t frame_size; 495 int sample_rate; 496 int num_channels; 497 int bitrate; 498 get_mp3_frame_size( 499 header, &frame_size, &sample_rate, &num_channels, &bitrate); 500 501 mMeta = new MetaData; 502 503 mMeta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MPEG); 504 mMeta->setInt32(kKeySampleRate, sample_rate); 505 mMeta->setInt32(kKeyBitRate, bitrate * 1000); 506 mMeta->setInt32(kKeyChannelCount, num_channels); 507 508 int64_t duration; 509 parse_xing_header( 510 mDataSource, mFirstFramePos, NULL, &mByteNumber, 511 mTableOfContents, NULL, &duration); 512 if (duration > 0) { 513 mMeta->setInt64(kKeyDuration, duration); 514 } else { 515 off_t fileSize; 516 if (mDataSource->getSize(&fileSize) == OK) { 517 mMeta->setInt64( 518 kKeyDuration, 519 8000LL * (fileSize - mFirstFramePos) / bitrate); 520 } 521 } 522 523 mInitCheck = OK; 524 } 525 526 size_t MP3Extractor::countTracks() { 527 return mInitCheck != OK ? 0 : 1; 528 } 529 530 sp<MediaSource> MP3Extractor::getTrack(size_t index) { 531 if (mInitCheck != OK || index != 0) { 532 return NULL; 533 } 534 535 return new MP3Source( 536 mMeta, mDataSource, mFirstFramePos, mFixedHeader, 537 mByteNumber, mTableOfContents); 538 } 539 540 sp<MetaData> MP3Extractor::getTrackMetaData(size_t index, uint32_t flags) { 541 if (mInitCheck != OK || index != 0) { 542 return NULL; 543 } 544 545 return mMeta; 546 } 547 548 //////////////////////////////////////////////////////////////////////////////// 549 550 MP3Source::MP3Source( 551 const sp<MetaData> &meta, const sp<DataSource> &source, 552 off_t first_frame_pos, uint32_t fixed_header, 553 int32_t byte_number, const char *table_of_contents) 554 : mMeta(meta), 555 mDataSource(source), 556 mFirstFramePos(first_frame_pos), 557 mFixedHeader(fixed_header), 558 mCurrentPos(0), 559 mCurrentTimeUs(0), 560 mStarted(false), 561 mByteNumber(byte_number), 562 mGroup(NULL) { 563 memcpy (mTableOfContents, table_of_contents, sizeof(mTableOfContents)); 564 } 565 566 MP3Source::~MP3Source() { 567 if (mStarted) { 568 stop(); 569 } 570 } 571 572 status_t MP3Source::start(MetaData *) { 573 CHECK(!mStarted); 574 575 mGroup = new MediaBufferGroup; 576 577 const size_t kMaxFrameSize = 32768; 578 mGroup->add_buffer(new MediaBuffer(kMaxFrameSize)); 579 580 mCurrentPos = mFirstFramePos; 581 mCurrentTimeUs = 0; 582 583 mStarted = true; 584 585 return OK; 586 } 587 588 status_t MP3Source::stop() { 589 CHECK(mStarted); 590 591 delete mGroup; 592 mGroup = NULL; 593 594 mStarted = false; 595 596 return OK; 597 } 598 599 sp<MetaData> MP3Source::getFormat() { 600 return mMeta; 601 } 602 603 status_t MP3Source::read( 604 MediaBuffer **out, const ReadOptions *options) { 605 *out = NULL; 606 607 int64_t seekTimeUs; 608 ReadOptions::SeekMode mode; 609 if (options != NULL && options->getSeekTo(&seekTimeUs, &mode)) { 610 int32_t bitrate; 611 if (!mMeta->findInt32(kKeyBitRate, &bitrate)) { 612 // bitrate is in bits/sec. 613 LOGI("no bitrate"); 614 615 return ERROR_UNSUPPORTED; 616 } 617 618 mCurrentTimeUs = seekTimeUs; 619 // interpolate in TOC to get file seek point in bytes 620 int64_t duration; 621 if ((mByteNumber > 0) && (mTableOfContents[0] > 0) 622 && mMeta->findInt64(kKeyDuration, &duration)) { 623 float percent = (float)seekTimeUs * 100 / duration; 624 float fx; 625 if( percent <= 0.0f ) { 626 fx = 0.0f; 627 } else if( percent >= 100.0f ) { 628 fx = 256.0f; 629 } else { 630 int a = (int)percent; 631 float fa, fb; 632 if ( a == 0 ) { 633 fa = 0.0f; 634 } else { 635 fa = (float)mTableOfContents[a-1]; 636 } 637 if ( a < 99 ) { 638 fb = (float)mTableOfContents[a]; 639 } else { 640 fb = 256.0f; 641 } 642 fx = fa + (fb-fa)*(percent-a); 643 } 644 mCurrentPos = mFirstFramePos + (int)((1.0f/256.0f)*fx*mByteNumber); 645 } else { 646 mCurrentPos = mFirstFramePos + seekTimeUs * bitrate / 8000000; 647 } 648 } 649 650 MediaBuffer *buffer; 651 status_t err = mGroup->acquire_buffer(&buffer); 652 if (err != OK) { 653 return err; 654 } 655 656 size_t frame_size; 657 int bitrate; 658 for (;;) { 659 ssize_t n = mDataSource->readAt(mCurrentPos, buffer->data(), 4); 660 if (n < 4) { 661 buffer->release(); 662 buffer = NULL; 663 664 return ERROR_END_OF_STREAM; 665 } 666 667 uint32_t header = U32_AT((const uint8_t *)buffer->data()); 668 669 if ((header & kMask) == (mFixedHeader & kMask) 670 && get_mp3_frame_size(header, &frame_size, NULL, NULL, &bitrate)) { 671 break; 672 } 673 674 // Lost sync. 675 LOGV("lost sync! header = 0x%08x, old header = 0x%08x\n", header, mFixedHeader); 676 677 off_t pos = mCurrentPos; 678 if (!Resync(mDataSource, mFixedHeader, &pos, NULL)) { 679 LOGE("Unable to resync. Signalling end of stream."); 680 681 buffer->release(); 682 buffer = NULL; 683 684 return ERROR_END_OF_STREAM; 685 } 686 687 mCurrentPos = pos; 688 689 // Try again with the new position. 690 } 691 692 CHECK(frame_size <= buffer->size()); 693 694 ssize_t n = mDataSource->readAt(mCurrentPos, buffer->data(), frame_size); 695 if (n < (ssize_t)frame_size) { 696 buffer->release(); 697 buffer = NULL; 698 699 return ERROR_END_OF_STREAM; 700 } 701 702 buffer->set_range(0, frame_size); 703 704 buffer->meta_data()->setInt64(kKeyTime, mCurrentTimeUs); 705 buffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 706 707 mCurrentPos += frame_size; 708 mCurrentTimeUs += frame_size * 8000ll / bitrate; 709 710 *out = buffer; 711 712 return OK; 713 } 714 715 sp<MetaData> MP3Extractor::getMetaData() { 716 sp<MetaData> meta = new MetaData; 717 718 if (mInitCheck != OK) { 719 return meta; 720 } 721 722 meta->setCString(kKeyMIMEType, "audio/mpeg"); 723 724 ID3 id3(mDataSource); 725 726 if (!id3.isValid()) { 727 return meta; 728 } 729 730 struct Map { 731 int key; 732 const char *tag1; 733 const char *tag2; 734 }; 735 static const Map kMap[] = { 736 { kKeyAlbum, "TALB", "TAL" }, 737 { kKeyArtist, "TPE1", "TP1" }, 738 { kKeyAlbumArtist, "TPE2", "TP2" }, 739 { kKeyComposer, "TCOM", "TCM" }, 740 { kKeyGenre, "TCON", "TCO" }, 741 { kKeyTitle, "TIT2", "TT2" }, 742 { kKeyYear, "TYE", "TYER" }, 743 { kKeyAuthor, "TXT", "TEXT" }, 744 { kKeyCDTrackNumber, "TRK", "TRCK" }, 745 { kKeyDiscNumber, "TPA", "TPOS" }, 746 }; 747 static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]); 748 749 for (size_t i = 0; i < kNumMapEntries; ++i) { 750 ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1); 751 if (it->done()) { 752 delete it; 753 it = new ID3::Iterator(id3, kMap[i].tag2); 754 } 755 756 if (it->done()) { 757 delete it; 758 continue; 759 } 760 761 String8 s; 762 it->getString(&s); 763 delete it; 764 765 meta->setCString(kMap[i].key, s); 766 } 767 768 size_t dataSize; 769 String8 mime; 770 const void *data = id3.getAlbumArt(&dataSize, &mime); 771 772 if (data) { 773 meta->setData(kKeyAlbumArt, MetaData::TYPE_NONE, data, dataSize); 774 meta->setCString(kKeyAlbumArtMIME, mime.string()); 775 } 776 777 return meta; 778 } 779 780 bool SniffMP3( 781 const sp<DataSource> &source, String8 *mimeType, 782 float *confidence, sp<AMessage> *meta) { 783 off_t pos = 0; 784 uint32_t header; 785 if (!Resync(source, 0, &pos, &header)) { 786 return false; 787 } 788 789 *meta = new AMessage; 790 (*meta)->setInt64("offset", pos); 791 (*meta)->setInt32("header", header); 792 793 *mimeType = MEDIA_MIMETYPE_AUDIO_MPEG; 794 *confidence = 0.2f; 795 796 return true; 797 } 798 799 } // namespace android 800