1 /* 2 * Copyright (C) 2009 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 //#define LOG_NDEBUG 0 18 #define LOG_TAG "MPEG4Extractor" 19 20 #include <ctype.h> 21 #include <inttypes.h> 22 #include <stdint.h> 23 #include <stdlib.h> 24 #include <string.h> 25 26 #include <utils/Log.h> 27 28 #include "include/MPEG4Extractor.h" 29 #include "include/SampleTable.h" 30 #include "include/ESDS.h" 31 32 #include <media/stagefright/foundation/ABitReader.h> 33 #include <media/stagefright/foundation/ABuffer.h> 34 #include <media/stagefright/foundation/ADebug.h> 35 #include <media/stagefright/foundation/AMessage.h> 36 #include <media/stagefright/foundation/AUtils.h> 37 #include <media/stagefright/MediaBuffer.h> 38 #include <media/stagefright/MediaBufferGroup.h> 39 #include <media/stagefright/MediaDefs.h> 40 #include <media/stagefright/MediaSource.h> 41 #include <media/stagefright/MetaData.h> 42 #include <utils/String8.h> 43 44 #include <byteswap.h> 45 #include "include/ID3.h" 46 47 namespace android { 48 49 class MPEG4Source : public MediaSource { 50 public: 51 // Caller retains ownership of both "dataSource" and "sampleTable". 52 MPEG4Source(const sp<MPEG4Extractor> &owner, 53 const sp<MetaData> &format, 54 const sp<DataSource> &dataSource, 55 int32_t timeScale, 56 const sp<SampleTable> &sampleTable, 57 Vector<SidxEntry> &sidx, 58 const Trex *trex, 59 off64_t firstMoofOffset); 60 61 virtual status_t start(MetaData *params = NULL); 62 virtual status_t stop(); 63 64 virtual sp<MetaData> getFormat(); 65 66 virtual status_t read(MediaBuffer **buffer, const ReadOptions *options = NULL); 67 virtual status_t fragmentedRead(MediaBuffer **buffer, const ReadOptions *options = NULL); 68 69 protected: 70 virtual ~MPEG4Source(); 71 72 private: 73 Mutex mLock; 74 75 // keep the MPEG4Extractor around, since we're referencing its data 76 sp<MPEG4Extractor> mOwner; 77 sp<MetaData> mFormat; 78 sp<DataSource> mDataSource; 79 int32_t mTimescale; 80 sp<SampleTable> mSampleTable; 81 uint32_t mCurrentSampleIndex; 82 uint32_t mCurrentFragmentIndex; 83 Vector<SidxEntry> &mSegments; 84 const Trex *mTrex; 85 off64_t mFirstMoofOffset; 86 off64_t mCurrentMoofOffset; 87 off64_t mNextMoofOffset; 88 uint32_t mCurrentTime; 89 int32_t mLastParsedTrackId; 90 int32_t mTrackId; 91 92 int32_t mCryptoMode; // passed in from extractor 93 int32_t mDefaultIVSize; // passed in from extractor 94 uint8_t mCryptoKey[16]; // passed in from extractor 95 uint32_t mCurrentAuxInfoType; 96 uint32_t mCurrentAuxInfoTypeParameter; 97 int32_t mCurrentDefaultSampleInfoSize; 98 uint32_t mCurrentSampleInfoCount; 99 uint32_t mCurrentSampleInfoAllocSize; 100 uint8_t* mCurrentSampleInfoSizes; 101 uint32_t mCurrentSampleInfoOffsetCount; 102 uint32_t mCurrentSampleInfoOffsetsAllocSize; 103 uint64_t* mCurrentSampleInfoOffsets; 104 105 bool mIsAVC; 106 bool mIsHEVC; 107 size_t mNALLengthSize; 108 109 bool mStarted; 110 111 MediaBufferGroup *mGroup; 112 113 MediaBuffer *mBuffer; 114 115 bool mWantsNALFragments; 116 117 uint8_t *mSrcBuffer; 118 119 size_t parseNALSize(const uint8_t *data) const; 120 status_t parseChunk(off64_t *offset); 121 status_t parseTrackFragmentHeader(off64_t offset, off64_t size); 122 status_t parseTrackFragmentRun(off64_t offset, off64_t size); 123 status_t parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size); 124 status_t parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size); 125 126 struct TrackFragmentHeaderInfo { 127 enum Flags { 128 kBaseDataOffsetPresent = 0x01, 129 kSampleDescriptionIndexPresent = 0x02, 130 kDefaultSampleDurationPresent = 0x08, 131 kDefaultSampleSizePresent = 0x10, 132 kDefaultSampleFlagsPresent = 0x20, 133 kDurationIsEmpty = 0x10000, 134 }; 135 136 uint32_t mTrackID; 137 uint32_t mFlags; 138 uint64_t mBaseDataOffset; 139 uint32_t mSampleDescriptionIndex; 140 uint32_t mDefaultSampleDuration; 141 uint32_t mDefaultSampleSize; 142 uint32_t mDefaultSampleFlags; 143 144 uint64_t mDataOffset; 145 }; 146 TrackFragmentHeaderInfo mTrackFragmentHeaderInfo; 147 148 struct Sample { 149 off64_t offset; 150 size_t size; 151 uint32_t duration; 152 int32_t compositionOffset; 153 uint8_t iv[16]; 154 Vector<size_t> clearsizes; 155 Vector<size_t> encryptedsizes; 156 }; 157 Vector<Sample> mCurrentSamples; 158 159 MPEG4Source(const MPEG4Source &); 160 MPEG4Source &operator=(const MPEG4Source &); 161 }; 162 163 // This custom data source wraps an existing one and satisfies requests 164 // falling entirely within a cached range from the cache while forwarding 165 // all remaining requests to the wrapped datasource. 166 // This is used to cache the full sampletable metadata for a single track, 167 // possibly wrapping multiple times to cover all tracks, i.e. 168 // Each MPEG4DataSource caches the sampletable metadata for a single track. 169 170 struct MPEG4DataSource : public DataSource { 171 MPEG4DataSource(const sp<DataSource> &source); 172 173 virtual status_t initCheck() const; 174 virtual ssize_t readAt(off64_t offset, void *data, size_t size); 175 virtual status_t getSize(off64_t *size); 176 virtual uint32_t flags(); 177 178 status_t setCachedRange(off64_t offset, size_t size); 179 180 protected: 181 virtual ~MPEG4DataSource(); 182 183 private: 184 Mutex mLock; 185 186 sp<DataSource> mSource; 187 off64_t mCachedOffset; 188 size_t mCachedSize; 189 uint8_t *mCache; 190 191 void clearCache(); 192 193 MPEG4DataSource(const MPEG4DataSource &); 194 MPEG4DataSource &operator=(const MPEG4DataSource &); 195 }; 196 197 MPEG4DataSource::MPEG4DataSource(const sp<DataSource> &source) 198 : mSource(source), 199 mCachedOffset(0), 200 mCachedSize(0), 201 mCache(NULL) { 202 } 203 204 MPEG4DataSource::~MPEG4DataSource() { 205 clearCache(); 206 } 207 208 void MPEG4DataSource::clearCache() { 209 if (mCache) { 210 free(mCache); 211 mCache = NULL; 212 } 213 214 mCachedOffset = 0; 215 mCachedSize = 0; 216 } 217 218 status_t MPEG4DataSource::initCheck() const { 219 return mSource->initCheck(); 220 } 221 222 ssize_t MPEG4DataSource::readAt(off64_t offset, void *data, size_t size) { 223 Mutex::Autolock autoLock(mLock); 224 225 if (isInRange(mCachedOffset, mCachedSize, offset, size)) { 226 memcpy(data, &mCache[offset - mCachedOffset], size); 227 return size; 228 } 229 230 return mSource->readAt(offset, data, size); 231 } 232 233 status_t MPEG4DataSource::getSize(off64_t *size) { 234 return mSource->getSize(size); 235 } 236 237 uint32_t MPEG4DataSource::flags() { 238 return mSource->flags(); 239 } 240 241 status_t MPEG4DataSource::setCachedRange(off64_t offset, size_t size) { 242 Mutex::Autolock autoLock(mLock); 243 244 clearCache(); 245 246 mCache = (uint8_t *)malloc(size); 247 248 if (mCache == NULL) { 249 return -ENOMEM; 250 } 251 252 mCachedOffset = offset; 253 mCachedSize = size; 254 255 ssize_t err = mSource->readAt(mCachedOffset, mCache, mCachedSize); 256 257 if (err < (ssize_t)size) { 258 clearCache(); 259 260 return ERROR_IO; 261 } 262 263 return OK; 264 } 265 266 //////////////////////////////////////////////////////////////////////////////// 267 268 static void hexdump(const void *_data, size_t size) { 269 const uint8_t *data = (const uint8_t *)_data; 270 size_t offset = 0; 271 while (offset < size) { 272 printf("0x%04zx ", offset); 273 274 size_t n = size - offset; 275 if (n > 16) { 276 n = 16; 277 } 278 279 for (size_t i = 0; i < 16; ++i) { 280 if (i == 8) { 281 printf(" "); 282 } 283 284 if (offset + i < size) { 285 printf("%02x ", data[offset + i]); 286 } else { 287 printf(" "); 288 } 289 } 290 291 printf(" "); 292 293 for (size_t i = 0; i < n; ++i) { 294 if (isprint(data[offset + i])) { 295 printf("%c", data[offset + i]); 296 } else { 297 printf("."); 298 } 299 } 300 301 printf("\n"); 302 303 offset += 16; 304 } 305 } 306 307 static const char *FourCC2MIME(uint32_t fourcc) { 308 switch (fourcc) { 309 case FOURCC('m', 'p', '4', 'a'): 310 return MEDIA_MIMETYPE_AUDIO_AAC; 311 312 case FOURCC('s', 'a', 'm', 'r'): 313 return MEDIA_MIMETYPE_AUDIO_AMR_NB; 314 315 case FOURCC('s', 'a', 'w', 'b'): 316 return MEDIA_MIMETYPE_AUDIO_AMR_WB; 317 318 case FOURCC('m', 'p', '4', 'v'): 319 return MEDIA_MIMETYPE_VIDEO_MPEG4; 320 321 case FOURCC('s', '2', '6', '3'): 322 case FOURCC('h', '2', '6', '3'): 323 case FOURCC('H', '2', '6', '3'): 324 return MEDIA_MIMETYPE_VIDEO_H263; 325 326 case FOURCC('a', 'v', 'c', '1'): 327 return MEDIA_MIMETYPE_VIDEO_AVC; 328 329 case FOURCC('h', 'v', 'c', '1'): 330 case FOURCC('h', 'e', 'v', '1'): 331 return MEDIA_MIMETYPE_VIDEO_HEVC; 332 default: 333 CHECK(!"should not be here."); 334 return NULL; 335 } 336 } 337 338 static bool AdjustChannelsAndRate(uint32_t fourcc, uint32_t *channels, uint32_t *rate) { 339 if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB, FourCC2MIME(fourcc))) { 340 // AMR NB audio is always mono, 8kHz 341 *channels = 1; 342 *rate = 8000; 343 return true; 344 } else if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, FourCC2MIME(fourcc))) { 345 // AMR WB audio is always mono, 16kHz 346 *channels = 1; 347 *rate = 16000; 348 return true; 349 } 350 return false; 351 } 352 353 MPEG4Extractor::MPEG4Extractor(const sp<DataSource> &source) 354 : mMoofOffset(0), 355 mDataSource(source), 356 mInitCheck(NO_INIT), 357 mHasVideo(false), 358 mHeaderTimescale(0), 359 mFirstTrack(NULL), 360 mLastTrack(NULL), 361 mFileMetaData(new MetaData), 362 mFirstSINF(NULL), 363 mIsDrm(false) { 364 } 365 366 MPEG4Extractor::~MPEG4Extractor() { 367 Track *track = mFirstTrack; 368 while (track) { 369 Track *next = track->next; 370 371 delete track; 372 track = next; 373 } 374 mFirstTrack = mLastTrack = NULL; 375 376 SINF *sinf = mFirstSINF; 377 while (sinf) { 378 SINF *next = sinf->next; 379 delete[] sinf->IPMPData; 380 delete sinf; 381 sinf = next; 382 } 383 mFirstSINF = NULL; 384 385 for (size_t i = 0; i < mPssh.size(); i++) { 386 delete [] mPssh[i].data; 387 } 388 } 389 390 uint32_t MPEG4Extractor::flags() const { 391 return CAN_PAUSE | 392 ((mMoofOffset == 0 || mSidxEntries.size() != 0) ? 393 (CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK) : 0); 394 } 395 396 sp<MetaData> MPEG4Extractor::getMetaData() { 397 status_t err; 398 if ((err = readMetaData()) != OK) { 399 return new MetaData; 400 } 401 402 return mFileMetaData; 403 } 404 405 size_t MPEG4Extractor::countTracks() { 406 status_t err; 407 if ((err = readMetaData()) != OK) { 408 ALOGV("MPEG4Extractor::countTracks: no tracks"); 409 return 0; 410 } 411 412 size_t n = 0; 413 Track *track = mFirstTrack; 414 while (track) { 415 ++n; 416 track = track->next; 417 } 418 419 ALOGV("MPEG4Extractor::countTracks: %zu tracks", n); 420 return n; 421 } 422 423 sp<MetaData> MPEG4Extractor::getTrackMetaData( 424 size_t index, uint32_t flags) { 425 status_t err; 426 if ((err = readMetaData()) != OK) { 427 return NULL; 428 } 429 430 Track *track = mFirstTrack; 431 while (index > 0) { 432 if (track == NULL) { 433 return NULL; 434 } 435 436 track = track->next; 437 --index; 438 } 439 440 if (track == NULL) { 441 return NULL; 442 } 443 444 if ((flags & kIncludeExtensiveMetaData) 445 && !track->includes_expensive_metadata) { 446 track->includes_expensive_metadata = true; 447 448 const char *mime; 449 CHECK(track->meta->findCString(kKeyMIMEType, &mime)); 450 if (!strncasecmp("video/", mime, 6)) { 451 if (mMoofOffset > 0) { 452 int64_t duration; 453 if (track->meta->findInt64(kKeyDuration, &duration)) { 454 // nothing fancy, just pick a frame near 1/4th of the duration 455 track->meta->setInt64( 456 kKeyThumbnailTime, duration / 4); 457 } 458 } else { 459 uint32_t sampleIndex; 460 uint32_t sampleTime; 461 if (track->sampleTable->findThumbnailSample(&sampleIndex) == OK 462 && track->sampleTable->getMetaDataForSample( 463 sampleIndex, NULL /* offset */, NULL /* size */, 464 &sampleTime) == OK) { 465 track->meta->setInt64( 466 kKeyThumbnailTime, 467 ((int64_t)sampleTime * 1000000) / track->timescale); 468 } 469 } 470 } 471 } 472 473 return track->meta; 474 } 475 476 static void MakeFourCCString(uint32_t x, char *s) { 477 s[0] = x >> 24; 478 s[1] = (x >> 16) & 0xff; 479 s[2] = (x >> 8) & 0xff; 480 s[3] = x & 0xff; 481 s[4] = '\0'; 482 } 483 484 status_t MPEG4Extractor::readMetaData() { 485 if (mInitCheck != NO_INIT) { 486 return mInitCheck; 487 } 488 489 off64_t offset = 0; 490 status_t err; 491 while (true) { 492 off64_t orig_offset = offset; 493 err = parseChunk(&offset, 0); 494 495 if (err != OK && err != UNKNOWN_ERROR) { 496 break; 497 } else if (offset <= orig_offset) { 498 // only continue parsing if the offset was advanced, 499 // otherwise we might end up in an infinite loop 500 ALOGE("did not advance: 0x%lld->0x%lld", orig_offset, offset); 501 err = ERROR_MALFORMED; 502 break; 503 } else if (err == OK) { 504 continue; 505 } 506 507 uint32_t hdr[2]; 508 if (mDataSource->readAt(offset, hdr, 8) < 8) { 509 break; 510 } 511 uint32_t chunk_type = ntohl(hdr[1]); 512 if (chunk_type == FOURCC('m', 'o', 'o', 'f')) { 513 // store the offset of the first segment 514 mMoofOffset = offset; 515 } else if (chunk_type != FOURCC('m', 'd', 'a', 't')) { 516 // keep parsing until we get to the data 517 continue; 518 } 519 break; 520 } 521 522 if (mInitCheck == OK) { 523 if (mHasVideo) { 524 mFileMetaData->setCString( 525 kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_MPEG4); 526 } else { 527 mFileMetaData->setCString(kKeyMIMEType, "audio/mp4"); 528 } 529 } else { 530 mInitCheck = err; 531 } 532 533 CHECK_NE(err, (status_t)NO_INIT); 534 535 // copy pssh data into file metadata 536 int psshsize = 0; 537 for (size_t i = 0; i < mPssh.size(); i++) { 538 psshsize += 20 + mPssh[i].datalen; 539 } 540 if (psshsize) { 541 char *buf = (char*)malloc(psshsize); 542 char *ptr = buf; 543 for (size_t i = 0; i < mPssh.size(); i++) { 544 memcpy(ptr, mPssh[i].uuid, 20); // uuid + length 545 memcpy(ptr + 20, mPssh[i].data, mPssh[i].datalen); 546 ptr += (20 + mPssh[i].datalen); 547 } 548 mFileMetaData->setData(kKeyPssh, 'pssh', buf, psshsize); 549 free(buf); 550 } 551 return mInitCheck; 552 } 553 554 char* MPEG4Extractor::getDrmTrackInfo(size_t trackID, int *len) { 555 if (mFirstSINF == NULL) { 556 return NULL; 557 } 558 559 SINF *sinf = mFirstSINF; 560 while (sinf && (trackID != sinf->trackID)) { 561 sinf = sinf->next; 562 } 563 564 if (sinf == NULL) { 565 return NULL; 566 } 567 568 *len = sinf->len; 569 return sinf->IPMPData; 570 } 571 572 // Reads an encoded integer 7 bits at a time until it encounters the high bit clear. 573 static int32_t readSize(off64_t offset, 574 const sp<DataSource> DataSource, uint8_t *numOfBytes) { 575 uint32_t size = 0; 576 uint8_t data; 577 bool moreData = true; 578 *numOfBytes = 0; 579 580 while (moreData) { 581 if (DataSource->readAt(offset, &data, 1) < 1) { 582 return -1; 583 } 584 offset ++; 585 moreData = (data >= 128) ? true : false; 586 size = (size << 7) | (data & 0x7f); // Take last 7 bits 587 (*numOfBytes) ++; 588 } 589 590 return size; 591 } 592 593 status_t MPEG4Extractor::parseDrmSINF( 594 off64_t * /* offset */, off64_t data_offset) { 595 uint8_t updateIdTag; 596 if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) { 597 return ERROR_IO; 598 } 599 data_offset ++; 600 601 if (0x01/*OBJECT_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) { 602 return ERROR_MALFORMED; 603 } 604 605 uint8_t numOfBytes; 606 int32_t size = readSize(data_offset, mDataSource, &numOfBytes); 607 if (size < 0) { 608 return ERROR_IO; 609 } 610 int32_t classSize = size; 611 data_offset += numOfBytes; 612 613 while(size >= 11 ) { 614 uint8_t descriptorTag; 615 if (mDataSource->readAt(data_offset, &descriptorTag, 1) < 1) { 616 return ERROR_IO; 617 } 618 data_offset ++; 619 620 if (0x11/*OBJECT_DESCRIPTOR_ID_TAG*/ != descriptorTag) { 621 return ERROR_MALFORMED; 622 } 623 624 uint8_t buffer[8]; 625 //ObjectDescriptorID and ObjectDescriptor url flag 626 if (mDataSource->readAt(data_offset, buffer, 2) < 2) { 627 return ERROR_IO; 628 } 629 data_offset += 2; 630 631 if ((buffer[1] >> 5) & 0x0001) { //url flag is set 632 return ERROR_MALFORMED; 633 } 634 635 if (mDataSource->readAt(data_offset, buffer, 8) < 8) { 636 return ERROR_IO; 637 } 638 data_offset += 8; 639 640 if ((0x0F/*ES_ID_REF_TAG*/ != buffer[1]) 641 || ( 0x0A/*IPMP_DESCRIPTOR_POINTER_ID_TAG*/ != buffer[5])) { 642 return ERROR_MALFORMED; 643 } 644 645 SINF *sinf = new SINF; 646 sinf->trackID = U16_AT(&buffer[3]); 647 sinf->IPMPDescriptorID = buffer[7]; 648 sinf->next = mFirstSINF; 649 mFirstSINF = sinf; 650 651 size -= (8 + 2 + 1); 652 } 653 654 if (size != 0) { 655 return ERROR_MALFORMED; 656 } 657 658 if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) { 659 return ERROR_IO; 660 } 661 data_offset ++; 662 663 if(0x05/*IPMP_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) { 664 return ERROR_MALFORMED; 665 } 666 667 size = readSize(data_offset, mDataSource, &numOfBytes); 668 if (size < 0) { 669 return ERROR_IO; 670 } 671 classSize = size; 672 data_offset += numOfBytes; 673 674 while (size > 0) { 675 uint8_t tag; 676 int32_t dataLen; 677 if (mDataSource->readAt(data_offset, &tag, 1) < 1) { 678 return ERROR_IO; 679 } 680 data_offset ++; 681 682 if (0x0B/*IPMP_DESCRIPTOR_ID_TAG*/ == tag) { 683 uint8_t id; 684 dataLen = readSize(data_offset, mDataSource, &numOfBytes); 685 if (dataLen < 0) { 686 return ERROR_IO; 687 } else if (dataLen < 4) { 688 return ERROR_MALFORMED; 689 } 690 data_offset += numOfBytes; 691 692 if (mDataSource->readAt(data_offset, &id, 1) < 1) { 693 return ERROR_IO; 694 } 695 data_offset ++; 696 697 SINF *sinf = mFirstSINF; 698 while (sinf && (sinf->IPMPDescriptorID != id)) { 699 sinf = sinf->next; 700 } 701 if (sinf == NULL) { 702 return ERROR_MALFORMED; 703 } 704 sinf->len = dataLen - 3; 705 sinf->IPMPData = new (std::nothrow) char[sinf->len]; 706 if (sinf->IPMPData == NULL) { 707 return ERROR_MALFORMED; 708 } 709 data_offset += 2; 710 711 if (mDataSource->readAt(data_offset, sinf->IPMPData, sinf->len) < sinf->len) { 712 return ERROR_IO; 713 } 714 data_offset += sinf->len; 715 716 size -= (dataLen + numOfBytes + 1); 717 } 718 } 719 720 if (size != 0) { 721 return ERROR_MALFORMED; 722 } 723 724 return UNKNOWN_ERROR; // Return a dummy error. 725 } 726 727 struct PathAdder { 728 PathAdder(Vector<uint32_t> *path, uint32_t chunkType) 729 : mPath(path) { 730 mPath->push(chunkType); 731 } 732 733 ~PathAdder() { 734 mPath->pop(); 735 } 736 737 private: 738 Vector<uint32_t> *mPath; 739 740 PathAdder(const PathAdder &); 741 PathAdder &operator=(const PathAdder &); 742 }; 743 744 static bool underMetaDataPath(const Vector<uint32_t> &path) { 745 return path.size() >= 5 746 && path[0] == FOURCC('m', 'o', 'o', 'v') 747 && path[1] == FOURCC('u', 'd', 't', 'a') 748 && path[2] == FOURCC('m', 'e', 't', 'a') 749 && path[3] == FOURCC('i', 'l', 's', 't'); 750 } 751 752 // Given a time in seconds since Jan 1 1904, produce a human-readable string. 753 static void convertTimeToDate(int64_t time_1904, String8 *s) { 754 time_t time_1970 = time_1904 - (((66 * 365 + 17) * 24) * 3600); 755 756 char tmp[32]; 757 strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", gmtime(&time_1970)); 758 759 s->setTo(tmp); 760 } 761 762 status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) { 763 ALOGV("entering parseChunk %lld/%d", *offset, depth); 764 uint32_t hdr[2]; 765 if (mDataSource->readAt(*offset, hdr, 8) < 8) { 766 return ERROR_IO; 767 } 768 uint64_t chunk_size = ntohl(hdr[0]); 769 uint32_t chunk_type = ntohl(hdr[1]); 770 off64_t data_offset = *offset + 8; 771 772 if (chunk_size == 1) { 773 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) { 774 return ERROR_IO; 775 } 776 chunk_size = ntoh64(chunk_size); 777 data_offset += 8; 778 779 if (chunk_size < 16) { 780 // The smallest valid chunk is 16 bytes long in this case. 781 return ERROR_MALFORMED; 782 } 783 } else if (chunk_size == 0) { 784 if (depth == 0) { 785 // atom extends to end of file 786 off64_t sourceSize; 787 if (mDataSource->getSize(&sourceSize) == OK) { 788 chunk_size = (sourceSize - *offset); 789 } else { 790 // XXX could we just pick a "sufficiently large" value here? 791 ALOGE("atom size is 0, and data source has no size"); 792 return ERROR_MALFORMED; 793 } 794 } else { 795 // not allowed for non-toplevel atoms, skip it 796 *offset += 4; 797 return OK; 798 } 799 } else if (chunk_size < 8) { 800 // The smallest valid chunk is 8 bytes long. 801 ALOGE("invalid chunk size: %" PRIu64, chunk_size); 802 return ERROR_MALFORMED; 803 } 804 805 char chunk[5]; 806 MakeFourCCString(chunk_type, chunk); 807 ALOGV("chunk: %s @ %lld, %d", chunk, *offset, depth); 808 809 #if 0 810 static const char kWhitespace[] = " "; 811 const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth]; 812 printf("%sfound chunk '%s' of size %" PRIu64 "\n", indent, chunk, chunk_size); 813 814 char buffer[256]; 815 size_t n = chunk_size; 816 if (n > sizeof(buffer)) { 817 n = sizeof(buffer); 818 } 819 if (mDataSource->readAt(*offset, buffer, n) 820 < (ssize_t)n) { 821 return ERROR_IO; 822 } 823 824 hexdump(buffer, n); 825 #endif 826 827 PathAdder autoAdder(&mPath, chunk_type); 828 829 off64_t chunk_data_size = *offset + chunk_size - data_offset; 830 831 if (chunk_type != FOURCC('c', 'p', 'r', 't') 832 && chunk_type != FOURCC('c', 'o', 'v', 'r') 833 && mPath.size() == 5 && underMetaDataPath(mPath)) { 834 off64_t stop_offset = *offset + chunk_size; 835 *offset = data_offset; 836 while (*offset < stop_offset) { 837 status_t err = parseChunk(offset, depth + 1); 838 if (err != OK) { 839 return err; 840 } 841 } 842 843 if (*offset != stop_offset) { 844 return ERROR_MALFORMED; 845 } 846 847 return OK; 848 } 849 850 switch(chunk_type) { 851 case FOURCC('m', 'o', 'o', 'v'): 852 case FOURCC('t', 'r', 'a', 'k'): 853 case FOURCC('m', 'd', 'i', 'a'): 854 case FOURCC('m', 'i', 'n', 'f'): 855 case FOURCC('d', 'i', 'n', 'f'): 856 case FOURCC('s', 't', 'b', 'l'): 857 case FOURCC('m', 'v', 'e', 'x'): 858 case FOURCC('m', 'o', 'o', 'f'): 859 case FOURCC('t', 'r', 'a', 'f'): 860 case FOURCC('m', 'f', 'r', 'a'): 861 case FOURCC('u', 'd', 't', 'a'): 862 case FOURCC('i', 'l', 's', 't'): 863 case FOURCC('s', 'i', 'n', 'f'): 864 case FOURCC('s', 'c', 'h', 'i'): 865 case FOURCC('e', 'd', 't', 's'): 866 { 867 if (chunk_type == FOURCC('s', 't', 'b', 'l')) { 868 ALOGV("sampleTable chunk is %" PRIu64 " bytes long.", chunk_size); 869 870 if (mDataSource->flags() 871 & (DataSource::kWantsPrefetching 872 | DataSource::kIsCachingDataSource)) { 873 sp<MPEG4DataSource> cachedSource = 874 new MPEG4DataSource(mDataSource); 875 876 if (cachedSource->setCachedRange(*offset, chunk_size) == OK) { 877 mDataSource = cachedSource; 878 } 879 } 880 881 mLastTrack->sampleTable = new SampleTable(mDataSource); 882 } 883 884 bool isTrack = false; 885 if (chunk_type == FOURCC('t', 'r', 'a', 'k')) { 886 isTrack = true; 887 888 Track *track = new Track; 889 track->next = NULL; 890 if (mLastTrack) { 891 mLastTrack->next = track; 892 } else { 893 mFirstTrack = track; 894 } 895 mLastTrack = track; 896 897 track->meta = new MetaData; 898 track->includes_expensive_metadata = false; 899 track->skipTrack = false; 900 track->timescale = 0; 901 track->meta->setCString(kKeyMIMEType, "application/octet-stream"); 902 } 903 904 off64_t stop_offset = *offset + chunk_size; 905 *offset = data_offset; 906 while (*offset < stop_offset) { 907 status_t err = parseChunk(offset, depth + 1); 908 if (err != OK) { 909 return err; 910 } 911 } 912 913 if (*offset != stop_offset) { 914 return ERROR_MALFORMED; 915 } 916 917 if (isTrack) { 918 if (mLastTrack->skipTrack) { 919 Track *cur = mFirstTrack; 920 921 if (cur == mLastTrack) { 922 delete cur; 923 mFirstTrack = mLastTrack = NULL; 924 } else { 925 while (cur && cur->next != mLastTrack) { 926 cur = cur->next; 927 } 928 cur->next = NULL; 929 delete mLastTrack; 930 mLastTrack = cur; 931 } 932 933 return OK; 934 } 935 936 status_t err = verifyTrack(mLastTrack); 937 938 if (err != OK) { 939 return err; 940 } 941 } else if (chunk_type == FOURCC('m', 'o', 'o', 'v')) { 942 mInitCheck = OK; 943 944 if (!mIsDrm) { 945 return UNKNOWN_ERROR; // Return a dummy error. 946 } else { 947 return OK; 948 } 949 } 950 break; 951 } 952 953 case FOURCC('e', 'l', 's', 't'): 954 { 955 *offset += chunk_size; 956 957 // See 14496-12 8.6.6 958 uint8_t version; 959 if (mDataSource->readAt(data_offset, &version, 1) < 1) { 960 return ERROR_IO; 961 } 962 963 uint32_t entry_count; 964 if (!mDataSource->getUInt32(data_offset + 4, &entry_count)) { 965 return ERROR_IO; 966 } 967 968 if (entry_count != 1) { 969 // we only support a single entry at the moment, for gapless playback 970 ALOGW("ignoring edit list with %d entries", entry_count); 971 } else if (mHeaderTimescale == 0) { 972 ALOGW("ignoring edit list because timescale is 0"); 973 } else { 974 off64_t entriesoffset = data_offset + 8; 975 uint64_t segment_duration; 976 int64_t media_time; 977 978 if (version == 1) { 979 if (!mDataSource->getUInt64(entriesoffset, &segment_duration) || 980 !mDataSource->getUInt64(entriesoffset + 8, (uint64_t*)&media_time)) { 981 return ERROR_IO; 982 } 983 } else if (version == 0) { 984 uint32_t sd; 985 int32_t mt; 986 if (!mDataSource->getUInt32(entriesoffset, &sd) || 987 !mDataSource->getUInt32(entriesoffset + 4, (uint32_t*)&mt)) { 988 return ERROR_IO; 989 } 990 segment_duration = sd; 991 media_time = mt; 992 } else { 993 return ERROR_IO; 994 } 995 996 uint64_t halfscale = mHeaderTimescale / 2; 997 segment_duration = (segment_duration * 1000000 + halfscale)/ mHeaderTimescale; 998 media_time = (media_time * 1000000 + halfscale) / mHeaderTimescale; 999 1000 int64_t duration; 1001 int32_t samplerate; 1002 if (!mLastTrack) { 1003 return ERROR_MALFORMED; 1004 } 1005 if (mLastTrack->meta->findInt64(kKeyDuration, &duration) && 1006 mLastTrack->meta->findInt32(kKeySampleRate, &samplerate)) { 1007 1008 int64_t delay = (media_time * samplerate + 500000) / 1000000; 1009 mLastTrack->meta->setInt32(kKeyEncoderDelay, delay); 1010 1011 int64_t paddingus = duration - (segment_duration + media_time); 1012 if (paddingus < 0) { 1013 // track duration from media header (which is what kKeyDuration is) might 1014 // be slightly shorter than the segment duration, which would make the 1015 // padding negative. Clamp to zero. 1016 paddingus = 0; 1017 } 1018 int64_t paddingsamples = (paddingus * samplerate + 500000) / 1000000; 1019 mLastTrack->meta->setInt32(kKeyEncoderPadding, paddingsamples); 1020 } 1021 } 1022 break; 1023 } 1024 1025 case FOURCC('f', 'r', 'm', 'a'): 1026 { 1027 *offset += chunk_size; 1028 1029 uint32_t original_fourcc; 1030 if (mDataSource->readAt(data_offset, &original_fourcc, 4) < 4) { 1031 return ERROR_IO; 1032 } 1033 original_fourcc = ntohl(original_fourcc); 1034 ALOGV("read original format: %d", original_fourcc); 1035 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(original_fourcc)); 1036 uint32_t num_channels = 0; 1037 uint32_t sample_rate = 0; 1038 if (AdjustChannelsAndRate(original_fourcc, &num_channels, &sample_rate)) { 1039 mLastTrack->meta->setInt32(kKeyChannelCount, num_channels); 1040 mLastTrack->meta->setInt32(kKeySampleRate, sample_rate); 1041 } 1042 break; 1043 } 1044 1045 case FOURCC('t', 'e', 'n', 'c'): 1046 { 1047 *offset += chunk_size; 1048 1049 if (chunk_size < 32) { 1050 return ERROR_MALFORMED; 1051 } 1052 1053 // tenc box contains 1 byte version, 3 byte flags, 3 byte default algorithm id, one byte 1054 // default IV size, 16 bytes default KeyID 1055 // (ISO 23001-7) 1056 char buf[4]; 1057 memset(buf, 0, 4); 1058 if (mDataSource->readAt(data_offset + 4, buf + 1, 3) < 3) { 1059 return ERROR_IO; 1060 } 1061 uint32_t defaultAlgorithmId = ntohl(*((int32_t*)buf)); 1062 if (defaultAlgorithmId > 1) { 1063 // only 0 (clear) and 1 (AES-128) are valid 1064 return ERROR_MALFORMED; 1065 } 1066 1067 memset(buf, 0, 4); 1068 if (mDataSource->readAt(data_offset + 7, buf + 3, 1) < 1) { 1069 return ERROR_IO; 1070 } 1071 uint32_t defaultIVSize = ntohl(*((int32_t*)buf)); 1072 1073 if ((defaultAlgorithmId == 0 && defaultIVSize != 0) || 1074 (defaultAlgorithmId != 0 && defaultIVSize == 0)) { 1075 // only unencrypted data must have 0 IV size 1076 return ERROR_MALFORMED; 1077 } else if (defaultIVSize != 0 && 1078 defaultIVSize != 8 && 1079 defaultIVSize != 16) { 1080 // only supported sizes are 0, 8 and 16 1081 return ERROR_MALFORMED; 1082 } 1083 1084 uint8_t defaultKeyId[16]; 1085 1086 if (mDataSource->readAt(data_offset + 8, &defaultKeyId, 16) < 16) { 1087 return ERROR_IO; 1088 } 1089 1090 mLastTrack->meta->setInt32(kKeyCryptoMode, defaultAlgorithmId); 1091 mLastTrack->meta->setInt32(kKeyCryptoDefaultIVSize, defaultIVSize); 1092 mLastTrack->meta->setData(kKeyCryptoKey, 'tenc', defaultKeyId, 16); 1093 break; 1094 } 1095 1096 case FOURCC('t', 'k', 'h', 'd'): 1097 { 1098 *offset += chunk_size; 1099 1100 status_t err; 1101 if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) { 1102 return err; 1103 } 1104 1105 break; 1106 } 1107 1108 case FOURCC('p', 's', 's', 'h'): 1109 { 1110 *offset += chunk_size; 1111 1112 PsshInfo pssh; 1113 1114 if (mDataSource->readAt(data_offset + 4, &pssh.uuid, 16) < 16) { 1115 return ERROR_IO; 1116 } 1117 1118 uint32_t psshdatalen = 0; 1119 if (mDataSource->readAt(data_offset + 20, &psshdatalen, 4) < 4) { 1120 return ERROR_IO; 1121 } 1122 pssh.datalen = ntohl(psshdatalen); 1123 ALOGV("pssh data size: %d", pssh.datalen); 1124 if (pssh.datalen + 20 > chunk_size) { 1125 // pssh data length exceeds size of containing box 1126 return ERROR_MALFORMED; 1127 } 1128 1129 pssh.data = new (std::nothrow) uint8_t[pssh.datalen]; 1130 if (pssh.data == NULL) { 1131 return ERROR_MALFORMED; 1132 } 1133 ALOGV("allocated pssh @ %p", pssh.data); 1134 ssize_t requested = (ssize_t) pssh.datalen; 1135 if (mDataSource->readAt(data_offset + 24, pssh.data, requested) < requested) { 1136 return ERROR_IO; 1137 } 1138 mPssh.push_back(pssh); 1139 1140 break; 1141 } 1142 1143 case FOURCC('m', 'd', 'h', 'd'): 1144 { 1145 *offset += chunk_size; 1146 1147 if (chunk_data_size < 4 || mLastTrack == NULL) { 1148 return ERROR_MALFORMED; 1149 } 1150 1151 uint8_t version; 1152 if (mDataSource->readAt( 1153 data_offset, &version, sizeof(version)) 1154 < (ssize_t)sizeof(version)) { 1155 return ERROR_IO; 1156 } 1157 1158 off64_t timescale_offset; 1159 1160 if (version == 1) { 1161 timescale_offset = data_offset + 4 + 16; 1162 } else if (version == 0) { 1163 timescale_offset = data_offset + 4 + 8; 1164 } else { 1165 return ERROR_IO; 1166 } 1167 1168 uint32_t timescale; 1169 if (mDataSource->readAt( 1170 timescale_offset, ×cale, sizeof(timescale)) 1171 < (ssize_t)sizeof(timescale)) { 1172 return ERROR_IO; 1173 } 1174 1175 mLastTrack->timescale = ntohl(timescale); 1176 1177 // 14496-12 says all ones means indeterminate, but some files seem to use 1178 // 0 instead. We treat both the same. 1179 int64_t duration = 0; 1180 if (version == 1) { 1181 if (mDataSource->readAt( 1182 timescale_offset + 4, &duration, sizeof(duration)) 1183 < (ssize_t)sizeof(duration)) { 1184 return ERROR_IO; 1185 } 1186 if (duration != -1) { 1187 duration = ntoh64(duration); 1188 } 1189 } else { 1190 uint32_t duration32; 1191 if (mDataSource->readAt( 1192 timescale_offset + 4, &duration32, sizeof(duration32)) 1193 < (ssize_t)sizeof(duration32)) { 1194 return ERROR_IO; 1195 } 1196 if (duration32 != 0xffffffff) { 1197 duration = ntohl(duration32); 1198 } 1199 } 1200 if (duration != 0) { 1201 mLastTrack->meta->setInt64( 1202 kKeyDuration, (duration * 1000000) / mLastTrack->timescale); 1203 } 1204 1205 uint8_t lang[2]; 1206 off64_t lang_offset; 1207 if (version == 1) { 1208 lang_offset = timescale_offset + 4 + 8; 1209 } else if (version == 0) { 1210 lang_offset = timescale_offset + 4 + 4; 1211 } else { 1212 return ERROR_IO; 1213 } 1214 1215 if (mDataSource->readAt(lang_offset, &lang, sizeof(lang)) 1216 < (ssize_t)sizeof(lang)) { 1217 return ERROR_IO; 1218 } 1219 1220 // To get the ISO-639-2/T three character language code 1221 // 1 bit pad followed by 3 5-bits characters. Each character 1222 // is packed as the difference between its ASCII value and 0x60. 1223 char lang_code[4]; 1224 lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60; 1225 lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60; 1226 lang_code[2] = (lang[1] & 0x1f) + 0x60; 1227 lang_code[3] = '\0'; 1228 1229 mLastTrack->meta->setCString( 1230 kKeyMediaLanguage, lang_code); 1231 1232 break; 1233 } 1234 1235 case FOURCC('s', 't', 's', 'd'): 1236 { 1237 if (chunk_data_size < 8) { 1238 return ERROR_MALFORMED; 1239 } 1240 1241 uint8_t buffer[8]; 1242 if (chunk_data_size < (off64_t)sizeof(buffer)) { 1243 return ERROR_MALFORMED; 1244 } 1245 1246 if (mDataSource->readAt( 1247 data_offset, buffer, 8) < 8) { 1248 return ERROR_IO; 1249 } 1250 1251 if (U32_AT(buffer) != 0) { 1252 // Should be version 0, flags 0. 1253 return ERROR_MALFORMED; 1254 } 1255 1256 uint32_t entry_count = U32_AT(&buffer[4]); 1257 1258 if (entry_count > 1) { 1259 // For 3GPP timed text, there could be multiple tx3g boxes contain 1260 // multiple text display formats. These formats will be used to 1261 // display the timed text. 1262 // For encrypted files, there may also be more than one entry. 1263 const char *mime; 1264 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime)); 1265 if (strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP) && 1266 strcasecmp(mime, "application/octet-stream")) { 1267 // For now we only support a single type of media per track. 1268 mLastTrack->skipTrack = true; 1269 *offset += chunk_size; 1270 break; 1271 } 1272 } 1273 off64_t stop_offset = *offset + chunk_size; 1274 *offset = data_offset + 8; 1275 for (uint32_t i = 0; i < entry_count; ++i) { 1276 status_t err = parseChunk(offset, depth + 1); 1277 if (err != OK) { 1278 return err; 1279 } 1280 } 1281 1282 if (*offset != stop_offset) { 1283 return ERROR_MALFORMED; 1284 } 1285 break; 1286 } 1287 1288 case FOURCC('m', 'p', '4', 'a'): 1289 case FOURCC('e', 'n', 'c', 'a'): 1290 case FOURCC('s', 'a', 'm', 'r'): 1291 case FOURCC('s', 'a', 'w', 'b'): 1292 { 1293 uint8_t buffer[8 + 20]; 1294 if (chunk_data_size < (ssize_t)sizeof(buffer)) { 1295 // Basic AudioSampleEntry size. 1296 return ERROR_MALFORMED; 1297 } 1298 1299 if (mDataSource->readAt( 1300 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) { 1301 return ERROR_IO; 1302 } 1303 1304 uint16_t data_ref_index = U16_AT(&buffer[6]); 1305 uint32_t num_channels = U16_AT(&buffer[16]); 1306 1307 uint16_t sample_size = U16_AT(&buffer[18]); 1308 uint32_t sample_rate = U32_AT(&buffer[24]) >> 16; 1309 1310 if (chunk_type != FOURCC('e', 'n', 'c', 'a')) { 1311 // if the chunk type is enca, we'll get the type from the sinf/frma box later 1312 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type)); 1313 AdjustChannelsAndRate(chunk_type, &num_channels, &sample_rate); 1314 } 1315 ALOGV("*** coding='%s' %d channels, size %d, rate %d\n", 1316 chunk, num_channels, sample_size, sample_rate); 1317 mLastTrack->meta->setInt32(kKeyChannelCount, num_channels); 1318 mLastTrack->meta->setInt32(kKeySampleRate, sample_rate); 1319 1320 off64_t stop_offset = *offset + chunk_size; 1321 *offset = data_offset + sizeof(buffer); 1322 while (*offset < stop_offset) { 1323 status_t err = parseChunk(offset, depth + 1); 1324 if (err != OK) { 1325 return err; 1326 } 1327 } 1328 1329 if (*offset != stop_offset) { 1330 return ERROR_MALFORMED; 1331 } 1332 break; 1333 } 1334 1335 case FOURCC('m', 'p', '4', 'v'): 1336 case FOURCC('e', 'n', 'c', 'v'): 1337 case FOURCC('s', '2', '6', '3'): 1338 case FOURCC('H', '2', '6', '3'): 1339 case FOURCC('h', '2', '6', '3'): 1340 case FOURCC('a', 'v', 'c', '1'): 1341 case FOURCC('h', 'v', 'c', '1'): 1342 case FOURCC('h', 'e', 'v', '1'): 1343 { 1344 mHasVideo = true; 1345 1346 uint8_t buffer[78]; 1347 if (chunk_data_size < (ssize_t)sizeof(buffer)) { 1348 // Basic VideoSampleEntry size. 1349 return ERROR_MALFORMED; 1350 } 1351 1352 if (mDataSource->readAt( 1353 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) { 1354 return ERROR_IO; 1355 } 1356 1357 uint16_t data_ref_index = U16_AT(&buffer[6]); 1358 uint16_t width = U16_AT(&buffer[6 + 18]); 1359 uint16_t height = U16_AT(&buffer[6 + 20]); 1360 1361 // The video sample is not standard-compliant if it has invalid dimension. 1362 // Use some default width and height value, and 1363 // let the decoder figure out the actual width and height (and thus 1364 // be prepared for INFO_FOMRAT_CHANGED event). 1365 if (width == 0) width = 352; 1366 if (height == 0) height = 288; 1367 1368 // printf("*** coding='%s' width=%d height=%d\n", 1369 // chunk, width, height); 1370 1371 if (chunk_type != FOURCC('e', 'n', 'c', 'v')) { 1372 // if the chunk type is encv, we'll get the type from the sinf/frma box later 1373 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type)); 1374 } 1375 mLastTrack->meta->setInt32(kKeyWidth, width); 1376 mLastTrack->meta->setInt32(kKeyHeight, height); 1377 1378 off64_t stop_offset = *offset + chunk_size; 1379 *offset = data_offset + sizeof(buffer); 1380 while (*offset < stop_offset) { 1381 status_t err = parseChunk(offset, depth + 1); 1382 if (err != OK) { 1383 return err; 1384 } 1385 } 1386 1387 if (*offset != stop_offset) { 1388 return ERROR_MALFORMED; 1389 } 1390 break; 1391 } 1392 1393 case FOURCC('s', 't', 'c', 'o'): 1394 case FOURCC('c', 'o', '6', '4'): 1395 { 1396 status_t err = 1397 mLastTrack->sampleTable->setChunkOffsetParams( 1398 chunk_type, data_offset, chunk_data_size); 1399 1400 *offset += chunk_size; 1401 1402 if (err != OK) { 1403 return err; 1404 } 1405 1406 break; 1407 } 1408 1409 case FOURCC('s', 't', 's', 'c'): 1410 { 1411 status_t err = 1412 mLastTrack->sampleTable->setSampleToChunkParams( 1413 data_offset, chunk_data_size); 1414 1415 *offset += chunk_size; 1416 1417 if (err != OK) { 1418 return err; 1419 } 1420 1421 break; 1422 } 1423 1424 case FOURCC('s', 't', 's', 'z'): 1425 case FOURCC('s', 't', 'z', '2'): 1426 { 1427 status_t err = 1428 mLastTrack->sampleTable->setSampleSizeParams( 1429 chunk_type, data_offset, chunk_data_size); 1430 1431 *offset += chunk_size; 1432 1433 if (err != OK) { 1434 return err; 1435 } 1436 1437 size_t max_size; 1438 err = mLastTrack->sampleTable->getMaxSampleSize(&max_size); 1439 1440 if (err != OK) { 1441 return err; 1442 } 1443 1444 if (max_size != 0) { 1445 // Assume that a given buffer only contains at most 10 chunks, 1446 // each chunk originally prefixed with a 2 byte length will 1447 // have a 4 byte header (0x00 0x00 0x00 0x01) after conversion, 1448 // and thus will grow by 2 bytes per chunk. 1449 mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size + 10 * 2); 1450 } else { 1451 // No size was specified. Pick a conservatively large size. 1452 int32_t width, height; 1453 if (!mLastTrack->meta->findInt32(kKeyWidth, &width) || 1454 !mLastTrack->meta->findInt32(kKeyHeight, &height)) { 1455 ALOGE("No width or height, assuming worst case 1080p"); 1456 width = 1920; 1457 height = 1080; 1458 } 1459 1460 const char *mime; 1461 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime)); 1462 if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) { 1463 // AVC requires compression ratio of at least 2, and uses 1464 // macroblocks 1465 max_size = ((width + 15) / 16) * ((height + 15) / 16) * 192; 1466 } else { 1467 // For all other formats there is no minimum compression 1468 // ratio. Use compression ratio of 1. 1469 max_size = width * height * 3 / 2; 1470 } 1471 mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size); 1472 } 1473 1474 // NOTE: setting another piece of metadata invalidates any pointers (such as the 1475 // mimetype) previously obtained, so don't cache them. 1476 const char *mime; 1477 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime)); 1478 // Calculate average frame rate. 1479 if (!strncasecmp("video/", mime, 6)) { 1480 size_t nSamples = mLastTrack->sampleTable->countSamples(); 1481 int64_t durationUs; 1482 if (mLastTrack->meta->findInt64(kKeyDuration, &durationUs)) { 1483 if (durationUs > 0) { 1484 int32_t frameRate = (nSamples * 1000000LL + 1485 (durationUs >> 1)) / durationUs; 1486 mLastTrack->meta->setInt32(kKeyFrameRate, frameRate); 1487 } 1488 } 1489 } 1490 1491 break; 1492 } 1493 1494 case FOURCC('s', 't', 't', 's'): 1495 { 1496 *offset += chunk_size; 1497 1498 status_t err = 1499 mLastTrack->sampleTable->setTimeToSampleParams( 1500 data_offset, chunk_data_size); 1501 1502 if (err != OK) { 1503 return err; 1504 } 1505 1506 break; 1507 } 1508 1509 case FOURCC('c', 't', 't', 's'): 1510 { 1511 *offset += chunk_size; 1512 1513 status_t err = 1514 mLastTrack->sampleTable->setCompositionTimeToSampleParams( 1515 data_offset, chunk_data_size); 1516 1517 if (err != OK) { 1518 return err; 1519 } 1520 1521 break; 1522 } 1523 1524 case FOURCC('s', 't', 's', 's'): 1525 { 1526 *offset += chunk_size; 1527 1528 status_t err = 1529 mLastTrack->sampleTable->setSyncSampleParams( 1530 data_offset, chunk_data_size); 1531 1532 if (err != OK) { 1533 return err; 1534 } 1535 1536 break; 1537 } 1538 1539 // @xyz 1540 case FOURCC('\xA9', 'x', 'y', 'z'): 1541 { 1542 *offset += chunk_size; 1543 1544 // Best case the total data length inside "@xyz" box 1545 // would be 8, for instance "@xyz" + "\x00\x04\x15\xc7" + "0+0/", 1546 // where "\x00\x04" is the text string length with value = 4, 1547 // "\0x15\xc7" is the language code = en, and "0+0" is a 1548 // location (string) value with longitude = 0 and latitude = 0. 1549 if (chunk_data_size < 8) { 1550 return ERROR_MALFORMED; 1551 } 1552 1553 // Worst case the location string length would be 18, 1554 // for instance +90.0000-180.0000, without the trailing "/" and 1555 // the string length + language code. 1556 char buffer[18]; 1557 1558 // Substracting 5 from the data size is because the text string length + 1559 // language code takes 4 bytes, and the trailing slash "/" takes 1 byte. 1560 off64_t location_length = chunk_data_size - 5; 1561 if (location_length >= (off64_t) sizeof(buffer)) { 1562 return ERROR_MALFORMED; 1563 } 1564 1565 if (mDataSource->readAt( 1566 data_offset + 4, buffer, location_length) < location_length) { 1567 return ERROR_IO; 1568 } 1569 1570 buffer[location_length] = '\0'; 1571 mFileMetaData->setCString(kKeyLocation, buffer); 1572 break; 1573 } 1574 1575 case FOURCC('e', 's', 'd', 's'): 1576 { 1577 *offset += chunk_size; 1578 1579 if (chunk_data_size < 4) { 1580 return ERROR_MALFORMED; 1581 } 1582 1583 uint8_t buffer[256]; 1584 if (chunk_data_size > (off64_t)sizeof(buffer)) { 1585 return ERROR_BUFFER_TOO_SMALL; 1586 } 1587 1588 if (mDataSource->readAt( 1589 data_offset, buffer, chunk_data_size) < chunk_data_size) { 1590 return ERROR_IO; 1591 } 1592 1593 if (U32_AT(buffer) != 0) { 1594 // Should be version 0, flags 0. 1595 return ERROR_MALFORMED; 1596 } 1597 1598 mLastTrack->meta->setData( 1599 kKeyESDS, kTypeESDS, &buffer[4], chunk_data_size - 4); 1600 1601 if (mPath.size() >= 2 1602 && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'a')) { 1603 // Information from the ESDS must be relied on for proper 1604 // setup of sample rate and channel count for MPEG4 Audio. 1605 // The generic header appears to only contain generic 1606 // information... 1607 1608 status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio( 1609 &buffer[4], chunk_data_size - 4); 1610 1611 if (err != OK) { 1612 return err; 1613 } 1614 } 1615 1616 break; 1617 } 1618 1619 case FOURCC('a', 'v', 'c', 'C'): 1620 { 1621 *offset += chunk_size; 1622 1623 sp<ABuffer> buffer = new ABuffer(chunk_data_size); 1624 1625 if (mDataSource->readAt( 1626 data_offset, buffer->data(), chunk_data_size) < chunk_data_size) { 1627 return ERROR_IO; 1628 } 1629 1630 mLastTrack->meta->setData( 1631 kKeyAVCC, kTypeAVCC, buffer->data(), chunk_data_size); 1632 1633 break; 1634 } 1635 case FOURCC('h', 'v', 'c', 'C'): 1636 { 1637 sp<ABuffer> buffer = new ABuffer(chunk_data_size); 1638 1639 if (mDataSource->readAt( 1640 data_offset, buffer->data(), chunk_data_size) < chunk_data_size) { 1641 return ERROR_IO; 1642 } 1643 1644 mLastTrack->meta->setData( 1645 kKeyHVCC, kTypeHVCC, buffer->data(), chunk_data_size); 1646 1647 *offset += chunk_size; 1648 break; 1649 } 1650 1651 case FOURCC('d', '2', '6', '3'): 1652 { 1653 *offset += chunk_size; 1654 /* 1655 * d263 contains a fixed 7 bytes part: 1656 * vendor - 4 bytes 1657 * version - 1 byte 1658 * level - 1 byte 1659 * profile - 1 byte 1660 * optionally, "d263" box itself may contain a 16-byte 1661 * bit rate box (bitr) 1662 * average bit rate - 4 bytes 1663 * max bit rate - 4 bytes 1664 */ 1665 char buffer[23]; 1666 if (chunk_data_size != 7 && 1667 chunk_data_size != 23) { 1668 ALOGE("Incorrect D263 box size %lld", chunk_data_size); 1669 return ERROR_MALFORMED; 1670 } 1671 1672 if (mDataSource->readAt( 1673 data_offset, buffer, chunk_data_size) < chunk_data_size) { 1674 return ERROR_IO; 1675 } 1676 1677 mLastTrack->meta->setData(kKeyD263, kTypeD263, buffer, chunk_data_size); 1678 1679 break; 1680 } 1681 1682 case FOURCC('m', 'e', 't', 'a'): 1683 { 1684 uint8_t buffer[4]; 1685 if (chunk_data_size < (off64_t)sizeof(buffer)) { 1686 *offset += chunk_size; 1687 return ERROR_MALFORMED; 1688 } 1689 1690 if (mDataSource->readAt( 1691 data_offset, buffer, 4) < 4) { 1692 *offset += chunk_size; 1693 return ERROR_IO; 1694 } 1695 1696 if (U32_AT(buffer) != 0) { 1697 // Should be version 0, flags 0. 1698 1699 // If it's not, let's assume this is one of those 1700 // apparently malformed chunks that don't have flags 1701 // and completely different semantics than what's 1702 // in the MPEG4 specs and skip it. 1703 *offset += chunk_size; 1704 return OK; 1705 } 1706 1707 off64_t stop_offset = *offset + chunk_size; 1708 *offset = data_offset + sizeof(buffer); 1709 while (*offset < stop_offset) { 1710 status_t err = parseChunk(offset, depth + 1); 1711 if (err != OK) { 1712 return err; 1713 } 1714 } 1715 1716 if (*offset != stop_offset) { 1717 return ERROR_MALFORMED; 1718 } 1719 break; 1720 } 1721 1722 case FOURCC('m', 'e', 'a', 'n'): 1723 case FOURCC('n', 'a', 'm', 'e'): 1724 case FOURCC('d', 'a', 't', 'a'): 1725 { 1726 *offset += chunk_size; 1727 1728 if (mPath.size() == 6 && underMetaDataPath(mPath)) { 1729 status_t err = parseITunesMetaData(data_offset, chunk_data_size); 1730 1731 if (err != OK) { 1732 return err; 1733 } 1734 } 1735 1736 break; 1737 } 1738 1739 case FOURCC('m', 'v', 'h', 'd'): 1740 { 1741 *offset += chunk_size; 1742 1743 if (chunk_data_size < 32) { 1744 return ERROR_MALFORMED; 1745 } 1746 1747 uint8_t header[32]; 1748 if (mDataSource->readAt( 1749 data_offset, header, sizeof(header)) 1750 < (ssize_t)sizeof(header)) { 1751 return ERROR_IO; 1752 } 1753 1754 uint64_t creationTime; 1755 uint64_t duration = 0; 1756 if (header[0] == 1) { 1757 creationTime = U64_AT(&header[4]); 1758 mHeaderTimescale = U32_AT(&header[20]); 1759 duration = U64_AT(&header[24]); 1760 if (duration == 0xffffffffffffffff) { 1761 duration = 0; 1762 } 1763 } else if (header[0] != 0) { 1764 return ERROR_MALFORMED; 1765 } else { 1766 creationTime = U32_AT(&header[4]); 1767 mHeaderTimescale = U32_AT(&header[12]); 1768 uint32_t d32 = U32_AT(&header[16]); 1769 if (d32 == 0xffffffff) { 1770 d32 = 0; 1771 } 1772 duration = d32; 1773 } 1774 if (duration != 0) { 1775 mFileMetaData->setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale); 1776 } 1777 1778 String8 s; 1779 convertTimeToDate(creationTime, &s); 1780 1781 mFileMetaData->setCString(kKeyDate, s.string()); 1782 1783 break; 1784 } 1785 1786 case FOURCC('m', 'e', 'h', 'd'): 1787 { 1788 *offset += chunk_size; 1789 1790 if (chunk_data_size < 8) { 1791 return ERROR_MALFORMED; 1792 } 1793 1794 uint8_t flags[4]; 1795 if (mDataSource->readAt( 1796 data_offset, flags, sizeof(flags)) 1797 < (ssize_t)sizeof(flags)) { 1798 return ERROR_IO; 1799 } 1800 1801 uint64_t duration = 0; 1802 if (flags[0] == 1) { 1803 // 64 bit 1804 if (chunk_data_size < 12) { 1805 return ERROR_MALFORMED; 1806 } 1807 mDataSource->getUInt64(data_offset + 4, &duration); 1808 if (duration == 0xffffffffffffffff) { 1809 duration = 0; 1810 } 1811 } else if (flags[0] == 0) { 1812 // 32 bit 1813 uint32_t d32; 1814 mDataSource->getUInt32(data_offset + 4, &d32); 1815 if (d32 == 0xffffffff) { 1816 d32 = 0; 1817 } 1818 duration = d32; 1819 } else { 1820 return ERROR_MALFORMED; 1821 } 1822 1823 if (duration != 0) { 1824 mFileMetaData->setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale); 1825 } 1826 1827 break; 1828 } 1829 1830 case FOURCC('m', 'd', 'a', 't'): 1831 { 1832 ALOGV("mdat chunk, drm: %d", mIsDrm); 1833 if (!mIsDrm) { 1834 *offset += chunk_size; 1835 break; 1836 } 1837 1838 if (chunk_size < 8) { 1839 return ERROR_MALFORMED; 1840 } 1841 1842 return parseDrmSINF(offset, data_offset); 1843 } 1844 1845 case FOURCC('h', 'd', 'l', 'r'): 1846 { 1847 *offset += chunk_size; 1848 1849 uint32_t buffer; 1850 if (mDataSource->readAt( 1851 data_offset + 8, &buffer, 4) < 4) { 1852 return ERROR_IO; 1853 } 1854 1855 uint32_t type = ntohl(buffer); 1856 // For the 3GPP file format, the handler-type within the 'hdlr' box 1857 // shall be 'text'. We also want to support 'sbtl' handler type 1858 // for a practical reason as various MPEG4 containers use it. 1859 if (type == FOURCC('t', 'e', 'x', 't') || type == FOURCC('s', 'b', 't', 'l')) { 1860 mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_TEXT_3GPP); 1861 } 1862 1863 break; 1864 } 1865 1866 case FOURCC('t', 'r', 'e', 'x'): 1867 { 1868 *offset += chunk_size; 1869 1870 if (chunk_data_size < 24) { 1871 return ERROR_IO; 1872 } 1873 uint32_t duration; 1874 Trex trex; 1875 if (!mDataSource->getUInt32(data_offset + 4, &trex.track_ID) || 1876 !mDataSource->getUInt32(data_offset + 8, &trex.default_sample_description_index) || 1877 !mDataSource->getUInt32(data_offset + 12, &trex.default_sample_duration) || 1878 !mDataSource->getUInt32(data_offset + 16, &trex.default_sample_size) || 1879 !mDataSource->getUInt32(data_offset + 20, &trex.default_sample_flags)) { 1880 return ERROR_IO; 1881 } 1882 mTrex.add(trex); 1883 break; 1884 } 1885 1886 case FOURCC('t', 'x', '3', 'g'): 1887 { 1888 uint32_t type; 1889 const void *data; 1890 size_t size = 0; 1891 if (!mLastTrack->meta->findData( 1892 kKeyTextFormatData, &type, &data, &size)) { 1893 size = 0; 1894 } 1895 1896 if (SIZE_MAX - chunk_size <= size) { 1897 return ERROR_MALFORMED; 1898 } 1899 1900 uint8_t *buffer = new uint8_t[size + chunk_size]; 1901 if (buffer == NULL) { 1902 return ERROR_MALFORMED; 1903 } 1904 1905 if (size > 0) { 1906 memcpy(buffer, data, size); 1907 } 1908 1909 if ((size_t)(mDataSource->readAt(*offset, buffer + size, chunk_size)) 1910 < chunk_size) { 1911 delete[] buffer; 1912 buffer = NULL; 1913 1914 // advance read pointer so we don't end up reading this again 1915 *offset += chunk_size; 1916 return ERROR_IO; 1917 } 1918 1919 mLastTrack->meta->setData( 1920 kKeyTextFormatData, 0, buffer, size + chunk_size); 1921 1922 delete[] buffer; 1923 1924 *offset += chunk_size; 1925 break; 1926 } 1927 1928 case FOURCC('c', 'o', 'v', 'r'): 1929 { 1930 *offset += chunk_size; 1931 1932 if (mFileMetaData != NULL) { 1933 ALOGV("chunk_data_size = %lld and data_offset = %lld", 1934 chunk_data_size, data_offset); 1935 1936 if (chunk_data_size >= SIZE_MAX - 1) { 1937 return ERROR_MALFORMED; 1938 } 1939 sp<ABuffer> buffer = new ABuffer(chunk_data_size + 1); 1940 if (mDataSource->readAt( 1941 data_offset, buffer->data(), chunk_data_size) != (ssize_t)chunk_data_size) { 1942 return ERROR_IO; 1943 } 1944 const int kSkipBytesOfDataBox = 16; 1945 if (chunk_data_size <= kSkipBytesOfDataBox) { 1946 return ERROR_MALFORMED; 1947 } 1948 1949 mFileMetaData->setData( 1950 kKeyAlbumArt, MetaData::TYPE_NONE, 1951 buffer->data() + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox); 1952 } 1953 1954 break; 1955 } 1956 1957 case FOURCC('t', 'i', 't', 'l'): 1958 case FOURCC('p', 'e', 'r', 'f'): 1959 case FOURCC('a', 'u', 't', 'h'): 1960 case FOURCC('g', 'n', 'r', 'e'): 1961 case FOURCC('a', 'l', 'b', 'm'): 1962 case FOURCC('y', 'r', 'r', 'c'): 1963 { 1964 *offset += chunk_size; 1965 1966 status_t err = parse3GPPMetaData(data_offset, chunk_data_size, depth); 1967 1968 if (err != OK) { 1969 return err; 1970 } 1971 1972 break; 1973 } 1974 1975 case FOURCC('I', 'D', '3', '2'): 1976 { 1977 *offset += chunk_size; 1978 1979 if (chunk_data_size < 6) { 1980 return ERROR_MALFORMED; 1981 } 1982 1983 parseID3v2MetaData(data_offset + 6); 1984 1985 break; 1986 } 1987 1988 case FOURCC('-', '-', '-', '-'): 1989 { 1990 mLastCommentMean.clear(); 1991 mLastCommentName.clear(); 1992 mLastCommentData.clear(); 1993 *offset += chunk_size; 1994 break; 1995 } 1996 1997 case FOURCC('s', 'i', 'd', 'x'): 1998 { 1999 parseSegmentIndex(data_offset, chunk_data_size); 2000 *offset += chunk_size; 2001 return UNKNOWN_ERROR; // stop parsing after sidx 2002 } 2003 2004 default: 2005 { 2006 *offset += chunk_size; 2007 break; 2008 } 2009 } 2010 2011 return OK; 2012 } 2013 2014 status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) { 2015 ALOGV("MPEG4Extractor::parseSegmentIndex"); 2016 2017 if (size < 12) { 2018 return -EINVAL; 2019 } 2020 2021 uint32_t flags; 2022 if (!mDataSource->getUInt32(offset, &flags)) { 2023 return ERROR_MALFORMED; 2024 } 2025 2026 uint32_t version = flags >> 24; 2027 flags &= 0xffffff; 2028 2029 ALOGV("sidx version %d", version); 2030 2031 uint32_t referenceId; 2032 if (!mDataSource->getUInt32(offset + 4, &referenceId)) { 2033 return ERROR_MALFORMED; 2034 } 2035 2036 uint32_t timeScale; 2037 if (!mDataSource->getUInt32(offset + 8, &timeScale)) { 2038 return ERROR_MALFORMED; 2039 } 2040 ALOGV("sidx refid/timescale: %d/%d", referenceId, timeScale); 2041 2042 uint64_t earliestPresentationTime; 2043 uint64_t firstOffset; 2044 2045 offset += 12; 2046 size -= 12; 2047 2048 if (version == 0) { 2049 if (size < 8) { 2050 return -EINVAL; 2051 } 2052 uint32_t tmp; 2053 if (!mDataSource->getUInt32(offset, &tmp)) { 2054 return ERROR_MALFORMED; 2055 } 2056 earliestPresentationTime = tmp; 2057 if (!mDataSource->getUInt32(offset + 4, &tmp)) { 2058 return ERROR_MALFORMED; 2059 } 2060 firstOffset = tmp; 2061 offset += 8; 2062 size -= 8; 2063 } else { 2064 if (size < 16) { 2065 return -EINVAL; 2066 } 2067 if (!mDataSource->getUInt64(offset, &earliestPresentationTime)) { 2068 return ERROR_MALFORMED; 2069 } 2070 if (!mDataSource->getUInt64(offset + 8, &firstOffset)) { 2071 return ERROR_MALFORMED; 2072 } 2073 offset += 16; 2074 size -= 16; 2075 } 2076 ALOGV("sidx pres/off: %" PRIu64 "/%" PRIu64, earliestPresentationTime, firstOffset); 2077 2078 if (size < 4) { 2079 return -EINVAL; 2080 } 2081 2082 uint16_t referenceCount; 2083 if (!mDataSource->getUInt16(offset + 2, &referenceCount)) { 2084 return ERROR_MALFORMED; 2085 } 2086 offset += 4; 2087 size -= 4; 2088 ALOGV("refcount: %d", referenceCount); 2089 2090 if (size < referenceCount * 12) { 2091 return -EINVAL; 2092 } 2093 2094 uint64_t total_duration = 0; 2095 for (unsigned int i = 0; i < referenceCount; i++) { 2096 uint32_t d1, d2, d3; 2097 2098 if (!mDataSource->getUInt32(offset, &d1) || // size 2099 !mDataSource->getUInt32(offset + 4, &d2) || // duration 2100 !mDataSource->getUInt32(offset + 8, &d3)) { // flags 2101 return ERROR_MALFORMED; 2102 } 2103 2104 if (d1 & 0x80000000) { 2105 ALOGW("sub-sidx boxes not supported yet"); 2106 } 2107 bool sap = d3 & 0x80000000; 2108 uint32_t saptype = (d3 >> 28) & 7; 2109 if (!sap || (saptype != 1 && saptype != 2)) { 2110 // type 1 and 2 are sync samples 2111 ALOGW("not a stream access point, or unsupported type: %08x", d3); 2112 } 2113 total_duration += d2; 2114 offset += 12; 2115 ALOGV(" item %d, %08x %08x %08x", i, d1, d2, d3); 2116 SidxEntry se; 2117 se.mSize = d1 & 0x7fffffff; 2118 se.mDurationUs = 1000000LL * d2 / timeScale; 2119 mSidxEntries.add(se); 2120 } 2121 2122 uint64_t sidxDuration = total_duration * 1000000 / timeScale; 2123 2124 int64_t metaDuration; 2125 if (!mLastTrack->meta->findInt64(kKeyDuration, &metaDuration) || metaDuration == 0) { 2126 mLastTrack->meta->setInt64(kKeyDuration, sidxDuration); 2127 } 2128 return OK; 2129 } 2130 2131 2132 2133 status_t MPEG4Extractor::parseTrackHeader( 2134 off64_t data_offset, off64_t data_size) { 2135 if (data_size < 4) { 2136 return ERROR_MALFORMED; 2137 } 2138 2139 uint8_t version; 2140 if (mDataSource->readAt(data_offset, &version, 1) < 1) { 2141 return ERROR_IO; 2142 } 2143 2144 size_t dynSize = (version == 1) ? 36 : 24; 2145 2146 uint8_t buffer[36 + 60]; 2147 2148 if (data_size != (off64_t)dynSize + 60) { 2149 return ERROR_MALFORMED; 2150 } 2151 2152 if (mDataSource->readAt( 2153 data_offset, buffer, data_size) < (ssize_t)data_size) { 2154 return ERROR_IO; 2155 } 2156 2157 uint64_t ctime, mtime, duration; 2158 int32_t id; 2159 2160 if (version == 1) { 2161 ctime = U64_AT(&buffer[4]); 2162 mtime = U64_AT(&buffer[12]); 2163 id = U32_AT(&buffer[20]); 2164 duration = U64_AT(&buffer[28]); 2165 } else if (version == 0) { 2166 ctime = U32_AT(&buffer[4]); 2167 mtime = U32_AT(&buffer[8]); 2168 id = U32_AT(&buffer[12]); 2169 duration = U32_AT(&buffer[20]); 2170 } else { 2171 return ERROR_UNSUPPORTED; 2172 } 2173 2174 mLastTrack->meta->setInt32(kKeyTrackID, id); 2175 2176 size_t matrixOffset = dynSize + 16; 2177 int32_t a00 = U32_AT(&buffer[matrixOffset]); 2178 int32_t a01 = U32_AT(&buffer[matrixOffset + 4]); 2179 int32_t dx = U32_AT(&buffer[matrixOffset + 8]); 2180 int32_t a10 = U32_AT(&buffer[matrixOffset + 12]); 2181 int32_t a11 = U32_AT(&buffer[matrixOffset + 16]); 2182 int32_t dy = U32_AT(&buffer[matrixOffset + 20]); 2183 2184 #if 0 2185 ALOGI("x' = %.2f * x + %.2f * y + %.2f", 2186 a00 / 65536.0f, a01 / 65536.0f, dx / 65536.0f); 2187 ALOGI("y' = %.2f * x + %.2f * y + %.2f", 2188 a10 / 65536.0f, a11 / 65536.0f, dy / 65536.0f); 2189 #endif 2190 2191 uint32_t rotationDegrees; 2192 2193 static const int32_t kFixedOne = 0x10000; 2194 if (a00 == kFixedOne && a01 == 0 && a10 == 0 && a11 == kFixedOne) { 2195 // Identity, no rotation 2196 rotationDegrees = 0; 2197 } else if (a00 == 0 && a01 == kFixedOne && a10 == -kFixedOne && a11 == 0) { 2198 rotationDegrees = 90; 2199 } else if (a00 == 0 && a01 == -kFixedOne && a10 == kFixedOne && a11 == 0) { 2200 rotationDegrees = 270; 2201 } else if (a00 == -kFixedOne && a01 == 0 && a10 == 0 && a11 == -kFixedOne) { 2202 rotationDegrees = 180; 2203 } else { 2204 ALOGW("We only support 0,90,180,270 degree rotation matrices"); 2205 rotationDegrees = 0; 2206 } 2207 2208 if (rotationDegrees != 0) { 2209 mLastTrack->meta->setInt32(kKeyRotation, rotationDegrees); 2210 } 2211 2212 // Handle presentation display size, which could be different 2213 // from the image size indicated by kKeyWidth and kKeyHeight. 2214 uint32_t width = U32_AT(&buffer[dynSize + 52]); 2215 uint32_t height = U32_AT(&buffer[dynSize + 56]); 2216 mLastTrack->meta->setInt32(kKeyDisplayWidth, width >> 16); 2217 mLastTrack->meta->setInt32(kKeyDisplayHeight, height >> 16); 2218 2219 return OK; 2220 } 2221 2222 status_t MPEG4Extractor::parseITunesMetaData(off64_t offset, size_t size) { 2223 if (size < 4) { 2224 return ERROR_MALFORMED; 2225 } 2226 2227 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1]; 2228 if (buffer == NULL) { 2229 return ERROR_MALFORMED; 2230 } 2231 if (mDataSource->readAt( 2232 offset, buffer, size) != (ssize_t)size) { 2233 delete[] buffer; 2234 buffer = NULL; 2235 2236 return ERROR_IO; 2237 } 2238 2239 uint32_t flags = U32_AT(buffer); 2240 2241 uint32_t metadataKey = 0; 2242 char chunk[5]; 2243 MakeFourCCString(mPath[4], chunk); 2244 ALOGV("meta: %s @ %lld", chunk, offset); 2245 switch (mPath[4]) { 2246 case FOURCC(0xa9, 'a', 'l', 'b'): 2247 { 2248 metadataKey = kKeyAlbum; 2249 break; 2250 } 2251 case FOURCC(0xa9, 'A', 'R', 'T'): 2252 { 2253 metadataKey = kKeyArtist; 2254 break; 2255 } 2256 case FOURCC('a', 'A', 'R', 'T'): 2257 { 2258 metadataKey = kKeyAlbumArtist; 2259 break; 2260 } 2261 case FOURCC(0xa9, 'd', 'a', 'y'): 2262 { 2263 metadataKey = kKeyYear; 2264 break; 2265 } 2266 case FOURCC(0xa9, 'n', 'a', 'm'): 2267 { 2268 metadataKey = kKeyTitle; 2269 break; 2270 } 2271 case FOURCC(0xa9, 'w', 'r', 't'): 2272 { 2273 metadataKey = kKeyWriter; 2274 break; 2275 } 2276 case FOURCC('c', 'o', 'v', 'r'): 2277 { 2278 metadataKey = kKeyAlbumArt; 2279 break; 2280 } 2281 case FOURCC('g', 'n', 'r', 'e'): 2282 { 2283 metadataKey = kKeyGenre; 2284 break; 2285 } 2286 case FOURCC(0xa9, 'g', 'e', 'n'): 2287 { 2288 metadataKey = kKeyGenre; 2289 break; 2290 } 2291 case FOURCC('c', 'p', 'i', 'l'): 2292 { 2293 if (size == 9 && flags == 21) { 2294 char tmp[16]; 2295 sprintf(tmp, "%d", 2296 (int)buffer[size - 1]); 2297 2298 mFileMetaData->setCString(kKeyCompilation, tmp); 2299 } 2300 break; 2301 } 2302 case FOURCC('t', 'r', 'k', 'n'): 2303 { 2304 if (size == 16 && flags == 0) { 2305 char tmp[16]; 2306 uint16_t* pTrack = (uint16_t*)&buffer[10]; 2307 uint16_t* pTotalTracks = (uint16_t*)&buffer[12]; 2308 sprintf(tmp, "%d/%d", ntohs(*pTrack), ntohs(*pTotalTracks)); 2309 2310 mFileMetaData->setCString(kKeyCDTrackNumber, tmp); 2311 } 2312 break; 2313 } 2314 case FOURCC('d', 'i', 's', 'k'): 2315 { 2316 if ((size == 14 || size == 16) && flags == 0) { 2317 char tmp[16]; 2318 uint16_t* pDisc = (uint16_t*)&buffer[10]; 2319 uint16_t* pTotalDiscs = (uint16_t*)&buffer[12]; 2320 sprintf(tmp, "%d/%d", ntohs(*pDisc), ntohs(*pTotalDiscs)); 2321 2322 mFileMetaData->setCString(kKeyDiscNumber, tmp); 2323 } 2324 break; 2325 } 2326 case FOURCC('-', '-', '-', '-'): 2327 { 2328 buffer[size] = '\0'; 2329 switch (mPath[5]) { 2330 case FOURCC('m', 'e', 'a', 'n'): 2331 mLastCommentMean.setTo((const char *)buffer + 4); 2332 break; 2333 case FOURCC('n', 'a', 'm', 'e'): 2334 mLastCommentName.setTo((const char *)buffer + 4); 2335 break; 2336 case FOURCC('d', 'a', 't', 'a'): 2337 mLastCommentData.setTo((const char *)buffer + 8); 2338 break; 2339 } 2340 2341 // Once we have a set of mean/name/data info, go ahead and process 2342 // it to see if its something we are interested in. Whether or not 2343 // were are interested in the specific tag, make sure to clear out 2344 // the set so we can be ready to process another tuple should one 2345 // show up later in the file. 2346 if ((mLastCommentMean.length() != 0) && 2347 (mLastCommentName.length() != 0) && 2348 (mLastCommentData.length() != 0)) { 2349 2350 if (mLastCommentMean == "com.apple.iTunes" 2351 && mLastCommentName == "iTunSMPB") { 2352 int32_t delay, padding; 2353 if (sscanf(mLastCommentData, 2354 " %*x %x %x %*x", &delay, &padding) == 2) { 2355 mLastTrack->meta->setInt32(kKeyEncoderDelay, delay); 2356 mLastTrack->meta->setInt32(kKeyEncoderPadding, padding); 2357 } 2358 } 2359 2360 mLastCommentMean.clear(); 2361 mLastCommentName.clear(); 2362 mLastCommentData.clear(); 2363 } 2364 break; 2365 } 2366 2367 default: 2368 break; 2369 } 2370 2371 if (size >= 8 && metadataKey && !mFileMetaData->hasData(metadataKey)) { 2372 if (metadataKey == kKeyAlbumArt) { 2373 mFileMetaData->setData( 2374 kKeyAlbumArt, MetaData::TYPE_NONE, 2375 buffer + 8, size - 8); 2376 } else if (metadataKey == kKeyGenre) { 2377 if (flags == 0) { 2378 // uint8_t genre code, iTunes genre codes are 2379 // the standard id3 codes, except they start 2380 // at 1 instead of 0 (e.g. Pop is 14, not 13) 2381 // We use standard id3 numbering, so subtract 1. 2382 int genrecode = (int)buffer[size - 1]; 2383 genrecode--; 2384 if (genrecode < 0) { 2385 genrecode = 255; // reserved for 'unknown genre' 2386 } 2387 char genre[10]; 2388 sprintf(genre, "%d", genrecode); 2389 2390 mFileMetaData->setCString(metadataKey, genre); 2391 } else if (flags == 1) { 2392 // custom genre string 2393 buffer[size] = '\0'; 2394 2395 mFileMetaData->setCString( 2396 metadataKey, (const char *)buffer + 8); 2397 } 2398 } else { 2399 buffer[size] = '\0'; 2400 2401 mFileMetaData->setCString( 2402 metadataKey, (const char *)buffer + 8); 2403 } 2404 } 2405 2406 delete[] buffer; 2407 buffer = NULL; 2408 2409 return OK; 2410 } 2411 2412 status_t MPEG4Extractor::parse3GPPMetaData(off64_t offset, size_t size, int depth) { 2413 if (size < 4 || size == SIZE_MAX) { 2414 return ERROR_MALFORMED; 2415 } 2416 2417 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1]; 2418 if (buffer == NULL) { 2419 return ERROR_MALFORMED; 2420 } 2421 if (mDataSource->readAt( 2422 offset, buffer, size) != (ssize_t)size) { 2423 delete[] buffer; 2424 buffer = NULL; 2425 2426 return ERROR_IO; 2427 } 2428 2429 uint32_t metadataKey = 0; 2430 switch (mPath[depth]) { 2431 case FOURCC('t', 'i', 't', 'l'): 2432 { 2433 metadataKey = kKeyTitle; 2434 break; 2435 } 2436 case FOURCC('p', 'e', 'r', 'f'): 2437 { 2438 metadataKey = kKeyArtist; 2439 break; 2440 } 2441 case FOURCC('a', 'u', 't', 'h'): 2442 { 2443 metadataKey = kKeyWriter; 2444 break; 2445 } 2446 case FOURCC('g', 'n', 'r', 'e'): 2447 { 2448 metadataKey = kKeyGenre; 2449 break; 2450 } 2451 case FOURCC('a', 'l', 'b', 'm'): 2452 { 2453 if (buffer[size - 1] != '\0') { 2454 char tmp[4]; 2455 sprintf(tmp, "%u", buffer[size - 1]); 2456 2457 mFileMetaData->setCString(kKeyCDTrackNumber, tmp); 2458 } 2459 2460 metadataKey = kKeyAlbum; 2461 break; 2462 } 2463 case FOURCC('y', 'r', 'r', 'c'): 2464 { 2465 char tmp[5]; 2466 uint16_t year = U16_AT(&buffer[4]); 2467 2468 if (year < 10000) { 2469 sprintf(tmp, "%u", year); 2470 2471 mFileMetaData->setCString(kKeyYear, tmp); 2472 } 2473 break; 2474 } 2475 2476 default: 2477 break; 2478 } 2479 2480 if (metadataKey > 0) { 2481 bool isUTF8 = true; // Common case 2482 char16_t *framedata = NULL; 2483 int len16 = 0; // Number of UTF-16 characters 2484 2485 // smallest possible valid UTF-16 string w BOM: 0xfe 0xff 0x00 0x00 2486 if (size < 6) { 2487 return ERROR_MALFORMED; 2488 } 2489 2490 if (size - 6 >= 4) { 2491 len16 = ((size - 6) / 2) - 1; // don't include 0x0000 terminator 2492 framedata = (char16_t *)(buffer + 6); 2493 if (0xfffe == *framedata) { 2494 // endianness marker (BOM) doesn't match host endianness 2495 for (int i = 0; i < len16; i++) { 2496 framedata[i] = bswap_16(framedata[i]); 2497 } 2498 // BOM is now swapped to 0xfeff, we will execute next block too 2499 } 2500 2501 if (0xfeff == *framedata) { 2502 // Remove the BOM 2503 framedata++; 2504 len16--; 2505 isUTF8 = false; 2506 } 2507 // else normal non-zero-length UTF-8 string 2508 // we can't handle UTF-16 without BOM as there is no other 2509 // indication of encoding. 2510 } 2511 2512 if (isUTF8) { 2513 buffer[size] = 0; 2514 mFileMetaData->setCString(metadataKey, (const char *)buffer + 6); 2515 } else { 2516 // Convert from UTF-16 string to UTF-8 string. 2517 String8 tmpUTF8str(framedata, len16); 2518 mFileMetaData->setCString(metadataKey, tmpUTF8str.string()); 2519 } 2520 } 2521 2522 delete[] buffer; 2523 buffer = NULL; 2524 2525 return OK; 2526 } 2527 2528 void MPEG4Extractor::parseID3v2MetaData(off64_t offset) { 2529 ID3 id3(mDataSource, true /* ignorev1 */, offset); 2530 2531 if (id3.isValid()) { 2532 struct Map { 2533 int key; 2534 const char *tag1; 2535 const char *tag2; 2536 }; 2537 static const Map kMap[] = { 2538 { kKeyAlbum, "TALB", "TAL" }, 2539 { kKeyArtist, "TPE1", "TP1" }, 2540 { kKeyAlbumArtist, "TPE2", "TP2" }, 2541 { kKeyComposer, "TCOM", "TCM" }, 2542 { kKeyGenre, "TCON", "TCO" }, 2543 { kKeyTitle, "TIT2", "TT2" }, 2544 { kKeyYear, "TYE", "TYER" }, 2545 { kKeyAuthor, "TXT", "TEXT" }, 2546 { kKeyCDTrackNumber, "TRK", "TRCK" }, 2547 { kKeyDiscNumber, "TPA", "TPOS" }, 2548 { kKeyCompilation, "TCP", "TCMP" }, 2549 }; 2550 static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]); 2551 2552 for (size_t i = 0; i < kNumMapEntries; ++i) { 2553 if (!mFileMetaData->hasData(kMap[i].key)) { 2554 ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1); 2555 if (it->done()) { 2556 delete it; 2557 it = new ID3::Iterator(id3, kMap[i].tag2); 2558 } 2559 2560 if (it->done()) { 2561 delete it; 2562 continue; 2563 } 2564 2565 String8 s; 2566 it->getString(&s); 2567 delete it; 2568 2569 mFileMetaData->setCString(kMap[i].key, s); 2570 } 2571 } 2572 2573 size_t dataSize; 2574 String8 mime; 2575 const void *data = id3.getAlbumArt(&dataSize, &mime); 2576 2577 if (data) { 2578 mFileMetaData->setData(kKeyAlbumArt, MetaData::TYPE_NONE, data, dataSize); 2579 mFileMetaData->setCString(kKeyAlbumArtMIME, mime.string()); 2580 } 2581 } 2582 } 2583 2584 sp<MediaSource> MPEG4Extractor::getTrack(size_t index) { 2585 status_t err; 2586 if ((err = readMetaData()) != OK) { 2587 return NULL; 2588 } 2589 2590 Track *track = mFirstTrack; 2591 while (index > 0) { 2592 if (track == NULL) { 2593 return NULL; 2594 } 2595 2596 track = track->next; 2597 --index; 2598 } 2599 2600 if (track == NULL) { 2601 return NULL; 2602 } 2603 2604 2605 Trex *trex = NULL; 2606 int32_t trackId; 2607 if (track->meta->findInt32(kKeyTrackID, &trackId)) { 2608 for (size_t i = 0; i < mTrex.size(); i++) { 2609 Trex *t = &mTrex.editItemAt(index); 2610 if (t->track_ID == (uint32_t) trackId) { 2611 trex = t; 2612 break; 2613 } 2614 } 2615 } 2616 2617 ALOGV("getTrack called, pssh: %zu", mPssh.size()); 2618 2619 return new MPEG4Source(this, 2620 track->meta, mDataSource, track->timescale, track->sampleTable, 2621 mSidxEntries, trex, mMoofOffset); 2622 } 2623 2624 // static 2625 status_t MPEG4Extractor::verifyTrack(Track *track) { 2626 const char *mime; 2627 CHECK(track->meta->findCString(kKeyMIMEType, &mime)); 2628 2629 uint32_t type; 2630 const void *data; 2631 size_t size; 2632 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) { 2633 if (!track->meta->findData(kKeyAVCC, &type, &data, &size) 2634 || type != kTypeAVCC) { 2635 return ERROR_MALFORMED; 2636 } 2637 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) { 2638 if (!track->meta->findData(kKeyHVCC, &type, &data, &size) 2639 || type != kTypeHVCC) { 2640 return ERROR_MALFORMED; 2641 } 2642 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4) 2643 || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) { 2644 if (!track->meta->findData(kKeyESDS, &type, &data, &size) 2645 || type != kTypeESDS) { 2646 return ERROR_MALFORMED; 2647 } 2648 } 2649 2650 if (track->sampleTable == NULL || !track->sampleTable->isValid()) { 2651 // Make sure we have all the metadata we need. 2652 ALOGE("stbl atom missing/invalid."); 2653 return ERROR_MALFORMED; 2654 } 2655 2656 return OK; 2657 } 2658 2659 typedef enum { 2660 //AOT_NONE = -1, 2661 //AOT_NULL_OBJECT = 0, 2662 //AOT_AAC_MAIN = 1, /**< Main profile */ 2663 AOT_AAC_LC = 2, /**< Low Complexity object */ 2664 //AOT_AAC_SSR = 3, 2665 //AOT_AAC_LTP = 4, 2666 AOT_SBR = 5, 2667 //AOT_AAC_SCAL = 6, 2668 //AOT_TWIN_VQ = 7, 2669 //AOT_CELP = 8, 2670 //AOT_HVXC = 9, 2671 //AOT_RSVD_10 = 10, /**< (reserved) */ 2672 //AOT_RSVD_11 = 11, /**< (reserved) */ 2673 //AOT_TTSI = 12, /**< TTSI Object */ 2674 //AOT_MAIN_SYNTH = 13, /**< Main Synthetic object */ 2675 //AOT_WAV_TAB_SYNTH = 14, /**< Wavetable Synthesis object */ 2676 //AOT_GEN_MIDI = 15, /**< General MIDI object */ 2677 //AOT_ALG_SYNTH_AUD_FX = 16, /**< Algorithmic Synthesis and Audio FX object */ 2678 AOT_ER_AAC_LC = 17, /**< Error Resilient(ER) AAC Low Complexity */ 2679 //AOT_RSVD_18 = 18, /**< (reserved) */ 2680 //AOT_ER_AAC_LTP = 19, /**< Error Resilient(ER) AAC LTP object */ 2681 AOT_ER_AAC_SCAL = 20, /**< Error Resilient(ER) AAC Scalable object */ 2682 //AOT_ER_TWIN_VQ = 21, /**< Error Resilient(ER) TwinVQ object */ 2683 AOT_ER_BSAC = 22, /**< Error Resilient(ER) BSAC object */ 2684 AOT_ER_AAC_LD = 23, /**< Error Resilient(ER) AAC LowDelay object */ 2685 //AOT_ER_CELP = 24, /**< Error Resilient(ER) CELP object */ 2686 //AOT_ER_HVXC = 25, /**< Error Resilient(ER) HVXC object */ 2687 //AOT_ER_HILN = 26, /**< Error Resilient(ER) HILN object */ 2688 //AOT_ER_PARA = 27, /**< Error Resilient(ER) Parametric object */ 2689 //AOT_RSVD_28 = 28, /**< might become SSC */ 2690 AOT_PS = 29, /**< PS, Parametric Stereo (includes SBR) */ 2691 //AOT_MPEGS = 30, /**< MPEG Surround */ 2692 2693 AOT_ESCAPE = 31, /**< Signal AOT uses more than 5 bits */ 2694 2695 //AOT_MP3ONMP4_L1 = 32, /**< MPEG-Layer1 in mp4 */ 2696 //AOT_MP3ONMP4_L2 = 33, /**< MPEG-Layer2 in mp4 */ 2697 //AOT_MP3ONMP4_L3 = 34, /**< MPEG-Layer3 in mp4 */ 2698 //AOT_RSVD_35 = 35, /**< might become DST */ 2699 //AOT_RSVD_36 = 36, /**< might become ALS */ 2700 //AOT_AAC_SLS = 37, /**< AAC + SLS */ 2701 //AOT_SLS = 38, /**< SLS */ 2702 //AOT_ER_AAC_ELD = 39, /**< AAC Enhanced Low Delay */ 2703 2704 //AOT_USAC = 42, /**< USAC */ 2705 //AOT_SAOC = 43, /**< SAOC */ 2706 //AOT_LD_MPEGS = 44, /**< Low Delay MPEG Surround */ 2707 2708 //AOT_RSVD50 = 50, /**< Interim AOT for Rsvd50 */ 2709 } AUDIO_OBJECT_TYPE; 2710 2711 status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio( 2712 const void *esds_data, size_t esds_size) { 2713 ESDS esds(esds_data, esds_size); 2714 2715 uint8_t objectTypeIndication; 2716 if (esds.getObjectTypeIndication(&objectTypeIndication) != OK) { 2717 return ERROR_MALFORMED; 2718 } 2719 2720 if (objectTypeIndication == 0xe1) { 2721 // This isn't MPEG4 audio at all, it's QCELP 14k... 2722 mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_QCELP); 2723 return OK; 2724 } 2725 2726 if (objectTypeIndication == 0x6b) { 2727 // The media subtype is MP3 audio 2728 // Our software MP3 audio decoder may not be able to handle 2729 // packetized MP3 audio; for now, lets just return ERROR_UNSUPPORTED 2730 ALOGE("MP3 track in MP4/3GPP file is not supported"); 2731 return ERROR_UNSUPPORTED; 2732 } 2733 2734 const uint8_t *csd; 2735 size_t csd_size; 2736 if (esds.getCodecSpecificInfo( 2737 (const void **)&csd, &csd_size) != OK) { 2738 return ERROR_MALFORMED; 2739 } 2740 2741 #if 0 2742 printf("ESD of size %d\n", csd_size); 2743 hexdump(csd, csd_size); 2744 #endif 2745 2746 if (csd_size == 0) { 2747 // There's no further information, i.e. no codec specific data 2748 // Let's assume that the information provided in the mpeg4 headers 2749 // is accurate and hope for the best. 2750 2751 return OK; 2752 } 2753 2754 if (csd_size < 2) { 2755 return ERROR_MALFORMED; 2756 } 2757 2758 static uint32_t kSamplingRate[] = { 2759 96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050, 2760 16000, 12000, 11025, 8000, 7350 2761 }; 2762 2763 ABitReader br(csd, csd_size); 2764 uint32_t objectType = br.getBits(5); 2765 2766 if (objectType == 31) { // AAC-ELD => additional 6 bits 2767 objectType = 32 + br.getBits(6); 2768 } 2769 2770 //keep AOT type 2771 mLastTrack->meta->setInt32(kKeyAACAOT, objectType); 2772 2773 uint32_t freqIndex = br.getBits(4); 2774 2775 int32_t sampleRate = 0; 2776 int32_t numChannels = 0; 2777 if (freqIndex == 15) { 2778 if (csd_size < 5) { 2779 return ERROR_MALFORMED; 2780 } 2781 sampleRate = br.getBits(24); 2782 numChannels = br.getBits(4); 2783 } else { 2784 numChannels = br.getBits(4); 2785 2786 if (freqIndex == 13 || freqIndex == 14) { 2787 return ERROR_MALFORMED; 2788 } 2789 2790 sampleRate = kSamplingRate[freqIndex]; 2791 } 2792 2793 if (objectType == AOT_SBR || objectType == AOT_PS) {//SBR specific config per 14496-3 table 1.13 2794 uint32_t extFreqIndex = br.getBits(4); 2795 int32_t extSampleRate; 2796 if (extFreqIndex == 15) { 2797 if (csd_size < 8) { 2798 return ERROR_MALFORMED; 2799 } 2800 extSampleRate = br.getBits(24); 2801 } else { 2802 if (extFreqIndex == 13 || extFreqIndex == 14) { 2803 return ERROR_MALFORMED; 2804 } 2805 extSampleRate = kSamplingRate[extFreqIndex]; 2806 } 2807 //TODO: save the extension sampling rate value in meta data => 2808 // mLastTrack->meta->setInt32(kKeyExtSampleRate, extSampleRate); 2809 } 2810 2811 switch (numChannels) { 2812 // values defined in 14496-3_2009 amendment-4 Table 1.19 - Channel Configuration 2813 case 0: 2814 case 1:// FC 2815 case 2:// FL FR 2816 case 3:// FC, FL FR 2817 case 4:// FC, FL FR, RC 2818 case 5:// FC, FL FR, SL SR 2819 case 6:// FC, FL FR, SL SR, LFE 2820 //numChannels already contains the right value 2821 break; 2822 case 11:// FC, FL FR, SL SR, RC, LFE 2823 numChannels = 7; 2824 break; 2825 case 7: // FC, FCL FCR, FL FR, SL SR, LFE 2826 case 12:// FC, FL FR, SL SR, RL RR, LFE 2827 case 14:// FC, FL FR, SL SR, LFE, FHL FHR 2828 numChannels = 8; 2829 break; 2830 default: 2831 return ERROR_UNSUPPORTED; 2832 } 2833 2834 { 2835 if (objectType == AOT_SBR || objectType == AOT_PS) { 2836 objectType = br.getBits(5); 2837 2838 if (objectType == AOT_ESCAPE) { 2839 objectType = 32 + br.getBits(6); 2840 } 2841 } 2842 if (objectType == AOT_AAC_LC || objectType == AOT_ER_AAC_LC || 2843 objectType == AOT_ER_AAC_LD || objectType == AOT_ER_AAC_SCAL || 2844 objectType == AOT_ER_BSAC) { 2845 const int32_t frameLengthFlag = br.getBits(1); 2846 2847 const int32_t dependsOnCoreCoder = br.getBits(1); 2848 2849 if (dependsOnCoreCoder ) { 2850 const int32_t coreCoderDelay = br.getBits(14); 2851 } 2852 2853 int32_t extensionFlag = -1; 2854 if (br.numBitsLeft() > 0) { 2855 extensionFlag = br.getBits(1); 2856 } else { 2857 switch (objectType) { 2858 // 14496-3 4.5.1.1 extensionFlag 2859 case AOT_AAC_LC: 2860 extensionFlag = 0; 2861 break; 2862 case AOT_ER_AAC_LC: 2863 case AOT_ER_AAC_SCAL: 2864 case AOT_ER_BSAC: 2865 case AOT_ER_AAC_LD: 2866 extensionFlag = 1; 2867 break; 2868 default: 2869 TRESPASS(); 2870 break; 2871 } 2872 ALOGW("csd missing extension flag; assuming %d for object type %u.", 2873 extensionFlag, objectType); 2874 } 2875 2876 if (numChannels == 0) { 2877 int32_t channelsEffectiveNum = 0; 2878 int32_t channelsNum = 0; 2879 const int32_t ElementInstanceTag = br.getBits(4); 2880 const int32_t Profile = br.getBits(2); 2881 const int32_t SamplingFrequencyIndex = br.getBits(4); 2882 const int32_t NumFrontChannelElements = br.getBits(4); 2883 const int32_t NumSideChannelElements = br.getBits(4); 2884 const int32_t NumBackChannelElements = br.getBits(4); 2885 const int32_t NumLfeChannelElements = br.getBits(2); 2886 const int32_t NumAssocDataElements = br.getBits(3); 2887 const int32_t NumValidCcElements = br.getBits(4); 2888 2889 const int32_t MonoMixdownPresent = br.getBits(1); 2890 if (MonoMixdownPresent != 0) { 2891 const int32_t MonoMixdownElementNumber = br.getBits(4); 2892 } 2893 2894 const int32_t StereoMixdownPresent = br.getBits(1); 2895 if (StereoMixdownPresent != 0) { 2896 const int32_t StereoMixdownElementNumber = br.getBits(4); 2897 } 2898 2899 const int32_t MatrixMixdownIndexPresent = br.getBits(1); 2900 if (MatrixMixdownIndexPresent != 0) { 2901 const int32_t MatrixMixdownIndex = br.getBits(2); 2902 const int32_t PseudoSurroundEnable = br.getBits(1); 2903 } 2904 2905 int i; 2906 for (i=0; i < NumFrontChannelElements; i++) { 2907 const int32_t FrontElementIsCpe = br.getBits(1); 2908 const int32_t FrontElementTagSelect = br.getBits(4); 2909 channelsNum += FrontElementIsCpe ? 2 : 1; 2910 } 2911 2912 for (i=0; i < NumSideChannelElements; i++) { 2913 const int32_t SideElementIsCpe = br.getBits(1); 2914 const int32_t SideElementTagSelect = br.getBits(4); 2915 channelsNum += SideElementIsCpe ? 2 : 1; 2916 } 2917 2918 for (i=0; i < NumBackChannelElements; i++) { 2919 const int32_t BackElementIsCpe = br.getBits(1); 2920 const int32_t BackElementTagSelect = br.getBits(4); 2921 channelsNum += BackElementIsCpe ? 2 : 1; 2922 } 2923 channelsEffectiveNum = channelsNum; 2924 2925 for (i=0; i < NumLfeChannelElements; i++) { 2926 const int32_t LfeElementTagSelect = br.getBits(4); 2927 channelsNum += 1; 2928 } 2929 ALOGV("mpeg4 audio channelsNum = %d", channelsNum); 2930 ALOGV("mpeg4 audio channelsEffectiveNum = %d", channelsEffectiveNum); 2931 numChannels = channelsNum; 2932 } 2933 } 2934 } 2935 2936 if (numChannels == 0) { 2937 return ERROR_UNSUPPORTED; 2938 } 2939 2940 int32_t prevSampleRate; 2941 CHECK(mLastTrack->meta->findInt32(kKeySampleRate, &prevSampleRate)); 2942 2943 if (prevSampleRate != sampleRate) { 2944 ALOGV("mpeg4 audio sample rate different from previous setting. " 2945 "was: %d, now: %d", prevSampleRate, sampleRate); 2946 } 2947 2948 mLastTrack->meta->setInt32(kKeySampleRate, sampleRate); 2949 2950 int32_t prevChannelCount; 2951 CHECK(mLastTrack->meta->findInt32(kKeyChannelCount, &prevChannelCount)); 2952 2953 if (prevChannelCount != numChannels) { 2954 ALOGV("mpeg4 audio channel count different from previous setting. " 2955 "was: %d, now: %d", prevChannelCount, numChannels); 2956 } 2957 2958 mLastTrack->meta->setInt32(kKeyChannelCount, numChannels); 2959 2960 return OK; 2961 } 2962 2963 //////////////////////////////////////////////////////////////////////////////// 2964 2965 MPEG4Source::MPEG4Source( 2966 const sp<MPEG4Extractor> &owner, 2967 const sp<MetaData> &format, 2968 const sp<DataSource> &dataSource, 2969 int32_t timeScale, 2970 const sp<SampleTable> &sampleTable, 2971 Vector<SidxEntry> &sidx, 2972 const Trex *trex, 2973 off64_t firstMoofOffset) 2974 : mOwner(owner), 2975 mFormat(format), 2976 mDataSource(dataSource), 2977 mTimescale(timeScale), 2978 mSampleTable(sampleTable), 2979 mCurrentSampleIndex(0), 2980 mCurrentFragmentIndex(0), 2981 mSegments(sidx), 2982 mTrex(trex), 2983 mFirstMoofOffset(firstMoofOffset), 2984 mCurrentMoofOffset(firstMoofOffset), 2985 mCurrentTime(0), 2986 mCurrentSampleInfoAllocSize(0), 2987 mCurrentSampleInfoSizes(NULL), 2988 mCurrentSampleInfoOffsetsAllocSize(0), 2989 mCurrentSampleInfoOffsets(NULL), 2990 mIsAVC(false), 2991 mIsHEVC(false), 2992 mNALLengthSize(0), 2993 mStarted(false), 2994 mGroup(NULL), 2995 mBuffer(NULL), 2996 mWantsNALFragments(false), 2997 mSrcBuffer(NULL) { 2998 2999 memset(&mTrackFragmentHeaderInfo, 0, sizeof(mTrackFragmentHeaderInfo)); 3000 3001 mFormat->findInt32(kKeyCryptoMode, &mCryptoMode); 3002 mDefaultIVSize = 0; 3003 mFormat->findInt32(kKeyCryptoDefaultIVSize, &mDefaultIVSize); 3004 uint32_t keytype; 3005 const void *key; 3006 size_t keysize; 3007 if (mFormat->findData(kKeyCryptoKey, &keytype, &key, &keysize)) { 3008 CHECK(keysize <= 16); 3009 memset(mCryptoKey, 0, 16); 3010 memcpy(mCryptoKey, key, keysize); 3011 } 3012 3013 const char *mime; 3014 bool success = mFormat->findCString(kKeyMIMEType, &mime); 3015 CHECK(success); 3016 3017 mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC); 3018 mIsHEVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC); 3019 3020 if (mIsAVC) { 3021 uint32_t type; 3022 const void *data; 3023 size_t size; 3024 CHECK(format->findData(kKeyAVCC, &type, &data, &size)); 3025 3026 const uint8_t *ptr = (const uint8_t *)data; 3027 3028 CHECK(size >= 7); 3029 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1 3030 3031 // The number of bytes used to encode the length of a NAL unit. 3032 mNALLengthSize = 1 + (ptr[4] & 3); 3033 } else if (mIsHEVC) { 3034 uint32_t type; 3035 const void *data; 3036 size_t size; 3037 CHECK(format->findData(kKeyHVCC, &type, &data, &size)); 3038 3039 const uint8_t *ptr = (const uint8_t *)data; 3040 3041 CHECK(size >= 7); 3042 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1 3043 3044 mNALLengthSize = 1 + (ptr[14 + 7] & 3); 3045 } 3046 3047 CHECK(format->findInt32(kKeyTrackID, &mTrackId)); 3048 3049 if (mFirstMoofOffset != 0) { 3050 off64_t offset = mFirstMoofOffset; 3051 parseChunk(&offset); 3052 } 3053 } 3054 3055 MPEG4Source::~MPEG4Source() { 3056 if (mStarted) { 3057 stop(); 3058 } 3059 free(mCurrentSampleInfoSizes); 3060 free(mCurrentSampleInfoOffsets); 3061 } 3062 3063 status_t MPEG4Source::start(MetaData *params) { 3064 Mutex::Autolock autoLock(mLock); 3065 3066 CHECK(!mStarted); 3067 3068 int32_t val; 3069 if (params && params->findInt32(kKeyWantsNALFragments, &val) 3070 && val != 0) { 3071 mWantsNALFragments = true; 3072 } else { 3073 mWantsNALFragments = false; 3074 } 3075 3076 mGroup = new MediaBufferGroup; 3077 3078 int32_t max_size; 3079 CHECK(mFormat->findInt32(kKeyMaxInputSize, &max_size)); 3080 3081 mGroup->add_buffer(new MediaBuffer(max_size)); 3082 3083 mSrcBuffer = new (std::nothrow) uint8_t[max_size]; 3084 if (mSrcBuffer == NULL) { 3085 // file probably specified a bad max size 3086 return ERROR_MALFORMED; 3087 } 3088 3089 mStarted = true; 3090 3091 return OK; 3092 } 3093 3094 status_t MPEG4Source::stop() { 3095 Mutex::Autolock autoLock(mLock); 3096 3097 CHECK(mStarted); 3098 3099 if (mBuffer != NULL) { 3100 mBuffer->release(); 3101 mBuffer = NULL; 3102 } 3103 3104 delete[] mSrcBuffer; 3105 mSrcBuffer = NULL; 3106 3107 delete mGroup; 3108 mGroup = NULL; 3109 3110 mStarted = false; 3111 mCurrentSampleIndex = 0; 3112 3113 return OK; 3114 } 3115 3116 status_t MPEG4Source::parseChunk(off64_t *offset) { 3117 uint32_t hdr[2]; 3118 if (mDataSource->readAt(*offset, hdr, 8) < 8) { 3119 return ERROR_IO; 3120 } 3121 uint64_t chunk_size = ntohl(hdr[0]); 3122 uint32_t chunk_type = ntohl(hdr[1]); 3123 off64_t data_offset = *offset + 8; 3124 3125 if (chunk_size == 1) { 3126 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) { 3127 return ERROR_IO; 3128 } 3129 chunk_size = ntoh64(chunk_size); 3130 data_offset += 8; 3131 3132 if (chunk_size < 16) { 3133 // The smallest valid chunk is 16 bytes long in this case. 3134 return ERROR_MALFORMED; 3135 } 3136 } else if (chunk_size < 8) { 3137 // The smallest valid chunk is 8 bytes long. 3138 return ERROR_MALFORMED; 3139 } 3140 3141 char chunk[5]; 3142 MakeFourCCString(chunk_type, chunk); 3143 ALOGV("MPEG4Source chunk %s @ %llx", chunk, *offset); 3144 3145 off64_t chunk_data_size = *offset + chunk_size - data_offset; 3146 3147 switch(chunk_type) { 3148 3149 case FOURCC('t', 'r', 'a', 'f'): 3150 case FOURCC('m', 'o', 'o', 'f'): { 3151 off64_t stop_offset = *offset + chunk_size; 3152 *offset = data_offset; 3153 while (*offset < stop_offset) { 3154 status_t err = parseChunk(offset); 3155 if (err != OK) { 3156 return err; 3157 } 3158 } 3159 if (chunk_type == FOURCC('m', 'o', 'o', 'f')) { 3160 // *offset points to the box following this moof. Find the next moof from there. 3161 3162 while (true) { 3163 if (mDataSource->readAt(*offset, hdr, 8) < 8) { 3164 return ERROR_END_OF_STREAM; 3165 } 3166 chunk_size = ntohl(hdr[0]); 3167 chunk_type = ntohl(hdr[1]); 3168 if (chunk_type == FOURCC('m', 'o', 'o', 'f')) { 3169 mNextMoofOffset = *offset; 3170 break; 3171 } 3172 *offset += chunk_size; 3173 } 3174 } 3175 break; 3176 } 3177 3178 case FOURCC('t', 'f', 'h', 'd'): { 3179 status_t err; 3180 if ((err = parseTrackFragmentHeader(data_offset, chunk_data_size)) != OK) { 3181 return err; 3182 } 3183 *offset += chunk_size; 3184 break; 3185 } 3186 3187 case FOURCC('t', 'r', 'u', 'n'): { 3188 status_t err; 3189 if (mLastParsedTrackId == mTrackId) { 3190 if ((err = parseTrackFragmentRun(data_offset, chunk_data_size)) != OK) { 3191 return err; 3192 } 3193 } 3194 3195 *offset += chunk_size; 3196 break; 3197 } 3198 3199 case FOURCC('s', 'a', 'i', 'z'): { 3200 status_t err; 3201 if ((err = parseSampleAuxiliaryInformationSizes(data_offset, chunk_data_size)) != OK) { 3202 return err; 3203 } 3204 *offset += chunk_size; 3205 break; 3206 } 3207 case FOURCC('s', 'a', 'i', 'o'): { 3208 status_t err; 3209 if ((err = parseSampleAuxiliaryInformationOffsets(data_offset, chunk_data_size)) != OK) { 3210 return err; 3211 } 3212 *offset += chunk_size; 3213 break; 3214 } 3215 3216 case FOURCC('m', 'd', 'a', 't'): { 3217 // parse DRM info if present 3218 ALOGV("MPEG4Source::parseChunk mdat"); 3219 // if saiz/saoi was previously observed, do something with the sampleinfos 3220 *offset += chunk_size; 3221 break; 3222 } 3223 3224 default: { 3225 *offset += chunk_size; 3226 break; 3227 } 3228 } 3229 return OK; 3230 } 3231 3232 status_t MPEG4Source::parseSampleAuxiliaryInformationSizes( 3233 off64_t offset, off64_t /* size */) { 3234 ALOGV("parseSampleAuxiliaryInformationSizes"); 3235 // 14496-12 8.7.12 3236 uint8_t version; 3237 if (mDataSource->readAt( 3238 offset, &version, sizeof(version)) 3239 < (ssize_t)sizeof(version)) { 3240 return ERROR_IO; 3241 } 3242 3243 if (version != 0) { 3244 return ERROR_UNSUPPORTED; 3245 } 3246 offset++; 3247 3248 uint32_t flags; 3249 if (!mDataSource->getUInt24(offset, &flags)) { 3250 return ERROR_IO; 3251 } 3252 offset += 3; 3253 3254 if (flags & 1) { 3255 uint32_t tmp; 3256 if (!mDataSource->getUInt32(offset, &tmp)) { 3257 return ERROR_MALFORMED; 3258 } 3259 mCurrentAuxInfoType = tmp; 3260 offset += 4; 3261 if (!mDataSource->getUInt32(offset, &tmp)) { 3262 return ERROR_MALFORMED; 3263 } 3264 mCurrentAuxInfoTypeParameter = tmp; 3265 offset += 4; 3266 } 3267 3268 uint8_t defsize; 3269 if (mDataSource->readAt(offset, &defsize, 1) != 1) { 3270 return ERROR_MALFORMED; 3271 } 3272 mCurrentDefaultSampleInfoSize = defsize; 3273 offset++; 3274 3275 uint32_t smplcnt; 3276 if (!mDataSource->getUInt32(offset, &smplcnt)) { 3277 return ERROR_MALFORMED; 3278 } 3279 mCurrentSampleInfoCount = smplcnt; 3280 offset += 4; 3281 3282 if (mCurrentDefaultSampleInfoSize != 0) { 3283 ALOGV("@@@@ using default sample info size of %d", mCurrentDefaultSampleInfoSize); 3284 return OK; 3285 } 3286 if (smplcnt > mCurrentSampleInfoAllocSize) { 3287 mCurrentSampleInfoSizes = (uint8_t*) realloc(mCurrentSampleInfoSizes, smplcnt); 3288 mCurrentSampleInfoAllocSize = smplcnt; 3289 } 3290 3291 mDataSource->readAt(offset, mCurrentSampleInfoSizes, smplcnt); 3292 return OK; 3293 } 3294 3295 status_t MPEG4Source::parseSampleAuxiliaryInformationOffsets( 3296 off64_t offset, off64_t /* size */) { 3297 ALOGV("parseSampleAuxiliaryInformationOffsets"); 3298 // 14496-12 8.7.13 3299 uint8_t version; 3300 if (mDataSource->readAt(offset, &version, sizeof(version)) != 1) { 3301 return ERROR_IO; 3302 } 3303 offset++; 3304 3305 uint32_t flags; 3306 if (!mDataSource->getUInt24(offset, &flags)) { 3307 return ERROR_IO; 3308 } 3309 offset += 3; 3310 3311 uint32_t entrycount; 3312 if (!mDataSource->getUInt32(offset, &entrycount)) { 3313 return ERROR_IO; 3314 } 3315 offset += 4; 3316 3317 if (entrycount > mCurrentSampleInfoOffsetsAllocSize) { 3318 mCurrentSampleInfoOffsets = (uint64_t*) realloc(mCurrentSampleInfoOffsets, entrycount * 8); 3319 mCurrentSampleInfoOffsetsAllocSize = entrycount; 3320 } 3321 mCurrentSampleInfoOffsetCount = entrycount; 3322 3323 for (size_t i = 0; i < entrycount; i++) { 3324 if (version == 0) { 3325 uint32_t tmp; 3326 if (!mDataSource->getUInt32(offset, &tmp)) { 3327 return ERROR_IO; 3328 } 3329 mCurrentSampleInfoOffsets[i] = tmp; 3330 offset += 4; 3331 } else { 3332 uint64_t tmp; 3333 if (!mDataSource->getUInt64(offset, &tmp)) { 3334 return ERROR_IO; 3335 } 3336 mCurrentSampleInfoOffsets[i] = tmp; 3337 offset += 8; 3338 } 3339 } 3340 3341 // parse clear/encrypted data 3342 3343 off64_t drmoffset = mCurrentSampleInfoOffsets[0]; // from moof 3344 3345 drmoffset += mCurrentMoofOffset; 3346 int ivlength; 3347 CHECK(mFormat->findInt32(kKeyCryptoDefaultIVSize, &ivlength)); 3348 3349 // read CencSampleAuxiliaryDataFormats 3350 for (size_t i = 0; i < mCurrentSampleInfoCount; i++) { 3351 Sample *smpl = &mCurrentSamples.editItemAt(i); 3352 3353 memset(smpl->iv, 0, 16); 3354 if (mDataSource->readAt(drmoffset, smpl->iv, ivlength) != ivlength) { 3355 return ERROR_IO; 3356 } 3357 3358 drmoffset += ivlength; 3359 3360 int32_t smplinfosize = mCurrentDefaultSampleInfoSize; 3361 if (smplinfosize == 0) { 3362 smplinfosize = mCurrentSampleInfoSizes[i]; 3363 } 3364 if (smplinfosize > ivlength) { 3365 uint16_t numsubsamples; 3366 if (!mDataSource->getUInt16(drmoffset, &numsubsamples)) { 3367 return ERROR_IO; 3368 } 3369 drmoffset += 2; 3370 for (size_t j = 0; j < numsubsamples; j++) { 3371 uint16_t numclear; 3372 uint32_t numencrypted; 3373 if (!mDataSource->getUInt16(drmoffset, &numclear)) { 3374 return ERROR_IO; 3375 } 3376 drmoffset += 2; 3377 if (!mDataSource->getUInt32(drmoffset, &numencrypted)) { 3378 return ERROR_IO; 3379 } 3380 drmoffset += 4; 3381 smpl->clearsizes.add(numclear); 3382 smpl->encryptedsizes.add(numencrypted); 3383 } 3384 } else { 3385 smpl->clearsizes.add(0); 3386 smpl->encryptedsizes.add(smpl->size); 3387 } 3388 } 3389 3390 3391 return OK; 3392 } 3393 3394 status_t MPEG4Source::parseTrackFragmentHeader(off64_t offset, off64_t size) { 3395 3396 if (size < 8) { 3397 return -EINVAL; 3398 } 3399 3400 uint32_t flags; 3401 if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags 3402 return ERROR_MALFORMED; 3403 } 3404 3405 if (flags & 0xff000000) { 3406 return -EINVAL; 3407 } 3408 3409 if (!mDataSource->getUInt32(offset + 4, (uint32_t*)&mLastParsedTrackId)) { 3410 return ERROR_MALFORMED; 3411 } 3412 3413 if (mLastParsedTrackId != mTrackId) { 3414 // this is not the right track, skip it 3415 return OK; 3416 } 3417 3418 mTrackFragmentHeaderInfo.mFlags = flags; 3419 mTrackFragmentHeaderInfo.mTrackID = mLastParsedTrackId; 3420 offset += 8; 3421 size -= 8; 3422 3423 ALOGV("fragment header: %08x %08x", flags, mTrackFragmentHeaderInfo.mTrackID); 3424 3425 if (flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent) { 3426 if (size < 8) { 3427 return -EINVAL; 3428 } 3429 3430 if (!mDataSource->getUInt64(offset, &mTrackFragmentHeaderInfo.mBaseDataOffset)) { 3431 return ERROR_MALFORMED; 3432 } 3433 offset += 8; 3434 size -= 8; 3435 } 3436 3437 if (flags & TrackFragmentHeaderInfo::kSampleDescriptionIndexPresent) { 3438 if (size < 4) { 3439 return -EINVAL; 3440 } 3441 3442 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mSampleDescriptionIndex)) { 3443 return ERROR_MALFORMED; 3444 } 3445 offset += 4; 3446 size -= 4; 3447 } 3448 3449 if (flags & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) { 3450 if (size < 4) { 3451 return -EINVAL; 3452 } 3453 3454 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleDuration)) { 3455 return ERROR_MALFORMED; 3456 } 3457 offset += 4; 3458 size -= 4; 3459 } 3460 3461 if (flags & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) { 3462 if (size < 4) { 3463 return -EINVAL; 3464 } 3465 3466 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleSize)) { 3467 return ERROR_MALFORMED; 3468 } 3469 offset += 4; 3470 size -= 4; 3471 } 3472 3473 if (flags & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) { 3474 if (size < 4) { 3475 return -EINVAL; 3476 } 3477 3478 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleFlags)) { 3479 return ERROR_MALFORMED; 3480 } 3481 offset += 4; 3482 size -= 4; 3483 } 3484 3485 if (!(flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent)) { 3486 mTrackFragmentHeaderInfo.mBaseDataOffset = mCurrentMoofOffset; 3487 } 3488 3489 mTrackFragmentHeaderInfo.mDataOffset = 0; 3490 return OK; 3491 } 3492 3493 status_t MPEG4Source::parseTrackFragmentRun(off64_t offset, off64_t size) { 3494 3495 ALOGV("MPEG4Extractor::parseTrackFragmentRun"); 3496 if (size < 8) { 3497 return -EINVAL; 3498 } 3499 3500 enum { 3501 kDataOffsetPresent = 0x01, 3502 kFirstSampleFlagsPresent = 0x04, 3503 kSampleDurationPresent = 0x100, 3504 kSampleSizePresent = 0x200, 3505 kSampleFlagsPresent = 0x400, 3506 kSampleCompositionTimeOffsetPresent = 0x800, 3507 }; 3508 3509 uint32_t flags; 3510 if (!mDataSource->getUInt32(offset, &flags)) { 3511 return ERROR_MALFORMED; 3512 } 3513 ALOGV("fragment run flags: %08x", flags); 3514 3515 if (flags & 0xff000000) { 3516 return -EINVAL; 3517 } 3518 3519 if ((flags & kFirstSampleFlagsPresent) && (flags & kSampleFlagsPresent)) { 3520 // These two shall not be used together. 3521 return -EINVAL; 3522 } 3523 3524 uint32_t sampleCount; 3525 if (!mDataSource->getUInt32(offset + 4, &sampleCount)) { 3526 return ERROR_MALFORMED; 3527 } 3528 offset += 8; 3529 size -= 8; 3530 3531 uint64_t dataOffset = mTrackFragmentHeaderInfo.mDataOffset; 3532 3533 uint32_t firstSampleFlags = 0; 3534 3535 if (flags & kDataOffsetPresent) { 3536 if (size < 4) { 3537 return -EINVAL; 3538 } 3539 3540 int32_t dataOffsetDelta; 3541 if (!mDataSource->getUInt32(offset, (uint32_t*)&dataOffsetDelta)) { 3542 return ERROR_MALFORMED; 3543 } 3544 3545 dataOffset = mTrackFragmentHeaderInfo.mBaseDataOffset + dataOffsetDelta; 3546 3547 offset += 4; 3548 size -= 4; 3549 } 3550 3551 if (flags & kFirstSampleFlagsPresent) { 3552 if (size < 4) { 3553 return -EINVAL; 3554 } 3555 3556 if (!mDataSource->getUInt32(offset, &firstSampleFlags)) { 3557 return ERROR_MALFORMED; 3558 } 3559 offset += 4; 3560 size -= 4; 3561 } 3562 3563 uint32_t sampleDuration = 0, sampleSize = 0, sampleFlags = 0, 3564 sampleCtsOffset = 0; 3565 3566 size_t bytesPerSample = 0; 3567 if (flags & kSampleDurationPresent) { 3568 bytesPerSample += 4; 3569 } else if (mTrackFragmentHeaderInfo.mFlags 3570 & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) { 3571 sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration; 3572 } else if (mTrex) { 3573 sampleDuration = mTrex->default_sample_duration; 3574 } 3575 3576 if (flags & kSampleSizePresent) { 3577 bytesPerSample += 4; 3578 } else if (mTrackFragmentHeaderInfo.mFlags 3579 & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) { 3580 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize; 3581 } else { 3582 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize; 3583 } 3584 3585 if (flags & kSampleFlagsPresent) { 3586 bytesPerSample += 4; 3587 } else if (mTrackFragmentHeaderInfo.mFlags 3588 & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) { 3589 sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags; 3590 } else { 3591 sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags; 3592 } 3593 3594 if (flags & kSampleCompositionTimeOffsetPresent) { 3595 bytesPerSample += 4; 3596 } else { 3597 sampleCtsOffset = 0; 3598 } 3599 3600 if (size < (off64_t)sampleCount * bytesPerSample) { 3601 return -EINVAL; 3602 } 3603 3604 Sample tmp; 3605 for (uint32_t i = 0; i < sampleCount; ++i) { 3606 if (flags & kSampleDurationPresent) { 3607 if (!mDataSource->getUInt32(offset, &sampleDuration)) { 3608 return ERROR_MALFORMED; 3609 } 3610 offset += 4; 3611 } 3612 3613 if (flags & kSampleSizePresent) { 3614 if (!mDataSource->getUInt32(offset, &sampleSize)) { 3615 return ERROR_MALFORMED; 3616 } 3617 offset += 4; 3618 } 3619 3620 if (flags & kSampleFlagsPresent) { 3621 if (!mDataSource->getUInt32(offset, &sampleFlags)) { 3622 return ERROR_MALFORMED; 3623 } 3624 offset += 4; 3625 } 3626 3627 if (flags & kSampleCompositionTimeOffsetPresent) { 3628 if (!mDataSource->getUInt32(offset, &sampleCtsOffset)) { 3629 return ERROR_MALFORMED; 3630 } 3631 offset += 4; 3632 } 3633 3634 ALOGV("adding sample %d at offset 0x%08" PRIx64 ", size %u, duration %u, " 3635 " flags 0x%08x", i + 1, 3636 dataOffset, sampleSize, sampleDuration, 3637 (flags & kFirstSampleFlagsPresent) && i == 0 3638 ? firstSampleFlags : sampleFlags); 3639 tmp.offset = dataOffset; 3640 tmp.size = sampleSize; 3641 tmp.duration = sampleDuration; 3642 tmp.compositionOffset = sampleCtsOffset; 3643 mCurrentSamples.add(tmp); 3644 3645 dataOffset += sampleSize; 3646 } 3647 3648 mTrackFragmentHeaderInfo.mDataOffset = dataOffset; 3649 3650 return OK; 3651 } 3652 3653 sp<MetaData> MPEG4Source::getFormat() { 3654 Mutex::Autolock autoLock(mLock); 3655 3656 return mFormat; 3657 } 3658 3659 size_t MPEG4Source::parseNALSize(const uint8_t *data) const { 3660 switch (mNALLengthSize) { 3661 case 1: 3662 return *data; 3663 case 2: 3664 return U16_AT(data); 3665 case 3: 3666 return ((size_t)data[0] << 16) | U16_AT(&data[1]); 3667 case 4: 3668 return U32_AT(data); 3669 } 3670 3671 // This cannot happen, mNALLengthSize springs to life by adding 1 to 3672 // a 2-bit integer. 3673 CHECK(!"Should not be here."); 3674 3675 return 0; 3676 } 3677 3678 status_t MPEG4Source::read( 3679 MediaBuffer **out, const ReadOptions *options) { 3680 Mutex::Autolock autoLock(mLock); 3681 3682 CHECK(mStarted); 3683 3684 if (mFirstMoofOffset > 0) { 3685 return fragmentedRead(out, options); 3686 } 3687 3688 *out = NULL; 3689 3690 int64_t targetSampleTimeUs = -1; 3691 3692 int64_t seekTimeUs; 3693 ReadOptions::SeekMode mode; 3694 if (options && options->getSeekTo(&seekTimeUs, &mode)) { 3695 uint32_t findFlags = 0; 3696 switch (mode) { 3697 case ReadOptions::SEEK_PREVIOUS_SYNC: 3698 findFlags = SampleTable::kFlagBefore; 3699 break; 3700 case ReadOptions::SEEK_NEXT_SYNC: 3701 findFlags = SampleTable::kFlagAfter; 3702 break; 3703 case ReadOptions::SEEK_CLOSEST_SYNC: 3704 case ReadOptions::SEEK_CLOSEST: 3705 findFlags = SampleTable::kFlagClosest; 3706 break; 3707 default: 3708 CHECK(!"Should not be here."); 3709 break; 3710 } 3711 3712 uint32_t sampleIndex; 3713 status_t err = mSampleTable->findSampleAtTime( 3714 seekTimeUs, 1000000, mTimescale, 3715 &sampleIndex, findFlags); 3716 3717 if (mode == ReadOptions::SEEK_CLOSEST) { 3718 // We found the closest sample already, now we want the sync 3719 // sample preceding it (or the sample itself of course), even 3720 // if the subsequent sync sample is closer. 3721 findFlags = SampleTable::kFlagBefore; 3722 } 3723 3724 uint32_t syncSampleIndex; 3725 if (err == OK) { 3726 err = mSampleTable->findSyncSampleNear( 3727 sampleIndex, &syncSampleIndex, findFlags); 3728 } 3729 3730 uint32_t sampleTime; 3731 if (err == OK) { 3732 err = mSampleTable->getMetaDataForSample( 3733 sampleIndex, NULL, NULL, &sampleTime); 3734 } 3735 3736 if (err != OK) { 3737 if (err == ERROR_OUT_OF_RANGE) { 3738 // An attempt to seek past the end of the stream would 3739 // normally cause this ERROR_OUT_OF_RANGE error. Propagating 3740 // this all the way to the MediaPlayer would cause abnormal 3741 // termination. Legacy behaviour appears to be to behave as if 3742 // we had seeked to the end of stream, ending normally. 3743 err = ERROR_END_OF_STREAM; 3744 } 3745 ALOGV("end of stream"); 3746 return err; 3747 } 3748 3749 if (mode == ReadOptions::SEEK_CLOSEST) { 3750 targetSampleTimeUs = (sampleTime * 1000000ll) / mTimescale; 3751 } 3752 3753 #if 0 3754 uint32_t syncSampleTime; 3755 CHECK_EQ(OK, mSampleTable->getMetaDataForSample( 3756 syncSampleIndex, NULL, NULL, &syncSampleTime)); 3757 3758 ALOGI("seek to time %lld us => sample at time %lld us, " 3759 "sync sample at time %lld us", 3760 seekTimeUs, 3761 sampleTime * 1000000ll / mTimescale, 3762 syncSampleTime * 1000000ll / mTimescale); 3763 #endif 3764 3765 mCurrentSampleIndex = syncSampleIndex; 3766 if (mBuffer != NULL) { 3767 mBuffer->release(); 3768 mBuffer = NULL; 3769 } 3770 3771 // fall through 3772 } 3773 3774 off64_t offset; 3775 size_t size; 3776 uint32_t cts, stts; 3777 bool isSyncSample; 3778 bool newBuffer = false; 3779 if (mBuffer == NULL) { 3780 newBuffer = true; 3781 3782 status_t err = 3783 mSampleTable->getMetaDataForSample( 3784 mCurrentSampleIndex, &offset, &size, &cts, &isSyncSample, &stts); 3785 3786 if (err != OK) { 3787 return err; 3788 } 3789 3790 err = mGroup->acquire_buffer(&mBuffer); 3791 3792 if (err != OK) { 3793 CHECK(mBuffer == NULL); 3794 return err; 3795 } 3796 } 3797 3798 if ((!mIsAVC && !mIsHEVC) || mWantsNALFragments) { 3799 if (newBuffer) { 3800 ssize_t num_bytes_read = 3801 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size); 3802 3803 if (num_bytes_read < (ssize_t)size) { 3804 mBuffer->release(); 3805 mBuffer = NULL; 3806 3807 return ERROR_IO; 3808 } 3809 3810 CHECK(mBuffer != NULL); 3811 mBuffer->set_range(0, size); 3812 mBuffer->meta_data()->clear(); 3813 mBuffer->meta_data()->setInt64( 3814 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 3815 mBuffer->meta_data()->setInt64( 3816 kKeyDuration, ((int64_t)stts * 1000000) / mTimescale); 3817 3818 if (targetSampleTimeUs >= 0) { 3819 mBuffer->meta_data()->setInt64( 3820 kKeyTargetTime, targetSampleTimeUs); 3821 } 3822 3823 if (isSyncSample) { 3824 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 3825 } 3826 3827 ++mCurrentSampleIndex; 3828 } 3829 3830 if (!mIsAVC && !mIsHEVC) { 3831 *out = mBuffer; 3832 mBuffer = NULL; 3833 3834 return OK; 3835 } 3836 3837 // Each NAL unit is split up into its constituent fragments and 3838 // each one of them returned in its own buffer. 3839 3840 CHECK(mBuffer->range_length() >= mNALLengthSize); 3841 3842 const uint8_t *src = 3843 (const uint8_t *)mBuffer->data() + mBuffer->range_offset(); 3844 3845 size_t nal_size = parseNALSize(src); 3846 if (mBuffer->range_length() < mNALLengthSize + nal_size) { 3847 ALOGE("incomplete NAL unit."); 3848 3849 mBuffer->release(); 3850 mBuffer = NULL; 3851 3852 return ERROR_MALFORMED; 3853 } 3854 3855 MediaBuffer *clone = mBuffer->clone(); 3856 CHECK(clone != NULL); 3857 clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size); 3858 3859 CHECK(mBuffer != NULL); 3860 mBuffer->set_range( 3861 mBuffer->range_offset() + mNALLengthSize + nal_size, 3862 mBuffer->range_length() - mNALLengthSize - nal_size); 3863 3864 if (mBuffer->range_length() == 0) { 3865 mBuffer->release(); 3866 mBuffer = NULL; 3867 } 3868 3869 *out = clone; 3870 3871 return OK; 3872 } else { 3873 // Whole NAL units are returned but each fragment is prefixed by 3874 // the start code (0x00 00 00 01). 3875 ssize_t num_bytes_read = 0; 3876 int32_t drm = 0; 3877 bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0); 3878 if (usesDRM) { 3879 num_bytes_read = 3880 mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size); 3881 } else { 3882 num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size); 3883 } 3884 3885 if (num_bytes_read < (ssize_t)size) { 3886 mBuffer->release(); 3887 mBuffer = NULL; 3888 3889 return ERROR_IO; 3890 } 3891 3892 if (usesDRM) { 3893 CHECK(mBuffer != NULL); 3894 mBuffer->set_range(0, size); 3895 3896 } else { 3897 uint8_t *dstData = (uint8_t *)mBuffer->data(); 3898 size_t srcOffset = 0; 3899 size_t dstOffset = 0; 3900 3901 while (srcOffset < size) { 3902 bool isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize); 3903 size_t nalLength = 0; 3904 if (!isMalFormed) { 3905 nalLength = parseNALSize(&mSrcBuffer[srcOffset]); 3906 srcOffset += mNALLengthSize; 3907 isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength); 3908 } 3909 3910 if (isMalFormed) { 3911 ALOGE("Video is malformed"); 3912 mBuffer->release(); 3913 mBuffer = NULL; 3914 return ERROR_MALFORMED; 3915 } 3916 3917 if (nalLength == 0) { 3918 continue; 3919 } 3920 3921 CHECK(dstOffset + 4 <= mBuffer->size()); 3922 3923 dstData[dstOffset++] = 0; 3924 dstData[dstOffset++] = 0; 3925 dstData[dstOffset++] = 0; 3926 dstData[dstOffset++] = 1; 3927 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength); 3928 srcOffset += nalLength; 3929 dstOffset += nalLength; 3930 } 3931 CHECK_EQ(srcOffset, size); 3932 CHECK(mBuffer != NULL); 3933 mBuffer->set_range(0, dstOffset); 3934 } 3935 3936 mBuffer->meta_data()->clear(); 3937 mBuffer->meta_data()->setInt64( 3938 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 3939 mBuffer->meta_data()->setInt64( 3940 kKeyDuration, ((int64_t)stts * 1000000) / mTimescale); 3941 3942 if (targetSampleTimeUs >= 0) { 3943 mBuffer->meta_data()->setInt64( 3944 kKeyTargetTime, targetSampleTimeUs); 3945 } 3946 3947 if (isSyncSample) { 3948 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 3949 } 3950 3951 ++mCurrentSampleIndex; 3952 3953 *out = mBuffer; 3954 mBuffer = NULL; 3955 3956 return OK; 3957 } 3958 } 3959 3960 status_t MPEG4Source::fragmentedRead( 3961 MediaBuffer **out, const ReadOptions *options) { 3962 3963 ALOGV("MPEG4Source::fragmentedRead"); 3964 3965 CHECK(mStarted); 3966 3967 *out = NULL; 3968 3969 int64_t targetSampleTimeUs = -1; 3970 3971 int64_t seekTimeUs; 3972 ReadOptions::SeekMode mode; 3973 if (options && options->getSeekTo(&seekTimeUs, &mode)) { 3974 3975 int numSidxEntries = mSegments.size(); 3976 if (numSidxEntries != 0) { 3977 int64_t totalTime = 0; 3978 off64_t totalOffset = mFirstMoofOffset; 3979 for (int i = 0; i < numSidxEntries; i++) { 3980 const SidxEntry *se = &mSegments[i]; 3981 if (totalTime + se->mDurationUs > seekTimeUs) { 3982 // The requested time is somewhere in this segment 3983 if ((mode == ReadOptions::SEEK_NEXT_SYNC && seekTimeUs > totalTime) || 3984 (mode == ReadOptions::SEEK_CLOSEST_SYNC && 3985 (seekTimeUs - totalTime) > (totalTime + se->mDurationUs - seekTimeUs))) { 3986 // requested next sync, or closest sync and it was closer to the end of 3987 // this segment 3988 totalTime += se->mDurationUs; 3989 totalOffset += se->mSize; 3990 } 3991 break; 3992 } 3993 totalTime += se->mDurationUs; 3994 totalOffset += se->mSize; 3995 } 3996 mCurrentMoofOffset = totalOffset; 3997 mCurrentSamples.clear(); 3998 mCurrentSampleIndex = 0; 3999 parseChunk(&totalOffset); 4000 mCurrentTime = totalTime * mTimescale / 1000000ll; 4001 } else { 4002 // without sidx boxes, we can only seek to 0 4003 mCurrentMoofOffset = mFirstMoofOffset; 4004 mCurrentSamples.clear(); 4005 mCurrentSampleIndex = 0; 4006 off64_t tmp = mCurrentMoofOffset; 4007 parseChunk(&tmp); 4008 mCurrentTime = 0; 4009 } 4010 4011 if (mBuffer != NULL) { 4012 mBuffer->release(); 4013 mBuffer = NULL; 4014 } 4015 4016 // fall through 4017 } 4018 4019 off64_t offset = 0; 4020 size_t size = 0; 4021 uint32_t cts = 0; 4022 bool isSyncSample = false; 4023 bool newBuffer = false; 4024 if (mBuffer == NULL) { 4025 newBuffer = true; 4026 4027 if (mCurrentSampleIndex >= mCurrentSamples.size()) { 4028 // move to next fragment if there is one 4029 if (mNextMoofOffset <= mCurrentMoofOffset) { 4030 return ERROR_END_OF_STREAM; 4031 } 4032 off64_t nextMoof = mNextMoofOffset; 4033 mCurrentMoofOffset = nextMoof; 4034 mCurrentSamples.clear(); 4035 mCurrentSampleIndex = 0; 4036 parseChunk(&nextMoof); 4037 if (mCurrentSampleIndex >= mCurrentSamples.size()) { 4038 return ERROR_END_OF_STREAM; 4039 } 4040 } 4041 4042 const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex]; 4043 offset = smpl->offset; 4044 size = smpl->size; 4045 cts = mCurrentTime + smpl->compositionOffset; 4046 mCurrentTime += smpl->duration; 4047 isSyncSample = (mCurrentSampleIndex == 0); // XXX 4048 4049 status_t err = mGroup->acquire_buffer(&mBuffer); 4050 4051 if (err != OK) { 4052 CHECK(mBuffer == NULL); 4053 ALOGV("acquire_buffer returned %d", err); 4054 return err; 4055 } 4056 } 4057 4058 const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex]; 4059 const sp<MetaData> bufmeta = mBuffer->meta_data(); 4060 bufmeta->clear(); 4061 if (smpl->encryptedsizes.size()) { 4062 // store clear/encrypted lengths in metadata 4063 bufmeta->setData(kKeyPlainSizes, 0, 4064 smpl->clearsizes.array(), smpl->clearsizes.size() * 4); 4065 bufmeta->setData(kKeyEncryptedSizes, 0, 4066 smpl->encryptedsizes.array(), smpl->encryptedsizes.size() * 4); 4067 bufmeta->setData(kKeyCryptoIV, 0, smpl->iv, 16); // use 16 or the actual size? 4068 bufmeta->setInt32(kKeyCryptoDefaultIVSize, mDefaultIVSize); 4069 bufmeta->setInt32(kKeyCryptoMode, mCryptoMode); 4070 bufmeta->setData(kKeyCryptoKey, 0, mCryptoKey, 16); 4071 } 4072 4073 if ((!mIsAVC && !mIsHEVC)|| mWantsNALFragments) { 4074 if (newBuffer) { 4075 ssize_t num_bytes_read = 4076 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size); 4077 4078 if (num_bytes_read < (ssize_t)size) { 4079 mBuffer->release(); 4080 mBuffer = NULL; 4081 4082 ALOGV("i/o error"); 4083 return ERROR_IO; 4084 } 4085 4086 CHECK(mBuffer != NULL); 4087 mBuffer->set_range(0, size); 4088 mBuffer->meta_data()->setInt64( 4089 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 4090 mBuffer->meta_data()->setInt64( 4091 kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale); 4092 4093 if (targetSampleTimeUs >= 0) { 4094 mBuffer->meta_data()->setInt64( 4095 kKeyTargetTime, targetSampleTimeUs); 4096 } 4097 4098 if (isSyncSample) { 4099 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 4100 } 4101 4102 ++mCurrentSampleIndex; 4103 } 4104 4105 if (!mIsAVC && !mIsHEVC) { 4106 *out = mBuffer; 4107 mBuffer = NULL; 4108 4109 return OK; 4110 } 4111 4112 // Each NAL unit is split up into its constituent fragments and 4113 // each one of them returned in its own buffer. 4114 4115 CHECK(mBuffer->range_length() >= mNALLengthSize); 4116 4117 const uint8_t *src = 4118 (const uint8_t *)mBuffer->data() + mBuffer->range_offset(); 4119 4120 size_t nal_size = parseNALSize(src); 4121 if (mBuffer->range_length() < mNALLengthSize + nal_size) { 4122 ALOGE("incomplete NAL unit."); 4123 4124 mBuffer->release(); 4125 mBuffer = NULL; 4126 4127 return ERROR_MALFORMED; 4128 } 4129 4130 MediaBuffer *clone = mBuffer->clone(); 4131 CHECK(clone != NULL); 4132 clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size); 4133 4134 CHECK(mBuffer != NULL); 4135 mBuffer->set_range( 4136 mBuffer->range_offset() + mNALLengthSize + nal_size, 4137 mBuffer->range_length() - mNALLengthSize - nal_size); 4138 4139 if (mBuffer->range_length() == 0) { 4140 mBuffer->release(); 4141 mBuffer = NULL; 4142 } 4143 4144 *out = clone; 4145 4146 return OK; 4147 } else { 4148 ALOGV("whole NAL"); 4149 // Whole NAL units are returned but each fragment is prefixed by 4150 // the start code (0x00 00 00 01). 4151 ssize_t num_bytes_read = 0; 4152 int32_t drm = 0; 4153 bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0); 4154 if (usesDRM) { 4155 num_bytes_read = 4156 mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size); 4157 } else { 4158 num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size); 4159 } 4160 4161 if (num_bytes_read < (ssize_t)size) { 4162 mBuffer->release(); 4163 mBuffer = NULL; 4164 4165 ALOGV("i/o error"); 4166 return ERROR_IO; 4167 } 4168 4169 if (usesDRM) { 4170 CHECK(mBuffer != NULL); 4171 mBuffer->set_range(0, size); 4172 4173 } else { 4174 uint8_t *dstData = (uint8_t *)mBuffer->data(); 4175 size_t srcOffset = 0; 4176 size_t dstOffset = 0; 4177 4178 while (srcOffset < size) { 4179 bool isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize); 4180 size_t nalLength = 0; 4181 if (!isMalFormed) { 4182 nalLength = parseNALSize(&mSrcBuffer[srcOffset]); 4183 srcOffset += mNALLengthSize; 4184 isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength); 4185 } 4186 4187 if (isMalFormed) { 4188 ALOGE("Video is malformed"); 4189 mBuffer->release(); 4190 mBuffer = NULL; 4191 return ERROR_MALFORMED; 4192 } 4193 4194 if (nalLength == 0) { 4195 continue; 4196 } 4197 4198 CHECK(dstOffset + 4 <= mBuffer->size()); 4199 4200 dstData[dstOffset++] = 0; 4201 dstData[dstOffset++] = 0; 4202 dstData[dstOffset++] = 0; 4203 dstData[dstOffset++] = 1; 4204 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength); 4205 srcOffset += nalLength; 4206 dstOffset += nalLength; 4207 } 4208 CHECK_EQ(srcOffset, size); 4209 CHECK(mBuffer != NULL); 4210 mBuffer->set_range(0, dstOffset); 4211 } 4212 4213 mBuffer->meta_data()->setInt64( 4214 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 4215 mBuffer->meta_data()->setInt64( 4216 kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale); 4217 4218 if (targetSampleTimeUs >= 0) { 4219 mBuffer->meta_data()->setInt64( 4220 kKeyTargetTime, targetSampleTimeUs); 4221 } 4222 4223 if (isSyncSample) { 4224 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 4225 } 4226 4227 ++mCurrentSampleIndex; 4228 4229 *out = mBuffer; 4230 mBuffer = NULL; 4231 4232 return OK; 4233 } 4234 } 4235 4236 MPEG4Extractor::Track *MPEG4Extractor::findTrackByMimePrefix( 4237 const char *mimePrefix) { 4238 for (Track *track = mFirstTrack; track != NULL; track = track->next) { 4239 const char *mime; 4240 if (track->meta != NULL 4241 && track->meta->findCString(kKeyMIMEType, &mime) 4242 && !strncasecmp(mime, mimePrefix, strlen(mimePrefix))) { 4243 return track; 4244 } 4245 } 4246 4247 return NULL; 4248 } 4249 4250 static bool LegacySniffMPEG4( 4251 const sp<DataSource> &source, String8 *mimeType, float *confidence) { 4252 uint8_t header[8]; 4253 4254 ssize_t n = source->readAt(4, header, sizeof(header)); 4255 if (n < (ssize_t)sizeof(header)) { 4256 return false; 4257 } 4258 4259 if (!memcmp(header, "ftyp3gp", 7) || !memcmp(header, "ftypmp42", 8) 4260 || !memcmp(header, "ftyp3gr6", 8) || !memcmp(header, "ftyp3gs6", 8) 4261 || !memcmp(header, "ftyp3ge6", 8) || !memcmp(header, "ftyp3gg6", 8) 4262 || !memcmp(header, "ftypisom", 8) || !memcmp(header, "ftypM4V ", 8) 4263 || !memcmp(header, "ftypM4A ", 8) || !memcmp(header, "ftypf4v ", 8) 4264 || !memcmp(header, "ftypkddi", 8) || !memcmp(header, "ftypM4VP", 8)) { 4265 *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4; 4266 *confidence = 0.4; 4267 4268 return true; 4269 } 4270 4271 return false; 4272 } 4273 4274 static bool isCompatibleBrand(uint32_t fourcc) { 4275 static const uint32_t kCompatibleBrands[] = { 4276 FOURCC('i', 's', 'o', 'm'), 4277 FOURCC('i', 's', 'o', '2'), 4278 FOURCC('a', 'v', 'c', '1'), 4279 FOURCC('h', 'v', 'c', '1'), 4280 FOURCC('h', 'e', 'v', '1'), 4281 FOURCC('3', 'g', 'p', '4'), 4282 FOURCC('m', 'p', '4', '1'), 4283 FOURCC('m', 'p', '4', '2'), 4284 4285 // Won't promise that the following file types can be played. 4286 // Just give these file types a chance. 4287 FOURCC('q', 't', ' ', ' '), // Apple's QuickTime 4288 FOURCC('M', 'S', 'N', 'V'), // Sony's PSP 4289 4290 FOURCC('3', 'g', '2', 'a'), // 3GPP2 4291 FOURCC('3', 'g', '2', 'b'), 4292 }; 4293 4294 for (size_t i = 0; 4295 i < sizeof(kCompatibleBrands) / sizeof(kCompatibleBrands[0]); 4296 ++i) { 4297 if (kCompatibleBrands[i] == fourcc) { 4298 return true; 4299 } 4300 } 4301 4302 return false; 4303 } 4304 4305 // Attempt to actually parse the 'ftyp' atom and determine if a suitable 4306 // compatible brand is present. 4307 // Also try to identify where this file's metadata ends 4308 // (end of the 'moov' atom) and report it to the caller as part of 4309 // the metadata. 4310 static bool BetterSniffMPEG4( 4311 const sp<DataSource> &source, String8 *mimeType, float *confidence, 4312 sp<AMessage> *meta) { 4313 // We scan up to 128 bytes to identify this file as an MP4. 4314 static const off64_t kMaxScanOffset = 128ll; 4315 4316 off64_t offset = 0ll; 4317 bool foundGoodFileType = false; 4318 off64_t moovAtomEndOffset = -1ll; 4319 bool done = false; 4320 4321 while (!done && offset < kMaxScanOffset) { 4322 uint32_t hdr[2]; 4323 if (source->readAt(offset, hdr, 8) < 8) { 4324 return false; 4325 } 4326 4327 uint64_t chunkSize = ntohl(hdr[0]); 4328 uint32_t chunkType = ntohl(hdr[1]); 4329 off64_t chunkDataOffset = offset + 8; 4330 4331 if (chunkSize == 1) { 4332 if (source->readAt(offset + 8, &chunkSize, 8) < 8) { 4333 return false; 4334 } 4335 4336 chunkSize = ntoh64(chunkSize); 4337 chunkDataOffset += 8; 4338 4339 if (chunkSize < 16) { 4340 // The smallest valid chunk is 16 bytes long in this case. 4341 return false; 4342 } 4343 } else if (chunkSize < 8) { 4344 // The smallest valid chunk is 8 bytes long. 4345 return false; 4346 } 4347 4348 off64_t chunkDataSize = offset + chunkSize - chunkDataOffset; 4349 4350 char chunkstring[5]; 4351 MakeFourCCString(chunkType, chunkstring); 4352 ALOGV("saw chunk type %s, size %" PRIu64 " @ %lld", chunkstring, chunkSize, offset); 4353 switch (chunkType) { 4354 case FOURCC('f', 't', 'y', 'p'): 4355 { 4356 if (chunkDataSize < 8) { 4357 return false; 4358 } 4359 4360 uint32_t numCompatibleBrands = (chunkDataSize - 8) / 4; 4361 for (size_t i = 0; i < numCompatibleBrands + 2; ++i) { 4362 if (i == 1) { 4363 // Skip this index, it refers to the minorVersion, 4364 // not a brand. 4365 continue; 4366 } 4367 4368 uint32_t brand; 4369 if (source->readAt( 4370 chunkDataOffset + 4 * i, &brand, 4) < 4) { 4371 return false; 4372 } 4373 4374 brand = ntohl(brand); 4375 4376 if (isCompatibleBrand(brand)) { 4377 foundGoodFileType = true; 4378 break; 4379 } 4380 } 4381 4382 if (!foundGoodFileType) { 4383 return false; 4384 } 4385 4386 break; 4387 } 4388 4389 case FOURCC('m', 'o', 'o', 'v'): 4390 { 4391 moovAtomEndOffset = offset + chunkSize; 4392 4393 done = true; 4394 break; 4395 } 4396 4397 default: 4398 break; 4399 } 4400 4401 offset += chunkSize; 4402 } 4403 4404 if (!foundGoodFileType) { 4405 return false; 4406 } 4407 4408 *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4; 4409 *confidence = 0.4f; 4410 4411 if (moovAtomEndOffset >= 0) { 4412 *meta = new AMessage; 4413 (*meta)->setInt64("meta-data-size", moovAtomEndOffset); 4414 4415 ALOGV("found metadata size: %lld", moovAtomEndOffset); 4416 } 4417 4418 return true; 4419 } 4420 4421 bool SniffMPEG4( 4422 const sp<DataSource> &source, String8 *mimeType, float *confidence, 4423 sp<AMessage> *meta) { 4424 if (BetterSniffMPEG4(source, mimeType, confidence, meta)) { 4425 return true; 4426 } 4427 4428 if (LegacySniffMPEG4(source, mimeType, confidence)) { 4429 ALOGW("Identified supported mpeg4 through LegacySniffMPEG4."); 4430 return true; 4431 } 4432 4433 return false; 4434 } 4435 4436 } // namespace android 4437