1 /* 2 * Copyright (C) 2009 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 //#define LOG_NDEBUG 0 18 #define LOG_TAG "MPEG4Extractor" 19 20 #include <ctype.h> 21 #include <inttypes.h> 22 #include <stdint.h> 23 #include <stdlib.h> 24 #include <string.h> 25 26 #include <utils/Log.h> 27 28 #include "include/MPEG4Extractor.h" 29 #include "include/SampleTable.h" 30 #include "include/ESDS.h" 31 32 #include <media/stagefright/foundation/ABitReader.h> 33 #include <media/stagefright/foundation/ABuffer.h> 34 #include <media/stagefright/foundation/ADebug.h> 35 #include <media/stagefright/foundation/AMessage.h> 36 #include <media/stagefright/foundation/AUtils.h> 37 #include <media/stagefright/foundation/ColorUtils.h> 38 #include <media/stagefright/MediaBuffer.h> 39 #include <media/stagefright/MediaBufferGroup.h> 40 #include <media/stagefright/MediaDefs.h> 41 #include <media/stagefright/MediaSource.h> 42 #include <media/stagefright/MetaData.h> 43 #include <utils/String8.h> 44 45 #include <byteswap.h> 46 #include "include/ID3.h" 47 #include "include/avc_utils.h" 48 49 #ifndef UINT32_MAX 50 #define UINT32_MAX (4294967295U) 51 #endif 52 53 namespace android { 54 55 enum { 56 // max track header chunk to return 57 kMaxTrackHeaderSize = 32, 58 }; 59 60 class MPEG4Source : public MediaSource { 61 public: 62 // Caller retains ownership of both "dataSource" and "sampleTable". 63 MPEG4Source(const sp<MPEG4Extractor> &owner, 64 const sp<MetaData> &format, 65 const sp<DataSource> &dataSource, 66 int32_t timeScale, 67 const sp<SampleTable> &sampleTable, 68 Vector<SidxEntry> &sidx, 69 const Trex *trex, 70 off64_t firstMoofOffset); 71 72 virtual status_t start(MetaData *params = NULL); 73 virtual status_t stop(); 74 75 virtual sp<MetaData> getFormat(); 76 77 virtual status_t read(MediaBuffer **buffer, const ReadOptions *options = NULL); 78 virtual bool supportNonblockingRead() { return true; } 79 virtual status_t fragmentedRead(MediaBuffer **buffer, const ReadOptions *options = NULL); 80 81 protected: 82 virtual ~MPEG4Source(); 83 84 private: 85 Mutex mLock; 86 87 // keep the MPEG4Extractor around, since we're referencing its data 88 sp<MPEG4Extractor> mOwner; 89 sp<MetaData> mFormat; 90 sp<DataSource> mDataSource; 91 int32_t mTimescale; 92 sp<SampleTable> mSampleTable; 93 uint32_t mCurrentSampleIndex; 94 uint32_t mCurrentFragmentIndex; 95 Vector<SidxEntry> &mSegments; 96 const Trex *mTrex; 97 off64_t mFirstMoofOffset; 98 off64_t mCurrentMoofOffset; 99 off64_t mNextMoofOffset; 100 uint32_t mCurrentTime; 101 int32_t mLastParsedTrackId; 102 int32_t mTrackId; 103 104 int32_t mCryptoMode; // passed in from extractor 105 int32_t mDefaultIVSize; // passed in from extractor 106 uint8_t mCryptoKey[16]; // passed in from extractor 107 uint32_t mCurrentAuxInfoType; 108 uint32_t mCurrentAuxInfoTypeParameter; 109 int32_t mCurrentDefaultSampleInfoSize; 110 uint32_t mCurrentSampleInfoCount; 111 uint32_t mCurrentSampleInfoAllocSize; 112 uint8_t* mCurrentSampleInfoSizes; 113 uint32_t mCurrentSampleInfoOffsetCount; 114 uint32_t mCurrentSampleInfoOffsetsAllocSize; 115 uint64_t* mCurrentSampleInfoOffsets; 116 117 bool mIsAVC; 118 bool mIsHEVC; 119 size_t mNALLengthSize; 120 121 bool mStarted; 122 123 MediaBufferGroup *mGroup; 124 125 MediaBuffer *mBuffer; 126 127 bool mWantsNALFragments; 128 129 uint8_t *mSrcBuffer; 130 131 size_t parseNALSize(const uint8_t *data) const; 132 status_t parseChunk(off64_t *offset); 133 status_t parseTrackFragmentHeader(off64_t offset, off64_t size); 134 status_t parseTrackFragmentRun(off64_t offset, off64_t size); 135 status_t parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size); 136 status_t parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size); 137 138 struct TrackFragmentHeaderInfo { 139 enum Flags { 140 kBaseDataOffsetPresent = 0x01, 141 kSampleDescriptionIndexPresent = 0x02, 142 kDefaultSampleDurationPresent = 0x08, 143 kDefaultSampleSizePresent = 0x10, 144 kDefaultSampleFlagsPresent = 0x20, 145 kDurationIsEmpty = 0x10000, 146 }; 147 148 uint32_t mTrackID; 149 uint32_t mFlags; 150 uint64_t mBaseDataOffset; 151 uint32_t mSampleDescriptionIndex; 152 uint32_t mDefaultSampleDuration; 153 uint32_t mDefaultSampleSize; 154 uint32_t mDefaultSampleFlags; 155 156 uint64_t mDataOffset; 157 }; 158 TrackFragmentHeaderInfo mTrackFragmentHeaderInfo; 159 160 struct Sample { 161 off64_t offset; 162 size_t size; 163 uint32_t duration; 164 int32_t compositionOffset; 165 uint8_t iv[16]; 166 Vector<size_t> clearsizes; 167 Vector<size_t> encryptedsizes; 168 }; 169 Vector<Sample> mCurrentSamples; 170 171 MPEG4Source(const MPEG4Source &); 172 MPEG4Source &operator=(const MPEG4Source &); 173 }; 174 175 // This custom data source wraps an existing one and satisfies requests 176 // falling entirely within a cached range from the cache while forwarding 177 // all remaining requests to the wrapped datasource. 178 // This is used to cache the full sampletable metadata for a single track, 179 // possibly wrapping multiple times to cover all tracks, i.e. 180 // Each MPEG4DataSource caches the sampletable metadata for a single track. 181 182 struct MPEG4DataSource : public DataSource { 183 MPEG4DataSource(const sp<DataSource> &source); 184 185 virtual status_t initCheck() const; 186 virtual ssize_t readAt(off64_t offset, void *data, size_t size); 187 virtual status_t getSize(off64_t *size); 188 virtual uint32_t flags(); 189 190 status_t setCachedRange(off64_t offset, size_t size); 191 192 protected: 193 virtual ~MPEG4DataSource(); 194 195 private: 196 Mutex mLock; 197 198 sp<DataSource> mSource; 199 off64_t mCachedOffset; 200 size_t mCachedSize; 201 uint8_t *mCache; 202 203 void clearCache(); 204 205 MPEG4DataSource(const MPEG4DataSource &); 206 MPEG4DataSource &operator=(const MPEG4DataSource &); 207 }; 208 209 MPEG4DataSource::MPEG4DataSource(const sp<DataSource> &source) 210 : mSource(source), 211 mCachedOffset(0), 212 mCachedSize(0), 213 mCache(NULL) { 214 } 215 216 MPEG4DataSource::~MPEG4DataSource() { 217 clearCache(); 218 } 219 220 void MPEG4DataSource::clearCache() { 221 if (mCache) { 222 free(mCache); 223 mCache = NULL; 224 } 225 226 mCachedOffset = 0; 227 mCachedSize = 0; 228 } 229 230 status_t MPEG4DataSource::initCheck() const { 231 return mSource->initCheck(); 232 } 233 234 ssize_t MPEG4DataSource::readAt(off64_t offset, void *data, size_t size) { 235 Mutex::Autolock autoLock(mLock); 236 237 if (isInRange(mCachedOffset, mCachedSize, offset, size)) { 238 memcpy(data, &mCache[offset - mCachedOffset], size); 239 return size; 240 } 241 242 return mSource->readAt(offset, data, size); 243 } 244 245 status_t MPEG4DataSource::getSize(off64_t *size) { 246 return mSource->getSize(size); 247 } 248 249 uint32_t MPEG4DataSource::flags() { 250 return mSource->flags(); 251 } 252 253 status_t MPEG4DataSource::setCachedRange(off64_t offset, size_t size) { 254 Mutex::Autolock autoLock(mLock); 255 256 clearCache(); 257 258 mCache = (uint8_t *)malloc(size); 259 260 if (mCache == NULL) { 261 return -ENOMEM; 262 } 263 264 mCachedOffset = offset; 265 mCachedSize = size; 266 267 ssize_t err = mSource->readAt(mCachedOffset, mCache, mCachedSize); 268 269 if (err < (ssize_t)size) { 270 clearCache(); 271 272 return ERROR_IO; 273 } 274 275 return OK; 276 } 277 278 //////////////////////////////////////////////////////////////////////////////// 279 280 static const bool kUseHexDump = false; 281 282 static void hexdump(const void *_data, size_t size) { 283 const uint8_t *data = (const uint8_t *)_data; 284 size_t offset = 0; 285 while (offset < size) { 286 printf("0x%04zx ", offset); 287 288 size_t n = size - offset; 289 if (n > 16) { 290 n = 16; 291 } 292 293 for (size_t i = 0; i < 16; ++i) { 294 if (i == 8) { 295 printf(" "); 296 } 297 298 if (offset + i < size) { 299 printf("%02x ", data[offset + i]); 300 } else { 301 printf(" "); 302 } 303 } 304 305 printf(" "); 306 307 for (size_t i = 0; i < n; ++i) { 308 if (isprint(data[offset + i])) { 309 printf("%c", data[offset + i]); 310 } else { 311 printf("."); 312 } 313 } 314 315 printf("\n"); 316 317 offset += 16; 318 } 319 } 320 321 static const char *FourCC2MIME(uint32_t fourcc) { 322 switch (fourcc) { 323 case FOURCC('m', 'p', '4', 'a'): 324 return MEDIA_MIMETYPE_AUDIO_AAC; 325 326 case FOURCC('s', 'a', 'm', 'r'): 327 return MEDIA_MIMETYPE_AUDIO_AMR_NB; 328 329 case FOURCC('s', 'a', 'w', 'b'): 330 return MEDIA_MIMETYPE_AUDIO_AMR_WB; 331 332 case FOURCC('m', 'p', '4', 'v'): 333 return MEDIA_MIMETYPE_VIDEO_MPEG4; 334 335 case FOURCC('s', '2', '6', '3'): 336 case FOURCC('h', '2', '6', '3'): 337 case FOURCC('H', '2', '6', '3'): 338 return MEDIA_MIMETYPE_VIDEO_H263; 339 340 case FOURCC('a', 'v', 'c', '1'): 341 return MEDIA_MIMETYPE_VIDEO_AVC; 342 343 case FOURCC('h', 'v', 'c', '1'): 344 case FOURCC('h', 'e', 'v', '1'): 345 return MEDIA_MIMETYPE_VIDEO_HEVC; 346 default: 347 CHECK(!"should not be here."); 348 return NULL; 349 } 350 } 351 352 static bool AdjustChannelsAndRate(uint32_t fourcc, uint32_t *channels, uint32_t *rate) { 353 if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB, FourCC2MIME(fourcc))) { 354 // AMR NB audio is always mono, 8kHz 355 *channels = 1; 356 *rate = 8000; 357 return true; 358 } else if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, FourCC2MIME(fourcc))) { 359 // AMR WB audio is always mono, 16kHz 360 *channels = 1; 361 *rate = 16000; 362 return true; 363 } 364 return false; 365 } 366 367 MPEG4Extractor::MPEG4Extractor(const sp<DataSource> &source) 368 : mMoofOffset(0), 369 mMoofFound(false), 370 mMdatFound(false), 371 mDataSource(source), 372 mInitCheck(NO_INIT), 373 mHasVideo(false), 374 mHeaderTimescale(0), 375 mIsQT(false), 376 mFirstTrack(NULL), 377 mLastTrack(NULL), 378 mFileMetaData(new MetaData), 379 mFirstSINF(NULL), 380 mIsDrm(false) { 381 } 382 383 MPEG4Extractor::~MPEG4Extractor() { 384 Track *track = mFirstTrack; 385 while (track) { 386 Track *next = track->next; 387 388 delete track; 389 track = next; 390 } 391 mFirstTrack = mLastTrack = NULL; 392 393 SINF *sinf = mFirstSINF; 394 while (sinf) { 395 SINF *next = sinf->next; 396 delete[] sinf->IPMPData; 397 delete sinf; 398 sinf = next; 399 } 400 mFirstSINF = NULL; 401 402 for (size_t i = 0; i < mPssh.size(); i++) { 403 delete [] mPssh[i].data; 404 } 405 } 406 407 uint32_t MPEG4Extractor::flags() const { 408 return CAN_PAUSE | 409 ((mMoofOffset == 0 || mSidxEntries.size() != 0) ? 410 (CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK) : 0); 411 } 412 413 sp<MetaData> MPEG4Extractor::getMetaData() { 414 status_t err; 415 if ((err = readMetaData()) != OK) { 416 return new MetaData; 417 } 418 419 return mFileMetaData; 420 } 421 422 size_t MPEG4Extractor::countTracks() { 423 status_t err; 424 if ((err = readMetaData()) != OK) { 425 ALOGV("MPEG4Extractor::countTracks: no tracks"); 426 return 0; 427 } 428 429 size_t n = 0; 430 Track *track = mFirstTrack; 431 while (track) { 432 ++n; 433 track = track->next; 434 } 435 436 ALOGV("MPEG4Extractor::countTracks: %zu tracks", n); 437 return n; 438 } 439 440 sp<MetaData> MPEG4Extractor::getTrackMetaData( 441 size_t index, uint32_t flags) { 442 status_t err; 443 if ((err = readMetaData()) != OK) { 444 return NULL; 445 } 446 447 Track *track = mFirstTrack; 448 while (index > 0) { 449 if (track == NULL) { 450 return NULL; 451 } 452 453 track = track->next; 454 --index; 455 } 456 457 if (track == NULL) { 458 return NULL; 459 } 460 461 if ((flags & kIncludeExtensiveMetaData) 462 && !track->includes_expensive_metadata) { 463 track->includes_expensive_metadata = true; 464 465 const char *mime; 466 CHECK(track->meta->findCString(kKeyMIMEType, &mime)); 467 if (!strncasecmp("video/", mime, 6)) { 468 if (mMoofOffset > 0) { 469 int64_t duration; 470 if (track->meta->findInt64(kKeyDuration, &duration)) { 471 // nothing fancy, just pick a frame near 1/4th of the duration 472 track->meta->setInt64( 473 kKeyThumbnailTime, duration / 4); 474 } 475 } else { 476 uint32_t sampleIndex; 477 uint32_t sampleTime; 478 if (track->sampleTable->findThumbnailSample(&sampleIndex) == OK 479 && track->sampleTable->getMetaDataForSample( 480 sampleIndex, NULL /* offset */, NULL /* size */, 481 &sampleTime) == OK) { 482 track->meta->setInt64( 483 kKeyThumbnailTime, 484 ((int64_t)sampleTime * 1000000) / track->timescale); 485 } 486 } 487 488 // MPEG2 tracks do not provide CSD, so read the stream header 489 if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2)) { 490 off64_t offset; 491 size_t size; 492 if (track->sampleTable->getMetaDataForSample( 493 0 /* sampleIndex */, &offset, &size, NULL /* sampleTime */) == OK) { 494 if (size > kMaxTrackHeaderSize) { 495 size = kMaxTrackHeaderSize; 496 } 497 uint8_t header[kMaxTrackHeaderSize]; 498 if (mDataSource->readAt(offset, &header, size) == (ssize_t)size) { 499 track->meta->setData(kKeyStreamHeader, 'mdat', header, size); 500 } 501 } 502 } 503 } 504 } 505 506 return track->meta; 507 } 508 509 static void MakeFourCCString(uint32_t x, char *s) { 510 s[0] = x >> 24; 511 s[1] = (x >> 16) & 0xff; 512 s[2] = (x >> 8) & 0xff; 513 s[3] = x & 0xff; 514 s[4] = '\0'; 515 } 516 517 status_t MPEG4Extractor::readMetaData() { 518 if (mInitCheck != NO_INIT) { 519 return mInitCheck; 520 } 521 522 off64_t offset = 0; 523 status_t err; 524 bool sawMoovOrSidx = false; 525 526 while (!(sawMoovOrSidx && (mMdatFound || mMoofFound))) { 527 off64_t orig_offset = offset; 528 err = parseChunk(&offset, 0); 529 530 if (err != OK && err != UNKNOWN_ERROR) { 531 break; 532 } else if (offset <= orig_offset) { 533 // only continue parsing if the offset was advanced, 534 // otherwise we might end up in an infinite loop 535 ALOGE("did not advance: %lld->%lld", (long long)orig_offset, (long long)offset); 536 err = ERROR_MALFORMED; 537 break; 538 } else if (err == UNKNOWN_ERROR) { 539 sawMoovOrSidx = true; 540 } 541 } 542 543 if (mInitCheck == OK) { 544 if (mHasVideo) { 545 mFileMetaData->setCString( 546 kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_MPEG4); 547 } else { 548 mFileMetaData->setCString(kKeyMIMEType, "audio/mp4"); 549 } 550 } else { 551 mInitCheck = err; 552 } 553 554 CHECK_NE(err, (status_t)NO_INIT); 555 556 // copy pssh data into file metadata 557 uint64_t psshsize = 0; 558 for (size_t i = 0; i < mPssh.size(); i++) { 559 psshsize += 20 + mPssh[i].datalen; 560 } 561 if (psshsize > 0 && psshsize <= UINT32_MAX) { 562 char *buf = (char*)malloc(psshsize); 563 if (!buf) { 564 ALOGE("b/28471206"); 565 return NO_MEMORY; 566 } 567 char *ptr = buf; 568 for (size_t i = 0; i < mPssh.size(); i++) { 569 memcpy(ptr, mPssh[i].uuid, 20); // uuid + length 570 memcpy(ptr + 20, mPssh[i].data, mPssh[i].datalen); 571 ptr += (20 + mPssh[i].datalen); 572 } 573 mFileMetaData->setData(kKeyPssh, 'pssh', buf, psshsize); 574 free(buf); 575 } 576 return mInitCheck; 577 } 578 579 char* MPEG4Extractor::getDrmTrackInfo(size_t trackID, int *len) { 580 if (mFirstSINF == NULL) { 581 return NULL; 582 } 583 584 SINF *sinf = mFirstSINF; 585 while (sinf && (trackID != sinf->trackID)) { 586 sinf = sinf->next; 587 } 588 589 if (sinf == NULL) { 590 return NULL; 591 } 592 593 *len = sinf->len; 594 return sinf->IPMPData; 595 } 596 597 // Reads an encoded integer 7 bits at a time until it encounters the high bit clear. 598 static int32_t readSize(off64_t offset, 599 const sp<DataSource> DataSource, uint8_t *numOfBytes) { 600 uint32_t size = 0; 601 uint8_t data; 602 bool moreData = true; 603 *numOfBytes = 0; 604 605 while (moreData) { 606 if (DataSource->readAt(offset, &data, 1) < 1) { 607 return -1; 608 } 609 offset ++; 610 moreData = (data >= 128) ? true : false; 611 size = (size << 7) | (data & 0x7f); // Take last 7 bits 612 (*numOfBytes) ++; 613 } 614 615 return size; 616 } 617 618 status_t MPEG4Extractor::parseDrmSINF( 619 off64_t * /* offset */, off64_t data_offset) { 620 uint8_t updateIdTag; 621 if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) { 622 return ERROR_IO; 623 } 624 data_offset ++; 625 626 if (0x01/*OBJECT_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) { 627 return ERROR_MALFORMED; 628 } 629 630 uint8_t numOfBytes; 631 int32_t size = readSize(data_offset, mDataSource, &numOfBytes); 632 if (size < 0) { 633 return ERROR_IO; 634 } 635 data_offset += numOfBytes; 636 637 while(size >= 11 ) { 638 uint8_t descriptorTag; 639 if (mDataSource->readAt(data_offset, &descriptorTag, 1) < 1) { 640 return ERROR_IO; 641 } 642 data_offset ++; 643 644 if (0x11/*OBJECT_DESCRIPTOR_ID_TAG*/ != descriptorTag) { 645 return ERROR_MALFORMED; 646 } 647 648 uint8_t buffer[8]; 649 //ObjectDescriptorID and ObjectDescriptor url flag 650 if (mDataSource->readAt(data_offset, buffer, 2) < 2) { 651 return ERROR_IO; 652 } 653 data_offset += 2; 654 655 if ((buffer[1] >> 5) & 0x0001) { //url flag is set 656 return ERROR_MALFORMED; 657 } 658 659 if (mDataSource->readAt(data_offset, buffer, 8) < 8) { 660 return ERROR_IO; 661 } 662 data_offset += 8; 663 664 if ((0x0F/*ES_ID_REF_TAG*/ != buffer[1]) 665 || ( 0x0A/*IPMP_DESCRIPTOR_POINTER_ID_TAG*/ != buffer[5])) { 666 return ERROR_MALFORMED; 667 } 668 669 SINF *sinf = new SINF; 670 sinf->trackID = U16_AT(&buffer[3]); 671 sinf->IPMPDescriptorID = buffer[7]; 672 sinf->next = mFirstSINF; 673 mFirstSINF = sinf; 674 675 size -= (8 + 2 + 1); 676 } 677 678 if (size != 0) { 679 return ERROR_MALFORMED; 680 } 681 682 if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) { 683 return ERROR_IO; 684 } 685 data_offset ++; 686 687 if(0x05/*IPMP_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) { 688 return ERROR_MALFORMED; 689 } 690 691 size = readSize(data_offset, mDataSource, &numOfBytes); 692 if (size < 0) { 693 return ERROR_IO; 694 } 695 data_offset += numOfBytes; 696 697 while (size > 0) { 698 uint8_t tag; 699 int32_t dataLen; 700 if (mDataSource->readAt(data_offset, &tag, 1) < 1) { 701 return ERROR_IO; 702 } 703 data_offset ++; 704 705 if (0x0B/*IPMP_DESCRIPTOR_ID_TAG*/ == tag) { 706 uint8_t id; 707 dataLen = readSize(data_offset, mDataSource, &numOfBytes); 708 if (dataLen < 0) { 709 return ERROR_IO; 710 } else if (dataLen < 4) { 711 return ERROR_MALFORMED; 712 } 713 data_offset += numOfBytes; 714 715 if (mDataSource->readAt(data_offset, &id, 1) < 1) { 716 return ERROR_IO; 717 } 718 data_offset ++; 719 720 SINF *sinf = mFirstSINF; 721 while (sinf && (sinf->IPMPDescriptorID != id)) { 722 sinf = sinf->next; 723 } 724 if (sinf == NULL) { 725 return ERROR_MALFORMED; 726 } 727 sinf->len = dataLen - 3; 728 sinf->IPMPData = new (std::nothrow) char[sinf->len]; 729 if (sinf->IPMPData == NULL) { 730 return ERROR_MALFORMED; 731 } 732 data_offset += 2; 733 734 if (mDataSource->readAt(data_offset, sinf->IPMPData, sinf->len) < sinf->len) { 735 return ERROR_IO; 736 } 737 data_offset += sinf->len; 738 739 size -= (dataLen + numOfBytes + 1); 740 } 741 } 742 743 if (size != 0) { 744 return ERROR_MALFORMED; 745 } 746 747 return UNKNOWN_ERROR; // Return a dummy error. 748 } 749 750 struct PathAdder { 751 PathAdder(Vector<uint32_t> *path, uint32_t chunkType) 752 : mPath(path) { 753 mPath->push(chunkType); 754 } 755 756 ~PathAdder() { 757 mPath->pop(); 758 } 759 760 private: 761 Vector<uint32_t> *mPath; 762 763 PathAdder(const PathAdder &); 764 PathAdder &operator=(const PathAdder &); 765 }; 766 767 static bool underMetaDataPath(const Vector<uint32_t> &path) { 768 return path.size() >= 5 769 && path[0] == FOURCC('m', 'o', 'o', 'v') 770 && path[1] == FOURCC('u', 'd', 't', 'a') 771 && path[2] == FOURCC('m', 'e', 't', 'a') 772 && path[3] == FOURCC('i', 'l', 's', 't'); 773 } 774 775 static bool underQTMetaPath(const Vector<uint32_t> &path, int32_t depth) { 776 return path.size() >= 2 777 && path[0] == FOURCC('m', 'o', 'o', 'v') 778 && path[1] == FOURCC('m', 'e', 't', 'a') 779 && (depth == 2 780 || (depth == 3 781 && (path[2] == FOURCC('h', 'd', 'l', 'r') 782 || path[2] == FOURCC('i', 'l', 's', 't') 783 || path[2] == FOURCC('k', 'e', 'y', 's')))); 784 } 785 786 // Given a time in seconds since Jan 1 1904, produce a human-readable string. 787 static bool convertTimeToDate(int64_t time_1904, String8 *s) { 788 // delta between mpeg4 time and unix epoch time 789 static const int64_t delta = (((66 * 365 + 17) * 24) * 3600); 790 if (time_1904 < INT64_MIN + delta) { 791 return false; 792 } 793 time_t time_1970 = time_1904 - delta; 794 795 char tmp[32]; 796 struct tm* tm = gmtime(&time_1970); 797 if (tm != NULL && 798 strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", tm) > 0) { 799 s->setTo(tmp); 800 return true; 801 } 802 return false; 803 } 804 805 status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) { 806 ALOGV("entering parseChunk %lld/%d", (long long)*offset, depth); 807 808 if (*offset < 0) { 809 ALOGE("b/23540914"); 810 return ERROR_MALFORMED; 811 } 812 uint32_t hdr[2]; 813 if (mDataSource->readAt(*offset, hdr, 8) < 8) { 814 return ERROR_IO; 815 } 816 uint64_t chunk_size = ntohl(hdr[0]); 817 int32_t chunk_type = ntohl(hdr[1]); 818 off64_t data_offset = *offset + 8; 819 820 if (chunk_size == 1) { 821 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) { 822 return ERROR_IO; 823 } 824 chunk_size = ntoh64(chunk_size); 825 data_offset += 8; 826 827 if (chunk_size < 16) { 828 // The smallest valid chunk is 16 bytes long in this case. 829 return ERROR_MALFORMED; 830 } 831 } else if (chunk_size == 0) { 832 if (depth == 0) { 833 // atom extends to end of file 834 off64_t sourceSize; 835 if (mDataSource->getSize(&sourceSize) == OK) { 836 chunk_size = (sourceSize - *offset); 837 } else { 838 // XXX could we just pick a "sufficiently large" value here? 839 ALOGE("atom size is 0, and data source has no size"); 840 return ERROR_MALFORMED; 841 } 842 } else { 843 // not allowed for non-toplevel atoms, skip it 844 *offset += 4; 845 return OK; 846 } 847 } else if (chunk_size < 8) { 848 // The smallest valid chunk is 8 bytes long. 849 ALOGE("invalid chunk size: %" PRIu64, chunk_size); 850 return ERROR_MALFORMED; 851 } 852 853 char chunk[5]; 854 MakeFourCCString(chunk_type, chunk); 855 ALOGV("chunk: %s @ %lld, %d", chunk, (long long)*offset, depth); 856 857 if (kUseHexDump) { 858 static const char kWhitespace[] = " "; 859 const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth]; 860 printf("%sfound chunk '%s' of size %" PRIu64 "\n", indent, chunk, chunk_size); 861 862 char buffer[256]; 863 size_t n = chunk_size; 864 if (n > sizeof(buffer)) { 865 n = sizeof(buffer); 866 } 867 if (mDataSource->readAt(*offset, buffer, n) 868 < (ssize_t)n) { 869 return ERROR_IO; 870 } 871 872 hexdump(buffer, n); 873 } 874 875 PathAdder autoAdder(&mPath, chunk_type); 876 877 // (data_offset - *offset) is either 8 or 16 878 off64_t chunk_data_size = chunk_size - (data_offset - *offset); 879 if (chunk_data_size < 0) { 880 ALOGE("b/23540914"); 881 return ERROR_MALFORMED; 882 } 883 884 if (chunk_type != FOURCC('c', 'p', 'r', 't') 885 && chunk_type != FOURCC('c', 'o', 'v', 'r') 886 && mPath.size() == 5 && underMetaDataPath(mPath)) { 887 off64_t stop_offset = *offset + chunk_size; 888 *offset = data_offset; 889 while (*offset < stop_offset) { 890 status_t err = parseChunk(offset, depth + 1); 891 if (err != OK) { 892 return err; 893 } 894 } 895 896 if (*offset != stop_offset) { 897 return ERROR_MALFORMED; 898 } 899 900 return OK; 901 } 902 903 switch(chunk_type) { 904 case FOURCC('m', 'o', 'o', 'v'): 905 case FOURCC('t', 'r', 'a', 'k'): 906 case FOURCC('m', 'd', 'i', 'a'): 907 case FOURCC('m', 'i', 'n', 'f'): 908 case FOURCC('d', 'i', 'n', 'f'): 909 case FOURCC('s', 't', 'b', 'l'): 910 case FOURCC('m', 'v', 'e', 'x'): 911 case FOURCC('m', 'o', 'o', 'f'): 912 case FOURCC('t', 'r', 'a', 'f'): 913 case FOURCC('m', 'f', 'r', 'a'): 914 case FOURCC('u', 'd', 't', 'a'): 915 case FOURCC('i', 'l', 's', 't'): 916 case FOURCC('s', 'i', 'n', 'f'): 917 case FOURCC('s', 'c', 'h', 'i'): 918 case FOURCC('e', 'd', 't', 's'): 919 case FOURCC('w', 'a', 'v', 'e'): 920 { 921 if (chunk_type == FOURCC('m', 'o', 'o', 'f') && !mMoofFound) { 922 // store the offset of the first segment 923 mMoofFound = true; 924 mMoofOffset = *offset; 925 } 926 927 if (chunk_type == FOURCC('s', 't', 'b', 'l')) { 928 ALOGV("sampleTable chunk is %" PRIu64 " bytes long.", chunk_size); 929 930 if (mDataSource->flags() 931 & (DataSource::kWantsPrefetching 932 | DataSource::kIsCachingDataSource)) { 933 sp<MPEG4DataSource> cachedSource = 934 new MPEG4DataSource(mDataSource); 935 936 if (cachedSource->setCachedRange(*offset, chunk_size) == OK) { 937 mDataSource = cachedSource; 938 } 939 } 940 941 if (mLastTrack == NULL) 942 return ERROR_MALFORMED; 943 944 mLastTrack->sampleTable = new SampleTable(mDataSource); 945 } 946 947 bool isTrack = false; 948 if (chunk_type == FOURCC('t', 'r', 'a', 'k')) { 949 isTrack = true; 950 951 Track *track = new Track; 952 track->next = NULL; 953 if (mLastTrack) { 954 mLastTrack->next = track; 955 } else { 956 mFirstTrack = track; 957 } 958 mLastTrack = track; 959 960 track->meta = new MetaData; 961 track->includes_expensive_metadata = false; 962 track->skipTrack = false; 963 track->timescale = 0; 964 track->meta->setCString(kKeyMIMEType, "application/octet-stream"); 965 } 966 967 off64_t stop_offset = *offset + chunk_size; 968 *offset = data_offset; 969 while (*offset < stop_offset) { 970 status_t err = parseChunk(offset, depth + 1); 971 if (err != OK) { 972 return err; 973 } 974 } 975 976 if (*offset != stop_offset) { 977 return ERROR_MALFORMED; 978 } 979 980 if (isTrack) { 981 int32_t trackId; 982 // There must be exact one track header per track. 983 if (!mLastTrack->meta->findInt32(kKeyTrackID, &trackId)) { 984 mLastTrack->skipTrack = true; 985 } 986 if (mLastTrack->skipTrack) { 987 Track *cur = mFirstTrack; 988 989 if (cur == mLastTrack) { 990 delete cur; 991 mFirstTrack = mLastTrack = NULL; 992 } else { 993 while (cur && cur->next != mLastTrack) { 994 cur = cur->next; 995 } 996 cur->next = NULL; 997 delete mLastTrack; 998 mLastTrack = cur; 999 } 1000 1001 return OK; 1002 } 1003 1004 status_t err = verifyTrack(mLastTrack); 1005 1006 if (err != OK) { 1007 return err; 1008 } 1009 } else if (chunk_type == FOURCC('m', 'o', 'o', 'v')) { 1010 mInitCheck = OK; 1011 1012 if (!mIsDrm) { 1013 return UNKNOWN_ERROR; // Return a dummy error. 1014 } else { 1015 return OK; 1016 } 1017 } 1018 break; 1019 } 1020 1021 case FOURCC('e', 'l', 's', 't'): 1022 { 1023 *offset += chunk_size; 1024 1025 // See 14496-12 8.6.6 1026 uint8_t version; 1027 if (mDataSource->readAt(data_offset, &version, 1) < 1) { 1028 return ERROR_IO; 1029 } 1030 1031 uint32_t entry_count; 1032 if (!mDataSource->getUInt32(data_offset + 4, &entry_count)) { 1033 return ERROR_IO; 1034 } 1035 1036 if (entry_count != 1) { 1037 // we only support a single entry at the moment, for gapless playback 1038 ALOGW("ignoring edit list with %d entries", entry_count); 1039 } else if (mHeaderTimescale == 0) { 1040 ALOGW("ignoring edit list because timescale is 0"); 1041 } else { 1042 off64_t entriesoffset = data_offset + 8; 1043 uint64_t segment_duration; 1044 int64_t media_time; 1045 1046 if (version == 1) { 1047 if (!mDataSource->getUInt64(entriesoffset, &segment_duration) || 1048 !mDataSource->getUInt64(entriesoffset + 8, (uint64_t*)&media_time)) { 1049 return ERROR_IO; 1050 } 1051 } else if (version == 0) { 1052 uint32_t sd; 1053 int32_t mt; 1054 if (!mDataSource->getUInt32(entriesoffset, &sd) || 1055 !mDataSource->getUInt32(entriesoffset + 4, (uint32_t*)&mt)) { 1056 return ERROR_IO; 1057 } 1058 segment_duration = sd; 1059 media_time = mt; 1060 } else { 1061 return ERROR_IO; 1062 } 1063 1064 uint64_t halfscale = mHeaderTimescale / 2; 1065 segment_duration = (segment_duration * 1000000 + halfscale)/ mHeaderTimescale; 1066 media_time = (media_time * 1000000 + halfscale) / mHeaderTimescale; 1067 1068 int64_t duration; 1069 int32_t samplerate; 1070 if (!mLastTrack) { 1071 return ERROR_MALFORMED; 1072 } 1073 if (mLastTrack->meta->findInt64(kKeyDuration, &duration) && 1074 mLastTrack->meta->findInt32(kKeySampleRate, &samplerate)) { 1075 1076 int64_t delay = (media_time * samplerate + 500000) / 1000000; 1077 mLastTrack->meta->setInt32(kKeyEncoderDelay, delay); 1078 1079 int64_t paddingus = duration - (int64_t)(segment_duration + media_time); 1080 if (paddingus < 0) { 1081 // track duration from media header (which is what kKeyDuration is) might 1082 // be slightly shorter than the segment duration, which would make the 1083 // padding negative. Clamp to zero. 1084 paddingus = 0; 1085 } 1086 int64_t paddingsamples = (paddingus * samplerate + 500000) / 1000000; 1087 mLastTrack->meta->setInt32(kKeyEncoderPadding, paddingsamples); 1088 } 1089 } 1090 break; 1091 } 1092 1093 case FOURCC('f', 'r', 'm', 'a'): 1094 { 1095 *offset += chunk_size; 1096 1097 uint32_t original_fourcc; 1098 if (mDataSource->readAt(data_offset, &original_fourcc, 4) < 4) { 1099 return ERROR_IO; 1100 } 1101 original_fourcc = ntohl(original_fourcc); 1102 ALOGV("read original format: %d", original_fourcc); 1103 1104 if (mLastTrack == NULL) 1105 return ERROR_MALFORMED; 1106 1107 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(original_fourcc)); 1108 uint32_t num_channels = 0; 1109 uint32_t sample_rate = 0; 1110 if (AdjustChannelsAndRate(original_fourcc, &num_channels, &sample_rate)) { 1111 mLastTrack->meta->setInt32(kKeyChannelCount, num_channels); 1112 mLastTrack->meta->setInt32(kKeySampleRate, sample_rate); 1113 } 1114 break; 1115 } 1116 1117 case FOURCC('t', 'e', 'n', 'c'): 1118 { 1119 *offset += chunk_size; 1120 1121 if (chunk_size < 32) { 1122 return ERROR_MALFORMED; 1123 } 1124 1125 // tenc box contains 1 byte version, 3 byte flags, 3 byte default algorithm id, one byte 1126 // default IV size, 16 bytes default KeyID 1127 // (ISO 23001-7) 1128 char buf[4]; 1129 memset(buf, 0, 4); 1130 if (mDataSource->readAt(data_offset + 4, buf + 1, 3) < 3) { 1131 return ERROR_IO; 1132 } 1133 uint32_t defaultAlgorithmId = ntohl(*((int32_t*)buf)); 1134 if (defaultAlgorithmId > 1) { 1135 // only 0 (clear) and 1 (AES-128) are valid 1136 return ERROR_MALFORMED; 1137 } 1138 1139 memset(buf, 0, 4); 1140 if (mDataSource->readAt(data_offset + 7, buf + 3, 1) < 1) { 1141 return ERROR_IO; 1142 } 1143 uint32_t defaultIVSize = ntohl(*((int32_t*)buf)); 1144 1145 if ((defaultAlgorithmId == 0 && defaultIVSize != 0) || 1146 (defaultAlgorithmId != 0 && defaultIVSize == 0)) { 1147 // only unencrypted data must have 0 IV size 1148 return ERROR_MALFORMED; 1149 } else if (defaultIVSize != 0 && 1150 defaultIVSize != 8 && 1151 defaultIVSize != 16) { 1152 // only supported sizes are 0, 8 and 16 1153 return ERROR_MALFORMED; 1154 } 1155 1156 uint8_t defaultKeyId[16]; 1157 1158 if (mDataSource->readAt(data_offset + 8, &defaultKeyId, 16) < 16) { 1159 return ERROR_IO; 1160 } 1161 1162 if (mLastTrack == NULL) 1163 return ERROR_MALFORMED; 1164 1165 mLastTrack->meta->setInt32(kKeyCryptoMode, defaultAlgorithmId); 1166 mLastTrack->meta->setInt32(kKeyCryptoDefaultIVSize, defaultIVSize); 1167 mLastTrack->meta->setData(kKeyCryptoKey, 'tenc', defaultKeyId, 16); 1168 break; 1169 } 1170 1171 case FOURCC('t', 'k', 'h', 'd'): 1172 { 1173 *offset += chunk_size; 1174 1175 status_t err; 1176 if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) { 1177 return err; 1178 } 1179 1180 break; 1181 } 1182 1183 case FOURCC('p', 's', 's', 'h'): 1184 { 1185 *offset += chunk_size; 1186 1187 PsshInfo pssh; 1188 1189 if (mDataSource->readAt(data_offset + 4, &pssh.uuid, 16) < 16) { 1190 return ERROR_IO; 1191 } 1192 1193 uint32_t psshdatalen = 0; 1194 if (mDataSource->readAt(data_offset + 20, &psshdatalen, 4) < 4) { 1195 return ERROR_IO; 1196 } 1197 pssh.datalen = ntohl(psshdatalen); 1198 ALOGV("pssh data size: %d", pssh.datalen); 1199 if (chunk_size < 20 || pssh.datalen > chunk_size - 20) { 1200 // pssh data length exceeds size of containing box 1201 return ERROR_MALFORMED; 1202 } 1203 1204 pssh.data = new (std::nothrow) uint8_t[pssh.datalen]; 1205 if (pssh.data == NULL) { 1206 return ERROR_MALFORMED; 1207 } 1208 ALOGV("allocated pssh @ %p", pssh.data); 1209 ssize_t requested = (ssize_t) pssh.datalen; 1210 if (mDataSource->readAt(data_offset + 24, pssh.data, requested) < requested) { 1211 return ERROR_IO; 1212 } 1213 mPssh.push_back(pssh); 1214 1215 break; 1216 } 1217 1218 case FOURCC('m', 'd', 'h', 'd'): 1219 { 1220 *offset += chunk_size; 1221 1222 if (chunk_data_size < 4 || mLastTrack == NULL) { 1223 return ERROR_MALFORMED; 1224 } 1225 1226 uint8_t version; 1227 if (mDataSource->readAt( 1228 data_offset, &version, sizeof(version)) 1229 < (ssize_t)sizeof(version)) { 1230 return ERROR_IO; 1231 } 1232 1233 off64_t timescale_offset; 1234 1235 if (version == 1) { 1236 timescale_offset = data_offset + 4 + 16; 1237 } else if (version == 0) { 1238 timescale_offset = data_offset + 4 + 8; 1239 } else { 1240 return ERROR_IO; 1241 } 1242 1243 uint32_t timescale; 1244 if (mDataSource->readAt( 1245 timescale_offset, ×cale, sizeof(timescale)) 1246 < (ssize_t)sizeof(timescale)) { 1247 return ERROR_IO; 1248 } 1249 1250 if (!timescale) { 1251 ALOGE("timescale should not be ZERO."); 1252 return ERROR_MALFORMED; 1253 } 1254 1255 mLastTrack->timescale = ntohl(timescale); 1256 1257 // 14496-12 says all ones means indeterminate, but some files seem to use 1258 // 0 instead. We treat both the same. 1259 int64_t duration = 0; 1260 if (version == 1) { 1261 if (mDataSource->readAt( 1262 timescale_offset + 4, &duration, sizeof(duration)) 1263 < (ssize_t)sizeof(duration)) { 1264 return ERROR_IO; 1265 } 1266 if (duration != -1) { 1267 duration = ntoh64(duration); 1268 } 1269 } else { 1270 uint32_t duration32; 1271 if (mDataSource->readAt( 1272 timescale_offset + 4, &duration32, sizeof(duration32)) 1273 < (ssize_t)sizeof(duration32)) { 1274 return ERROR_IO; 1275 } 1276 if (duration32 != 0xffffffff) { 1277 duration = ntohl(duration32); 1278 } 1279 } 1280 if (duration != 0 && mLastTrack->timescale != 0) { 1281 mLastTrack->meta->setInt64( 1282 kKeyDuration, (duration * 1000000) / mLastTrack->timescale); 1283 } 1284 1285 uint8_t lang[2]; 1286 off64_t lang_offset; 1287 if (version == 1) { 1288 lang_offset = timescale_offset + 4 + 8; 1289 } else if (version == 0) { 1290 lang_offset = timescale_offset + 4 + 4; 1291 } else { 1292 return ERROR_IO; 1293 } 1294 1295 if (mDataSource->readAt(lang_offset, &lang, sizeof(lang)) 1296 < (ssize_t)sizeof(lang)) { 1297 return ERROR_IO; 1298 } 1299 1300 // To get the ISO-639-2/T three character language code 1301 // 1 bit pad followed by 3 5-bits characters. Each character 1302 // is packed as the difference between its ASCII value and 0x60. 1303 char lang_code[4]; 1304 lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60; 1305 lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60; 1306 lang_code[2] = (lang[1] & 0x1f) + 0x60; 1307 lang_code[3] = '\0'; 1308 1309 mLastTrack->meta->setCString( 1310 kKeyMediaLanguage, lang_code); 1311 1312 break; 1313 } 1314 1315 case FOURCC('s', 't', 's', 'd'): 1316 { 1317 if (chunk_data_size < 8) { 1318 return ERROR_MALFORMED; 1319 } 1320 1321 uint8_t buffer[8]; 1322 if (chunk_data_size < (off64_t)sizeof(buffer)) { 1323 return ERROR_MALFORMED; 1324 } 1325 1326 if (mDataSource->readAt( 1327 data_offset, buffer, 8) < 8) { 1328 return ERROR_IO; 1329 } 1330 1331 if (U32_AT(buffer) != 0) { 1332 // Should be version 0, flags 0. 1333 return ERROR_MALFORMED; 1334 } 1335 1336 uint32_t entry_count = U32_AT(&buffer[4]); 1337 1338 if (entry_count > 1) { 1339 // For 3GPP timed text, there could be multiple tx3g boxes contain 1340 // multiple text display formats. These formats will be used to 1341 // display the timed text. 1342 // For encrypted files, there may also be more than one entry. 1343 const char *mime; 1344 1345 if (mLastTrack == NULL) 1346 return ERROR_MALFORMED; 1347 1348 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime)); 1349 if (strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP) && 1350 strcasecmp(mime, "application/octet-stream")) { 1351 // For now we only support a single type of media per track. 1352 mLastTrack->skipTrack = true; 1353 *offset += chunk_size; 1354 break; 1355 } 1356 } 1357 off64_t stop_offset = *offset + chunk_size; 1358 *offset = data_offset + 8; 1359 for (uint32_t i = 0; i < entry_count; ++i) { 1360 status_t err = parseChunk(offset, depth + 1); 1361 if (err != OK) { 1362 return err; 1363 } 1364 } 1365 1366 if (*offset != stop_offset) { 1367 return ERROR_MALFORMED; 1368 } 1369 break; 1370 } 1371 1372 case FOURCC('m', 'p', '4', 'a'): 1373 case FOURCC('e', 'n', 'c', 'a'): 1374 case FOURCC('s', 'a', 'm', 'r'): 1375 case FOURCC('s', 'a', 'w', 'b'): 1376 { 1377 if (mIsQT && chunk_type == FOURCC('m', 'p', '4', 'a') 1378 && depth >= 1 && mPath[depth - 1] == FOURCC('w', 'a', 'v', 'e')) { 1379 // Ignore mp4a embedded in QT wave atom 1380 *offset += chunk_size; 1381 break; 1382 } 1383 1384 uint8_t buffer[8 + 20]; 1385 if (chunk_data_size < (ssize_t)sizeof(buffer)) { 1386 // Basic AudioSampleEntry size. 1387 return ERROR_MALFORMED; 1388 } 1389 1390 if (mDataSource->readAt( 1391 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) { 1392 return ERROR_IO; 1393 } 1394 1395 uint16_t data_ref_index __unused = U16_AT(&buffer[6]); 1396 uint16_t version = U16_AT(&buffer[8]); 1397 uint32_t num_channels = U16_AT(&buffer[16]); 1398 1399 uint16_t sample_size = U16_AT(&buffer[18]); 1400 uint32_t sample_rate = U32_AT(&buffer[24]) >> 16; 1401 1402 if (mLastTrack == NULL) 1403 return ERROR_MALFORMED; 1404 1405 off64_t stop_offset = *offset + chunk_size; 1406 *offset = data_offset + sizeof(buffer); 1407 1408 if (mIsQT && chunk_type == FOURCC('m', 'p', '4', 'a')) { 1409 if (version == 1) { 1410 if (mDataSource->readAt(*offset, buffer, 16) < 16) { 1411 return ERROR_IO; 1412 } 1413 1414 #if 0 1415 U32_AT(buffer); // samples per packet 1416 U32_AT(&buffer[4]); // bytes per packet 1417 U32_AT(&buffer[8]); // bytes per frame 1418 U32_AT(&buffer[12]); // bytes per sample 1419 #endif 1420 *offset += 16; 1421 } else if (version == 2) { 1422 uint8_t v2buffer[36]; 1423 if (mDataSource->readAt(*offset, v2buffer, 36) < 36) { 1424 return ERROR_IO; 1425 } 1426 1427 #if 0 1428 U32_AT(v2buffer); // size of struct only 1429 sample_rate = (uint32_t)U64_AT(&v2buffer[4]); // audio sample rate 1430 num_channels = U32_AT(&v2buffer[12]); // num audio channels 1431 U32_AT(&v2buffer[16]); // always 0x7f000000 1432 sample_size = (uint16_t)U32_AT(&v2buffer[20]); // const bits per channel 1433 U32_AT(&v2buffer[24]); // format specifc flags 1434 U32_AT(&v2buffer[28]); // const bytes per audio packet 1435 U32_AT(&v2buffer[32]); // const LPCM frames per audio packet 1436 #endif 1437 *offset += 36; 1438 } 1439 } 1440 1441 if (chunk_type != FOURCC('e', 'n', 'c', 'a')) { 1442 // if the chunk type is enca, we'll get the type from the sinf/frma box later 1443 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type)); 1444 AdjustChannelsAndRate(chunk_type, &num_channels, &sample_rate); 1445 } 1446 ALOGV("*** coding='%s' %d channels, size %d, rate %d\n", 1447 chunk, num_channels, sample_size, sample_rate); 1448 mLastTrack->meta->setInt32(kKeyChannelCount, num_channels); 1449 mLastTrack->meta->setInt32(kKeySampleRate, sample_rate); 1450 1451 while (*offset < stop_offset) { 1452 status_t err = parseChunk(offset, depth + 1); 1453 if (err != OK) { 1454 return err; 1455 } 1456 } 1457 1458 if (*offset != stop_offset) { 1459 return ERROR_MALFORMED; 1460 } 1461 break; 1462 } 1463 1464 case FOURCC('m', 'p', '4', 'v'): 1465 case FOURCC('e', 'n', 'c', 'v'): 1466 case FOURCC('s', '2', '6', '3'): 1467 case FOURCC('H', '2', '6', '3'): 1468 case FOURCC('h', '2', '6', '3'): 1469 case FOURCC('a', 'v', 'c', '1'): 1470 case FOURCC('h', 'v', 'c', '1'): 1471 case FOURCC('h', 'e', 'v', '1'): 1472 { 1473 mHasVideo = true; 1474 1475 uint8_t buffer[78]; 1476 if (chunk_data_size < (ssize_t)sizeof(buffer)) { 1477 // Basic VideoSampleEntry size. 1478 return ERROR_MALFORMED; 1479 } 1480 1481 if (mDataSource->readAt( 1482 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) { 1483 return ERROR_IO; 1484 } 1485 1486 uint16_t data_ref_index __unused = U16_AT(&buffer[6]); 1487 uint16_t width = U16_AT(&buffer[6 + 18]); 1488 uint16_t height = U16_AT(&buffer[6 + 20]); 1489 1490 // The video sample is not standard-compliant if it has invalid dimension. 1491 // Use some default width and height value, and 1492 // let the decoder figure out the actual width and height (and thus 1493 // be prepared for INFO_FOMRAT_CHANGED event). 1494 if (width == 0) width = 352; 1495 if (height == 0) height = 288; 1496 1497 // printf("*** coding='%s' width=%d height=%d\n", 1498 // chunk, width, height); 1499 1500 if (mLastTrack == NULL) 1501 return ERROR_MALFORMED; 1502 1503 if (chunk_type != FOURCC('e', 'n', 'c', 'v')) { 1504 // if the chunk type is encv, we'll get the type from the sinf/frma box later 1505 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type)); 1506 } 1507 mLastTrack->meta->setInt32(kKeyWidth, width); 1508 mLastTrack->meta->setInt32(kKeyHeight, height); 1509 1510 off64_t stop_offset = *offset + chunk_size; 1511 *offset = data_offset + sizeof(buffer); 1512 while (*offset < stop_offset) { 1513 status_t err = parseChunk(offset, depth + 1); 1514 if (err != OK) { 1515 return err; 1516 } 1517 } 1518 1519 if (*offset != stop_offset) { 1520 return ERROR_MALFORMED; 1521 } 1522 break; 1523 } 1524 1525 case FOURCC('s', 't', 'c', 'o'): 1526 case FOURCC('c', 'o', '6', '4'): 1527 { 1528 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) 1529 return ERROR_MALFORMED; 1530 1531 status_t err = 1532 mLastTrack->sampleTable->setChunkOffsetParams( 1533 chunk_type, data_offset, chunk_data_size); 1534 1535 *offset += chunk_size; 1536 1537 if (err != OK) { 1538 return err; 1539 } 1540 1541 break; 1542 } 1543 1544 case FOURCC('s', 't', 's', 'c'): 1545 { 1546 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) 1547 return ERROR_MALFORMED; 1548 1549 status_t err = 1550 mLastTrack->sampleTable->setSampleToChunkParams( 1551 data_offset, chunk_data_size); 1552 1553 *offset += chunk_size; 1554 1555 if (err != OK) { 1556 return err; 1557 } 1558 1559 break; 1560 } 1561 1562 case FOURCC('s', 't', 's', 'z'): 1563 case FOURCC('s', 't', 'z', '2'): 1564 { 1565 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) 1566 return ERROR_MALFORMED; 1567 1568 status_t err = 1569 mLastTrack->sampleTable->setSampleSizeParams( 1570 chunk_type, data_offset, chunk_data_size); 1571 1572 *offset += chunk_size; 1573 1574 if (err != OK) { 1575 return err; 1576 } 1577 1578 size_t max_size; 1579 err = mLastTrack->sampleTable->getMaxSampleSize(&max_size); 1580 1581 if (err != OK) { 1582 return err; 1583 } 1584 1585 if (max_size != 0) { 1586 // Assume that a given buffer only contains at most 10 chunks, 1587 // each chunk originally prefixed with a 2 byte length will 1588 // have a 4 byte header (0x00 0x00 0x00 0x01) after conversion, 1589 // and thus will grow by 2 bytes per chunk. 1590 if (max_size > SIZE_MAX - 10 * 2) { 1591 ALOGE("max sample size too big: %zu", max_size); 1592 return ERROR_MALFORMED; 1593 } 1594 mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size + 10 * 2); 1595 } else { 1596 // No size was specified. Pick a conservatively large size. 1597 uint32_t width, height; 1598 if (!mLastTrack->meta->findInt32(kKeyWidth, (int32_t*)&width) || 1599 !mLastTrack->meta->findInt32(kKeyHeight,(int32_t*) &height)) { 1600 ALOGE("No width or height, assuming worst case 1080p"); 1601 width = 1920; 1602 height = 1080; 1603 } else { 1604 // A resolution was specified, check that it's not too big. The values below 1605 // were chosen so that the calculations below don't cause overflows, they're 1606 // not indicating that resolutions up to 32kx32k are actually supported. 1607 if (width > 32768 || height > 32768) { 1608 ALOGE("can't support %u x %u video", width, height); 1609 return ERROR_MALFORMED; 1610 } 1611 } 1612 1613 const char *mime; 1614 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime)); 1615 if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_AVC) 1616 || !strcmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) { 1617 // AVC & HEVC requires compression ratio of at least 2, and uses 1618 // macroblocks 1619 max_size = ((width + 15) / 16) * ((height + 15) / 16) * 192; 1620 } else { 1621 // For all other formats there is no minimum compression 1622 // ratio. Use compression ratio of 1. 1623 max_size = width * height * 3 / 2; 1624 } 1625 mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size); 1626 } 1627 1628 // NOTE: setting another piece of metadata invalidates any pointers (such as the 1629 // mimetype) previously obtained, so don't cache them. 1630 const char *mime; 1631 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime)); 1632 // Calculate average frame rate. 1633 if (!strncasecmp("video/", mime, 6)) { 1634 size_t nSamples = mLastTrack->sampleTable->countSamples(); 1635 if (nSamples == 0) { 1636 int32_t trackId; 1637 if (mLastTrack->meta->findInt32(kKeyTrackID, &trackId)) { 1638 for (size_t i = 0; i < mTrex.size(); i++) { 1639 Trex *t = &mTrex.editItemAt(i); 1640 if (t->track_ID == (uint32_t) trackId) { 1641 if (t->default_sample_duration > 0) { 1642 int32_t frameRate = 1643 mLastTrack->timescale / t->default_sample_duration; 1644 mLastTrack->meta->setInt32(kKeyFrameRate, frameRate); 1645 } 1646 break; 1647 } 1648 } 1649 } 1650 } else { 1651 int64_t durationUs; 1652 if (mLastTrack->meta->findInt64(kKeyDuration, &durationUs)) { 1653 if (durationUs > 0) { 1654 int32_t frameRate = (nSamples * 1000000LL + 1655 (durationUs >> 1)) / durationUs; 1656 mLastTrack->meta->setInt32(kKeyFrameRate, frameRate); 1657 } 1658 } 1659 } 1660 } 1661 1662 break; 1663 } 1664 1665 case FOURCC('s', 't', 't', 's'): 1666 { 1667 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) 1668 return ERROR_MALFORMED; 1669 1670 *offset += chunk_size; 1671 1672 status_t err = 1673 mLastTrack->sampleTable->setTimeToSampleParams( 1674 data_offset, chunk_data_size); 1675 1676 if (err != OK) { 1677 return err; 1678 } 1679 1680 break; 1681 } 1682 1683 case FOURCC('c', 't', 't', 's'): 1684 { 1685 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) 1686 return ERROR_MALFORMED; 1687 1688 *offset += chunk_size; 1689 1690 status_t err = 1691 mLastTrack->sampleTable->setCompositionTimeToSampleParams( 1692 data_offset, chunk_data_size); 1693 1694 if (err != OK) { 1695 return err; 1696 } 1697 1698 break; 1699 } 1700 1701 case FOURCC('s', 't', 's', 's'): 1702 { 1703 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) 1704 return ERROR_MALFORMED; 1705 1706 *offset += chunk_size; 1707 1708 status_t err = 1709 mLastTrack->sampleTable->setSyncSampleParams( 1710 data_offset, chunk_data_size); 1711 1712 if (err != OK) { 1713 return err; 1714 } 1715 1716 break; 1717 } 1718 1719 // \xA9xyz 1720 case FOURCC(0xA9, 'x', 'y', 'z'): 1721 { 1722 *offset += chunk_size; 1723 1724 // Best case the total data length inside "\xA9xyz" box 1725 // would be 8, for instance "\xA9xyz" + "\x00\x04\x15\xc7" + "0+0/", 1726 // where "\x00\x04" is the text string length with value = 4, 1727 // "\0x15\xc7" is the language code = en, and "0+0" is a 1728 // location (string) value with longitude = 0 and latitude = 0. 1729 if (chunk_data_size < 8) { 1730 return ERROR_MALFORMED; 1731 } 1732 1733 // Worst case the location string length would be 18, 1734 // for instance +90.0000-180.0000, without the trailing "/" and 1735 // the string length + language code, and some devices include 1736 // an additional 8 bytes of altitude, e.g. +007.186 1737 char buffer[18 + 8]; 1738 1739 // Substracting 5 from the data size is because the text string length + 1740 // language code takes 4 bytes, and the trailing slash "/" takes 1 byte. 1741 off64_t location_length = chunk_data_size - 5; 1742 if (location_length >= (off64_t) sizeof(buffer)) { 1743 return ERROR_MALFORMED; 1744 } 1745 1746 if (mDataSource->readAt( 1747 data_offset + 4, buffer, location_length) < location_length) { 1748 return ERROR_IO; 1749 } 1750 1751 buffer[location_length] = '\0'; 1752 mFileMetaData->setCString(kKeyLocation, buffer); 1753 break; 1754 } 1755 1756 case FOURCC('e', 's', 'd', 's'): 1757 { 1758 *offset += chunk_size; 1759 1760 if (chunk_data_size < 4) { 1761 return ERROR_MALFORMED; 1762 } 1763 1764 uint8_t buffer[256]; 1765 if (chunk_data_size > (off64_t)sizeof(buffer)) { 1766 return ERROR_BUFFER_TOO_SMALL; 1767 } 1768 1769 if (mDataSource->readAt( 1770 data_offset, buffer, chunk_data_size) < chunk_data_size) { 1771 return ERROR_IO; 1772 } 1773 1774 if (U32_AT(buffer) != 0) { 1775 // Should be version 0, flags 0. 1776 return ERROR_MALFORMED; 1777 } 1778 1779 if (mLastTrack == NULL) 1780 return ERROR_MALFORMED; 1781 1782 mLastTrack->meta->setData( 1783 kKeyESDS, kTypeESDS, &buffer[4], chunk_data_size - 4); 1784 1785 if (mPath.size() >= 2 1786 && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'a')) { 1787 // Information from the ESDS must be relied on for proper 1788 // setup of sample rate and channel count for MPEG4 Audio. 1789 // The generic header appears to only contain generic 1790 // information... 1791 1792 status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio( 1793 &buffer[4], chunk_data_size - 4); 1794 1795 if (err != OK) { 1796 return err; 1797 } 1798 } 1799 if (mPath.size() >= 2 1800 && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'v')) { 1801 // Check if the video is MPEG2 1802 ESDS esds(&buffer[4], chunk_data_size - 4); 1803 1804 uint8_t objectTypeIndication; 1805 if (esds.getObjectTypeIndication(&objectTypeIndication) == OK) { 1806 if (objectTypeIndication >= 0x60 && objectTypeIndication <= 0x65) { 1807 mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_MPEG2); 1808 } 1809 } 1810 } 1811 break; 1812 } 1813 1814 case FOURCC('b', 't', 'r', 't'): 1815 { 1816 *offset += chunk_size; 1817 1818 uint8_t buffer[12]; 1819 if (chunk_data_size != sizeof(buffer)) { 1820 return ERROR_MALFORMED; 1821 } 1822 1823 if (mDataSource->readAt( 1824 data_offset, buffer, chunk_data_size) < chunk_data_size) { 1825 return ERROR_IO; 1826 } 1827 1828 uint32_t maxBitrate = U32_AT(&buffer[4]); 1829 uint32_t avgBitrate = U32_AT(&buffer[8]); 1830 if (maxBitrate > 0 && maxBitrate < INT32_MAX) { 1831 mLastTrack->meta->setInt32(kKeyMaxBitRate, (int32_t)maxBitrate); 1832 } 1833 if (avgBitrate > 0 && avgBitrate < INT32_MAX) { 1834 mLastTrack->meta->setInt32(kKeyBitRate, (int32_t)avgBitrate); 1835 } 1836 break; 1837 } 1838 1839 case FOURCC('a', 'v', 'c', 'C'): 1840 { 1841 *offset += chunk_size; 1842 1843 sp<ABuffer> buffer = new ABuffer(chunk_data_size); 1844 1845 if (buffer->data() == NULL) { 1846 ALOGE("b/28471206"); 1847 return NO_MEMORY; 1848 } 1849 1850 if (mDataSource->readAt( 1851 data_offset, buffer->data(), chunk_data_size) < chunk_data_size) { 1852 return ERROR_IO; 1853 } 1854 1855 if (mLastTrack == NULL) 1856 return ERROR_MALFORMED; 1857 1858 mLastTrack->meta->setData( 1859 kKeyAVCC, kTypeAVCC, buffer->data(), chunk_data_size); 1860 1861 break; 1862 } 1863 case FOURCC('h', 'v', 'c', 'C'): 1864 { 1865 sp<ABuffer> buffer = new ABuffer(chunk_data_size); 1866 1867 if (buffer->data() == NULL) { 1868 ALOGE("b/28471206"); 1869 return NO_MEMORY; 1870 } 1871 1872 if (mDataSource->readAt( 1873 data_offset, buffer->data(), chunk_data_size) < chunk_data_size) { 1874 return ERROR_IO; 1875 } 1876 1877 if (mLastTrack == NULL) 1878 return ERROR_MALFORMED; 1879 1880 mLastTrack->meta->setData( 1881 kKeyHVCC, kTypeHVCC, buffer->data(), chunk_data_size); 1882 1883 *offset += chunk_size; 1884 break; 1885 } 1886 1887 case FOURCC('d', '2', '6', '3'): 1888 { 1889 *offset += chunk_size; 1890 /* 1891 * d263 contains a fixed 7 bytes part: 1892 * vendor - 4 bytes 1893 * version - 1 byte 1894 * level - 1 byte 1895 * profile - 1 byte 1896 * optionally, "d263" box itself may contain a 16-byte 1897 * bit rate box (bitr) 1898 * average bit rate - 4 bytes 1899 * max bit rate - 4 bytes 1900 */ 1901 char buffer[23]; 1902 if (chunk_data_size != 7 && 1903 chunk_data_size != 23) { 1904 ALOGE("Incorrect D263 box size %lld", (long long)chunk_data_size); 1905 return ERROR_MALFORMED; 1906 } 1907 1908 if (mDataSource->readAt( 1909 data_offset, buffer, chunk_data_size) < chunk_data_size) { 1910 return ERROR_IO; 1911 } 1912 1913 if (mLastTrack == NULL) 1914 return ERROR_MALFORMED; 1915 1916 mLastTrack->meta->setData(kKeyD263, kTypeD263, buffer, chunk_data_size); 1917 1918 break; 1919 } 1920 1921 case FOURCC('m', 'e', 't', 'a'): 1922 { 1923 off64_t stop_offset = *offset + chunk_size; 1924 *offset = data_offset; 1925 bool isParsingMetaKeys = underQTMetaPath(mPath, 2); 1926 if (!isParsingMetaKeys) { 1927 uint8_t buffer[4]; 1928 if (chunk_data_size < (off64_t)sizeof(buffer)) { 1929 *offset = stop_offset; 1930 return ERROR_MALFORMED; 1931 } 1932 1933 if (mDataSource->readAt( 1934 data_offset, buffer, 4) < 4) { 1935 *offset = stop_offset; 1936 return ERROR_IO; 1937 } 1938 1939 if (U32_AT(buffer) != 0) { 1940 // Should be version 0, flags 0. 1941 1942 // If it's not, let's assume this is one of those 1943 // apparently malformed chunks that don't have flags 1944 // and completely different semantics than what's 1945 // in the MPEG4 specs and skip it. 1946 *offset = stop_offset; 1947 return OK; 1948 } 1949 *offset += sizeof(buffer); 1950 } 1951 1952 while (*offset < stop_offset) { 1953 status_t err = parseChunk(offset, depth + 1); 1954 if (err != OK) { 1955 return err; 1956 } 1957 } 1958 1959 if (*offset != stop_offset) { 1960 return ERROR_MALFORMED; 1961 } 1962 break; 1963 } 1964 1965 case FOURCC('m', 'e', 'a', 'n'): 1966 case FOURCC('n', 'a', 'm', 'e'): 1967 case FOURCC('d', 'a', 't', 'a'): 1968 { 1969 *offset += chunk_size; 1970 1971 if (mPath.size() == 6 && underMetaDataPath(mPath)) { 1972 status_t err = parseITunesMetaData(data_offset, chunk_data_size); 1973 1974 if (err != OK) { 1975 return err; 1976 } 1977 } 1978 1979 break; 1980 } 1981 1982 case FOURCC('m', 'v', 'h', 'd'): 1983 { 1984 *offset += chunk_size; 1985 1986 if (chunk_data_size < 32) { 1987 return ERROR_MALFORMED; 1988 } 1989 1990 uint8_t header[32]; 1991 if (mDataSource->readAt( 1992 data_offset, header, sizeof(header)) 1993 < (ssize_t)sizeof(header)) { 1994 return ERROR_IO; 1995 } 1996 1997 uint64_t creationTime; 1998 uint64_t duration = 0; 1999 if (header[0] == 1) { 2000 creationTime = U64_AT(&header[4]); 2001 mHeaderTimescale = U32_AT(&header[20]); 2002 duration = U64_AT(&header[24]); 2003 if (duration == 0xffffffffffffffff) { 2004 duration = 0; 2005 } 2006 } else if (header[0] != 0) { 2007 return ERROR_MALFORMED; 2008 } else { 2009 creationTime = U32_AT(&header[4]); 2010 mHeaderTimescale = U32_AT(&header[12]); 2011 uint32_t d32 = U32_AT(&header[16]); 2012 if (d32 == 0xffffffff) { 2013 d32 = 0; 2014 } 2015 duration = d32; 2016 } 2017 if (duration != 0 && mHeaderTimescale != 0 && duration < UINT64_MAX / 1000000) { 2018 mFileMetaData->setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale); 2019 } 2020 2021 String8 s; 2022 if (convertTimeToDate(creationTime, &s)) { 2023 mFileMetaData->setCString(kKeyDate, s.string()); 2024 } 2025 2026 2027 break; 2028 } 2029 2030 case FOURCC('m', 'e', 'h', 'd'): 2031 { 2032 *offset += chunk_size; 2033 2034 if (chunk_data_size < 8) { 2035 return ERROR_MALFORMED; 2036 } 2037 2038 uint8_t flags[4]; 2039 if (mDataSource->readAt( 2040 data_offset, flags, sizeof(flags)) 2041 < (ssize_t)sizeof(flags)) { 2042 return ERROR_IO; 2043 } 2044 2045 uint64_t duration = 0; 2046 if (flags[0] == 1) { 2047 // 64 bit 2048 if (chunk_data_size < 12) { 2049 return ERROR_MALFORMED; 2050 } 2051 mDataSource->getUInt64(data_offset + 4, &duration); 2052 if (duration == 0xffffffffffffffff) { 2053 duration = 0; 2054 } 2055 } else if (flags[0] == 0) { 2056 // 32 bit 2057 uint32_t d32; 2058 mDataSource->getUInt32(data_offset + 4, &d32); 2059 if (d32 == 0xffffffff) { 2060 d32 = 0; 2061 } 2062 duration = d32; 2063 } else { 2064 return ERROR_MALFORMED; 2065 } 2066 2067 if (duration != 0 && mHeaderTimescale != 0) { 2068 mFileMetaData->setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale); 2069 } 2070 2071 break; 2072 } 2073 2074 case FOURCC('m', 'd', 'a', 't'): 2075 { 2076 ALOGV("mdat chunk, drm: %d", mIsDrm); 2077 2078 mMdatFound = true; 2079 2080 if (!mIsDrm) { 2081 *offset += chunk_size; 2082 break; 2083 } 2084 2085 if (chunk_size < 8) { 2086 return ERROR_MALFORMED; 2087 } 2088 2089 return parseDrmSINF(offset, data_offset); 2090 } 2091 2092 case FOURCC('h', 'd', 'l', 'r'): 2093 { 2094 *offset += chunk_size; 2095 2096 if (underQTMetaPath(mPath, 3)) { 2097 break; 2098 } 2099 2100 uint32_t buffer; 2101 if (mDataSource->readAt( 2102 data_offset + 8, &buffer, 4) < 4) { 2103 return ERROR_IO; 2104 } 2105 2106 uint32_t type = ntohl(buffer); 2107 // For the 3GPP file format, the handler-type within the 'hdlr' box 2108 // shall be 'text'. We also want to support 'sbtl' handler type 2109 // for a practical reason as various MPEG4 containers use it. 2110 if (type == FOURCC('t', 'e', 'x', 't') || type == FOURCC('s', 'b', 't', 'l')) { 2111 if (mLastTrack != NULL) { 2112 mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_TEXT_3GPP); 2113 } 2114 } 2115 2116 break; 2117 } 2118 2119 case FOURCC('k', 'e', 'y', 's'): 2120 { 2121 *offset += chunk_size; 2122 2123 if (underQTMetaPath(mPath, 3)) { 2124 parseQTMetaKey(data_offset, chunk_data_size); 2125 } 2126 break; 2127 } 2128 2129 case FOURCC('t', 'r', 'e', 'x'): 2130 { 2131 *offset += chunk_size; 2132 2133 if (chunk_data_size < 24) { 2134 return ERROR_IO; 2135 } 2136 Trex trex; 2137 if (!mDataSource->getUInt32(data_offset + 4, &trex.track_ID) || 2138 !mDataSource->getUInt32(data_offset + 8, &trex.default_sample_description_index) || 2139 !mDataSource->getUInt32(data_offset + 12, &trex.default_sample_duration) || 2140 !mDataSource->getUInt32(data_offset + 16, &trex.default_sample_size) || 2141 !mDataSource->getUInt32(data_offset + 20, &trex.default_sample_flags)) { 2142 return ERROR_IO; 2143 } 2144 mTrex.add(trex); 2145 break; 2146 } 2147 2148 case FOURCC('t', 'x', '3', 'g'): 2149 { 2150 if (mLastTrack == NULL) 2151 return ERROR_MALFORMED; 2152 2153 uint32_t type; 2154 const void *data; 2155 size_t size = 0; 2156 if (!mLastTrack->meta->findData( 2157 kKeyTextFormatData, &type, &data, &size)) { 2158 size = 0; 2159 } 2160 2161 if ((chunk_size > SIZE_MAX) || (SIZE_MAX - chunk_size <= size)) { 2162 return ERROR_MALFORMED; 2163 } 2164 2165 uint8_t *buffer = new (std::nothrow) uint8_t[size + chunk_size]; 2166 if (buffer == NULL) { 2167 return ERROR_MALFORMED; 2168 } 2169 2170 if (size > 0) { 2171 memcpy(buffer, data, size); 2172 } 2173 2174 if ((size_t)(mDataSource->readAt(*offset, buffer + size, chunk_size)) 2175 < chunk_size) { 2176 delete[] buffer; 2177 buffer = NULL; 2178 2179 // advance read pointer so we don't end up reading this again 2180 *offset += chunk_size; 2181 return ERROR_IO; 2182 } 2183 2184 mLastTrack->meta->setData( 2185 kKeyTextFormatData, 0, buffer, size + chunk_size); 2186 2187 delete[] buffer; 2188 2189 *offset += chunk_size; 2190 break; 2191 } 2192 2193 case FOURCC('c', 'o', 'v', 'r'): 2194 { 2195 *offset += chunk_size; 2196 2197 if (mFileMetaData != NULL) { 2198 ALOGV("chunk_data_size = %" PRId64 " and data_offset = %" PRId64, 2199 chunk_data_size, data_offset); 2200 2201 if (chunk_data_size < 0 || static_cast<uint64_t>(chunk_data_size) >= SIZE_MAX - 1) { 2202 return ERROR_MALFORMED; 2203 } 2204 sp<ABuffer> buffer = new ABuffer(chunk_data_size + 1); 2205 if (buffer->data() == NULL) { 2206 ALOGE("b/28471206"); 2207 return NO_MEMORY; 2208 } 2209 if (mDataSource->readAt( 2210 data_offset, buffer->data(), chunk_data_size) != (ssize_t)chunk_data_size) { 2211 return ERROR_IO; 2212 } 2213 const int kSkipBytesOfDataBox = 16; 2214 if (chunk_data_size <= kSkipBytesOfDataBox) { 2215 return ERROR_MALFORMED; 2216 } 2217 2218 mFileMetaData->setData( 2219 kKeyAlbumArt, MetaData::TYPE_NONE, 2220 buffer->data() + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox); 2221 } 2222 2223 break; 2224 } 2225 2226 case FOURCC('c', 'o', 'l', 'r'): 2227 { 2228 *offset += chunk_size; 2229 // this must be in a VisualSampleEntry box under the Sample Description Box ('stsd') 2230 // ignore otherwise 2231 if (depth >= 2 && mPath[depth - 2] == FOURCC('s', 't', 's', 'd')) { 2232 status_t err = parseColorInfo(data_offset, chunk_data_size); 2233 if (err != OK) { 2234 return err; 2235 } 2236 } 2237 2238 break; 2239 } 2240 2241 case FOURCC('t', 'i', 't', 'l'): 2242 case FOURCC('p', 'e', 'r', 'f'): 2243 case FOURCC('a', 'u', 't', 'h'): 2244 case FOURCC('g', 'n', 'r', 'e'): 2245 case FOURCC('a', 'l', 'b', 'm'): 2246 case FOURCC('y', 'r', 'r', 'c'): 2247 { 2248 *offset += chunk_size; 2249 2250 status_t err = parse3GPPMetaData(data_offset, chunk_data_size, depth); 2251 2252 if (err != OK) { 2253 return err; 2254 } 2255 2256 break; 2257 } 2258 2259 case FOURCC('I', 'D', '3', '2'): 2260 { 2261 *offset += chunk_size; 2262 2263 if (chunk_data_size < 6) { 2264 return ERROR_MALFORMED; 2265 } 2266 2267 parseID3v2MetaData(data_offset + 6); 2268 2269 break; 2270 } 2271 2272 case FOURCC('-', '-', '-', '-'): 2273 { 2274 mLastCommentMean.clear(); 2275 mLastCommentName.clear(); 2276 mLastCommentData.clear(); 2277 *offset += chunk_size; 2278 break; 2279 } 2280 2281 case FOURCC('s', 'i', 'd', 'x'): 2282 { 2283 parseSegmentIndex(data_offset, chunk_data_size); 2284 *offset += chunk_size; 2285 return UNKNOWN_ERROR; // stop parsing after sidx 2286 } 2287 2288 case FOURCC('f', 't', 'y', 'p'): 2289 { 2290 if (chunk_data_size < 8 || depth != 0) { 2291 return ERROR_MALFORMED; 2292 } 2293 2294 off64_t stop_offset = *offset + chunk_size; 2295 uint32_t numCompatibleBrands = (chunk_data_size - 8) / 4; 2296 for (size_t i = 0; i < numCompatibleBrands + 2; ++i) { 2297 if (i == 1) { 2298 // Skip this index, it refers to the minorVersion, 2299 // not a brand. 2300 continue; 2301 } 2302 2303 uint32_t brand; 2304 if (mDataSource->readAt(data_offset + 4 * i, &brand, 4) < 4) { 2305 return ERROR_MALFORMED; 2306 } 2307 2308 brand = ntohl(brand); 2309 if (brand == FOURCC('q', 't', ' ', ' ')) { 2310 mIsQT = true; 2311 break; 2312 } 2313 } 2314 2315 *offset = stop_offset; 2316 2317 break; 2318 } 2319 2320 default: 2321 { 2322 // check if we're parsing 'ilst' for meta keys 2323 // if so, treat type as a number (key-id). 2324 if (underQTMetaPath(mPath, 3)) { 2325 parseQTMetaVal(chunk_type, data_offset, chunk_data_size); 2326 } 2327 2328 *offset += chunk_size; 2329 break; 2330 } 2331 } 2332 2333 return OK; 2334 } 2335 2336 status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) { 2337 ALOGV("MPEG4Extractor::parseSegmentIndex"); 2338 2339 if (size < 12) { 2340 return -EINVAL; 2341 } 2342 2343 uint32_t flags; 2344 if (!mDataSource->getUInt32(offset, &flags)) { 2345 return ERROR_MALFORMED; 2346 } 2347 2348 uint32_t version = flags >> 24; 2349 flags &= 0xffffff; 2350 2351 ALOGV("sidx version %d", version); 2352 2353 uint32_t referenceId; 2354 if (!mDataSource->getUInt32(offset + 4, &referenceId)) { 2355 return ERROR_MALFORMED; 2356 } 2357 2358 uint32_t timeScale; 2359 if (!mDataSource->getUInt32(offset + 8, &timeScale)) { 2360 return ERROR_MALFORMED; 2361 } 2362 ALOGV("sidx refid/timescale: %d/%d", referenceId, timeScale); 2363 if (timeScale == 0) 2364 return ERROR_MALFORMED; 2365 2366 uint64_t earliestPresentationTime; 2367 uint64_t firstOffset; 2368 2369 offset += 12; 2370 size -= 12; 2371 2372 if (version == 0) { 2373 if (size < 8) { 2374 return -EINVAL; 2375 } 2376 uint32_t tmp; 2377 if (!mDataSource->getUInt32(offset, &tmp)) { 2378 return ERROR_MALFORMED; 2379 } 2380 earliestPresentationTime = tmp; 2381 if (!mDataSource->getUInt32(offset + 4, &tmp)) { 2382 return ERROR_MALFORMED; 2383 } 2384 firstOffset = tmp; 2385 offset += 8; 2386 size -= 8; 2387 } else { 2388 if (size < 16) { 2389 return -EINVAL; 2390 } 2391 if (!mDataSource->getUInt64(offset, &earliestPresentationTime)) { 2392 return ERROR_MALFORMED; 2393 } 2394 if (!mDataSource->getUInt64(offset + 8, &firstOffset)) { 2395 return ERROR_MALFORMED; 2396 } 2397 offset += 16; 2398 size -= 16; 2399 } 2400 ALOGV("sidx pres/off: %" PRIu64 "/%" PRIu64, earliestPresentationTime, firstOffset); 2401 2402 if (size < 4) { 2403 return -EINVAL; 2404 } 2405 2406 uint16_t referenceCount; 2407 if (!mDataSource->getUInt16(offset + 2, &referenceCount)) { 2408 return ERROR_MALFORMED; 2409 } 2410 offset += 4; 2411 size -= 4; 2412 ALOGV("refcount: %d", referenceCount); 2413 2414 if (size < referenceCount * 12) { 2415 return -EINVAL; 2416 } 2417 2418 uint64_t total_duration = 0; 2419 for (unsigned int i = 0; i < referenceCount; i++) { 2420 uint32_t d1, d2, d3; 2421 2422 if (!mDataSource->getUInt32(offset, &d1) || // size 2423 !mDataSource->getUInt32(offset + 4, &d2) || // duration 2424 !mDataSource->getUInt32(offset + 8, &d3)) { // flags 2425 return ERROR_MALFORMED; 2426 } 2427 2428 if (d1 & 0x80000000) { 2429 ALOGW("sub-sidx boxes not supported yet"); 2430 } 2431 bool sap = d3 & 0x80000000; 2432 uint32_t saptype = (d3 >> 28) & 7; 2433 if (!sap || (saptype != 1 && saptype != 2)) { 2434 // type 1 and 2 are sync samples 2435 ALOGW("not a stream access point, or unsupported type: %08x", d3); 2436 } 2437 total_duration += d2; 2438 offset += 12; 2439 ALOGV(" item %d, %08x %08x %08x", i, d1, d2, d3); 2440 SidxEntry se; 2441 se.mSize = d1 & 0x7fffffff; 2442 se.mDurationUs = 1000000LL * d2 / timeScale; 2443 mSidxEntries.add(se); 2444 } 2445 2446 uint64_t sidxDuration = total_duration * 1000000 / timeScale; 2447 2448 if (mLastTrack == NULL) 2449 return ERROR_MALFORMED; 2450 2451 int64_t metaDuration; 2452 if (!mLastTrack->meta->findInt64(kKeyDuration, &metaDuration) || metaDuration == 0) { 2453 mLastTrack->meta->setInt64(kKeyDuration, sidxDuration); 2454 } 2455 return OK; 2456 } 2457 2458 status_t MPEG4Extractor::parseQTMetaKey(off64_t offset, size_t size) { 2459 if (size < 8) { 2460 return ERROR_MALFORMED; 2461 } 2462 2463 uint32_t count; 2464 if (!mDataSource->getUInt32(offset + 4, &count)) { 2465 return ERROR_MALFORMED; 2466 } 2467 2468 if (mMetaKeyMap.size() > 0) { 2469 ALOGW("'keys' atom seen again, discarding existing entries"); 2470 mMetaKeyMap.clear(); 2471 } 2472 2473 off64_t keyOffset = offset + 8; 2474 off64_t stopOffset = offset + size; 2475 for (size_t i = 1; i <= count; i++) { 2476 if (keyOffset + 8 > stopOffset) { 2477 return ERROR_MALFORMED; 2478 } 2479 2480 uint32_t keySize; 2481 if (!mDataSource->getUInt32(keyOffset, &keySize) 2482 || keySize < 8 2483 || keyOffset + keySize > stopOffset) { 2484 return ERROR_MALFORMED; 2485 } 2486 2487 uint32_t type; 2488 if (!mDataSource->getUInt32(keyOffset + 4, &type) 2489 || type != FOURCC('m', 'd', 't', 'a')) { 2490 return ERROR_MALFORMED; 2491 } 2492 2493 keySize -= 8; 2494 keyOffset += 8; 2495 2496 sp<ABuffer> keyData = new ABuffer(keySize); 2497 if (keyData->data() == NULL) { 2498 return ERROR_MALFORMED; 2499 } 2500 if (mDataSource->readAt( 2501 keyOffset, keyData->data(), keySize) < (ssize_t) keySize) { 2502 return ERROR_MALFORMED; 2503 } 2504 2505 AString key((const char *)keyData->data(), keySize); 2506 mMetaKeyMap.add(i, key); 2507 2508 keyOffset += keySize; 2509 } 2510 return OK; 2511 } 2512 2513 status_t MPEG4Extractor::parseQTMetaVal( 2514 int32_t keyId, off64_t offset, size_t size) { 2515 ssize_t index = mMetaKeyMap.indexOfKey(keyId); 2516 if (index < 0) { 2517 // corresponding key is not present, ignore 2518 return ERROR_MALFORMED; 2519 } 2520 2521 if (size <= 16) { 2522 return ERROR_MALFORMED; 2523 } 2524 uint32_t dataSize; 2525 if (!mDataSource->getUInt32(offset, &dataSize) 2526 || dataSize > size || dataSize <= 16) { 2527 return ERROR_MALFORMED; 2528 } 2529 uint32_t atomFourCC; 2530 if (!mDataSource->getUInt32(offset + 4, &atomFourCC) 2531 || atomFourCC != FOURCC('d', 'a', 't', 'a')) { 2532 return ERROR_MALFORMED; 2533 } 2534 uint32_t dataType; 2535 if (!mDataSource->getUInt32(offset + 8, &dataType) 2536 || ((dataType & 0xff000000) != 0)) { 2537 // not well-known type 2538 return ERROR_MALFORMED; 2539 } 2540 2541 dataSize -= 16; 2542 offset += 16; 2543 2544 if (dataType == 23 && dataSize >= 4) { 2545 // BE Float32 2546 uint32_t val; 2547 if (!mDataSource->getUInt32(offset, &val)) { 2548 return ERROR_MALFORMED; 2549 } 2550 if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.capture.fps")) { 2551 mFileMetaData->setFloat(kKeyCaptureFramerate, *(float *)&val); 2552 } 2553 } else if (dataType == 67 && dataSize >= 4) { 2554 // BE signed int32 2555 uint32_t val; 2556 if (!mDataSource->getUInt32(offset, &val)) { 2557 return ERROR_MALFORMED; 2558 } 2559 if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.video.temporal_layers_count")) { 2560 mFileMetaData->setInt32(kKeyTemporalLayerCount, val); 2561 } 2562 } else { 2563 // add more keys if needed 2564 ALOGV("ignoring key: type %d, size %d", dataType, dataSize); 2565 } 2566 2567 return OK; 2568 } 2569 2570 status_t MPEG4Extractor::parseTrackHeader( 2571 off64_t data_offset, off64_t data_size) { 2572 if (data_size < 4) { 2573 return ERROR_MALFORMED; 2574 } 2575 2576 uint8_t version; 2577 if (mDataSource->readAt(data_offset, &version, 1) < 1) { 2578 return ERROR_IO; 2579 } 2580 2581 size_t dynSize = (version == 1) ? 36 : 24; 2582 2583 uint8_t buffer[36 + 60]; 2584 2585 if (data_size != (off64_t)dynSize + 60) { 2586 return ERROR_MALFORMED; 2587 } 2588 2589 if (mDataSource->readAt( 2590 data_offset, buffer, data_size) < (ssize_t)data_size) { 2591 return ERROR_IO; 2592 } 2593 2594 uint64_t ctime __unused, mtime __unused, duration __unused; 2595 int32_t id; 2596 2597 if (version == 1) { 2598 ctime = U64_AT(&buffer[4]); 2599 mtime = U64_AT(&buffer[12]); 2600 id = U32_AT(&buffer[20]); 2601 duration = U64_AT(&buffer[28]); 2602 } else if (version == 0) { 2603 ctime = U32_AT(&buffer[4]); 2604 mtime = U32_AT(&buffer[8]); 2605 id = U32_AT(&buffer[12]); 2606 duration = U32_AT(&buffer[20]); 2607 } else { 2608 return ERROR_UNSUPPORTED; 2609 } 2610 2611 if (mLastTrack == NULL) 2612 return ERROR_MALFORMED; 2613 2614 mLastTrack->meta->setInt32(kKeyTrackID, id); 2615 2616 size_t matrixOffset = dynSize + 16; 2617 int32_t a00 = U32_AT(&buffer[matrixOffset]); 2618 int32_t a01 = U32_AT(&buffer[matrixOffset + 4]); 2619 int32_t a10 = U32_AT(&buffer[matrixOffset + 12]); 2620 int32_t a11 = U32_AT(&buffer[matrixOffset + 16]); 2621 2622 #if 0 2623 int32_t dx = U32_AT(&buffer[matrixOffset + 8]); 2624 int32_t dy = U32_AT(&buffer[matrixOffset + 20]); 2625 2626 ALOGI("x' = %.2f * x + %.2f * y + %.2f", 2627 a00 / 65536.0f, a01 / 65536.0f, dx / 65536.0f); 2628 ALOGI("y' = %.2f * x + %.2f * y + %.2f", 2629 a10 / 65536.0f, a11 / 65536.0f, dy / 65536.0f); 2630 #endif 2631 2632 uint32_t rotationDegrees; 2633 2634 static const int32_t kFixedOne = 0x10000; 2635 if (a00 == kFixedOne && a01 == 0 && a10 == 0 && a11 == kFixedOne) { 2636 // Identity, no rotation 2637 rotationDegrees = 0; 2638 } else if (a00 == 0 && a01 == kFixedOne && a10 == -kFixedOne && a11 == 0) { 2639 rotationDegrees = 90; 2640 } else if (a00 == 0 && a01 == -kFixedOne && a10 == kFixedOne && a11 == 0) { 2641 rotationDegrees = 270; 2642 } else if (a00 == -kFixedOne && a01 == 0 && a10 == 0 && a11 == -kFixedOne) { 2643 rotationDegrees = 180; 2644 } else { 2645 ALOGW("We only support 0,90,180,270 degree rotation matrices"); 2646 rotationDegrees = 0; 2647 } 2648 2649 if (rotationDegrees != 0) { 2650 mLastTrack->meta->setInt32(kKeyRotation, rotationDegrees); 2651 } 2652 2653 // Handle presentation display size, which could be different 2654 // from the image size indicated by kKeyWidth and kKeyHeight. 2655 uint32_t width = U32_AT(&buffer[dynSize + 52]); 2656 uint32_t height = U32_AT(&buffer[dynSize + 56]); 2657 mLastTrack->meta->setInt32(kKeyDisplayWidth, width >> 16); 2658 mLastTrack->meta->setInt32(kKeyDisplayHeight, height >> 16); 2659 2660 return OK; 2661 } 2662 2663 status_t MPEG4Extractor::parseITunesMetaData(off64_t offset, size_t size) { 2664 if (size < 4 || size == SIZE_MAX) { 2665 return ERROR_MALFORMED; 2666 } 2667 2668 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1]; 2669 if (buffer == NULL) { 2670 return ERROR_MALFORMED; 2671 } 2672 if (mDataSource->readAt( 2673 offset, buffer, size) != (ssize_t)size) { 2674 delete[] buffer; 2675 buffer = NULL; 2676 2677 return ERROR_IO; 2678 } 2679 2680 uint32_t flags = U32_AT(buffer); 2681 2682 uint32_t metadataKey = 0; 2683 char chunk[5]; 2684 MakeFourCCString(mPath[4], chunk); 2685 ALOGV("meta: %s @ %lld", chunk, (long long)offset); 2686 switch ((int32_t)mPath[4]) { 2687 case FOURCC(0xa9, 'a', 'l', 'b'): 2688 { 2689 metadataKey = kKeyAlbum; 2690 break; 2691 } 2692 case FOURCC(0xa9, 'A', 'R', 'T'): 2693 { 2694 metadataKey = kKeyArtist; 2695 break; 2696 } 2697 case FOURCC('a', 'A', 'R', 'T'): 2698 { 2699 metadataKey = kKeyAlbumArtist; 2700 break; 2701 } 2702 case FOURCC(0xa9, 'd', 'a', 'y'): 2703 { 2704 metadataKey = kKeyYear; 2705 break; 2706 } 2707 case FOURCC(0xa9, 'n', 'a', 'm'): 2708 { 2709 metadataKey = kKeyTitle; 2710 break; 2711 } 2712 case FOURCC(0xa9, 'w', 'r', 't'): 2713 { 2714 metadataKey = kKeyWriter; 2715 break; 2716 } 2717 case FOURCC('c', 'o', 'v', 'r'): 2718 { 2719 metadataKey = kKeyAlbumArt; 2720 break; 2721 } 2722 case FOURCC('g', 'n', 'r', 'e'): 2723 { 2724 metadataKey = kKeyGenre; 2725 break; 2726 } 2727 case FOURCC(0xa9, 'g', 'e', 'n'): 2728 { 2729 metadataKey = kKeyGenre; 2730 break; 2731 } 2732 case FOURCC('c', 'p', 'i', 'l'): 2733 { 2734 if (size == 9 && flags == 21) { 2735 char tmp[16]; 2736 sprintf(tmp, "%d", 2737 (int)buffer[size - 1]); 2738 2739 mFileMetaData->setCString(kKeyCompilation, tmp); 2740 } 2741 break; 2742 } 2743 case FOURCC('t', 'r', 'k', 'n'): 2744 { 2745 if (size == 16 && flags == 0) { 2746 char tmp[16]; 2747 uint16_t* pTrack = (uint16_t*)&buffer[10]; 2748 uint16_t* pTotalTracks = (uint16_t*)&buffer[12]; 2749 sprintf(tmp, "%d/%d", ntohs(*pTrack), ntohs(*pTotalTracks)); 2750 2751 mFileMetaData->setCString(kKeyCDTrackNumber, tmp); 2752 } 2753 break; 2754 } 2755 case FOURCC('d', 'i', 's', 'k'): 2756 { 2757 if ((size == 14 || size == 16) && flags == 0) { 2758 char tmp[16]; 2759 uint16_t* pDisc = (uint16_t*)&buffer[10]; 2760 uint16_t* pTotalDiscs = (uint16_t*)&buffer[12]; 2761 sprintf(tmp, "%d/%d", ntohs(*pDisc), ntohs(*pTotalDiscs)); 2762 2763 mFileMetaData->setCString(kKeyDiscNumber, tmp); 2764 } 2765 break; 2766 } 2767 case FOURCC('-', '-', '-', '-'): 2768 { 2769 buffer[size] = '\0'; 2770 switch (mPath[5]) { 2771 case FOURCC('m', 'e', 'a', 'n'): 2772 mLastCommentMean.setTo((const char *)buffer + 4); 2773 break; 2774 case FOURCC('n', 'a', 'm', 'e'): 2775 mLastCommentName.setTo((const char *)buffer + 4); 2776 break; 2777 case FOURCC('d', 'a', 't', 'a'): 2778 if (size < 8) { 2779 delete[] buffer; 2780 buffer = NULL; 2781 ALOGE("b/24346430"); 2782 return ERROR_MALFORMED; 2783 } 2784 mLastCommentData.setTo((const char *)buffer + 8); 2785 break; 2786 } 2787 2788 // Once we have a set of mean/name/data info, go ahead and process 2789 // it to see if its something we are interested in. Whether or not 2790 // were are interested in the specific tag, make sure to clear out 2791 // the set so we can be ready to process another tuple should one 2792 // show up later in the file. 2793 if ((mLastCommentMean.length() != 0) && 2794 (mLastCommentName.length() != 0) && 2795 (mLastCommentData.length() != 0)) { 2796 2797 if (mLastCommentMean == "com.apple.iTunes" 2798 && mLastCommentName == "iTunSMPB") { 2799 int32_t delay, padding; 2800 if (sscanf(mLastCommentData, 2801 " %*x %x %x %*x", &delay, &padding) == 2) { 2802 if (mLastTrack == NULL) 2803 return ERROR_MALFORMED; 2804 2805 mLastTrack->meta->setInt32(kKeyEncoderDelay, delay); 2806 mLastTrack->meta->setInt32(kKeyEncoderPadding, padding); 2807 } 2808 } 2809 2810 mLastCommentMean.clear(); 2811 mLastCommentName.clear(); 2812 mLastCommentData.clear(); 2813 } 2814 break; 2815 } 2816 2817 default: 2818 break; 2819 } 2820 2821 if (size >= 8 && metadataKey && !mFileMetaData->hasData(metadataKey)) { 2822 if (metadataKey == kKeyAlbumArt) { 2823 mFileMetaData->setData( 2824 kKeyAlbumArt, MetaData::TYPE_NONE, 2825 buffer + 8, size - 8); 2826 } else if (metadataKey == kKeyGenre) { 2827 if (flags == 0) { 2828 // uint8_t genre code, iTunes genre codes are 2829 // the standard id3 codes, except they start 2830 // at 1 instead of 0 (e.g. Pop is 14, not 13) 2831 // We use standard id3 numbering, so subtract 1. 2832 int genrecode = (int)buffer[size - 1]; 2833 genrecode--; 2834 if (genrecode < 0) { 2835 genrecode = 255; // reserved for 'unknown genre' 2836 } 2837 char genre[10]; 2838 sprintf(genre, "%d", genrecode); 2839 2840 mFileMetaData->setCString(metadataKey, genre); 2841 } else if (flags == 1) { 2842 // custom genre string 2843 buffer[size] = '\0'; 2844 2845 mFileMetaData->setCString( 2846 metadataKey, (const char *)buffer + 8); 2847 } 2848 } else { 2849 buffer[size] = '\0'; 2850 2851 mFileMetaData->setCString( 2852 metadataKey, (const char *)buffer + 8); 2853 } 2854 } 2855 2856 delete[] buffer; 2857 buffer = NULL; 2858 2859 return OK; 2860 } 2861 2862 status_t MPEG4Extractor::parseColorInfo(off64_t offset, size_t size) { 2863 if (size < 4 || size == SIZE_MAX || mLastTrack == NULL) { 2864 return ERROR_MALFORMED; 2865 } 2866 2867 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1]; 2868 if (buffer == NULL) { 2869 return ERROR_MALFORMED; 2870 } 2871 if (mDataSource->readAt(offset, buffer, size) != (ssize_t)size) { 2872 delete[] buffer; 2873 buffer = NULL; 2874 2875 return ERROR_IO; 2876 } 2877 2878 int32_t type = U32_AT(&buffer[0]); 2879 if ((type == FOURCC('n', 'c', 'l', 'x') && size >= 11) 2880 || (type == FOURCC('n', 'c', 'l', 'c' && size >= 10))) { 2881 int32_t primaries = U16_AT(&buffer[4]); 2882 int32_t transfer = U16_AT(&buffer[6]); 2883 int32_t coeffs = U16_AT(&buffer[8]); 2884 bool fullRange = (type == FOURCC('n', 'c', 'l', 'x')) && (buffer[10] & 128); 2885 2886 ColorAspects aspects; 2887 ColorUtils::convertIsoColorAspectsToCodecAspects( 2888 primaries, transfer, coeffs, fullRange, aspects); 2889 2890 // only store the first color specification 2891 if (!mLastTrack->meta->hasData(kKeyColorPrimaries)) { 2892 mLastTrack->meta->setInt32(kKeyColorPrimaries, aspects.mPrimaries); 2893 mLastTrack->meta->setInt32(kKeyTransferFunction, aspects.mTransfer); 2894 mLastTrack->meta->setInt32(kKeyColorMatrix, aspects.mMatrixCoeffs); 2895 mLastTrack->meta->setInt32(kKeyColorRange, aspects.mRange); 2896 } 2897 } 2898 2899 delete[] buffer; 2900 buffer = NULL; 2901 2902 return OK; 2903 } 2904 2905 status_t MPEG4Extractor::parse3GPPMetaData(off64_t offset, size_t size, int depth) { 2906 if (size < 4 || size == SIZE_MAX) { 2907 return ERROR_MALFORMED; 2908 } 2909 2910 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1]; 2911 if (buffer == NULL) { 2912 return ERROR_MALFORMED; 2913 } 2914 if (mDataSource->readAt( 2915 offset, buffer, size) != (ssize_t)size) { 2916 delete[] buffer; 2917 buffer = NULL; 2918 2919 return ERROR_IO; 2920 } 2921 2922 uint32_t metadataKey = 0; 2923 switch (mPath[depth]) { 2924 case FOURCC('t', 'i', 't', 'l'): 2925 { 2926 metadataKey = kKeyTitle; 2927 break; 2928 } 2929 case FOURCC('p', 'e', 'r', 'f'): 2930 { 2931 metadataKey = kKeyArtist; 2932 break; 2933 } 2934 case FOURCC('a', 'u', 't', 'h'): 2935 { 2936 metadataKey = kKeyWriter; 2937 break; 2938 } 2939 case FOURCC('g', 'n', 'r', 'e'): 2940 { 2941 metadataKey = kKeyGenre; 2942 break; 2943 } 2944 case FOURCC('a', 'l', 'b', 'm'): 2945 { 2946 if (buffer[size - 1] != '\0') { 2947 char tmp[4]; 2948 sprintf(tmp, "%u", buffer[size - 1]); 2949 2950 mFileMetaData->setCString(kKeyCDTrackNumber, tmp); 2951 } 2952 2953 metadataKey = kKeyAlbum; 2954 break; 2955 } 2956 case FOURCC('y', 'r', 'r', 'c'): 2957 { 2958 char tmp[5]; 2959 uint16_t year = U16_AT(&buffer[4]); 2960 2961 if (year < 10000) { 2962 sprintf(tmp, "%u", year); 2963 2964 mFileMetaData->setCString(kKeyYear, tmp); 2965 } 2966 break; 2967 } 2968 2969 default: 2970 break; 2971 } 2972 2973 if (metadataKey > 0) { 2974 bool isUTF8 = true; // Common case 2975 char16_t *framedata = NULL; 2976 int len16 = 0; // Number of UTF-16 characters 2977 2978 // smallest possible valid UTF-16 string w BOM: 0xfe 0xff 0x00 0x00 2979 if (size < 6) { 2980 return ERROR_MALFORMED; 2981 } 2982 2983 if (size - 6 >= 4) { 2984 len16 = ((size - 6) / 2) - 1; // don't include 0x0000 terminator 2985 framedata = (char16_t *)(buffer + 6); 2986 if (0xfffe == *framedata) { 2987 // endianness marker (BOM) doesn't match host endianness 2988 for (int i = 0; i < len16; i++) { 2989 framedata[i] = bswap_16(framedata[i]); 2990 } 2991 // BOM is now swapped to 0xfeff, we will execute next block too 2992 } 2993 2994 if (0xfeff == *framedata) { 2995 // Remove the BOM 2996 framedata++; 2997 len16--; 2998 isUTF8 = false; 2999 } 3000 // else normal non-zero-length UTF-8 string 3001 // we can't handle UTF-16 without BOM as there is no other 3002 // indication of encoding. 3003 } 3004 3005 if (isUTF8) { 3006 buffer[size] = 0; 3007 mFileMetaData->setCString(metadataKey, (const char *)buffer + 6); 3008 } else { 3009 // Convert from UTF-16 string to UTF-8 string. 3010 String8 tmpUTF8str(framedata, len16); 3011 mFileMetaData->setCString(metadataKey, tmpUTF8str.string()); 3012 } 3013 } 3014 3015 delete[] buffer; 3016 buffer = NULL; 3017 3018 return OK; 3019 } 3020 3021 void MPEG4Extractor::parseID3v2MetaData(off64_t offset) { 3022 ID3 id3(mDataSource, true /* ignorev1 */, offset); 3023 3024 if (id3.isValid()) { 3025 struct Map { 3026 int key; 3027 const char *tag1; 3028 const char *tag2; 3029 }; 3030 static const Map kMap[] = { 3031 { kKeyAlbum, "TALB", "TAL" }, 3032 { kKeyArtist, "TPE1", "TP1" }, 3033 { kKeyAlbumArtist, "TPE2", "TP2" }, 3034 { kKeyComposer, "TCOM", "TCM" }, 3035 { kKeyGenre, "TCON", "TCO" }, 3036 { kKeyTitle, "TIT2", "TT2" }, 3037 { kKeyYear, "TYE", "TYER" }, 3038 { kKeyAuthor, "TXT", "TEXT" }, 3039 { kKeyCDTrackNumber, "TRK", "TRCK" }, 3040 { kKeyDiscNumber, "TPA", "TPOS" }, 3041 { kKeyCompilation, "TCP", "TCMP" }, 3042 }; 3043 static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]); 3044 3045 for (size_t i = 0; i < kNumMapEntries; ++i) { 3046 if (!mFileMetaData->hasData(kMap[i].key)) { 3047 ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1); 3048 if (it->done()) { 3049 delete it; 3050 it = new ID3::Iterator(id3, kMap[i].tag2); 3051 } 3052 3053 if (it->done()) { 3054 delete it; 3055 continue; 3056 } 3057 3058 String8 s; 3059 it->getString(&s); 3060 delete it; 3061 3062 mFileMetaData->setCString(kMap[i].key, s); 3063 } 3064 } 3065 3066 size_t dataSize; 3067 String8 mime; 3068 const void *data = id3.getAlbumArt(&dataSize, &mime); 3069 3070 if (data) { 3071 mFileMetaData->setData(kKeyAlbumArt, MetaData::TYPE_NONE, data, dataSize); 3072 mFileMetaData->setCString(kKeyAlbumArtMIME, mime.string()); 3073 } 3074 } 3075 } 3076 3077 sp<IMediaSource> MPEG4Extractor::getTrack(size_t index) { 3078 status_t err; 3079 if ((err = readMetaData()) != OK) { 3080 return NULL; 3081 } 3082 3083 Track *track = mFirstTrack; 3084 while (index > 0) { 3085 if (track == NULL) { 3086 return NULL; 3087 } 3088 3089 track = track->next; 3090 --index; 3091 } 3092 3093 if (track == NULL) { 3094 return NULL; 3095 } 3096 3097 3098 Trex *trex = NULL; 3099 int32_t trackId; 3100 if (track->meta->findInt32(kKeyTrackID, &trackId)) { 3101 for (size_t i = 0; i < mTrex.size(); i++) { 3102 Trex *t = &mTrex.editItemAt(i); 3103 if (t->track_ID == (uint32_t) trackId) { 3104 trex = t; 3105 break; 3106 } 3107 } 3108 } else { 3109 ALOGE("b/21657957"); 3110 return NULL; 3111 } 3112 3113 ALOGV("getTrack called, pssh: %zu", mPssh.size()); 3114 3115 const char *mime; 3116 if (!track->meta->findCString(kKeyMIMEType, &mime)) { 3117 return NULL; 3118 } 3119 3120 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) { 3121 uint32_t type; 3122 const void *data; 3123 size_t size; 3124 if (!track->meta->findData(kKeyAVCC, &type, &data, &size)) { 3125 return NULL; 3126 } 3127 3128 const uint8_t *ptr = (const uint8_t *)data; 3129 3130 if (size < 7 || ptr[0] != 1) { // configurationVersion == 1 3131 return NULL; 3132 } 3133 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) { 3134 uint32_t type; 3135 const void *data; 3136 size_t size; 3137 if (!track->meta->findData(kKeyHVCC, &type, &data, &size)) { 3138 return NULL; 3139 } 3140 3141 const uint8_t *ptr = (const uint8_t *)data; 3142 3143 if (size < 22 || ptr[0] != 1) { // configurationVersion == 1 3144 return NULL; 3145 } 3146 } 3147 3148 return new MPEG4Source(this, 3149 track->meta, mDataSource, track->timescale, track->sampleTable, 3150 mSidxEntries, trex, mMoofOffset); 3151 } 3152 3153 // static 3154 status_t MPEG4Extractor::verifyTrack(Track *track) { 3155 const char *mime; 3156 CHECK(track->meta->findCString(kKeyMIMEType, &mime)); 3157 3158 uint32_t type; 3159 const void *data; 3160 size_t size; 3161 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) { 3162 if (!track->meta->findData(kKeyAVCC, &type, &data, &size) 3163 || type != kTypeAVCC) { 3164 return ERROR_MALFORMED; 3165 } 3166 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) { 3167 if (!track->meta->findData(kKeyHVCC, &type, &data, &size) 3168 || type != kTypeHVCC) { 3169 return ERROR_MALFORMED; 3170 } 3171 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4) 3172 || !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2) 3173 || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) { 3174 if (!track->meta->findData(kKeyESDS, &type, &data, &size) 3175 || type != kTypeESDS) { 3176 return ERROR_MALFORMED; 3177 } 3178 } 3179 3180 if (track->sampleTable == NULL || !track->sampleTable->isValid()) { 3181 // Make sure we have all the metadata we need. 3182 ALOGE("stbl atom missing/invalid."); 3183 return ERROR_MALFORMED; 3184 } 3185 3186 if (track->timescale == 0) { 3187 ALOGE("timescale invalid."); 3188 return ERROR_MALFORMED; 3189 } 3190 3191 return OK; 3192 } 3193 3194 typedef enum { 3195 //AOT_NONE = -1, 3196 //AOT_NULL_OBJECT = 0, 3197 //AOT_AAC_MAIN = 1, /**< Main profile */ 3198 AOT_AAC_LC = 2, /**< Low Complexity object */ 3199 //AOT_AAC_SSR = 3, 3200 //AOT_AAC_LTP = 4, 3201 AOT_SBR = 5, 3202 //AOT_AAC_SCAL = 6, 3203 //AOT_TWIN_VQ = 7, 3204 //AOT_CELP = 8, 3205 //AOT_HVXC = 9, 3206 //AOT_RSVD_10 = 10, /**< (reserved) */ 3207 //AOT_RSVD_11 = 11, /**< (reserved) */ 3208 //AOT_TTSI = 12, /**< TTSI Object */ 3209 //AOT_MAIN_SYNTH = 13, /**< Main Synthetic object */ 3210 //AOT_WAV_TAB_SYNTH = 14, /**< Wavetable Synthesis object */ 3211 //AOT_GEN_MIDI = 15, /**< General MIDI object */ 3212 //AOT_ALG_SYNTH_AUD_FX = 16, /**< Algorithmic Synthesis and Audio FX object */ 3213 AOT_ER_AAC_LC = 17, /**< Error Resilient(ER) AAC Low Complexity */ 3214 //AOT_RSVD_18 = 18, /**< (reserved) */ 3215 //AOT_ER_AAC_LTP = 19, /**< Error Resilient(ER) AAC LTP object */ 3216 AOT_ER_AAC_SCAL = 20, /**< Error Resilient(ER) AAC Scalable object */ 3217 //AOT_ER_TWIN_VQ = 21, /**< Error Resilient(ER) TwinVQ object */ 3218 AOT_ER_BSAC = 22, /**< Error Resilient(ER) BSAC object */ 3219 AOT_ER_AAC_LD = 23, /**< Error Resilient(ER) AAC LowDelay object */ 3220 //AOT_ER_CELP = 24, /**< Error Resilient(ER) CELP object */ 3221 //AOT_ER_HVXC = 25, /**< Error Resilient(ER) HVXC object */ 3222 //AOT_ER_HILN = 26, /**< Error Resilient(ER) HILN object */ 3223 //AOT_ER_PARA = 27, /**< Error Resilient(ER) Parametric object */ 3224 //AOT_RSVD_28 = 28, /**< might become SSC */ 3225 AOT_PS = 29, /**< PS, Parametric Stereo (includes SBR) */ 3226 //AOT_MPEGS = 30, /**< MPEG Surround */ 3227 3228 AOT_ESCAPE = 31, /**< Signal AOT uses more than 5 bits */ 3229 3230 //AOT_MP3ONMP4_L1 = 32, /**< MPEG-Layer1 in mp4 */ 3231 //AOT_MP3ONMP4_L2 = 33, /**< MPEG-Layer2 in mp4 */ 3232 //AOT_MP3ONMP4_L3 = 34, /**< MPEG-Layer3 in mp4 */ 3233 //AOT_RSVD_35 = 35, /**< might become DST */ 3234 //AOT_RSVD_36 = 36, /**< might become ALS */ 3235 //AOT_AAC_SLS = 37, /**< AAC + SLS */ 3236 //AOT_SLS = 38, /**< SLS */ 3237 //AOT_ER_AAC_ELD = 39, /**< AAC Enhanced Low Delay */ 3238 3239 //AOT_USAC = 42, /**< USAC */ 3240 //AOT_SAOC = 43, /**< SAOC */ 3241 //AOT_LD_MPEGS = 44, /**< Low Delay MPEG Surround */ 3242 3243 //AOT_RSVD50 = 50, /**< Interim AOT for Rsvd50 */ 3244 } AUDIO_OBJECT_TYPE; 3245 3246 status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio( 3247 const void *esds_data, size_t esds_size) { 3248 ESDS esds(esds_data, esds_size); 3249 3250 uint8_t objectTypeIndication; 3251 if (esds.getObjectTypeIndication(&objectTypeIndication) != OK) { 3252 return ERROR_MALFORMED; 3253 } 3254 3255 if (objectTypeIndication == 0xe1) { 3256 // This isn't MPEG4 audio at all, it's QCELP 14k... 3257 if (mLastTrack == NULL) 3258 return ERROR_MALFORMED; 3259 3260 mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_QCELP); 3261 return OK; 3262 } 3263 3264 if (objectTypeIndication == 0x6b) { 3265 // The media subtype is MP3 audio 3266 // Our software MP3 audio decoder may not be able to handle 3267 // packetized MP3 audio; for now, lets just return ERROR_UNSUPPORTED 3268 ALOGE("MP3 track in MP4/3GPP file is not supported"); 3269 return ERROR_UNSUPPORTED; 3270 } 3271 3272 const uint8_t *csd; 3273 size_t csd_size; 3274 if (esds.getCodecSpecificInfo( 3275 (const void **)&csd, &csd_size) != OK) { 3276 return ERROR_MALFORMED; 3277 } 3278 3279 if (kUseHexDump) { 3280 printf("ESD of size %zu\n", csd_size); 3281 hexdump(csd, csd_size); 3282 } 3283 3284 if (csd_size == 0) { 3285 // There's no further information, i.e. no codec specific data 3286 // Let's assume that the information provided in the mpeg4 headers 3287 // is accurate and hope for the best. 3288 3289 return OK; 3290 } 3291 3292 if (csd_size < 2) { 3293 return ERROR_MALFORMED; 3294 } 3295 3296 static uint32_t kSamplingRate[] = { 3297 96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050, 3298 16000, 12000, 11025, 8000, 7350 3299 }; 3300 3301 ABitReader br(csd, csd_size); 3302 uint32_t objectType = br.getBits(5); 3303 3304 if (objectType == 31) { // AAC-ELD => additional 6 bits 3305 objectType = 32 + br.getBits(6); 3306 } 3307 3308 if (mLastTrack == NULL) 3309 return ERROR_MALFORMED; 3310 3311 //keep AOT type 3312 mLastTrack->meta->setInt32(kKeyAACAOT, objectType); 3313 3314 uint32_t freqIndex = br.getBits(4); 3315 3316 int32_t sampleRate = 0; 3317 int32_t numChannels = 0; 3318 if (freqIndex == 15) { 3319 if (br.numBitsLeft() < 28) return ERROR_MALFORMED; 3320 sampleRate = br.getBits(24); 3321 numChannels = br.getBits(4); 3322 } else { 3323 if (br.numBitsLeft() < 4) return ERROR_MALFORMED; 3324 numChannels = br.getBits(4); 3325 3326 if (freqIndex == 13 || freqIndex == 14) { 3327 return ERROR_MALFORMED; 3328 } 3329 3330 sampleRate = kSamplingRate[freqIndex]; 3331 } 3332 3333 if (objectType == AOT_SBR || objectType == AOT_PS) {//SBR specific config per 14496-3 table 1.13 3334 if (br.numBitsLeft() < 4) return ERROR_MALFORMED; 3335 uint32_t extFreqIndex = br.getBits(4); 3336 int32_t extSampleRate __unused; 3337 if (extFreqIndex == 15) { 3338 if (csd_size < 8) { 3339 return ERROR_MALFORMED; 3340 } 3341 if (br.numBitsLeft() < 24) return ERROR_MALFORMED; 3342 extSampleRate = br.getBits(24); 3343 } else { 3344 if (extFreqIndex == 13 || extFreqIndex == 14) { 3345 return ERROR_MALFORMED; 3346 } 3347 extSampleRate = kSamplingRate[extFreqIndex]; 3348 } 3349 //TODO: save the extension sampling rate value in meta data => 3350 // mLastTrack->meta->setInt32(kKeyExtSampleRate, extSampleRate); 3351 } 3352 3353 switch (numChannels) { 3354 // values defined in 14496-3_2009 amendment-4 Table 1.19 - Channel Configuration 3355 case 0: 3356 case 1:// FC 3357 case 2:// FL FR 3358 case 3:// FC, FL FR 3359 case 4:// FC, FL FR, RC 3360 case 5:// FC, FL FR, SL SR 3361 case 6:// FC, FL FR, SL SR, LFE 3362 //numChannels already contains the right value 3363 break; 3364 case 11:// FC, FL FR, SL SR, RC, LFE 3365 numChannels = 7; 3366 break; 3367 case 7: // FC, FCL FCR, FL FR, SL SR, LFE 3368 case 12:// FC, FL FR, SL SR, RL RR, LFE 3369 case 14:// FC, FL FR, SL SR, LFE, FHL FHR 3370 numChannels = 8; 3371 break; 3372 default: 3373 return ERROR_UNSUPPORTED; 3374 } 3375 3376 { 3377 if (objectType == AOT_SBR || objectType == AOT_PS) { 3378 if (br.numBitsLeft() < 5) return ERROR_MALFORMED; 3379 objectType = br.getBits(5); 3380 3381 if (objectType == AOT_ESCAPE) { 3382 if (br.numBitsLeft() < 6) return ERROR_MALFORMED; 3383 objectType = 32 + br.getBits(6); 3384 } 3385 } 3386 if (objectType == AOT_AAC_LC || objectType == AOT_ER_AAC_LC || 3387 objectType == AOT_ER_AAC_LD || objectType == AOT_ER_AAC_SCAL || 3388 objectType == AOT_ER_BSAC) { 3389 if (br.numBitsLeft() < 2) return ERROR_MALFORMED; 3390 const int32_t frameLengthFlag __unused = br.getBits(1); 3391 3392 const int32_t dependsOnCoreCoder = br.getBits(1); 3393 3394 if (dependsOnCoreCoder ) { 3395 if (br.numBitsLeft() < 14) return ERROR_MALFORMED; 3396 const int32_t coreCoderDelay __unused = br.getBits(14); 3397 } 3398 3399 int32_t extensionFlag = -1; 3400 if (br.numBitsLeft() > 0) { 3401 extensionFlag = br.getBits(1); 3402 } else { 3403 switch (objectType) { 3404 // 14496-3 4.5.1.1 extensionFlag 3405 case AOT_AAC_LC: 3406 extensionFlag = 0; 3407 break; 3408 case AOT_ER_AAC_LC: 3409 case AOT_ER_AAC_SCAL: 3410 case AOT_ER_BSAC: 3411 case AOT_ER_AAC_LD: 3412 extensionFlag = 1; 3413 break; 3414 default: 3415 return ERROR_MALFORMED; 3416 break; 3417 } 3418 ALOGW("csd missing extension flag; assuming %d for object type %u.", 3419 extensionFlag, objectType); 3420 } 3421 3422 if (numChannels == 0) { 3423 int32_t channelsEffectiveNum = 0; 3424 int32_t channelsNum = 0; 3425 if (br.numBitsLeft() < 32) { 3426 return ERROR_MALFORMED; 3427 } 3428 const int32_t ElementInstanceTag __unused = br.getBits(4); 3429 const int32_t Profile __unused = br.getBits(2); 3430 const int32_t SamplingFrequencyIndex __unused = br.getBits(4); 3431 const int32_t NumFrontChannelElements = br.getBits(4); 3432 const int32_t NumSideChannelElements = br.getBits(4); 3433 const int32_t NumBackChannelElements = br.getBits(4); 3434 const int32_t NumLfeChannelElements = br.getBits(2); 3435 const int32_t NumAssocDataElements __unused = br.getBits(3); 3436 const int32_t NumValidCcElements __unused = br.getBits(4); 3437 3438 const int32_t MonoMixdownPresent = br.getBits(1); 3439 3440 if (MonoMixdownPresent != 0) { 3441 if (br.numBitsLeft() < 4) return ERROR_MALFORMED; 3442 const int32_t MonoMixdownElementNumber __unused = br.getBits(4); 3443 } 3444 3445 if (br.numBitsLeft() < 1) return ERROR_MALFORMED; 3446 const int32_t StereoMixdownPresent = br.getBits(1); 3447 if (StereoMixdownPresent != 0) { 3448 if (br.numBitsLeft() < 4) return ERROR_MALFORMED; 3449 const int32_t StereoMixdownElementNumber __unused = br.getBits(4); 3450 } 3451 3452 if (br.numBitsLeft() < 1) return ERROR_MALFORMED; 3453 const int32_t MatrixMixdownIndexPresent = br.getBits(1); 3454 if (MatrixMixdownIndexPresent != 0) { 3455 if (br.numBitsLeft() < 3) return ERROR_MALFORMED; 3456 const int32_t MatrixMixdownIndex __unused = br.getBits(2); 3457 const int32_t PseudoSurroundEnable __unused = br.getBits(1); 3458 } 3459 3460 int i; 3461 for (i=0; i < NumFrontChannelElements; i++) { 3462 if (br.numBitsLeft() < 5) return ERROR_MALFORMED; 3463 const int32_t FrontElementIsCpe = br.getBits(1); 3464 const int32_t FrontElementTagSelect __unused = br.getBits(4); 3465 channelsNum += FrontElementIsCpe ? 2 : 1; 3466 } 3467 3468 for (i=0; i < NumSideChannelElements; i++) { 3469 if (br.numBitsLeft() < 5) return ERROR_MALFORMED; 3470 const int32_t SideElementIsCpe = br.getBits(1); 3471 const int32_t SideElementTagSelect __unused = br.getBits(4); 3472 channelsNum += SideElementIsCpe ? 2 : 1; 3473 } 3474 3475 for (i=0; i < NumBackChannelElements; i++) { 3476 if (br.numBitsLeft() < 5) return ERROR_MALFORMED; 3477 const int32_t BackElementIsCpe = br.getBits(1); 3478 const int32_t BackElementTagSelect __unused = br.getBits(4); 3479 channelsNum += BackElementIsCpe ? 2 : 1; 3480 } 3481 channelsEffectiveNum = channelsNum; 3482 3483 for (i=0; i < NumLfeChannelElements; i++) { 3484 if (br.numBitsLeft() < 4) return ERROR_MALFORMED; 3485 const int32_t LfeElementTagSelect __unused = br.getBits(4); 3486 channelsNum += 1; 3487 } 3488 ALOGV("mpeg4 audio channelsNum = %d", channelsNum); 3489 ALOGV("mpeg4 audio channelsEffectiveNum = %d", channelsEffectiveNum); 3490 numChannels = channelsNum; 3491 } 3492 } 3493 } 3494 3495 if (numChannels == 0) { 3496 return ERROR_UNSUPPORTED; 3497 } 3498 3499 if (mLastTrack == NULL) 3500 return ERROR_MALFORMED; 3501 3502 int32_t prevSampleRate; 3503 CHECK(mLastTrack->meta->findInt32(kKeySampleRate, &prevSampleRate)); 3504 3505 if (prevSampleRate != sampleRate) { 3506 ALOGV("mpeg4 audio sample rate different from previous setting. " 3507 "was: %d, now: %d", prevSampleRate, sampleRate); 3508 } 3509 3510 mLastTrack->meta->setInt32(kKeySampleRate, sampleRate); 3511 3512 int32_t prevChannelCount; 3513 CHECK(mLastTrack->meta->findInt32(kKeyChannelCount, &prevChannelCount)); 3514 3515 if (prevChannelCount != numChannels) { 3516 ALOGV("mpeg4 audio channel count different from previous setting. " 3517 "was: %d, now: %d", prevChannelCount, numChannels); 3518 } 3519 3520 mLastTrack->meta->setInt32(kKeyChannelCount, numChannels); 3521 3522 return OK; 3523 } 3524 3525 //////////////////////////////////////////////////////////////////////////////// 3526 3527 MPEG4Source::MPEG4Source( 3528 const sp<MPEG4Extractor> &owner, 3529 const sp<MetaData> &format, 3530 const sp<DataSource> &dataSource, 3531 int32_t timeScale, 3532 const sp<SampleTable> &sampleTable, 3533 Vector<SidxEntry> &sidx, 3534 const Trex *trex, 3535 off64_t firstMoofOffset) 3536 : mOwner(owner), 3537 mFormat(format), 3538 mDataSource(dataSource), 3539 mTimescale(timeScale), 3540 mSampleTable(sampleTable), 3541 mCurrentSampleIndex(0), 3542 mCurrentFragmentIndex(0), 3543 mSegments(sidx), 3544 mTrex(trex), 3545 mFirstMoofOffset(firstMoofOffset), 3546 mCurrentMoofOffset(firstMoofOffset), 3547 mCurrentTime(0), 3548 mCurrentSampleInfoAllocSize(0), 3549 mCurrentSampleInfoSizes(NULL), 3550 mCurrentSampleInfoOffsetsAllocSize(0), 3551 mCurrentSampleInfoOffsets(NULL), 3552 mIsAVC(false), 3553 mIsHEVC(false), 3554 mNALLengthSize(0), 3555 mStarted(false), 3556 mGroup(NULL), 3557 mBuffer(NULL), 3558 mWantsNALFragments(false), 3559 mSrcBuffer(NULL) { 3560 3561 memset(&mTrackFragmentHeaderInfo, 0, sizeof(mTrackFragmentHeaderInfo)); 3562 3563 mFormat->findInt32(kKeyCryptoMode, &mCryptoMode); 3564 mDefaultIVSize = 0; 3565 mFormat->findInt32(kKeyCryptoDefaultIVSize, &mDefaultIVSize); 3566 uint32_t keytype; 3567 const void *key; 3568 size_t keysize; 3569 if (mFormat->findData(kKeyCryptoKey, &keytype, &key, &keysize)) { 3570 CHECK(keysize <= 16); 3571 memset(mCryptoKey, 0, 16); 3572 memcpy(mCryptoKey, key, keysize); 3573 } 3574 3575 const char *mime; 3576 bool success = mFormat->findCString(kKeyMIMEType, &mime); 3577 CHECK(success); 3578 3579 mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC); 3580 mIsHEVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC); 3581 3582 if (mIsAVC) { 3583 uint32_t type; 3584 const void *data; 3585 size_t size; 3586 CHECK(format->findData(kKeyAVCC, &type, &data, &size)); 3587 3588 const uint8_t *ptr = (const uint8_t *)data; 3589 3590 CHECK(size >= 7); 3591 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1 3592 3593 // The number of bytes used to encode the length of a NAL unit. 3594 mNALLengthSize = 1 + (ptr[4] & 3); 3595 } else if (mIsHEVC) { 3596 uint32_t type; 3597 const void *data; 3598 size_t size; 3599 CHECK(format->findData(kKeyHVCC, &type, &data, &size)); 3600 3601 const uint8_t *ptr = (const uint8_t *)data; 3602 3603 CHECK(size >= 22); 3604 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1 3605 3606 mNALLengthSize = 1 + (ptr[14 + 7] & 3); 3607 } 3608 3609 CHECK(format->findInt32(kKeyTrackID, &mTrackId)); 3610 3611 if (mFirstMoofOffset != 0) { 3612 off64_t offset = mFirstMoofOffset; 3613 parseChunk(&offset); 3614 } 3615 } 3616 3617 MPEG4Source::~MPEG4Source() { 3618 if (mStarted) { 3619 stop(); 3620 } 3621 free(mCurrentSampleInfoSizes); 3622 free(mCurrentSampleInfoOffsets); 3623 } 3624 3625 status_t MPEG4Source::start(MetaData *params) { 3626 Mutex::Autolock autoLock(mLock); 3627 3628 CHECK(!mStarted); 3629 3630 int32_t val; 3631 if (params && params->findInt32(kKeyWantsNALFragments, &val) 3632 && val != 0) { 3633 mWantsNALFragments = true; 3634 } else { 3635 mWantsNALFragments = false; 3636 } 3637 3638 int32_t tmp; 3639 CHECK(mFormat->findInt32(kKeyMaxInputSize, &tmp)); 3640 size_t max_size = tmp; 3641 3642 // A somewhat arbitrary limit that should be sufficient for 8k video frames 3643 // If you see the message below for a valid input stream: increase the limit 3644 const size_t kMaxBufferSize = 64 * 1024 * 1024; 3645 if (max_size > kMaxBufferSize) { 3646 ALOGE("bogus max input size: %zu > %zu", max_size, kMaxBufferSize); 3647 return ERROR_MALFORMED; 3648 } 3649 if (max_size == 0) { 3650 ALOGE("zero max input size"); 3651 return ERROR_MALFORMED; 3652 } 3653 3654 // Allow up to kMaxBuffers, but not if the total exceeds kMaxBufferSize. 3655 const size_t kMaxBuffers = 8; 3656 const size_t buffers = min(kMaxBufferSize / max_size, kMaxBuffers); 3657 mGroup = new MediaBufferGroup(buffers, max_size); 3658 mSrcBuffer = new (std::nothrow) uint8_t[max_size]; 3659 if (mSrcBuffer == NULL) { 3660 // file probably specified a bad max size 3661 delete mGroup; 3662 mGroup = NULL; 3663 return ERROR_MALFORMED; 3664 } 3665 3666 mStarted = true; 3667 3668 return OK; 3669 } 3670 3671 status_t MPEG4Source::stop() { 3672 Mutex::Autolock autoLock(mLock); 3673 3674 CHECK(mStarted); 3675 3676 if (mBuffer != NULL) { 3677 mBuffer->release(); 3678 mBuffer = NULL; 3679 } 3680 3681 delete[] mSrcBuffer; 3682 mSrcBuffer = NULL; 3683 3684 delete mGroup; 3685 mGroup = NULL; 3686 3687 mStarted = false; 3688 mCurrentSampleIndex = 0; 3689 3690 return OK; 3691 } 3692 3693 status_t MPEG4Source::parseChunk(off64_t *offset) { 3694 uint32_t hdr[2]; 3695 if (mDataSource->readAt(*offset, hdr, 8) < 8) { 3696 return ERROR_IO; 3697 } 3698 uint64_t chunk_size = ntohl(hdr[0]); 3699 uint32_t chunk_type = ntohl(hdr[1]); 3700 off64_t data_offset = *offset + 8; 3701 3702 if (chunk_size == 1) { 3703 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) { 3704 return ERROR_IO; 3705 } 3706 chunk_size = ntoh64(chunk_size); 3707 data_offset += 8; 3708 3709 if (chunk_size < 16) { 3710 // The smallest valid chunk is 16 bytes long in this case. 3711 return ERROR_MALFORMED; 3712 } 3713 } else if (chunk_size < 8) { 3714 // The smallest valid chunk is 8 bytes long. 3715 return ERROR_MALFORMED; 3716 } 3717 3718 char chunk[5]; 3719 MakeFourCCString(chunk_type, chunk); 3720 ALOGV("MPEG4Source chunk %s @ %#llx", chunk, (long long)*offset); 3721 3722 off64_t chunk_data_size = *offset + chunk_size - data_offset; 3723 3724 switch(chunk_type) { 3725 3726 case FOURCC('t', 'r', 'a', 'f'): 3727 case FOURCC('m', 'o', 'o', 'f'): { 3728 off64_t stop_offset = *offset + chunk_size; 3729 *offset = data_offset; 3730 while (*offset < stop_offset) { 3731 status_t err = parseChunk(offset); 3732 if (err != OK) { 3733 return err; 3734 } 3735 } 3736 if (chunk_type == FOURCC('m', 'o', 'o', 'f')) { 3737 // *offset points to the box following this moof. Find the next moof from there. 3738 3739 while (true) { 3740 if (mDataSource->readAt(*offset, hdr, 8) < 8) { 3741 return ERROR_END_OF_STREAM; 3742 } 3743 chunk_size = ntohl(hdr[0]); 3744 chunk_type = ntohl(hdr[1]); 3745 if (chunk_type == FOURCC('m', 'o', 'o', 'f')) { 3746 mNextMoofOffset = *offset; 3747 break; 3748 } 3749 *offset += chunk_size; 3750 } 3751 } 3752 break; 3753 } 3754 3755 case FOURCC('t', 'f', 'h', 'd'): { 3756 status_t err; 3757 if ((err = parseTrackFragmentHeader(data_offset, chunk_data_size)) != OK) { 3758 return err; 3759 } 3760 *offset += chunk_size; 3761 break; 3762 } 3763 3764 case FOURCC('t', 'r', 'u', 'n'): { 3765 status_t err; 3766 if (mLastParsedTrackId == mTrackId) { 3767 if ((err = parseTrackFragmentRun(data_offset, chunk_data_size)) != OK) { 3768 return err; 3769 } 3770 } 3771 3772 *offset += chunk_size; 3773 break; 3774 } 3775 3776 case FOURCC('s', 'a', 'i', 'z'): { 3777 status_t err; 3778 if ((err = parseSampleAuxiliaryInformationSizes(data_offset, chunk_data_size)) != OK) { 3779 return err; 3780 } 3781 *offset += chunk_size; 3782 break; 3783 } 3784 case FOURCC('s', 'a', 'i', 'o'): { 3785 status_t err; 3786 if ((err = parseSampleAuxiliaryInformationOffsets(data_offset, chunk_data_size)) != OK) { 3787 return err; 3788 } 3789 *offset += chunk_size; 3790 break; 3791 } 3792 3793 case FOURCC('m', 'd', 'a', 't'): { 3794 // parse DRM info if present 3795 ALOGV("MPEG4Source::parseChunk mdat"); 3796 // if saiz/saoi was previously observed, do something with the sampleinfos 3797 *offset += chunk_size; 3798 break; 3799 } 3800 3801 default: { 3802 *offset += chunk_size; 3803 break; 3804 } 3805 } 3806 return OK; 3807 } 3808 3809 status_t MPEG4Source::parseSampleAuxiliaryInformationSizes( 3810 off64_t offset, off64_t /* size */) { 3811 ALOGV("parseSampleAuxiliaryInformationSizes"); 3812 // 14496-12 8.7.12 3813 uint8_t version; 3814 if (mDataSource->readAt( 3815 offset, &version, sizeof(version)) 3816 < (ssize_t)sizeof(version)) { 3817 return ERROR_IO; 3818 } 3819 3820 if (version != 0) { 3821 return ERROR_UNSUPPORTED; 3822 } 3823 offset++; 3824 3825 uint32_t flags; 3826 if (!mDataSource->getUInt24(offset, &flags)) { 3827 return ERROR_IO; 3828 } 3829 offset += 3; 3830 3831 if (flags & 1) { 3832 uint32_t tmp; 3833 if (!mDataSource->getUInt32(offset, &tmp)) { 3834 return ERROR_MALFORMED; 3835 } 3836 mCurrentAuxInfoType = tmp; 3837 offset += 4; 3838 if (!mDataSource->getUInt32(offset, &tmp)) { 3839 return ERROR_MALFORMED; 3840 } 3841 mCurrentAuxInfoTypeParameter = tmp; 3842 offset += 4; 3843 } 3844 3845 uint8_t defsize; 3846 if (mDataSource->readAt(offset, &defsize, 1) != 1) { 3847 return ERROR_MALFORMED; 3848 } 3849 mCurrentDefaultSampleInfoSize = defsize; 3850 offset++; 3851 3852 uint32_t smplcnt; 3853 if (!mDataSource->getUInt32(offset, &smplcnt)) { 3854 return ERROR_MALFORMED; 3855 } 3856 mCurrentSampleInfoCount = smplcnt; 3857 offset += 4; 3858 3859 if (mCurrentDefaultSampleInfoSize != 0) { 3860 ALOGV("@@@@ using default sample info size of %d", mCurrentDefaultSampleInfoSize); 3861 return OK; 3862 } 3863 if (smplcnt > mCurrentSampleInfoAllocSize) { 3864 mCurrentSampleInfoSizes = (uint8_t*) realloc(mCurrentSampleInfoSizes, smplcnt); 3865 mCurrentSampleInfoAllocSize = smplcnt; 3866 } 3867 3868 mDataSource->readAt(offset, mCurrentSampleInfoSizes, smplcnt); 3869 return OK; 3870 } 3871 3872 status_t MPEG4Source::parseSampleAuxiliaryInformationOffsets( 3873 off64_t offset, off64_t /* size */) { 3874 ALOGV("parseSampleAuxiliaryInformationOffsets"); 3875 // 14496-12 8.7.13 3876 uint8_t version; 3877 if (mDataSource->readAt(offset, &version, sizeof(version)) != 1) { 3878 return ERROR_IO; 3879 } 3880 offset++; 3881 3882 uint32_t flags; 3883 if (!mDataSource->getUInt24(offset, &flags)) { 3884 return ERROR_IO; 3885 } 3886 offset += 3; 3887 3888 uint32_t entrycount; 3889 if (!mDataSource->getUInt32(offset, &entrycount)) { 3890 return ERROR_IO; 3891 } 3892 offset += 4; 3893 if (entrycount == 0) { 3894 return OK; 3895 } 3896 if (entrycount > UINT32_MAX / 8) { 3897 return ERROR_MALFORMED; 3898 } 3899 3900 if (entrycount > mCurrentSampleInfoOffsetsAllocSize) { 3901 uint64_t *newPtr = (uint64_t *)realloc(mCurrentSampleInfoOffsets, entrycount * 8); 3902 if (newPtr == NULL) { 3903 return NO_MEMORY; 3904 } 3905 mCurrentSampleInfoOffsets = newPtr; 3906 mCurrentSampleInfoOffsetsAllocSize = entrycount; 3907 } 3908 mCurrentSampleInfoOffsetCount = entrycount; 3909 3910 if (mCurrentSampleInfoOffsets == NULL) { 3911 return OK; 3912 } 3913 3914 for (size_t i = 0; i < entrycount; i++) { 3915 if (version == 0) { 3916 uint32_t tmp; 3917 if (!mDataSource->getUInt32(offset, &tmp)) { 3918 return ERROR_IO; 3919 } 3920 mCurrentSampleInfoOffsets[i] = tmp; 3921 offset += 4; 3922 } else { 3923 uint64_t tmp; 3924 if (!mDataSource->getUInt64(offset, &tmp)) { 3925 return ERROR_IO; 3926 } 3927 mCurrentSampleInfoOffsets[i] = tmp; 3928 offset += 8; 3929 } 3930 } 3931 3932 // parse clear/encrypted data 3933 3934 off64_t drmoffset = mCurrentSampleInfoOffsets[0]; // from moof 3935 3936 drmoffset += mCurrentMoofOffset; 3937 int ivlength; 3938 CHECK(mFormat->findInt32(kKeyCryptoDefaultIVSize, &ivlength)); 3939 3940 // only 0, 8 and 16 byte initialization vectors are supported 3941 if (ivlength != 0 && ivlength != 8 && ivlength != 16) { 3942 ALOGW("unsupported IV length: %d", ivlength); 3943 return ERROR_MALFORMED; 3944 } 3945 // read CencSampleAuxiliaryDataFormats 3946 for (size_t i = 0; i < mCurrentSampleInfoCount; i++) { 3947 if (i >= mCurrentSamples.size()) { 3948 ALOGW("too few samples"); 3949 break; 3950 } 3951 Sample *smpl = &mCurrentSamples.editItemAt(i); 3952 3953 memset(smpl->iv, 0, 16); 3954 if (mDataSource->readAt(drmoffset, smpl->iv, ivlength) != ivlength) { 3955 return ERROR_IO; 3956 } 3957 3958 drmoffset += ivlength; 3959 3960 int32_t smplinfosize = mCurrentDefaultSampleInfoSize; 3961 if (smplinfosize == 0) { 3962 smplinfosize = mCurrentSampleInfoSizes[i]; 3963 } 3964 if (smplinfosize > ivlength) { 3965 uint16_t numsubsamples; 3966 if (!mDataSource->getUInt16(drmoffset, &numsubsamples)) { 3967 return ERROR_IO; 3968 } 3969 drmoffset += 2; 3970 for (size_t j = 0; j < numsubsamples; j++) { 3971 uint16_t numclear; 3972 uint32_t numencrypted; 3973 if (!mDataSource->getUInt16(drmoffset, &numclear)) { 3974 return ERROR_IO; 3975 } 3976 drmoffset += 2; 3977 if (!mDataSource->getUInt32(drmoffset, &numencrypted)) { 3978 return ERROR_IO; 3979 } 3980 drmoffset += 4; 3981 smpl->clearsizes.add(numclear); 3982 smpl->encryptedsizes.add(numencrypted); 3983 } 3984 } else { 3985 smpl->clearsizes.add(0); 3986 smpl->encryptedsizes.add(smpl->size); 3987 } 3988 } 3989 3990 3991 return OK; 3992 } 3993 3994 status_t MPEG4Source::parseTrackFragmentHeader(off64_t offset, off64_t size) { 3995 3996 if (size < 8) { 3997 return -EINVAL; 3998 } 3999 4000 uint32_t flags; 4001 if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags 4002 return ERROR_MALFORMED; 4003 } 4004 4005 if (flags & 0xff000000) { 4006 return -EINVAL; 4007 } 4008 4009 if (!mDataSource->getUInt32(offset + 4, (uint32_t*)&mLastParsedTrackId)) { 4010 return ERROR_MALFORMED; 4011 } 4012 4013 if (mLastParsedTrackId != mTrackId) { 4014 // this is not the right track, skip it 4015 return OK; 4016 } 4017 4018 mTrackFragmentHeaderInfo.mFlags = flags; 4019 mTrackFragmentHeaderInfo.mTrackID = mLastParsedTrackId; 4020 offset += 8; 4021 size -= 8; 4022 4023 ALOGV("fragment header: %08x %08x", flags, mTrackFragmentHeaderInfo.mTrackID); 4024 4025 if (flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent) { 4026 if (size < 8) { 4027 return -EINVAL; 4028 } 4029 4030 if (!mDataSource->getUInt64(offset, &mTrackFragmentHeaderInfo.mBaseDataOffset)) { 4031 return ERROR_MALFORMED; 4032 } 4033 offset += 8; 4034 size -= 8; 4035 } 4036 4037 if (flags & TrackFragmentHeaderInfo::kSampleDescriptionIndexPresent) { 4038 if (size < 4) { 4039 return -EINVAL; 4040 } 4041 4042 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mSampleDescriptionIndex)) { 4043 return ERROR_MALFORMED; 4044 } 4045 offset += 4; 4046 size -= 4; 4047 } 4048 4049 if (flags & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) { 4050 if (size < 4) { 4051 return -EINVAL; 4052 } 4053 4054 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleDuration)) { 4055 return ERROR_MALFORMED; 4056 } 4057 offset += 4; 4058 size -= 4; 4059 } 4060 4061 if (flags & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) { 4062 if (size < 4) { 4063 return -EINVAL; 4064 } 4065 4066 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleSize)) { 4067 return ERROR_MALFORMED; 4068 } 4069 offset += 4; 4070 size -= 4; 4071 } 4072 4073 if (flags & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) { 4074 if (size < 4) { 4075 return -EINVAL; 4076 } 4077 4078 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleFlags)) { 4079 return ERROR_MALFORMED; 4080 } 4081 offset += 4; 4082 size -= 4; 4083 } 4084 4085 if (!(flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent)) { 4086 mTrackFragmentHeaderInfo.mBaseDataOffset = mCurrentMoofOffset; 4087 } 4088 4089 mTrackFragmentHeaderInfo.mDataOffset = 0; 4090 return OK; 4091 } 4092 4093 status_t MPEG4Source::parseTrackFragmentRun(off64_t offset, off64_t size) { 4094 4095 ALOGV("MPEG4Extractor::parseTrackFragmentRun"); 4096 if (size < 8) { 4097 return -EINVAL; 4098 } 4099 4100 enum { 4101 kDataOffsetPresent = 0x01, 4102 kFirstSampleFlagsPresent = 0x04, 4103 kSampleDurationPresent = 0x100, 4104 kSampleSizePresent = 0x200, 4105 kSampleFlagsPresent = 0x400, 4106 kSampleCompositionTimeOffsetPresent = 0x800, 4107 }; 4108 4109 uint32_t flags; 4110 if (!mDataSource->getUInt32(offset, &flags)) { 4111 return ERROR_MALFORMED; 4112 } 4113 ALOGV("fragment run flags: %08x", flags); 4114 4115 if (flags & 0xff000000) { 4116 return -EINVAL; 4117 } 4118 4119 if ((flags & kFirstSampleFlagsPresent) && (flags & kSampleFlagsPresent)) { 4120 // These two shall not be used together. 4121 return -EINVAL; 4122 } 4123 4124 uint32_t sampleCount; 4125 if (!mDataSource->getUInt32(offset + 4, &sampleCount)) { 4126 return ERROR_MALFORMED; 4127 } 4128 offset += 8; 4129 size -= 8; 4130 4131 uint64_t dataOffset = mTrackFragmentHeaderInfo.mDataOffset; 4132 4133 uint32_t firstSampleFlags = 0; 4134 4135 if (flags & kDataOffsetPresent) { 4136 if (size < 4) { 4137 return -EINVAL; 4138 } 4139 4140 int32_t dataOffsetDelta; 4141 if (!mDataSource->getUInt32(offset, (uint32_t*)&dataOffsetDelta)) { 4142 return ERROR_MALFORMED; 4143 } 4144 4145 dataOffset = mTrackFragmentHeaderInfo.mBaseDataOffset + dataOffsetDelta; 4146 4147 offset += 4; 4148 size -= 4; 4149 } 4150 4151 if (flags & kFirstSampleFlagsPresent) { 4152 if (size < 4) { 4153 return -EINVAL; 4154 } 4155 4156 if (!mDataSource->getUInt32(offset, &firstSampleFlags)) { 4157 return ERROR_MALFORMED; 4158 } 4159 offset += 4; 4160 size -= 4; 4161 } 4162 4163 uint32_t sampleDuration = 0, sampleSize = 0, sampleFlags = 0, 4164 sampleCtsOffset = 0; 4165 4166 size_t bytesPerSample = 0; 4167 if (flags & kSampleDurationPresent) { 4168 bytesPerSample += 4; 4169 } else if (mTrackFragmentHeaderInfo.mFlags 4170 & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) { 4171 sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration; 4172 } else if (mTrex) { 4173 sampleDuration = mTrex->default_sample_duration; 4174 } 4175 4176 if (flags & kSampleSizePresent) { 4177 bytesPerSample += 4; 4178 } else if (mTrackFragmentHeaderInfo.mFlags 4179 & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) { 4180 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize; 4181 } else { 4182 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize; 4183 } 4184 4185 if (flags & kSampleFlagsPresent) { 4186 bytesPerSample += 4; 4187 } else if (mTrackFragmentHeaderInfo.mFlags 4188 & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) { 4189 sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags; 4190 } else { 4191 sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags; 4192 } 4193 4194 if (flags & kSampleCompositionTimeOffsetPresent) { 4195 bytesPerSample += 4; 4196 } else { 4197 sampleCtsOffset = 0; 4198 } 4199 4200 if (size < (off64_t)(sampleCount * bytesPerSample)) { 4201 return -EINVAL; 4202 } 4203 4204 Sample tmp; 4205 for (uint32_t i = 0; i < sampleCount; ++i) { 4206 if (flags & kSampleDurationPresent) { 4207 if (!mDataSource->getUInt32(offset, &sampleDuration)) { 4208 return ERROR_MALFORMED; 4209 } 4210 offset += 4; 4211 } 4212 4213 if (flags & kSampleSizePresent) { 4214 if (!mDataSource->getUInt32(offset, &sampleSize)) { 4215 return ERROR_MALFORMED; 4216 } 4217 offset += 4; 4218 } 4219 4220 if (flags & kSampleFlagsPresent) { 4221 if (!mDataSource->getUInt32(offset, &sampleFlags)) { 4222 return ERROR_MALFORMED; 4223 } 4224 offset += 4; 4225 } 4226 4227 if (flags & kSampleCompositionTimeOffsetPresent) { 4228 if (!mDataSource->getUInt32(offset, &sampleCtsOffset)) { 4229 return ERROR_MALFORMED; 4230 } 4231 offset += 4; 4232 } 4233 4234 ALOGV("adding sample %d at offset 0x%08" PRIx64 ", size %u, duration %u, " 4235 " flags 0x%08x", i + 1, 4236 dataOffset, sampleSize, sampleDuration, 4237 (flags & kFirstSampleFlagsPresent) && i == 0 4238 ? firstSampleFlags : sampleFlags); 4239 tmp.offset = dataOffset; 4240 tmp.size = sampleSize; 4241 tmp.duration = sampleDuration; 4242 tmp.compositionOffset = sampleCtsOffset; 4243 mCurrentSamples.add(tmp); 4244 4245 dataOffset += sampleSize; 4246 } 4247 4248 mTrackFragmentHeaderInfo.mDataOffset = dataOffset; 4249 4250 return OK; 4251 } 4252 4253 sp<MetaData> MPEG4Source::getFormat() { 4254 Mutex::Autolock autoLock(mLock); 4255 4256 return mFormat; 4257 } 4258 4259 size_t MPEG4Source::parseNALSize(const uint8_t *data) const { 4260 switch (mNALLengthSize) { 4261 case 1: 4262 return *data; 4263 case 2: 4264 return U16_AT(data); 4265 case 3: 4266 return ((size_t)data[0] << 16) | U16_AT(&data[1]); 4267 case 4: 4268 return U32_AT(data); 4269 } 4270 4271 // This cannot happen, mNALLengthSize springs to life by adding 1 to 4272 // a 2-bit integer. 4273 CHECK(!"Should not be here."); 4274 4275 return 0; 4276 } 4277 4278 status_t MPEG4Source::read( 4279 MediaBuffer **out, const ReadOptions *options) { 4280 Mutex::Autolock autoLock(mLock); 4281 4282 CHECK(mStarted); 4283 4284 if (options != nullptr && options->getNonBlocking() && !mGroup->has_buffers()) { 4285 *out = nullptr; 4286 return WOULD_BLOCK; 4287 } 4288 4289 if (mFirstMoofOffset > 0) { 4290 return fragmentedRead(out, options); 4291 } 4292 4293 *out = NULL; 4294 4295 int64_t targetSampleTimeUs = -1; 4296 4297 int64_t seekTimeUs; 4298 ReadOptions::SeekMode mode; 4299 if (options && options->getSeekTo(&seekTimeUs, &mode)) { 4300 uint32_t findFlags = 0; 4301 switch (mode) { 4302 case ReadOptions::SEEK_PREVIOUS_SYNC: 4303 findFlags = SampleTable::kFlagBefore; 4304 break; 4305 case ReadOptions::SEEK_NEXT_SYNC: 4306 findFlags = SampleTable::kFlagAfter; 4307 break; 4308 case ReadOptions::SEEK_CLOSEST_SYNC: 4309 case ReadOptions::SEEK_CLOSEST: 4310 findFlags = SampleTable::kFlagClosest; 4311 break; 4312 default: 4313 CHECK(!"Should not be here."); 4314 break; 4315 } 4316 4317 uint32_t sampleIndex; 4318 status_t err = mSampleTable->findSampleAtTime( 4319 seekTimeUs, 1000000, mTimescale, 4320 &sampleIndex, findFlags); 4321 4322 if (mode == ReadOptions::SEEK_CLOSEST) { 4323 // We found the closest sample already, now we want the sync 4324 // sample preceding it (or the sample itself of course), even 4325 // if the subsequent sync sample is closer. 4326 findFlags = SampleTable::kFlagBefore; 4327 } 4328 4329 uint32_t syncSampleIndex; 4330 if (err == OK) { 4331 err = mSampleTable->findSyncSampleNear( 4332 sampleIndex, &syncSampleIndex, findFlags); 4333 } 4334 4335 uint32_t sampleTime; 4336 if (err == OK) { 4337 err = mSampleTable->getMetaDataForSample( 4338 sampleIndex, NULL, NULL, &sampleTime); 4339 } 4340 4341 if (err != OK) { 4342 if (err == ERROR_OUT_OF_RANGE) { 4343 // An attempt to seek past the end of the stream would 4344 // normally cause this ERROR_OUT_OF_RANGE error. Propagating 4345 // this all the way to the MediaPlayer would cause abnormal 4346 // termination. Legacy behaviour appears to be to behave as if 4347 // we had seeked to the end of stream, ending normally. 4348 err = ERROR_END_OF_STREAM; 4349 } 4350 ALOGV("end of stream"); 4351 return err; 4352 } 4353 4354 if (mode == ReadOptions::SEEK_CLOSEST) { 4355 targetSampleTimeUs = (sampleTime * 1000000ll) / mTimescale; 4356 } 4357 4358 #if 0 4359 uint32_t syncSampleTime; 4360 CHECK_EQ(OK, mSampleTable->getMetaDataForSample( 4361 syncSampleIndex, NULL, NULL, &syncSampleTime)); 4362 4363 ALOGI("seek to time %lld us => sample at time %lld us, " 4364 "sync sample at time %lld us", 4365 seekTimeUs, 4366 sampleTime * 1000000ll / mTimescale, 4367 syncSampleTime * 1000000ll / mTimescale); 4368 #endif 4369 4370 mCurrentSampleIndex = syncSampleIndex; 4371 if (mBuffer != NULL) { 4372 mBuffer->release(); 4373 mBuffer = NULL; 4374 } 4375 4376 // fall through 4377 } 4378 4379 off64_t offset; 4380 size_t size; 4381 uint32_t cts, stts; 4382 bool isSyncSample; 4383 bool newBuffer = false; 4384 if (mBuffer == NULL) { 4385 newBuffer = true; 4386 4387 status_t err = 4388 mSampleTable->getMetaDataForSample( 4389 mCurrentSampleIndex, &offset, &size, &cts, &isSyncSample, &stts); 4390 4391 if (err != OK) { 4392 return err; 4393 } 4394 4395 err = mGroup->acquire_buffer(&mBuffer); 4396 4397 if (err != OK) { 4398 CHECK(mBuffer == NULL); 4399 return err; 4400 } 4401 if (size > mBuffer->size()) { 4402 ALOGE("buffer too small: %zu > %zu", size, mBuffer->size()); 4403 return ERROR_BUFFER_TOO_SMALL; 4404 } 4405 } 4406 4407 if ((!mIsAVC && !mIsHEVC) || mWantsNALFragments) { 4408 if (newBuffer) { 4409 ssize_t num_bytes_read = 4410 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size); 4411 4412 if (num_bytes_read < (ssize_t)size) { 4413 mBuffer->release(); 4414 mBuffer = NULL; 4415 4416 return ERROR_IO; 4417 } 4418 4419 CHECK(mBuffer != NULL); 4420 mBuffer->set_range(0, size); 4421 mBuffer->meta_data()->clear(); 4422 mBuffer->meta_data()->setInt64( 4423 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 4424 mBuffer->meta_data()->setInt64( 4425 kKeyDuration, ((int64_t)stts * 1000000) / mTimescale); 4426 4427 if (targetSampleTimeUs >= 0) { 4428 mBuffer->meta_data()->setInt64( 4429 kKeyTargetTime, targetSampleTimeUs); 4430 } 4431 4432 if (isSyncSample) { 4433 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 4434 } 4435 4436 ++mCurrentSampleIndex; 4437 } 4438 4439 if (!mIsAVC && !mIsHEVC) { 4440 *out = mBuffer; 4441 mBuffer = NULL; 4442 4443 return OK; 4444 } 4445 4446 // Each NAL unit is split up into its constituent fragments and 4447 // each one of them returned in its own buffer. 4448 4449 CHECK(mBuffer->range_length() >= mNALLengthSize); 4450 4451 const uint8_t *src = 4452 (const uint8_t *)mBuffer->data() + mBuffer->range_offset(); 4453 4454 size_t nal_size = parseNALSize(src); 4455 if (mNALLengthSize > SIZE_MAX - nal_size) { 4456 ALOGE("b/24441553, b/24445122"); 4457 } 4458 if (mBuffer->range_length() - mNALLengthSize < nal_size) { 4459 ALOGE("incomplete NAL unit."); 4460 4461 mBuffer->release(); 4462 mBuffer = NULL; 4463 4464 return ERROR_MALFORMED; 4465 } 4466 4467 MediaBuffer *clone = mBuffer->clone(); 4468 CHECK(clone != NULL); 4469 clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size); 4470 4471 CHECK(mBuffer != NULL); 4472 mBuffer->set_range( 4473 mBuffer->range_offset() + mNALLengthSize + nal_size, 4474 mBuffer->range_length() - mNALLengthSize - nal_size); 4475 4476 if (mBuffer->range_length() == 0) { 4477 mBuffer->release(); 4478 mBuffer = NULL; 4479 } 4480 4481 *out = clone; 4482 4483 return OK; 4484 } else { 4485 // Whole NAL units are returned but each fragment is prefixed by 4486 // the start code (0x00 00 00 01). 4487 ssize_t num_bytes_read = 0; 4488 int32_t drm = 0; 4489 bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0); 4490 if (usesDRM) { 4491 num_bytes_read = 4492 mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size); 4493 } else { 4494 num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size); 4495 } 4496 4497 if (num_bytes_read < (ssize_t)size) { 4498 mBuffer->release(); 4499 mBuffer = NULL; 4500 4501 return ERROR_IO; 4502 } 4503 4504 if (usesDRM) { 4505 CHECK(mBuffer != NULL); 4506 mBuffer->set_range(0, size); 4507 4508 } else { 4509 uint8_t *dstData = (uint8_t *)mBuffer->data(); 4510 size_t srcOffset = 0; 4511 size_t dstOffset = 0; 4512 4513 while (srcOffset < size) { 4514 bool isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize); 4515 size_t nalLength = 0; 4516 if (!isMalFormed) { 4517 nalLength = parseNALSize(&mSrcBuffer[srcOffset]); 4518 srcOffset += mNALLengthSize; 4519 isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength); 4520 } 4521 4522 if (isMalFormed) { 4523 ALOGE("Video is malformed"); 4524 mBuffer->release(); 4525 mBuffer = NULL; 4526 return ERROR_MALFORMED; 4527 } 4528 4529 if (nalLength == 0) { 4530 continue; 4531 } 4532 4533 if (dstOffset > SIZE_MAX - 4 || 4534 dstOffset + 4 > SIZE_MAX - nalLength || 4535 dstOffset + 4 + nalLength > mBuffer->size()) { 4536 ALOGE("b/27208621 : %zu %zu", dstOffset, mBuffer->size()); 4537 android_errorWriteLog(0x534e4554, "27208621"); 4538 mBuffer->release(); 4539 mBuffer = NULL; 4540 return ERROR_MALFORMED; 4541 } 4542 4543 dstData[dstOffset++] = 0; 4544 dstData[dstOffset++] = 0; 4545 dstData[dstOffset++] = 0; 4546 dstData[dstOffset++] = 1; 4547 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength); 4548 srcOffset += nalLength; 4549 dstOffset += nalLength; 4550 } 4551 CHECK_EQ(srcOffset, size); 4552 CHECK(mBuffer != NULL); 4553 mBuffer->set_range(0, dstOffset); 4554 } 4555 4556 mBuffer->meta_data()->clear(); 4557 mBuffer->meta_data()->setInt64( 4558 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 4559 mBuffer->meta_data()->setInt64( 4560 kKeyDuration, ((int64_t)stts * 1000000) / mTimescale); 4561 4562 if (targetSampleTimeUs >= 0) { 4563 mBuffer->meta_data()->setInt64( 4564 kKeyTargetTime, targetSampleTimeUs); 4565 } 4566 4567 if (mIsAVC) { 4568 uint32_t layerId = FindAVCLayerId( 4569 (const uint8_t *)mBuffer->data(), mBuffer->range_length()); 4570 mBuffer->meta_data()->setInt32(kKeyTemporalLayerId, layerId); 4571 } 4572 4573 if (isSyncSample) { 4574 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 4575 } 4576 4577 ++mCurrentSampleIndex; 4578 4579 *out = mBuffer; 4580 mBuffer = NULL; 4581 4582 return OK; 4583 } 4584 } 4585 4586 status_t MPEG4Source::fragmentedRead( 4587 MediaBuffer **out, const ReadOptions *options) { 4588 4589 ALOGV("MPEG4Source::fragmentedRead"); 4590 4591 CHECK(mStarted); 4592 4593 *out = NULL; 4594 4595 int64_t targetSampleTimeUs = -1; 4596 4597 int64_t seekTimeUs; 4598 ReadOptions::SeekMode mode; 4599 if (options && options->getSeekTo(&seekTimeUs, &mode)) { 4600 4601 int numSidxEntries = mSegments.size(); 4602 if (numSidxEntries != 0) { 4603 int64_t totalTime = 0; 4604 off64_t totalOffset = mFirstMoofOffset; 4605 for (int i = 0; i < numSidxEntries; i++) { 4606 const SidxEntry *se = &mSegments[i]; 4607 if (totalTime + se->mDurationUs > seekTimeUs) { 4608 // The requested time is somewhere in this segment 4609 if ((mode == ReadOptions::SEEK_NEXT_SYNC && seekTimeUs > totalTime) || 4610 (mode == ReadOptions::SEEK_CLOSEST_SYNC && 4611 (seekTimeUs - totalTime) > (totalTime + se->mDurationUs - seekTimeUs))) { 4612 // requested next sync, or closest sync and it was closer to the end of 4613 // this segment 4614 totalTime += se->mDurationUs; 4615 totalOffset += se->mSize; 4616 } 4617 break; 4618 } 4619 totalTime += se->mDurationUs; 4620 totalOffset += se->mSize; 4621 } 4622 mCurrentMoofOffset = totalOffset; 4623 mCurrentSamples.clear(); 4624 mCurrentSampleIndex = 0; 4625 parseChunk(&totalOffset); 4626 mCurrentTime = totalTime * mTimescale / 1000000ll; 4627 } else { 4628 // without sidx boxes, we can only seek to 0 4629 mCurrentMoofOffset = mFirstMoofOffset; 4630 mCurrentSamples.clear(); 4631 mCurrentSampleIndex = 0; 4632 off64_t tmp = mCurrentMoofOffset; 4633 parseChunk(&tmp); 4634 mCurrentTime = 0; 4635 } 4636 4637 if (mBuffer != NULL) { 4638 mBuffer->release(); 4639 mBuffer = NULL; 4640 } 4641 4642 // fall through 4643 } 4644 4645 off64_t offset = 0; 4646 size_t size = 0; 4647 uint32_t cts = 0; 4648 bool isSyncSample = false; 4649 bool newBuffer = false; 4650 if (mBuffer == NULL) { 4651 newBuffer = true; 4652 4653 if (mCurrentSampleIndex >= mCurrentSamples.size()) { 4654 // move to next fragment if there is one 4655 if (mNextMoofOffset <= mCurrentMoofOffset) { 4656 return ERROR_END_OF_STREAM; 4657 } 4658 off64_t nextMoof = mNextMoofOffset; 4659 mCurrentMoofOffset = nextMoof; 4660 mCurrentSamples.clear(); 4661 mCurrentSampleIndex = 0; 4662 parseChunk(&nextMoof); 4663 if (mCurrentSampleIndex >= mCurrentSamples.size()) { 4664 return ERROR_END_OF_STREAM; 4665 } 4666 } 4667 4668 const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex]; 4669 offset = smpl->offset; 4670 size = smpl->size; 4671 cts = mCurrentTime + smpl->compositionOffset; 4672 mCurrentTime += smpl->duration; 4673 isSyncSample = (mCurrentSampleIndex == 0); // XXX 4674 4675 status_t err = mGroup->acquire_buffer(&mBuffer); 4676 4677 if (err != OK) { 4678 CHECK(mBuffer == NULL); 4679 ALOGV("acquire_buffer returned %d", err); 4680 return err; 4681 } 4682 if (size > mBuffer->size()) { 4683 ALOGE("buffer too small: %zu > %zu", size, mBuffer->size()); 4684 return ERROR_BUFFER_TOO_SMALL; 4685 } 4686 } 4687 4688 const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex]; 4689 const sp<MetaData> bufmeta = mBuffer->meta_data(); 4690 bufmeta->clear(); 4691 if (smpl->encryptedsizes.size()) { 4692 // store clear/encrypted lengths in metadata 4693 bufmeta->setData(kKeyPlainSizes, 0, 4694 smpl->clearsizes.array(), smpl->clearsizes.size() * 4); 4695 bufmeta->setData(kKeyEncryptedSizes, 0, 4696 smpl->encryptedsizes.array(), smpl->encryptedsizes.size() * 4); 4697 bufmeta->setData(kKeyCryptoIV, 0, smpl->iv, 16); // use 16 or the actual size? 4698 bufmeta->setInt32(kKeyCryptoDefaultIVSize, mDefaultIVSize); 4699 bufmeta->setInt32(kKeyCryptoMode, mCryptoMode); 4700 bufmeta->setData(kKeyCryptoKey, 0, mCryptoKey, 16); 4701 } 4702 4703 if ((!mIsAVC && !mIsHEVC)|| mWantsNALFragments) { 4704 if (newBuffer) { 4705 if (!isInRange((size_t)0u, mBuffer->size(), size)) { 4706 mBuffer->release(); 4707 mBuffer = NULL; 4708 4709 ALOGE("fragmentedRead ERROR_MALFORMED size %zu", size); 4710 return ERROR_MALFORMED; 4711 } 4712 4713 ssize_t num_bytes_read = 4714 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size); 4715 4716 if (num_bytes_read < (ssize_t)size) { 4717 mBuffer->release(); 4718 mBuffer = NULL; 4719 4720 ALOGE("i/o error"); 4721 return ERROR_IO; 4722 } 4723 4724 CHECK(mBuffer != NULL); 4725 mBuffer->set_range(0, size); 4726 mBuffer->meta_data()->setInt64( 4727 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 4728 mBuffer->meta_data()->setInt64( 4729 kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale); 4730 4731 if (targetSampleTimeUs >= 0) { 4732 mBuffer->meta_data()->setInt64( 4733 kKeyTargetTime, targetSampleTimeUs); 4734 } 4735 4736 if (mIsAVC) { 4737 uint32_t layerId = FindAVCLayerId( 4738 (const uint8_t *)mBuffer->data(), mBuffer->range_length()); 4739 mBuffer->meta_data()->setInt32(kKeyTemporalLayerId, layerId); 4740 } 4741 4742 if (isSyncSample) { 4743 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 4744 } 4745 4746 ++mCurrentSampleIndex; 4747 } 4748 4749 if (!mIsAVC && !mIsHEVC) { 4750 *out = mBuffer; 4751 mBuffer = NULL; 4752 4753 return OK; 4754 } 4755 4756 // Each NAL unit is split up into its constituent fragments and 4757 // each one of them returned in its own buffer. 4758 4759 CHECK(mBuffer->range_length() >= mNALLengthSize); 4760 4761 const uint8_t *src = 4762 (const uint8_t *)mBuffer->data() + mBuffer->range_offset(); 4763 4764 size_t nal_size = parseNALSize(src); 4765 if (mNALLengthSize > SIZE_MAX - nal_size) { 4766 ALOGE("b/24441553, b/24445122"); 4767 } 4768 4769 if (mBuffer->range_length() - mNALLengthSize < nal_size) { 4770 ALOGE("incomplete NAL unit."); 4771 4772 mBuffer->release(); 4773 mBuffer = NULL; 4774 4775 return ERROR_MALFORMED; 4776 } 4777 4778 MediaBuffer *clone = mBuffer->clone(); 4779 CHECK(clone != NULL); 4780 clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size); 4781 4782 CHECK(mBuffer != NULL); 4783 mBuffer->set_range( 4784 mBuffer->range_offset() + mNALLengthSize + nal_size, 4785 mBuffer->range_length() - mNALLengthSize - nal_size); 4786 4787 if (mBuffer->range_length() == 0) { 4788 mBuffer->release(); 4789 mBuffer = NULL; 4790 } 4791 4792 *out = clone; 4793 4794 return OK; 4795 } else { 4796 ALOGV("whole NAL"); 4797 // Whole NAL units are returned but each fragment is prefixed by 4798 // the start code (0x00 00 00 01). 4799 ssize_t num_bytes_read = 0; 4800 int32_t drm = 0; 4801 bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0); 4802 void *data = NULL; 4803 bool isMalFormed = false; 4804 if (usesDRM) { 4805 if (mBuffer == NULL || !isInRange((size_t)0u, mBuffer->size(), size)) { 4806 isMalFormed = true; 4807 } else { 4808 data = mBuffer->data(); 4809 } 4810 } else { 4811 int32_t max_size; 4812 if (mFormat == NULL 4813 || !mFormat->findInt32(kKeyMaxInputSize, &max_size) 4814 || !isInRange((size_t)0u, (size_t)max_size, size)) { 4815 isMalFormed = true; 4816 } else { 4817 data = mSrcBuffer; 4818 } 4819 } 4820 4821 if (isMalFormed || data == NULL) { 4822 ALOGE("isMalFormed size %zu", size); 4823 if (mBuffer != NULL) { 4824 mBuffer->release(); 4825 mBuffer = NULL; 4826 } 4827 return ERROR_MALFORMED; 4828 } 4829 num_bytes_read = mDataSource->readAt(offset, data, size); 4830 4831 if (num_bytes_read < (ssize_t)size) { 4832 mBuffer->release(); 4833 mBuffer = NULL; 4834 4835 ALOGE("i/o error"); 4836 return ERROR_IO; 4837 } 4838 4839 if (usesDRM) { 4840 CHECK(mBuffer != NULL); 4841 mBuffer->set_range(0, size); 4842 4843 } else { 4844 uint8_t *dstData = (uint8_t *)mBuffer->data(); 4845 size_t srcOffset = 0; 4846 size_t dstOffset = 0; 4847 4848 while (srcOffset < size) { 4849 isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize); 4850 size_t nalLength = 0; 4851 if (!isMalFormed) { 4852 nalLength = parseNALSize(&mSrcBuffer[srcOffset]); 4853 srcOffset += mNALLengthSize; 4854 isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength) 4855 || !isInRange((size_t)0u, mBuffer->size(), dstOffset, (size_t)4u) 4856 || !isInRange((size_t)0u, mBuffer->size(), dstOffset + 4, nalLength); 4857 } 4858 4859 if (isMalFormed) { 4860 ALOGE("Video is malformed; nalLength %zu", nalLength); 4861 mBuffer->release(); 4862 mBuffer = NULL; 4863 return ERROR_MALFORMED; 4864 } 4865 4866 if (nalLength == 0) { 4867 continue; 4868 } 4869 4870 if (dstOffset > SIZE_MAX - 4 || 4871 dstOffset + 4 > SIZE_MAX - nalLength || 4872 dstOffset + 4 + nalLength > mBuffer->size()) { 4873 ALOGE("b/26365349 : %zu %zu", dstOffset, mBuffer->size()); 4874 android_errorWriteLog(0x534e4554, "26365349"); 4875 mBuffer->release(); 4876 mBuffer = NULL; 4877 return ERROR_MALFORMED; 4878 } 4879 4880 dstData[dstOffset++] = 0; 4881 dstData[dstOffset++] = 0; 4882 dstData[dstOffset++] = 0; 4883 dstData[dstOffset++] = 1; 4884 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength); 4885 srcOffset += nalLength; 4886 dstOffset += nalLength; 4887 } 4888 CHECK_EQ(srcOffset, size); 4889 CHECK(mBuffer != NULL); 4890 mBuffer->set_range(0, dstOffset); 4891 } 4892 4893 mBuffer->meta_data()->setInt64( 4894 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 4895 mBuffer->meta_data()->setInt64( 4896 kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale); 4897 4898 if (targetSampleTimeUs >= 0) { 4899 mBuffer->meta_data()->setInt64( 4900 kKeyTargetTime, targetSampleTimeUs); 4901 } 4902 4903 if (isSyncSample) { 4904 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 4905 } 4906 4907 ++mCurrentSampleIndex; 4908 4909 *out = mBuffer; 4910 mBuffer = NULL; 4911 4912 return OK; 4913 } 4914 } 4915 4916 MPEG4Extractor::Track *MPEG4Extractor::findTrackByMimePrefix( 4917 const char *mimePrefix) { 4918 for (Track *track = mFirstTrack; track != NULL; track = track->next) { 4919 const char *mime; 4920 if (track->meta != NULL 4921 && track->meta->findCString(kKeyMIMEType, &mime) 4922 && !strncasecmp(mime, mimePrefix, strlen(mimePrefix))) { 4923 return track; 4924 } 4925 } 4926 4927 return NULL; 4928 } 4929 4930 static bool LegacySniffMPEG4( 4931 const sp<DataSource> &source, String8 *mimeType, float *confidence) { 4932 uint8_t header[8]; 4933 4934 ssize_t n = source->readAt(4, header, sizeof(header)); 4935 if (n < (ssize_t)sizeof(header)) { 4936 return false; 4937 } 4938 4939 if (!memcmp(header, "ftyp3gp", 7) || !memcmp(header, "ftypmp42", 8) 4940 || !memcmp(header, "ftyp3gr6", 8) || !memcmp(header, "ftyp3gs6", 8) 4941 || !memcmp(header, "ftyp3ge6", 8) || !memcmp(header, "ftyp3gg6", 8) 4942 || !memcmp(header, "ftypisom", 8) || !memcmp(header, "ftypM4V ", 8) 4943 || !memcmp(header, "ftypM4A ", 8) || !memcmp(header, "ftypf4v ", 8) 4944 || !memcmp(header, "ftypkddi", 8) || !memcmp(header, "ftypM4VP", 8)) { 4945 *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4; 4946 *confidence = 0.4; 4947 4948 return true; 4949 } 4950 4951 return false; 4952 } 4953 4954 static bool isCompatibleBrand(uint32_t fourcc) { 4955 static const uint32_t kCompatibleBrands[] = { 4956 FOURCC('i', 's', 'o', 'm'), 4957 FOURCC('i', 's', 'o', '2'), 4958 FOURCC('a', 'v', 'c', '1'), 4959 FOURCC('h', 'v', 'c', '1'), 4960 FOURCC('h', 'e', 'v', '1'), 4961 FOURCC('3', 'g', 'p', '4'), 4962 FOURCC('m', 'p', '4', '1'), 4963 FOURCC('m', 'p', '4', '2'), 4964 4965 // Won't promise that the following file types can be played. 4966 // Just give these file types a chance. 4967 FOURCC('q', 't', ' ', ' '), // Apple's QuickTime 4968 FOURCC('M', 'S', 'N', 'V'), // Sony's PSP 4969 4970 FOURCC('3', 'g', '2', 'a'), // 3GPP2 4971 FOURCC('3', 'g', '2', 'b'), 4972 }; 4973 4974 for (size_t i = 0; 4975 i < sizeof(kCompatibleBrands) / sizeof(kCompatibleBrands[0]); 4976 ++i) { 4977 if (kCompatibleBrands[i] == fourcc) { 4978 return true; 4979 } 4980 } 4981 4982 return false; 4983 } 4984 4985 // Attempt to actually parse the 'ftyp' atom and determine if a suitable 4986 // compatible brand is present. 4987 // Also try to identify where this file's metadata ends 4988 // (end of the 'moov' atom) and report it to the caller as part of 4989 // the metadata. 4990 static bool BetterSniffMPEG4( 4991 const sp<DataSource> &source, String8 *mimeType, float *confidence, 4992 sp<AMessage> *meta) { 4993 // We scan up to 128 bytes to identify this file as an MP4. 4994 static const off64_t kMaxScanOffset = 128ll; 4995 4996 off64_t offset = 0ll; 4997 bool foundGoodFileType = false; 4998 off64_t moovAtomEndOffset = -1ll; 4999 bool done = false; 5000 5001 while (!done && offset < kMaxScanOffset) { 5002 uint32_t hdr[2]; 5003 if (source->readAt(offset, hdr, 8) < 8) { 5004 return false; 5005 } 5006 5007 uint64_t chunkSize = ntohl(hdr[0]); 5008 uint32_t chunkType = ntohl(hdr[1]); 5009 off64_t chunkDataOffset = offset + 8; 5010 5011 if (chunkSize == 1) { 5012 if (source->readAt(offset + 8, &chunkSize, 8) < 8) { 5013 return false; 5014 } 5015 5016 chunkSize = ntoh64(chunkSize); 5017 chunkDataOffset += 8; 5018 5019 if (chunkSize < 16) { 5020 // The smallest valid chunk is 16 bytes long in this case. 5021 return false; 5022 } 5023 5024 } else if (chunkSize < 8) { 5025 // The smallest valid chunk is 8 bytes long. 5026 return false; 5027 } 5028 5029 // (data_offset - offset) is either 8 or 16 5030 off64_t chunkDataSize = chunkSize - (chunkDataOffset - offset); 5031 if (chunkDataSize < 0) { 5032 ALOGE("b/23540914"); 5033 return ERROR_MALFORMED; 5034 } 5035 5036 char chunkstring[5]; 5037 MakeFourCCString(chunkType, chunkstring); 5038 ALOGV("saw chunk type %s, size %" PRIu64 " @ %lld", chunkstring, chunkSize, (long long)offset); 5039 switch (chunkType) { 5040 case FOURCC('f', 't', 'y', 'p'): 5041 { 5042 if (chunkDataSize < 8) { 5043 return false; 5044 } 5045 5046 uint32_t numCompatibleBrands = (chunkDataSize - 8) / 4; 5047 for (size_t i = 0; i < numCompatibleBrands + 2; ++i) { 5048 if (i == 1) { 5049 // Skip this index, it refers to the minorVersion, 5050 // not a brand. 5051 continue; 5052 } 5053 5054 uint32_t brand; 5055 if (source->readAt( 5056 chunkDataOffset + 4 * i, &brand, 4) < 4) { 5057 return false; 5058 } 5059 5060 brand = ntohl(brand); 5061 5062 if (isCompatibleBrand(brand)) { 5063 foundGoodFileType = true; 5064 break; 5065 } 5066 } 5067 5068 if (!foundGoodFileType) { 5069 return false; 5070 } 5071 5072 break; 5073 } 5074 5075 case FOURCC('m', 'o', 'o', 'v'): 5076 { 5077 moovAtomEndOffset = offset + chunkSize; 5078 5079 done = true; 5080 break; 5081 } 5082 5083 default: 5084 break; 5085 } 5086 5087 offset += chunkSize; 5088 } 5089 5090 if (!foundGoodFileType) { 5091 return false; 5092 } 5093 5094 *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4; 5095 *confidence = 0.4f; 5096 5097 if (moovAtomEndOffset >= 0) { 5098 *meta = new AMessage; 5099 (*meta)->setInt64("meta-data-size", moovAtomEndOffset); 5100 5101 ALOGV("found metadata size: %lld", (long long)moovAtomEndOffset); 5102 } 5103 5104 return true; 5105 } 5106 5107 bool SniffMPEG4( 5108 const sp<DataSource> &source, String8 *mimeType, float *confidence, 5109 sp<AMessage> *meta) { 5110 if (BetterSniffMPEG4(source, mimeType, confidence, meta)) { 5111 return true; 5112 } 5113 5114 if (LegacySniffMPEG4(source, mimeType, confidence)) { 5115 ALOGW("Identified supported mpeg4 through LegacySniffMPEG4."); 5116 return true; 5117 } 5118 5119 return false; 5120 } 5121 5122 } // namespace android 5123