1 /* 2 * Copyright (C) 2009 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 //#define LOG_NDEBUG 0 18 #define LOG_TAG "MPEG4Extractor" 19 20 #include <ctype.h> 21 #include <inttypes.h> 22 #include <stdint.h> 23 #include <stdlib.h> 24 #include <string.h> 25 26 #include <utils/Log.h> 27 28 #include "include/MPEG4Extractor.h" 29 #include "include/SampleTable.h" 30 #include "include/ESDS.h" 31 32 #include <media/stagefright/foundation/ABitReader.h> 33 #include <media/stagefright/foundation/ABuffer.h> 34 #include <media/stagefright/foundation/ADebug.h> 35 #include <media/stagefright/foundation/AMessage.h> 36 #include <media/stagefright/foundation/AUtils.h> 37 #include <media/stagefright/foundation/ColorUtils.h> 38 #include <media/stagefright/MediaBuffer.h> 39 #include <media/stagefright/MediaBufferGroup.h> 40 #include <media/stagefright/MediaDefs.h> 41 #include <media/stagefright/MediaSource.h> 42 #include <media/stagefright/MetaData.h> 43 #include <utils/String8.h> 44 45 #include <byteswap.h> 46 #include "include/ID3.h" 47 #include "include/avc_utils.h" 48 49 #ifndef UINT32_MAX 50 #define UINT32_MAX (4294967295U) 51 #endif 52 53 namespace android { 54 55 enum { 56 // max track header chunk to return 57 kMaxTrackHeaderSize = 32, 58 59 // maximum size of an atom. Some atoms can be bigger according to the spec, 60 // but we only allow up to this size. 61 kMaxAtomSize = 64 * 1024 * 1024, 62 }; 63 64 class MPEG4Source : public MediaSource { 65 public: 66 // Caller retains ownership of both "dataSource" and "sampleTable". 67 MPEG4Source(const sp<MPEG4Extractor> &owner, 68 const sp<MetaData> &format, 69 const sp<DataSource> &dataSource, 70 int32_t timeScale, 71 const sp<SampleTable> &sampleTable, 72 Vector<SidxEntry> &sidx, 73 const Trex *trex, 74 off64_t firstMoofOffset); 75 76 virtual status_t start(MetaData *params = NULL); 77 virtual status_t stop(); 78 79 virtual sp<MetaData> getFormat(); 80 81 virtual status_t read(MediaBuffer **buffer, const ReadOptions *options = NULL); 82 virtual bool supportNonblockingRead() { return true; } 83 virtual status_t fragmentedRead(MediaBuffer **buffer, const ReadOptions *options = NULL); 84 85 protected: 86 virtual ~MPEG4Source(); 87 88 private: 89 Mutex mLock; 90 91 // keep the MPEG4Extractor around, since we're referencing its data 92 sp<MPEG4Extractor> mOwner; 93 sp<MetaData> mFormat; 94 sp<DataSource> mDataSource; 95 int32_t mTimescale; 96 sp<SampleTable> mSampleTable; 97 uint32_t mCurrentSampleIndex; 98 uint32_t mCurrentFragmentIndex; 99 Vector<SidxEntry> &mSegments; 100 const Trex *mTrex; 101 off64_t mFirstMoofOffset; 102 off64_t mCurrentMoofOffset; 103 off64_t mNextMoofOffset; 104 uint32_t mCurrentTime; 105 int32_t mLastParsedTrackId; 106 int32_t mTrackId; 107 108 int32_t mCryptoMode; // passed in from extractor 109 int32_t mDefaultIVSize; // passed in from extractor 110 uint8_t mCryptoKey[16]; // passed in from extractor 111 uint32_t mCurrentAuxInfoType; 112 uint32_t mCurrentAuxInfoTypeParameter; 113 int32_t mCurrentDefaultSampleInfoSize; 114 uint32_t mCurrentSampleInfoCount; 115 uint32_t mCurrentSampleInfoAllocSize; 116 uint8_t* mCurrentSampleInfoSizes; 117 uint32_t mCurrentSampleInfoOffsetCount; 118 uint32_t mCurrentSampleInfoOffsetsAllocSize; 119 uint64_t* mCurrentSampleInfoOffsets; 120 121 bool mIsAVC; 122 bool mIsHEVC; 123 size_t mNALLengthSize; 124 125 bool mStarted; 126 127 MediaBufferGroup *mGroup; 128 129 MediaBuffer *mBuffer; 130 131 bool mWantsNALFragments; 132 133 uint8_t *mSrcBuffer; 134 135 size_t parseNALSize(const uint8_t *data) const; 136 status_t parseChunk(off64_t *offset); 137 status_t parseTrackFragmentHeader(off64_t offset, off64_t size); 138 status_t parseTrackFragmentRun(off64_t offset, off64_t size); 139 status_t parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size); 140 status_t parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size); 141 142 struct TrackFragmentHeaderInfo { 143 enum Flags { 144 kBaseDataOffsetPresent = 0x01, 145 kSampleDescriptionIndexPresent = 0x02, 146 kDefaultSampleDurationPresent = 0x08, 147 kDefaultSampleSizePresent = 0x10, 148 kDefaultSampleFlagsPresent = 0x20, 149 kDurationIsEmpty = 0x10000, 150 }; 151 152 uint32_t mTrackID; 153 uint32_t mFlags; 154 uint64_t mBaseDataOffset; 155 uint32_t mSampleDescriptionIndex; 156 uint32_t mDefaultSampleDuration; 157 uint32_t mDefaultSampleSize; 158 uint32_t mDefaultSampleFlags; 159 160 uint64_t mDataOffset; 161 }; 162 TrackFragmentHeaderInfo mTrackFragmentHeaderInfo; 163 164 struct Sample { 165 off64_t offset; 166 size_t size; 167 uint32_t duration; 168 int32_t compositionOffset; 169 uint8_t iv[16]; 170 Vector<size_t> clearsizes; 171 Vector<size_t> encryptedsizes; 172 }; 173 Vector<Sample> mCurrentSamples; 174 175 MPEG4Source(const MPEG4Source &); 176 MPEG4Source &operator=(const MPEG4Source &); 177 }; 178 179 // This custom data source wraps an existing one and satisfies requests 180 // falling entirely within a cached range from the cache while forwarding 181 // all remaining requests to the wrapped datasource. 182 // This is used to cache the full sampletable metadata for a single track, 183 // possibly wrapping multiple times to cover all tracks, i.e. 184 // Each MPEG4DataSource caches the sampletable metadata for a single track. 185 186 struct MPEG4DataSource : public DataSource { 187 MPEG4DataSource(const sp<DataSource> &source); 188 189 virtual status_t initCheck() const; 190 virtual ssize_t readAt(off64_t offset, void *data, size_t size); 191 virtual status_t getSize(off64_t *size); 192 virtual uint32_t flags(); 193 194 status_t setCachedRange(off64_t offset, size_t size); 195 196 protected: 197 virtual ~MPEG4DataSource(); 198 199 private: 200 Mutex mLock; 201 202 sp<DataSource> mSource; 203 off64_t mCachedOffset; 204 size_t mCachedSize; 205 uint8_t *mCache; 206 207 void clearCache(); 208 209 MPEG4DataSource(const MPEG4DataSource &); 210 MPEG4DataSource &operator=(const MPEG4DataSource &); 211 }; 212 213 MPEG4DataSource::MPEG4DataSource(const sp<DataSource> &source) 214 : mSource(source), 215 mCachedOffset(0), 216 mCachedSize(0), 217 mCache(NULL) { 218 } 219 220 MPEG4DataSource::~MPEG4DataSource() { 221 clearCache(); 222 } 223 224 void MPEG4DataSource::clearCache() { 225 if (mCache) { 226 free(mCache); 227 mCache = NULL; 228 } 229 230 mCachedOffset = 0; 231 mCachedSize = 0; 232 } 233 234 status_t MPEG4DataSource::initCheck() const { 235 return mSource->initCheck(); 236 } 237 238 ssize_t MPEG4DataSource::readAt(off64_t offset, void *data, size_t size) { 239 Mutex::Autolock autoLock(mLock); 240 241 if (isInRange(mCachedOffset, mCachedSize, offset, size)) { 242 memcpy(data, &mCache[offset - mCachedOffset], size); 243 return size; 244 } 245 246 return mSource->readAt(offset, data, size); 247 } 248 249 status_t MPEG4DataSource::getSize(off64_t *size) { 250 return mSource->getSize(size); 251 } 252 253 uint32_t MPEG4DataSource::flags() { 254 return mSource->flags(); 255 } 256 257 status_t MPEG4DataSource::setCachedRange(off64_t offset, size_t size) { 258 Mutex::Autolock autoLock(mLock); 259 260 clearCache(); 261 262 mCache = (uint8_t *)malloc(size); 263 264 if (mCache == NULL) { 265 return -ENOMEM; 266 } 267 268 mCachedOffset = offset; 269 mCachedSize = size; 270 271 ssize_t err = mSource->readAt(mCachedOffset, mCache, mCachedSize); 272 273 if (err < (ssize_t)size) { 274 clearCache(); 275 276 return ERROR_IO; 277 } 278 279 return OK; 280 } 281 282 //////////////////////////////////////////////////////////////////////////////// 283 284 static const bool kUseHexDump = false; 285 286 static void hexdump(const void *_data, size_t size) { 287 const uint8_t *data = (const uint8_t *)_data; 288 size_t offset = 0; 289 while (offset < size) { 290 printf("0x%04zx ", offset); 291 292 size_t n = size - offset; 293 if (n > 16) { 294 n = 16; 295 } 296 297 for (size_t i = 0; i < 16; ++i) { 298 if (i == 8) { 299 printf(" "); 300 } 301 302 if (offset + i < size) { 303 printf("%02x ", data[offset + i]); 304 } else { 305 printf(" "); 306 } 307 } 308 309 printf(" "); 310 311 for (size_t i = 0; i < n; ++i) { 312 if (isprint(data[offset + i])) { 313 printf("%c", data[offset + i]); 314 } else { 315 printf("."); 316 } 317 } 318 319 printf("\n"); 320 321 offset += 16; 322 } 323 } 324 325 static const char *FourCC2MIME(uint32_t fourcc) { 326 switch (fourcc) { 327 case FOURCC('m', 'p', '4', 'a'): 328 return MEDIA_MIMETYPE_AUDIO_AAC; 329 330 case FOURCC('s', 'a', 'm', 'r'): 331 return MEDIA_MIMETYPE_AUDIO_AMR_NB; 332 333 case FOURCC('s', 'a', 'w', 'b'): 334 return MEDIA_MIMETYPE_AUDIO_AMR_WB; 335 336 case FOURCC('m', 'p', '4', 'v'): 337 return MEDIA_MIMETYPE_VIDEO_MPEG4; 338 339 case FOURCC('s', '2', '6', '3'): 340 case FOURCC('h', '2', '6', '3'): 341 case FOURCC('H', '2', '6', '3'): 342 return MEDIA_MIMETYPE_VIDEO_H263; 343 344 case FOURCC('a', 'v', 'c', '1'): 345 return MEDIA_MIMETYPE_VIDEO_AVC; 346 347 case FOURCC('h', 'v', 'c', '1'): 348 case FOURCC('h', 'e', 'v', '1'): 349 return MEDIA_MIMETYPE_VIDEO_HEVC; 350 default: 351 CHECK(!"should not be here."); 352 return NULL; 353 } 354 } 355 356 static bool AdjustChannelsAndRate(uint32_t fourcc, uint32_t *channels, uint32_t *rate) { 357 if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB, FourCC2MIME(fourcc))) { 358 // AMR NB audio is always mono, 8kHz 359 *channels = 1; 360 *rate = 8000; 361 return true; 362 } else if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, FourCC2MIME(fourcc))) { 363 // AMR WB audio is always mono, 16kHz 364 *channels = 1; 365 *rate = 16000; 366 return true; 367 } 368 return false; 369 } 370 371 MPEG4Extractor::MPEG4Extractor(const sp<DataSource> &source) 372 : mMoofOffset(0), 373 mMoofFound(false), 374 mMdatFound(false), 375 mDataSource(source), 376 mInitCheck(NO_INIT), 377 mHeaderTimescale(0), 378 mIsQT(false), 379 mFirstTrack(NULL), 380 mLastTrack(NULL), 381 mFileMetaData(new MetaData), 382 mFirstSINF(NULL), 383 mIsDrm(false) { 384 } 385 386 MPEG4Extractor::~MPEG4Extractor() { 387 Track *track = mFirstTrack; 388 while (track) { 389 Track *next = track->next; 390 391 delete track; 392 track = next; 393 } 394 mFirstTrack = mLastTrack = NULL; 395 396 SINF *sinf = mFirstSINF; 397 while (sinf) { 398 SINF *next = sinf->next; 399 delete[] sinf->IPMPData; 400 delete sinf; 401 sinf = next; 402 } 403 mFirstSINF = NULL; 404 405 for (size_t i = 0; i < mPssh.size(); i++) { 406 delete [] mPssh[i].data; 407 } 408 } 409 410 uint32_t MPEG4Extractor::flags() const { 411 return CAN_PAUSE | 412 ((mMoofOffset == 0 || mSidxEntries.size() != 0) ? 413 (CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK) : 0); 414 } 415 416 sp<MetaData> MPEG4Extractor::getMetaData() { 417 status_t err; 418 if ((err = readMetaData()) != OK) { 419 return new MetaData; 420 } 421 422 return mFileMetaData; 423 } 424 425 size_t MPEG4Extractor::countTracks() { 426 status_t err; 427 if ((err = readMetaData()) != OK) { 428 ALOGV("MPEG4Extractor::countTracks: no tracks"); 429 return 0; 430 } 431 432 size_t n = 0; 433 Track *track = mFirstTrack; 434 while (track) { 435 ++n; 436 track = track->next; 437 } 438 439 ALOGV("MPEG4Extractor::countTracks: %zu tracks", n); 440 return n; 441 } 442 443 sp<MetaData> MPEG4Extractor::getTrackMetaData( 444 size_t index, uint32_t flags) { 445 status_t err; 446 if ((err = readMetaData()) != OK) { 447 return NULL; 448 } 449 450 Track *track = mFirstTrack; 451 while (index > 0) { 452 if (track == NULL) { 453 return NULL; 454 } 455 456 track = track->next; 457 --index; 458 } 459 460 if (track == NULL) { 461 return NULL; 462 } 463 464 if ((flags & kIncludeExtensiveMetaData) 465 && !track->includes_expensive_metadata) { 466 track->includes_expensive_metadata = true; 467 468 const char *mime; 469 CHECK(track->meta->findCString(kKeyMIMEType, &mime)); 470 if (!strncasecmp("video/", mime, 6)) { 471 if (mMoofOffset > 0) { 472 int64_t duration; 473 if (track->meta->findInt64(kKeyDuration, &duration)) { 474 // nothing fancy, just pick a frame near 1/4th of the duration 475 track->meta->setInt64( 476 kKeyThumbnailTime, duration / 4); 477 } 478 } else { 479 uint32_t sampleIndex; 480 uint32_t sampleTime; 481 if (track->timescale != 0 && 482 track->sampleTable->findThumbnailSample(&sampleIndex) == OK 483 && track->sampleTable->getMetaDataForSample( 484 sampleIndex, NULL /* offset */, NULL /* size */, 485 &sampleTime) == OK) { 486 track->meta->setInt64( 487 kKeyThumbnailTime, 488 ((int64_t)sampleTime * 1000000) / track->timescale); 489 } 490 } 491 492 // MPEG2 tracks do not provide CSD, so read the stream header 493 if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2)) { 494 off64_t offset; 495 size_t size; 496 if (track->sampleTable->getMetaDataForSample( 497 0 /* sampleIndex */, &offset, &size, NULL /* sampleTime */) == OK) { 498 if (size > kMaxTrackHeaderSize) { 499 size = kMaxTrackHeaderSize; 500 } 501 uint8_t header[kMaxTrackHeaderSize]; 502 if (mDataSource->readAt(offset, &header, size) == (ssize_t)size) { 503 track->meta->setData(kKeyStreamHeader, 'mdat', header, size); 504 } 505 } 506 } 507 } 508 } 509 510 return track->meta; 511 } 512 513 static void MakeFourCCString(uint32_t x, char *s) { 514 s[0] = x >> 24; 515 s[1] = (x >> 16) & 0xff; 516 s[2] = (x >> 8) & 0xff; 517 s[3] = x & 0xff; 518 s[4] = '\0'; 519 } 520 521 status_t MPEG4Extractor::readMetaData() { 522 if (mInitCheck != NO_INIT) { 523 return mInitCheck; 524 } 525 526 off64_t offset = 0; 527 status_t err; 528 bool sawMoovOrSidx = false; 529 530 while (!(sawMoovOrSidx && (mMdatFound || mMoofFound))) { 531 off64_t orig_offset = offset; 532 err = parseChunk(&offset, 0); 533 534 if (err != OK && err != UNKNOWN_ERROR) { 535 break; 536 } else if (offset <= orig_offset) { 537 // only continue parsing if the offset was advanced, 538 // otherwise we might end up in an infinite loop 539 ALOGE("did not advance: %lld->%lld", (long long)orig_offset, (long long)offset); 540 err = ERROR_MALFORMED; 541 break; 542 } else if (err == UNKNOWN_ERROR) { 543 sawMoovOrSidx = true; 544 } 545 } 546 547 if (mInitCheck == OK) { 548 if (findTrackByMimePrefix("video/") != NULL) { 549 mFileMetaData->setCString( 550 kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_MPEG4); 551 } else if (findTrackByMimePrefix("audio/") != NULL) { 552 mFileMetaData->setCString(kKeyMIMEType, "audio/mp4"); 553 } else { 554 mFileMetaData->setCString(kKeyMIMEType, "application/octet-stream"); 555 } 556 } else { 557 mInitCheck = err; 558 } 559 560 CHECK_NE(err, (status_t)NO_INIT); 561 562 // copy pssh data into file metadata 563 uint64_t psshsize = 0; 564 for (size_t i = 0; i < mPssh.size(); i++) { 565 psshsize += 20 + mPssh[i].datalen; 566 } 567 if (psshsize > 0 && psshsize <= UINT32_MAX) { 568 char *buf = (char*)malloc(psshsize); 569 if (!buf) { 570 ALOGE("b/28471206"); 571 return NO_MEMORY; 572 } 573 char *ptr = buf; 574 for (size_t i = 0; i < mPssh.size(); i++) { 575 memcpy(ptr, mPssh[i].uuid, 20); // uuid + length 576 memcpy(ptr + 20, mPssh[i].data, mPssh[i].datalen); 577 ptr += (20 + mPssh[i].datalen); 578 } 579 mFileMetaData->setData(kKeyPssh, 'pssh', buf, psshsize); 580 free(buf); 581 } 582 return mInitCheck; 583 } 584 585 char* MPEG4Extractor::getDrmTrackInfo(size_t trackID, int *len) { 586 if (mFirstSINF == NULL) { 587 return NULL; 588 } 589 590 SINF *sinf = mFirstSINF; 591 while (sinf && (trackID != sinf->trackID)) { 592 sinf = sinf->next; 593 } 594 595 if (sinf == NULL) { 596 return NULL; 597 } 598 599 *len = sinf->len; 600 return sinf->IPMPData; 601 } 602 603 // Reads an encoded integer 7 bits at a time until it encounters the high bit clear. 604 static int32_t readSize(off64_t offset, 605 const sp<DataSource> DataSource, uint8_t *numOfBytes) { 606 uint32_t size = 0; 607 uint8_t data; 608 bool moreData = true; 609 *numOfBytes = 0; 610 611 while (moreData) { 612 if (DataSource->readAt(offset, &data, 1) < 1) { 613 return -1; 614 } 615 offset ++; 616 moreData = (data >= 128) ? true : false; 617 size = (size << 7) | (data & 0x7f); // Take last 7 bits 618 (*numOfBytes) ++; 619 } 620 621 return size; 622 } 623 624 status_t MPEG4Extractor::parseDrmSINF( 625 off64_t * /* offset */, off64_t data_offset) { 626 uint8_t updateIdTag; 627 if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) { 628 return ERROR_IO; 629 } 630 data_offset ++; 631 632 if (0x01/*OBJECT_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) { 633 return ERROR_MALFORMED; 634 } 635 636 uint8_t numOfBytes; 637 int32_t size = readSize(data_offset, mDataSource, &numOfBytes); 638 if (size < 0) { 639 return ERROR_IO; 640 } 641 data_offset += numOfBytes; 642 643 while(size >= 11 ) { 644 uint8_t descriptorTag; 645 if (mDataSource->readAt(data_offset, &descriptorTag, 1) < 1) { 646 return ERROR_IO; 647 } 648 data_offset ++; 649 650 if (0x11/*OBJECT_DESCRIPTOR_ID_TAG*/ != descriptorTag) { 651 return ERROR_MALFORMED; 652 } 653 654 uint8_t buffer[8]; 655 //ObjectDescriptorID and ObjectDescriptor url flag 656 if (mDataSource->readAt(data_offset, buffer, 2) < 2) { 657 return ERROR_IO; 658 } 659 data_offset += 2; 660 661 if ((buffer[1] >> 5) & 0x0001) { //url flag is set 662 return ERROR_MALFORMED; 663 } 664 665 if (mDataSource->readAt(data_offset, buffer, 8) < 8) { 666 return ERROR_IO; 667 } 668 data_offset += 8; 669 670 if ((0x0F/*ES_ID_REF_TAG*/ != buffer[1]) 671 || ( 0x0A/*IPMP_DESCRIPTOR_POINTER_ID_TAG*/ != buffer[5])) { 672 return ERROR_MALFORMED; 673 } 674 675 SINF *sinf = new SINF; 676 sinf->trackID = U16_AT(&buffer[3]); 677 sinf->IPMPDescriptorID = buffer[7]; 678 sinf->next = mFirstSINF; 679 mFirstSINF = sinf; 680 681 size -= (8 + 2 + 1); 682 } 683 684 if (size != 0) { 685 return ERROR_MALFORMED; 686 } 687 688 if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) { 689 return ERROR_IO; 690 } 691 data_offset ++; 692 693 if(0x05/*IPMP_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) { 694 return ERROR_MALFORMED; 695 } 696 697 size = readSize(data_offset, mDataSource, &numOfBytes); 698 if (size < 0) { 699 return ERROR_IO; 700 } 701 data_offset += numOfBytes; 702 703 while (size > 0) { 704 uint8_t tag; 705 int32_t dataLen; 706 if (mDataSource->readAt(data_offset, &tag, 1) < 1) { 707 return ERROR_IO; 708 } 709 data_offset ++; 710 711 if (0x0B/*IPMP_DESCRIPTOR_ID_TAG*/ == tag) { 712 uint8_t id; 713 dataLen = readSize(data_offset, mDataSource, &numOfBytes); 714 if (dataLen < 0) { 715 return ERROR_IO; 716 } else if (dataLen < 4) { 717 return ERROR_MALFORMED; 718 } 719 data_offset += numOfBytes; 720 721 if (mDataSource->readAt(data_offset, &id, 1) < 1) { 722 return ERROR_IO; 723 } 724 data_offset ++; 725 726 SINF *sinf = mFirstSINF; 727 while (sinf && (sinf->IPMPDescriptorID != id)) { 728 sinf = sinf->next; 729 } 730 if (sinf == NULL) { 731 return ERROR_MALFORMED; 732 } 733 sinf->len = dataLen - 3; 734 sinf->IPMPData = new (std::nothrow) char[sinf->len]; 735 if (sinf->IPMPData == NULL) { 736 return ERROR_MALFORMED; 737 } 738 data_offset += 2; 739 740 if (mDataSource->readAt(data_offset, sinf->IPMPData, sinf->len) < sinf->len) { 741 return ERROR_IO; 742 } 743 data_offset += sinf->len; 744 745 size -= (dataLen + numOfBytes + 1); 746 } 747 } 748 749 if (size != 0) { 750 return ERROR_MALFORMED; 751 } 752 753 return UNKNOWN_ERROR; // Return a dummy error. 754 } 755 756 struct PathAdder { 757 PathAdder(Vector<uint32_t> *path, uint32_t chunkType) 758 : mPath(path) { 759 mPath->push(chunkType); 760 } 761 762 ~PathAdder() { 763 mPath->pop(); 764 } 765 766 private: 767 Vector<uint32_t> *mPath; 768 769 PathAdder(const PathAdder &); 770 PathAdder &operator=(const PathAdder &); 771 }; 772 773 static bool underMetaDataPath(const Vector<uint32_t> &path) { 774 return path.size() >= 5 775 && path[0] == FOURCC('m', 'o', 'o', 'v') 776 && path[1] == FOURCC('u', 'd', 't', 'a') 777 && path[2] == FOURCC('m', 'e', 't', 'a') 778 && path[3] == FOURCC('i', 'l', 's', 't'); 779 } 780 781 static bool underQTMetaPath(const Vector<uint32_t> &path, int32_t depth) { 782 return path.size() >= 2 783 && path[0] == FOURCC('m', 'o', 'o', 'v') 784 && path[1] == FOURCC('m', 'e', 't', 'a') 785 && (depth == 2 786 || (depth == 3 787 && (path[2] == FOURCC('h', 'd', 'l', 'r') 788 || path[2] == FOURCC('i', 'l', 's', 't') 789 || path[2] == FOURCC('k', 'e', 'y', 's')))); 790 } 791 792 // Given a time in seconds since Jan 1 1904, produce a human-readable string. 793 static bool convertTimeToDate(int64_t time_1904, String8 *s) { 794 // delta between mpeg4 time and unix epoch time 795 static const int64_t delta = (((66 * 365 + 17) * 24) * 3600); 796 if (time_1904 < INT64_MIN + delta) { 797 return false; 798 } 799 time_t time_1970 = time_1904 - delta; 800 801 char tmp[32]; 802 struct tm* tm = gmtime(&time_1970); 803 if (tm != NULL && 804 strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", tm) > 0) { 805 s->setTo(tmp); 806 return true; 807 } 808 return false; 809 } 810 811 status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) { 812 ALOGV("entering parseChunk %lld/%d", (long long)*offset, depth); 813 814 if (*offset < 0) { 815 ALOGE("b/23540914"); 816 return ERROR_MALFORMED; 817 } 818 uint32_t hdr[2]; 819 if (mDataSource->readAt(*offset, hdr, 8) < 8) { 820 return ERROR_IO; 821 } 822 uint64_t chunk_size = ntohl(hdr[0]); 823 int32_t chunk_type = ntohl(hdr[1]); 824 off64_t data_offset = *offset + 8; 825 826 if (chunk_size == 1) { 827 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) { 828 return ERROR_IO; 829 } 830 chunk_size = ntoh64(chunk_size); 831 data_offset += 8; 832 833 if (chunk_size < 16) { 834 // The smallest valid chunk is 16 bytes long in this case. 835 return ERROR_MALFORMED; 836 } 837 } else if (chunk_size == 0) { 838 if (depth == 0) { 839 // atom extends to end of file 840 off64_t sourceSize; 841 if (mDataSource->getSize(&sourceSize) == OK) { 842 chunk_size = (sourceSize - *offset); 843 } else { 844 // XXX could we just pick a "sufficiently large" value here? 845 ALOGE("atom size is 0, and data source has no size"); 846 return ERROR_MALFORMED; 847 } 848 } else { 849 // not allowed for non-toplevel atoms, skip it 850 *offset += 4; 851 return OK; 852 } 853 } else if (chunk_size < 8) { 854 // The smallest valid chunk is 8 bytes long. 855 ALOGE("invalid chunk size: %" PRIu64, chunk_size); 856 return ERROR_MALFORMED; 857 } 858 859 char chunk[5]; 860 MakeFourCCString(chunk_type, chunk); 861 ALOGV("chunk: %s @ %lld, %d", chunk, (long long)*offset, depth); 862 863 if (kUseHexDump) { 864 static const char kWhitespace[] = " "; 865 const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth]; 866 printf("%sfound chunk '%s' of size %" PRIu64 "\n", indent, chunk, chunk_size); 867 868 char buffer[256]; 869 size_t n = chunk_size; 870 if (n > sizeof(buffer)) { 871 n = sizeof(buffer); 872 } 873 if (mDataSource->readAt(*offset, buffer, n) 874 < (ssize_t)n) { 875 return ERROR_IO; 876 } 877 878 hexdump(buffer, n); 879 } 880 881 PathAdder autoAdder(&mPath, chunk_type); 882 883 // (data_offset - *offset) is either 8 or 16 884 off64_t chunk_data_size = chunk_size - (data_offset - *offset); 885 if (chunk_data_size < 0) { 886 ALOGE("b/23540914"); 887 return ERROR_MALFORMED; 888 } 889 if (chunk_type != FOURCC('m', 'd', 'a', 't') && chunk_data_size > kMaxAtomSize) { 890 char errMsg[100]; 891 sprintf(errMsg, "%s atom has size %" PRId64, chunk, chunk_data_size); 892 ALOGE("%s (b/28615448)", errMsg); 893 android_errorWriteWithInfoLog(0x534e4554, "28615448", -1, errMsg, strlen(errMsg)); 894 return ERROR_MALFORMED; 895 } 896 897 if (chunk_type != FOURCC('c', 'p', 'r', 't') 898 && chunk_type != FOURCC('c', 'o', 'v', 'r') 899 && mPath.size() == 5 && underMetaDataPath(mPath)) { 900 off64_t stop_offset = *offset + chunk_size; 901 *offset = data_offset; 902 while (*offset < stop_offset) { 903 status_t err = parseChunk(offset, depth + 1); 904 if (err != OK) { 905 return err; 906 } 907 } 908 909 if (*offset != stop_offset) { 910 return ERROR_MALFORMED; 911 } 912 913 return OK; 914 } 915 916 switch(chunk_type) { 917 case FOURCC('m', 'o', 'o', 'v'): 918 case FOURCC('t', 'r', 'a', 'k'): 919 case FOURCC('m', 'd', 'i', 'a'): 920 case FOURCC('m', 'i', 'n', 'f'): 921 case FOURCC('d', 'i', 'n', 'f'): 922 case FOURCC('s', 't', 'b', 'l'): 923 case FOURCC('m', 'v', 'e', 'x'): 924 case FOURCC('m', 'o', 'o', 'f'): 925 case FOURCC('t', 'r', 'a', 'f'): 926 case FOURCC('m', 'f', 'r', 'a'): 927 case FOURCC('u', 'd', 't', 'a'): 928 case FOURCC('i', 'l', 's', 't'): 929 case FOURCC('s', 'i', 'n', 'f'): 930 case FOURCC('s', 'c', 'h', 'i'): 931 case FOURCC('e', 'd', 't', 's'): 932 case FOURCC('w', 'a', 'v', 'e'): 933 { 934 if (chunk_type == FOURCC('m', 'o', 'o', 'v') && depth != 0) { 935 ALOGE("moov: depth %d", depth); 936 return ERROR_MALFORMED; 937 } 938 if (chunk_type == FOURCC('m', 'o', 'o', 'f') && !mMoofFound) { 939 // store the offset of the first segment 940 mMoofFound = true; 941 mMoofOffset = *offset; 942 } 943 944 if (chunk_type == FOURCC('s', 't', 'b', 'l')) { 945 ALOGV("sampleTable chunk is %" PRIu64 " bytes long.", chunk_size); 946 947 if (mDataSource->flags() 948 & (DataSource::kWantsPrefetching 949 | DataSource::kIsCachingDataSource)) { 950 sp<MPEG4DataSource> cachedSource = 951 new MPEG4DataSource(mDataSource); 952 953 if (cachedSource->setCachedRange(*offset, chunk_size) == OK) { 954 mDataSource = cachedSource; 955 } 956 } 957 958 if (mLastTrack == NULL) 959 return ERROR_MALFORMED; 960 961 mLastTrack->sampleTable = new SampleTable(mDataSource); 962 } 963 964 bool isTrack = false; 965 if (chunk_type == FOURCC('t', 'r', 'a', 'k')) { 966 if (depth != 1) { 967 ALOGE("trak: depth %d", depth); 968 return ERROR_MALFORMED; 969 } 970 isTrack = true; 971 972 Track *track = new Track; 973 track->next = NULL; 974 if (mLastTrack) { 975 mLastTrack->next = track; 976 } else { 977 mFirstTrack = track; 978 } 979 mLastTrack = track; 980 981 track->meta = new MetaData; 982 track->includes_expensive_metadata = false; 983 track->skipTrack = false; 984 track->timescale = 0; 985 track->meta->setCString(kKeyMIMEType, "application/octet-stream"); 986 } 987 988 off64_t stop_offset = *offset + chunk_size; 989 *offset = data_offset; 990 while (*offset < stop_offset) { 991 status_t err = parseChunk(offset, depth + 1); 992 if (err != OK) { 993 if (isTrack) { 994 mLastTrack->skipTrack = true; 995 break; 996 } 997 return err; 998 } 999 } 1000 1001 if (*offset != stop_offset) { 1002 return ERROR_MALFORMED; 1003 } 1004 1005 if (isTrack) { 1006 int32_t trackId; 1007 // There must be exact one track header per track. 1008 if (!mLastTrack->meta->findInt32(kKeyTrackID, &trackId)) { 1009 mLastTrack->skipTrack = true; 1010 } 1011 if (mLastTrack->skipTrack) { 1012 Track *cur = mFirstTrack; 1013 1014 if (cur == mLastTrack) { 1015 delete cur; 1016 mFirstTrack = mLastTrack = NULL; 1017 } else { 1018 while (cur && cur->next != mLastTrack) { 1019 cur = cur->next; 1020 } 1021 cur->next = NULL; 1022 delete mLastTrack; 1023 mLastTrack = cur; 1024 } 1025 1026 return OK; 1027 } 1028 1029 status_t err = verifyTrack(mLastTrack); 1030 1031 if (err != OK) { 1032 return err; 1033 } 1034 } else if (chunk_type == FOURCC('m', 'o', 'o', 'v')) { 1035 mInitCheck = OK; 1036 1037 if (!mIsDrm) { 1038 return UNKNOWN_ERROR; // Return a dummy error. 1039 } else { 1040 return OK; 1041 } 1042 } 1043 break; 1044 } 1045 1046 case FOURCC('e', 'l', 's', 't'): 1047 { 1048 *offset += chunk_size; 1049 1050 // See 14496-12 8.6.6 1051 uint8_t version; 1052 if (mDataSource->readAt(data_offset, &version, 1) < 1) { 1053 return ERROR_IO; 1054 } 1055 1056 uint32_t entry_count; 1057 if (!mDataSource->getUInt32(data_offset + 4, &entry_count)) { 1058 return ERROR_IO; 1059 } 1060 1061 if (entry_count != 1) { 1062 // we only support a single entry at the moment, for gapless playback 1063 ALOGW("ignoring edit list with %d entries", entry_count); 1064 } else if (mHeaderTimescale == 0) { 1065 ALOGW("ignoring edit list because timescale is 0"); 1066 } else { 1067 off64_t entriesoffset = data_offset + 8; 1068 uint64_t segment_duration; 1069 int64_t media_time; 1070 1071 if (version == 1) { 1072 if (!mDataSource->getUInt64(entriesoffset, &segment_duration) || 1073 !mDataSource->getUInt64(entriesoffset + 8, (uint64_t*)&media_time)) { 1074 return ERROR_IO; 1075 } 1076 } else if (version == 0) { 1077 uint32_t sd; 1078 int32_t mt; 1079 if (!mDataSource->getUInt32(entriesoffset, &sd) || 1080 !mDataSource->getUInt32(entriesoffset + 4, (uint32_t*)&mt)) { 1081 return ERROR_IO; 1082 } 1083 segment_duration = sd; 1084 media_time = mt; 1085 } else { 1086 return ERROR_IO; 1087 } 1088 1089 uint64_t halfscale = mHeaderTimescale / 2; 1090 segment_duration = (segment_duration * 1000000 + halfscale)/ mHeaderTimescale; 1091 media_time = (media_time * 1000000 + halfscale) / mHeaderTimescale; 1092 1093 int64_t duration; 1094 int32_t samplerate; 1095 if (!mLastTrack) { 1096 return ERROR_MALFORMED; 1097 } 1098 if (mLastTrack->meta->findInt64(kKeyDuration, &duration) && 1099 mLastTrack->meta->findInt32(kKeySampleRate, &samplerate)) { 1100 1101 int64_t delay = (media_time * samplerate + 500000) / 1000000; 1102 mLastTrack->meta->setInt32(kKeyEncoderDelay, delay); 1103 1104 int64_t paddingus = duration - (int64_t)(segment_duration + media_time); 1105 if (paddingus < 0) { 1106 // track duration from media header (which is what kKeyDuration is) might 1107 // be slightly shorter than the segment duration, which would make the 1108 // padding negative. Clamp to zero. 1109 paddingus = 0; 1110 } 1111 int64_t paddingsamples = (paddingus * samplerate + 500000) / 1000000; 1112 mLastTrack->meta->setInt32(kKeyEncoderPadding, paddingsamples); 1113 } 1114 } 1115 break; 1116 } 1117 1118 case FOURCC('f', 'r', 'm', 'a'): 1119 { 1120 *offset += chunk_size; 1121 1122 uint32_t original_fourcc; 1123 if (mDataSource->readAt(data_offset, &original_fourcc, 4) < 4) { 1124 return ERROR_IO; 1125 } 1126 original_fourcc = ntohl(original_fourcc); 1127 ALOGV("read original format: %d", original_fourcc); 1128 1129 if (mLastTrack == NULL) 1130 return ERROR_MALFORMED; 1131 1132 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(original_fourcc)); 1133 uint32_t num_channels = 0; 1134 uint32_t sample_rate = 0; 1135 if (AdjustChannelsAndRate(original_fourcc, &num_channels, &sample_rate)) { 1136 mLastTrack->meta->setInt32(kKeyChannelCount, num_channels); 1137 mLastTrack->meta->setInt32(kKeySampleRate, sample_rate); 1138 } 1139 break; 1140 } 1141 1142 case FOURCC('t', 'e', 'n', 'c'): 1143 { 1144 *offset += chunk_size; 1145 1146 if (chunk_size < 32) { 1147 return ERROR_MALFORMED; 1148 } 1149 1150 // tenc box contains 1 byte version, 3 byte flags, 3 byte default algorithm id, one byte 1151 // default IV size, 16 bytes default KeyID 1152 // (ISO 23001-7) 1153 char buf[4]; 1154 memset(buf, 0, 4); 1155 if (mDataSource->readAt(data_offset + 4, buf + 1, 3) < 3) { 1156 return ERROR_IO; 1157 } 1158 uint32_t defaultAlgorithmId = ntohl(*((int32_t*)buf)); 1159 if (defaultAlgorithmId > 1) { 1160 // only 0 (clear) and 1 (AES-128) are valid 1161 return ERROR_MALFORMED; 1162 } 1163 1164 memset(buf, 0, 4); 1165 if (mDataSource->readAt(data_offset + 7, buf + 3, 1) < 1) { 1166 return ERROR_IO; 1167 } 1168 uint32_t defaultIVSize = ntohl(*((int32_t*)buf)); 1169 1170 if ((defaultAlgorithmId == 0 && defaultIVSize != 0) || 1171 (defaultAlgorithmId != 0 && defaultIVSize == 0)) { 1172 // only unencrypted data must have 0 IV size 1173 return ERROR_MALFORMED; 1174 } else if (defaultIVSize != 0 && 1175 defaultIVSize != 8 && 1176 defaultIVSize != 16) { 1177 // only supported sizes are 0, 8 and 16 1178 return ERROR_MALFORMED; 1179 } 1180 1181 uint8_t defaultKeyId[16]; 1182 1183 if (mDataSource->readAt(data_offset + 8, &defaultKeyId, 16) < 16) { 1184 return ERROR_IO; 1185 } 1186 1187 if (mLastTrack == NULL) 1188 return ERROR_MALFORMED; 1189 1190 mLastTrack->meta->setInt32(kKeyCryptoMode, defaultAlgorithmId); 1191 mLastTrack->meta->setInt32(kKeyCryptoDefaultIVSize, defaultIVSize); 1192 mLastTrack->meta->setData(kKeyCryptoKey, 'tenc', defaultKeyId, 16); 1193 break; 1194 } 1195 1196 case FOURCC('t', 'k', 'h', 'd'): 1197 { 1198 *offset += chunk_size; 1199 1200 status_t err; 1201 if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) { 1202 return err; 1203 } 1204 1205 break; 1206 } 1207 1208 case FOURCC('p', 's', 's', 'h'): 1209 { 1210 *offset += chunk_size; 1211 1212 PsshInfo pssh; 1213 1214 if (mDataSource->readAt(data_offset + 4, &pssh.uuid, 16) < 16) { 1215 return ERROR_IO; 1216 } 1217 1218 uint32_t psshdatalen = 0; 1219 if (mDataSource->readAt(data_offset + 20, &psshdatalen, 4) < 4) { 1220 return ERROR_IO; 1221 } 1222 pssh.datalen = ntohl(psshdatalen); 1223 ALOGV("pssh data size: %d", pssh.datalen); 1224 if (chunk_size < 20 || pssh.datalen > chunk_size - 20) { 1225 // pssh data length exceeds size of containing box 1226 return ERROR_MALFORMED; 1227 } 1228 1229 pssh.data = new (std::nothrow) uint8_t[pssh.datalen]; 1230 if (pssh.data == NULL) { 1231 return ERROR_MALFORMED; 1232 } 1233 ALOGV("allocated pssh @ %p", pssh.data); 1234 ssize_t requested = (ssize_t) pssh.datalen; 1235 if (mDataSource->readAt(data_offset + 24, pssh.data, requested) < requested) { 1236 return ERROR_IO; 1237 } 1238 mPssh.push_back(pssh); 1239 1240 break; 1241 } 1242 1243 case FOURCC('m', 'd', 'h', 'd'): 1244 { 1245 *offset += chunk_size; 1246 1247 if (chunk_data_size < 4 || mLastTrack == NULL) { 1248 return ERROR_MALFORMED; 1249 } 1250 1251 uint8_t version; 1252 if (mDataSource->readAt( 1253 data_offset, &version, sizeof(version)) 1254 < (ssize_t)sizeof(version)) { 1255 return ERROR_IO; 1256 } 1257 1258 off64_t timescale_offset; 1259 1260 if (version == 1) { 1261 timescale_offset = data_offset + 4 + 16; 1262 } else if (version == 0) { 1263 timescale_offset = data_offset + 4 + 8; 1264 } else { 1265 return ERROR_IO; 1266 } 1267 1268 uint32_t timescale; 1269 if (mDataSource->readAt( 1270 timescale_offset, ×cale, sizeof(timescale)) 1271 < (ssize_t)sizeof(timescale)) { 1272 return ERROR_IO; 1273 } 1274 1275 if (!timescale) { 1276 ALOGE("timescale should not be ZERO."); 1277 return ERROR_MALFORMED; 1278 } 1279 1280 mLastTrack->timescale = ntohl(timescale); 1281 1282 // 14496-12 says all ones means indeterminate, but some files seem to use 1283 // 0 instead. We treat both the same. 1284 int64_t duration = 0; 1285 if (version == 1) { 1286 if (mDataSource->readAt( 1287 timescale_offset + 4, &duration, sizeof(duration)) 1288 < (ssize_t)sizeof(duration)) { 1289 return ERROR_IO; 1290 } 1291 if (duration != -1) { 1292 duration = ntoh64(duration); 1293 } 1294 } else { 1295 uint32_t duration32; 1296 if (mDataSource->readAt( 1297 timescale_offset + 4, &duration32, sizeof(duration32)) 1298 < (ssize_t)sizeof(duration32)) { 1299 return ERROR_IO; 1300 } 1301 if (duration32 != 0xffffffff) { 1302 duration = ntohl(duration32); 1303 } 1304 } 1305 if (duration != 0 && mLastTrack->timescale != 0) { 1306 mLastTrack->meta->setInt64( 1307 kKeyDuration, (duration * 1000000) / mLastTrack->timescale); 1308 } 1309 1310 uint8_t lang[2]; 1311 off64_t lang_offset; 1312 if (version == 1) { 1313 lang_offset = timescale_offset + 4 + 8; 1314 } else if (version == 0) { 1315 lang_offset = timescale_offset + 4 + 4; 1316 } else { 1317 return ERROR_IO; 1318 } 1319 1320 if (mDataSource->readAt(lang_offset, &lang, sizeof(lang)) 1321 < (ssize_t)sizeof(lang)) { 1322 return ERROR_IO; 1323 } 1324 1325 // To get the ISO-639-2/T three character language code 1326 // 1 bit pad followed by 3 5-bits characters. Each character 1327 // is packed as the difference between its ASCII value and 0x60. 1328 char lang_code[4]; 1329 lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60; 1330 lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60; 1331 lang_code[2] = (lang[1] & 0x1f) + 0x60; 1332 lang_code[3] = '\0'; 1333 1334 mLastTrack->meta->setCString( 1335 kKeyMediaLanguage, lang_code); 1336 1337 break; 1338 } 1339 1340 case FOURCC('s', 't', 's', 'd'): 1341 { 1342 uint8_t buffer[8]; 1343 if (chunk_data_size < (off64_t)sizeof(buffer)) { 1344 return ERROR_MALFORMED; 1345 } 1346 1347 if (mDataSource->readAt( 1348 data_offset, buffer, 8) < 8) { 1349 return ERROR_IO; 1350 } 1351 1352 if (U32_AT(buffer) != 0) { 1353 // Should be version 0, flags 0. 1354 return ERROR_MALFORMED; 1355 } 1356 1357 uint32_t entry_count = U32_AT(&buffer[4]); 1358 1359 if (entry_count > 1) { 1360 // For 3GPP timed text, there could be multiple tx3g boxes contain 1361 // multiple text display formats. These formats will be used to 1362 // display the timed text. 1363 // For encrypted files, there may also be more than one entry. 1364 const char *mime; 1365 1366 if (mLastTrack == NULL) 1367 return ERROR_MALFORMED; 1368 1369 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime)); 1370 if (strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP) && 1371 strcasecmp(mime, "application/octet-stream")) { 1372 // For now we only support a single type of media per track. 1373 mLastTrack->skipTrack = true; 1374 *offset += chunk_size; 1375 break; 1376 } 1377 } 1378 off64_t stop_offset = *offset + chunk_size; 1379 *offset = data_offset + 8; 1380 for (uint32_t i = 0; i < entry_count; ++i) { 1381 status_t err = parseChunk(offset, depth + 1); 1382 if (err != OK) { 1383 return err; 1384 } 1385 } 1386 1387 if (*offset != stop_offset) { 1388 return ERROR_MALFORMED; 1389 } 1390 break; 1391 } 1392 1393 case FOURCC('m', 'p', '4', 'a'): 1394 case FOURCC('e', 'n', 'c', 'a'): 1395 case FOURCC('s', 'a', 'm', 'r'): 1396 case FOURCC('s', 'a', 'w', 'b'): 1397 { 1398 if (mIsQT && chunk_type == FOURCC('m', 'p', '4', 'a') 1399 && depth >= 1 && mPath[depth - 1] == FOURCC('w', 'a', 'v', 'e')) { 1400 // Ignore mp4a embedded in QT wave atom 1401 *offset += chunk_size; 1402 break; 1403 } 1404 1405 uint8_t buffer[8 + 20]; 1406 if (chunk_data_size < (ssize_t)sizeof(buffer)) { 1407 // Basic AudioSampleEntry size. 1408 return ERROR_MALFORMED; 1409 } 1410 1411 if (mDataSource->readAt( 1412 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) { 1413 return ERROR_IO; 1414 } 1415 1416 uint16_t data_ref_index __unused = U16_AT(&buffer[6]); 1417 uint16_t version = U16_AT(&buffer[8]); 1418 uint32_t num_channels = U16_AT(&buffer[16]); 1419 1420 uint16_t sample_size = U16_AT(&buffer[18]); 1421 uint32_t sample_rate = U32_AT(&buffer[24]) >> 16; 1422 1423 if (mLastTrack == NULL) 1424 return ERROR_MALFORMED; 1425 1426 off64_t stop_offset = *offset + chunk_size; 1427 *offset = data_offset + sizeof(buffer); 1428 1429 if (mIsQT && chunk_type == FOURCC('m', 'p', '4', 'a')) { 1430 if (version == 1) { 1431 if (mDataSource->readAt(*offset, buffer, 16) < 16) { 1432 return ERROR_IO; 1433 } 1434 1435 #if 0 1436 U32_AT(buffer); // samples per packet 1437 U32_AT(&buffer[4]); // bytes per packet 1438 U32_AT(&buffer[8]); // bytes per frame 1439 U32_AT(&buffer[12]); // bytes per sample 1440 #endif 1441 *offset += 16; 1442 } else if (version == 2) { 1443 uint8_t v2buffer[36]; 1444 if (mDataSource->readAt(*offset, v2buffer, 36) < 36) { 1445 return ERROR_IO; 1446 } 1447 1448 #if 0 1449 U32_AT(v2buffer); // size of struct only 1450 sample_rate = (uint32_t)U64_AT(&v2buffer[4]); // audio sample rate 1451 num_channels = U32_AT(&v2buffer[12]); // num audio channels 1452 U32_AT(&v2buffer[16]); // always 0x7f000000 1453 sample_size = (uint16_t)U32_AT(&v2buffer[20]); // const bits per channel 1454 U32_AT(&v2buffer[24]); // format specifc flags 1455 U32_AT(&v2buffer[28]); // const bytes per audio packet 1456 U32_AT(&v2buffer[32]); // const LPCM frames per audio packet 1457 #endif 1458 *offset += 36; 1459 } 1460 } 1461 1462 if (chunk_type != FOURCC('e', 'n', 'c', 'a')) { 1463 // if the chunk type is enca, we'll get the type from the sinf/frma box later 1464 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type)); 1465 AdjustChannelsAndRate(chunk_type, &num_channels, &sample_rate); 1466 } 1467 ALOGV("*** coding='%s' %d channels, size %d, rate %d\n", 1468 chunk, num_channels, sample_size, sample_rate); 1469 mLastTrack->meta->setInt32(kKeyChannelCount, num_channels); 1470 mLastTrack->meta->setInt32(kKeySampleRate, sample_rate); 1471 1472 while (*offset < stop_offset) { 1473 status_t err = parseChunk(offset, depth + 1); 1474 if (err != OK) { 1475 return err; 1476 } 1477 } 1478 1479 if (*offset != stop_offset) { 1480 return ERROR_MALFORMED; 1481 } 1482 break; 1483 } 1484 1485 case FOURCC('m', 'p', '4', 'v'): 1486 case FOURCC('e', 'n', 'c', 'v'): 1487 case FOURCC('s', '2', '6', '3'): 1488 case FOURCC('H', '2', '6', '3'): 1489 case FOURCC('h', '2', '6', '3'): 1490 case FOURCC('a', 'v', 'c', '1'): 1491 case FOURCC('h', 'v', 'c', '1'): 1492 case FOURCC('h', 'e', 'v', '1'): 1493 { 1494 uint8_t buffer[78]; 1495 if (chunk_data_size < (ssize_t)sizeof(buffer)) { 1496 // Basic VideoSampleEntry size. 1497 return ERROR_MALFORMED; 1498 } 1499 1500 if (mDataSource->readAt( 1501 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) { 1502 return ERROR_IO; 1503 } 1504 1505 uint16_t data_ref_index __unused = U16_AT(&buffer[6]); 1506 uint16_t width = U16_AT(&buffer[6 + 18]); 1507 uint16_t height = U16_AT(&buffer[6 + 20]); 1508 1509 // The video sample is not standard-compliant if it has invalid dimension. 1510 // Use some default width and height value, and 1511 // let the decoder figure out the actual width and height (and thus 1512 // be prepared for INFO_FOMRAT_CHANGED event). 1513 if (width == 0) width = 352; 1514 if (height == 0) height = 288; 1515 1516 // printf("*** coding='%s' width=%d height=%d\n", 1517 // chunk, width, height); 1518 1519 if (mLastTrack == NULL) 1520 return ERROR_MALFORMED; 1521 1522 if (chunk_type != FOURCC('e', 'n', 'c', 'v')) { 1523 // if the chunk type is encv, we'll get the type from the sinf/frma box later 1524 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type)); 1525 } 1526 mLastTrack->meta->setInt32(kKeyWidth, width); 1527 mLastTrack->meta->setInt32(kKeyHeight, height); 1528 1529 off64_t stop_offset = *offset + chunk_size; 1530 *offset = data_offset + sizeof(buffer); 1531 while (*offset < stop_offset) { 1532 status_t err = parseChunk(offset, depth + 1); 1533 if (err != OK) { 1534 return err; 1535 } 1536 } 1537 1538 if (*offset != stop_offset) { 1539 return ERROR_MALFORMED; 1540 } 1541 break; 1542 } 1543 1544 case FOURCC('s', 't', 'c', 'o'): 1545 case FOURCC('c', 'o', '6', '4'): 1546 { 1547 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) 1548 return ERROR_MALFORMED; 1549 1550 status_t err = 1551 mLastTrack->sampleTable->setChunkOffsetParams( 1552 chunk_type, data_offset, chunk_data_size); 1553 1554 *offset += chunk_size; 1555 1556 if (err != OK) { 1557 return err; 1558 } 1559 1560 break; 1561 } 1562 1563 case FOURCC('s', 't', 's', 'c'): 1564 { 1565 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) 1566 return ERROR_MALFORMED; 1567 1568 status_t err = 1569 mLastTrack->sampleTable->setSampleToChunkParams( 1570 data_offset, chunk_data_size); 1571 1572 *offset += chunk_size; 1573 1574 if (err != OK) { 1575 return err; 1576 } 1577 1578 break; 1579 } 1580 1581 case FOURCC('s', 't', 's', 'z'): 1582 case FOURCC('s', 't', 'z', '2'): 1583 { 1584 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) 1585 return ERROR_MALFORMED; 1586 1587 status_t err = 1588 mLastTrack->sampleTable->setSampleSizeParams( 1589 chunk_type, data_offset, chunk_data_size); 1590 1591 *offset += chunk_size; 1592 1593 if (err != OK) { 1594 return err; 1595 } 1596 1597 size_t max_size; 1598 err = mLastTrack->sampleTable->getMaxSampleSize(&max_size); 1599 1600 if (err != OK) { 1601 return err; 1602 } 1603 1604 if (max_size != 0) { 1605 // Assume that a given buffer only contains at most 10 chunks, 1606 // each chunk originally prefixed with a 2 byte length will 1607 // have a 4 byte header (0x00 0x00 0x00 0x01) after conversion, 1608 // and thus will grow by 2 bytes per chunk. 1609 if (max_size > SIZE_MAX - 10 * 2) { 1610 ALOGE("max sample size too big: %zu", max_size); 1611 return ERROR_MALFORMED; 1612 } 1613 mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size + 10 * 2); 1614 } else { 1615 // No size was specified. Pick a conservatively large size. 1616 uint32_t width, height; 1617 if (!mLastTrack->meta->findInt32(kKeyWidth, (int32_t*)&width) || 1618 !mLastTrack->meta->findInt32(kKeyHeight,(int32_t*) &height)) { 1619 ALOGE("No width or height, assuming worst case 1080p"); 1620 width = 1920; 1621 height = 1080; 1622 } else { 1623 // A resolution was specified, check that it's not too big. The values below 1624 // were chosen so that the calculations below don't cause overflows, they're 1625 // not indicating that resolutions up to 32kx32k are actually supported. 1626 if (width > 32768 || height > 32768) { 1627 ALOGE("can't support %u x %u video", width, height); 1628 return ERROR_MALFORMED; 1629 } 1630 } 1631 1632 const char *mime; 1633 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime)); 1634 if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_AVC) 1635 || !strcmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) { 1636 // AVC & HEVC requires compression ratio of at least 2, and uses 1637 // macroblocks 1638 max_size = ((width + 15) / 16) * ((height + 15) / 16) * 192; 1639 } else { 1640 // For all other formats there is no minimum compression 1641 // ratio. Use compression ratio of 1. 1642 max_size = width * height * 3 / 2; 1643 } 1644 mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size); 1645 } 1646 1647 // NOTE: setting another piece of metadata invalidates any pointers (such as the 1648 // mimetype) previously obtained, so don't cache them. 1649 const char *mime; 1650 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime)); 1651 // Calculate average frame rate. 1652 if (!strncasecmp("video/", mime, 6)) { 1653 size_t nSamples = mLastTrack->sampleTable->countSamples(); 1654 if (nSamples == 0) { 1655 int32_t trackId; 1656 if (mLastTrack->meta->findInt32(kKeyTrackID, &trackId)) { 1657 for (size_t i = 0; i < mTrex.size(); i++) { 1658 Trex *t = &mTrex.editItemAt(i); 1659 if (t->track_ID == (uint32_t) trackId) { 1660 if (t->default_sample_duration > 0) { 1661 int32_t frameRate = 1662 mLastTrack->timescale / t->default_sample_duration; 1663 mLastTrack->meta->setInt32(kKeyFrameRate, frameRate); 1664 } 1665 break; 1666 } 1667 } 1668 } 1669 } else { 1670 int64_t durationUs; 1671 if (mLastTrack->meta->findInt64(kKeyDuration, &durationUs)) { 1672 if (durationUs > 0) { 1673 int32_t frameRate = (nSamples * 1000000LL + 1674 (durationUs >> 1)) / durationUs; 1675 mLastTrack->meta->setInt32(kKeyFrameRate, frameRate); 1676 } 1677 } 1678 } 1679 } 1680 1681 break; 1682 } 1683 1684 case FOURCC('s', 't', 't', 's'): 1685 { 1686 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) 1687 return ERROR_MALFORMED; 1688 1689 *offset += chunk_size; 1690 1691 status_t err = 1692 mLastTrack->sampleTable->setTimeToSampleParams( 1693 data_offset, chunk_data_size); 1694 1695 if (err != OK) { 1696 return err; 1697 } 1698 1699 break; 1700 } 1701 1702 case FOURCC('c', 't', 't', 's'): 1703 { 1704 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) 1705 return ERROR_MALFORMED; 1706 1707 *offset += chunk_size; 1708 1709 status_t err = 1710 mLastTrack->sampleTable->setCompositionTimeToSampleParams( 1711 data_offset, chunk_data_size); 1712 1713 if (err != OK) { 1714 return err; 1715 } 1716 1717 break; 1718 } 1719 1720 case FOURCC('s', 't', 's', 's'): 1721 { 1722 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) 1723 return ERROR_MALFORMED; 1724 1725 *offset += chunk_size; 1726 1727 status_t err = 1728 mLastTrack->sampleTable->setSyncSampleParams( 1729 data_offset, chunk_data_size); 1730 1731 if (err != OK) { 1732 return err; 1733 } 1734 1735 break; 1736 } 1737 1738 // \xA9xyz 1739 case FOURCC(0xA9, 'x', 'y', 'z'): 1740 { 1741 *offset += chunk_size; 1742 1743 // Best case the total data length inside "\xA9xyz" box 1744 // would be 8, for instance "\xA9xyz" + "\x00\x04\x15\xc7" + "0+0/", 1745 // where "\x00\x04" is the text string length with value = 4, 1746 // "\0x15\xc7" is the language code = en, and "0+0" is a 1747 // location (string) value with longitude = 0 and latitude = 0. 1748 if (chunk_data_size < 8) { 1749 return ERROR_MALFORMED; 1750 } 1751 1752 // Worst case the location string length would be 18, 1753 // for instance +90.0000-180.0000, without the trailing "/" and 1754 // the string length + language code, and some devices include 1755 // an additional 8 bytes of altitude, e.g. +007.186 1756 char buffer[18 + 8]; 1757 1758 // Substracting 5 from the data size is because the text string length + 1759 // language code takes 4 bytes, and the trailing slash "/" takes 1 byte. 1760 off64_t location_length = chunk_data_size - 5; 1761 if (location_length >= (off64_t) sizeof(buffer)) { 1762 return ERROR_MALFORMED; 1763 } 1764 1765 if (mDataSource->readAt( 1766 data_offset + 4, buffer, location_length) < location_length) { 1767 return ERROR_IO; 1768 } 1769 1770 buffer[location_length] = '\0'; 1771 mFileMetaData->setCString(kKeyLocation, buffer); 1772 break; 1773 } 1774 1775 case FOURCC('e', 's', 'd', 's'): 1776 { 1777 *offset += chunk_size; 1778 1779 if (chunk_data_size < 4) { 1780 return ERROR_MALFORMED; 1781 } 1782 1783 uint8_t buffer[256]; 1784 if (chunk_data_size > (off64_t)sizeof(buffer)) { 1785 return ERROR_BUFFER_TOO_SMALL; 1786 } 1787 1788 if (mDataSource->readAt( 1789 data_offset, buffer, chunk_data_size) < chunk_data_size) { 1790 return ERROR_IO; 1791 } 1792 1793 if (U32_AT(buffer) != 0) { 1794 // Should be version 0, flags 0. 1795 return ERROR_MALFORMED; 1796 } 1797 1798 if (mLastTrack == NULL) 1799 return ERROR_MALFORMED; 1800 1801 mLastTrack->meta->setData( 1802 kKeyESDS, kTypeESDS, &buffer[4], chunk_data_size - 4); 1803 1804 if (mPath.size() >= 2 1805 && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'a')) { 1806 // Information from the ESDS must be relied on for proper 1807 // setup of sample rate and channel count for MPEG4 Audio. 1808 // The generic header appears to only contain generic 1809 // information... 1810 1811 status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio( 1812 &buffer[4], chunk_data_size - 4); 1813 1814 if (err != OK) { 1815 return err; 1816 } 1817 } 1818 if (mPath.size() >= 2 1819 && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'v')) { 1820 // Check if the video is MPEG2 1821 ESDS esds(&buffer[4], chunk_data_size - 4); 1822 1823 uint8_t objectTypeIndication; 1824 if (esds.getObjectTypeIndication(&objectTypeIndication) == OK) { 1825 if (objectTypeIndication >= 0x60 && objectTypeIndication <= 0x65) { 1826 mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_MPEG2); 1827 } 1828 } 1829 } 1830 break; 1831 } 1832 1833 case FOURCC('b', 't', 'r', 't'): 1834 { 1835 *offset += chunk_size; 1836 if (mLastTrack == NULL) { 1837 return ERROR_MALFORMED; 1838 } 1839 1840 uint8_t buffer[12]; 1841 if (chunk_data_size != sizeof(buffer)) { 1842 return ERROR_MALFORMED; 1843 } 1844 1845 if (mDataSource->readAt( 1846 data_offset, buffer, chunk_data_size) < chunk_data_size) { 1847 return ERROR_IO; 1848 } 1849 1850 uint32_t maxBitrate = U32_AT(&buffer[4]); 1851 uint32_t avgBitrate = U32_AT(&buffer[8]); 1852 if (maxBitrate > 0 && maxBitrate < INT32_MAX) { 1853 mLastTrack->meta->setInt32(kKeyMaxBitRate, (int32_t)maxBitrate); 1854 } 1855 if (avgBitrate > 0 && avgBitrate < INT32_MAX) { 1856 mLastTrack->meta->setInt32(kKeyBitRate, (int32_t)avgBitrate); 1857 } 1858 break; 1859 } 1860 1861 case FOURCC('a', 'v', 'c', 'C'): 1862 { 1863 *offset += chunk_size; 1864 1865 sp<ABuffer> buffer = new ABuffer(chunk_data_size); 1866 1867 if (buffer->data() == NULL) { 1868 ALOGE("b/28471206"); 1869 return NO_MEMORY; 1870 } 1871 1872 if (mDataSource->readAt( 1873 data_offset, buffer->data(), chunk_data_size) < chunk_data_size) { 1874 return ERROR_IO; 1875 } 1876 1877 if (mLastTrack == NULL) 1878 return ERROR_MALFORMED; 1879 1880 mLastTrack->meta->setData( 1881 kKeyAVCC, kTypeAVCC, buffer->data(), chunk_data_size); 1882 1883 break; 1884 } 1885 case FOURCC('h', 'v', 'c', 'C'): 1886 { 1887 sp<ABuffer> buffer = new ABuffer(chunk_data_size); 1888 1889 if (buffer->data() == NULL) { 1890 ALOGE("b/28471206"); 1891 return NO_MEMORY; 1892 } 1893 1894 if (mDataSource->readAt( 1895 data_offset, buffer->data(), chunk_data_size) < chunk_data_size) { 1896 return ERROR_IO; 1897 } 1898 1899 if (mLastTrack == NULL) 1900 return ERROR_MALFORMED; 1901 1902 mLastTrack->meta->setData( 1903 kKeyHVCC, kTypeHVCC, buffer->data(), chunk_data_size); 1904 1905 *offset += chunk_size; 1906 break; 1907 } 1908 1909 case FOURCC('d', '2', '6', '3'): 1910 { 1911 *offset += chunk_size; 1912 /* 1913 * d263 contains a fixed 7 bytes part: 1914 * vendor - 4 bytes 1915 * version - 1 byte 1916 * level - 1 byte 1917 * profile - 1 byte 1918 * optionally, "d263" box itself may contain a 16-byte 1919 * bit rate box (bitr) 1920 * average bit rate - 4 bytes 1921 * max bit rate - 4 bytes 1922 */ 1923 char buffer[23]; 1924 if (chunk_data_size != 7 && 1925 chunk_data_size != 23) { 1926 ALOGE("Incorrect D263 box size %lld", (long long)chunk_data_size); 1927 return ERROR_MALFORMED; 1928 } 1929 1930 if (mDataSource->readAt( 1931 data_offset, buffer, chunk_data_size) < chunk_data_size) { 1932 return ERROR_IO; 1933 } 1934 1935 if (mLastTrack == NULL) 1936 return ERROR_MALFORMED; 1937 1938 mLastTrack->meta->setData(kKeyD263, kTypeD263, buffer, chunk_data_size); 1939 1940 break; 1941 } 1942 1943 case FOURCC('m', 'e', 't', 'a'): 1944 { 1945 off64_t stop_offset = *offset + chunk_size; 1946 *offset = data_offset; 1947 bool isParsingMetaKeys = underQTMetaPath(mPath, 2); 1948 if (!isParsingMetaKeys) { 1949 uint8_t buffer[4]; 1950 if (chunk_data_size < (off64_t)sizeof(buffer)) { 1951 *offset = stop_offset; 1952 return ERROR_MALFORMED; 1953 } 1954 1955 if (mDataSource->readAt( 1956 data_offset, buffer, 4) < 4) { 1957 *offset = stop_offset; 1958 return ERROR_IO; 1959 } 1960 1961 if (U32_AT(buffer) != 0) { 1962 // Should be version 0, flags 0. 1963 1964 // If it's not, let's assume this is one of those 1965 // apparently malformed chunks that don't have flags 1966 // and completely different semantics than what's 1967 // in the MPEG4 specs and skip it. 1968 *offset = stop_offset; 1969 return OK; 1970 } 1971 *offset += sizeof(buffer); 1972 } 1973 1974 while (*offset < stop_offset) { 1975 status_t err = parseChunk(offset, depth + 1); 1976 if (err != OK) { 1977 return err; 1978 } 1979 } 1980 1981 if (*offset != stop_offset) { 1982 return ERROR_MALFORMED; 1983 } 1984 break; 1985 } 1986 1987 case FOURCC('m', 'e', 'a', 'n'): 1988 case FOURCC('n', 'a', 'm', 'e'): 1989 case FOURCC('d', 'a', 't', 'a'): 1990 { 1991 *offset += chunk_size; 1992 1993 if (mPath.size() == 6 && underMetaDataPath(mPath)) { 1994 status_t err = parseITunesMetaData(data_offset, chunk_data_size); 1995 1996 if (err != OK) { 1997 return err; 1998 } 1999 } 2000 2001 break; 2002 } 2003 2004 case FOURCC('m', 'v', 'h', 'd'): 2005 { 2006 *offset += chunk_size; 2007 2008 if (depth != 1) { 2009 ALOGE("mvhd: depth %d", depth); 2010 return ERROR_MALFORMED; 2011 } 2012 if (chunk_data_size < 32) { 2013 return ERROR_MALFORMED; 2014 } 2015 2016 uint8_t header[32]; 2017 if (mDataSource->readAt( 2018 data_offset, header, sizeof(header)) 2019 < (ssize_t)sizeof(header)) { 2020 return ERROR_IO; 2021 } 2022 2023 uint64_t creationTime; 2024 uint64_t duration = 0; 2025 if (header[0] == 1) { 2026 creationTime = U64_AT(&header[4]); 2027 mHeaderTimescale = U32_AT(&header[20]); 2028 duration = U64_AT(&header[24]); 2029 if (duration == 0xffffffffffffffff) { 2030 duration = 0; 2031 } 2032 } else if (header[0] != 0) { 2033 return ERROR_MALFORMED; 2034 } else { 2035 creationTime = U32_AT(&header[4]); 2036 mHeaderTimescale = U32_AT(&header[12]); 2037 uint32_t d32 = U32_AT(&header[16]); 2038 if (d32 == 0xffffffff) { 2039 d32 = 0; 2040 } 2041 duration = d32; 2042 } 2043 if (duration != 0 && mHeaderTimescale != 0 && duration < UINT64_MAX / 1000000) { 2044 mFileMetaData->setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale); 2045 } 2046 2047 String8 s; 2048 if (convertTimeToDate(creationTime, &s)) { 2049 mFileMetaData->setCString(kKeyDate, s.string()); 2050 } 2051 2052 2053 break; 2054 } 2055 2056 case FOURCC('m', 'e', 'h', 'd'): 2057 { 2058 *offset += chunk_size; 2059 2060 if (chunk_data_size < 8) { 2061 return ERROR_MALFORMED; 2062 } 2063 2064 uint8_t flags[4]; 2065 if (mDataSource->readAt( 2066 data_offset, flags, sizeof(flags)) 2067 < (ssize_t)sizeof(flags)) { 2068 return ERROR_IO; 2069 } 2070 2071 uint64_t duration = 0; 2072 if (flags[0] == 1) { 2073 // 64 bit 2074 if (chunk_data_size < 12) { 2075 return ERROR_MALFORMED; 2076 } 2077 mDataSource->getUInt64(data_offset + 4, &duration); 2078 if (duration == 0xffffffffffffffff) { 2079 duration = 0; 2080 } 2081 } else if (flags[0] == 0) { 2082 // 32 bit 2083 uint32_t d32; 2084 mDataSource->getUInt32(data_offset + 4, &d32); 2085 if (d32 == 0xffffffff) { 2086 d32 = 0; 2087 } 2088 duration = d32; 2089 } else { 2090 return ERROR_MALFORMED; 2091 } 2092 2093 if (duration != 0 && mHeaderTimescale != 0) { 2094 mFileMetaData->setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale); 2095 } 2096 2097 break; 2098 } 2099 2100 case FOURCC('m', 'd', 'a', 't'): 2101 { 2102 ALOGV("mdat chunk, drm: %d", mIsDrm); 2103 2104 mMdatFound = true; 2105 2106 if (!mIsDrm) { 2107 *offset += chunk_size; 2108 break; 2109 } 2110 2111 if (chunk_size < 8) { 2112 return ERROR_MALFORMED; 2113 } 2114 2115 return parseDrmSINF(offset, data_offset); 2116 } 2117 2118 case FOURCC('h', 'd', 'l', 'r'): 2119 { 2120 *offset += chunk_size; 2121 2122 if (underQTMetaPath(mPath, 3)) { 2123 break; 2124 } 2125 2126 uint32_t buffer; 2127 if (mDataSource->readAt( 2128 data_offset + 8, &buffer, 4) < 4) { 2129 return ERROR_IO; 2130 } 2131 2132 uint32_t type = ntohl(buffer); 2133 // For the 3GPP file format, the handler-type within the 'hdlr' box 2134 // shall be 'text'. We also want to support 'sbtl' handler type 2135 // for a practical reason as various MPEG4 containers use it. 2136 if (type == FOURCC('t', 'e', 'x', 't') || type == FOURCC('s', 'b', 't', 'l')) { 2137 if (mLastTrack != NULL) { 2138 mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_TEXT_3GPP); 2139 } 2140 } 2141 2142 break; 2143 } 2144 2145 case FOURCC('k', 'e', 'y', 's'): 2146 { 2147 *offset += chunk_size; 2148 2149 if (underQTMetaPath(mPath, 3)) { 2150 parseQTMetaKey(data_offset, chunk_data_size); 2151 } 2152 break; 2153 } 2154 2155 case FOURCC('t', 'r', 'e', 'x'): 2156 { 2157 *offset += chunk_size; 2158 2159 if (chunk_data_size < 24) { 2160 return ERROR_IO; 2161 } 2162 Trex trex; 2163 if (!mDataSource->getUInt32(data_offset + 4, &trex.track_ID) || 2164 !mDataSource->getUInt32(data_offset + 8, &trex.default_sample_description_index) || 2165 !mDataSource->getUInt32(data_offset + 12, &trex.default_sample_duration) || 2166 !mDataSource->getUInt32(data_offset + 16, &trex.default_sample_size) || 2167 !mDataSource->getUInt32(data_offset + 20, &trex.default_sample_flags)) { 2168 return ERROR_IO; 2169 } 2170 mTrex.add(trex); 2171 break; 2172 } 2173 2174 case FOURCC('t', 'x', '3', 'g'): 2175 { 2176 if (mLastTrack == NULL) 2177 return ERROR_MALFORMED; 2178 2179 uint32_t type; 2180 const void *data; 2181 size_t size = 0; 2182 if (!mLastTrack->meta->findData( 2183 kKeyTextFormatData, &type, &data, &size)) { 2184 size = 0; 2185 } 2186 2187 if ((chunk_size > SIZE_MAX) || (SIZE_MAX - chunk_size <= size)) { 2188 return ERROR_MALFORMED; 2189 } 2190 2191 uint8_t *buffer = new (std::nothrow) uint8_t[size + chunk_size]; 2192 if (buffer == NULL) { 2193 return ERROR_MALFORMED; 2194 } 2195 2196 if (size > 0) { 2197 memcpy(buffer, data, size); 2198 } 2199 2200 if ((size_t)(mDataSource->readAt(*offset, buffer + size, chunk_size)) 2201 < chunk_size) { 2202 delete[] buffer; 2203 buffer = NULL; 2204 2205 // advance read pointer so we don't end up reading this again 2206 *offset += chunk_size; 2207 return ERROR_IO; 2208 } 2209 2210 mLastTrack->meta->setData( 2211 kKeyTextFormatData, 0, buffer, size + chunk_size); 2212 2213 delete[] buffer; 2214 2215 *offset += chunk_size; 2216 break; 2217 } 2218 2219 case FOURCC('c', 'o', 'v', 'r'): 2220 { 2221 *offset += chunk_size; 2222 2223 if (mFileMetaData != NULL) { 2224 ALOGV("chunk_data_size = %" PRId64 " and data_offset = %" PRId64, 2225 chunk_data_size, data_offset); 2226 2227 if (chunk_data_size < 0 || static_cast<uint64_t>(chunk_data_size) >= SIZE_MAX - 1) { 2228 return ERROR_MALFORMED; 2229 } 2230 sp<ABuffer> buffer = new ABuffer(chunk_data_size + 1); 2231 if (buffer->data() == NULL) { 2232 ALOGE("b/28471206"); 2233 return NO_MEMORY; 2234 } 2235 if (mDataSource->readAt( 2236 data_offset, buffer->data(), chunk_data_size) != (ssize_t)chunk_data_size) { 2237 return ERROR_IO; 2238 } 2239 const int kSkipBytesOfDataBox = 16; 2240 if (chunk_data_size <= kSkipBytesOfDataBox) { 2241 return ERROR_MALFORMED; 2242 } 2243 2244 mFileMetaData->setData( 2245 kKeyAlbumArt, MetaData::TYPE_NONE, 2246 buffer->data() + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox); 2247 } 2248 2249 break; 2250 } 2251 2252 case FOURCC('c', 'o', 'l', 'r'): 2253 { 2254 *offset += chunk_size; 2255 // this must be in a VisualSampleEntry box under the Sample Description Box ('stsd') 2256 // ignore otherwise 2257 if (depth >= 2 && mPath[depth - 2] == FOURCC('s', 't', 's', 'd')) { 2258 status_t err = parseColorInfo(data_offset, chunk_data_size); 2259 if (err != OK) { 2260 return err; 2261 } 2262 } 2263 2264 break; 2265 } 2266 2267 case FOURCC('t', 'i', 't', 'l'): 2268 case FOURCC('p', 'e', 'r', 'f'): 2269 case FOURCC('a', 'u', 't', 'h'): 2270 case FOURCC('g', 'n', 'r', 'e'): 2271 case FOURCC('a', 'l', 'b', 'm'): 2272 case FOURCC('y', 'r', 'r', 'c'): 2273 { 2274 *offset += chunk_size; 2275 2276 status_t err = parse3GPPMetaData(data_offset, chunk_data_size, depth); 2277 2278 if (err != OK) { 2279 return err; 2280 } 2281 2282 break; 2283 } 2284 2285 case FOURCC('I', 'D', '3', '2'): 2286 { 2287 *offset += chunk_size; 2288 2289 if (chunk_data_size < 6) { 2290 return ERROR_MALFORMED; 2291 } 2292 2293 parseID3v2MetaData(data_offset + 6); 2294 2295 break; 2296 } 2297 2298 case FOURCC('-', '-', '-', '-'): 2299 { 2300 mLastCommentMean.clear(); 2301 mLastCommentName.clear(); 2302 mLastCommentData.clear(); 2303 *offset += chunk_size; 2304 break; 2305 } 2306 2307 case FOURCC('s', 'i', 'd', 'x'): 2308 { 2309 parseSegmentIndex(data_offset, chunk_data_size); 2310 *offset += chunk_size; 2311 return UNKNOWN_ERROR; // stop parsing after sidx 2312 } 2313 2314 case FOURCC('f', 't', 'y', 'p'): 2315 { 2316 if (chunk_data_size < 8 || depth != 0) { 2317 return ERROR_MALFORMED; 2318 } 2319 2320 off64_t stop_offset = *offset + chunk_size; 2321 uint32_t numCompatibleBrands = (chunk_data_size - 8) / 4; 2322 for (size_t i = 0; i < numCompatibleBrands + 2; ++i) { 2323 if (i == 1) { 2324 // Skip this index, it refers to the minorVersion, 2325 // not a brand. 2326 continue; 2327 } 2328 2329 uint32_t brand; 2330 if (mDataSource->readAt(data_offset + 4 * i, &brand, 4) < 4) { 2331 return ERROR_MALFORMED; 2332 } 2333 2334 brand = ntohl(brand); 2335 if (brand == FOURCC('q', 't', ' ', ' ')) { 2336 mIsQT = true; 2337 break; 2338 } 2339 } 2340 2341 *offset = stop_offset; 2342 2343 break; 2344 } 2345 2346 default: 2347 { 2348 // check if we're parsing 'ilst' for meta keys 2349 // if so, treat type as a number (key-id). 2350 if (underQTMetaPath(mPath, 3)) { 2351 parseQTMetaVal(chunk_type, data_offset, chunk_data_size); 2352 } 2353 2354 *offset += chunk_size; 2355 break; 2356 } 2357 } 2358 2359 return OK; 2360 } 2361 2362 status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) { 2363 ALOGV("MPEG4Extractor::parseSegmentIndex"); 2364 2365 if (size < 12) { 2366 return -EINVAL; 2367 } 2368 2369 uint32_t flags; 2370 if (!mDataSource->getUInt32(offset, &flags)) { 2371 return ERROR_MALFORMED; 2372 } 2373 2374 uint32_t version = flags >> 24; 2375 flags &= 0xffffff; 2376 2377 ALOGV("sidx version %d", version); 2378 2379 uint32_t referenceId; 2380 if (!mDataSource->getUInt32(offset + 4, &referenceId)) { 2381 return ERROR_MALFORMED; 2382 } 2383 2384 uint32_t timeScale; 2385 if (!mDataSource->getUInt32(offset + 8, &timeScale)) { 2386 return ERROR_MALFORMED; 2387 } 2388 ALOGV("sidx refid/timescale: %d/%d", referenceId, timeScale); 2389 if (timeScale == 0) 2390 return ERROR_MALFORMED; 2391 2392 uint64_t earliestPresentationTime; 2393 uint64_t firstOffset; 2394 2395 offset += 12; 2396 size -= 12; 2397 2398 if (version == 0) { 2399 if (size < 8) { 2400 return -EINVAL; 2401 } 2402 uint32_t tmp; 2403 if (!mDataSource->getUInt32(offset, &tmp)) { 2404 return ERROR_MALFORMED; 2405 } 2406 earliestPresentationTime = tmp; 2407 if (!mDataSource->getUInt32(offset + 4, &tmp)) { 2408 return ERROR_MALFORMED; 2409 } 2410 firstOffset = tmp; 2411 offset += 8; 2412 size -= 8; 2413 } else { 2414 if (size < 16) { 2415 return -EINVAL; 2416 } 2417 if (!mDataSource->getUInt64(offset, &earliestPresentationTime)) { 2418 return ERROR_MALFORMED; 2419 } 2420 if (!mDataSource->getUInt64(offset + 8, &firstOffset)) { 2421 return ERROR_MALFORMED; 2422 } 2423 offset += 16; 2424 size -= 16; 2425 } 2426 ALOGV("sidx pres/off: %" PRIu64 "/%" PRIu64, earliestPresentationTime, firstOffset); 2427 2428 if (size < 4) { 2429 return -EINVAL; 2430 } 2431 2432 uint16_t referenceCount; 2433 if (!mDataSource->getUInt16(offset + 2, &referenceCount)) { 2434 return ERROR_MALFORMED; 2435 } 2436 offset += 4; 2437 size -= 4; 2438 ALOGV("refcount: %d", referenceCount); 2439 2440 if (size < referenceCount * 12) { 2441 return -EINVAL; 2442 } 2443 2444 uint64_t total_duration = 0; 2445 for (unsigned int i = 0; i < referenceCount; i++) { 2446 uint32_t d1, d2, d3; 2447 2448 if (!mDataSource->getUInt32(offset, &d1) || // size 2449 !mDataSource->getUInt32(offset + 4, &d2) || // duration 2450 !mDataSource->getUInt32(offset + 8, &d3)) { // flags 2451 return ERROR_MALFORMED; 2452 } 2453 2454 if (d1 & 0x80000000) { 2455 ALOGW("sub-sidx boxes not supported yet"); 2456 } 2457 bool sap = d3 & 0x80000000; 2458 uint32_t saptype = (d3 >> 28) & 7; 2459 if (!sap || (saptype != 1 && saptype != 2)) { 2460 // type 1 and 2 are sync samples 2461 ALOGW("not a stream access point, or unsupported type: %08x", d3); 2462 } 2463 total_duration += d2; 2464 offset += 12; 2465 ALOGV(" item %d, %08x %08x %08x", i, d1, d2, d3); 2466 SidxEntry se; 2467 se.mSize = d1 & 0x7fffffff; 2468 se.mDurationUs = 1000000LL * d2 / timeScale; 2469 mSidxEntries.add(se); 2470 } 2471 2472 uint64_t sidxDuration = total_duration * 1000000 / timeScale; 2473 2474 if (mLastTrack == NULL) 2475 return ERROR_MALFORMED; 2476 2477 int64_t metaDuration; 2478 if (!mLastTrack->meta->findInt64(kKeyDuration, &metaDuration) || metaDuration == 0) { 2479 mLastTrack->meta->setInt64(kKeyDuration, sidxDuration); 2480 } 2481 return OK; 2482 } 2483 2484 status_t MPEG4Extractor::parseQTMetaKey(off64_t offset, size_t size) { 2485 if (size < 8) { 2486 return ERROR_MALFORMED; 2487 } 2488 2489 uint32_t count; 2490 if (!mDataSource->getUInt32(offset + 4, &count)) { 2491 return ERROR_MALFORMED; 2492 } 2493 2494 if (mMetaKeyMap.size() > 0) { 2495 ALOGW("'keys' atom seen again, discarding existing entries"); 2496 mMetaKeyMap.clear(); 2497 } 2498 2499 off64_t keyOffset = offset + 8; 2500 off64_t stopOffset = offset + size; 2501 for (size_t i = 1; i <= count; i++) { 2502 if (keyOffset + 8 > stopOffset) { 2503 return ERROR_MALFORMED; 2504 } 2505 2506 uint32_t keySize; 2507 if (!mDataSource->getUInt32(keyOffset, &keySize) 2508 || keySize < 8 2509 || keyOffset + keySize > stopOffset) { 2510 return ERROR_MALFORMED; 2511 } 2512 2513 uint32_t type; 2514 if (!mDataSource->getUInt32(keyOffset + 4, &type) 2515 || type != FOURCC('m', 'd', 't', 'a')) { 2516 return ERROR_MALFORMED; 2517 } 2518 2519 keySize -= 8; 2520 keyOffset += 8; 2521 2522 sp<ABuffer> keyData = new ABuffer(keySize); 2523 if (keyData->data() == NULL) { 2524 return ERROR_MALFORMED; 2525 } 2526 if (mDataSource->readAt( 2527 keyOffset, keyData->data(), keySize) < (ssize_t) keySize) { 2528 return ERROR_MALFORMED; 2529 } 2530 2531 AString key((const char *)keyData->data(), keySize); 2532 mMetaKeyMap.add(i, key); 2533 2534 keyOffset += keySize; 2535 } 2536 return OK; 2537 } 2538 2539 status_t MPEG4Extractor::parseQTMetaVal( 2540 int32_t keyId, off64_t offset, size_t size) { 2541 ssize_t index = mMetaKeyMap.indexOfKey(keyId); 2542 if (index < 0) { 2543 // corresponding key is not present, ignore 2544 return ERROR_MALFORMED; 2545 } 2546 2547 if (size <= 16) { 2548 return ERROR_MALFORMED; 2549 } 2550 uint32_t dataSize; 2551 if (!mDataSource->getUInt32(offset, &dataSize) 2552 || dataSize > size || dataSize <= 16) { 2553 return ERROR_MALFORMED; 2554 } 2555 uint32_t atomFourCC; 2556 if (!mDataSource->getUInt32(offset + 4, &atomFourCC) 2557 || atomFourCC != FOURCC('d', 'a', 't', 'a')) { 2558 return ERROR_MALFORMED; 2559 } 2560 uint32_t dataType; 2561 if (!mDataSource->getUInt32(offset + 8, &dataType) 2562 || ((dataType & 0xff000000) != 0)) { 2563 // not well-known type 2564 return ERROR_MALFORMED; 2565 } 2566 2567 dataSize -= 16; 2568 offset += 16; 2569 2570 if (dataType == 23 && dataSize >= 4) { 2571 // BE Float32 2572 uint32_t val; 2573 if (!mDataSource->getUInt32(offset, &val)) { 2574 return ERROR_MALFORMED; 2575 } 2576 if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.capture.fps")) { 2577 mFileMetaData->setFloat(kKeyCaptureFramerate, *(float *)&val); 2578 } 2579 } else if (dataType == 67 && dataSize >= 4) { 2580 // BE signed int32 2581 uint32_t val; 2582 if (!mDataSource->getUInt32(offset, &val)) { 2583 return ERROR_MALFORMED; 2584 } 2585 if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.video.temporal_layers_count")) { 2586 mFileMetaData->setInt32(kKeyTemporalLayerCount, val); 2587 } 2588 } else { 2589 // add more keys if needed 2590 ALOGV("ignoring key: type %d, size %d", dataType, dataSize); 2591 } 2592 2593 return OK; 2594 } 2595 2596 status_t MPEG4Extractor::parseTrackHeader( 2597 off64_t data_offset, off64_t data_size) { 2598 if (data_size < 4) { 2599 return ERROR_MALFORMED; 2600 } 2601 2602 uint8_t version; 2603 if (mDataSource->readAt(data_offset, &version, 1) < 1) { 2604 return ERROR_IO; 2605 } 2606 2607 size_t dynSize = (version == 1) ? 36 : 24; 2608 2609 uint8_t buffer[36 + 60]; 2610 2611 if (data_size != (off64_t)dynSize + 60) { 2612 return ERROR_MALFORMED; 2613 } 2614 2615 if (mDataSource->readAt( 2616 data_offset, buffer, data_size) < (ssize_t)data_size) { 2617 return ERROR_IO; 2618 } 2619 2620 uint64_t ctime __unused, mtime __unused, duration __unused; 2621 int32_t id; 2622 2623 if (version == 1) { 2624 ctime = U64_AT(&buffer[4]); 2625 mtime = U64_AT(&buffer[12]); 2626 id = U32_AT(&buffer[20]); 2627 duration = U64_AT(&buffer[28]); 2628 } else if (version == 0) { 2629 ctime = U32_AT(&buffer[4]); 2630 mtime = U32_AT(&buffer[8]); 2631 id = U32_AT(&buffer[12]); 2632 duration = U32_AT(&buffer[20]); 2633 } else { 2634 return ERROR_UNSUPPORTED; 2635 } 2636 2637 if (mLastTrack == NULL) 2638 return ERROR_MALFORMED; 2639 2640 mLastTrack->meta->setInt32(kKeyTrackID, id); 2641 2642 size_t matrixOffset = dynSize + 16; 2643 int32_t a00 = U32_AT(&buffer[matrixOffset]); 2644 int32_t a01 = U32_AT(&buffer[matrixOffset + 4]); 2645 int32_t a10 = U32_AT(&buffer[matrixOffset + 12]); 2646 int32_t a11 = U32_AT(&buffer[matrixOffset + 16]); 2647 2648 #if 0 2649 int32_t dx = U32_AT(&buffer[matrixOffset + 8]); 2650 int32_t dy = U32_AT(&buffer[matrixOffset + 20]); 2651 2652 ALOGI("x' = %.2f * x + %.2f * y + %.2f", 2653 a00 / 65536.0f, a01 / 65536.0f, dx / 65536.0f); 2654 ALOGI("y' = %.2f * x + %.2f * y + %.2f", 2655 a10 / 65536.0f, a11 / 65536.0f, dy / 65536.0f); 2656 #endif 2657 2658 uint32_t rotationDegrees; 2659 2660 static const int32_t kFixedOne = 0x10000; 2661 if (a00 == kFixedOne && a01 == 0 && a10 == 0 && a11 == kFixedOne) { 2662 // Identity, no rotation 2663 rotationDegrees = 0; 2664 } else if (a00 == 0 && a01 == kFixedOne && a10 == -kFixedOne && a11 == 0) { 2665 rotationDegrees = 90; 2666 } else if (a00 == 0 && a01 == -kFixedOne && a10 == kFixedOne && a11 == 0) { 2667 rotationDegrees = 270; 2668 } else if (a00 == -kFixedOne && a01 == 0 && a10 == 0 && a11 == -kFixedOne) { 2669 rotationDegrees = 180; 2670 } else { 2671 ALOGW("We only support 0,90,180,270 degree rotation matrices"); 2672 rotationDegrees = 0; 2673 } 2674 2675 if (rotationDegrees != 0) { 2676 mLastTrack->meta->setInt32(kKeyRotation, rotationDegrees); 2677 } 2678 2679 // Handle presentation display size, which could be different 2680 // from the image size indicated by kKeyWidth and kKeyHeight. 2681 uint32_t width = U32_AT(&buffer[dynSize + 52]); 2682 uint32_t height = U32_AT(&buffer[dynSize + 56]); 2683 mLastTrack->meta->setInt32(kKeyDisplayWidth, width >> 16); 2684 mLastTrack->meta->setInt32(kKeyDisplayHeight, height >> 16); 2685 2686 return OK; 2687 } 2688 2689 status_t MPEG4Extractor::parseITunesMetaData(off64_t offset, size_t size) { 2690 if (size < 4 || size == SIZE_MAX) { 2691 return ERROR_MALFORMED; 2692 } 2693 2694 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1]; 2695 if (buffer == NULL) { 2696 return ERROR_MALFORMED; 2697 } 2698 if (mDataSource->readAt( 2699 offset, buffer, size) != (ssize_t)size) { 2700 delete[] buffer; 2701 buffer = NULL; 2702 2703 return ERROR_IO; 2704 } 2705 2706 uint32_t flags = U32_AT(buffer); 2707 2708 uint32_t metadataKey = 0; 2709 char chunk[5]; 2710 MakeFourCCString(mPath[4], chunk); 2711 ALOGV("meta: %s @ %lld", chunk, (long long)offset); 2712 switch ((int32_t)mPath[4]) { 2713 case FOURCC(0xa9, 'a', 'l', 'b'): 2714 { 2715 metadataKey = kKeyAlbum; 2716 break; 2717 } 2718 case FOURCC(0xa9, 'A', 'R', 'T'): 2719 { 2720 metadataKey = kKeyArtist; 2721 break; 2722 } 2723 case FOURCC('a', 'A', 'R', 'T'): 2724 { 2725 metadataKey = kKeyAlbumArtist; 2726 break; 2727 } 2728 case FOURCC(0xa9, 'd', 'a', 'y'): 2729 { 2730 metadataKey = kKeyYear; 2731 break; 2732 } 2733 case FOURCC(0xa9, 'n', 'a', 'm'): 2734 { 2735 metadataKey = kKeyTitle; 2736 break; 2737 } 2738 case FOURCC(0xa9, 'w', 'r', 't'): 2739 { 2740 metadataKey = kKeyWriter; 2741 break; 2742 } 2743 case FOURCC('c', 'o', 'v', 'r'): 2744 { 2745 metadataKey = kKeyAlbumArt; 2746 break; 2747 } 2748 case FOURCC('g', 'n', 'r', 'e'): 2749 { 2750 metadataKey = kKeyGenre; 2751 break; 2752 } 2753 case FOURCC(0xa9, 'g', 'e', 'n'): 2754 { 2755 metadataKey = kKeyGenre; 2756 break; 2757 } 2758 case FOURCC('c', 'p', 'i', 'l'): 2759 { 2760 if (size == 9 && flags == 21) { 2761 char tmp[16]; 2762 sprintf(tmp, "%d", 2763 (int)buffer[size - 1]); 2764 2765 mFileMetaData->setCString(kKeyCompilation, tmp); 2766 } 2767 break; 2768 } 2769 case FOURCC('t', 'r', 'k', 'n'): 2770 { 2771 if (size == 16 && flags == 0) { 2772 char tmp[16]; 2773 uint16_t* pTrack = (uint16_t*)&buffer[10]; 2774 uint16_t* pTotalTracks = (uint16_t*)&buffer[12]; 2775 sprintf(tmp, "%d/%d", ntohs(*pTrack), ntohs(*pTotalTracks)); 2776 2777 mFileMetaData->setCString(kKeyCDTrackNumber, tmp); 2778 } 2779 break; 2780 } 2781 case FOURCC('d', 'i', 's', 'k'): 2782 { 2783 if ((size == 14 || size == 16) && flags == 0) { 2784 char tmp[16]; 2785 uint16_t* pDisc = (uint16_t*)&buffer[10]; 2786 uint16_t* pTotalDiscs = (uint16_t*)&buffer[12]; 2787 sprintf(tmp, "%d/%d", ntohs(*pDisc), ntohs(*pTotalDiscs)); 2788 2789 mFileMetaData->setCString(kKeyDiscNumber, tmp); 2790 } 2791 break; 2792 } 2793 case FOURCC('-', '-', '-', '-'): 2794 { 2795 buffer[size] = '\0'; 2796 switch (mPath[5]) { 2797 case FOURCC('m', 'e', 'a', 'n'): 2798 mLastCommentMean.setTo((const char *)buffer + 4); 2799 break; 2800 case FOURCC('n', 'a', 'm', 'e'): 2801 mLastCommentName.setTo((const char *)buffer + 4); 2802 break; 2803 case FOURCC('d', 'a', 't', 'a'): 2804 if (size < 8) { 2805 delete[] buffer; 2806 buffer = NULL; 2807 ALOGE("b/24346430"); 2808 return ERROR_MALFORMED; 2809 } 2810 mLastCommentData.setTo((const char *)buffer + 8); 2811 break; 2812 } 2813 2814 // Once we have a set of mean/name/data info, go ahead and process 2815 // it to see if its something we are interested in. Whether or not 2816 // were are interested in the specific tag, make sure to clear out 2817 // the set so we can be ready to process another tuple should one 2818 // show up later in the file. 2819 if ((mLastCommentMean.length() != 0) && 2820 (mLastCommentName.length() != 0) && 2821 (mLastCommentData.length() != 0)) { 2822 2823 if (mLastCommentMean == "com.apple.iTunes" 2824 && mLastCommentName == "iTunSMPB") { 2825 int32_t delay, padding; 2826 if (sscanf(mLastCommentData, 2827 " %*x %x %x %*x", &delay, &padding) == 2) { 2828 if (mLastTrack == NULL) 2829 return ERROR_MALFORMED; 2830 2831 mLastTrack->meta->setInt32(kKeyEncoderDelay, delay); 2832 mLastTrack->meta->setInt32(kKeyEncoderPadding, padding); 2833 } 2834 } 2835 2836 mLastCommentMean.clear(); 2837 mLastCommentName.clear(); 2838 mLastCommentData.clear(); 2839 } 2840 break; 2841 } 2842 2843 default: 2844 break; 2845 } 2846 2847 if (size >= 8 && metadataKey && !mFileMetaData->hasData(metadataKey)) { 2848 if (metadataKey == kKeyAlbumArt) { 2849 mFileMetaData->setData( 2850 kKeyAlbumArt, MetaData::TYPE_NONE, 2851 buffer + 8, size - 8); 2852 } else if (metadataKey == kKeyGenre) { 2853 if (flags == 0) { 2854 // uint8_t genre code, iTunes genre codes are 2855 // the standard id3 codes, except they start 2856 // at 1 instead of 0 (e.g. Pop is 14, not 13) 2857 // We use standard id3 numbering, so subtract 1. 2858 int genrecode = (int)buffer[size - 1]; 2859 genrecode--; 2860 if (genrecode < 0) { 2861 genrecode = 255; // reserved for 'unknown genre' 2862 } 2863 char genre[10]; 2864 sprintf(genre, "%d", genrecode); 2865 2866 mFileMetaData->setCString(metadataKey, genre); 2867 } else if (flags == 1) { 2868 // custom genre string 2869 buffer[size] = '\0'; 2870 2871 mFileMetaData->setCString( 2872 metadataKey, (const char *)buffer + 8); 2873 } 2874 } else { 2875 buffer[size] = '\0'; 2876 2877 mFileMetaData->setCString( 2878 metadataKey, (const char *)buffer + 8); 2879 } 2880 } 2881 2882 delete[] buffer; 2883 buffer = NULL; 2884 2885 return OK; 2886 } 2887 2888 status_t MPEG4Extractor::parseColorInfo(off64_t offset, size_t size) { 2889 if (size < 4 || size == SIZE_MAX || mLastTrack == NULL) { 2890 return ERROR_MALFORMED; 2891 } 2892 2893 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1]; 2894 if (buffer == NULL) { 2895 return ERROR_MALFORMED; 2896 } 2897 if (mDataSource->readAt(offset, buffer, size) != (ssize_t)size) { 2898 delete[] buffer; 2899 buffer = NULL; 2900 2901 return ERROR_IO; 2902 } 2903 2904 int32_t type = U32_AT(&buffer[0]); 2905 if ((type == FOURCC('n', 'c', 'l', 'x') && size >= 11) 2906 || (type == FOURCC('n', 'c', 'l', 'c' && size >= 10))) { 2907 int32_t primaries = U16_AT(&buffer[4]); 2908 int32_t transfer = U16_AT(&buffer[6]); 2909 int32_t coeffs = U16_AT(&buffer[8]); 2910 bool fullRange = (type == FOURCC('n', 'c', 'l', 'x')) && (buffer[10] & 128); 2911 2912 ColorAspects aspects; 2913 ColorUtils::convertIsoColorAspectsToCodecAspects( 2914 primaries, transfer, coeffs, fullRange, aspects); 2915 2916 // only store the first color specification 2917 if (!mLastTrack->meta->hasData(kKeyColorPrimaries)) { 2918 mLastTrack->meta->setInt32(kKeyColorPrimaries, aspects.mPrimaries); 2919 mLastTrack->meta->setInt32(kKeyTransferFunction, aspects.mTransfer); 2920 mLastTrack->meta->setInt32(kKeyColorMatrix, aspects.mMatrixCoeffs); 2921 mLastTrack->meta->setInt32(kKeyColorRange, aspects.mRange); 2922 } 2923 } 2924 2925 delete[] buffer; 2926 buffer = NULL; 2927 2928 return OK; 2929 } 2930 2931 status_t MPEG4Extractor::parse3GPPMetaData(off64_t offset, size_t size, int depth) { 2932 if (size < 4 || size == SIZE_MAX) { 2933 return ERROR_MALFORMED; 2934 } 2935 2936 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1]; 2937 if (buffer == NULL) { 2938 return ERROR_MALFORMED; 2939 } 2940 if (mDataSource->readAt( 2941 offset, buffer, size) != (ssize_t)size) { 2942 delete[] buffer; 2943 buffer = NULL; 2944 2945 return ERROR_IO; 2946 } 2947 2948 uint32_t metadataKey = 0; 2949 switch (mPath[depth]) { 2950 case FOURCC('t', 'i', 't', 'l'): 2951 { 2952 metadataKey = kKeyTitle; 2953 break; 2954 } 2955 case FOURCC('p', 'e', 'r', 'f'): 2956 { 2957 metadataKey = kKeyArtist; 2958 break; 2959 } 2960 case FOURCC('a', 'u', 't', 'h'): 2961 { 2962 metadataKey = kKeyWriter; 2963 break; 2964 } 2965 case FOURCC('g', 'n', 'r', 'e'): 2966 { 2967 metadataKey = kKeyGenre; 2968 break; 2969 } 2970 case FOURCC('a', 'l', 'b', 'm'): 2971 { 2972 if (buffer[size - 1] != '\0') { 2973 char tmp[4]; 2974 sprintf(tmp, "%u", buffer[size - 1]); 2975 2976 mFileMetaData->setCString(kKeyCDTrackNumber, tmp); 2977 } 2978 2979 metadataKey = kKeyAlbum; 2980 break; 2981 } 2982 case FOURCC('y', 'r', 'r', 'c'): 2983 { 2984 char tmp[5]; 2985 uint16_t year = U16_AT(&buffer[4]); 2986 2987 if (year < 10000) { 2988 sprintf(tmp, "%u", year); 2989 2990 mFileMetaData->setCString(kKeyYear, tmp); 2991 } 2992 break; 2993 } 2994 2995 default: 2996 break; 2997 } 2998 2999 if (metadataKey > 0) { 3000 bool isUTF8 = true; // Common case 3001 char16_t *framedata = NULL; 3002 int len16 = 0; // Number of UTF-16 characters 3003 3004 // smallest possible valid UTF-16 string w BOM: 0xfe 0xff 0x00 0x00 3005 if (size < 6) { 3006 return ERROR_MALFORMED; 3007 } 3008 3009 if (size - 6 >= 4) { 3010 len16 = ((size - 6) / 2) - 1; // don't include 0x0000 terminator 3011 framedata = (char16_t *)(buffer + 6); 3012 if (0xfffe == *framedata) { 3013 // endianness marker (BOM) doesn't match host endianness 3014 for (int i = 0; i < len16; i++) { 3015 framedata[i] = bswap_16(framedata[i]); 3016 } 3017 // BOM is now swapped to 0xfeff, we will execute next block too 3018 } 3019 3020 if (0xfeff == *framedata) { 3021 // Remove the BOM 3022 framedata++; 3023 len16--; 3024 isUTF8 = false; 3025 } 3026 // else normal non-zero-length UTF-8 string 3027 // we can't handle UTF-16 without BOM as there is no other 3028 // indication of encoding. 3029 } 3030 3031 if (isUTF8) { 3032 buffer[size] = 0; 3033 mFileMetaData->setCString(metadataKey, (const char *)buffer + 6); 3034 } else { 3035 // Convert from UTF-16 string to UTF-8 string. 3036 String8 tmpUTF8str(framedata, len16); 3037 mFileMetaData->setCString(metadataKey, tmpUTF8str.string()); 3038 } 3039 } 3040 3041 delete[] buffer; 3042 buffer = NULL; 3043 3044 return OK; 3045 } 3046 3047 void MPEG4Extractor::parseID3v2MetaData(off64_t offset) { 3048 ID3 id3(mDataSource, true /* ignorev1 */, offset); 3049 3050 if (id3.isValid()) { 3051 struct Map { 3052 int key; 3053 const char *tag1; 3054 const char *tag2; 3055 }; 3056 static const Map kMap[] = { 3057 { kKeyAlbum, "TALB", "TAL" }, 3058 { kKeyArtist, "TPE1", "TP1" }, 3059 { kKeyAlbumArtist, "TPE2", "TP2" }, 3060 { kKeyComposer, "TCOM", "TCM" }, 3061 { kKeyGenre, "TCON", "TCO" }, 3062 { kKeyTitle, "TIT2", "TT2" }, 3063 { kKeyYear, "TYE", "TYER" }, 3064 { kKeyAuthor, "TXT", "TEXT" }, 3065 { kKeyCDTrackNumber, "TRK", "TRCK" }, 3066 { kKeyDiscNumber, "TPA", "TPOS" }, 3067 { kKeyCompilation, "TCP", "TCMP" }, 3068 }; 3069 static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]); 3070 3071 for (size_t i = 0; i < kNumMapEntries; ++i) { 3072 if (!mFileMetaData->hasData(kMap[i].key)) { 3073 ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1); 3074 if (it->done()) { 3075 delete it; 3076 it = new ID3::Iterator(id3, kMap[i].tag2); 3077 } 3078 3079 if (it->done()) { 3080 delete it; 3081 continue; 3082 } 3083 3084 String8 s; 3085 it->getString(&s); 3086 delete it; 3087 3088 mFileMetaData->setCString(kMap[i].key, s); 3089 } 3090 } 3091 3092 size_t dataSize; 3093 String8 mime; 3094 const void *data = id3.getAlbumArt(&dataSize, &mime); 3095 3096 if (data) { 3097 mFileMetaData->setData(kKeyAlbumArt, MetaData::TYPE_NONE, data, dataSize); 3098 mFileMetaData->setCString(kKeyAlbumArtMIME, mime.string()); 3099 } 3100 } 3101 } 3102 3103 sp<IMediaSource> MPEG4Extractor::getTrack(size_t index) { 3104 status_t err; 3105 if ((err = readMetaData()) != OK) { 3106 return NULL; 3107 } 3108 3109 Track *track = mFirstTrack; 3110 while (index > 0) { 3111 if (track == NULL) { 3112 return NULL; 3113 } 3114 3115 track = track->next; 3116 --index; 3117 } 3118 3119 if (track == NULL) { 3120 return NULL; 3121 } 3122 3123 3124 Trex *trex = NULL; 3125 int32_t trackId; 3126 if (track->meta->findInt32(kKeyTrackID, &trackId)) { 3127 for (size_t i = 0; i < mTrex.size(); i++) { 3128 Trex *t = &mTrex.editItemAt(i); 3129 if (t->track_ID == (uint32_t) trackId) { 3130 trex = t; 3131 break; 3132 } 3133 } 3134 } else { 3135 ALOGE("b/21657957"); 3136 return NULL; 3137 } 3138 3139 ALOGV("getTrack called, pssh: %zu", mPssh.size()); 3140 3141 const char *mime; 3142 if (!track->meta->findCString(kKeyMIMEType, &mime)) { 3143 return NULL; 3144 } 3145 3146 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) { 3147 uint32_t type; 3148 const void *data; 3149 size_t size; 3150 if (!track->meta->findData(kKeyAVCC, &type, &data, &size)) { 3151 return NULL; 3152 } 3153 3154 const uint8_t *ptr = (const uint8_t *)data; 3155 3156 if (size < 7 || ptr[0] != 1) { // configurationVersion == 1 3157 return NULL; 3158 } 3159 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) { 3160 uint32_t type; 3161 const void *data; 3162 size_t size; 3163 if (!track->meta->findData(kKeyHVCC, &type, &data, &size)) { 3164 return NULL; 3165 } 3166 3167 const uint8_t *ptr = (const uint8_t *)data; 3168 3169 if (size < 22 || ptr[0] != 1) { // configurationVersion == 1 3170 return NULL; 3171 } 3172 } 3173 3174 return new MPEG4Source(this, 3175 track->meta, mDataSource, track->timescale, track->sampleTable, 3176 mSidxEntries, trex, mMoofOffset); 3177 } 3178 3179 // static 3180 status_t MPEG4Extractor::verifyTrack(Track *track) { 3181 const char *mime; 3182 CHECK(track->meta->findCString(kKeyMIMEType, &mime)); 3183 3184 uint32_t type; 3185 const void *data; 3186 size_t size; 3187 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) { 3188 if (!track->meta->findData(kKeyAVCC, &type, &data, &size) 3189 || type != kTypeAVCC) { 3190 return ERROR_MALFORMED; 3191 } 3192 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) { 3193 if (!track->meta->findData(kKeyHVCC, &type, &data, &size) 3194 || type != kTypeHVCC) { 3195 return ERROR_MALFORMED; 3196 } 3197 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4) 3198 || !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2) 3199 || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) { 3200 if (!track->meta->findData(kKeyESDS, &type, &data, &size) 3201 || type != kTypeESDS) { 3202 return ERROR_MALFORMED; 3203 } 3204 } 3205 3206 if (track->sampleTable == NULL || !track->sampleTable->isValid()) { 3207 // Make sure we have all the metadata we need. 3208 ALOGE("stbl atom missing/invalid."); 3209 return ERROR_MALFORMED; 3210 } 3211 3212 if (track->timescale == 0) { 3213 ALOGE("timescale invalid."); 3214 return ERROR_MALFORMED; 3215 } 3216 3217 return OK; 3218 } 3219 3220 typedef enum { 3221 //AOT_NONE = -1, 3222 //AOT_NULL_OBJECT = 0, 3223 //AOT_AAC_MAIN = 1, /**< Main profile */ 3224 AOT_AAC_LC = 2, /**< Low Complexity object */ 3225 //AOT_AAC_SSR = 3, 3226 //AOT_AAC_LTP = 4, 3227 AOT_SBR = 5, 3228 //AOT_AAC_SCAL = 6, 3229 //AOT_TWIN_VQ = 7, 3230 //AOT_CELP = 8, 3231 //AOT_HVXC = 9, 3232 //AOT_RSVD_10 = 10, /**< (reserved) */ 3233 //AOT_RSVD_11 = 11, /**< (reserved) */ 3234 //AOT_TTSI = 12, /**< TTSI Object */ 3235 //AOT_MAIN_SYNTH = 13, /**< Main Synthetic object */ 3236 //AOT_WAV_TAB_SYNTH = 14, /**< Wavetable Synthesis object */ 3237 //AOT_GEN_MIDI = 15, /**< General MIDI object */ 3238 //AOT_ALG_SYNTH_AUD_FX = 16, /**< Algorithmic Synthesis and Audio FX object */ 3239 AOT_ER_AAC_LC = 17, /**< Error Resilient(ER) AAC Low Complexity */ 3240 //AOT_RSVD_18 = 18, /**< (reserved) */ 3241 //AOT_ER_AAC_LTP = 19, /**< Error Resilient(ER) AAC LTP object */ 3242 AOT_ER_AAC_SCAL = 20, /**< Error Resilient(ER) AAC Scalable object */ 3243 //AOT_ER_TWIN_VQ = 21, /**< Error Resilient(ER) TwinVQ object */ 3244 AOT_ER_BSAC = 22, /**< Error Resilient(ER) BSAC object */ 3245 AOT_ER_AAC_LD = 23, /**< Error Resilient(ER) AAC LowDelay object */ 3246 //AOT_ER_CELP = 24, /**< Error Resilient(ER) CELP object */ 3247 //AOT_ER_HVXC = 25, /**< Error Resilient(ER) HVXC object */ 3248 //AOT_ER_HILN = 26, /**< Error Resilient(ER) HILN object */ 3249 //AOT_ER_PARA = 27, /**< Error Resilient(ER) Parametric object */ 3250 //AOT_RSVD_28 = 28, /**< might become SSC */ 3251 AOT_PS = 29, /**< PS, Parametric Stereo (includes SBR) */ 3252 //AOT_MPEGS = 30, /**< MPEG Surround */ 3253 3254 AOT_ESCAPE = 31, /**< Signal AOT uses more than 5 bits */ 3255 3256 //AOT_MP3ONMP4_L1 = 32, /**< MPEG-Layer1 in mp4 */ 3257 //AOT_MP3ONMP4_L2 = 33, /**< MPEG-Layer2 in mp4 */ 3258 //AOT_MP3ONMP4_L3 = 34, /**< MPEG-Layer3 in mp4 */ 3259 //AOT_RSVD_35 = 35, /**< might become DST */ 3260 //AOT_RSVD_36 = 36, /**< might become ALS */ 3261 //AOT_AAC_SLS = 37, /**< AAC + SLS */ 3262 //AOT_SLS = 38, /**< SLS */ 3263 //AOT_ER_AAC_ELD = 39, /**< AAC Enhanced Low Delay */ 3264 3265 //AOT_USAC = 42, /**< USAC */ 3266 //AOT_SAOC = 43, /**< SAOC */ 3267 //AOT_LD_MPEGS = 44, /**< Low Delay MPEG Surround */ 3268 3269 //AOT_RSVD50 = 50, /**< Interim AOT for Rsvd50 */ 3270 } AUDIO_OBJECT_TYPE; 3271 3272 status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio( 3273 const void *esds_data, size_t esds_size) { 3274 ESDS esds(esds_data, esds_size); 3275 3276 uint8_t objectTypeIndication; 3277 if (esds.getObjectTypeIndication(&objectTypeIndication) != OK) { 3278 return ERROR_MALFORMED; 3279 } 3280 3281 if (objectTypeIndication == 0xe1) { 3282 // This isn't MPEG4 audio at all, it's QCELP 14k... 3283 if (mLastTrack == NULL) 3284 return ERROR_MALFORMED; 3285 3286 mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_QCELP); 3287 return OK; 3288 } 3289 3290 if (objectTypeIndication == 0x6b) { 3291 // The media subtype is MP3 audio 3292 // Our software MP3 audio decoder may not be able to handle 3293 // packetized MP3 audio; for now, lets just return ERROR_UNSUPPORTED 3294 ALOGE("MP3 track in MP4/3GPP file is not supported"); 3295 return ERROR_UNSUPPORTED; 3296 } 3297 3298 const uint8_t *csd; 3299 size_t csd_size; 3300 if (esds.getCodecSpecificInfo( 3301 (const void **)&csd, &csd_size) != OK) { 3302 return ERROR_MALFORMED; 3303 } 3304 3305 if (kUseHexDump) { 3306 printf("ESD of size %zu\n", csd_size); 3307 hexdump(csd, csd_size); 3308 } 3309 3310 if (csd_size == 0) { 3311 // There's no further information, i.e. no codec specific data 3312 // Let's assume that the information provided in the mpeg4 headers 3313 // is accurate and hope for the best. 3314 3315 return OK; 3316 } 3317 3318 if (csd_size < 2) { 3319 return ERROR_MALFORMED; 3320 } 3321 3322 static uint32_t kSamplingRate[] = { 3323 96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050, 3324 16000, 12000, 11025, 8000, 7350 3325 }; 3326 3327 ABitReader br(csd, csd_size); 3328 uint32_t objectType = br.getBits(5); 3329 3330 if (objectType == 31) { // AAC-ELD => additional 6 bits 3331 objectType = 32 + br.getBits(6); 3332 } 3333 3334 if (mLastTrack == NULL) 3335 return ERROR_MALFORMED; 3336 3337 //keep AOT type 3338 mLastTrack->meta->setInt32(kKeyAACAOT, objectType); 3339 3340 uint32_t freqIndex = br.getBits(4); 3341 3342 int32_t sampleRate = 0; 3343 int32_t numChannels = 0; 3344 if (freqIndex == 15) { 3345 if (br.numBitsLeft() < 28) return ERROR_MALFORMED; 3346 sampleRate = br.getBits(24); 3347 numChannels = br.getBits(4); 3348 } else { 3349 if (br.numBitsLeft() < 4) return ERROR_MALFORMED; 3350 numChannels = br.getBits(4); 3351 3352 if (freqIndex == 13 || freqIndex == 14) { 3353 return ERROR_MALFORMED; 3354 } 3355 3356 sampleRate = kSamplingRate[freqIndex]; 3357 } 3358 3359 if (objectType == AOT_SBR || objectType == AOT_PS) {//SBR specific config per 14496-3 table 1.13 3360 if (br.numBitsLeft() < 4) return ERROR_MALFORMED; 3361 uint32_t extFreqIndex = br.getBits(4); 3362 int32_t extSampleRate __unused; 3363 if (extFreqIndex == 15) { 3364 if (csd_size < 8) { 3365 return ERROR_MALFORMED; 3366 } 3367 if (br.numBitsLeft() < 24) return ERROR_MALFORMED; 3368 extSampleRate = br.getBits(24); 3369 } else { 3370 if (extFreqIndex == 13 || extFreqIndex == 14) { 3371 return ERROR_MALFORMED; 3372 } 3373 extSampleRate = kSamplingRate[extFreqIndex]; 3374 } 3375 //TODO: save the extension sampling rate value in meta data => 3376 // mLastTrack->meta->setInt32(kKeyExtSampleRate, extSampleRate); 3377 } 3378 3379 switch (numChannels) { 3380 // values defined in 14496-3_2009 amendment-4 Table 1.19 - Channel Configuration 3381 case 0: 3382 case 1:// FC 3383 case 2:// FL FR 3384 case 3:// FC, FL FR 3385 case 4:// FC, FL FR, RC 3386 case 5:// FC, FL FR, SL SR 3387 case 6:// FC, FL FR, SL SR, LFE 3388 //numChannels already contains the right value 3389 break; 3390 case 11:// FC, FL FR, SL SR, RC, LFE 3391 numChannels = 7; 3392 break; 3393 case 7: // FC, FCL FCR, FL FR, SL SR, LFE 3394 case 12:// FC, FL FR, SL SR, RL RR, LFE 3395 case 14:// FC, FL FR, SL SR, LFE, FHL FHR 3396 numChannels = 8; 3397 break; 3398 default: 3399 return ERROR_UNSUPPORTED; 3400 } 3401 3402 { 3403 if (objectType == AOT_SBR || objectType == AOT_PS) { 3404 if (br.numBitsLeft() < 5) return ERROR_MALFORMED; 3405 objectType = br.getBits(5); 3406 3407 if (objectType == AOT_ESCAPE) { 3408 if (br.numBitsLeft() < 6) return ERROR_MALFORMED; 3409 objectType = 32 + br.getBits(6); 3410 } 3411 } 3412 if (objectType == AOT_AAC_LC || objectType == AOT_ER_AAC_LC || 3413 objectType == AOT_ER_AAC_LD || objectType == AOT_ER_AAC_SCAL || 3414 objectType == AOT_ER_BSAC) { 3415 if (br.numBitsLeft() < 2) return ERROR_MALFORMED; 3416 const int32_t frameLengthFlag __unused = br.getBits(1); 3417 3418 const int32_t dependsOnCoreCoder = br.getBits(1); 3419 3420 if (dependsOnCoreCoder ) { 3421 if (br.numBitsLeft() < 14) return ERROR_MALFORMED; 3422 const int32_t coreCoderDelay __unused = br.getBits(14); 3423 } 3424 3425 int32_t extensionFlag = -1; 3426 if (br.numBitsLeft() > 0) { 3427 extensionFlag = br.getBits(1); 3428 } else { 3429 switch (objectType) { 3430 // 14496-3 4.5.1.1 extensionFlag 3431 case AOT_AAC_LC: 3432 extensionFlag = 0; 3433 break; 3434 case AOT_ER_AAC_LC: 3435 case AOT_ER_AAC_SCAL: 3436 case AOT_ER_BSAC: 3437 case AOT_ER_AAC_LD: 3438 extensionFlag = 1; 3439 break; 3440 default: 3441 return ERROR_MALFORMED; 3442 break; 3443 } 3444 ALOGW("csd missing extension flag; assuming %d for object type %u.", 3445 extensionFlag, objectType); 3446 } 3447 3448 if (numChannels == 0) { 3449 int32_t channelsEffectiveNum = 0; 3450 int32_t channelsNum = 0; 3451 if (br.numBitsLeft() < 32) { 3452 return ERROR_MALFORMED; 3453 } 3454 const int32_t ElementInstanceTag __unused = br.getBits(4); 3455 const int32_t Profile __unused = br.getBits(2); 3456 const int32_t SamplingFrequencyIndex __unused = br.getBits(4); 3457 const int32_t NumFrontChannelElements = br.getBits(4); 3458 const int32_t NumSideChannelElements = br.getBits(4); 3459 const int32_t NumBackChannelElements = br.getBits(4); 3460 const int32_t NumLfeChannelElements = br.getBits(2); 3461 const int32_t NumAssocDataElements __unused = br.getBits(3); 3462 const int32_t NumValidCcElements __unused = br.getBits(4); 3463 3464 const int32_t MonoMixdownPresent = br.getBits(1); 3465 3466 if (MonoMixdownPresent != 0) { 3467 if (br.numBitsLeft() < 4) return ERROR_MALFORMED; 3468 const int32_t MonoMixdownElementNumber __unused = br.getBits(4); 3469 } 3470 3471 if (br.numBitsLeft() < 1) return ERROR_MALFORMED; 3472 const int32_t StereoMixdownPresent = br.getBits(1); 3473 if (StereoMixdownPresent != 0) { 3474 if (br.numBitsLeft() < 4) return ERROR_MALFORMED; 3475 const int32_t StereoMixdownElementNumber __unused = br.getBits(4); 3476 } 3477 3478 if (br.numBitsLeft() < 1) return ERROR_MALFORMED; 3479 const int32_t MatrixMixdownIndexPresent = br.getBits(1); 3480 if (MatrixMixdownIndexPresent != 0) { 3481 if (br.numBitsLeft() < 3) return ERROR_MALFORMED; 3482 const int32_t MatrixMixdownIndex __unused = br.getBits(2); 3483 const int32_t PseudoSurroundEnable __unused = br.getBits(1); 3484 } 3485 3486 int i; 3487 for (i=0; i < NumFrontChannelElements; i++) { 3488 if (br.numBitsLeft() < 5) return ERROR_MALFORMED; 3489 const int32_t FrontElementIsCpe = br.getBits(1); 3490 const int32_t FrontElementTagSelect __unused = br.getBits(4); 3491 channelsNum += FrontElementIsCpe ? 2 : 1; 3492 } 3493 3494 for (i=0; i < NumSideChannelElements; i++) { 3495 if (br.numBitsLeft() < 5) return ERROR_MALFORMED; 3496 const int32_t SideElementIsCpe = br.getBits(1); 3497 const int32_t SideElementTagSelect __unused = br.getBits(4); 3498 channelsNum += SideElementIsCpe ? 2 : 1; 3499 } 3500 3501 for (i=0; i < NumBackChannelElements; i++) { 3502 if (br.numBitsLeft() < 5) return ERROR_MALFORMED; 3503 const int32_t BackElementIsCpe = br.getBits(1); 3504 const int32_t BackElementTagSelect __unused = br.getBits(4); 3505 channelsNum += BackElementIsCpe ? 2 : 1; 3506 } 3507 channelsEffectiveNum = channelsNum; 3508 3509 for (i=0; i < NumLfeChannelElements; i++) { 3510 if (br.numBitsLeft() < 4) return ERROR_MALFORMED; 3511 const int32_t LfeElementTagSelect __unused = br.getBits(4); 3512 channelsNum += 1; 3513 } 3514 ALOGV("mpeg4 audio channelsNum = %d", channelsNum); 3515 ALOGV("mpeg4 audio channelsEffectiveNum = %d", channelsEffectiveNum); 3516 numChannels = channelsNum; 3517 } 3518 } 3519 } 3520 3521 if (numChannels == 0) { 3522 return ERROR_UNSUPPORTED; 3523 } 3524 3525 if (mLastTrack == NULL) 3526 return ERROR_MALFORMED; 3527 3528 int32_t prevSampleRate; 3529 CHECK(mLastTrack->meta->findInt32(kKeySampleRate, &prevSampleRate)); 3530 3531 if (prevSampleRate != sampleRate) { 3532 ALOGV("mpeg4 audio sample rate different from previous setting. " 3533 "was: %d, now: %d", prevSampleRate, sampleRate); 3534 } 3535 3536 mLastTrack->meta->setInt32(kKeySampleRate, sampleRate); 3537 3538 int32_t prevChannelCount; 3539 CHECK(mLastTrack->meta->findInt32(kKeyChannelCount, &prevChannelCount)); 3540 3541 if (prevChannelCount != numChannels) { 3542 ALOGV("mpeg4 audio channel count different from previous setting. " 3543 "was: %d, now: %d", prevChannelCount, numChannels); 3544 } 3545 3546 mLastTrack->meta->setInt32(kKeyChannelCount, numChannels); 3547 3548 return OK; 3549 } 3550 3551 //////////////////////////////////////////////////////////////////////////////// 3552 3553 MPEG4Source::MPEG4Source( 3554 const sp<MPEG4Extractor> &owner, 3555 const sp<MetaData> &format, 3556 const sp<DataSource> &dataSource, 3557 int32_t timeScale, 3558 const sp<SampleTable> &sampleTable, 3559 Vector<SidxEntry> &sidx, 3560 const Trex *trex, 3561 off64_t firstMoofOffset) 3562 : mOwner(owner), 3563 mFormat(format), 3564 mDataSource(dataSource), 3565 mTimescale(timeScale), 3566 mSampleTable(sampleTable), 3567 mCurrentSampleIndex(0), 3568 mCurrentFragmentIndex(0), 3569 mSegments(sidx), 3570 mTrex(trex), 3571 mFirstMoofOffset(firstMoofOffset), 3572 mCurrentMoofOffset(firstMoofOffset), 3573 mCurrentTime(0), 3574 mCurrentSampleInfoAllocSize(0), 3575 mCurrentSampleInfoSizes(NULL), 3576 mCurrentSampleInfoOffsetsAllocSize(0), 3577 mCurrentSampleInfoOffsets(NULL), 3578 mIsAVC(false), 3579 mIsHEVC(false), 3580 mNALLengthSize(0), 3581 mStarted(false), 3582 mGroup(NULL), 3583 mBuffer(NULL), 3584 mWantsNALFragments(false), 3585 mSrcBuffer(NULL) { 3586 3587 memset(&mTrackFragmentHeaderInfo, 0, sizeof(mTrackFragmentHeaderInfo)); 3588 3589 mFormat->findInt32(kKeyCryptoMode, &mCryptoMode); 3590 mDefaultIVSize = 0; 3591 mFormat->findInt32(kKeyCryptoDefaultIVSize, &mDefaultIVSize); 3592 uint32_t keytype; 3593 const void *key; 3594 size_t keysize; 3595 if (mFormat->findData(kKeyCryptoKey, &keytype, &key, &keysize)) { 3596 CHECK(keysize <= 16); 3597 memset(mCryptoKey, 0, 16); 3598 memcpy(mCryptoKey, key, keysize); 3599 } 3600 3601 const char *mime; 3602 bool success = mFormat->findCString(kKeyMIMEType, &mime); 3603 CHECK(success); 3604 3605 mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC); 3606 mIsHEVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC); 3607 3608 if (mIsAVC) { 3609 uint32_t type; 3610 const void *data; 3611 size_t size; 3612 CHECK(format->findData(kKeyAVCC, &type, &data, &size)); 3613 3614 const uint8_t *ptr = (const uint8_t *)data; 3615 3616 CHECK(size >= 7); 3617 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1 3618 3619 // The number of bytes used to encode the length of a NAL unit. 3620 mNALLengthSize = 1 + (ptr[4] & 3); 3621 } else if (mIsHEVC) { 3622 uint32_t type; 3623 const void *data; 3624 size_t size; 3625 CHECK(format->findData(kKeyHVCC, &type, &data, &size)); 3626 3627 const uint8_t *ptr = (const uint8_t *)data; 3628 3629 CHECK(size >= 22); 3630 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1 3631 3632 mNALLengthSize = 1 + (ptr[14 + 7] & 3); 3633 } 3634 3635 CHECK(format->findInt32(kKeyTrackID, &mTrackId)); 3636 3637 if (mFirstMoofOffset != 0) { 3638 off64_t offset = mFirstMoofOffset; 3639 parseChunk(&offset); 3640 } 3641 } 3642 3643 MPEG4Source::~MPEG4Source() { 3644 if (mStarted) { 3645 stop(); 3646 } 3647 free(mCurrentSampleInfoSizes); 3648 free(mCurrentSampleInfoOffsets); 3649 } 3650 3651 status_t MPEG4Source::start(MetaData *params) { 3652 Mutex::Autolock autoLock(mLock); 3653 3654 CHECK(!mStarted); 3655 3656 int32_t val; 3657 if (params && params->findInt32(kKeyWantsNALFragments, &val) 3658 && val != 0) { 3659 mWantsNALFragments = true; 3660 } else { 3661 mWantsNALFragments = false; 3662 } 3663 3664 int32_t tmp; 3665 CHECK(mFormat->findInt32(kKeyMaxInputSize, &tmp)); 3666 size_t max_size = tmp; 3667 3668 // A somewhat arbitrary limit that should be sufficient for 8k video frames 3669 // If you see the message below for a valid input stream: increase the limit 3670 const size_t kMaxBufferSize = 64 * 1024 * 1024; 3671 if (max_size > kMaxBufferSize) { 3672 ALOGE("bogus max input size: %zu > %zu", max_size, kMaxBufferSize); 3673 return ERROR_MALFORMED; 3674 } 3675 if (max_size == 0) { 3676 ALOGE("zero max input size"); 3677 return ERROR_MALFORMED; 3678 } 3679 3680 // Allow up to kMaxBuffers, but not if the total exceeds kMaxBufferSize. 3681 const size_t kMaxBuffers = 8; 3682 const size_t buffers = min(kMaxBufferSize / max_size, kMaxBuffers); 3683 mGroup = new MediaBufferGroup(buffers, max_size); 3684 mSrcBuffer = new (std::nothrow) uint8_t[max_size]; 3685 if (mSrcBuffer == NULL) { 3686 // file probably specified a bad max size 3687 delete mGroup; 3688 mGroup = NULL; 3689 return ERROR_MALFORMED; 3690 } 3691 3692 mStarted = true; 3693 3694 return OK; 3695 } 3696 3697 status_t MPEG4Source::stop() { 3698 Mutex::Autolock autoLock(mLock); 3699 3700 CHECK(mStarted); 3701 3702 if (mBuffer != NULL) { 3703 mBuffer->release(); 3704 mBuffer = NULL; 3705 } 3706 3707 delete[] mSrcBuffer; 3708 mSrcBuffer = NULL; 3709 3710 delete mGroup; 3711 mGroup = NULL; 3712 3713 mStarted = false; 3714 mCurrentSampleIndex = 0; 3715 3716 return OK; 3717 } 3718 3719 status_t MPEG4Source::parseChunk(off64_t *offset) { 3720 uint32_t hdr[2]; 3721 if (mDataSource->readAt(*offset, hdr, 8) < 8) { 3722 return ERROR_IO; 3723 } 3724 uint64_t chunk_size = ntohl(hdr[0]); 3725 uint32_t chunk_type = ntohl(hdr[1]); 3726 off64_t data_offset = *offset + 8; 3727 3728 if (chunk_size == 1) { 3729 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) { 3730 return ERROR_IO; 3731 } 3732 chunk_size = ntoh64(chunk_size); 3733 data_offset += 8; 3734 3735 if (chunk_size < 16) { 3736 // The smallest valid chunk is 16 bytes long in this case. 3737 return ERROR_MALFORMED; 3738 } 3739 } else if (chunk_size < 8) { 3740 // The smallest valid chunk is 8 bytes long. 3741 return ERROR_MALFORMED; 3742 } 3743 3744 char chunk[5]; 3745 MakeFourCCString(chunk_type, chunk); 3746 ALOGV("MPEG4Source chunk %s @ %#llx", chunk, (long long)*offset); 3747 3748 off64_t chunk_data_size = *offset + chunk_size - data_offset; 3749 3750 switch(chunk_type) { 3751 3752 case FOURCC('t', 'r', 'a', 'f'): 3753 case FOURCC('m', 'o', 'o', 'f'): { 3754 off64_t stop_offset = *offset + chunk_size; 3755 *offset = data_offset; 3756 while (*offset < stop_offset) { 3757 status_t err = parseChunk(offset); 3758 if (err != OK) { 3759 return err; 3760 } 3761 } 3762 if (chunk_type == FOURCC('m', 'o', 'o', 'f')) { 3763 // *offset points to the box following this moof. Find the next moof from there. 3764 3765 while (true) { 3766 if (mDataSource->readAt(*offset, hdr, 8) < 8) { 3767 return ERROR_END_OF_STREAM; 3768 } 3769 chunk_size = ntohl(hdr[0]); 3770 chunk_type = ntohl(hdr[1]); 3771 if (chunk_type == FOURCC('m', 'o', 'o', 'f')) { 3772 mNextMoofOffset = *offset; 3773 break; 3774 } 3775 *offset += chunk_size; 3776 } 3777 } 3778 break; 3779 } 3780 3781 case FOURCC('t', 'f', 'h', 'd'): { 3782 status_t err; 3783 if ((err = parseTrackFragmentHeader(data_offset, chunk_data_size)) != OK) { 3784 return err; 3785 } 3786 *offset += chunk_size; 3787 break; 3788 } 3789 3790 case FOURCC('t', 'r', 'u', 'n'): { 3791 status_t err; 3792 if (mLastParsedTrackId == mTrackId) { 3793 if ((err = parseTrackFragmentRun(data_offset, chunk_data_size)) != OK) { 3794 return err; 3795 } 3796 } 3797 3798 *offset += chunk_size; 3799 break; 3800 } 3801 3802 case FOURCC('s', 'a', 'i', 'z'): { 3803 status_t err; 3804 if ((err = parseSampleAuxiliaryInformationSizes(data_offset, chunk_data_size)) != OK) { 3805 return err; 3806 } 3807 *offset += chunk_size; 3808 break; 3809 } 3810 case FOURCC('s', 'a', 'i', 'o'): { 3811 status_t err; 3812 if ((err = parseSampleAuxiliaryInformationOffsets(data_offset, chunk_data_size)) != OK) { 3813 return err; 3814 } 3815 *offset += chunk_size; 3816 break; 3817 } 3818 3819 case FOURCC('m', 'd', 'a', 't'): { 3820 // parse DRM info if present 3821 ALOGV("MPEG4Source::parseChunk mdat"); 3822 // if saiz/saoi was previously observed, do something with the sampleinfos 3823 *offset += chunk_size; 3824 break; 3825 } 3826 3827 default: { 3828 *offset += chunk_size; 3829 break; 3830 } 3831 } 3832 return OK; 3833 } 3834 3835 status_t MPEG4Source::parseSampleAuxiliaryInformationSizes( 3836 off64_t offset, off64_t /* size */) { 3837 ALOGV("parseSampleAuxiliaryInformationSizes"); 3838 // 14496-12 8.7.12 3839 uint8_t version; 3840 if (mDataSource->readAt( 3841 offset, &version, sizeof(version)) 3842 < (ssize_t)sizeof(version)) { 3843 return ERROR_IO; 3844 } 3845 3846 if (version != 0) { 3847 return ERROR_UNSUPPORTED; 3848 } 3849 offset++; 3850 3851 uint32_t flags; 3852 if (!mDataSource->getUInt24(offset, &flags)) { 3853 return ERROR_IO; 3854 } 3855 offset += 3; 3856 3857 if (flags & 1) { 3858 uint32_t tmp; 3859 if (!mDataSource->getUInt32(offset, &tmp)) { 3860 return ERROR_MALFORMED; 3861 } 3862 mCurrentAuxInfoType = tmp; 3863 offset += 4; 3864 if (!mDataSource->getUInt32(offset, &tmp)) { 3865 return ERROR_MALFORMED; 3866 } 3867 mCurrentAuxInfoTypeParameter = tmp; 3868 offset += 4; 3869 } 3870 3871 uint8_t defsize; 3872 if (mDataSource->readAt(offset, &defsize, 1) != 1) { 3873 return ERROR_MALFORMED; 3874 } 3875 mCurrentDefaultSampleInfoSize = defsize; 3876 offset++; 3877 3878 uint32_t smplcnt; 3879 if (!mDataSource->getUInt32(offset, &smplcnt)) { 3880 return ERROR_MALFORMED; 3881 } 3882 mCurrentSampleInfoCount = smplcnt; 3883 offset += 4; 3884 3885 if (mCurrentDefaultSampleInfoSize != 0) { 3886 ALOGV("@@@@ using default sample info size of %d", mCurrentDefaultSampleInfoSize); 3887 return OK; 3888 } 3889 if (smplcnt > mCurrentSampleInfoAllocSize) { 3890 mCurrentSampleInfoSizes = (uint8_t*) realloc(mCurrentSampleInfoSizes, smplcnt); 3891 mCurrentSampleInfoAllocSize = smplcnt; 3892 } 3893 3894 mDataSource->readAt(offset, mCurrentSampleInfoSizes, smplcnt); 3895 return OK; 3896 } 3897 3898 status_t MPEG4Source::parseSampleAuxiliaryInformationOffsets( 3899 off64_t offset, off64_t /* size */) { 3900 ALOGV("parseSampleAuxiliaryInformationOffsets"); 3901 // 14496-12 8.7.13 3902 uint8_t version; 3903 if (mDataSource->readAt(offset, &version, sizeof(version)) != 1) { 3904 return ERROR_IO; 3905 } 3906 offset++; 3907 3908 uint32_t flags; 3909 if (!mDataSource->getUInt24(offset, &flags)) { 3910 return ERROR_IO; 3911 } 3912 offset += 3; 3913 3914 uint32_t entrycount; 3915 if (!mDataSource->getUInt32(offset, &entrycount)) { 3916 return ERROR_IO; 3917 } 3918 offset += 4; 3919 if (entrycount == 0) { 3920 return OK; 3921 } 3922 if (entrycount > UINT32_MAX / 8) { 3923 return ERROR_MALFORMED; 3924 } 3925 3926 if (entrycount > mCurrentSampleInfoOffsetsAllocSize) { 3927 uint64_t *newPtr = (uint64_t *)realloc(mCurrentSampleInfoOffsets, entrycount * 8); 3928 if (newPtr == NULL) { 3929 return NO_MEMORY; 3930 } 3931 mCurrentSampleInfoOffsets = newPtr; 3932 mCurrentSampleInfoOffsetsAllocSize = entrycount; 3933 } 3934 mCurrentSampleInfoOffsetCount = entrycount; 3935 3936 if (mCurrentSampleInfoOffsets == NULL) { 3937 return OK; 3938 } 3939 3940 for (size_t i = 0; i < entrycount; i++) { 3941 if (version == 0) { 3942 uint32_t tmp; 3943 if (!mDataSource->getUInt32(offset, &tmp)) { 3944 return ERROR_IO; 3945 } 3946 mCurrentSampleInfoOffsets[i] = tmp; 3947 offset += 4; 3948 } else { 3949 uint64_t tmp; 3950 if (!mDataSource->getUInt64(offset, &tmp)) { 3951 return ERROR_IO; 3952 } 3953 mCurrentSampleInfoOffsets[i] = tmp; 3954 offset += 8; 3955 } 3956 } 3957 3958 // parse clear/encrypted data 3959 3960 off64_t drmoffset = mCurrentSampleInfoOffsets[0]; // from moof 3961 3962 drmoffset += mCurrentMoofOffset; 3963 int ivlength; 3964 CHECK(mFormat->findInt32(kKeyCryptoDefaultIVSize, &ivlength)); 3965 3966 // only 0, 8 and 16 byte initialization vectors are supported 3967 if (ivlength != 0 && ivlength != 8 && ivlength != 16) { 3968 ALOGW("unsupported IV length: %d", ivlength); 3969 return ERROR_MALFORMED; 3970 } 3971 // read CencSampleAuxiliaryDataFormats 3972 for (size_t i = 0; i < mCurrentSampleInfoCount; i++) { 3973 if (i >= mCurrentSamples.size()) { 3974 ALOGW("too few samples"); 3975 break; 3976 } 3977 Sample *smpl = &mCurrentSamples.editItemAt(i); 3978 3979 memset(smpl->iv, 0, 16); 3980 if (mDataSource->readAt(drmoffset, smpl->iv, ivlength) != ivlength) { 3981 return ERROR_IO; 3982 } 3983 3984 drmoffset += ivlength; 3985 3986 int32_t smplinfosize = mCurrentDefaultSampleInfoSize; 3987 if (smplinfosize == 0) { 3988 smplinfosize = mCurrentSampleInfoSizes[i]; 3989 } 3990 if (smplinfosize > ivlength) { 3991 uint16_t numsubsamples; 3992 if (!mDataSource->getUInt16(drmoffset, &numsubsamples)) { 3993 return ERROR_IO; 3994 } 3995 drmoffset += 2; 3996 for (size_t j = 0; j < numsubsamples; j++) { 3997 uint16_t numclear; 3998 uint32_t numencrypted; 3999 if (!mDataSource->getUInt16(drmoffset, &numclear)) { 4000 return ERROR_IO; 4001 } 4002 drmoffset += 2; 4003 if (!mDataSource->getUInt32(drmoffset, &numencrypted)) { 4004 return ERROR_IO; 4005 } 4006 drmoffset += 4; 4007 smpl->clearsizes.add(numclear); 4008 smpl->encryptedsizes.add(numencrypted); 4009 } 4010 } else { 4011 smpl->clearsizes.add(0); 4012 smpl->encryptedsizes.add(smpl->size); 4013 } 4014 } 4015 4016 4017 return OK; 4018 } 4019 4020 status_t MPEG4Source::parseTrackFragmentHeader(off64_t offset, off64_t size) { 4021 4022 if (size < 8) { 4023 return -EINVAL; 4024 } 4025 4026 uint32_t flags; 4027 if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags 4028 return ERROR_MALFORMED; 4029 } 4030 4031 if (flags & 0xff000000) { 4032 return -EINVAL; 4033 } 4034 4035 if (!mDataSource->getUInt32(offset + 4, (uint32_t*)&mLastParsedTrackId)) { 4036 return ERROR_MALFORMED; 4037 } 4038 4039 if (mLastParsedTrackId != mTrackId) { 4040 // this is not the right track, skip it 4041 return OK; 4042 } 4043 4044 mTrackFragmentHeaderInfo.mFlags = flags; 4045 mTrackFragmentHeaderInfo.mTrackID = mLastParsedTrackId; 4046 offset += 8; 4047 size -= 8; 4048 4049 ALOGV("fragment header: %08x %08x", flags, mTrackFragmentHeaderInfo.mTrackID); 4050 4051 if (flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent) { 4052 if (size < 8) { 4053 return -EINVAL; 4054 } 4055 4056 if (!mDataSource->getUInt64(offset, &mTrackFragmentHeaderInfo.mBaseDataOffset)) { 4057 return ERROR_MALFORMED; 4058 } 4059 offset += 8; 4060 size -= 8; 4061 } 4062 4063 if (flags & TrackFragmentHeaderInfo::kSampleDescriptionIndexPresent) { 4064 if (size < 4) { 4065 return -EINVAL; 4066 } 4067 4068 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mSampleDescriptionIndex)) { 4069 return ERROR_MALFORMED; 4070 } 4071 offset += 4; 4072 size -= 4; 4073 } 4074 4075 if (flags & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) { 4076 if (size < 4) { 4077 return -EINVAL; 4078 } 4079 4080 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleDuration)) { 4081 return ERROR_MALFORMED; 4082 } 4083 offset += 4; 4084 size -= 4; 4085 } 4086 4087 if (flags & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) { 4088 if (size < 4) { 4089 return -EINVAL; 4090 } 4091 4092 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleSize)) { 4093 return ERROR_MALFORMED; 4094 } 4095 offset += 4; 4096 size -= 4; 4097 } 4098 4099 if (flags & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) { 4100 if (size < 4) { 4101 return -EINVAL; 4102 } 4103 4104 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleFlags)) { 4105 return ERROR_MALFORMED; 4106 } 4107 offset += 4; 4108 size -= 4; 4109 } 4110 4111 if (!(flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent)) { 4112 mTrackFragmentHeaderInfo.mBaseDataOffset = mCurrentMoofOffset; 4113 } 4114 4115 mTrackFragmentHeaderInfo.mDataOffset = 0; 4116 return OK; 4117 } 4118 4119 status_t MPEG4Source::parseTrackFragmentRun(off64_t offset, off64_t size) { 4120 4121 ALOGV("MPEG4Extractor::parseTrackFragmentRun"); 4122 if (size < 8) { 4123 return -EINVAL; 4124 } 4125 4126 enum { 4127 kDataOffsetPresent = 0x01, 4128 kFirstSampleFlagsPresent = 0x04, 4129 kSampleDurationPresent = 0x100, 4130 kSampleSizePresent = 0x200, 4131 kSampleFlagsPresent = 0x400, 4132 kSampleCompositionTimeOffsetPresent = 0x800, 4133 }; 4134 4135 uint32_t flags; 4136 if (!mDataSource->getUInt32(offset, &flags)) { 4137 return ERROR_MALFORMED; 4138 } 4139 // |version| only affects SampleCompositionTimeOffset field. 4140 // If version == 0, SampleCompositionTimeOffset is uint32_t; 4141 // Otherwise, SampleCompositionTimeOffset is int32_t. 4142 // Sample.compositionOffset is defined as int32_t. 4143 uint8_t version = flags >> 24; 4144 flags &= 0xffffff; 4145 ALOGV("fragment run version: 0x%02x, flags: 0x%06x", version, flags); 4146 4147 if ((flags & kFirstSampleFlagsPresent) && (flags & kSampleFlagsPresent)) { 4148 // These two shall not be used together. 4149 return -EINVAL; 4150 } 4151 4152 uint32_t sampleCount; 4153 if (!mDataSource->getUInt32(offset + 4, &sampleCount)) { 4154 return ERROR_MALFORMED; 4155 } 4156 offset += 8; 4157 size -= 8; 4158 4159 uint64_t dataOffset = mTrackFragmentHeaderInfo.mDataOffset; 4160 4161 uint32_t firstSampleFlags = 0; 4162 4163 if (flags & kDataOffsetPresent) { 4164 if (size < 4) { 4165 return -EINVAL; 4166 } 4167 4168 int32_t dataOffsetDelta; 4169 if (!mDataSource->getUInt32(offset, (uint32_t*)&dataOffsetDelta)) { 4170 return ERROR_MALFORMED; 4171 } 4172 4173 dataOffset = mTrackFragmentHeaderInfo.mBaseDataOffset + dataOffsetDelta; 4174 4175 offset += 4; 4176 size -= 4; 4177 } 4178 4179 if (flags & kFirstSampleFlagsPresent) { 4180 if (size < 4) { 4181 return -EINVAL; 4182 } 4183 4184 if (!mDataSource->getUInt32(offset, &firstSampleFlags)) { 4185 return ERROR_MALFORMED; 4186 } 4187 offset += 4; 4188 size -= 4; 4189 } 4190 4191 uint32_t sampleDuration = 0, sampleSize = 0, sampleFlags = 0, 4192 sampleCtsOffset = 0; 4193 4194 size_t bytesPerSample = 0; 4195 if (flags & kSampleDurationPresent) { 4196 bytesPerSample += 4; 4197 } else if (mTrackFragmentHeaderInfo.mFlags 4198 & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) { 4199 sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration; 4200 } else if (mTrex) { 4201 sampleDuration = mTrex->default_sample_duration; 4202 } 4203 4204 if (flags & kSampleSizePresent) { 4205 bytesPerSample += 4; 4206 } else if (mTrackFragmentHeaderInfo.mFlags 4207 & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) { 4208 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize; 4209 } else { 4210 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize; 4211 } 4212 4213 if (flags & kSampleFlagsPresent) { 4214 bytesPerSample += 4; 4215 } else if (mTrackFragmentHeaderInfo.mFlags 4216 & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) { 4217 sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags; 4218 } else { 4219 sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags; 4220 } 4221 4222 if (flags & kSampleCompositionTimeOffsetPresent) { 4223 bytesPerSample += 4; 4224 } else { 4225 sampleCtsOffset = 0; 4226 } 4227 4228 if (size < (off64_t)(sampleCount * bytesPerSample)) { 4229 return -EINVAL; 4230 } 4231 4232 Sample tmp; 4233 for (uint32_t i = 0; i < sampleCount; ++i) { 4234 if (flags & kSampleDurationPresent) { 4235 if (!mDataSource->getUInt32(offset, &sampleDuration)) { 4236 return ERROR_MALFORMED; 4237 } 4238 offset += 4; 4239 } 4240 4241 if (flags & kSampleSizePresent) { 4242 if (!mDataSource->getUInt32(offset, &sampleSize)) { 4243 return ERROR_MALFORMED; 4244 } 4245 offset += 4; 4246 } 4247 4248 if (flags & kSampleFlagsPresent) { 4249 if (!mDataSource->getUInt32(offset, &sampleFlags)) { 4250 return ERROR_MALFORMED; 4251 } 4252 offset += 4; 4253 } 4254 4255 if (flags & kSampleCompositionTimeOffsetPresent) { 4256 if (!mDataSource->getUInt32(offset, &sampleCtsOffset)) { 4257 return ERROR_MALFORMED; 4258 } 4259 offset += 4; 4260 } 4261 4262 ALOGV("adding sample %d at offset 0x%08" PRIx64 ", size %u, duration %u, " 4263 " flags 0x%08x", i + 1, 4264 dataOffset, sampleSize, sampleDuration, 4265 (flags & kFirstSampleFlagsPresent) && i == 0 4266 ? firstSampleFlags : sampleFlags); 4267 tmp.offset = dataOffset; 4268 tmp.size = sampleSize; 4269 tmp.duration = sampleDuration; 4270 tmp.compositionOffset = sampleCtsOffset; 4271 mCurrentSamples.add(tmp); 4272 4273 dataOffset += sampleSize; 4274 } 4275 4276 mTrackFragmentHeaderInfo.mDataOffset = dataOffset; 4277 4278 return OK; 4279 } 4280 4281 sp<MetaData> MPEG4Source::getFormat() { 4282 Mutex::Autolock autoLock(mLock); 4283 4284 return mFormat; 4285 } 4286 4287 size_t MPEG4Source::parseNALSize(const uint8_t *data) const { 4288 switch (mNALLengthSize) { 4289 case 1: 4290 return *data; 4291 case 2: 4292 return U16_AT(data); 4293 case 3: 4294 return ((size_t)data[0] << 16) | U16_AT(&data[1]); 4295 case 4: 4296 return U32_AT(data); 4297 } 4298 4299 // This cannot happen, mNALLengthSize springs to life by adding 1 to 4300 // a 2-bit integer. 4301 CHECK(!"Should not be here."); 4302 4303 return 0; 4304 } 4305 4306 status_t MPEG4Source::read( 4307 MediaBuffer **out, const ReadOptions *options) { 4308 Mutex::Autolock autoLock(mLock); 4309 4310 CHECK(mStarted); 4311 4312 if (options != nullptr && options->getNonBlocking() && !mGroup->has_buffers()) { 4313 *out = nullptr; 4314 return WOULD_BLOCK; 4315 } 4316 4317 if (mFirstMoofOffset > 0) { 4318 return fragmentedRead(out, options); 4319 } 4320 4321 *out = NULL; 4322 4323 int64_t targetSampleTimeUs = -1; 4324 4325 int64_t seekTimeUs; 4326 ReadOptions::SeekMode mode; 4327 if (options && options->getSeekTo(&seekTimeUs, &mode)) { 4328 uint32_t findFlags = 0; 4329 switch (mode) { 4330 case ReadOptions::SEEK_PREVIOUS_SYNC: 4331 findFlags = SampleTable::kFlagBefore; 4332 break; 4333 case ReadOptions::SEEK_NEXT_SYNC: 4334 findFlags = SampleTable::kFlagAfter; 4335 break; 4336 case ReadOptions::SEEK_CLOSEST_SYNC: 4337 case ReadOptions::SEEK_CLOSEST: 4338 findFlags = SampleTable::kFlagClosest; 4339 break; 4340 default: 4341 CHECK(!"Should not be here."); 4342 break; 4343 } 4344 4345 uint32_t sampleIndex; 4346 status_t err = mSampleTable->findSampleAtTime( 4347 seekTimeUs, 1000000, mTimescale, 4348 &sampleIndex, findFlags); 4349 4350 if (mode == ReadOptions::SEEK_CLOSEST) { 4351 // We found the closest sample already, now we want the sync 4352 // sample preceding it (or the sample itself of course), even 4353 // if the subsequent sync sample is closer. 4354 findFlags = SampleTable::kFlagBefore; 4355 } 4356 4357 uint32_t syncSampleIndex; 4358 if (err == OK) { 4359 err = mSampleTable->findSyncSampleNear( 4360 sampleIndex, &syncSampleIndex, findFlags); 4361 } 4362 4363 uint32_t sampleTime; 4364 if (err == OK) { 4365 err = mSampleTable->getMetaDataForSample( 4366 sampleIndex, NULL, NULL, &sampleTime); 4367 } 4368 4369 if (err != OK) { 4370 if (err == ERROR_OUT_OF_RANGE) { 4371 // An attempt to seek past the end of the stream would 4372 // normally cause this ERROR_OUT_OF_RANGE error. Propagating 4373 // this all the way to the MediaPlayer would cause abnormal 4374 // termination. Legacy behaviour appears to be to behave as if 4375 // we had seeked to the end of stream, ending normally. 4376 err = ERROR_END_OF_STREAM; 4377 } 4378 ALOGV("end of stream"); 4379 return err; 4380 } 4381 4382 if (mode == ReadOptions::SEEK_CLOSEST) { 4383 targetSampleTimeUs = (sampleTime * 1000000ll) / mTimescale; 4384 } 4385 4386 #if 0 4387 uint32_t syncSampleTime; 4388 CHECK_EQ(OK, mSampleTable->getMetaDataForSample( 4389 syncSampleIndex, NULL, NULL, &syncSampleTime)); 4390 4391 ALOGI("seek to time %lld us => sample at time %lld us, " 4392 "sync sample at time %lld us", 4393 seekTimeUs, 4394 sampleTime * 1000000ll / mTimescale, 4395 syncSampleTime * 1000000ll / mTimescale); 4396 #endif 4397 4398 mCurrentSampleIndex = syncSampleIndex; 4399 if (mBuffer != NULL) { 4400 mBuffer->release(); 4401 mBuffer = NULL; 4402 } 4403 4404 // fall through 4405 } 4406 4407 off64_t offset; 4408 size_t size; 4409 uint32_t cts, stts; 4410 bool isSyncSample; 4411 bool newBuffer = false; 4412 if (mBuffer == NULL) { 4413 newBuffer = true; 4414 4415 status_t err = 4416 mSampleTable->getMetaDataForSample( 4417 mCurrentSampleIndex, &offset, &size, &cts, &isSyncSample, &stts); 4418 4419 if (err != OK) { 4420 return err; 4421 } 4422 4423 err = mGroup->acquire_buffer(&mBuffer); 4424 4425 if (err != OK) { 4426 CHECK(mBuffer == NULL); 4427 return err; 4428 } 4429 if (size > mBuffer->size()) { 4430 ALOGE("buffer too small: %zu > %zu", size, mBuffer->size()); 4431 return ERROR_BUFFER_TOO_SMALL; 4432 } 4433 } 4434 4435 if ((!mIsAVC && !mIsHEVC) || mWantsNALFragments) { 4436 if (newBuffer) { 4437 ssize_t num_bytes_read = 4438 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size); 4439 4440 if (num_bytes_read < (ssize_t)size) { 4441 mBuffer->release(); 4442 mBuffer = NULL; 4443 4444 return ERROR_IO; 4445 } 4446 4447 CHECK(mBuffer != NULL); 4448 mBuffer->set_range(0, size); 4449 mBuffer->meta_data()->clear(); 4450 mBuffer->meta_data()->setInt64( 4451 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 4452 mBuffer->meta_data()->setInt64( 4453 kKeyDuration, ((int64_t)stts * 1000000) / mTimescale); 4454 4455 if (targetSampleTimeUs >= 0) { 4456 mBuffer->meta_data()->setInt64( 4457 kKeyTargetTime, targetSampleTimeUs); 4458 } 4459 4460 if (isSyncSample) { 4461 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 4462 } 4463 4464 ++mCurrentSampleIndex; 4465 } 4466 4467 if (!mIsAVC && !mIsHEVC) { 4468 *out = mBuffer; 4469 mBuffer = NULL; 4470 4471 return OK; 4472 } 4473 4474 // Each NAL unit is split up into its constituent fragments and 4475 // each one of them returned in its own buffer. 4476 4477 CHECK(mBuffer->range_length() >= mNALLengthSize); 4478 4479 const uint8_t *src = 4480 (const uint8_t *)mBuffer->data() + mBuffer->range_offset(); 4481 4482 size_t nal_size = parseNALSize(src); 4483 if (mNALLengthSize > SIZE_MAX - nal_size) { 4484 ALOGE("b/24441553, b/24445122"); 4485 } 4486 if (mBuffer->range_length() - mNALLengthSize < nal_size) { 4487 ALOGE("incomplete NAL unit."); 4488 4489 mBuffer->release(); 4490 mBuffer = NULL; 4491 4492 return ERROR_MALFORMED; 4493 } 4494 4495 MediaBuffer *clone = mBuffer->clone(); 4496 CHECK(clone != NULL); 4497 clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size); 4498 4499 CHECK(mBuffer != NULL); 4500 mBuffer->set_range( 4501 mBuffer->range_offset() + mNALLengthSize + nal_size, 4502 mBuffer->range_length() - mNALLengthSize - nal_size); 4503 4504 if (mBuffer->range_length() == 0) { 4505 mBuffer->release(); 4506 mBuffer = NULL; 4507 } 4508 4509 *out = clone; 4510 4511 return OK; 4512 } else { 4513 // Whole NAL units are returned but each fragment is prefixed by 4514 // the start code (0x00 00 00 01). 4515 ssize_t num_bytes_read = 0; 4516 int32_t drm = 0; 4517 bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0); 4518 if (usesDRM) { 4519 num_bytes_read = 4520 mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size); 4521 } else { 4522 num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size); 4523 } 4524 4525 if (num_bytes_read < (ssize_t)size) { 4526 mBuffer->release(); 4527 mBuffer = NULL; 4528 4529 return ERROR_IO; 4530 } 4531 4532 if (usesDRM) { 4533 CHECK(mBuffer != NULL); 4534 mBuffer->set_range(0, size); 4535 4536 } else { 4537 uint8_t *dstData = (uint8_t *)mBuffer->data(); 4538 size_t srcOffset = 0; 4539 size_t dstOffset = 0; 4540 4541 while (srcOffset < size) { 4542 bool isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize); 4543 size_t nalLength = 0; 4544 if (!isMalFormed) { 4545 nalLength = parseNALSize(&mSrcBuffer[srcOffset]); 4546 srcOffset += mNALLengthSize; 4547 isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength); 4548 } 4549 4550 if (isMalFormed) { 4551 ALOGE("Video is malformed"); 4552 mBuffer->release(); 4553 mBuffer = NULL; 4554 return ERROR_MALFORMED; 4555 } 4556 4557 if (nalLength == 0) { 4558 continue; 4559 } 4560 4561 if (dstOffset > SIZE_MAX - 4 || 4562 dstOffset + 4 > SIZE_MAX - nalLength || 4563 dstOffset + 4 + nalLength > mBuffer->size()) { 4564 ALOGE("b/27208621 : %zu %zu", dstOffset, mBuffer->size()); 4565 android_errorWriteLog(0x534e4554, "27208621"); 4566 mBuffer->release(); 4567 mBuffer = NULL; 4568 return ERROR_MALFORMED; 4569 } 4570 4571 dstData[dstOffset++] = 0; 4572 dstData[dstOffset++] = 0; 4573 dstData[dstOffset++] = 0; 4574 dstData[dstOffset++] = 1; 4575 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength); 4576 srcOffset += nalLength; 4577 dstOffset += nalLength; 4578 } 4579 CHECK_EQ(srcOffset, size); 4580 CHECK(mBuffer != NULL); 4581 mBuffer->set_range(0, dstOffset); 4582 } 4583 4584 mBuffer->meta_data()->clear(); 4585 mBuffer->meta_data()->setInt64( 4586 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 4587 mBuffer->meta_data()->setInt64( 4588 kKeyDuration, ((int64_t)stts * 1000000) / mTimescale); 4589 4590 if (targetSampleTimeUs >= 0) { 4591 mBuffer->meta_data()->setInt64( 4592 kKeyTargetTime, targetSampleTimeUs); 4593 } 4594 4595 if (mIsAVC) { 4596 uint32_t layerId = FindAVCLayerId( 4597 (const uint8_t *)mBuffer->data(), mBuffer->range_length()); 4598 mBuffer->meta_data()->setInt32(kKeyTemporalLayerId, layerId); 4599 } 4600 4601 if (isSyncSample) { 4602 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 4603 } 4604 4605 ++mCurrentSampleIndex; 4606 4607 *out = mBuffer; 4608 mBuffer = NULL; 4609 4610 return OK; 4611 } 4612 } 4613 4614 status_t MPEG4Source::fragmentedRead( 4615 MediaBuffer **out, const ReadOptions *options) { 4616 4617 ALOGV("MPEG4Source::fragmentedRead"); 4618 4619 CHECK(mStarted); 4620 4621 *out = NULL; 4622 4623 int64_t targetSampleTimeUs = -1; 4624 4625 int64_t seekTimeUs; 4626 ReadOptions::SeekMode mode; 4627 if (options && options->getSeekTo(&seekTimeUs, &mode)) { 4628 4629 int numSidxEntries = mSegments.size(); 4630 if (numSidxEntries != 0) { 4631 int64_t totalTime = 0; 4632 off64_t totalOffset = mFirstMoofOffset; 4633 for (int i = 0; i < numSidxEntries; i++) { 4634 const SidxEntry *se = &mSegments[i]; 4635 if (totalTime + se->mDurationUs > seekTimeUs) { 4636 // The requested time is somewhere in this segment 4637 if ((mode == ReadOptions::SEEK_NEXT_SYNC && seekTimeUs > totalTime) || 4638 (mode == ReadOptions::SEEK_CLOSEST_SYNC && 4639 (seekTimeUs - totalTime) > (totalTime + se->mDurationUs - seekTimeUs))) { 4640 // requested next sync, or closest sync and it was closer to the end of 4641 // this segment 4642 totalTime += se->mDurationUs; 4643 totalOffset += se->mSize; 4644 } 4645 break; 4646 } 4647 totalTime += se->mDurationUs; 4648 totalOffset += se->mSize; 4649 } 4650 mCurrentMoofOffset = totalOffset; 4651 mCurrentSamples.clear(); 4652 mCurrentSampleIndex = 0; 4653 parseChunk(&totalOffset); 4654 mCurrentTime = totalTime * mTimescale / 1000000ll; 4655 } else { 4656 // without sidx boxes, we can only seek to 0 4657 mCurrentMoofOffset = mFirstMoofOffset; 4658 mCurrentSamples.clear(); 4659 mCurrentSampleIndex = 0; 4660 off64_t tmp = mCurrentMoofOffset; 4661 parseChunk(&tmp); 4662 mCurrentTime = 0; 4663 } 4664 4665 if (mBuffer != NULL) { 4666 mBuffer->release(); 4667 mBuffer = NULL; 4668 } 4669 4670 // fall through 4671 } 4672 4673 off64_t offset = 0; 4674 size_t size = 0; 4675 uint32_t cts = 0; 4676 bool isSyncSample = false; 4677 bool newBuffer = false; 4678 if (mBuffer == NULL) { 4679 newBuffer = true; 4680 4681 if (mCurrentSampleIndex >= mCurrentSamples.size()) { 4682 // move to next fragment if there is one 4683 if (mNextMoofOffset <= mCurrentMoofOffset) { 4684 return ERROR_END_OF_STREAM; 4685 } 4686 off64_t nextMoof = mNextMoofOffset; 4687 mCurrentMoofOffset = nextMoof; 4688 mCurrentSamples.clear(); 4689 mCurrentSampleIndex = 0; 4690 parseChunk(&nextMoof); 4691 if (mCurrentSampleIndex >= mCurrentSamples.size()) { 4692 return ERROR_END_OF_STREAM; 4693 } 4694 } 4695 4696 const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex]; 4697 offset = smpl->offset; 4698 size = smpl->size; 4699 cts = mCurrentTime + smpl->compositionOffset; 4700 mCurrentTime += smpl->duration; 4701 isSyncSample = (mCurrentSampleIndex == 0); // XXX 4702 4703 status_t err = mGroup->acquire_buffer(&mBuffer); 4704 4705 if (err != OK) { 4706 CHECK(mBuffer == NULL); 4707 ALOGV("acquire_buffer returned %d", err); 4708 return err; 4709 } 4710 if (size > mBuffer->size()) { 4711 ALOGE("buffer too small: %zu > %zu", size, mBuffer->size()); 4712 return ERROR_BUFFER_TOO_SMALL; 4713 } 4714 } 4715 4716 const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex]; 4717 const sp<MetaData> bufmeta = mBuffer->meta_data(); 4718 bufmeta->clear(); 4719 if (smpl->encryptedsizes.size()) { 4720 // store clear/encrypted lengths in metadata 4721 bufmeta->setData(kKeyPlainSizes, 0, 4722 smpl->clearsizes.array(), smpl->clearsizes.size() * 4); 4723 bufmeta->setData(kKeyEncryptedSizes, 0, 4724 smpl->encryptedsizes.array(), smpl->encryptedsizes.size() * 4); 4725 bufmeta->setData(kKeyCryptoIV, 0, smpl->iv, 16); // use 16 or the actual size? 4726 bufmeta->setInt32(kKeyCryptoDefaultIVSize, mDefaultIVSize); 4727 bufmeta->setInt32(kKeyCryptoMode, mCryptoMode); 4728 bufmeta->setData(kKeyCryptoKey, 0, mCryptoKey, 16); 4729 } 4730 4731 if ((!mIsAVC && !mIsHEVC)|| mWantsNALFragments) { 4732 if (newBuffer) { 4733 if (!isInRange((size_t)0u, mBuffer->size(), size)) { 4734 mBuffer->release(); 4735 mBuffer = NULL; 4736 4737 ALOGE("fragmentedRead ERROR_MALFORMED size %zu", size); 4738 return ERROR_MALFORMED; 4739 } 4740 4741 ssize_t num_bytes_read = 4742 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size); 4743 4744 if (num_bytes_read < (ssize_t)size) { 4745 mBuffer->release(); 4746 mBuffer = NULL; 4747 4748 ALOGE("i/o error"); 4749 return ERROR_IO; 4750 } 4751 4752 CHECK(mBuffer != NULL); 4753 mBuffer->set_range(0, size); 4754 mBuffer->meta_data()->setInt64( 4755 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 4756 mBuffer->meta_data()->setInt64( 4757 kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale); 4758 4759 if (targetSampleTimeUs >= 0) { 4760 mBuffer->meta_data()->setInt64( 4761 kKeyTargetTime, targetSampleTimeUs); 4762 } 4763 4764 if (mIsAVC) { 4765 uint32_t layerId = FindAVCLayerId( 4766 (const uint8_t *)mBuffer->data(), mBuffer->range_length()); 4767 mBuffer->meta_data()->setInt32(kKeyTemporalLayerId, layerId); 4768 } 4769 4770 if (isSyncSample) { 4771 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 4772 } 4773 4774 ++mCurrentSampleIndex; 4775 } 4776 4777 if (!mIsAVC && !mIsHEVC) { 4778 *out = mBuffer; 4779 mBuffer = NULL; 4780 4781 return OK; 4782 } 4783 4784 // Each NAL unit is split up into its constituent fragments and 4785 // each one of them returned in its own buffer. 4786 4787 CHECK(mBuffer->range_length() >= mNALLengthSize); 4788 4789 const uint8_t *src = 4790 (const uint8_t *)mBuffer->data() + mBuffer->range_offset(); 4791 4792 size_t nal_size = parseNALSize(src); 4793 if (mNALLengthSize > SIZE_MAX - nal_size) { 4794 ALOGE("b/24441553, b/24445122"); 4795 } 4796 4797 if (mBuffer->range_length() - mNALLengthSize < nal_size) { 4798 ALOGE("incomplete NAL unit."); 4799 4800 mBuffer->release(); 4801 mBuffer = NULL; 4802 4803 return ERROR_MALFORMED; 4804 } 4805 4806 MediaBuffer *clone = mBuffer->clone(); 4807 CHECK(clone != NULL); 4808 clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size); 4809 4810 CHECK(mBuffer != NULL); 4811 mBuffer->set_range( 4812 mBuffer->range_offset() + mNALLengthSize + nal_size, 4813 mBuffer->range_length() - mNALLengthSize - nal_size); 4814 4815 if (mBuffer->range_length() == 0) { 4816 mBuffer->release(); 4817 mBuffer = NULL; 4818 } 4819 4820 *out = clone; 4821 4822 return OK; 4823 } else { 4824 ALOGV("whole NAL"); 4825 // Whole NAL units are returned but each fragment is prefixed by 4826 // the start code (0x00 00 00 01). 4827 ssize_t num_bytes_read = 0; 4828 int32_t drm = 0; 4829 bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0); 4830 void *data = NULL; 4831 bool isMalFormed = false; 4832 if (usesDRM) { 4833 if (mBuffer == NULL || !isInRange((size_t)0u, mBuffer->size(), size)) { 4834 isMalFormed = true; 4835 } else { 4836 data = mBuffer->data(); 4837 } 4838 } else { 4839 int32_t max_size; 4840 if (mFormat == NULL 4841 || !mFormat->findInt32(kKeyMaxInputSize, &max_size) 4842 || !isInRange((size_t)0u, (size_t)max_size, size)) { 4843 isMalFormed = true; 4844 } else { 4845 data = mSrcBuffer; 4846 } 4847 } 4848 4849 if (isMalFormed || data == NULL) { 4850 ALOGE("isMalFormed size %zu", size); 4851 if (mBuffer != NULL) { 4852 mBuffer->release(); 4853 mBuffer = NULL; 4854 } 4855 return ERROR_MALFORMED; 4856 } 4857 num_bytes_read = mDataSource->readAt(offset, data, size); 4858 4859 if (num_bytes_read < (ssize_t)size) { 4860 mBuffer->release(); 4861 mBuffer = NULL; 4862 4863 ALOGE("i/o error"); 4864 return ERROR_IO; 4865 } 4866 4867 if (usesDRM) { 4868 CHECK(mBuffer != NULL); 4869 mBuffer->set_range(0, size); 4870 4871 } else { 4872 uint8_t *dstData = (uint8_t *)mBuffer->data(); 4873 size_t srcOffset = 0; 4874 size_t dstOffset = 0; 4875 4876 while (srcOffset < size) { 4877 isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize); 4878 size_t nalLength = 0; 4879 if (!isMalFormed) { 4880 nalLength = parseNALSize(&mSrcBuffer[srcOffset]); 4881 srcOffset += mNALLengthSize; 4882 isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength) 4883 || !isInRange((size_t)0u, mBuffer->size(), dstOffset, (size_t)4u) 4884 || !isInRange((size_t)0u, mBuffer->size(), dstOffset + 4, nalLength); 4885 } 4886 4887 if (isMalFormed) { 4888 ALOGE("Video is malformed; nalLength %zu", nalLength); 4889 mBuffer->release(); 4890 mBuffer = NULL; 4891 return ERROR_MALFORMED; 4892 } 4893 4894 if (nalLength == 0) { 4895 continue; 4896 } 4897 4898 if (dstOffset > SIZE_MAX - 4 || 4899 dstOffset + 4 > SIZE_MAX - nalLength || 4900 dstOffset + 4 + nalLength > mBuffer->size()) { 4901 ALOGE("b/26365349 : %zu %zu", dstOffset, mBuffer->size()); 4902 android_errorWriteLog(0x534e4554, "26365349"); 4903 mBuffer->release(); 4904 mBuffer = NULL; 4905 return ERROR_MALFORMED; 4906 } 4907 4908 dstData[dstOffset++] = 0; 4909 dstData[dstOffset++] = 0; 4910 dstData[dstOffset++] = 0; 4911 dstData[dstOffset++] = 1; 4912 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength); 4913 srcOffset += nalLength; 4914 dstOffset += nalLength; 4915 } 4916 CHECK_EQ(srcOffset, size); 4917 CHECK(mBuffer != NULL); 4918 mBuffer->set_range(0, dstOffset); 4919 } 4920 4921 mBuffer->meta_data()->setInt64( 4922 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 4923 mBuffer->meta_data()->setInt64( 4924 kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale); 4925 4926 if (targetSampleTimeUs >= 0) { 4927 mBuffer->meta_data()->setInt64( 4928 kKeyTargetTime, targetSampleTimeUs); 4929 } 4930 4931 if (isSyncSample) { 4932 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 4933 } 4934 4935 ++mCurrentSampleIndex; 4936 4937 *out = mBuffer; 4938 mBuffer = NULL; 4939 4940 return OK; 4941 } 4942 } 4943 4944 MPEG4Extractor::Track *MPEG4Extractor::findTrackByMimePrefix( 4945 const char *mimePrefix) { 4946 for (Track *track = mFirstTrack; track != NULL; track = track->next) { 4947 const char *mime; 4948 if (track->meta != NULL 4949 && track->meta->findCString(kKeyMIMEType, &mime) 4950 && !strncasecmp(mime, mimePrefix, strlen(mimePrefix))) { 4951 return track; 4952 } 4953 } 4954 4955 return NULL; 4956 } 4957 4958 static bool LegacySniffMPEG4( 4959 const sp<DataSource> &source, String8 *mimeType, float *confidence) { 4960 uint8_t header[8]; 4961 4962 ssize_t n = source->readAt(4, header, sizeof(header)); 4963 if (n < (ssize_t)sizeof(header)) { 4964 return false; 4965 } 4966 4967 if (!memcmp(header, "ftyp3gp", 7) || !memcmp(header, "ftypmp42", 8) 4968 || !memcmp(header, "ftyp3gr6", 8) || !memcmp(header, "ftyp3gs6", 8) 4969 || !memcmp(header, "ftyp3ge6", 8) || !memcmp(header, "ftyp3gg6", 8) 4970 || !memcmp(header, "ftypisom", 8) || !memcmp(header, "ftypM4V ", 8) 4971 || !memcmp(header, "ftypM4A ", 8) || !memcmp(header, "ftypf4v ", 8) 4972 || !memcmp(header, "ftypkddi", 8) || !memcmp(header, "ftypM4VP", 8)) { 4973 *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4; 4974 *confidence = 0.4; 4975 4976 return true; 4977 } 4978 4979 return false; 4980 } 4981 4982 static bool isCompatibleBrand(uint32_t fourcc) { 4983 static const uint32_t kCompatibleBrands[] = { 4984 FOURCC('i', 's', 'o', 'm'), 4985 FOURCC('i', 's', 'o', '2'), 4986 FOURCC('a', 'v', 'c', '1'), 4987 FOURCC('h', 'v', 'c', '1'), 4988 FOURCC('h', 'e', 'v', '1'), 4989 FOURCC('3', 'g', 'p', '4'), 4990 FOURCC('m', 'p', '4', '1'), 4991 FOURCC('m', 'p', '4', '2'), 4992 4993 // Won't promise that the following file types can be played. 4994 // Just give these file types a chance. 4995 FOURCC('q', 't', ' ', ' '), // Apple's QuickTime 4996 FOURCC('M', 'S', 'N', 'V'), // Sony's PSP 4997 4998 FOURCC('3', 'g', '2', 'a'), // 3GPP2 4999 FOURCC('3', 'g', '2', 'b'), 5000 }; 5001 5002 for (size_t i = 0; 5003 i < sizeof(kCompatibleBrands) / sizeof(kCompatibleBrands[0]); 5004 ++i) { 5005 if (kCompatibleBrands[i] == fourcc) { 5006 return true; 5007 } 5008 } 5009 5010 return false; 5011 } 5012 5013 // Attempt to actually parse the 'ftyp' atom and determine if a suitable 5014 // compatible brand is present. 5015 // Also try to identify where this file's metadata ends 5016 // (end of the 'moov' atom) and report it to the caller as part of 5017 // the metadata. 5018 static bool BetterSniffMPEG4( 5019 const sp<DataSource> &source, String8 *mimeType, float *confidence, 5020 sp<AMessage> *meta) { 5021 // We scan up to 128 bytes to identify this file as an MP4. 5022 static const off64_t kMaxScanOffset = 128ll; 5023 5024 off64_t offset = 0ll; 5025 bool foundGoodFileType = false; 5026 off64_t moovAtomEndOffset = -1ll; 5027 bool done = false; 5028 5029 while (!done && offset < kMaxScanOffset) { 5030 uint32_t hdr[2]; 5031 if (source->readAt(offset, hdr, 8) < 8) { 5032 return false; 5033 } 5034 5035 uint64_t chunkSize = ntohl(hdr[0]); 5036 uint32_t chunkType = ntohl(hdr[1]); 5037 off64_t chunkDataOffset = offset + 8; 5038 5039 if (chunkSize == 1) { 5040 if (source->readAt(offset + 8, &chunkSize, 8) < 8) { 5041 return false; 5042 } 5043 5044 chunkSize = ntoh64(chunkSize); 5045 chunkDataOffset += 8; 5046 5047 if (chunkSize < 16) { 5048 // The smallest valid chunk is 16 bytes long in this case. 5049 return false; 5050 } 5051 5052 } else if (chunkSize < 8) { 5053 // The smallest valid chunk is 8 bytes long. 5054 return false; 5055 } 5056 5057 // (data_offset - offset) is either 8 or 16 5058 off64_t chunkDataSize = chunkSize - (chunkDataOffset - offset); 5059 if (chunkDataSize < 0) { 5060 ALOGE("b/23540914"); 5061 return ERROR_MALFORMED; 5062 } 5063 5064 char chunkstring[5]; 5065 MakeFourCCString(chunkType, chunkstring); 5066 ALOGV("saw chunk type %s, size %" PRIu64 " @ %lld", chunkstring, chunkSize, (long long)offset); 5067 switch (chunkType) { 5068 case FOURCC('f', 't', 'y', 'p'): 5069 { 5070 if (chunkDataSize < 8) { 5071 return false; 5072 } 5073 5074 uint32_t numCompatibleBrands = (chunkDataSize - 8) / 4; 5075 for (size_t i = 0; i < numCompatibleBrands + 2; ++i) { 5076 if (i == 1) { 5077 // Skip this index, it refers to the minorVersion, 5078 // not a brand. 5079 continue; 5080 } 5081 5082 uint32_t brand; 5083 if (source->readAt( 5084 chunkDataOffset + 4 * i, &brand, 4) < 4) { 5085 return false; 5086 } 5087 5088 brand = ntohl(brand); 5089 5090 if (isCompatibleBrand(brand)) { 5091 foundGoodFileType = true; 5092 break; 5093 } 5094 } 5095 5096 if (!foundGoodFileType) { 5097 return false; 5098 } 5099 5100 break; 5101 } 5102 5103 case FOURCC('m', 'o', 'o', 'v'): 5104 { 5105 moovAtomEndOffset = offset + chunkSize; 5106 5107 done = true; 5108 break; 5109 } 5110 5111 default: 5112 break; 5113 } 5114 5115 offset += chunkSize; 5116 } 5117 5118 if (!foundGoodFileType) { 5119 return false; 5120 } 5121 5122 *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4; 5123 *confidence = 0.4f; 5124 5125 if (moovAtomEndOffset >= 0) { 5126 *meta = new AMessage; 5127 (*meta)->setInt64("meta-data-size", moovAtomEndOffset); 5128 5129 ALOGV("found metadata size: %lld", (long long)moovAtomEndOffset); 5130 } 5131 5132 return true; 5133 } 5134 5135 bool SniffMPEG4( 5136 const sp<DataSource> &source, String8 *mimeType, float *confidence, 5137 sp<AMessage> *meta) { 5138 if (BetterSniffMPEG4(source, mimeType, confidence, meta)) { 5139 return true; 5140 } 5141 5142 if (LegacySniffMPEG4(source, mimeType, confidence)) { 5143 ALOGW("Identified supported mpeg4 through LegacySniffMPEG4."); 5144 return true; 5145 } 5146 5147 return false; 5148 } 5149 5150 } // namespace android 5151