Home | History | Annotate | Download | only in libstagefright
      1 /*
      2  * Copyright (C) 2009 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 //#define LOG_NDEBUG 0
     18 #define LOG_TAG "WAVExtractor"
     19 #include <utils/Log.h>
     20 
     21 #include "include/WAVExtractor.h"
     22 
     23 #include <audio_utils/primitives.h>
     24 #include <media/stagefright/foundation/ADebug.h>
     25 #include <media/stagefright/DataSource.h>
     26 #include <media/stagefright/MediaBufferGroup.h>
     27 #include <media/stagefright/MediaDefs.h>
     28 #include <media/stagefright/MediaErrors.h>
     29 #include <media/stagefright/MediaSource.h>
     30 #include <media/stagefright/MetaData.h>
     31 #include <utils/String8.h>
     32 #include <cutils/bitops.h>
     33 
     34 #define CHANNEL_MASK_USE_CHANNEL_ORDER 0
     35 
     36 namespace android {
     37 
// Format tags read from the first two bytes of the "fmt " chunk (and, for
// WAVE_FORMAT_EXTENSIBLE, re-read from the first two bytes of the sub-format
// GUID). init() rejects any tag not listed here with ERROR_UNSUPPORTED.
enum {
    WAVE_FORMAT_PCM        = 0x0001,  // accepted with 8, 16, 24 or 32 bits per sample
    WAVE_FORMAT_IEEE_FLOAT = 0x0003,  // accepted with 32 bits per sample only
    WAVE_FORMAT_ALAW       = 0x0006,  // accepted with 8 bits per sample only
    WAVE_FORMAT_MULAW      = 0x0007,  // accepted with 8 bits per sample only
    WAVE_FORMAT_MSGSM      = 0x0031,  // accepted only when bits per sample is 0
    WAVE_FORMAT_EXTENSIBLE = 0xFFFE   // 40-byte header; real tag comes from the GUID
};
     46 
// Last 14 bytes of the 16-byte sub-format GUID of a WAVE_FORMAT_EXTENSIBLE
// header (the first two bytes hold the format tag). init() compares exactly
// 14 bytes with memcmp(), so the embedded NUL bytes are significant and these
// must not be treated as C strings.
static const char* WAVEEXT_SUBFORMAT = "\x00\x00\x00\x00\x10\x00\x80\x00\x00\xAA\x00\x38\x9B\x71";
// Second GUID tail accepted by init(); presumably the ambisonic (B-format)
// sub-format, judging by the name -- TODO confirm against the spec.
static const char* AMBISONIC_SUBFORMAT = "\x00\x00\x21\x07\xD3\x11\x86\x44\xC8\xC1\xCA\x00\x00\x00";
     49 
     50 static uint32_t U32_LE_AT(const uint8_t *ptr) {
     51     return ptr[3] << 24 | ptr[2] << 16 | ptr[1] << 8 | ptr[0];
     52 }
     53 
     54 static uint16_t U16_LE_AT(const uint8_t *ptr) {
     55     return ptr[1] << 8 | ptr[0];
     56 }
     57 
// MediaSource that reads the sample data of a WAV file from a DataSource and
// returns it in MediaBuffers. PCM 8/24/32-bit and 32-bit float input is
// converted to 16-bit PCM in read(); other formats are passed through.
struct WAVSource : public MediaSource {
    // |meta| must contain kKeySampleRate and kKeyChannelCount (CHECKed in the
    // constructor). |offset| and |size| give the position and byte length of
    // the sample data inside |dataSource|.
    WAVSource(
            const sp<DataSource> &dataSource,
            const sp<MetaData> &meta,
            uint16_t waveFormat,
            int32_t bitsPerSample,
            off64_t offset, size_t size);

    // Allocates the buffer group and rewinds to the start of the data.
    virtual status_t start(MetaData *params = NULL);
    // Releases the buffer group allocated by start().
    virtual status_t stop();
    // Returns the track metadata passed to the constructor.
    virtual sp<MetaData> getFormat();

    // Produces the next buffer of audio, optionally seeking first.
    virtual status_t read(
            MediaBuffer **buffer, const ReadOptions *options = NULL);

    virtual bool supportNonblockingRead() { return true; }

protected:
    virtual ~WAVSource();

private:
    // Size in bytes of each buffer in mGroup; defined as 32768 below.
    static const size_t kMaxFrameSize;

    sp<DataSource> mDataSource;
    sp<MetaData> mMeta;
    uint16_t mWaveFormat;     // one of the WAVE_FORMAT_* tags
    int32_t mSampleRate;      // read from mMeta in the constructor
    int32_t mNumChannels;     // read from mMeta in the constructor
    int32_t mBitsPerSample;
    off64_t mOffset;          // start of the sample data in mDataSource
    size_t mSize;             // byte length of the sample data
    bool mStarted;            // true between start() and stop()
    MediaBufferGroup *mGroup; // owned; created in start(), deleted in stop()
    off64_t mCurrentPos;      // absolute read position; set to mOffset by start()

    // Declared private (no definition visible here) to disallow copying.
    WAVSource(const WAVSource &);
    WAVSource &operator=(const WAVSource &);
};
     96 
// Stores the data source and parses the file headers right away; the outcome
// of that parse is kept in mInitCheck and consulted by every accessor.
WAVExtractor::WAVExtractor(const sp<DataSource> &source)
    : mDataSource(source),
      mValidFormat(false),
      mChannelMask(CHANNEL_MASK_USE_CHANNEL_ORDER) {
    // init() reads the members initialised above, so it runs in the body.
    mInitCheck = init();
}
    103 
// Empty: no explicit cleanup is performed here.
WAVExtractor::~WAVExtractor() {
}
    106 
    107 sp<MetaData> WAVExtractor::getMetaData() {
    108     sp<MetaData> meta = new MetaData;
    109 
    110     if (mInitCheck != OK) {
    111         return meta;
    112     }
    113 
    114     meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_WAV);
    115 
    116     return meta;
    117 }
    118 
    119 size_t WAVExtractor::countTracks() {
    120     return mInitCheck == OK ? 1 : 0;
    121 }
    122 
    123 sp<IMediaSource> WAVExtractor::getTrack(size_t index) {
    124     if (mInitCheck != OK || index > 0) {
    125         return NULL;
    126     }
    127 
    128     return new WAVSource(
    129             mDataSource, mTrackMeta,
    130             mWaveFormat, mBitsPerSample, mDataOffset, mDataSize);
    131 }
    132 
    133 sp<MetaData> WAVExtractor::getTrackMetaData(
    134         size_t index, uint32_t /* flags */) {
    135     if (mInitCheck != OK || index > 0) {
    136         return NULL;
    137     }
    138 
    139     return mTrackMeta;
    140 }
    141 
    142 status_t WAVExtractor::init() {
    143     uint8_t header[12];
    144     if (mDataSource->readAt(
    145                 0, header, sizeof(header)) < (ssize_t)sizeof(header)) {
    146         return NO_INIT;
    147     }
    148 
    149     if (memcmp(header, "RIFF", 4) || memcmp(&header[8], "WAVE", 4)) {
    150         return NO_INIT;
    151     }
    152 
    153     size_t totalSize = U32_LE_AT(&header[4]);
    154 
    155     off64_t offset = 12;
    156     size_t remainingSize = totalSize;
    157     while (remainingSize >= 8) {
    158         uint8_t chunkHeader[8];
    159         if (mDataSource->readAt(offset, chunkHeader, 8) < 8) {
    160             return NO_INIT;
    161         }
    162 
    163         remainingSize -= 8;
    164         offset += 8;
    165 
    166         uint32_t chunkSize = U32_LE_AT(&chunkHeader[4]);
    167 
    168         if (chunkSize > remainingSize) {
    169             return NO_INIT;
    170         }
    171 
    172         if (!memcmp(chunkHeader, "fmt ", 4)) {
    173             if (chunkSize < 16) {
    174                 return NO_INIT;
    175             }
    176 
    177             uint8_t formatSpec[40];
    178             if (mDataSource->readAt(offset, formatSpec, 2) < 2) {
    179                 return NO_INIT;
    180             }
    181 
    182             mWaveFormat = U16_LE_AT(formatSpec);
    183             if (mWaveFormat != WAVE_FORMAT_PCM
    184                     && mWaveFormat != WAVE_FORMAT_IEEE_FLOAT
    185                     && mWaveFormat != WAVE_FORMAT_ALAW
    186                     && mWaveFormat != WAVE_FORMAT_MULAW
    187                     && mWaveFormat != WAVE_FORMAT_MSGSM
    188                     && mWaveFormat != WAVE_FORMAT_EXTENSIBLE) {
    189                 return ERROR_UNSUPPORTED;
    190             }
    191 
    192             uint8_t fmtSize = 16;
    193             if (mWaveFormat == WAVE_FORMAT_EXTENSIBLE) {
    194                 fmtSize = 40;
    195             }
    196             if (mDataSource->readAt(offset, formatSpec, fmtSize) < fmtSize) {
    197                 return NO_INIT;
    198             }
    199 
    200             mNumChannels = U16_LE_AT(&formatSpec[2]);
    201 
    202             if (mNumChannels < 1 || mNumChannels > 8) {
    203                 ALOGE("Unsupported number of channels (%d)", mNumChannels);
    204                 return ERROR_UNSUPPORTED;
    205             }
    206 
    207             if (mWaveFormat != WAVE_FORMAT_EXTENSIBLE) {
    208                 if (mNumChannels != 1 && mNumChannels != 2) {
    209                     ALOGW("More than 2 channels (%d) in non-WAVE_EXT, unknown channel mask",
    210                             mNumChannels);
    211                 }
    212             }
    213 
    214             mSampleRate = U32_LE_AT(&formatSpec[4]);
    215 
    216             if (mSampleRate == 0) {
    217                 return ERROR_MALFORMED;
    218             }
    219 
    220             mBitsPerSample = U16_LE_AT(&formatSpec[14]);
    221 
    222             if (mWaveFormat == WAVE_FORMAT_EXTENSIBLE) {
    223                 uint16_t validBitsPerSample = U16_LE_AT(&formatSpec[18]);
    224                 if (validBitsPerSample != mBitsPerSample) {
    225                     if (validBitsPerSample != 0) {
    226                         ALOGE("validBits(%d) != bitsPerSample(%d) are not supported",
    227                                 validBitsPerSample, mBitsPerSample);
    228                         return ERROR_UNSUPPORTED;
    229                     } else {
    230                         // we only support valitBitsPerSample == bitsPerSample but some WAV_EXT
    231                         // writers don't correctly set the valid bits value, and leave it at 0.
    232                         ALOGW("WAVE_EXT has 0 valid bits per sample, ignoring");
    233                     }
    234                 }
    235 
    236                 mChannelMask = U32_LE_AT(&formatSpec[20]);
    237                 ALOGV("numChannels=%d channelMask=0x%x", mNumChannels, mChannelMask);
    238                 if ((mChannelMask >> 18) != 0) {
    239                     ALOGE("invalid channel mask 0x%x", mChannelMask);
    240                     return ERROR_MALFORMED;
    241                 }
    242 
    243                 if ((mChannelMask != CHANNEL_MASK_USE_CHANNEL_ORDER)
    244                         && (popcount(mChannelMask) != mNumChannels)) {
    245                     ALOGE("invalid number of channels (%d) in channel mask (0x%x)",
    246                             popcount(mChannelMask), mChannelMask);
    247                     return ERROR_MALFORMED;
    248                 }
    249 
    250                 // In a WAVE_EXT header, the first two bytes of the GUID stored at byte 24 contain
    251                 // the sample format, using the same definitions as a regular WAV header
    252                 mWaveFormat = U16_LE_AT(&formatSpec[24]);
    253                 if (memcmp(&formatSpec[26], WAVEEXT_SUBFORMAT, 14) &&
    254                     memcmp(&formatSpec[26], AMBISONIC_SUBFORMAT, 14)) {
    255                     ALOGE("unsupported GUID");
    256                     return ERROR_UNSUPPORTED;
    257                 }
    258             }
    259 
    260             if (mWaveFormat == WAVE_FORMAT_PCM) {
    261                 if (mBitsPerSample != 8 && mBitsPerSample != 16
    262                     && mBitsPerSample != 24 && mBitsPerSample != 32) {
    263                     return ERROR_UNSUPPORTED;
    264                 }
    265             } else if (mWaveFormat == WAVE_FORMAT_IEEE_FLOAT) {
    266                 if (mBitsPerSample != 32) {  // TODO we don't support double
    267                     return ERROR_UNSUPPORTED;
    268                 }
    269             }
    270             else if (mWaveFormat == WAVE_FORMAT_MSGSM) {
    271                 if (mBitsPerSample != 0) {
    272                     return ERROR_UNSUPPORTED;
    273                 }
    274             } else if (mWaveFormat == WAVE_FORMAT_MULAW || mWaveFormat == WAVE_FORMAT_ALAW) {
    275                 if (mBitsPerSample != 8) {
    276                     return ERROR_UNSUPPORTED;
    277                 }
    278             } else {
    279                 return ERROR_UNSUPPORTED;
    280             }
    281 
    282             mValidFormat = true;
    283         } else if (!memcmp(chunkHeader, "data", 4)) {
    284             if (mValidFormat) {
    285                 mDataOffset = offset;
    286                 mDataSize = chunkSize;
    287 
    288                 mTrackMeta = new MetaData;
    289 
    290                 switch (mWaveFormat) {
    291                     case WAVE_FORMAT_PCM:
    292                     case WAVE_FORMAT_IEEE_FLOAT:
    293                         mTrackMeta->setCString(
    294                                 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_RAW);
    295                         break;
    296                     case WAVE_FORMAT_ALAW:
    297                         mTrackMeta->setCString(
    298                                 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_G711_ALAW);
    299                         break;
    300                     case WAVE_FORMAT_MSGSM:
    301                         mTrackMeta->setCString(
    302                                 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MSGSM);
    303                         break;
    304                     default:
    305                         CHECK_EQ(mWaveFormat, (uint16_t)WAVE_FORMAT_MULAW);
    306                         mTrackMeta->setCString(
    307                                 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_G711_MLAW);
    308                         break;
    309                 }
    310 
    311                 mTrackMeta->setInt32(kKeyChannelCount, mNumChannels);
    312                 mTrackMeta->setInt32(kKeyChannelMask, mChannelMask);
    313                 mTrackMeta->setInt32(kKeySampleRate, mSampleRate);
    314                 mTrackMeta->setInt32(kKeyPcmEncoding, kAudioEncodingPcm16bit);
    315 
    316                 int64_t durationUs = 0;
    317                 if (mWaveFormat == WAVE_FORMAT_MSGSM) {
    318                     // 65 bytes decode to 320 8kHz samples
    319                     durationUs =
    320                         1000000LL * (mDataSize / 65 * 320) / 8000;
    321                 } else {
    322                     size_t bytesPerSample = mBitsPerSample >> 3;
    323 
    324                     if (!bytesPerSample || !mNumChannels)
    325                         return ERROR_MALFORMED;
    326 
    327                     size_t num_samples = mDataSize / (mNumChannels * bytesPerSample);
    328 
    329                     if (!mSampleRate)
    330                         return ERROR_MALFORMED;
    331 
    332                     durationUs =
    333                         1000000LL * num_samples / mSampleRate;
    334                 }
    335 
    336                 mTrackMeta->setInt64(kKeyDuration, durationUs);
    337 
    338                 return OK;
    339             }
    340         }
    341 
    342         offset += chunkSize;
    343     }
    344 
    345     return NO_INIT;
    346 }
    347 
// Byte size of every buffer allocated in start(), the upper bound on a single
// read(), and the value published as kKeyMaxInputSize.
const size_t WAVSource::kMaxFrameSize = 32768;
    349 
    350 WAVSource::WAVSource(
    351         const sp<DataSource> &dataSource,
    352         const sp<MetaData> &meta,
    353         uint16_t waveFormat,
    354         int32_t bitsPerSample,
    355         off64_t offset, size_t size)
    356     : mDataSource(dataSource),
    357       mMeta(meta),
    358       mWaveFormat(waveFormat),
    359       mSampleRate(0),
    360       mNumChannels(0),
    361       mBitsPerSample(bitsPerSample),
    362       mOffset(offset),
    363       mSize(size),
    364       mStarted(false),
    365       mGroup(NULL) {
    366     CHECK(mMeta->findInt32(kKeySampleRate, &mSampleRate));
    367     CHECK(mMeta->findInt32(kKeyChannelCount, &mNumChannels));
    368 
    369     mMeta->setInt32(kKeyMaxInputSize, kMaxFrameSize);
    370 }
    371 
    372 WAVSource::~WAVSource() {
    373     if (mStarted) {
    374         stop();
    375     }
    376 }
    377 
    378 status_t WAVSource::start(MetaData * /* params */) {
    379     ALOGV("WAVSource::start");
    380 
    381     CHECK(!mStarted);
    382 
    383     // some WAV files may have large audio buffers that use shared memory transfer.
    384     mGroup = new MediaBufferGroup(4 /* buffers */, kMaxFrameSize);
    385 
    386     if (mBitsPerSample == 8) {
    387         // As a temporary buffer for 8->16 bit conversion.
    388         mGroup->add_buffer(new MediaBuffer(kMaxFrameSize));
    389     }
    390 
    391     mCurrentPos = mOffset;
    392 
    393     mStarted = true;
    394 
    395     return OK;
    396 }
    397 
// Releases the buffer group created by start() and marks the source as
// stopped. Must only be called while started (CHECKed). Always returns OK.
status_t WAVSource::stop() {
    ALOGV("WAVSource::stop");

    CHECK(mStarted);

    // mGroup is owned by this object; reset the pointer after deleting it.
    delete mGroup;
    mGroup = NULL;

    mStarted = false;

    return OK;
}
    410 
    411 sp<MetaData> WAVSource::getFormat() {
    412     ALOGV("WAVSource::getFormat");
    413 
    414     return mMeta;
    415 }
    416 
    417 status_t WAVSource::read(
    418         MediaBuffer **out, const ReadOptions *options) {
    419     *out = NULL;
    420 
    421     if (options != nullptr && options->getNonBlocking() && !mGroup->has_buffers()) {
    422         return WOULD_BLOCK;
    423     }
    424 
    425     int64_t seekTimeUs;
    426     ReadOptions::SeekMode mode;
    427     if (options != NULL && options->getSeekTo(&seekTimeUs, &mode)) {
    428         int64_t pos = 0;
    429 
    430         if (mWaveFormat == WAVE_FORMAT_MSGSM) {
    431             // 65 bytes decode to 320 8kHz samples
    432             int64_t samplenumber = (seekTimeUs * mSampleRate) / 1000000;
    433             int64_t framenumber = samplenumber / 320;
    434             pos = framenumber * 65;
    435         } else {
    436             pos = (seekTimeUs * mSampleRate) / 1000000 * mNumChannels * (mBitsPerSample >> 3);
    437         }
    438         if (pos > (off64_t)mSize) {
    439             pos = mSize;
    440         }
    441         mCurrentPos = pos + mOffset;
    442     }
    443 
    444     MediaBuffer *buffer;
    445     status_t err = mGroup->acquire_buffer(&buffer);
    446     if (err != OK) {
    447         return err;
    448     }
    449 
    450     // make sure that maxBytesToRead is multiple of 3, in 24-bit case
    451     size_t maxBytesToRead =
    452         mBitsPerSample == 8 ? kMaxFrameSize / 2 :
    453         (mBitsPerSample == 24 ? 3*(kMaxFrameSize/3): kMaxFrameSize);
    454 
    455     size_t maxBytesAvailable =
    456         (mCurrentPos - mOffset >= (off64_t)mSize)
    457             ? 0 : mSize - (mCurrentPos - mOffset);
    458 
    459     if (maxBytesToRead > maxBytesAvailable) {
    460         maxBytesToRead = maxBytesAvailable;
    461     }
    462 
    463     if (mWaveFormat == WAVE_FORMAT_MSGSM) {
    464         // Microsoft packs 2 frames into 65 bytes, rather than using separate 33-byte frames,
    465         // so read multiples of 65, and use smaller buffers to account for ~10:1 expansion ratio
    466         if (maxBytesToRead > 1024) {
    467             maxBytesToRead = 1024;
    468         }
    469         maxBytesToRead = (maxBytesToRead / 65) * 65;
    470     } else {
    471         // read only integral amounts of audio unit frames.
    472         const size_t inputUnitFrameSize = mNumChannels * mBitsPerSample / 8;
    473         maxBytesToRead -= maxBytesToRead % inputUnitFrameSize;
    474     }
    475 
    476     ssize_t n = mDataSource->readAt(
    477             mCurrentPos, buffer->data(),
    478             maxBytesToRead);
    479 
    480     if (n <= 0) {
    481         buffer->release();
    482         buffer = NULL;
    483 
    484         return ERROR_END_OF_STREAM;
    485     }
    486 
    487     buffer->set_range(0, n);
    488 
    489     // TODO: add capability to return data as float PCM instead of 16 bit PCM.
    490     if (mWaveFormat == WAVE_FORMAT_PCM) {
    491         if (mBitsPerSample == 8) {
    492             // Convert 8-bit unsigned samples to 16-bit signed.
    493 
    494             // Create new buffer with 2 byte wide samples
    495             MediaBuffer *tmp;
    496             CHECK_EQ(mGroup->acquire_buffer(&tmp), (status_t)OK);
    497             tmp->set_range(0, 2 * n);
    498 
    499             memcpy_to_i16_from_u8((int16_t *)tmp->data(), (const uint8_t *)buffer->data(), n);
    500             buffer->release();
    501             buffer = tmp;
    502         } else if (mBitsPerSample == 24) {
    503             // Convert 24-bit signed samples to 16-bit signed in place
    504             const size_t numSamples = n / 3;
    505 
    506             memcpy_to_i16_from_p24((int16_t *)buffer->data(), (const uint8_t *)buffer->data(), numSamples);
    507             buffer->set_range(0, 2 * numSamples);
    508         }  else if (mBitsPerSample == 32) {
    509             // Convert 32-bit signed samples to 16-bit signed in place
    510             const size_t numSamples = n / 4;
    511 
    512             memcpy_to_i16_from_i32((int16_t *)buffer->data(), (const int32_t *)buffer->data(), numSamples);
    513             buffer->set_range(0, 2 * numSamples);
    514         }
    515     } else if (mWaveFormat == WAVE_FORMAT_IEEE_FLOAT) {
    516         if (mBitsPerSample == 32) {
    517             // Convert 32-bit float samples to 16-bit signed in place
    518             const size_t numSamples = n / 4;
    519 
    520             memcpy_to_i16_from_float((int16_t *)buffer->data(), (const float *)buffer->data(), numSamples);
    521             buffer->set_range(0, 2 * numSamples);
    522         }
    523     }
    524 
    525     int64_t timeStampUs = 0;
    526 
    527     if (mWaveFormat == WAVE_FORMAT_MSGSM) {
    528         timeStampUs = 1000000LL * (mCurrentPos - mOffset) * 320 / 65 / mSampleRate;
    529     } else {
    530         size_t bytesPerSample = mBitsPerSample >> 3;
    531         timeStampUs = 1000000LL * (mCurrentPos - mOffset)
    532                 / (mNumChannels * bytesPerSample) / mSampleRate;
    533     }
    534 
    535     buffer->meta_data()->setInt64(kKeyTime, timeStampUs);
    536 
    537     buffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
    538     mCurrentPos += n;
    539 
    540     *out = buffer;
    541 
    542     return OK;
    543 }
    544 
    545 ////////////////////////////////////////////////////////////////////////////////
    546 
    547 bool SniffWAV(
    548         const sp<DataSource> &source, String8 *mimeType, float *confidence,
    549         sp<AMessage> *) {
    550     char header[12];
    551     if (source->readAt(0, header, sizeof(header)) < (ssize_t)sizeof(header)) {
    552         return false;
    553     }
    554 
    555     if (memcmp(header, "RIFF", 4) || memcmp(&header[8], "WAVE", 4)) {
    556         return false;
    557     }
    558 
    559     sp<MediaExtractor> extractor = new WAVExtractor(source);
    560     if (extractor->countTracks() == 0) {
    561         return false;
    562     }
    563 
    564     *mimeType = MEDIA_MIMETYPE_CONTAINER_WAV;
    565     *confidence = 0.3f;
    566 
    567     return true;
    568 }
    569 
    570 }  // namespace android
    571