Home | History | Annotate | Download | only in libstagefright
      1 /*
      2  * Copyright (C) 2009 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 //#define LOG_NDEBUG 0
     18 #define LOG_TAG "WAVExtractor"
     19 #include <utils/Log.h>
     20 
     21 #include "include/WAVExtractor.h"
     22 
     23 #include <audio_utils/primitives.h>
     24 #include <media/stagefright/foundation/ADebug.h>
     25 #include <media/stagefright/DataSource.h>
     26 #include <media/stagefright/MediaBufferGroup.h>
     27 #include <media/stagefright/MediaDefs.h>
     28 #include <media/stagefright/MediaErrors.h>
     29 #include <media/stagefright/MediaSource.h>
     30 #include <media/stagefright/MetaData.h>
     31 #include <utils/String8.h>
     32 #include <cutils/bitops.h>
     33 
     34 #define CHANNEL_MASK_USE_CHANNEL_ORDER 0
     35 
     36 namespace android {
     37 
     38 enum {
     39     WAVE_FORMAT_PCM        = 0x0001,
     40     WAVE_FORMAT_IEEE_FLOAT = 0x0003,
     41     WAVE_FORMAT_ALAW       = 0x0006,
     42     WAVE_FORMAT_MULAW      = 0x0007,
     43     WAVE_FORMAT_MSGSM      = 0x0031,
     44     WAVE_FORMAT_EXTENSIBLE = 0xFFFE
     45 };
     46 
     47 static const char* WAVEEXT_SUBFORMAT = "\x00\x00\x00\x00\x10\x00\x80\x00\x00\xAA\x00\x38\x9B\x71";
     48 
     49 
     50 static uint32_t U32_LE_AT(const uint8_t *ptr) {
     51     return ptr[3] << 24 | ptr[2] << 16 | ptr[1] << 8 | ptr[0];
     52 }
     53 
     54 static uint16_t U16_LE_AT(const uint8_t *ptr) {
     55     return ptr[1] << 8 | ptr[0];
     56 }
     57 
     58 struct WAVSource : public MediaSource {
     59     WAVSource(
     60             const sp<DataSource> &dataSource,
     61             const sp<MetaData> &meta,
     62             uint16_t waveFormat,
     63             int32_t bitsPerSample,
     64             off64_t offset, size_t size);
     65 
     66     virtual status_t start(MetaData *params = NULL);
     67     virtual status_t stop();
     68     virtual sp<MetaData> getFormat();
     69 
     70     virtual status_t read(
     71             MediaBuffer **buffer, const ReadOptions *options = NULL);
     72 
     73     virtual bool supportNonblockingRead() { return true; }
     74 
     75 protected:
     76     virtual ~WAVSource();
     77 
     78 private:
     79     static const size_t kMaxFrameSize;
     80 
     81     sp<DataSource> mDataSource;
     82     sp<MetaData> mMeta;
     83     uint16_t mWaveFormat;
     84     int32_t mSampleRate;
     85     int32_t mNumChannels;
     86     int32_t mBitsPerSample;
     87     off64_t mOffset;
     88     size_t mSize;
     89     bool mStarted;
     90     MediaBufferGroup *mGroup;
     91     off64_t mCurrentPos;
     92 
     93     WAVSource(const WAVSource &);
     94     WAVSource &operator=(const WAVSource &);
     95 };
     96 
     97 WAVExtractor::WAVExtractor(const sp<DataSource> &source)
     98     : mDataSource(source),
     99       mValidFormat(false),
    100       mChannelMask(CHANNEL_MASK_USE_CHANNEL_ORDER) {
    101     mInitCheck = init();
    102 }
    103 
    104 WAVExtractor::~WAVExtractor() {
    105 }
    106 
    107 sp<MetaData> WAVExtractor::getMetaData() {
    108     sp<MetaData> meta = new MetaData;
    109 
    110     if (mInitCheck != OK) {
    111         return meta;
    112     }
    113 
    114     meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_WAV);
    115 
    116     return meta;
    117 }
    118 
    119 size_t WAVExtractor::countTracks() {
    120     return mInitCheck == OK ? 1 : 0;
    121 }
    122 
    123 sp<IMediaSource> WAVExtractor::getTrack(size_t index) {
    124     if (mInitCheck != OK || index > 0) {
    125         return NULL;
    126     }
    127 
    128     return new WAVSource(
    129             mDataSource, mTrackMeta,
    130             mWaveFormat, mBitsPerSample, mDataOffset, mDataSize);
    131 }
    132 
    133 sp<MetaData> WAVExtractor::getTrackMetaData(
    134         size_t index, uint32_t /* flags */) {
    135     if (mInitCheck != OK || index > 0) {
    136         return NULL;
    137     }
    138 
    139     return mTrackMeta;
    140 }
    141 
    142 status_t WAVExtractor::init() {
    143     uint8_t header[12];
    144     if (mDataSource->readAt(
    145                 0, header, sizeof(header)) < (ssize_t)sizeof(header)) {
    146         return NO_INIT;
    147     }
    148 
    149     if (memcmp(header, "RIFF", 4) || memcmp(&header[8], "WAVE", 4)) {
    150         return NO_INIT;
    151     }
    152 
    153     size_t totalSize = U32_LE_AT(&header[4]);
    154 
    155     off64_t offset = 12;
    156     size_t remainingSize = totalSize;
    157     while (remainingSize >= 8) {
    158         uint8_t chunkHeader[8];
    159         if (mDataSource->readAt(offset, chunkHeader, 8) < 8) {
    160             return NO_INIT;
    161         }
    162 
    163         remainingSize -= 8;
    164         offset += 8;
    165 
    166         uint32_t chunkSize = U32_LE_AT(&chunkHeader[4]);
    167 
    168         if (chunkSize > remainingSize) {
    169             return NO_INIT;
    170         }
    171 
    172         if (!memcmp(chunkHeader, "fmt ", 4)) {
    173             if (chunkSize < 16) {
    174                 return NO_INIT;
    175             }
    176 
    177             uint8_t formatSpec[40];
    178             if (mDataSource->readAt(offset, formatSpec, 2) < 2) {
    179                 return NO_INIT;
    180             }
    181 
    182             mWaveFormat = U16_LE_AT(formatSpec);
    183             if (mWaveFormat != WAVE_FORMAT_PCM
    184                     && mWaveFormat != WAVE_FORMAT_IEEE_FLOAT
    185                     && mWaveFormat != WAVE_FORMAT_ALAW
    186                     && mWaveFormat != WAVE_FORMAT_MULAW
    187                     && mWaveFormat != WAVE_FORMAT_MSGSM
    188                     && mWaveFormat != WAVE_FORMAT_EXTENSIBLE) {
    189                 return ERROR_UNSUPPORTED;
    190             }
    191 
    192             uint8_t fmtSize = 16;
    193             if (mWaveFormat == WAVE_FORMAT_EXTENSIBLE) {
    194                 fmtSize = 40;
    195             }
    196             if (mDataSource->readAt(offset, formatSpec, fmtSize) < fmtSize) {
    197                 return NO_INIT;
    198             }
    199 
    200             mNumChannels = U16_LE_AT(&formatSpec[2]);
    201 
    202             if (mNumChannels < 1 || mNumChannels > 8) {
    203                 ALOGE("Unsupported number of channels (%d)", mNumChannels);
    204                 return ERROR_UNSUPPORTED;
    205             }
    206 
    207             if (mWaveFormat != WAVE_FORMAT_EXTENSIBLE) {
    208                 if (mNumChannels != 1 && mNumChannels != 2) {
    209                     ALOGW("More than 2 channels (%d) in non-WAVE_EXT, unknown channel mask",
    210                             mNumChannels);
    211                 }
    212             }
    213 
    214             mSampleRate = U32_LE_AT(&formatSpec[4]);
    215 
    216             if (mSampleRate == 0) {
    217                 return ERROR_MALFORMED;
    218             }
    219 
    220             mBitsPerSample = U16_LE_AT(&formatSpec[14]);
    221 
    222             if (mWaveFormat == WAVE_FORMAT_EXTENSIBLE) {
    223                 uint16_t validBitsPerSample = U16_LE_AT(&formatSpec[18]);
    224                 if (validBitsPerSample != mBitsPerSample) {
    225                     if (validBitsPerSample != 0) {
    226                         ALOGE("validBits(%d) != bitsPerSample(%d) are not supported",
    227                                 validBitsPerSample, mBitsPerSample);
    228                         return ERROR_UNSUPPORTED;
    229                     } else {
    230                         // we only support valitBitsPerSample == bitsPerSample but some WAV_EXT
    231                         // writers don't correctly set the valid bits value, and leave it at 0.
    232                         ALOGW("WAVE_EXT has 0 valid bits per sample, ignoring");
    233                     }
    234                 }
    235 
    236                 mChannelMask = U32_LE_AT(&formatSpec[20]);
    237                 ALOGV("numChannels=%d channelMask=0x%x", mNumChannels, mChannelMask);
    238                 if ((mChannelMask >> 18) != 0) {
    239                     ALOGE("invalid channel mask 0x%x", mChannelMask);
    240                     return ERROR_MALFORMED;
    241                 }
    242 
    243                 if ((mChannelMask != CHANNEL_MASK_USE_CHANNEL_ORDER)
    244                         && (popcount(mChannelMask) != mNumChannels)) {
    245                     ALOGE("invalid number of channels (%d) in channel mask (0x%x)",
    246                             popcount(mChannelMask), mChannelMask);
    247                     return ERROR_MALFORMED;
    248                 }
    249 
    250                 // In a WAVE_EXT header, the first two bytes of the GUID stored at byte 24 contain
    251                 // the sample format, using the same definitions as a regular WAV header
    252                 mWaveFormat = U16_LE_AT(&formatSpec[24]);
    253                 if (memcmp(&formatSpec[26], WAVEEXT_SUBFORMAT, 14)) {
    254                     ALOGE("unsupported GUID");
    255                     return ERROR_UNSUPPORTED;
    256                 }
    257             }
    258 
    259             if (mWaveFormat == WAVE_FORMAT_PCM) {
    260                 if (mBitsPerSample != 8 && mBitsPerSample != 16
    261                     && mBitsPerSample != 24 && mBitsPerSample != 32) {
    262                     return ERROR_UNSUPPORTED;
    263                 }
    264             } else if (mWaveFormat == WAVE_FORMAT_IEEE_FLOAT) {
    265                 if (mBitsPerSample != 32) {  // TODO we don't support double
    266                     return ERROR_UNSUPPORTED;
    267                 }
    268             }
    269             else if (mWaveFormat == WAVE_FORMAT_MSGSM) {
    270                 if (mBitsPerSample != 0) {
    271                     return ERROR_UNSUPPORTED;
    272                 }
    273             } else if (mWaveFormat == WAVE_FORMAT_MULAW || mWaveFormat == WAVE_FORMAT_ALAW) {
    274                 if (mBitsPerSample != 8) {
    275                     return ERROR_UNSUPPORTED;
    276                 }
    277             } else {
    278                 return ERROR_UNSUPPORTED;
    279             }
    280 
    281             mValidFormat = true;
    282         } else if (!memcmp(chunkHeader, "data", 4)) {
    283             if (mValidFormat) {
    284                 mDataOffset = offset;
    285                 mDataSize = chunkSize;
    286 
    287                 mTrackMeta = new MetaData;
    288 
    289                 switch (mWaveFormat) {
    290                     case WAVE_FORMAT_PCM:
    291                     case WAVE_FORMAT_IEEE_FLOAT:
    292                         mTrackMeta->setCString(
    293                                 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_RAW);
    294                         break;
    295                     case WAVE_FORMAT_ALAW:
    296                         mTrackMeta->setCString(
    297                                 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_G711_ALAW);
    298                         break;
    299                     case WAVE_FORMAT_MSGSM:
    300                         mTrackMeta->setCString(
    301                                 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MSGSM);
    302                         break;
    303                     default:
    304                         CHECK_EQ(mWaveFormat, (uint16_t)WAVE_FORMAT_MULAW);
    305                         mTrackMeta->setCString(
    306                                 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_G711_MLAW);
    307                         break;
    308                 }
    309 
    310                 mTrackMeta->setInt32(kKeyChannelCount, mNumChannels);
    311                 mTrackMeta->setInt32(kKeyChannelMask, mChannelMask);
    312                 mTrackMeta->setInt32(kKeySampleRate, mSampleRate);
    313                 mTrackMeta->setInt32(kKeyPcmEncoding, kAudioEncodingPcm16bit);
    314 
    315                 int64_t durationUs = 0;
    316                 if (mWaveFormat == WAVE_FORMAT_MSGSM) {
    317                     // 65 bytes decode to 320 8kHz samples
    318                     durationUs =
    319                         1000000LL * (mDataSize / 65 * 320) / 8000;
    320                 } else {
    321                     size_t bytesPerSample = mBitsPerSample >> 3;
    322 
    323                     if (!bytesPerSample || !mNumChannels)
    324                         return ERROR_MALFORMED;
    325 
    326                     size_t num_samples = mDataSize / (mNumChannels * bytesPerSample);
    327 
    328                     if (!mSampleRate)
    329                         return ERROR_MALFORMED;
    330 
    331                     durationUs =
    332                         1000000LL * num_samples / mSampleRate;
    333                 }
    334 
    335                 mTrackMeta->setInt64(kKeyDuration, durationUs);
    336 
    337                 return OK;
    338             }
    339         }
    340 
    341         offset += chunkSize;
    342     }
    343 
    344     return NO_INIT;
    345 }
    346 
    347 const size_t WAVSource::kMaxFrameSize = 32768;
    348 
    349 WAVSource::WAVSource(
    350         const sp<DataSource> &dataSource,
    351         const sp<MetaData> &meta,
    352         uint16_t waveFormat,
    353         int32_t bitsPerSample,
    354         off64_t offset, size_t size)
    355     : mDataSource(dataSource),
    356       mMeta(meta),
    357       mWaveFormat(waveFormat),
    358       mSampleRate(0),
    359       mNumChannels(0),
    360       mBitsPerSample(bitsPerSample),
    361       mOffset(offset),
    362       mSize(size),
    363       mStarted(false),
    364       mGroup(NULL) {
    365     CHECK(mMeta->findInt32(kKeySampleRate, &mSampleRate));
    366     CHECK(mMeta->findInt32(kKeyChannelCount, &mNumChannels));
    367 
    368     mMeta->setInt32(kKeyMaxInputSize, kMaxFrameSize);
    369 }
    370 
    371 WAVSource::~WAVSource() {
    372     if (mStarted) {
    373         stop();
    374     }
    375 }
    376 
    377 status_t WAVSource::start(MetaData * /* params */) {
    378     ALOGV("WAVSource::start");
    379 
    380     CHECK(!mStarted);
    381 
    382     // some WAV files may have large audio buffers that use shared memory transfer.
    383     mGroup = new MediaBufferGroup(4 /* buffers */, kMaxFrameSize);
    384 
    385     if (mBitsPerSample == 8) {
    386         // As a temporary buffer for 8->16 bit conversion.
    387         mGroup->add_buffer(new MediaBuffer(kMaxFrameSize));
    388     }
    389 
    390     mCurrentPos = mOffset;
    391 
    392     mStarted = true;
    393 
    394     return OK;
    395 }
    396 
    397 status_t WAVSource::stop() {
    398     ALOGV("WAVSource::stop");
    399 
    400     CHECK(mStarted);
    401 
    402     delete mGroup;
    403     mGroup = NULL;
    404 
    405     mStarted = false;
    406 
    407     return OK;
    408 }
    409 
    410 sp<MetaData> WAVSource::getFormat() {
    411     ALOGV("WAVSource::getFormat");
    412 
    413     return mMeta;
    414 }
    415 
    416 status_t WAVSource::read(
    417         MediaBuffer **out, const ReadOptions *options) {
    418     *out = NULL;
    419 
    420     if (options != nullptr && options->getNonBlocking() && !mGroup->has_buffers()) {
    421         return WOULD_BLOCK;
    422     }
    423 
    424     int64_t seekTimeUs;
    425     ReadOptions::SeekMode mode;
    426     if (options != NULL && options->getSeekTo(&seekTimeUs, &mode)) {
    427         int64_t pos = 0;
    428 
    429         if (mWaveFormat == WAVE_FORMAT_MSGSM) {
    430             // 65 bytes decode to 320 8kHz samples
    431             int64_t samplenumber = (seekTimeUs * mSampleRate) / 1000000;
    432             int64_t framenumber = samplenumber / 320;
    433             pos = framenumber * 65;
    434         } else {
    435             pos = (seekTimeUs * mSampleRate) / 1000000 * mNumChannels * (mBitsPerSample >> 3);
    436         }
    437         if (pos > (off64_t)mSize) {
    438             pos = mSize;
    439         }
    440         mCurrentPos = pos + mOffset;
    441     }
    442 
    443     MediaBuffer *buffer;
    444     status_t err = mGroup->acquire_buffer(&buffer);
    445     if (err != OK) {
    446         return err;
    447     }
    448 
    449     // make sure that maxBytesToRead is multiple of 3, in 24-bit case
    450     size_t maxBytesToRead =
    451         mBitsPerSample == 8 ? kMaxFrameSize / 2 :
    452         (mBitsPerSample == 24 ? 3*(kMaxFrameSize/3): kMaxFrameSize);
    453 
    454     size_t maxBytesAvailable =
    455         (mCurrentPos - mOffset >= (off64_t)mSize)
    456             ? 0 : mSize - (mCurrentPos - mOffset);
    457 
    458     if (maxBytesToRead > maxBytesAvailable) {
    459         maxBytesToRead = maxBytesAvailable;
    460     }
    461 
    462     if (mWaveFormat == WAVE_FORMAT_MSGSM) {
    463         // Microsoft packs 2 frames into 65 bytes, rather than using separate 33-byte frames,
    464         // so read multiples of 65, and use smaller buffers to account for ~10:1 expansion ratio
    465         if (maxBytesToRead > 1024) {
    466             maxBytesToRead = 1024;
    467         }
    468         maxBytesToRead = (maxBytesToRead / 65) * 65;
    469     } else {
    470         // read only integral amounts of audio unit frames.
    471         const size_t inputUnitFrameSize = mNumChannels * mBitsPerSample / 8;
    472         maxBytesToRead -= maxBytesToRead % inputUnitFrameSize;
    473     }
    474 
    475     ssize_t n = mDataSource->readAt(
    476             mCurrentPos, buffer->data(),
    477             maxBytesToRead);
    478 
    479     if (n <= 0) {
    480         buffer->release();
    481         buffer = NULL;
    482 
    483         return ERROR_END_OF_STREAM;
    484     }
    485 
    486     buffer->set_range(0, n);
    487 
    488     // TODO: add capability to return data as float PCM instead of 16 bit PCM.
    489     if (mWaveFormat == WAVE_FORMAT_PCM) {
    490         if (mBitsPerSample == 8) {
    491             // Convert 8-bit unsigned samples to 16-bit signed.
    492 
    493             // Create new buffer with 2 byte wide samples
    494             MediaBuffer *tmp;
    495             CHECK_EQ(mGroup->acquire_buffer(&tmp), (status_t)OK);
    496             tmp->set_range(0, 2 * n);
    497 
    498             memcpy_to_i16_from_u8((int16_t *)tmp->data(), (const uint8_t *)buffer->data(), n);
    499             buffer->release();
    500             buffer = tmp;
    501         } else if (mBitsPerSample == 24) {
    502             // Convert 24-bit signed samples to 16-bit signed in place
    503             const size_t numSamples = n / 3;
    504 
    505             memcpy_to_i16_from_p24((int16_t *)buffer->data(), (const uint8_t *)buffer->data(), numSamples);
    506             buffer->set_range(0, 2 * numSamples);
    507         }  else if (mBitsPerSample == 32) {
    508             // Convert 32-bit signed samples to 16-bit signed in place
    509             const size_t numSamples = n / 4;
    510 
    511             memcpy_to_i16_from_i32((int16_t *)buffer->data(), (const int32_t *)buffer->data(), numSamples);
    512             buffer->set_range(0, 2 * numSamples);
    513         }
    514     } else if (mWaveFormat == WAVE_FORMAT_IEEE_FLOAT) {
    515         if (mBitsPerSample == 32) {
    516             // Convert 32-bit float samples to 16-bit signed in place
    517             const size_t numSamples = n / 4;
    518 
    519             memcpy_to_i16_from_float((int16_t *)buffer->data(), (const float *)buffer->data(), numSamples);
    520             buffer->set_range(0, 2 * numSamples);
    521         }
    522     }
    523 
    524     int64_t timeStampUs = 0;
    525 
    526     if (mWaveFormat == WAVE_FORMAT_MSGSM) {
    527         timeStampUs = 1000000LL * (mCurrentPos - mOffset) * 320 / 65 / mSampleRate;
    528     } else {
    529         size_t bytesPerSample = mBitsPerSample >> 3;
    530         timeStampUs = 1000000LL * (mCurrentPos - mOffset)
    531                 / (mNumChannels * bytesPerSample) / mSampleRate;
    532     }
    533 
    534     buffer->meta_data()->setInt64(kKeyTime, timeStampUs);
    535 
    536     buffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
    537     mCurrentPos += n;
    538 
    539     *out = buffer;
    540 
    541     return OK;
    542 }
    543 
    544 ////////////////////////////////////////////////////////////////////////////////
    545 
    546 bool SniffWAV(
    547         const sp<DataSource> &source, String8 *mimeType, float *confidence,
    548         sp<AMessage> *) {
    549     char header[12];
    550     if (source->readAt(0, header, sizeof(header)) < (ssize_t)sizeof(header)) {
    551         return false;
    552     }
    553 
    554     if (memcmp(header, "RIFF", 4) || memcmp(&header[8], "WAVE", 4)) {
    555         return false;
    556     }
    557 
    558     sp<MediaExtractor> extractor = new WAVExtractor(source);
    559     if (extractor->countTracks() == 0) {
    560         return false;
    561     }
    562 
    563     *mimeType = MEDIA_MIMETYPE_CONTAINER_WAV;
    564     *confidence = 0.3f;
    565 
    566     return true;
    567 }
    568 
    569 }  // namespace android
    570