Home | History | Annotate | Download | only in libstagefright
      1 /*
      2  * Copyright (C) 2009 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 //#define LOG_NDEBUG 0
     18 #define LOG_TAG "WAVExtractor"
     19 #include <utils/Log.h>
     20 
     21 #include "include/WAVExtractor.h"
     22 
     23 #include <media/stagefright/foundation/ADebug.h>
     24 #include <media/stagefright/DataSource.h>
     25 #include <media/stagefright/MediaBufferGroup.h>
     26 #include <media/stagefright/MediaDefs.h>
     27 #include <media/stagefright/MediaErrors.h>
     28 #include <media/stagefright/MediaSource.h>
     29 #include <media/stagefright/MetaData.h>
     30 #include <utils/String8.h>
     31 #include <cutils/bitops.h>
     32 
     33 #define CHANNEL_MASK_USE_CHANNEL_ORDER 0
     34 
     35 namespace android {
     36 
// Format tags recognised by this extractor. init() reads the tag from the
// first 16-bit little-endian field of the "fmt " chunk and rejects any value
// not listed here.
enum {
    WAVE_FORMAT_PCM        = 0x0001,
    WAVE_FORMAT_ALAW       = 0x0006,
    WAVE_FORMAT_MULAW      = 0x0007,
    // Extended header: the actual sample format is taken from the first two
    // bytes of the sub-format GUID (see init()).
    WAVE_FORMAT_EXTENSIBLE = 0xFFFE
};
     43 
// The 14 bytes that init() requires at formatSpec[26..39] of a
// WAVE_FORMAT_EXTENSIBLE header, i.e. everything in the sub-format GUID after
// its first two bytes (which carry the sample format tag).
static const char* WAVEEXT_SUBFORMAT = "\x00\x00\x00\x00\x10\x00\x80\x00\x00\xAA\x00\x38\x9B\x71";
     45 
     46 
     47 static uint32_t U32_LE_AT(const uint8_t *ptr) {
     48     return ptr[3] << 24 | ptr[2] << 16 | ptr[1] << 8 | ptr[0];
     49 }
     50 
     51 static uint16_t U16_LE_AT(const uint8_t *ptr) {
     52     return ptr[1] << 8 | ptr[0];
     53 }
     54 
// MediaSource that serves the audio payload of a WAV "data" chunk.
// WAVExtractor::getTrack() creates it with the format parsed from the file
// header and the byte range of the data chunk.
struct WAVSource : public MediaSource {
    // offset/size describe the data chunk payload within dataSource; meta
    // must contain kKeySampleRate and kKeyChannelCount.
    WAVSource(
            const sp<DataSource> &dataSource,
            const sp<MetaData> &meta,
            uint16_t waveFormat,
            int32_t bitsPerSample,
            off64_t offset, size_t size);

    // Allocates the buffer group and rewinds to the start of the data.
    virtual status_t start(MetaData *params = NULL);
    // Releases the buffer group allocated by start().
    virtual status_t stop();
    virtual sp<MetaData> getFormat();

    // Returns the next buffer of audio, handling an optional seek request
    // carried in options.
    virtual status_t read(
            MediaBuffer **buffer, const ReadOptions *options = NULL);

protected:
    virtual ~WAVSource();

private:
    // Size in bytes of each MediaBuffer allocated by start().
    static const size_t kMaxFrameSize;

    sp<DataSource> mDataSource;
    sp<MetaData> mMeta;
    uint16_t mWaveFormat;
    int32_t mSampleRate;     // read from mMeta in the constructor
    int32_t mNumChannels;    // read from mMeta in the constructor
    int32_t mBitsPerSample;
    off64_t mOffset;         // start of the data chunk payload
    size_t mSize;            // length of the data chunk payload in bytes
    bool mStarted;
    MediaBufferGroup *mGroup;
    off64_t mCurrentPos;     // absolute read position; set by start()/seek

    // Declared private so the type cannot be copied.
    WAVSource(const WAVSource &);
    WAVSource &operator=(const WAVSource &);
};
     91 
// Parses the WAV header of |source| immediately; the outcome is kept in
// mInitCheck and consulted by every accessor.
WAVExtractor::WAVExtractor(const sp<DataSource> &source)
    : mDataSource(source),
      mValidFormat(false),
      mChannelMask(CHANNEL_MASK_USE_CHANNEL_ORDER) {
    mInitCheck = init();
}
     98 
// Nothing to release explicitly here.
WAVExtractor::~WAVExtractor() {
}
    101 
    102 sp<MetaData> WAVExtractor::getMetaData() {
    103     sp<MetaData> meta = new MetaData;
    104 
    105     if (mInitCheck != OK) {
    106         return meta;
    107     }
    108 
    109     meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_WAV);
    110 
    111     return meta;
    112 }
    113 
    114 size_t WAVExtractor::countTracks() {
    115     return mInitCheck == OK ? 1 : 0;
    116 }
    117 
    118 sp<MediaSource> WAVExtractor::getTrack(size_t index) {
    119     if (mInitCheck != OK || index > 0) {
    120         return NULL;
    121     }
    122 
    123     return new WAVSource(
    124             mDataSource, mTrackMeta,
    125             mWaveFormat, mBitsPerSample, mDataOffset, mDataSize);
    126 }
    127 
    128 sp<MetaData> WAVExtractor::getTrackMetaData(
    129         size_t index, uint32_t flags) {
    130     if (mInitCheck != OK || index > 0) {
    131         return NULL;
    132     }
    133 
    134     return mTrackMeta;
    135 }
    136 
    137 status_t WAVExtractor::init() {
    138     uint8_t header[12];
    139     if (mDataSource->readAt(
    140                 0, header, sizeof(header)) < (ssize_t)sizeof(header)) {
    141         return NO_INIT;
    142     }
    143 
    144     if (memcmp(header, "RIFF", 4) || memcmp(&header[8], "WAVE", 4)) {
    145         return NO_INIT;
    146     }
    147 
    148     size_t totalSize = U32_LE_AT(&header[4]);
    149 
    150     off64_t offset = 12;
    151     size_t remainingSize = totalSize;
    152     while (remainingSize >= 8) {
    153         uint8_t chunkHeader[8];
    154         if (mDataSource->readAt(offset, chunkHeader, 8) < 8) {
    155             return NO_INIT;
    156         }
    157 
    158         remainingSize -= 8;
    159         offset += 8;
    160 
    161         uint32_t chunkSize = U32_LE_AT(&chunkHeader[4]);
    162 
    163         if (chunkSize > remainingSize) {
    164             return NO_INIT;
    165         }
    166 
    167         if (!memcmp(chunkHeader, "fmt ", 4)) {
    168             if (chunkSize < 16) {
    169                 return NO_INIT;
    170             }
    171 
    172             uint8_t formatSpec[40];
    173             if (mDataSource->readAt(offset, formatSpec, 2) < 2) {
    174                 return NO_INIT;
    175             }
    176 
    177             mWaveFormat = U16_LE_AT(formatSpec);
    178             if (mWaveFormat != WAVE_FORMAT_PCM
    179                     && mWaveFormat != WAVE_FORMAT_ALAW
    180                     && mWaveFormat != WAVE_FORMAT_MULAW
    181                     && mWaveFormat != WAVE_FORMAT_EXTENSIBLE) {
    182                 return ERROR_UNSUPPORTED;
    183             }
    184 
    185             uint8_t fmtSize = 16;
    186             if (mWaveFormat == WAVE_FORMAT_EXTENSIBLE) {
    187                 fmtSize = 40;
    188             }
    189             if (mDataSource->readAt(offset, formatSpec, fmtSize) < fmtSize) {
    190                 return NO_INIT;
    191             }
    192 
    193             mNumChannels = U16_LE_AT(&formatSpec[2]);
    194             if (mWaveFormat != WAVE_FORMAT_EXTENSIBLE) {
    195                 if (mNumChannels != 1 && mNumChannels != 2) {
    196                     ALOGW("More than 2 channels (%d) in non-WAVE_EXT, unknown channel mask",
    197                             mNumChannels);
    198                 }
    199             } else {
    200                 if (mNumChannels < 1 && mNumChannels > 8) {
    201                     return ERROR_UNSUPPORTED;
    202                 }
    203             }
    204 
    205             mSampleRate = U32_LE_AT(&formatSpec[4]);
    206 
    207             if (mSampleRate == 0) {
    208                 return ERROR_MALFORMED;
    209             }
    210 
    211             mBitsPerSample = U16_LE_AT(&formatSpec[14]);
    212 
    213             if (mWaveFormat == WAVE_FORMAT_PCM
    214                     || mWaveFormat == WAVE_FORMAT_EXTENSIBLE) {
    215                 if (mBitsPerSample != 8 && mBitsPerSample != 16
    216                     && mBitsPerSample != 24) {
    217                     return ERROR_UNSUPPORTED;
    218                 }
    219             } else {
    220                 CHECK(mWaveFormat == WAVE_FORMAT_MULAW
    221                         || mWaveFormat == WAVE_FORMAT_ALAW);
    222                 if (mBitsPerSample != 8) {
    223                     return ERROR_UNSUPPORTED;
    224                 }
    225             }
    226 
    227             if (mWaveFormat == WAVE_FORMAT_EXTENSIBLE) {
    228                 uint16_t validBitsPerSample = U16_LE_AT(&formatSpec[18]);
    229                 if (validBitsPerSample != mBitsPerSample) {
    230                     if (validBitsPerSample != 0) {
    231                         ALOGE("validBits(%d) != bitsPerSample(%d) are not supported",
    232                                 validBitsPerSample, mBitsPerSample);
    233                         return ERROR_UNSUPPORTED;
    234                     } else {
    235                         // we only support valitBitsPerSample == bitsPerSample but some WAV_EXT
    236                         // writers don't correctly set the valid bits value, and leave it at 0.
    237                         ALOGW("WAVE_EXT has 0 valid bits per sample, ignoring");
    238                     }
    239                 }
    240 
    241                 mChannelMask = U32_LE_AT(&formatSpec[20]);
    242                 ALOGV("numChannels=%d channelMask=0x%x", mNumChannels, mChannelMask);
    243                 if ((mChannelMask >> 18) != 0) {
    244                     ALOGE("invalid channel mask 0x%x", mChannelMask);
    245                     return ERROR_MALFORMED;
    246                 }
    247 
    248                 if ((mChannelMask != CHANNEL_MASK_USE_CHANNEL_ORDER)
    249                         && (popcount(mChannelMask) != mNumChannels)) {
    250                     ALOGE("invalid number of channels (%d) in channel mask (0x%x)",
    251                             popcount(mChannelMask), mChannelMask);
    252                     return ERROR_MALFORMED;
    253                 }
    254 
    255                 // In a WAVE_EXT header, the first two bytes of the GUID stored at byte 24 contain
    256                 // the sample format, using the same definitions as a regular WAV header
    257                 mWaveFormat = U16_LE_AT(&formatSpec[24]);
    258                 if (mWaveFormat != WAVE_FORMAT_PCM
    259                         && mWaveFormat != WAVE_FORMAT_ALAW
    260                         && mWaveFormat != WAVE_FORMAT_MULAW) {
    261                     return ERROR_UNSUPPORTED;
    262                 }
    263                 if (memcmp(&formatSpec[26], WAVEEXT_SUBFORMAT, 14)) {
    264                     ALOGE("unsupported GUID");
    265                     return ERROR_UNSUPPORTED;
    266                 }
    267             }
    268 
    269             mValidFormat = true;
    270         } else if (!memcmp(chunkHeader, "data", 4)) {
    271             if (mValidFormat) {
    272                 mDataOffset = offset;
    273                 mDataSize = chunkSize;
    274 
    275                 mTrackMeta = new MetaData;
    276 
    277                 switch (mWaveFormat) {
    278                     case WAVE_FORMAT_PCM:
    279                         mTrackMeta->setCString(
    280                                 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_RAW);
    281                         break;
    282                     case WAVE_FORMAT_ALAW:
    283                         mTrackMeta->setCString(
    284                                 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_G711_ALAW);
    285                         break;
    286                     default:
    287                         CHECK_EQ(mWaveFormat, (uint16_t)WAVE_FORMAT_MULAW);
    288                         mTrackMeta->setCString(
    289                                 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_G711_MLAW);
    290                         break;
    291                 }
    292 
    293                 mTrackMeta->setInt32(kKeyChannelCount, mNumChannels);
    294                 mTrackMeta->setInt32(kKeyChannelMask, mChannelMask);
    295                 mTrackMeta->setInt32(kKeySampleRate, mSampleRate);
    296 
    297                 size_t bytesPerSample = mBitsPerSample >> 3;
    298 
    299                 int64_t durationUs =
    300                     1000000LL * (mDataSize / (mNumChannels * bytesPerSample))
    301                         / mSampleRate;
    302 
    303                 mTrackMeta->setInt64(kKeyDuration, durationUs);
    304 
    305                 return OK;
    306             }
    307         }
    308 
    309         offset += chunkSize;
    310     }
    311 
    312     return NO_INIT;
    313 }
    314 
// Capacity in bytes of every MediaBuffer allocated by start(); also published
// as kKeyMaxInputSize in the track metadata.
const size_t WAVSource::kMaxFrameSize = 32768;
    316 
// Stores the stream description and pulls the sample rate and channel count
// out of |meta|; aborts (CHECK) if either key is missing. Also records the
// buffer size in |meta| as kKeyMaxInputSize.
WAVSource::WAVSource(
        const sp<DataSource> &dataSource,
        const sp<MetaData> &meta,
        uint16_t waveFormat,
        int32_t bitsPerSample,
        off64_t offset, size_t size)
    : mDataSource(dataSource),
      mMeta(meta),
      mWaveFormat(waveFormat),
      mSampleRate(0),
      mNumChannels(0),
      mBitsPerSample(bitsPerSample),
      mOffset(offset),
      mSize(size),
      mStarted(false),
      mGroup(NULL) {
    CHECK(mMeta->findInt32(kKeySampleRate, &mSampleRate));
    CHECK(mMeta->findInt32(kKeyChannelCount, &mNumChannels));

    mMeta->setInt32(kKeyMaxInputSize, kMaxFrameSize);
}
    338 
    339 WAVSource::~WAVSource() {
    340     if (mStarted) {
    341         stop();
    342     }
    343 }
    344 
    345 status_t WAVSource::start(MetaData *params) {
    346     ALOGV("WAVSource::start");
    347 
    348     CHECK(!mStarted);
    349 
    350     mGroup = new MediaBufferGroup;
    351     mGroup->add_buffer(new MediaBuffer(kMaxFrameSize));
    352 
    353     if (mBitsPerSample == 8) {
    354         // As a temporary buffer for 8->16 bit conversion.
    355         mGroup->add_buffer(new MediaBuffer(kMaxFrameSize));
    356     }
    357 
    358     mCurrentPos = mOffset;
    359 
    360     mStarted = true;
    361 
    362     return OK;
    363 }
    364 
    365 status_t WAVSource::stop() {
    366     ALOGV("WAVSource::stop");
    367 
    368     CHECK(mStarted);
    369 
    370     delete mGroup;
    371     mGroup = NULL;
    372 
    373     mStarted = false;
    374 
    375     return OK;
    376 }
    377 
// Returns the track metadata handed to the constructor (which also carries
// the kKeyMaxInputSize entry added there).
sp<MetaData> WAVSource::getFormat() {
    ALOGV("WAVSource::getFormat");

    return mMeta;
}
    383 
    384 status_t WAVSource::read(
    385         MediaBuffer **out, const ReadOptions *options) {
    386     *out = NULL;
    387 
    388     int64_t seekTimeUs;
    389     ReadOptions::SeekMode mode;
    390     if (options != NULL && options->getSeekTo(&seekTimeUs, &mode)) {
    391         int64_t pos = (seekTimeUs * mSampleRate) / 1000000 * mNumChannels * (mBitsPerSample >> 3);
    392         if (pos > mSize) {
    393             pos = mSize;
    394         }
    395         mCurrentPos = pos + mOffset;
    396     }
    397 
    398     MediaBuffer *buffer;
    399     status_t err = mGroup->acquire_buffer(&buffer);
    400     if (err != OK) {
    401         return err;
    402     }
    403 
    404     size_t maxBytesToRead =
    405         mBitsPerSample == 8 ? kMaxFrameSize / 2 : kMaxFrameSize;
    406 
    407     size_t maxBytesAvailable =
    408         (mCurrentPos - mOffset >= (off64_t)mSize)
    409             ? 0 : mSize - (mCurrentPos - mOffset);
    410 
    411     if (maxBytesToRead > maxBytesAvailable) {
    412         maxBytesToRead = maxBytesAvailable;
    413     }
    414 
    415     ssize_t n = mDataSource->readAt(
    416             mCurrentPos, buffer->data(),
    417             maxBytesToRead);
    418 
    419     if (n <= 0) {
    420         buffer->release();
    421         buffer = NULL;
    422 
    423         return ERROR_END_OF_STREAM;
    424     }
    425 
    426     buffer->set_range(0, n);
    427 
    428     if (mWaveFormat == WAVE_FORMAT_PCM) {
    429         if (mBitsPerSample == 8) {
    430             // Convert 8-bit unsigned samples to 16-bit signed.
    431 
    432             MediaBuffer *tmp;
    433             CHECK_EQ(mGroup->acquire_buffer(&tmp), (status_t)OK);
    434 
    435             // The new buffer holds the sample number of samples, but each
    436             // one is 2 bytes wide.
    437             tmp->set_range(0, 2 * n);
    438 
    439             int16_t *dst = (int16_t *)tmp->data();
    440             const uint8_t *src = (const uint8_t *)buffer->data();
    441             ssize_t numBytes = n;
    442 
    443             while (numBytes-- > 0) {
    444                 *dst++ = ((int16_t)(*src) - 128) * 256;
    445                 ++src;
    446             }
    447 
    448             buffer->release();
    449             buffer = tmp;
    450         } else if (mBitsPerSample == 24) {
    451             // Convert 24-bit signed samples to 16-bit signed.
    452 
    453             const uint8_t *src =
    454                 (const uint8_t *)buffer->data() + buffer->range_offset();
    455             int16_t *dst = (int16_t *)src;
    456 
    457             size_t numSamples = buffer->range_length() / 3;
    458             for (size_t i = 0; i < numSamples; ++i) {
    459                 int32_t x = (int32_t)(src[0] | src[1] << 8 | src[2] << 16);
    460                 x = (x << 8) >> 8;  // sign extension
    461 
    462                 x = x >> 8;
    463                 *dst++ = (int16_t)x;
    464                 src += 3;
    465             }
    466 
    467             buffer->set_range(buffer->range_offset(), 2 * numSamples);
    468         }
    469     }
    470 
    471     size_t bytesPerSample = mBitsPerSample >> 3;
    472 
    473     buffer->meta_data()->setInt64(
    474             kKeyTime,
    475             1000000LL * (mCurrentPos - mOffset)
    476                 / (mNumChannels * bytesPerSample) / mSampleRate);
    477 
    478     buffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
    479     mCurrentPos += n;
    480 
    481     *out = buffer;
    482 
    483     return OK;
    484 }
    485 
    486 ////////////////////////////////////////////////////////////////////////////////
    487 
    488 bool SniffWAV(
    489         const sp<DataSource> &source, String8 *mimeType, float *confidence,
    490         sp<AMessage> *) {
    491     char header[12];
    492     if (source->readAt(0, header, sizeof(header)) < (ssize_t)sizeof(header)) {
    493         return false;
    494     }
    495 
    496     if (memcmp(header, "RIFF", 4) || memcmp(&header[8], "WAVE", 4)) {
    497         return false;
    498     }
    499 
    500     sp<MediaExtractor> extractor = new WAVExtractor(source);
    501     if (extractor->countTracks() == 0) {
    502         return false;
    503     }
    504 
    505     *mimeType = MEDIA_MIMETYPE_CONTAINER_WAV;
    506     *confidence = 0.3f;
    507 
    508     return true;
    509 }
    510 
    511 }  // namespace android
    512