Home | History | Annotate | Download | only in libstagefright
      1 /*
      2  * Copyright (C) 2009 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 //#define LOG_NDEBUG 0
     18 #define LOG_TAG "WAVExtractor"
     19 #include <utils/Log.h>
     20 
     21 #include "include/WAVExtractor.h"
     22 
     23 #include <media/stagefright/foundation/ADebug.h>
     24 #include <media/stagefright/DataSource.h>
     25 #include <media/stagefright/MediaBufferGroup.h>
     26 #include <media/stagefright/MediaDefs.h>
     27 #include <media/stagefright/MediaErrors.h>
     28 #include <media/stagefright/MediaSource.h>
     29 #include <media/stagefright/MetaData.h>
     30 #include <utils/String8.h>
     31 #include <cutils/bitops.h>
     32 
     33 #define CHANNEL_MASK_USE_CHANNEL_ORDER 0
     34 
     35 namespace android {
     36 
// WAVE format tags understood by this extractor. init() compares the 16-bit
// format code read from the "fmt " chunk against these values.
enum {
    WAVE_FORMAT_PCM        = 0x0001,
    WAVE_FORMAT_ALAW       = 0x0006,
    WAVE_FORMAT_MULAW      = 0x0007,
    WAVE_FORMAT_MSGSM      = 0x0031,
    // Extended header: the effective sample format is read from the
    // sub-format GUID instead (see init()).
    WAVE_FORMAT_EXTENSIBLE = 0xFFFE
};
     44 
// Expected value of the last 14 bytes of the sub-format GUID in a
// WAVE_FORMAT_EXTENSIBLE header; init() memcmp()s these against bytes 26..39
// of the "fmt " chunk (the first two GUID bytes carry the format tag).
static const char* WAVEEXT_SUBFORMAT = "\x00\x00\x00\x00\x10\x00\x80\x00\x00\xAA\x00\x38\x9B\x71";
     46 
     47 
     48 static uint32_t U32_LE_AT(const uint8_t *ptr) {
     49     return ptr[3] << 24 | ptr[2] << 16 | ptr[1] << 8 | ptr[0];
     50 }
     51 
     52 static uint16_t U16_LE_AT(const uint8_t *ptr) {
     53     return ptr[1] << 8 | ptr[0];
     54 }
     55 
// MediaSource that serves the audio payload of a WAV file. It reads from the
// byte range [offset, offset + size) of |dataSource| and, for PCM input,
// widens 8-bit samples / narrows 24-bit samples to 16-bit in read().
struct WAVSource : public MediaSource {
    // |meta| must already carry kKeySampleRate and kKeyChannelCount; the
    // constructor CHECKs for both and adds kKeyMaxInputSize to it.
    WAVSource(
            const sp<DataSource> &dataSource,
            const sp<MetaData> &meta,
            uint16_t waveFormat,
            int32_t bitsPerSample,
            off64_t offset, size_t size);

    // Allocates the buffer group and rewinds to the start of the data range.
    virtual status_t start(MetaData *params = NULL);
    // Frees the buffer group allocated by start().
    virtual status_t stop();
    // Returns the metadata object handed to the constructor.
    virtual sp<MetaData> getFormat();

    // Produces the next buffer of audio, honouring an optional seek request
    // in |options|. Returns ERROR_END_OF_STREAM when nothing more can be read.
    virtual status_t read(
            MediaBuffer **buffer, const ReadOptions *options = NULL);

protected:
    virtual ~WAVSource();

private:
    // Capacity of every MediaBuffer created in start(); also published as
    // kKeyMaxInputSize.
    static const size_t kMaxFrameSize;

    sp<DataSource> mDataSource;
    sp<MetaData> mMeta;
    uint16_t mWaveFormat;    // one of the WAVE_FORMAT_* tags
    int32_t mSampleRate;     // read from mMeta (kKeySampleRate)
    int32_t mNumChannels;    // read from mMeta (kKeyChannelCount)
    int32_t mBitsPerSample;
    off64_t mOffset;         // start of the audio payload within mDataSource
    size_t mSize;            // payload length in bytes
    bool mStarted;           // true between start() and stop()
    MediaBufferGroup *mGroup;  // owned; created in start(), deleted in stop()
    off64_t mCurrentPos;     // absolute position of the next read in mDataSource

    // Declared private with no definition visible in this file — presumably
    // to disallow copying.
    WAVSource(const WAVSource &);
    WAVSource &operator=(const WAVSource &);
};
     92 
// Wraps |source| and parses its header straight away via init(). The result
// is stored in mInitCheck, which the accessor methods consult before
// exposing any track.
WAVExtractor::WAVExtractor(const sp<DataSource> &source)
    : mDataSource(source),
      mValidFormat(false),
      // Default mask; init() only overwrites it for WAVE_FORMAT_EXTENSIBLE.
      mChannelMask(CHANNEL_MASK_USE_CHANNEL_ORDER) {
    mInitCheck = init();
}
     99 
// The destructor body performs no explicit cleanup.
WAVExtractor::~WAVExtractor() {
}
    102 
    103 sp<MetaData> WAVExtractor::getMetaData() {
    104     sp<MetaData> meta = new MetaData;
    105 
    106     if (mInitCheck != OK) {
    107         return meta;
    108     }
    109 
    110     meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_WAV);
    111 
    112     return meta;
    113 }
    114 
    115 size_t WAVExtractor::countTracks() {
    116     return mInitCheck == OK ? 1 : 0;
    117 }
    118 
    119 sp<MediaSource> WAVExtractor::getTrack(size_t index) {
    120     if (mInitCheck != OK || index > 0) {
    121         return NULL;
    122     }
    123 
    124     return new WAVSource(
    125             mDataSource, mTrackMeta,
    126             mWaveFormat, mBitsPerSample, mDataOffset, mDataSize);
    127 }
    128 
    129 sp<MetaData> WAVExtractor::getTrackMetaData(
    130         size_t index, uint32_t flags) {
    131     if (mInitCheck != OK || index > 0) {
    132         return NULL;
    133     }
    134 
    135     return mTrackMeta;
    136 }
    137 
    138 status_t WAVExtractor::init() {
    139     uint8_t header[12];
    140     if (mDataSource->readAt(
    141                 0, header, sizeof(header)) < (ssize_t)sizeof(header)) {
    142         return NO_INIT;
    143     }
    144 
    145     if (memcmp(header, "RIFF", 4) || memcmp(&header[8], "WAVE", 4)) {
    146         return NO_INIT;
    147     }
    148 
    149     size_t totalSize = U32_LE_AT(&header[4]);
    150 
    151     off64_t offset = 12;
    152     size_t remainingSize = totalSize;
    153     while (remainingSize >= 8) {
    154         uint8_t chunkHeader[8];
    155         if (mDataSource->readAt(offset, chunkHeader, 8) < 8) {
    156             return NO_INIT;
    157         }
    158 
    159         remainingSize -= 8;
    160         offset += 8;
    161 
    162         uint32_t chunkSize = U32_LE_AT(&chunkHeader[4]);
    163 
    164         if (chunkSize > remainingSize) {
    165             return NO_INIT;
    166         }
    167 
    168         if (!memcmp(chunkHeader, "fmt ", 4)) {
    169             if (chunkSize < 16) {
    170                 return NO_INIT;
    171             }
    172 
    173             uint8_t formatSpec[40];
    174             if (mDataSource->readAt(offset, formatSpec, 2) < 2) {
    175                 return NO_INIT;
    176             }
    177 
    178             mWaveFormat = U16_LE_AT(formatSpec);
    179             if (mWaveFormat != WAVE_FORMAT_PCM
    180                     && mWaveFormat != WAVE_FORMAT_ALAW
    181                     && mWaveFormat != WAVE_FORMAT_MULAW
    182                     && mWaveFormat != WAVE_FORMAT_MSGSM
    183                     && mWaveFormat != WAVE_FORMAT_EXTENSIBLE) {
    184                 return ERROR_UNSUPPORTED;
    185             }
    186 
    187             uint8_t fmtSize = 16;
    188             if (mWaveFormat == WAVE_FORMAT_EXTENSIBLE) {
    189                 fmtSize = 40;
    190             }
    191             if (mDataSource->readAt(offset, formatSpec, fmtSize) < fmtSize) {
    192                 return NO_INIT;
    193             }
    194 
    195             mNumChannels = U16_LE_AT(&formatSpec[2]);
    196             if (mWaveFormat != WAVE_FORMAT_EXTENSIBLE) {
    197                 if (mNumChannels != 1 && mNumChannels != 2) {
    198                     ALOGW("More than 2 channels (%d) in non-WAVE_EXT, unknown channel mask",
    199                             mNumChannels);
    200                 }
    201             } else {
    202                 if (mNumChannels < 1 && mNumChannels > 8) {
    203                     return ERROR_UNSUPPORTED;
    204                 }
    205             }
    206 
    207             mSampleRate = U32_LE_AT(&formatSpec[4]);
    208 
    209             if (mSampleRate == 0) {
    210                 return ERROR_MALFORMED;
    211             }
    212 
    213             mBitsPerSample = U16_LE_AT(&formatSpec[14]);
    214 
    215             if (mWaveFormat == WAVE_FORMAT_PCM
    216                     || mWaveFormat == WAVE_FORMAT_EXTENSIBLE) {
    217                 if (mBitsPerSample != 8 && mBitsPerSample != 16
    218                     && mBitsPerSample != 24) {
    219                     return ERROR_UNSUPPORTED;
    220                 }
    221             } else if (mWaveFormat == WAVE_FORMAT_MSGSM) {
    222                 if (mBitsPerSample != 0) {
    223                     return ERROR_UNSUPPORTED;
    224                 }
    225             } else {
    226                 CHECK(mWaveFormat == WAVE_FORMAT_MULAW
    227                         || mWaveFormat == WAVE_FORMAT_ALAW);
    228                 if (mBitsPerSample != 8) {
    229                     return ERROR_UNSUPPORTED;
    230                 }
    231             }
    232 
    233             if (mWaveFormat == WAVE_FORMAT_EXTENSIBLE) {
    234                 uint16_t validBitsPerSample = U16_LE_AT(&formatSpec[18]);
    235                 if (validBitsPerSample != mBitsPerSample) {
    236                     if (validBitsPerSample != 0) {
    237                         ALOGE("validBits(%d) != bitsPerSample(%d) are not supported",
    238                                 validBitsPerSample, mBitsPerSample);
    239                         return ERROR_UNSUPPORTED;
    240                     } else {
    241                         // we only support valitBitsPerSample == bitsPerSample but some WAV_EXT
    242                         // writers don't correctly set the valid bits value, and leave it at 0.
    243                         ALOGW("WAVE_EXT has 0 valid bits per sample, ignoring");
    244                     }
    245                 }
    246 
    247                 mChannelMask = U32_LE_AT(&formatSpec[20]);
    248                 ALOGV("numChannels=%d channelMask=0x%x", mNumChannels, mChannelMask);
    249                 if ((mChannelMask >> 18) != 0) {
    250                     ALOGE("invalid channel mask 0x%x", mChannelMask);
    251                     return ERROR_MALFORMED;
    252                 }
    253 
    254                 if ((mChannelMask != CHANNEL_MASK_USE_CHANNEL_ORDER)
    255                         && (popcount(mChannelMask) != mNumChannels)) {
    256                     ALOGE("invalid number of channels (%d) in channel mask (0x%x)",
    257                             popcount(mChannelMask), mChannelMask);
    258                     return ERROR_MALFORMED;
    259                 }
    260 
    261                 // In a WAVE_EXT header, the first two bytes of the GUID stored at byte 24 contain
    262                 // the sample format, using the same definitions as a regular WAV header
    263                 mWaveFormat = U16_LE_AT(&formatSpec[24]);
    264                 if (mWaveFormat != WAVE_FORMAT_PCM
    265                         && mWaveFormat != WAVE_FORMAT_ALAW
    266                         && mWaveFormat != WAVE_FORMAT_MULAW) {
    267                     return ERROR_UNSUPPORTED;
    268                 }
    269                 if (memcmp(&formatSpec[26], WAVEEXT_SUBFORMAT, 14)) {
    270                     ALOGE("unsupported GUID");
    271                     return ERROR_UNSUPPORTED;
    272                 }
    273             }
    274 
    275             mValidFormat = true;
    276         } else if (!memcmp(chunkHeader, "data", 4)) {
    277             if (mValidFormat) {
    278                 mDataOffset = offset;
    279                 mDataSize = chunkSize;
    280 
    281                 mTrackMeta = new MetaData;
    282 
    283                 switch (mWaveFormat) {
    284                     case WAVE_FORMAT_PCM:
    285                         mTrackMeta->setCString(
    286                                 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_RAW);
    287                         break;
    288                     case WAVE_FORMAT_ALAW:
    289                         mTrackMeta->setCString(
    290                                 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_G711_ALAW);
    291                         break;
    292                     case WAVE_FORMAT_MSGSM:
    293                         mTrackMeta->setCString(
    294                                 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MSGSM);
    295                         break;
    296                     default:
    297                         CHECK_EQ(mWaveFormat, (uint16_t)WAVE_FORMAT_MULAW);
    298                         mTrackMeta->setCString(
    299                                 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_G711_MLAW);
    300                         break;
    301                 }
    302 
    303                 mTrackMeta->setInt32(kKeyChannelCount, mNumChannels);
    304                 mTrackMeta->setInt32(kKeyChannelMask, mChannelMask);
    305                 mTrackMeta->setInt32(kKeySampleRate, mSampleRate);
    306 
    307                 int64_t durationUs = 0;
    308                 if (mWaveFormat == WAVE_FORMAT_MSGSM) {
    309                     // 65 bytes decode to 320 8kHz samples
    310                     durationUs =
    311                         1000000LL * (mDataSize / 65 * 320) / 8000;
    312                 } else {
    313                     size_t bytesPerSample = mBitsPerSample >> 3;
    314                     durationUs =
    315                         1000000LL * (mDataSize / (mNumChannels * bytesPerSample))
    316                             / mSampleRate;
    317                 }
    318 
    319                 mTrackMeta->setInt64(kKeyDuration, durationUs);
    320 
    321                 return OK;
    322             }
    323         }
    324 
    325         offset += chunkSize;
    326     }
    327 
    328     return NO_INIT;
    329 }
    330 
// Capacity in bytes of each MediaBuffer allocated by start(); the constructor
// also publishes it to clients as kKeyMaxInputSize.
const size_t WAVSource::kMaxFrameSize = 32768;
    332 
    333 WAVSource::WAVSource(
    334         const sp<DataSource> &dataSource,
    335         const sp<MetaData> &meta,
    336         uint16_t waveFormat,
    337         int32_t bitsPerSample,
    338         off64_t offset, size_t size)
    339     : mDataSource(dataSource),
    340       mMeta(meta),
    341       mWaveFormat(waveFormat),
    342       mSampleRate(0),
    343       mNumChannels(0),
    344       mBitsPerSample(bitsPerSample),
    345       mOffset(offset),
    346       mSize(size),
    347       mStarted(false),
    348       mGroup(NULL) {
    349     CHECK(mMeta->findInt32(kKeySampleRate, &mSampleRate));
    350     CHECK(mMeta->findInt32(kKeyChannelCount, &mNumChannels));
    351 
    352     mMeta->setInt32(kKeyMaxInputSize, kMaxFrameSize);
    353 }
    354 
    355 WAVSource::~WAVSource() {
    356     if (mStarted) {
    357         stop();
    358     }
    359 }
    360 
    361 status_t WAVSource::start(MetaData *params) {
    362     ALOGV("WAVSource::start");
    363 
    364     CHECK(!mStarted);
    365 
    366     mGroup = new MediaBufferGroup;
    367     mGroup->add_buffer(new MediaBuffer(kMaxFrameSize));
    368 
    369     if (mBitsPerSample == 8) {
    370         // As a temporary buffer for 8->16 bit conversion.
    371         mGroup->add_buffer(new MediaBuffer(kMaxFrameSize));
    372     }
    373 
    374     mCurrentPos = mOffset;
    375 
    376     mStarted = true;
    377 
    378     return OK;
    379 }
    380 
    381 status_t WAVSource::stop() {
    382     ALOGV("WAVSource::stop");
    383 
    384     CHECK(mStarted);
    385 
    386     delete mGroup;
    387     mGroup = NULL;
    388 
    389     mStarted = false;
    390 
    391     return OK;
    392 }
    393 
    394 sp<MetaData> WAVSource::getFormat() {
    395     ALOGV("WAVSource::getFormat");
    396 
    397     return mMeta;
    398 }
    399 
    400 status_t WAVSource::read(
    401         MediaBuffer **out, const ReadOptions *options) {
    402     *out = NULL;
    403 
    404     int64_t seekTimeUs;
    405     ReadOptions::SeekMode mode;
    406     if (options != NULL && options->getSeekTo(&seekTimeUs, &mode)) {
    407         int64_t pos = 0;
    408 
    409         if (mWaveFormat == WAVE_FORMAT_MSGSM) {
    410             // 65 bytes decode to 320 8kHz samples
    411             int64_t samplenumber = (seekTimeUs * mSampleRate) / 1000000;
    412             int64_t framenumber = samplenumber / 320;
    413             pos = framenumber * 65;
    414         } else {
    415             pos = (seekTimeUs * mSampleRate) / 1000000 * mNumChannels * (mBitsPerSample >> 3);
    416         }
    417         if (pos > mSize) {
    418             pos = mSize;
    419         }
    420         mCurrentPos = pos + mOffset;
    421     }
    422 
    423     MediaBuffer *buffer;
    424     status_t err = mGroup->acquire_buffer(&buffer);
    425     if (err != OK) {
    426         return err;
    427     }
    428 
    429     // make sure that maxBytesToRead is multiple of 3, in 24-bit case
    430     size_t maxBytesToRead =
    431         mBitsPerSample == 8 ? kMaxFrameSize / 2 :
    432         (mBitsPerSample == 24 ? 3*(kMaxFrameSize/3): kMaxFrameSize);
    433 
    434     size_t maxBytesAvailable =
    435         (mCurrentPos - mOffset >= (off64_t)mSize)
    436             ? 0 : mSize - (mCurrentPos - mOffset);
    437 
    438     if (maxBytesToRead > maxBytesAvailable) {
    439         maxBytesToRead = maxBytesAvailable;
    440     }
    441 
    442     if (mWaveFormat == WAVE_FORMAT_MSGSM) {
    443         // Microsoft packs 2 frames into 65 bytes, rather than using separate 33-byte frames,
    444         // so read multiples of 65, and use smaller buffers to account for ~10:1 expansion ratio
    445         if (maxBytesToRead > 1024) {
    446             maxBytesToRead = 1024;
    447         }
    448         maxBytesToRead = (maxBytesToRead / 65) * 65;
    449     }
    450 
    451     ssize_t n = mDataSource->readAt(
    452             mCurrentPos, buffer->data(),
    453             maxBytesToRead);
    454 
    455     if (n <= 0) {
    456         buffer->release();
    457         buffer = NULL;
    458 
    459         return ERROR_END_OF_STREAM;
    460     }
    461 
    462     buffer->set_range(0, n);
    463 
    464     if (mWaveFormat == WAVE_FORMAT_PCM || mWaveFormat == WAVE_FORMAT_EXTENSIBLE) {
    465         if (mBitsPerSample == 8) {
    466             // Convert 8-bit unsigned samples to 16-bit signed.
    467 
    468             MediaBuffer *tmp;
    469             CHECK_EQ(mGroup->acquire_buffer(&tmp), (status_t)OK);
    470 
    471             // The new buffer holds the sample number of samples, but each
    472             // one is 2 bytes wide.
    473             tmp->set_range(0, 2 * n);
    474 
    475             int16_t *dst = (int16_t *)tmp->data();
    476             const uint8_t *src = (const uint8_t *)buffer->data();
    477             ssize_t numBytes = n;
    478 
    479             while (numBytes-- > 0) {
    480                 *dst++ = ((int16_t)(*src) - 128) * 256;
    481                 ++src;
    482             }
    483 
    484             buffer->release();
    485             buffer = tmp;
    486         } else if (mBitsPerSample == 24) {
    487             // Convert 24-bit signed samples to 16-bit signed.
    488 
    489             const uint8_t *src =
    490                 (const uint8_t *)buffer->data() + buffer->range_offset();
    491             int16_t *dst = (int16_t *)src;
    492 
    493             size_t numSamples = buffer->range_length() / 3;
    494             for (size_t i = 0; i < numSamples; ++i) {
    495                 int32_t x = (int32_t)(src[0] | src[1] << 8 | src[2] << 16);
    496                 x = (x << 8) >> 8;  // sign extension
    497 
    498                 x = x >> 8;
    499                 *dst++ = (int16_t)x;
    500                 src += 3;
    501             }
    502 
    503             buffer->set_range(buffer->range_offset(), 2 * numSamples);
    504         }
    505     }
    506 
    507     int64_t timeStampUs = 0;
    508 
    509     if (mWaveFormat == WAVE_FORMAT_MSGSM) {
    510         timeStampUs = 1000000LL * (mCurrentPos - mOffset) * 320 / 65 / mSampleRate;
    511     } else {
    512         size_t bytesPerSample = mBitsPerSample >> 3;
    513         timeStampUs = 1000000LL * (mCurrentPos - mOffset)
    514                 / (mNumChannels * bytesPerSample) / mSampleRate;
    515     }
    516 
    517     buffer->meta_data()->setInt64(kKeyTime, timeStampUs);
    518 
    519     buffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
    520     mCurrentPos += n;
    521 
    522     *out = buffer;
    523 
    524     return OK;
    525 }
    526 
    527 ////////////////////////////////////////////////////////////////////////////////
    528 
    529 bool SniffWAV(
    530         const sp<DataSource> &source, String8 *mimeType, float *confidence,
    531         sp<AMessage> *) {
    532     char header[12];
    533     if (source->readAt(0, header, sizeof(header)) < (ssize_t)sizeof(header)) {
    534         return false;
    535     }
    536 
    537     if (memcmp(header, "RIFF", 4) || memcmp(&header[8], "WAVE", 4)) {
    538         return false;
    539     }
    540 
    541     sp<MediaExtractor> extractor = new WAVExtractor(source);
    542     if (extractor->countTracks() == 0) {
    543         return false;
    544     }
    545 
    546     *mimeType = MEDIA_MIMETYPE_CONTAINER_WAV;
    547     *confidence = 0.3f;
    548 
    549     return true;
    550 }
    551 
    552 }  // namespace android
    553