Home | History | Annotate | Download | only in wav
      1 /*
      2  * Copyright (C) 2009 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 //#define LOG_NDEBUG 0
     18 #define LOG_TAG "WAVExtractor"
     19 #include <utils/Log.h>
     20 
     21 #include "WAVExtractor.h"
     22 
     23 #include <audio_utils/primitives.h>
     24 #include <media/DataSourceBase.h>
     25 #include <media/MediaTrack.h>
     26 #include <media/stagefright/foundation/ADebug.h>
     27 #include <media/stagefright/MediaBufferGroup.h>
     28 #include <media/stagefright/MediaDefs.h>
     29 #include <media/stagefright/MediaErrors.h>
     30 #include <media/stagefright/MetaData.h>
     31 #include <utils/String8.h>
     32 #include <cutils/bitops.h>
     33 
     34 #define CHANNEL_MASK_USE_CHANNEL_ORDER 0
     35 
     36 namespace android {
     37 
     38 enum {
     39     WAVE_FORMAT_PCM        = 0x0001,
     40     WAVE_FORMAT_IEEE_FLOAT = 0x0003,
     41     WAVE_FORMAT_ALAW       = 0x0006,
     42     WAVE_FORMAT_MULAW      = 0x0007,
     43     WAVE_FORMAT_MSGSM      = 0x0031,
     44     WAVE_FORMAT_EXTENSIBLE = 0xFFFE
     45 };
     46 
     47 static const char* WAVEEXT_SUBFORMAT = "\x00\x00\x00\x00\x10\x00\x80\x00\x00\xAA\x00\x38\x9B\x71";
     48 static const char* AMBISONIC_SUBFORMAT = "\x00\x00\x21\x07\xD3\x11\x86\x44\xC8\xC1\xCA\x00\x00\x00";
     49 
     50 static uint32_t U32_LE_AT(const uint8_t *ptr) {
     51     return ptr[3] << 24 | ptr[2] << 16 | ptr[1] << 8 | ptr[0];
     52 }
     53 
     54 static uint16_t U16_LE_AT(const uint8_t *ptr) {
     55     return ptr[1] << 8 | ptr[0];
     56 }
     57 
     58 struct WAVSource : public MediaTrack {
     59     WAVSource(
     60             DataSourceBase *dataSource,
     61             MetaDataBase &meta,
     62             uint16_t waveFormat,
     63             int32_t bitsPerSample,
     64             off64_t offset, size_t size);
     65 
     66     virtual status_t start(MetaDataBase *params = NULL);
     67     virtual status_t stop();
     68     virtual status_t getFormat(MetaDataBase &meta);
     69 
     70     virtual status_t read(
     71             MediaBufferBase **buffer, const ReadOptions *options = NULL);
     72 
     73     virtual bool supportNonblockingRead() { return true; }
     74 
     75 protected:
     76     virtual ~WAVSource();
     77 
     78 private:
     79     static const size_t kMaxFrameSize;
     80 
     81     DataSourceBase *mDataSource;
     82     MetaDataBase &mMeta;
     83     uint16_t mWaveFormat;
     84     int32_t mSampleRate;
     85     int32_t mNumChannels;
     86     int32_t mBitsPerSample;
     87     off64_t mOffset;
     88     size_t mSize;
     89     bool mStarted;
     90     MediaBufferGroup *mGroup;
     91     off64_t mCurrentPos;
     92 
     93     WAVSource(const WAVSource &);
     94     WAVSource &operator=(const WAVSource &);
     95 };
     96 
     97 WAVExtractor::WAVExtractor(DataSourceBase *source)
     98     : mDataSource(source),
     99       mValidFormat(false),
    100       mChannelMask(CHANNEL_MASK_USE_CHANNEL_ORDER) {
    101     mInitCheck = init();
    102 }
    103 
    104 WAVExtractor::~WAVExtractor() {
    105 }
    106 
    107 status_t WAVExtractor::getMetaData(MetaDataBase &meta) {
    108     meta.clear();
    109     if (mInitCheck == OK) {
    110         meta.setCString(kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_WAV);
    111     }
    112 
    113     return OK;
    114 }
    115 
    116 size_t WAVExtractor::countTracks() {
    117     return mInitCheck == OK ? 1 : 0;
    118 }
    119 
    120 MediaTrack *WAVExtractor::getTrack(size_t index) {
    121     if (mInitCheck != OK || index > 0) {
    122         return NULL;
    123     }
    124 
    125     return new WAVSource(
    126             mDataSource, mTrackMeta,
    127             mWaveFormat, mBitsPerSample, mDataOffset, mDataSize);
    128 }
    129 
    130 status_t WAVExtractor::getTrackMetaData(
    131         MetaDataBase &meta,
    132         size_t index, uint32_t /* flags */) {
    133     if (mInitCheck != OK || index > 0) {
    134         return UNKNOWN_ERROR;
    135     }
    136 
    137     meta = mTrackMeta;
    138     return OK;
    139 }
    140 
    141 status_t WAVExtractor::init() {
    142     uint8_t header[12];
    143     if (mDataSource->readAt(
    144                 0, header, sizeof(header)) < (ssize_t)sizeof(header)) {
    145         return NO_INIT;
    146     }
    147 
    148     if (memcmp(header, "RIFF", 4) || memcmp(&header[8], "WAVE", 4)) {
    149         return NO_INIT;
    150     }
    151 
    152     size_t totalSize = U32_LE_AT(&header[4]);
    153 
    154     off64_t offset = 12;
    155     size_t remainingSize = totalSize;
    156     while (remainingSize >= 8) {
    157         uint8_t chunkHeader[8];
    158         if (mDataSource->readAt(offset, chunkHeader, 8) < 8) {
    159             return NO_INIT;
    160         }
    161 
    162         remainingSize -= 8;
    163         offset += 8;
    164 
    165         uint32_t chunkSize = U32_LE_AT(&chunkHeader[4]);
    166 
    167         if (chunkSize > remainingSize) {
    168             return NO_INIT;
    169         }
    170 
    171         if (!memcmp(chunkHeader, "fmt ", 4)) {
    172             if (chunkSize < 16) {
    173                 return NO_INIT;
    174             }
    175 
    176             uint8_t formatSpec[40];
    177             if (mDataSource->readAt(offset, formatSpec, 2) < 2) {
    178                 return NO_INIT;
    179             }
    180 
    181             mWaveFormat = U16_LE_AT(formatSpec);
    182             if (mWaveFormat != WAVE_FORMAT_PCM
    183                     && mWaveFormat != WAVE_FORMAT_IEEE_FLOAT
    184                     && mWaveFormat != WAVE_FORMAT_ALAW
    185                     && mWaveFormat != WAVE_FORMAT_MULAW
    186                     && mWaveFormat != WAVE_FORMAT_MSGSM
    187                     && mWaveFormat != WAVE_FORMAT_EXTENSIBLE) {
    188                 return ERROR_UNSUPPORTED;
    189             }
    190 
    191             uint8_t fmtSize = 16;
    192             if (mWaveFormat == WAVE_FORMAT_EXTENSIBLE) {
    193                 fmtSize = 40;
    194             }
    195             if (mDataSource->readAt(offset, formatSpec, fmtSize) < fmtSize) {
    196                 return NO_INIT;
    197             }
    198 
    199             mNumChannels = U16_LE_AT(&formatSpec[2]);
    200 
    201             if (mNumChannels < 1 || mNumChannels > 8) {
    202                 ALOGE("Unsupported number of channels (%d)", mNumChannels);
    203                 return ERROR_UNSUPPORTED;
    204             }
    205 
    206             if (mWaveFormat != WAVE_FORMAT_EXTENSIBLE) {
    207                 if (mNumChannels != 1 && mNumChannels != 2) {
    208                     ALOGW("More than 2 channels (%d) in non-WAVE_EXT, unknown channel mask",
    209                             mNumChannels);
    210                 }
    211             }
    212 
    213             mSampleRate = U32_LE_AT(&formatSpec[4]);
    214 
    215             if (mSampleRate == 0) {
    216                 return ERROR_MALFORMED;
    217             }
    218 
    219             mBitsPerSample = U16_LE_AT(&formatSpec[14]);
    220 
    221             if (mWaveFormat == WAVE_FORMAT_EXTENSIBLE) {
    222                 uint16_t validBitsPerSample = U16_LE_AT(&formatSpec[18]);
    223                 if (validBitsPerSample != mBitsPerSample) {
    224                     if (validBitsPerSample != 0) {
    225                         ALOGE("validBits(%d) != bitsPerSample(%d) are not supported",
    226                                 validBitsPerSample, mBitsPerSample);
    227                         return ERROR_UNSUPPORTED;
    228                     } else {
    229                         // we only support valitBitsPerSample == bitsPerSample but some WAV_EXT
    230                         // writers don't correctly set the valid bits value, and leave it at 0.
    231                         ALOGW("WAVE_EXT has 0 valid bits per sample, ignoring");
    232                     }
    233                 }
    234 
    235                 mChannelMask = U32_LE_AT(&formatSpec[20]);
    236                 ALOGV("numChannels=%d channelMask=0x%x", mNumChannels, mChannelMask);
    237                 if ((mChannelMask >> 18) != 0) {
    238                     ALOGE("invalid channel mask 0x%x", mChannelMask);
    239                     return ERROR_MALFORMED;
    240                 }
    241 
    242                 if ((mChannelMask != CHANNEL_MASK_USE_CHANNEL_ORDER)
    243                         && (popcount(mChannelMask) != mNumChannels)) {
    244                     ALOGE("invalid number of channels (%d) in channel mask (0x%x)",
    245                             popcount(mChannelMask), mChannelMask);
    246                     return ERROR_MALFORMED;
    247                 }
    248 
    249                 // In a WAVE_EXT header, the first two bytes of the GUID stored at byte 24 contain
    250                 // the sample format, using the same definitions as a regular WAV header
    251                 mWaveFormat = U16_LE_AT(&formatSpec[24]);
    252                 if (memcmp(&formatSpec[26], WAVEEXT_SUBFORMAT, 14) &&
    253                     memcmp(&formatSpec[26], AMBISONIC_SUBFORMAT, 14)) {
    254                     ALOGE("unsupported GUID");
    255                     return ERROR_UNSUPPORTED;
    256                 }
    257             }
    258 
    259             if (mWaveFormat == WAVE_FORMAT_PCM) {
    260                 if (mBitsPerSample != 8 && mBitsPerSample != 16
    261                     && mBitsPerSample != 24 && mBitsPerSample != 32) {
    262                     return ERROR_UNSUPPORTED;
    263                 }
    264             } else if (mWaveFormat == WAVE_FORMAT_IEEE_FLOAT) {
    265                 if (mBitsPerSample != 32) {  // TODO we don't support double
    266                     return ERROR_UNSUPPORTED;
    267                 }
    268             }
    269             else if (mWaveFormat == WAVE_FORMAT_MSGSM) {
    270                 if (mBitsPerSample != 0) {
    271                     return ERROR_UNSUPPORTED;
    272                 }
    273             } else if (mWaveFormat == WAVE_FORMAT_MULAW || mWaveFormat == WAVE_FORMAT_ALAW) {
    274                 if (mBitsPerSample != 8) {
    275                     return ERROR_UNSUPPORTED;
    276                 }
    277             } else {
    278                 return ERROR_UNSUPPORTED;
    279             }
    280 
    281             mValidFormat = true;
    282         } else if (!memcmp(chunkHeader, "data", 4)) {
    283             if (mValidFormat) {
    284                 mDataOffset = offset;
    285                 mDataSize = chunkSize;
    286 
    287                 mTrackMeta.clear();
    288 
    289                 switch (mWaveFormat) {
    290                     case WAVE_FORMAT_PCM:
    291                     case WAVE_FORMAT_IEEE_FLOAT:
    292                         mTrackMeta.setCString(
    293                                 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_RAW);
    294                         break;
    295                     case WAVE_FORMAT_ALAW:
    296                         mTrackMeta.setCString(
    297                                 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_G711_ALAW);
    298                         break;
    299                     case WAVE_FORMAT_MSGSM:
    300                         mTrackMeta.setCString(
    301                                 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MSGSM);
    302                         break;
    303                     default:
    304                         CHECK_EQ(mWaveFormat, (uint16_t)WAVE_FORMAT_MULAW);
    305                         mTrackMeta.setCString(
    306                                 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_G711_MLAW);
    307                         break;
    308                 }
    309 
    310                 mTrackMeta.setInt32(kKeyChannelCount, mNumChannels);
    311                 mTrackMeta.setInt32(kKeyChannelMask, mChannelMask);
    312                 mTrackMeta.setInt32(kKeySampleRate, mSampleRate);
    313                 mTrackMeta.setInt32(kKeyPcmEncoding, kAudioEncodingPcm16bit);
    314 
    315                 int64_t durationUs = 0;
    316                 if (mWaveFormat == WAVE_FORMAT_MSGSM) {
    317                     // 65 bytes decode to 320 8kHz samples
    318                     durationUs =
    319                         1000000LL * (mDataSize / 65 * 320) / 8000;
    320                 } else {
    321                     size_t bytesPerSample = mBitsPerSample >> 3;
    322 
    323                     if (!bytesPerSample || !mNumChannels)
    324                         return ERROR_MALFORMED;
    325 
    326                     size_t num_samples = mDataSize / (mNumChannels * bytesPerSample);
    327 
    328                     if (!mSampleRate)
    329                         return ERROR_MALFORMED;
    330 
    331                     durationUs =
    332                         1000000LL * num_samples / mSampleRate;
    333                 }
    334 
    335                 mTrackMeta.setInt64(kKeyDuration, durationUs);
    336 
    337                 return OK;
    338             }
    339         }
    340 
    341         offset += chunkSize;
    342     }
    343 
    344     return NO_INIT;
    345 }
    346 
    347 const size_t WAVSource::kMaxFrameSize = 32768;
    348 
    349 WAVSource::WAVSource(
    350         DataSourceBase *dataSource,
    351         MetaDataBase &meta,
    352         uint16_t waveFormat,
    353         int32_t bitsPerSample,
    354         off64_t offset, size_t size)
    355     : mDataSource(dataSource),
    356       mMeta(meta),
    357       mWaveFormat(waveFormat),
    358       mSampleRate(0),
    359       mNumChannels(0),
    360       mBitsPerSample(bitsPerSample),
    361       mOffset(offset),
    362       mSize(size),
    363       mStarted(false),
    364       mGroup(NULL) {
    365     CHECK(mMeta.findInt32(kKeySampleRate, &mSampleRate));
    366     CHECK(mMeta.findInt32(kKeyChannelCount, &mNumChannels));
    367 
    368     mMeta.setInt32(kKeyMaxInputSize, kMaxFrameSize);
    369 }
    370 
    371 WAVSource::~WAVSource() {
    372     if (mStarted) {
    373         stop();
    374     }
    375 }
    376 
    377 status_t WAVSource::start(MetaDataBase * /* params */) {
    378     ALOGV("WAVSource::start");
    379 
    380     CHECK(!mStarted);
    381 
    382     // some WAV files may have large audio buffers that use shared memory transfer.
    383     mGroup = new MediaBufferGroup(4 /* buffers */, kMaxFrameSize);
    384 
    385     if (mBitsPerSample == 8) {
    386         // As a temporary buffer for 8->16 bit conversion.
    387         mGroup->add_buffer(MediaBufferBase::Create(kMaxFrameSize));
    388     }
    389 
    390     mCurrentPos = mOffset;
    391 
    392     mStarted = true;
    393 
    394     return OK;
    395 }
    396 
    397 status_t WAVSource::stop() {
    398     ALOGV("WAVSource::stop");
    399 
    400     CHECK(mStarted);
    401 
    402     delete mGroup;
    403     mGroup = NULL;
    404 
    405     mStarted = false;
    406 
    407     return OK;
    408 }
    409 
    410 status_t WAVSource::getFormat(MetaDataBase &meta) {
    411     ALOGV("WAVSource::getFormat");
    412 
    413     meta = mMeta;
    414     return OK;
    415 }
    416 
    417 status_t WAVSource::read(
    418         MediaBufferBase **out, const ReadOptions *options) {
    419     *out = NULL;
    420 
    421     if (options != nullptr && options->getNonBlocking() && !mGroup->has_buffers()) {
    422         return WOULD_BLOCK;
    423     }
    424 
    425     int64_t seekTimeUs;
    426     ReadOptions::SeekMode mode;
    427     if (options != NULL && options->getSeekTo(&seekTimeUs, &mode)) {
    428         int64_t pos = 0;
    429 
    430         if (mWaveFormat == WAVE_FORMAT_MSGSM) {
    431             // 65 bytes decode to 320 8kHz samples
    432             int64_t samplenumber = (seekTimeUs * mSampleRate) / 1000000;
    433             int64_t framenumber = samplenumber / 320;
    434             pos = framenumber * 65;
    435         } else {
    436             pos = (seekTimeUs * mSampleRate) / 1000000 * mNumChannels * (mBitsPerSample >> 3);
    437         }
    438         if (pos > (off64_t)mSize) {
    439             pos = mSize;
    440         }
    441         mCurrentPos = pos + mOffset;
    442     }
    443 
    444     MediaBufferBase *buffer;
    445     status_t err = mGroup->acquire_buffer(&buffer);
    446     if (err != OK) {
    447         return err;
    448     }
    449 
    450     // make sure that maxBytesToRead is multiple of 3, in 24-bit case
    451     size_t maxBytesToRead =
    452         mBitsPerSample == 8 ? kMaxFrameSize / 2 :
    453         (mBitsPerSample == 24 ? 3*(kMaxFrameSize/3): kMaxFrameSize);
    454 
    455     size_t maxBytesAvailable =
    456         (mCurrentPos - mOffset >= (off64_t)mSize)
    457             ? 0 : mSize - (mCurrentPos - mOffset);
    458 
    459     if (maxBytesToRead > maxBytesAvailable) {
    460         maxBytesToRead = maxBytesAvailable;
    461     }
    462 
    463     if (mWaveFormat == WAVE_FORMAT_MSGSM) {
    464         // Microsoft packs 2 frames into 65 bytes, rather than using separate 33-byte frames,
    465         // so read multiples of 65, and use smaller buffers to account for ~10:1 expansion ratio
    466         if (maxBytesToRead > 1024) {
    467             maxBytesToRead = 1024;
    468         }
    469         maxBytesToRead = (maxBytesToRead / 65) * 65;
    470     } else {
    471         // read only integral amounts of audio unit frames.
    472         const size_t inputUnitFrameSize = mNumChannels * mBitsPerSample / 8;
    473         maxBytesToRead -= maxBytesToRead % inputUnitFrameSize;
    474     }
    475 
    476     ssize_t n = mDataSource->readAt(
    477             mCurrentPos, buffer->data(),
    478             maxBytesToRead);
    479 
    480     if (n <= 0) {
    481         buffer->release();
    482         buffer = NULL;
    483 
    484         return ERROR_END_OF_STREAM;
    485     }
    486 
    487     buffer->set_range(0, n);
    488 
    489     // TODO: add capability to return data as float PCM instead of 16 bit PCM.
    490     if (mWaveFormat == WAVE_FORMAT_PCM) {
    491         if (mBitsPerSample == 8) {
    492             // Convert 8-bit unsigned samples to 16-bit signed.
    493 
    494             // Create new buffer with 2 byte wide samples
    495             MediaBufferBase *tmp;
    496             CHECK_EQ(mGroup->acquire_buffer(&tmp), (status_t)OK);
    497             tmp->set_range(0, 2 * n);
    498 
    499             memcpy_to_i16_from_u8((int16_t *)tmp->data(), (const uint8_t *)buffer->data(), n);
    500             buffer->release();
    501             buffer = tmp;
    502         } else if (mBitsPerSample == 24) {
    503             // Convert 24-bit signed samples to 16-bit signed in place
    504             const size_t numSamples = n / 3;
    505 
    506             memcpy_to_i16_from_p24((int16_t *)buffer->data(), (const uint8_t *)buffer->data(), numSamples);
    507             buffer->set_range(0, 2 * numSamples);
    508         }  else if (mBitsPerSample == 32) {
    509             // Convert 32-bit signed samples to 16-bit signed in place
    510             const size_t numSamples = n / 4;
    511 
    512             memcpy_to_i16_from_i32((int16_t *)buffer->data(), (const int32_t *)buffer->data(), numSamples);
    513             buffer->set_range(0, 2 * numSamples);
    514         }
    515     } else if (mWaveFormat == WAVE_FORMAT_IEEE_FLOAT) {
    516         if (mBitsPerSample == 32) {
    517             // Convert 32-bit float samples to 16-bit signed in place
    518             const size_t numSamples = n / 4;
    519 
    520             memcpy_to_i16_from_float((int16_t *)buffer->data(), (const float *)buffer->data(), numSamples);
    521             buffer->set_range(0, 2 * numSamples);
    522         }
    523     }
    524 
    525     int64_t timeStampUs = 0;
    526 
    527     if (mWaveFormat == WAVE_FORMAT_MSGSM) {
    528         timeStampUs = 1000000LL * (mCurrentPos - mOffset) * 320 / 65 / mSampleRate;
    529     } else {
    530         size_t bytesPerSample = mBitsPerSample >> 3;
    531         timeStampUs = 1000000LL * (mCurrentPos - mOffset)
    532                 / (mNumChannels * bytesPerSample) / mSampleRate;
    533     }
    534 
    535     buffer->meta_data().setInt64(kKeyTime, timeStampUs);
    536 
    537     buffer->meta_data().setInt32(kKeyIsSyncFrame, 1);
    538     mCurrentPos += n;
    539 
    540     *out = buffer;
    541 
    542     return OK;
    543 }
    544 
    545 ////////////////////////////////////////////////////////////////////////////////
    546 
    547 static MediaExtractor* CreateExtractor(
    548         DataSourceBase *source,
    549         void *) {
    550     return new WAVExtractor(source);
    551 }
    552 
    553 static MediaExtractor::CreatorFunc Sniff(
    554         DataSourceBase *source,
    555         float *confidence,
    556         void **,
    557         MediaExtractor::FreeMetaFunc *) {
    558     char header[12];
    559     if (source->readAt(0, header, sizeof(header)) < (ssize_t)sizeof(header)) {
    560         return NULL;
    561     }
    562 
    563     if (memcmp(header, "RIFF", 4) || memcmp(&header[8], "WAVE", 4)) {
    564         return NULL;
    565     }
    566 
    567     MediaExtractor *extractor = new WAVExtractor(source);
    568     int numTracks = extractor->countTracks();
    569     delete extractor;
    570     if (numTracks == 0) {
    571         return NULL;
    572     }
    573 
    574     *confidence = 0.3f;
    575 
    576     return CreateExtractor;
    577 }
    578 
    579 extern "C" {
    580 // This is the only symbol that needs to be exported
    581 __attribute__ ((visibility ("default")))
    582 MediaExtractor::ExtractorDef GETEXTRACTORDEF() {
    583     return {
    584         MediaExtractor::EXTRACTORDEF_VERSION,
    585         UUID("7d613858-5837-4a38-84c5-332d1cddee27"),
    586         1, // version
    587         "WAV Extractor",
    588         Sniff
    589     };
    590 }
    591 
    592 } // extern "C"
    593 
    594 } // namespace android
    595