1 /* 2 * Copyright (C) 2009 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 //#define LOG_NDEBUG 0 18 #define LOG_TAG "WAVExtractor" 19 #include <utils/Log.h> 20 21 #include "include/WAVExtractor.h" 22 23 #include <media/stagefright/foundation/ADebug.h> 24 #include <media/stagefright/DataSource.h> 25 #include <media/stagefright/MediaBufferGroup.h> 26 #include <media/stagefright/MediaDefs.h> 27 #include <media/stagefright/MediaErrors.h> 28 #include <media/stagefright/MediaSource.h> 29 #include <media/stagefright/MetaData.h> 30 #include <utils/String8.h> 31 #include <cutils/bitops.h> 32 33 #define CHANNEL_MASK_USE_CHANNEL_ORDER 0 34 35 namespace android { 36 37 enum { 38 WAVE_FORMAT_PCM = 0x0001, 39 WAVE_FORMAT_ALAW = 0x0006, 40 WAVE_FORMAT_MULAW = 0x0007, 41 WAVE_FORMAT_MSGSM = 0x0031, 42 WAVE_FORMAT_EXTENSIBLE = 0xFFFE 43 }; 44 45 static const char* WAVEEXT_SUBFORMAT = "\x00\x00\x00\x00\x10\x00\x80\x00\x00\xAA\x00\x38\x9B\x71"; 46 47 48 static uint32_t U32_LE_AT(const uint8_t *ptr) { 49 return ptr[3] << 24 | ptr[2] << 16 | ptr[1] << 8 | ptr[0]; 50 } 51 52 static uint16_t U16_LE_AT(const uint8_t *ptr) { 53 return ptr[1] << 8 | ptr[0]; 54 } 55 56 struct WAVSource : public MediaSource { 57 WAVSource( 58 const sp<DataSource> &dataSource, 59 const sp<MetaData> &meta, 60 uint16_t waveFormat, 61 int32_t bitsPerSample, 62 off64_t offset, size_t size); 63 64 virtual status_t start(MetaData *params = NULL); 65 virtual status_t stop(); 66 virtual sp<MetaData> getFormat(); 67 68 virtual status_t read( 69 MediaBuffer **buffer, const ReadOptions *options = NULL); 70 71 protected: 72 virtual ~WAVSource(); 73 74 private: 75 static const size_t kMaxFrameSize; 76 77 sp<DataSource> mDataSource; 78 sp<MetaData> mMeta; 79 uint16_t mWaveFormat; 80 int32_t mSampleRate; 81 int32_t mNumChannels; 82 int32_t mBitsPerSample; 83 off64_t mOffset; 84 size_t mSize; 85 bool mStarted; 86 MediaBufferGroup *mGroup; 87 off64_t mCurrentPos; 88 89 WAVSource(const WAVSource &); 90 WAVSource &operator=(const WAVSource &); 91 }; 92 93 WAVExtractor::WAVExtractor(const sp<DataSource> &source) 94 : mDataSource(source), 95 mValidFormat(false), 96 mChannelMask(CHANNEL_MASK_USE_CHANNEL_ORDER) { 97 mInitCheck = init(); 98 } 99 100 WAVExtractor::~WAVExtractor() { 101 } 102 103 sp<MetaData> WAVExtractor::getMetaData() { 104 sp<MetaData> meta = new MetaData; 105 106 if (mInitCheck != OK) { 107 return meta; 108 } 109 110 meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_WAV); 111 112 return meta; 113 } 114 115 size_t WAVExtractor::countTracks() { 116 return mInitCheck == OK ? 1 : 0; 117 } 118 119 sp<MediaSource> WAVExtractor::getTrack(size_t index) { 120 if (mInitCheck != OK || index > 0) { 121 return NULL; 122 } 123 124 return new WAVSource( 125 mDataSource, mTrackMeta, 126 mWaveFormat, mBitsPerSample, mDataOffset, mDataSize); 127 } 128 129 sp<MetaData> WAVExtractor::getTrackMetaData( 130 size_t index, uint32_t flags) { 131 if (mInitCheck != OK || index > 0) { 132 return NULL; 133 } 134 135 return mTrackMeta; 136 } 137 138 status_t WAVExtractor::init() { 139 uint8_t header[12]; 140 if (mDataSource->readAt( 141 0, header, sizeof(header)) < (ssize_t)sizeof(header)) { 142 return NO_INIT; 143 } 144 145 if (memcmp(header, "RIFF", 4) || memcmp(&header[8], "WAVE", 4)) { 146 return NO_INIT; 147 } 148 149 size_t totalSize = U32_LE_AT(&header[4]); 150 151 off64_t offset = 12; 152 size_t remainingSize = totalSize; 153 while (remainingSize >= 8) { 154 uint8_t chunkHeader[8]; 155 if (mDataSource->readAt(offset, chunkHeader, 8) < 8) { 156 return NO_INIT; 157 } 158 159 remainingSize -= 8; 160 offset += 8; 161 162 uint32_t chunkSize = U32_LE_AT(&chunkHeader[4]); 163 164 if (chunkSize > remainingSize) { 165 return NO_INIT; 166 } 167 168 if (!memcmp(chunkHeader, "fmt ", 4)) { 169 if (chunkSize < 16) { 170 return NO_INIT; 171 } 172 173 uint8_t formatSpec[40]; 174 if (mDataSource->readAt(offset, formatSpec, 2) < 2) { 175 return NO_INIT; 176 } 177 178 mWaveFormat = U16_LE_AT(formatSpec); 179 if (mWaveFormat != WAVE_FORMAT_PCM 180 && mWaveFormat != WAVE_FORMAT_ALAW 181 && mWaveFormat != WAVE_FORMAT_MULAW 182 && mWaveFormat != WAVE_FORMAT_MSGSM 183 && mWaveFormat != WAVE_FORMAT_EXTENSIBLE) { 184 return ERROR_UNSUPPORTED; 185 } 186 187 uint8_t fmtSize = 16; 188 if (mWaveFormat == WAVE_FORMAT_EXTENSIBLE) { 189 fmtSize = 40; 190 } 191 if (mDataSource->readAt(offset, formatSpec, fmtSize) < fmtSize) { 192 return NO_INIT; 193 } 194 195 mNumChannels = U16_LE_AT(&formatSpec[2]); 196 if (mWaveFormat != WAVE_FORMAT_EXTENSIBLE) { 197 if (mNumChannels != 1 && mNumChannels != 2) { 198 ALOGW("More than 2 channels (%d) in non-WAVE_EXT, unknown channel mask", 199 mNumChannels); 200 } 201 } else { 202 if (mNumChannels < 1 && mNumChannels > 8) { 203 return ERROR_UNSUPPORTED; 204 } 205 } 206 207 mSampleRate = U32_LE_AT(&formatSpec[4]); 208 209 if (mSampleRate == 0) { 210 return ERROR_MALFORMED; 211 } 212 213 mBitsPerSample = U16_LE_AT(&formatSpec[14]); 214 215 if (mWaveFormat == WAVE_FORMAT_PCM 216 || mWaveFormat == WAVE_FORMAT_EXTENSIBLE) { 217 if (mBitsPerSample != 8 && mBitsPerSample != 16 218 && mBitsPerSample != 24) { 219 return ERROR_UNSUPPORTED; 220 } 221 } else if (mWaveFormat == WAVE_FORMAT_MSGSM) { 222 if (mBitsPerSample != 0) { 223 return ERROR_UNSUPPORTED; 224 } 225 } else { 226 CHECK(mWaveFormat == WAVE_FORMAT_MULAW 227 || mWaveFormat == WAVE_FORMAT_ALAW); 228 if (mBitsPerSample != 8) { 229 return ERROR_UNSUPPORTED; 230 } 231 } 232 233 if (mWaveFormat == WAVE_FORMAT_EXTENSIBLE) { 234 uint16_t validBitsPerSample = U16_LE_AT(&formatSpec[18]); 235 if (validBitsPerSample != mBitsPerSample) { 236 if (validBitsPerSample != 0) { 237 ALOGE("validBits(%d) != bitsPerSample(%d) are not supported", 238 validBitsPerSample, mBitsPerSample); 239 return ERROR_UNSUPPORTED; 240 } else { 241 // we only support valitBitsPerSample == bitsPerSample but some WAV_EXT 242 // writers don't correctly set the valid bits value, and leave it at 0. 243 ALOGW("WAVE_EXT has 0 valid bits per sample, ignoring"); 244 } 245 } 246 247 mChannelMask = U32_LE_AT(&formatSpec[20]); 248 ALOGV("numChannels=%d channelMask=0x%x", mNumChannels, mChannelMask); 249 if ((mChannelMask >> 18) != 0) { 250 ALOGE("invalid channel mask 0x%x", mChannelMask); 251 return ERROR_MALFORMED; 252 } 253 254 if ((mChannelMask != CHANNEL_MASK_USE_CHANNEL_ORDER) 255 && (popcount(mChannelMask) != mNumChannels)) { 256 ALOGE("invalid number of channels (%d) in channel mask (0x%x)", 257 popcount(mChannelMask), mChannelMask); 258 return ERROR_MALFORMED; 259 } 260 261 // In a WAVE_EXT header, the first two bytes of the GUID stored at byte 24 contain 262 // the sample format, using the same definitions as a regular WAV header 263 mWaveFormat = U16_LE_AT(&formatSpec[24]); 264 if (mWaveFormat != WAVE_FORMAT_PCM 265 && mWaveFormat != WAVE_FORMAT_ALAW 266 && mWaveFormat != WAVE_FORMAT_MULAW) { 267 return ERROR_UNSUPPORTED; 268 } 269 if (memcmp(&formatSpec[26], WAVEEXT_SUBFORMAT, 14)) { 270 ALOGE("unsupported GUID"); 271 return ERROR_UNSUPPORTED; 272 } 273 } 274 275 mValidFormat = true; 276 } else if (!memcmp(chunkHeader, "data", 4)) { 277 if (mValidFormat) { 278 mDataOffset = offset; 279 mDataSize = chunkSize; 280 281 mTrackMeta = new MetaData; 282 283 switch (mWaveFormat) { 284 case WAVE_FORMAT_PCM: 285 mTrackMeta->setCString( 286 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_RAW); 287 break; 288 case WAVE_FORMAT_ALAW: 289 mTrackMeta->setCString( 290 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_G711_ALAW); 291 break; 292 case WAVE_FORMAT_MSGSM: 293 mTrackMeta->setCString( 294 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MSGSM); 295 break; 296 default: 297 CHECK_EQ(mWaveFormat, (uint16_t)WAVE_FORMAT_MULAW); 298 mTrackMeta->setCString( 299 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_G711_MLAW); 300 break; 301 } 302 303 mTrackMeta->setInt32(kKeyChannelCount, mNumChannels); 304 mTrackMeta->setInt32(kKeyChannelMask, mChannelMask); 305 mTrackMeta->setInt32(kKeySampleRate, mSampleRate); 306 307 int64_t durationUs = 0; 308 if (mWaveFormat == WAVE_FORMAT_MSGSM) { 309 // 65 bytes decode to 320 8kHz samples 310 durationUs = 311 1000000LL * (mDataSize / 65 * 320) / 8000; 312 } else { 313 size_t bytesPerSample = mBitsPerSample >> 3; 314 durationUs = 315 1000000LL * (mDataSize / (mNumChannels * bytesPerSample)) 316 / mSampleRate; 317 } 318 319 mTrackMeta->setInt64(kKeyDuration, durationUs); 320 321 return OK; 322 } 323 } 324 325 offset += chunkSize; 326 } 327 328 return NO_INIT; 329 } 330 331 const size_t WAVSource::kMaxFrameSize = 32768; 332 333 WAVSource::WAVSource( 334 const sp<DataSource> &dataSource, 335 const sp<MetaData> &meta, 336 uint16_t waveFormat, 337 int32_t bitsPerSample, 338 off64_t offset, size_t size) 339 : mDataSource(dataSource), 340 mMeta(meta), 341 mWaveFormat(waveFormat), 342 mSampleRate(0), 343 mNumChannels(0), 344 mBitsPerSample(bitsPerSample), 345 mOffset(offset), 346 mSize(size), 347 mStarted(false), 348 mGroup(NULL) { 349 CHECK(mMeta->findInt32(kKeySampleRate, &mSampleRate)); 350 CHECK(mMeta->findInt32(kKeyChannelCount, &mNumChannels)); 351 352 mMeta->setInt32(kKeyMaxInputSize, kMaxFrameSize); 353 } 354 355 WAVSource::~WAVSource() { 356 if (mStarted) { 357 stop(); 358 } 359 } 360 361 status_t WAVSource::start(MetaData *params) { 362 ALOGV("WAVSource::start"); 363 364 CHECK(!mStarted); 365 366 mGroup = new MediaBufferGroup; 367 mGroup->add_buffer(new MediaBuffer(kMaxFrameSize)); 368 369 if (mBitsPerSample == 8) { 370 // As a temporary buffer for 8->16 bit conversion. 371 mGroup->add_buffer(new MediaBuffer(kMaxFrameSize)); 372 } 373 374 mCurrentPos = mOffset; 375 376 mStarted = true; 377 378 return OK; 379 } 380 381 status_t WAVSource::stop() { 382 ALOGV("WAVSource::stop"); 383 384 CHECK(mStarted); 385 386 delete mGroup; 387 mGroup = NULL; 388 389 mStarted = false; 390 391 return OK; 392 } 393 394 sp<MetaData> WAVSource::getFormat() { 395 ALOGV("WAVSource::getFormat"); 396 397 return mMeta; 398 } 399 400 status_t WAVSource::read( 401 MediaBuffer **out, const ReadOptions *options) { 402 *out = NULL; 403 404 int64_t seekTimeUs; 405 ReadOptions::SeekMode mode; 406 if (options != NULL && options->getSeekTo(&seekTimeUs, &mode)) { 407 int64_t pos = 0; 408 409 if (mWaveFormat == WAVE_FORMAT_MSGSM) { 410 // 65 bytes decode to 320 8kHz samples 411 int64_t samplenumber = (seekTimeUs * mSampleRate) / 1000000; 412 int64_t framenumber = samplenumber / 320; 413 pos = framenumber * 65; 414 } else { 415 pos = (seekTimeUs * mSampleRate) / 1000000 * mNumChannels * (mBitsPerSample >> 3); 416 } 417 if (pos > mSize) { 418 pos = mSize; 419 } 420 mCurrentPos = pos + mOffset; 421 } 422 423 MediaBuffer *buffer; 424 status_t err = mGroup->acquire_buffer(&buffer); 425 if (err != OK) { 426 return err; 427 } 428 429 // make sure that maxBytesToRead is multiple of 3, in 24-bit case 430 size_t maxBytesToRead = 431 mBitsPerSample == 8 ? kMaxFrameSize / 2 : 432 (mBitsPerSample == 24 ? 3*(kMaxFrameSize/3): kMaxFrameSize); 433 434 size_t maxBytesAvailable = 435 (mCurrentPos - mOffset >= (off64_t)mSize) 436 ? 0 : mSize - (mCurrentPos - mOffset); 437 438 if (maxBytesToRead > maxBytesAvailable) { 439 maxBytesToRead = maxBytesAvailable; 440 } 441 442 if (mWaveFormat == WAVE_FORMAT_MSGSM) { 443 // Microsoft packs 2 frames into 65 bytes, rather than using separate 33-byte frames, 444 // so read multiples of 65, and use smaller buffers to account for ~10:1 expansion ratio 445 if (maxBytesToRead > 1024) { 446 maxBytesToRead = 1024; 447 } 448 maxBytesToRead = (maxBytesToRead / 65) * 65; 449 } 450 451 ssize_t n = mDataSource->readAt( 452 mCurrentPos, buffer->data(), 453 maxBytesToRead); 454 455 if (n <= 0) { 456 buffer->release(); 457 buffer = NULL; 458 459 return ERROR_END_OF_STREAM; 460 } 461 462 buffer->set_range(0, n); 463 464 if (mWaveFormat == WAVE_FORMAT_PCM || mWaveFormat == WAVE_FORMAT_EXTENSIBLE) { 465 if (mBitsPerSample == 8) { 466 // Convert 8-bit unsigned samples to 16-bit signed. 467 468 MediaBuffer *tmp; 469 CHECK_EQ(mGroup->acquire_buffer(&tmp), (status_t)OK); 470 471 // The new buffer holds the sample number of samples, but each 472 // one is 2 bytes wide. 473 tmp->set_range(0, 2 * n); 474 475 int16_t *dst = (int16_t *)tmp->data(); 476 const uint8_t *src = (const uint8_t *)buffer->data(); 477 ssize_t numBytes = n; 478 479 while (numBytes-- > 0) { 480 *dst++ = ((int16_t)(*src) - 128) * 256; 481 ++src; 482 } 483 484 buffer->release(); 485 buffer = tmp; 486 } else if (mBitsPerSample == 24) { 487 // Convert 24-bit signed samples to 16-bit signed. 488 489 const uint8_t *src = 490 (const uint8_t *)buffer->data() + buffer->range_offset(); 491 int16_t *dst = (int16_t *)src; 492 493 size_t numSamples = buffer->range_length() / 3; 494 for (size_t i = 0; i < numSamples; ++i) { 495 int32_t x = (int32_t)(src[0] | src[1] << 8 | src[2] << 16); 496 x = (x << 8) >> 8; // sign extension 497 498 x = x >> 8; 499 *dst++ = (int16_t)x; 500 src += 3; 501 } 502 503 buffer->set_range(buffer->range_offset(), 2 * numSamples); 504 } 505 } 506 507 int64_t timeStampUs = 0; 508 509 if (mWaveFormat == WAVE_FORMAT_MSGSM) { 510 timeStampUs = 1000000LL * (mCurrentPos - mOffset) * 320 / 65 / mSampleRate; 511 } else { 512 size_t bytesPerSample = mBitsPerSample >> 3; 513 timeStampUs = 1000000LL * (mCurrentPos - mOffset) 514 / (mNumChannels * bytesPerSample) / mSampleRate; 515 } 516 517 buffer->meta_data()->setInt64(kKeyTime, timeStampUs); 518 519 buffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 520 mCurrentPos += n; 521 522 *out = buffer; 523 524 return OK; 525 } 526 527 //////////////////////////////////////////////////////////////////////////////// 528 529 bool SniffWAV( 530 const sp<DataSource> &source, String8 *mimeType, float *confidence, 531 sp<AMessage> *) { 532 char header[12]; 533 if (source->readAt(0, header, sizeof(header)) < (ssize_t)sizeof(header)) { 534 return false; 535 } 536 537 if (memcmp(header, "RIFF", 4) || memcmp(&header[8], "WAVE", 4)) { 538 return false; 539 } 540 541 sp<MediaExtractor> extractor = new WAVExtractor(source); 542 if (extractor->countTracks() == 0) { 543 return false; 544 } 545 546 *mimeType = MEDIA_MIMETYPE_CONTAINER_WAV; 547 *confidence = 0.3f; 548 549 return true; 550 } 551 552 } // namespace android 553