1 /* 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "webrtc/modules/audio_coding/neteq/audio_decoder_impl.h" 12 13 #include <assert.h> 14 #include <string.h> // memmove 15 16 #ifdef WEBRTC_CODEC_CELT 17 #include "webrtc/modules/audio_coding/codecs/celt/include/celt_interface.h" 18 #endif 19 #include "webrtc/modules/audio_coding/codecs/cng/include/webrtc_cng.h" 20 #include "webrtc/modules/audio_coding/codecs/g711/include/g711_interface.h" 21 #ifdef WEBRTC_CODEC_G722 22 #include "webrtc/modules/audio_coding/codecs/g722/include/g722_interface.h" 23 #endif 24 #ifdef WEBRTC_CODEC_ILBC 25 #include "webrtc/modules/audio_coding/codecs/ilbc/interface/ilbc.h" 26 #endif 27 #ifdef WEBRTC_CODEC_ISACFX 28 #include "webrtc/modules/audio_coding/codecs/isac/fix/interface/isacfix.h" 29 #endif 30 #ifdef WEBRTC_CODEC_ISAC 31 #include "webrtc/modules/audio_coding/codecs/isac/main/interface/isac.h" 32 #endif 33 #ifdef WEBRTC_CODEC_OPUS 34 #include "webrtc/modules/audio_coding/codecs/opus/interface/opus_interface.h" 35 #endif 36 #ifdef WEBRTC_CODEC_PCM16 37 #include "webrtc/modules/audio_coding/codecs/pcm16b/include/pcm16b.h" 38 #endif 39 40 namespace webrtc { 41 42 // PCMu 43 int AudioDecoderPcmU::Decode(const uint8_t* encoded, size_t encoded_len, 44 int16_t* decoded, SpeechType* speech_type) { 45 int16_t temp_type = 1; // Default is speech. 46 int16_t ret = WebRtcG711_DecodeU( 47 state_, reinterpret_cast<int16_t*>(const_cast<uint8_t*>(encoded)), 48 static_cast<int16_t>(encoded_len), decoded, &temp_type); 49 *speech_type = ConvertSpeechType(temp_type); 50 return ret; 51 } 52 53 int AudioDecoderPcmU::PacketDuration(const uint8_t* encoded, 54 size_t encoded_len) { 55 // One encoded byte per sample per channel. 56 return static_cast<int>(encoded_len / channels_); 57 } 58 59 // PCMa 60 int AudioDecoderPcmA::Decode(const uint8_t* encoded, size_t encoded_len, 61 int16_t* decoded, SpeechType* speech_type) { 62 int16_t temp_type = 1; // Default is speech. 63 int16_t ret = WebRtcG711_DecodeA( 64 state_, reinterpret_cast<int16_t*>(const_cast<uint8_t*>(encoded)), 65 static_cast<int16_t>(encoded_len), decoded, &temp_type); 66 *speech_type = ConvertSpeechType(temp_type); 67 return ret; 68 } 69 70 int AudioDecoderPcmA::PacketDuration(const uint8_t* encoded, 71 size_t encoded_len) { 72 // One encoded byte per sample per channel. 73 return static_cast<int>(encoded_len / channels_); 74 } 75 76 // PCM16B 77 #ifdef WEBRTC_CODEC_PCM16 78 AudioDecoderPcm16B::AudioDecoderPcm16B(enum NetEqDecoder type) 79 : AudioDecoder(type) { 80 assert(type == kDecoderPCM16B || 81 type == kDecoderPCM16Bwb || 82 type == kDecoderPCM16Bswb32kHz || 83 type == kDecoderPCM16Bswb48kHz); 84 } 85 86 int AudioDecoderPcm16B::Decode(const uint8_t* encoded, size_t encoded_len, 87 int16_t* decoded, SpeechType* speech_type) { 88 int16_t temp_type = 1; // Default is speech. 89 int16_t ret = WebRtcPcm16b_DecodeW16( 90 state_, reinterpret_cast<int16_t*>(const_cast<uint8_t*>(encoded)), 91 static_cast<int16_t>(encoded_len), decoded, &temp_type); 92 *speech_type = ConvertSpeechType(temp_type); 93 return ret; 94 } 95 96 int AudioDecoderPcm16B::PacketDuration(const uint8_t* encoded, 97 size_t encoded_len) { 98 // Two encoded byte per sample per channel. 99 return static_cast<int>(encoded_len / (2 * channels_)); 100 } 101 102 AudioDecoderPcm16BMultiCh::AudioDecoderPcm16BMultiCh( 103 enum NetEqDecoder type) 104 : AudioDecoderPcm16B(kDecoderPCM16B) { // This will be changed below. 105 codec_type_ = type; // Changing to actual type here. 106 switch (codec_type_) { 107 case kDecoderPCM16B_2ch: 108 case kDecoderPCM16Bwb_2ch: 109 case kDecoderPCM16Bswb32kHz_2ch: 110 case kDecoderPCM16Bswb48kHz_2ch: 111 channels_ = 2; 112 break; 113 case kDecoderPCM16B_5ch: 114 channels_ = 5; 115 break; 116 default: 117 assert(false); 118 } 119 } 120 #endif 121 122 // iLBC 123 #ifdef WEBRTC_CODEC_ILBC 124 AudioDecoderIlbc::AudioDecoderIlbc() : AudioDecoder(kDecoderILBC) { 125 WebRtcIlbcfix_DecoderCreate(reinterpret_cast<iLBC_decinst_t**>(&state_)); 126 } 127 128 AudioDecoderIlbc::~AudioDecoderIlbc() { 129 WebRtcIlbcfix_DecoderFree(static_cast<iLBC_decinst_t*>(state_)); 130 } 131 132 int AudioDecoderIlbc::Decode(const uint8_t* encoded, size_t encoded_len, 133 int16_t* decoded, SpeechType* speech_type) { 134 int16_t temp_type = 1; // Default is speech. 135 int16_t ret = WebRtcIlbcfix_Decode(static_cast<iLBC_decinst_t*>(state_), 136 reinterpret_cast<const int16_t*>(encoded), 137 static_cast<int16_t>(encoded_len), decoded, 138 &temp_type); 139 *speech_type = ConvertSpeechType(temp_type); 140 return ret; 141 } 142 143 int AudioDecoderIlbc::DecodePlc(int num_frames, int16_t* decoded) { 144 return WebRtcIlbcfix_NetEqPlc(static_cast<iLBC_decinst_t*>(state_), 145 decoded, num_frames); 146 } 147 148 int AudioDecoderIlbc::Init() { 149 return WebRtcIlbcfix_Decoderinit30Ms(static_cast<iLBC_decinst_t*>(state_)); 150 } 151 #endif 152 153 // iSAC float 154 #ifdef WEBRTC_CODEC_ISAC 155 AudioDecoderIsac::AudioDecoderIsac() : AudioDecoder(kDecoderISAC) { 156 WebRtcIsac_Create(reinterpret_cast<ISACStruct**>(&state_)); 157 WebRtcIsac_SetDecSampRate(static_cast<ISACStruct*>(state_), 16000); 158 } 159 160 AudioDecoderIsac::~AudioDecoderIsac() { 161 WebRtcIsac_Free(static_cast<ISACStruct*>(state_)); 162 } 163 164 int AudioDecoderIsac::Decode(const uint8_t* encoded, size_t encoded_len, 165 int16_t* decoded, SpeechType* speech_type) { 166 int16_t temp_type = 1; // Default is speech. 167 int16_t ret = WebRtcIsac_Decode(static_cast<ISACStruct*>(state_), 168 reinterpret_cast<const uint16_t*>(encoded), 169 static_cast<int16_t>(encoded_len), decoded, 170 &temp_type); 171 *speech_type = ConvertSpeechType(temp_type); 172 return ret; 173 } 174 175 int AudioDecoderIsac::DecodeRedundant(const uint8_t* encoded, 176 size_t encoded_len, int16_t* decoded, 177 SpeechType* speech_type) { 178 int16_t temp_type = 1; // Default is speech. 179 int16_t ret = WebRtcIsac_DecodeRcu(static_cast<ISACStruct*>(state_), 180 reinterpret_cast<const uint16_t*>(encoded), 181 static_cast<int16_t>(encoded_len), decoded, 182 &temp_type); 183 *speech_type = ConvertSpeechType(temp_type); 184 return ret; 185 } 186 187 int AudioDecoderIsac::DecodePlc(int num_frames, int16_t* decoded) { 188 return WebRtcIsac_DecodePlc(static_cast<ISACStruct*>(state_), 189 decoded, num_frames); 190 } 191 192 int AudioDecoderIsac::Init() { 193 return WebRtcIsac_DecoderInit(static_cast<ISACStruct*>(state_)); 194 } 195 196 int AudioDecoderIsac::IncomingPacket(const uint8_t* payload, 197 size_t payload_len, 198 uint16_t rtp_sequence_number, 199 uint32_t rtp_timestamp, 200 uint32_t arrival_timestamp) { 201 return WebRtcIsac_UpdateBwEstimate(static_cast<ISACStruct*>(state_), 202 reinterpret_cast<const uint16_t*>(payload), 203 static_cast<int32_t>(payload_len), 204 rtp_sequence_number, 205 rtp_timestamp, 206 arrival_timestamp); 207 } 208 209 int AudioDecoderIsac::ErrorCode() { 210 return WebRtcIsac_GetErrorCode(static_cast<ISACStruct*>(state_)); 211 } 212 213 // iSAC SWB 214 AudioDecoderIsacSwb::AudioDecoderIsacSwb() : AudioDecoderIsac() { 215 codec_type_ = kDecoderISACswb; 216 WebRtcIsac_SetDecSampRate(static_cast<ISACStruct*>(state_), 32000); 217 } 218 219 // iSAC FB 220 AudioDecoderIsacFb::AudioDecoderIsacFb() : AudioDecoderIsacSwb() { 221 codec_type_ = kDecoderISACfb; 222 } 223 #endif 224 225 // iSAC fix 226 #ifdef WEBRTC_CODEC_ISACFX 227 AudioDecoderIsacFix::AudioDecoderIsacFix() : AudioDecoder(kDecoderISAC) { 228 WebRtcIsacfix_Create(reinterpret_cast<ISACFIX_MainStruct**>(&state_)); 229 } 230 231 AudioDecoderIsacFix::~AudioDecoderIsacFix() { 232 WebRtcIsacfix_Free(static_cast<ISACFIX_MainStruct*>(state_)); 233 } 234 235 int AudioDecoderIsacFix::Decode(const uint8_t* encoded, size_t encoded_len, 236 int16_t* decoded, SpeechType* speech_type) { 237 int16_t temp_type = 1; // Default is speech. 238 int16_t ret = WebRtcIsacfix_Decode(static_cast<ISACFIX_MainStruct*>(state_), 239 reinterpret_cast<const uint16_t*>(encoded), 240 static_cast<int16_t>(encoded_len), decoded, 241 &temp_type); 242 *speech_type = ConvertSpeechType(temp_type); 243 return ret; 244 } 245 246 int AudioDecoderIsacFix::Init() { 247 return WebRtcIsacfix_DecoderInit(static_cast<ISACFIX_MainStruct*>(state_)); 248 } 249 250 int AudioDecoderIsacFix::IncomingPacket(const uint8_t* payload, 251 size_t payload_len, 252 uint16_t rtp_sequence_number, 253 uint32_t rtp_timestamp, 254 uint32_t arrival_timestamp) { 255 return WebRtcIsacfix_UpdateBwEstimate( 256 static_cast<ISACFIX_MainStruct*>(state_), 257 reinterpret_cast<const uint16_t*>(payload), 258 static_cast<int32_t>(payload_len), 259 rtp_sequence_number, rtp_timestamp, arrival_timestamp); 260 } 261 262 int AudioDecoderIsacFix::ErrorCode() { 263 return WebRtcIsacfix_GetErrorCode(static_cast<ISACFIX_MainStruct*>(state_)); 264 } 265 #endif 266 267 // G.722 268 #ifdef WEBRTC_CODEC_G722 269 AudioDecoderG722::AudioDecoderG722() : AudioDecoder(kDecoderG722) { 270 WebRtcG722_CreateDecoder(reinterpret_cast<G722DecInst**>(&state_)); 271 } 272 273 AudioDecoderG722::~AudioDecoderG722() { 274 WebRtcG722_FreeDecoder(static_cast<G722DecInst*>(state_)); 275 } 276 277 int AudioDecoderG722::Decode(const uint8_t* encoded, size_t encoded_len, 278 int16_t* decoded, SpeechType* speech_type) { 279 int16_t temp_type = 1; // Default is speech. 280 int16_t ret = WebRtcG722_Decode( 281 static_cast<G722DecInst*>(state_), 282 const_cast<int16_t*>(reinterpret_cast<const int16_t*>(encoded)), 283 static_cast<int16_t>(encoded_len), decoded, &temp_type); 284 *speech_type = ConvertSpeechType(temp_type); 285 return ret; 286 } 287 288 int AudioDecoderG722::Init() { 289 return WebRtcG722_DecoderInit(static_cast<G722DecInst*>(state_)); 290 } 291 292 int AudioDecoderG722::PacketDuration(const uint8_t* encoded, 293 size_t encoded_len) { 294 // 1/2 encoded byte per sample per channel. 295 return static_cast<int>(2 * encoded_len / channels_); 296 } 297 298 AudioDecoderG722Stereo::AudioDecoderG722Stereo() 299 : AudioDecoderG722(), 300 state_left_(state_), // Base member |state_| is used for left channel. 301 state_right_(NULL) { 302 channels_ = 2; 303 // |state_left_| already created by the base class AudioDecoderG722. 304 WebRtcG722_CreateDecoder(reinterpret_cast<G722DecInst**>(&state_right_)); 305 } 306 307 AudioDecoderG722Stereo::~AudioDecoderG722Stereo() { 308 // |state_left_| will be freed by the base class AudioDecoderG722. 309 WebRtcG722_FreeDecoder(static_cast<G722DecInst*>(state_right_)); 310 } 311 312 int AudioDecoderG722Stereo::Decode(const uint8_t* encoded, size_t encoded_len, 313 int16_t* decoded, SpeechType* speech_type) { 314 int16_t temp_type = 1; // Default is speech. 315 // De-interleave the bit-stream into two separate payloads. 316 uint8_t* encoded_deinterleaved = new uint8_t[encoded_len]; 317 SplitStereoPacket(encoded, encoded_len, encoded_deinterleaved); 318 // Decode left and right. 319 int16_t ret = WebRtcG722_Decode( 320 static_cast<G722DecInst*>(state_left_), 321 reinterpret_cast<int16_t*>(encoded_deinterleaved), 322 static_cast<int16_t>(encoded_len / 2), decoded, &temp_type); 323 if (ret >= 0) { 324 int decoded_len = ret; 325 ret = WebRtcG722_Decode( 326 static_cast<G722DecInst*>(state_right_), 327 reinterpret_cast<int16_t*>(&encoded_deinterleaved[encoded_len / 2]), 328 static_cast<int16_t>(encoded_len / 2), &decoded[decoded_len], &temp_type); 329 if (ret == decoded_len) { 330 decoded_len += ret; 331 // Interleave output. 332 for (int k = decoded_len / 2; k < decoded_len; k++) { 333 int16_t temp = decoded[k]; 334 memmove(&decoded[2 * k - decoded_len + 2], 335 &decoded[2 * k - decoded_len + 1], 336 (decoded_len - k - 1) * sizeof(int16_t)); 337 decoded[2 * k - decoded_len + 1] = temp; 338 } 339 ret = decoded_len; // Return total number of samples. 340 } 341 } 342 *speech_type = ConvertSpeechType(temp_type); 343 delete [] encoded_deinterleaved; 344 return ret; 345 } 346 347 int AudioDecoderG722Stereo::Init() { 348 int ret = WebRtcG722_DecoderInit(static_cast<G722DecInst*>(state_right_)); 349 if (ret != 0) { 350 return ret; 351 } 352 return AudioDecoderG722::Init(); 353 } 354 355 // Split the stereo packet and place left and right channel after each other 356 // in the output array. 357 void AudioDecoderG722Stereo::SplitStereoPacket(const uint8_t* encoded, 358 size_t encoded_len, 359 uint8_t* encoded_deinterleaved) { 360 assert(encoded); 361 // Regroup the 4 bits/sample so |l1 l2| |r1 r2| |l3 l4| |r3 r4| ..., 362 // where "lx" is 4 bits representing left sample number x, and "rx" right 363 // sample. Two samples fit in one byte, represented with |...|. 364 for (size_t i = 0; i + 1 < encoded_len; i += 2) { 365 uint8_t right_byte = ((encoded[i] & 0x0F) << 4) + (encoded[i + 1] & 0x0F); 366 encoded_deinterleaved[i] = (encoded[i] & 0xF0) + (encoded[i + 1] >> 4); 367 encoded_deinterleaved[i + 1] = right_byte; 368 } 369 370 // Move one byte representing right channel each loop, and place it at the 371 // end of the bytestream vector. After looping the data is reordered to: 372 // |l1 l2| |l3 l4| ... |l(N-1) lN| |r1 r2| |r3 r4| ... |r(N-1) r(N)|, 373 // where N is the total number of samples. 374 for (size_t i = 0; i < encoded_len / 2; i++) { 375 uint8_t right_byte = encoded_deinterleaved[i + 1]; 376 memmove(&encoded_deinterleaved[i + 1], &encoded_deinterleaved[i + 2], 377 encoded_len - i - 2); 378 encoded_deinterleaved[encoded_len - 1] = right_byte; 379 } 380 } 381 #endif 382 383 // CELT 384 #ifdef WEBRTC_CODEC_CELT 385 AudioDecoderCelt::AudioDecoderCelt(enum NetEqDecoder type) 386 : AudioDecoder(type) { 387 assert(type == kDecoderCELT_32 || type == kDecoderCELT_32_2ch); 388 if (type == kDecoderCELT_32) { 389 channels_ = 1; 390 } else { 391 channels_ = 2; 392 } 393 WebRtcCelt_CreateDec(reinterpret_cast<CELT_decinst_t**>(&state_), 394 static_cast<int>(channels_)); 395 } 396 397 AudioDecoderCelt::~AudioDecoderCelt() { 398 WebRtcCelt_FreeDec(static_cast<CELT_decinst_t*>(state_)); 399 } 400 401 int AudioDecoderCelt::Decode(const uint8_t* encoded, size_t encoded_len, 402 int16_t* decoded, SpeechType* speech_type) { 403 int16_t temp_type = 1; // Default to speech. 404 int ret = WebRtcCelt_DecodeUniversal(static_cast<CELT_decinst_t*>(state_), 405 encoded, static_cast<int>(encoded_len), 406 decoded, &temp_type); 407 *speech_type = ConvertSpeechType(temp_type); 408 if (ret < 0) { 409 return -1; 410 } 411 // Return the total number of samples. 412 return ret * static_cast<int>(channels_); 413 } 414 415 int AudioDecoderCelt::Init() { 416 return WebRtcCelt_DecoderInit(static_cast<CELT_decinst_t*>(state_)); 417 } 418 419 bool AudioDecoderCelt::HasDecodePlc() const { return true; } 420 421 int AudioDecoderCelt::DecodePlc(int num_frames, int16_t* decoded) { 422 int ret = WebRtcCelt_DecodePlc(static_cast<CELT_decinst_t*>(state_), 423 decoded, num_frames); 424 if (ret < 0) { 425 return -1; 426 } 427 // Return the total number of samples. 428 return ret * static_cast<int>(channels_); 429 } 430 #endif 431 432 // Opus 433 #ifdef WEBRTC_CODEC_OPUS 434 AudioDecoderOpus::AudioDecoderOpus(enum NetEqDecoder type) 435 : AudioDecoder(type) { 436 if (type == kDecoderOpus_2ch) { 437 channels_ = 2; 438 } else { 439 channels_ = 1; 440 } 441 WebRtcOpus_DecoderCreate(reinterpret_cast<OpusDecInst**>(&state_), 442 static_cast<int>(channels_)); 443 } 444 445 AudioDecoderOpus::~AudioDecoderOpus() { 446 WebRtcOpus_DecoderFree(static_cast<OpusDecInst*>(state_)); 447 } 448 449 int AudioDecoderOpus::Decode(const uint8_t* encoded, size_t encoded_len, 450 int16_t* decoded, SpeechType* speech_type) { 451 int16_t temp_type = 1; // Default is speech. 452 int16_t ret = WebRtcOpus_DecodeNew(static_cast<OpusDecInst*>(state_), encoded, 453 static_cast<int16_t>(encoded_len), decoded, 454 &temp_type); 455 if (ret > 0) 456 ret *= static_cast<int16_t>(channels_); // Return total number of samples. 457 *speech_type = ConvertSpeechType(temp_type); 458 return ret; 459 } 460 461 int AudioDecoderOpus::DecodeRedundant(const uint8_t* encoded, 462 size_t encoded_len, int16_t* decoded, 463 SpeechType* speech_type) { 464 int16_t temp_type = 1; // Default is speech. 465 int16_t ret = WebRtcOpus_DecodeFec(static_cast<OpusDecInst*>(state_), encoded, 466 static_cast<int16_t>(encoded_len), decoded, 467 &temp_type); 468 if (ret > 0) 469 ret *= static_cast<int16_t>(channels_); // Return total number of samples. 470 *speech_type = ConvertSpeechType(temp_type); 471 return ret; 472 } 473 474 int AudioDecoderOpus::Init() { 475 return WebRtcOpus_DecoderInitNew(static_cast<OpusDecInst*>(state_)); 476 } 477 478 int AudioDecoderOpus::PacketDuration(const uint8_t* encoded, 479 size_t encoded_len) { 480 return WebRtcOpus_DurationEst(static_cast<OpusDecInst*>(state_), 481 encoded, static_cast<int>(encoded_len)); 482 } 483 484 int AudioDecoderOpus::PacketDurationRedundant(const uint8_t* encoded, 485 size_t encoded_len) const { 486 return WebRtcOpus_FecDurationEst(encoded, static_cast<int>(encoded_len)); 487 } 488 489 bool AudioDecoderOpus::PacketHasFec(const uint8_t* encoded, 490 size_t encoded_len) const { 491 int fec; 492 fec = WebRtcOpus_PacketHasFec(encoded, static_cast<int>(encoded_len)); 493 return (fec == 1); 494 } 495 #endif 496 497 AudioDecoderCng::AudioDecoderCng(enum NetEqDecoder type) 498 : AudioDecoder(type) { 499 assert(type == kDecoderCNGnb || type == kDecoderCNGwb || 500 kDecoderCNGswb32kHz || type == kDecoderCNGswb48kHz); 501 WebRtcCng_CreateDec(reinterpret_cast<CNG_dec_inst**>(&state_)); 502 assert(state_); 503 } 504 505 AudioDecoderCng::~AudioDecoderCng() { 506 if (state_) { 507 WebRtcCng_FreeDec(static_cast<CNG_dec_inst*>(state_)); 508 } 509 } 510 511 int AudioDecoderCng::Init() { 512 assert(state_); 513 return WebRtcCng_InitDec(static_cast<CNG_dec_inst*>(state_)); 514 } 515 516 } // namespace webrtc 517