/*
 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "webrtc/modules/audio_coding/codecs/opus/interface/opus_interface.h"
#include "webrtc/modules/audio_coding/codecs/opus/opus_inst.h"

#include <stdlib.h>
#include <string.h>

enum {
  /* Maximum supported frame size in WebRTC is 60 ms. */
  kWebRtcOpusMaxEncodeFrameSizeMs = 60,

  /* The format allows up to 120 ms frames. Since we don't control the other
   * side, we must allow for packets of that size. NetEq is currently limited
   * to 60 ms on the receive side. */
  kWebRtcOpusMaxDecodeFrameSizeMs = 120,

  /* Maximum sample count per channel is 48 kHz * maximum frame size in
   * milliseconds. */
  kWebRtcOpusMaxFrameSizePerChannel = 48 * kWebRtcOpusMaxDecodeFrameSizeMs,

  /* Default frame size, 20 ms @ 48 kHz, in samples (for one channel). */
  kWebRtcOpusDefaultFrameSize = 960,
};

int16_t WebRtcOpus_EncoderCreate(OpusEncInst** inst, int32_t channels) {
  OpusEncInst* state;
  if (inst != NULL) {
    state = (OpusEncInst*) calloc(1, sizeof(OpusEncInst));
    if (state) {
      int error;
      /* Default to VoIP application for mono, and AUDIO for stereo. */
      int application = (channels == 1) ? OPUS_APPLICATION_VOIP :
          OPUS_APPLICATION_AUDIO;

      state->encoder = opus_encoder_create(48000, channels, application,
                                           &error);
      if (error == OPUS_OK && state->encoder != NULL) {
        *inst = state;
        return 0;
      }
      free(state);
    }
  }
  return -1;
}

int16_t WebRtcOpus_EncoderFree(OpusEncInst* inst) {
  if (inst) {
    opus_encoder_destroy(inst->encoder);
    free(inst);
    return 0;
  } else {
    return -1;
  }
}

int16_t WebRtcOpus_Encode(OpusEncInst* inst, int16_t* audio_in, int16_t samples,
                          int16_t length_encoded_buffer, uint8_t* encoded) {
  opus_int16* audio = (opus_int16*) audio_in;
  unsigned char* coded = encoded;
  int res;

  if (samples > 48 * kWebRtcOpusMaxEncodeFrameSizeMs) {
    return -1;
  }

  res = opus_encode(inst->encoder, audio, samples, coded,
                    length_encoded_buffer);

  if (res > 0) {
    return res;
  }
  return -1;
}

int16_t WebRtcOpus_SetBitRate(OpusEncInst* inst, int32_t rate) {
  if (inst) {
    return opus_encoder_ctl(inst->encoder, OPUS_SET_BITRATE(rate));
  } else {
    return -1;
  }
}

int16_t WebRtcOpus_SetPacketLossRate(OpusEncInst* inst, int32_t loss_rate) {
  if (inst) {
    return opus_encoder_ctl(inst->encoder,
                            OPUS_SET_PACKET_LOSS_PERC(loss_rate));
  } else {
    return -1;
  }
}

int16_t WebRtcOpus_SetMaxPlaybackRate(OpusEncInst* inst, int32_t frequency_hz) {
  opus_int32 set_bandwidth;

  if (!inst)
    return -1;

  if (frequency_hz <= 8000) {
    set_bandwidth = OPUS_BANDWIDTH_NARROWBAND;
  } else if (frequency_hz <= 12000) {
    set_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND;
  } else if (frequency_hz <= 16000) {
    set_bandwidth = OPUS_BANDWIDTH_WIDEBAND;
  } else if (frequency_hz <= 24000) {
    set_bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND;
  } else {
    set_bandwidth = OPUS_BANDWIDTH_FULLBAND;
  }
  return opus_encoder_ctl(inst->encoder,
                          OPUS_SET_MAX_BANDWIDTH(set_bandwidth));
}
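
/* Illustrative encoder usage (a sketch only, not part of this API; the buffer
 * sizes and settings below are example values, not requirements):
 *
 *   OpusEncInst* enc = NULL;
 *   int16_t pcm[960];      // one 20 ms mono frame at 48 kHz, assumed to be
 *                          // filled by the capture side
 *   uint8_t packet[1500];  // arbitrary example payload buffer
 *   int16_t packet_bytes;
 *
 *   if (WebRtcOpus_EncoderCreate(&enc, 1) == 0) {
 *     WebRtcOpus_SetBitRate(enc, 32000);
 *     WebRtcOpus_SetMaxPlaybackRate(enc, 16000);  // cap at wideband
 *     packet_bytes = WebRtcOpus_Encode(enc, pcm, 960, sizeof(packet), packet);
 *     // |packet_bytes| is the payload size in bytes, or -1 on failure.
 *     WebRtcOpus_EncoderFree(enc);
 *   }
 */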

int16_t WebRtcOpus_EnableFec(OpusEncInst* inst) {
  if (inst) {
    return opus_encoder_ctl(inst->encoder, OPUS_SET_INBAND_FEC(1));
  } else {
    return -1;
  }
}

int16_t WebRtcOpus_DisableFec(OpusEncInst* inst) {
  if (inst) {
    return opus_encoder_ctl(inst->encoder, OPUS_SET_INBAND_FEC(0));
  } else {
    return -1;
  }
}

int16_t WebRtcOpus_SetComplexity(OpusEncInst* inst, int32_t complexity) {
  if (inst) {
    return opus_encoder_ctl(inst->encoder, OPUS_SET_COMPLEXITY(complexity));
  } else {
    return -1;
  }
}

int16_t WebRtcOpus_DecoderCreate(OpusDecInst** inst, int channels) {
  int error_l;
  int error_r;
  OpusDecInst* state;

  if (inst != NULL) {
    /* Create Opus decoder state. */
    state = (OpusDecInst*) calloc(1, sizeof(OpusDecInst));
    if (state == NULL) {
      return -1;
    }

    /* Create new memory for left and right channel, always at 48000 Hz. */
    state->decoder_left = opus_decoder_create(48000, channels, &error_l);
    state->decoder_right = opus_decoder_create(48000, channels, &error_r);
    if (error_l == OPUS_OK && error_r == OPUS_OK && state->decoder_left != NULL
        && state->decoder_right != NULL) {
      /* Creation of memory all ok. */
      state->channels = channels;
      state->prev_decoded_samples = kWebRtcOpusDefaultFrameSize;
      *inst = state;
      return 0;
    }

    /* If decoder creation was unsuccessful, free the entire state. */
    if (state->decoder_left) {
      opus_decoder_destroy(state->decoder_left);
    }
    if (state->decoder_right) {
      opus_decoder_destroy(state->decoder_right);
    }
    free(state);
  }
  return -1;
}

int16_t WebRtcOpus_DecoderFree(OpusDecInst* inst) {
  if (inst) {
    opus_decoder_destroy(inst->decoder_left);
    opus_decoder_destroy(inst->decoder_right);
    free(inst);
    return 0;
  } else {
    return -1;
  }
}

int WebRtcOpus_DecoderChannels(OpusDecInst* inst) {
  return inst->channels;
}

int16_t WebRtcOpus_DecoderInitNew(OpusDecInst* inst) {
  int error = opus_decoder_ctl(inst->decoder_left, OPUS_RESET_STATE);
  if (error == OPUS_OK) {
    return 0;
  }
  return -1;
}

int16_t WebRtcOpus_DecoderInit(OpusDecInst* inst) {
  int error = opus_decoder_ctl(inst->decoder_left, OPUS_RESET_STATE);
  if (error == OPUS_OK) {
    return 0;
  }
  return -1;
}

int16_t WebRtcOpus_DecoderInitSlave(OpusDecInst* inst) {
  int error = opus_decoder_ctl(inst->decoder_right, OPUS_RESET_STATE);
  if (error == OPUS_OK) {
    return 0;
  }
  return -1;
}
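
/* Decoder-side note: each OpusDecInst created above holds two OpusDecoder
 * instances, |decoder_left| ("master") and |decoder_right| ("slave"). NetEQ
 * produces stereo by calling the decoder once per channel, so the master call
 * decodes the packet and keeps the left channel while the slave call decodes
 * the same packet again and keeps the right channel. Because that extraction
 * is done in place, the |decoded| buffer handed to those calls must be able
 * to hold the full interleaved frame (up to
 * |kWebRtcOpusMaxFrameSizePerChannel| samples per channel) before it is
 * reduced to a single channel. */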

/* |frame_size| is set to maximum Opus frame size in the normal case, and
 * is set to the number of samples needed for PLC in case of losses.
 * It is up to the caller to make sure the value is correct. */
static int DecodeNative(OpusDecoder* inst, const int16_t* encoded,
                        int16_t encoded_bytes, int frame_size,
                        int16_t* decoded, int16_t* audio_type) {
  unsigned char* coded = (unsigned char*) encoded;
  opus_int16* audio = (opus_int16*) decoded;

  int res = opus_decode(inst, coded, encoded_bytes, audio, frame_size, 0);

  /* TODO(tlegrand): set to DTX for zero-length packets? */
  *audio_type = 0;

  if (res > 0) {
    return res;
  }
  return -1;
}

static int DecodeFec(OpusDecoder* inst, const int16_t* encoded,
                     int16_t encoded_bytes, int frame_size,
                     int16_t* decoded, int16_t* audio_type) {
  unsigned char* coded = (unsigned char*) encoded;
  opus_int16* audio = (opus_int16*) decoded;

  int res = opus_decode(inst, coded, encoded_bytes, audio, frame_size, 1);

  /* TODO(tlegrand): set to DTX for zero-length packets? */
  *audio_type = 0;

  if (res > 0) {
    return res;
  }
  return -1;
}

int16_t WebRtcOpus_DecodeNew(OpusDecInst* inst, const uint8_t* encoded,
                             int16_t encoded_bytes, int16_t* decoded,
                             int16_t* audio_type) {
  int16_t* coded = (int16_t*)encoded;
  int decoded_samples;

  decoded_samples = DecodeNative(inst->decoder_left, coded, encoded_bytes,
                                 kWebRtcOpusMaxFrameSizePerChannel,
                                 decoded, audio_type);
  if (decoded_samples < 0) {
    return -1;
  }

  /* Update decoded sample memory, to be used by the PLC in case of losses. */
  inst->prev_decoded_samples = decoded_samples;

  return decoded_samples;
}

int16_t WebRtcOpus_Decode(OpusDecInst* inst, const int16_t* encoded,
                          int16_t encoded_bytes, int16_t* decoded,
                          int16_t* audio_type) {
  int decoded_samples;
  int i;

  /* If mono case, just do a regular call to the decoder.
   * If stereo, a call to WebRtcOpus_Decode() gives left channel as output, and
   * calls to WebRtcOpus_DecodeSlave() give right channel as output.
   * This is to make stereo work with the current setup of NetEQ, which
   * requires two calls to the decoder to produce stereo. */

  decoded_samples = DecodeNative(inst->decoder_left, encoded, encoded_bytes,
                                 kWebRtcOpusMaxFrameSizePerChannel, decoded,
                                 audio_type);
  if (decoded_samples < 0) {
    return -1;
  }
  if (inst->channels == 2) {
    /* The parameter |decoded_samples| holds the number of sample pairs, in
     * case of stereo. Number of samples in |decoded| equals |decoded_samples|
     * times 2. */
    for (i = 0; i < decoded_samples; i++) {
      /* Take every second sample, starting at the first sample. This gives
       * the left channel. */
      decoded[i] = decoded[i * 2];
    }
  }

  /* Update decoded sample memory, to be used by the PLC in case of losses. */
  inst->prev_decoded_samples = decoded_samples;

  return decoded_samples;
}

int16_t WebRtcOpus_DecodeSlave(OpusDecInst* inst, const int16_t* encoded,
                               int16_t encoded_bytes, int16_t* decoded,
                               int16_t* audio_type) {
  int decoded_samples;
  int i;

  decoded_samples = DecodeNative(inst->decoder_right, encoded, encoded_bytes,
                                 kWebRtcOpusMaxFrameSizePerChannel, decoded,
                                 audio_type);
  if (decoded_samples < 0) {
    return -1;
  }
  if (inst->channels == 2) {
    /* The parameter |decoded_samples| holds the number of sample pairs, in
     * case of stereo. Number of samples in |decoded| equals |decoded_samples|
     * times 2. */
    for (i = 0; i < decoded_samples; i++) {
      /* Take every second sample, starting at the second sample. This gives
       * the right channel. */
      decoded[i] = decoded[i * 2 + 1];
    }
  } else {
    /* DecodeSlave should never be called for mono packets. */
    return -1;
  }

  return decoded_samples;
}
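
/* Illustrative decode usage for the newer single-call path (a sketch only;
 * the 5760-sample buffer below is kWebRtcOpusMaxFrameSizePerChannel for one
 * channel and is an example, not a requirement of the API):
 *
 *   OpusDecInst* dec = NULL;
 *   int16_t pcm[5760];
 *   int16_t audio_type;
 *   int16_t samples;
 *
 *   if (WebRtcOpus_DecoderCreate(&dec, 1) == 0) {
 *     // |payload| and |payload_bytes| are assumed to come from the jitter
 *     // buffer; for stereo, |pcm| would need room for interleaved output.
 *     samples = WebRtcOpus_DecodeNew(dec, payload, payload_bytes,
 *                                    pcm, &audio_type);
 *     // |samples| is the number of decoded samples per channel, or -1.
 *     WebRtcOpus_DecoderFree(dec);
 *   }
 */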

int16_t WebRtcOpus_DecodePlc(OpusDecInst* inst, int16_t* decoded,
                             int16_t number_of_lost_frames) {
  int16_t audio_type = 0;
  int decoded_samples;
  int plc_samples;

  /* The number of samples we ask for is |number_of_lost_frames| times
   * |prev_decoded_samples|. Limit the number of samples to maximum
   * |kWebRtcOpusMaxFrameSizePerChannel|. */
  plc_samples = number_of_lost_frames * inst->prev_decoded_samples;
  plc_samples = (plc_samples <= kWebRtcOpusMaxFrameSizePerChannel) ?
      plc_samples : kWebRtcOpusMaxFrameSizePerChannel;
  decoded_samples = DecodeNative(inst->decoder_left, NULL, 0, plc_samples,
                                 decoded, &audio_type);
  if (decoded_samples < 0) {
    return -1;
  }

  return decoded_samples;
}

int16_t WebRtcOpus_DecodePlcMaster(OpusDecInst* inst, int16_t* decoded,
                                   int16_t number_of_lost_frames) {
  int decoded_samples;
  int16_t audio_type = 0;
  int plc_samples;
  int i;

  /* If mono case, just do a regular call to the decoder.
   * If stereo, a call to WebRtcOpus_DecodePlcMaster() gives left channel as
   * output, and calls to WebRtcOpus_DecodePlcSlave() give right channel as
   * output. This is to make stereo work with the current setup of NetEQ, which
   * requires two calls to the decoder to produce stereo. */

  /* The number of samples we ask for is |number_of_lost_frames| times
   * |prev_decoded_samples|. Limit the number of samples to maximum
   * |kWebRtcOpusMaxFrameSizePerChannel|. */
  plc_samples = number_of_lost_frames * inst->prev_decoded_samples;
  plc_samples = (plc_samples <= kWebRtcOpusMaxFrameSizePerChannel) ?
      plc_samples : kWebRtcOpusMaxFrameSizePerChannel;
  decoded_samples = DecodeNative(inst->decoder_left, NULL, 0, plc_samples,
                                 decoded, &audio_type);
  if (decoded_samples < 0) {
    return -1;
  }

  if (inst->channels == 2) {
    /* The parameter |decoded_samples| holds the number of sample pairs, in
     * case of stereo. The original number of samples in |decoded| equals
     * |decoded_samples| times 2. */
    for (i = 0; i < decoded_samples; i++) {
      /* Take every second sample, starting at the first sample. This gives
       * the left channel. */
      decoded[i] = decoded[i * 2];
    }
  }

  return decoded_samples;
}

int16_t WebRtcOpus_DecodePlcSlave(OpusDecInst* inst, int16_t* decoded,
                                  int16_t number_of_lost_frames) {
  int decoded_samples;
  int16_t audio_type = 0;
  int plc_samples;
  int i;

  /* Calls to WebRtcOpus_DecodePlcSlave() give right channel as output.
   * The function should never be called in the mono case. */
  if (inst->channels != 2) {
    return -1;
  }

  /* The number of samples we ask for is |number_of_lost_frames| times
   * |prev_decoded_samples|. Limit the number of samples to maximum
   * |kWebRtcOpusMaxFrameSizePerChannel|. */
  plc_samples = number_of_lost_frames * inst->prev_decoded_samples;
  plc_samples = (plc_samples <= kWebRtcOpusMaxFrameSizePerChannel)
      ? plc_samples : kWebRtcOpusMaxFrameSizePerChannel;
  decoded_samples = DecodeNative(inst->decoder_right, NULL, 0, plc_samples,
                                 decoded, &audio_type);
  if (decoded_samples < 0) {
    return -1;
  }

  /* The parameter |decoded_samples| holds the number of sample pairs.
   * The original number of samples in |decoded| equals |decoded_samples|
   * times 2. */
  for (i = 0; i < decoded_samples; i++) {
    /* Take every second sample, starting at the second sample. This gives
     * the right channel. */
    decoded[i] = decoded[i * 2 + 1];
  }

  return decoded_samples;
}
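
/* PLC sizing note: the concealment length requested above is
 * |number_of_lost_frames| * |prev_decoded_samples|, capped at
 * |kWebRtcOpusMaxFrameSizePerChannel|. For example, with 20 ms frames at
 * 48 kHz (960 samples per channel), two lost frames give 2 * 960 = 1920
 * samples of concealment, while anything beyond six such frames is clamped
 * to 5760 samples (120 ms). */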

int16_t WebRtcOpus_DecodeFec(OpusDecInst* inst, const uint8_t* encoded,
                             int16_t encoded_bytes, int16_t* decoded,
                             int16_t* audio_type) {
  int16_t* coded = (int16_t*)encoded;
  int decoded_samples;
  int fec_samples;

  if (WebRtcOpus_PacketHasFec(encoded, encoded_bytes) != 1) {
    return 0;
  }

  fec_samples = opus_packet_get_samples_per_frame(encoded, 48000);

  decoded_samples = DecodeFec(inst->decoder_left, coded, encoded_bytes,
                              fec_samples, decoded, audio_type);
  if (decoded_samples < 0) {
    return -1;
  }

  return decoded_samples;
}

int WebRtcOpus_DurationEst(OpusDecInst* inst,
                           const uint8_t* payload,
                           int payload_length_bytes) {
  int frames, samples;
  frames = opus_packet_get_nb_frames(payload, payload_length_bytes);
  if (frames < 0) {
    /* Invalid payload data. */
    return 0;
  }
  samples = frames * opus_packet_get_samples_per_frame(payload, 48000);
  if (samples < 120 || samples > 5760) {
    /* Invalid payload duration. */
    return 0;
  }
  return samples;
}

int WebRtcOpus_FecDurationEst(const uint8_t* payload,
                              int payload_length_bytes) {
  int samples;
  if (WebRtcOpus_PacketHasFec(payload, payload_length_bytes) != 1) {
    return 0;
  }

  samples = opus_packet_get_samples_per_frame(payload, 48000);
  if (samples < 480 || samples > 5760) {
    /* Invalid payload duration. */
    return 0;
  }
  return samples;
}

int WebRtcOpus_PacketHasFec(const uint8_t* payload,
                            int payload_length_bytes) {
  int frames, channels, payload_length_ms;
  int n;
  opus_int16 frame_sizes[48];
  const unsigned char *frame_data[48];

  if (payload == NULL || payload_length_bytes <= 0)
    return 0;

  /* In CELT_ONLY mode, packets should not have FEC. The TOC configuration
   * occupies the five most significant bits of the first byte, and
   * configurations 16-31 (i.e. any value with the top bit set) are
   * CELT-only. */
  if (payload[0] & 0x80)
    return 0;

  payload_length_ms = opus_packet_get_samples_per_frame(payload, 48000) / 48;
  if (10 > payload_length_ms)
    payload_length_ms = 10;

  channels = opus_packet_get_nb_channels(payload);

  switch (payload_length_ms) {
    case 10:
    case 20: {
      frames = 1;
      break;
    }
    case 40: {
      frames = 2;
      break;
    }
    case 60: {
      frames = 3;
      break;
    }
    default: {
      /* Invalid frame duration; not a valid Opus packet. */
      return 0;
    }
  }

  /* The following is to parse the LBRR flags. */
  if (opus_packet_parse(payload, payload_length_bytes, NULL, frame_data,
                        frame_sizes, NULL) < 0) {
    return 0;
  }

  if (frame_sizes[0] <= 1) {
    return 0;
  }

  for (n = 0; n < channels; n++) {
    if (frame_data[0][0] & (0x80 >> ((n + 1) * (frames + 1) - 1)))
      return 1;
  }

  return 0;
}
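
/* FEC usage sketch (illustrative only; the surrounding jitter-buffer logic is
 * an assumption, not something this file provides): when packet N is lost but
 * packet N + 1 has arrived, a caller can try to recover packet N's audio from
 * the in-band FEC data carried by packet N + 1:
 *
 *   int16_t fec_pcm[5760];
 *   int16_t audio_type;
 *
 *   if (WebRtcOpus_PacketHasFec(next_payload, next_payload_bytes) == 1) {
 *     int16_t samples = WebRtcOpus_DecodeFec(dec, next_payload,
 *                                            next_payload_bytes,
 *                                            fec_pcm, &audio_type);
 *     // |samples| (or WebRtcOpus_FecDurationEst()) indicates how much of the
 *     // lost frame was recovered; a return of 0 means no FEC data was found.
 *   }
 */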