1 /* 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "webrtc/modules/audio_coding/codecs/opus/opus_interface.h" 12 #include "webrtc/modules/audio_coding/codecs/opus/opus_inst.h" 13 14 #include <assert.h> 15 #include <stdlib.h> 16 #include <string.h> 17 18 enum { 19 /* Maximum supported frame size in WebRTC is 60 ms. */ 20 kWebRtcOpusMaxEncodeFrameSizeMs = 60, 21 22 /* The format allows up to 120 ms frames. Since we don't control the other 23 * side, we must allow for packets of that size. NetEq is currently limited 24 * to 60 ms on the receive side. */ 25 kWebRtcOpusMaxDecodeFrameSizeMs = 120, 26 27 /* Maximum sample count per channel is 48 kHz * maximum frame size in 28 * milliseconds. */ 29 kWebRtcOpusMaxFrameSizePerChannel = 48 * kWebRtcOpusMaxDecodeFrameSizeMs, 30 31 /* Default frame size, 20 ms @ 48 kHz, in samples (for one channel). */ 32 kWebRtcOpusDefaultFrameSize = 960, 33 34 // Maximum number of consecutive zeros, beyond or equal to which DTX can fail. 35 kZeroBreakCount = 157, 36 37 #if defined(OPUS_FIXED_POINT) 38 kZeroBreakValue = 10, 39 #else 40 kZeroBreakValue = 1, 41 #endif 42 }; 43 44 int16_t WebRtcOpus_EncoderCreate(OpusEncInst** inst, 45 size_t channels, 46 int32_t application) { 47 int opus_app; 48 if (!inst) 49 return -1; 50 51 switch (application) { 52 case 0: 53 opus_app = OPUS_APPLICATION_VOIP; 54 break; 55 case 1: 56 opus_app = OPUS_APPLICATION_AUDIO; 57 break; 58 default: 59 return -1; 60 } 61 62 OpusEncInst* state = calloc(1, sizeof(OpusEncInst)); 63 assert(state); 64 65 // Allocate zero counters. 66 state->zero_counts = calloc(channels, sizeof(size_t)); 67 assert(state->zero_counts); 68 69 int error; 70 state->encoder = opus_encoder_create(48000, (int)channels, opus_app, 71 &error); 72 if (error != OPUS_OK || !state->encoder) { 73 WebRtcOpus_EncoderFree(state); 74 return -1; 75 } 76 77 state->in_dtx_mode = 0; 78 state->channels = channels; 79 80 *inst = state; 81 return 0; 82 } 83 84 int16_t WebRtcOpus_EncoderFree(OpusEncInst* inst) { 85 if (inst) { 86 opus_encoder_destroy(inst->encoder); 87 free(inst->zero_counts); 88 free(inst); 89 return 0; 90 } else { 91 return -1; 92 } 93 } 94 95 int WebRtcOpus_Encode(OpusEncInst* inst, 96 const int16_t* audio_in, 97 size_t samples, 98 size_t length_encoded_buffer, 99 uint8_t* encoded) { 100 int res; 101 size_t i; 102 size_t c; 103 104 int16_t buffer[2 * 48 * kWebRtcOpusMaxEncodeFrameSizeMs]; 105 106 if (samples > 48 * kWebRtcOpusMaxEncodeFrameSizeMs) { 107 return -1; 108 } 109 110 const size_t channels = inst->channels; 111 int use_buffer = 0; 112 113 // Break long consecutive zeros by forcing a "1" every |kZeroBreakCount| 114 // samples. 115 if (inst->in_dtx_mode) { 116 for (i = 0; i < samples; ++i) { 117 for (c = 0; c < channels; ++c) { 118 if (audio_in[i * channels + c] == 0) { 119 ++inst->zero_counts[c]; 120 if (inst->zero_counts[c] == kZeroBreakCount) { 121 if (!use_buffer) { 122 memcpy(buffer, audio_in, samples * channels * sizeof(int16_t)); 123 use_buffer = 1; 124 } 125 buffer[i * channels + c] = kZeroBreakValue; 126 inst->zero_counts[c] = 0; 127 } 128 } else { 129 inst->zero_counts[c] = 0; 130 } 131 } 132 } 133 } 134 135 res = opus_encode(inst->encoder, 136 use_buffer ? buffer : audio_in, 137 (int)samples, 138 encoded, 139 (opus_int32)length_encoded_buffer); 140 141 if (res == 1) { 142 // Indicates DTX since the packet has nothing but a header. In principle, 143 // there is no need to send this packet. However, we do transmit the first 144 // occurrence to let the decoder know that the encoder enters DTX mode. 145 if (inst->in_dtx_mode) { 146 return 0; 147 } else { 148 inst->in_dtx_mode = 1; 149 return 1; 150 } 151 } else if (res > 1) { 152 inst->in_dtx_mode = 0; 153 return res; 154 } 155 156 return -1; 157 } 158 159 int16_t WebRtcOpus_SetBitRate(OpusEncInst* inst, int32_t rate) { 160 if (inst) { 161 return opus_encoder_ctl(inst->encoder, OPUS_SET_BITRATE(rate)); 162 } else { 163 return -1; 164 } 165 } 166 167 int16_t WebRtcOpus_SetPacketLossRate(OpusEncInst* inst, int32_t loss_rate) { 168 if (inst) { 169 return opus_encoder_ctl(inst->encoder, 170 OPUS_SET_PACKET_LOSS_PERC(loss_rate)); 171 } else { 172 return -1; 173 } 174 } 175 176 int16_t WebRtcOpus_SetMaxPlaybackRate(OpusEncInst* inst, int32_t frequency_hz) { 177 opus_int32 set_bandwidth; 178 179 if (!inst) 180 return -1; 181 182 if (frequency_hz <= 8000) { 183 set_bandwidth = OPUS_BANDWIDTH_NARROWBAND; 184 } else if (frequency_hz <= 12000) { 185 set_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND; 186 } else if (frequency_hz <= 16000) { 187 set_bandwidth = OPUS_BANDWIDTH_WIDEBAND; 188 } else if (frequency_hz <= 24000) { 189 set_bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND; 190 } else { 191 set_bandwidth = OPUS_BANDWIDTH_FULLBAND; 192 } 193 return opus_encoder_ctl(inst->encoder, 194 OPUS_SET_MAX_BANDWIDTH(set_bandwidth)); 195 } 196 197 int16_t WebRtcOpus_EnableFec(OpusEncInst* inst) { 198 if (inst) { 199 return opus_encoder_ctl(inst->encoder, OPUS_SET_INBAND_FEC(1)); 200 } else { 201 return -1; 202 } 203 } 204 205 int16_t WebRtcOpus_DisableFec(OpusEncInst* inst) { 206 if (inst) { 207 return opus_encoder_ctl(inst->encoder, OPUS_SET_INBAND_FEC(0)); 208 } else { 209 return -1; 210 } 211 } 212 213 int16_t WebRtcOpus_EnableDtx(OpusEncInst* inst) { 214 if (!inst) { 215 return -1; 216 } 217 218 // To prevent Opus from entering CELT-only mode by forcing signal type to 219 // voice to make sure that DTX behaves correctly. Currently, DTX does not 220 // last long during a pure silence, if the signal type is not forced. 221 // TODO(minyue): Remove the signal type forcing when Opus DTX works properly 222 // without it. 223 int ret = opus_encoder_ctl(inst->encoder, 224 OPUS_SET_SIGNAL(OPUS_SIGNAL_VOICE)); 225 if (ret != OPUS_OK) 226 return ret; 227 228 return opus_encoder_ctl(inst->encoder, OPUS_SET_DTX(1)); 229 } 230 231 int16_t WebRtcOpus_DisableDtx(OpusEncInst* inst) { 232 if (inst) { 233 int ret = opus_encoder_ctl(inst->encoder, 234 OPUS_SET_SIGNAL(OPUS_AUTO)); 235 if (ret != OPUS_OK) 236 return ret; 237 return opus_encoder_ctl(inst->encoder, OPUS_SET_DTX(0)); 238 } else { 239 return -1; 240 } 241 } 242 243 int16_t WebRtcOpus_SetComplexity(OpusEncInst* inst, int32_t complexity) { 244 if (inst) { 245 return opus_encoder_ctl(inst->encoder, OPUS_SET_COMPLEXITY(complexity)); 246 } else { 247 return -1; 248 } 249 } 250 251 int16_t WebRtcOpus_DecoderCreate(OpusDecInst** inst, size_t channels) { 252 int error; 253 OpusDecInst* state; 254 255 if (inst != NULL) { 256 /* Create Opus decoder state. */ 257 state = (OpusDecInst*) calloc(1, sizeof(OpusDecInst)); 258 if (state == NULL) { 259 return -1; 260 } 261 262 /* Create new memory, always at 48000 Hz. */ 263 state->decoder = opus_decoder_create(48000, (int)channels, &error); 264 if (error == OPUS_OK && state->decoder != NULL) { 265 /* Creation of memory all ok. */ 266 state->channels = channels; 267 state->prev_decoded_samples = kWebRtcOpusDefaultFrameSize; 268 state->in_dtx_mode = 0; 269 *inst = state; 270 return 0; 271 } 272 273 /* If memory allocation was unsuccessful, free the entire state. */ 274 if (state->decoder) { 275 opus_decoder_destroy(state->decoder); 276 } 277 free(state); 278 } 279 return -1; 280 } 281 282 int16_t WebRtcOpus_DecoderFree(OpusDecInst* inst) { 283 if (inst) { 284 opus_decoder_destroy(inst->decoder); 285 free(inst); 286 return 0; 287 } else { 288 return -1; 289 } 290 } 291 292 size_t WebRtcOpus_DecoderChannels(OpusDecInst* inst) { 293 return inst->channels; 294 } 295 296 void WebRtcOpus_DecoderInit(OpusDecInst* inst) { 297 opus_decoder_ctl(inst->decoder, OPUS_RESET_STATE); 298 inst->in_dtx_mode = 0; 299 } 300 301 /* For decoder to determine if it is to output speech or comfort noise. */ 302 static int16_t DetermineAudioType(OpusDecInst* inst, size_t encoded_bytes) { 303 // Audio type becomes comfort noise if |encoded_byte| is 1 and keeps 304 // to be so if the following |encoded_byte| are 0 or 1. 305 if (encoded_bytes == 0 && inst->in_dtx_mode) { 306 return 2; // Comfort noise. 307 } else if (encoded_bytes == 1) { 308 inst->in_dtx_mode = 1; 309 return 2; // Comfort noise. 310 } else { 311 inst->in_dtx_mode = 0; 312 return 0; // Speech. 313 } 314 } 315 316 /* |frame_size| is set to maximum Opus frame size in the normal case, and 317 * is set to the number of samples needed for PLC in case of losses. 318 * It is up to the caller to make sure the value is correct. */ 319 static int DecodeNative(OpusDecInst* inst, const uint8_t* encoded, 320 size_t encoded_bytes, int frame_size, 321 int16_t* decoded, int16_t* audio_type, int decode_fec) { 322 int res = opus_decode(inst->decoder, encoded, (opus_int32)encoded_bytes, 323 (opus_int16*)decoded, frame_size, decode_fec); 324 325 if (res <= 0) 326 return -1; 327 328 *audio_type = DetermineAudioType(inst, encoded_bytes); 329 330 return res; 331 } 332 333 int WebRtcOpus_Decode(OpusDecInst* inst, const uint8_t* encoded, 334 size_t encoded_bytes, int16_t* decoded, 335 int16_t* audio_type) { 336 int decoded_samples; 337 338 if (encoded_bytes == 0) { 339 *audio_type = DetermineAudioType(inst, encoded_bytes); 340 decoded_samples = WebRtcOpus_DecodePlc(inst, decoded, 1); 341 } else { 342 decoded_samples = DecodeNative(inst, 343 encoded, 344 encoded_bytes, 345 kWebRtcOpusMaxFrameSizePerChannel, 346 decoded, 347 audio_type, 348 0); 349 } 350 if (decoded_samples < 0) { 351 return -1; 352 } 353 354 /* Update decoded sample memory, to be used by the PLC in case of losses. */ 355 inst->prev_decoded_samples = decoded_samples; 356 357 return decoded_samples; 358 } 359 360 int WebRtcOpus_DecodePlc(OpusDecInst* inst, int16_t* decoded, 361 int number_of_lost_frames) { 362 int16_t audio_type = 0; 363 int decoded_samples; 364 int plc_samples; 365 366 /* The number of samples we ask for is |number_of_lost_frames| times 367 * |prev_decoded_samples_|. Limit the number of samples to maximum 368 * |kWebRtcOpusMaxFrameSizePerChannel|. */ 369 plc_samples = number_of_lost_frames * inst->prev_decoded_samples; 370 plc_samples = (plc_samples <= kWebRtcOpusMaxFrameSizePerChannel) ? 371 plc_samples : kWebRtcOpusMaxFrameSizePerChannel; 372 decoded_samples = DecodeNative(inst, NULL, 0, plc_samples, 373 decoded, &audio_type, 0); 374 if (decoded_samples < 0) { 375 return -1; 376 } 377 378 return decoded_samples; 379 } 380 381 int WebRtcOpus_DecodeFec(OpusDecInst* inst, const uint8_t* encoded, 382 size_t encoded_bytes, int16_t* decoded, 383 int16_t* audio_type) { 384 int decoded_samples; 385 int fec_samples; 386 387 if (WebRtcOpus_PacketHasFec(encoded, encoded_bytes) != 1) { 388 return 0; 389 } 390 391 fec_samples = opus_packet_get_samples_per_frame(encoded, 48000); 392 393 decoded_samples = DecodeNative(inst, encoded, encoded_bytes, 394 fec_samples, decoded, audio_type, 1); 395 if (decoded_samples < 0) { 396 return -1; 397 } 398 399 return decoded_samples; 400 } 401 402 int WebRtcOpus_DurationEst(OpusDecInst* inst, 403 const uint8_t* payload, 404 size_t payload_length_bytes) { 405 if (payload_length_bytes == 0) { 406 // WebRtcOpus_Decode calls PLC when payload length is zero. So we return 407 // PLC duration correspondingly. 408 return WebRtcOpus_PlcDuration(inst); 409 } 410 411 int frames, samples; 412 frames = opus_packet_get_nb_frames(payload, (opus_int32)payload_length_bytes); 413 if (frames < 0) { 414 /* Invalid payload data. */ 415 return 0; 416 } 417 samples = frames * opus_packet_get_samples_per_frame(payload, 48000); 418 if (samples < 120 || samples > 5760) { 419 /* Invalid payload duration. */ 420 return 0; 421 } 422 return samples; 423 } 424 425 int WebRtcOpus_PlcDuration(OpusDecInst* inst) { 426 /* The number of samples we ask for is |number_of_lost_frames| times 427 * |prev_decoded_samples_|. Limit the number of samples to maximum 428 * |kWebRtcOpusMaxFrameSizePerChannel|. */ 429 const int plc_samples = inst->prev_decoded_samples; 430 return (plc_samples <= kWebRtcOpusMaxFrameSizePerChannel) ? 431 plc_samples : kWebRtcOpusMaxFrameSizePerChannel; 432 } 433 434 int WebRtcOpus_FecDurationEst(const uint8_t* payload, 435 size_t payload_length_bytes) { 436 int samples; 437 if (WebRtcOpus_PacketHasFec(payload, payload_length_bytes) != 1) { 438 return 0; 439 } 440 441 samples = opus_packet_get_samples_per_frame(payload, 48000); 442 if (samples < 480 || samples > 5760) { 443 /* Invalid payload duration. */ 444 return 0; 445 } 446 return samples; 447 } 448 449 int WebRtcOpus_PacketHasFec(const uint8_t* payload, 450 size_t payload_length_bytes) { 451 int frames, channels, payload_length_ms; 452 int n; 453 opus_int16 frame_sizes[48]; 454 const unsigned char *frame_data[48]; 455 456 if (payload == NULL || payload_length_bytes == 0) 457 return 0; 458 459 /* In CELT_ONLY mode, packets should not have FEC. */ 460 if (payload[0] & 0x80) 461 return 0; 462 463 payload_length_ms = opus_packet_get_samples_per_frame(payload, 48000) / 48; 464 if (10 > payload_length_ms) 465 payload_length_ms = 10; 466 467 channels = opus_packet_get_nb_channels(payload); 468 469 switch (payload_length_ms) { 470 case 10: 471 case 20: { 472 frames = 1; 473 break; 474 } 475 case 40: { 476 frames = 2; 477 break; 478 } 479 case 60: { 480 frames = 3; 481 break; 482 } 483 default: { 484 return 0; // It is actually even an invalid packet. 485 } 486 } 487 488 /* The following is to parse the LBRR flags. */ 489 if (opus_packet_parse(payload, (opus_int32)payload_length_bytes, NULL, 490 frame_data, frame_sizes, NULL) < 0) { 491 return 0; 492 } 493 494 if (frame_sizes[0] <= 1) { 495 return 0; 496 } 497 498 for (n = 0; n < channels; n++) { 499 if (frame_data[0][0] & (0x80 >> ((n + 1) * (frames + 1) - 1))) 500 return 1; 501 } 502 503 return 0; 504 } 505