Home | History | Annotate | Download | only in opus
      1 /*
      2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #include "webrtc/modules/audio_coding/codecs/opus/opus_interface.h"
     12 #include "webrtc/modules/audio_coding/codecs/opus/opus_inst.h"
     13 
     14 #include <assert.h>
     15 #include <stdlib.h>
     16 #include <string.h>
     17 
     18 enum {
     19   /* Maximum supported frame size in WebRTC is 60 ms. */
     20   kWebRtcOpusMaxEncodeFrameSizeMs = 60,
     21 
     22   /* The format allows up to 120 ms frames. Since we don't control the other
     23    * side, we must allow for packets of that size. NetEq is currently limited
     24    * to 60 ms on the receive side. */
     25   kWebRtcOpusMaxDecodeFrameSizeMs = 120,
     26 
     27   /* Maximum sample count per channel is 48 kHz * maximum frame size in
     28    * milliseconds. */
     29   kWebRtcOpusMaxFrameSizePerChannel = 48 * kWebRtcOpusMaxDecodeFrameSizeMs,
     30 
     31   /* Default frame size, 20 ms @ 48 kHz, in samples (for one channel). */
     32   kWebRtcOpusDefaultFrameSize = 960,
     33 
     34   // Maximum number of consecutive zeros, beyond or equal to which DTX can fail.
     35   kZeroBreakCount = 157,
     36 
     37 #if defined(OPUS_FIXED_POINT)
     38   kZeroBreakValue = 10,
     39 #else
     40   kZeroBreakValue = 1,
     41 #endif
     42 };
     43 
     44 int16_t WebRtcOpus_EncoderCreate(OpusEncInst** inst,
     45                                  size_t channels,
     46                                  int32_t application) {
     47   int opus_app;
     48   if (!inst)
     49     return -1;
     50 
     51   switch (application) {
     52     case 0:
     53       opus_app = OPUS_APPLICATION_VOIP;
     54       break;
     55     case 1:
     56       opus_app = OPUS_APPLICATION_AUDIO;
     57       break;
     58     default:
     59       return -1;
     60   }
     61 
     62   OpusEncInst* state = calloc(1, sizeof(OpusEncInst));
     63   assert(state);
     64 
     65   // Allocate zero counters.
     66   state->zero_counts = calloc(channels, sizeof(size_t));
     67   assert(state->zero_counts);
     68 
     69   int error;
     70   state->encoder = opus_encoder_create(48000, (int)channels, opus_app,
     71                                        &error);
     72   if (error != OPUS_OK || !state->encoder) {
     73     WebRtcOpus_EncoderFree(state);
     74     return -1;
     75   }
     76 
     77   state->in_dtx_mode = 0;
     78   state->channels = channels;
     79 
     80   *inst = state;
     81   return 0;
     82 }
     83 
     84 int16_t WebRtcOpus_EncoderFree(OpusEncInst* inst) {
     85   if (inst) {
     86     opus_encoder_destroy(inst->encoder);
     87     free(inst->zero_counts);
     88     free(inst);
     89     return 0;
     90   } else {
     91     return -1;
     92   }
     93 }
     94 
     95 int WebRtcOpus_Encode(OpusEncInst* inst,
     96                       const int16_t* audio_in,
     97                       size_t samples,
     98                       size_t length_encoded_buffer,
     99                       uint8_t* encoded) {
    100   int res;
    101   size_t i;
    102   size_t c;
    103 
    104   int16_t buffer[2 * 48 * kWebRtcOpusMaxEncodeFrameSizeMs];
    105 
    106   if (samples > 48 * kWebRtcOpusMaxEncodeFrameSizeMs) {
    107     return -1;
    108   }
    109 
    110   const size_t channels = inst->channels;
    111   int use_buffer = 0;
    112 
    113   // Break long consecutive zeros by forcing a "1" every |kZeroBreakCount|
    114   // samples.
    115   if (inst->in_dtx_mode) {
    116     for (i = 0; i < samples; ++i) {
    117       for (c = 0; c < channels; ++c) {
    118         if (audio_in[i * channels + c] == 0) {
    119           ++inst->zero_counts[c];
    120           if (inst->zero_counts[c] == kZeroBreakCount) {
    121             if (!use_buffer) {
    122               memcpy(buffer, audio_in, samples * channels * sizeof(int16_t));
    123               use_buffer = 1;
    124             }
    125             buffer[i * channels + c] = kZeroBreakValue;
    126             inst->zero_counts[c] = 0;
    127           }
    128         } else {
    129           inst->zero_counts[c] = 0;
    130         }
    131       }
    132     }
    133   }
    134 
    135   res = opus_encode(inst->encoder,
    136                     use_buffer ? buffer : audio_in,
    137                     (int)samples,
    138                     encoded,
    139                     (opus_int32)length_encoded_buffer);
    140 
    141   if (res == 1) {
    142     // Indicates DTX since the packet has nothing but a header. In principle,
    143     // there is no need to send this packet. However, we do transmit the first
    144     // occurrence to let the decoder know that the encoder enters DTX mode.
    145     if (inst->in_dtx_mode) {
    146       return 0;
    147     } else {
    148       inst->in_dtx_mode = 1;
    149       return 1;
    150     }
    151   } else if (res > 1) {
    152     inst->in_dtx_mode = 0;
    153     return res;
    154   }
    155 
    156   return -1;
    157 }
    158 
    159 int16_t WebRtcOpus_SetBitRate(OpusEncInst* inst, int32_t rate) {
    160   if (inst) {
    161     return opus_encoder_ctl(inst->encoder, OPUS_SET_BITRATE(rate));
    162   } else {
    163     return -1;
    164   }
    165 }
    166 
    167 int16_t WebRtcOpus_SetPacketLossRate(OpusEncInst* inst, int32_t loss_rate) {
    168   if (inst) {
    169     return opus_encoder_ctl(inst->encoder,
    170                             OPUS_SET_PACKET_LOSS_PERC(loss_rate));
    171   } else {
    172     return -1;
    173   }
    174 }
    175 
    176 int16_t WebRtcOpus_SetMaxPlaybackRate(OpusEncInst* inst, int32_t frequency_hz) {
    177   opus_int32 set_bandwidth;
    178 
    179   if (!inst)
    180     return -1;
    181 
    182   if (frequency_hz <= 8000) {
    183     set_bandwidth = OPUS_BANDWIDTH_NARROWBAND;
    184   } else if (frequency_hz <= 12000) {
    185     set_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND;
    186   } else if (frequency_hz <= 16000) {
    187     set_bandwidth = OPUS_BANDWIDTH_WIDEBAND;
    188   } else if (frequency_hz <= 24000) {
    189     set_bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND;
    190   } else {
    191     set_bandwidth = OPUS_BANDWIDTH_FULLBAND;
    192   }
    193   return opus_encoder_ctl(inst->encoder,
    194                           OPUS_SET_MAX_BANDWIDTH(set_bandwidth));
    195 }
    196 
    197 int16_t WebRtcOpus_EnableFec(OpusEncInst* inst) {
    198   if (inst) {
    199     return opus_encoder_ctl(inst->encoder, OPUS_SET_INBAND_FEC(1));
    200   } else {
    201     return -1;
    202   }
    203 }
    204 
    205 int16_t WebRtcOpus_DisableFec(OpusEncInst* inst) {
    206   if (inst) {
    207     return opus_encoder_ctl(inst->encoder, OPUS_SET_INBAND_FEC(0));
    208   } else {
    209     return -1;
    210   }
    211 }
    212 
    213 int16_t WebRtcOpus_EnableDtx(OpusEncInst* inst) {
    214   if (!inst) {
    215     return -1;
    216   }
    217 
    218   // To prevent Opus from entering CELT-only mode by forcing signal type to
    219   // voice to make sure that DTX behaves correctly. Currently, DTX does not
    220   // last long during a pure silence, if the signal type is not forced.
    221   // TODO(minyue): Remove the signal type forcing when Opus DTX works properly
    222   // without it.
    223   int ret = opus_encoder_ctl(inst->encoder,
    224                              OPUS_SET_SIGNAL(OPUS_SIGNAL_VOICE));
    225   if (ret != OPUS_OK)
    226     return ret;
    227 
    228   return opus_encoder_ctl(inst->encoder, OPUS_SET_DTX(1));
    229 }
    230 
    231 int16_t WebRtcOpus_DisableDtx(OpusEncInst* inst) {
    232   if (inst) {
    233     int ret = opus_encoder_ctl(inst->encoder,
    234                                OPUS_SET_SIGNAL(OPUS_AUTO));
    235     if (ret != OPUS_OK)
    236       return ret;
    237     return opus_encoder_ctl(inst->encoder, OPUS_SET_DTX(0));
    238   } else {
    239     return -1;
    240   }
    241 }
    242 
    243 int16_t WebRtcOpus_SetComplexity(OpusEncInst* inst, int32_t complexity) {
    244   if (inst) {
    245     return opus_encoder_ctl(inst->encoder, OPUS_SET_COMPLEXITY(complexity));
    246   } else {
    247     return -1;
    248   }
    249 }
    250 
    251 int16_t WebRtcOpus_DecoderCreate(OpusDecInst** inst, size_t channels) {
    252   int error;
    253   OpusDecInst* state;
    254 
    255   if (inst != NULL) {
    256     /* Create Opus decoder state. */
    257     state = (OpusDecInst*) calloc(1, sizeof(OpusDecInst));
    258     if (state == NULL) {
    259       return -1;
    260     }
    261 
    262     /* Create new memory, always at 48000 Hz. */
    263     state->decoder = opus_decoder_create(48000, (int)channels, &error);
    264     if (error == OPUS_OK && state->decoder != NULL) {
    265       /* Creation of memory all ok. */
    266       state->channels = channels;
    267       state->prev_decoded_samples = kWebRtcOpusDefaultFrameSize;
    268       state->in_dtx_mode = 0;
    269       *inst = state;
    270       return 0;
    271     }
    272 
    273     /* If memory allocation was unsuccessful, free the entire state. */
    274     if (state->decoder) {
    275       opus_decoder_destroy(state->decoder);
    276     }
    277     free(state);
    278   }
    279   return -1;
    280 }
    281 
    282 int16_t WebRtcOpus_DecoderFree(OpusDecInst* inst) {
    283   if (inst) {
    284     opus_decoder_destroy(inst->decoder);
    285     free(inst);
    286     return 0;
    287   } else {
    288     return -1;
    289   }
    290 }
    291 
    292 size_t WebRtcOpus_DecoderChannels(OpusDecInst* inst) {
    293   return inst->channels;
    294 }
    295 
    296 void WebRtcOpus_DecoderInit(OpusDecInst* inst) {
    297   opus_decoder_ctl(inst->decoder, OPUS_RESET_STATE);
    298   inst->in_dtx_mode = 0;
    299 }
    300 
    301 /* For decoder to determine if it is to output speech or comfort noise. */
    302 static int16_t DetermineAudioType(OpusDecInst* inst, size_t encoded_bytes) {
    303   // Audio type becomes comfort noise if |encoded_byte| is 1 and keeps
    304   // to be so if the following |encoded_byte| are 0 or 1.
    305   if (encoded_bytes == 0 && inst->in_dtx_mode) {
    306     return 2;  // Comfort noise.
    307   } else if (encoded_bytes == 1) {
    308     inst->in_dtx_mode = 1;
    309     return 2;  // Comfort noise.
    310   } else {
    311     inst->in_dtx_mode = 0;
    312     return 0;  // Speech.
    313   }
    314 }
    315 
    316 /* |frame_size| is set to maximum Opus frame size in the normal case, and
    317  * is set to the number of samples needed for PLC in case of losses.
    318  * It is up to the caller to make sure the value is correct. */
    319 static int DecodeNative(OpusDecInst* inst, const uint8_t* encoded,
    320                         size_t encoded_bytes, int frame_size,
    321                         int16_t* decoded, int16_t* audio_type, int decode_fec) {
    322   int res = opus_decode(inst->decoder, encoded, (opus_int32)encoded_bytes,
    323                         (opus_int16*)decoded, frame_size, decode_fec);
    324 
    325   if (res <= 0)
    326     return -1;
    327 
    328   *audio_type = DetermineAudioType(inst, encoded_bytes);
    329 
    330   return res;
    331 }
    332 
    333 int WebRtcOpus_Decode(OpusDecInst* inst, const uint8_t* encoded,
    334                       size_t encoded_bytes, int16_t* decoded,
    335                       int16_t* audio_type) {
    336   int decoded_samples;
    337 
    338   if (encoded_bytes == 0) {
    339     *audio_type = DetermineAudioType(inst, encoded_bytes);
    340     decoded_samples = WebRtcOpus_DecodePlc(inst, decoded, 1);
    341   } else {
    342     decoded_samples = DecodeNative(inst,
    343                                    encoded,
    344                                    encoded_bytes,
    345                                    kWebRtcOpusMaxFrameSizePerChannel,
    346                                    decoded,
    347                                    audio_type,
    348                                    0);
    349   }
    350   if (decoded_samples < 0) {
    351     return -1;
    352   }
    353 
    354   /* Update decoded sample memory, to be used by the PLC in case of losses. */
    355   inst->prev_decoded_samples = decoded_samples;
    356 
    357   return decoded_samples;
    358 }
    359 
    360 int WebRtcOpus_DecodePlc(OpusDecInst* inst, int16_t* decoded,
    361                          int number_of_lost_frames) {
    362   int16_t audio_type = 0;
    363   int decoded_samples;
    364   int plc_samples;
    365 
    366   /* The number of samples we ask for is |number_of_lost_frames| times
    367    * |prev_decoded_samples_|. Limit the number of samples to maximum
    368    * |kWebRtcOpusMaxFrameSizePerChannel|. */
    369   plc_samples = number_of_lost_frames * inst->prev_decoded_samples;
    370   plc_samples = (plc_samples <= kWebRtcOpusMaxFrameSizePerChannel) ?
    371       plc_samples : kWebRtcOpusMaxFrameSizePerChannel;
    372   decoded_samples = DecodeNative(inst, NULL, 0, plc_samples,
    373                                  decoded, &audio_type, 0);
    374   if (decoded_samples < 0) {
    375     return -1;
    376   }
    377 
    378   return decoded_samples;
    379 }
    380 
    381 int WebRtcOpus_DecodeFec(OpusDecInst* inst, const uint8_t* encoded,
    382                          size_t encoded_bytes, int16_t* decoded,
    383                          int16_t* audio_type) {
    384   int decoded_samples;
    385   int fec_samples;
    386 
    387   if (WebRtcOpus_PacketHasFec(encoded, encoded_bytes) != 1) {
    388     return 0;
    389   }
    390 
    391   fec_samples = opus_packet_get_samples_per_frame(encoded, 48000);
    392 
    393   decoded_samples = DecodeNative(inst, encoded, encoded_bytes,
    394                                  fec_samples, decoded, audio_type, 1);
    395   if (decoded_samples < 0) {
    396     return -1;
    397   }
    398 
    399   return decoded_samples;
    400 }
    401 
    402 int WebRtcOpus_DurationEst(OpusDecInst* inst,
    403                            const uint8_t* payload,
    404                            size_t payload_length_bytes) {
    405   if (payload_length_bytes == 0) {
    406     // WebRtcOpus_Decode calls PLC when payload length is zero. So we return
    407     // PLC duration correspondingly.
    408     return WebRtcOpus_PlcDuration(inst);
    409   }
    410 
    411   int frames, samples;
    412   frames = opus_packet_get_nb_frames(payload, (opus_int32)payload_length_bytes);
    413   if (frames < 0) {
    414     /* Invalid payload data. */
    415     return 0;
    416   }
    417   samples = frames * opus_packet_get_samples_per_frame(payload, 48000);
    418   if (samples < 120 || samples > 5760) {
    419     /* Invalid payload duration. */
    420     return 0;
    421   }
    422   return samples;
    423 }
    424 
    425 int WebRtcOpus_PlcDuration(OpusDecInst* inst) {
    426   /* The number of samples we ask for is |number_of_lost_frames| times
    427    * |prev_decoded_samples_|. Limit the number of samples to maximum
    428    * |kWebRtcOpusMaxFrameSizePerChannel|. */
    429   const int plc_samples = inst->prev_decoded_samples;
    430   return (plc_samples <= kWebRtcOpusMaxFrameSizePerChannel) ?
    431       plc_samples : kWebRtcOpusMaxFrameSizePerChannel;
    432 }
    433 
    434 int WebRtcOpus_FecDurationEst(const uint8_t* payload,
    435                               size_t payload_length_bytes) {
    436   int samples;
    437   if (WebRtcOpus_PacketHasFec(payload, payload_length_bytes) != 1) {
    438     return 0;
    439   }
    440 
    441   samples = opus_packet_get_samples_per_frame(payload, 48000);
    442   if (samples < 480 || samples > 5760) {
    443     /* Invalid payload duration. */
    444     return 0;
    445   }
    446   return samples;
    447 }
    448 
    449 int WebRtcOpus_PacketHasFec(const uint8_t* payload,
    450                             size_t payload_length_bytes) {
    451   int frames, channels, payload_length_ms;
    452   int n;
    453   opus_int16 frame_sizes[48];
    454   const unsigned char *frame_data[48];
    455 
    456   if (payload == NULL || payload_length_bytes == 0)
    457     return 0;
    458 
    459   /* In CELT_ONLY mode, packets should not have FEC. */
    460   if (payload[0] & 0x80)
    461     return 0;
    462 
    463   payload_length_ms = opus_packet_get_samples_per_frame(payload, 48000) / 48;
    464   if (10 > payload_length_ms)
    465     payload_length_ms = 10;
    466 
    467   channels = opus_packet_get_nb_channels(payload);
    468 
    469   switch (payload_length_ms) {
    470     case 10:
    471     case 20: {
    472       frames = 1;
    473       break;
    474     }
    475     case 40: {
    476       frames = 2;
    477       break;
    478     }
    479     case 60: {
    480       frames = 3;
    481       break;
    482     }
    483     default: {
    484       return 0; // It is actually even an invalid packet.
    485     }
    486   }
    487 
    488   /* The following is to parse the LBRR flags. */
    489   if (opus_packet_parse(payload, (opus_int32)payload_length_bytes, NULL,
    490                         frame_data, frame_sizes, NULL) < 0) {
    491     return 0;
    492   }
    493 
    494   if (frame_sizes[0] <= 1) {
    495     return 0;
    496   }
    497 
    498   for (n = 0; n < channels; n++) {
    499     if (frame_data[0][0] & (0x80 >> ((n + 1) * (frames + 1) - 1)))
    500       return 1;
    501   }
    502 
    503   return 0;
    504 }
    505