Home | History | Annotate | Download | only in opus
      1 /*
      2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #include "webrtc/modules/audio_coding/codecs/opus/interface/opus_interface.h"
     12 #include "webrtc/modules/audio_coding/codecs/opus/opus_inst.h"
     13 
     14 #include <stdlib.h>
     15 #include <string.h>
     16 
     17 enum {
     18   /* Maximum supported frame size in WebRTC is 60 ms. */
     19   kWebRtcOpusMaxEncodeFrameSizeMs = 60,
     20 
     21   /* The format allows up to 120 ms frames. Since we don't control the other
     22    * side, we must allow for packets of that size. NetEq is currently limited
     23    * to 60 ms on the receive side. */
     24   kWebRtcOpusMaxDecodeFrameSizeMs = 120,
     25 
     26   /* Maximum sample count per channel is 48 kHz * maximum frame size in
     27    * milliseconds. */
     28   kWebRtcOpusMaxFrameSizePerChannel = 48 * kWebRtcOpusMaxDecodeFrameSizeMs,
     29 
     30   /* Default frame size, 20 ms @ 48 kHz, in samples (for one channel). */
     31   kWebRtcOpusDefaultFrameSize = 960,
     32 };
     33 
     34 int16_t WebRtcOpus_EncoderCreate(OpusEncInst** inst, int32_t channels) {
     35   OpusEncInst* state;
     36   if (inst != NULL) {
     37     state = (OpusEncInst*) calloc(1, sizeof(OpusEncInst));
     38     if (state) {
     39       int error;
     40       /* Default to VoIP application for mono, and AUDIO for stereo. */
     41       int application = (channels == 1) ? OPUS_APPLICATION_VOIP :
     42           OPUS_APPLICATION_AUDIO;
     43 
     44       state->encoder = opus_encoder_create(48000, channels, application,
     45                                            &error);
     46       if (error == OPUS_OK && state->encoder != NULL) {
     47         *inst = state;
     48         return 0;
     49       }
     50       free(state);
     51     }
     52   }
     53   return -1;
     54 }
     55 
     56 int16_t WebRtcOpus_EncoderFree(OpusEncInst* inst) {
     57   if (inst) {
     58     opus_encoder_destroy(inst->encoder);
     59     free(inst);
     60     return 0;
     61   } else {
     62     return -1;
     63   }
     64 }
     65 
     66 int16_t WebRtcOpus_Encode(OpusEncInst* inst, int16_t* audio_in, int16_t samples,
     67                           int16_t length_encoded_buffer, uint8_t* encoded) {
     68   opus_int16* audio = (opus_int16*) audio_in;
     69   unsigned char* coded = encoded;
     70   int res;
     71 
     72   if (samples > 48 * kWebRtcOpusMaxEncodeFrameSizeMs) {
     73     return -1;
     74   }
     75 
     76   res = opus_encode(inst->encoder, audio, samples, coded,
     77                     length_encoded_buffer);
     78 
     79   if (res > 0) {
     80     return res;
     81   }
     82   return -1;
     83 }
     84 
     85 int16_t WebRtcOpus_SetBitRate(OpusEncInst* inst, int32_t rate) {
     86   if (inst) {
     87     return opus_encoder_ctl(inst->encoder, OPUS_SET_BITRATE(rate));
     88   } else {
     89     return -1;
     90   }
     91 }
     92 
     93 int16_t WebRtcOpus_SetPacketLossRate(OpusEncInst* inst, int32_t loss_rate) {
     94   if (inst) {
     95     return opus_encoder_ctl(inst->encoder,
     96                             OPUS_SET_PACKET_LOSS_PERC(loss_rate));
     97   } else {
     98     return -1;
     99   }
    100 }
    101 
    102 int16_t WebRtcOpus_SetMaxPlaybackRate(OpusEncInst* inst, int32_t frequency_hz) {
    103   opus_int32 set_bandwidth;
    104 
    105   if (!inst)
    106     return -1;
    107 
    108   if (frequency_hz <= 8000) {
    109     set_bandwidth = OPUS_BANDWIDTH_NARROWBAND;
    110   } else if (frequency_hz <= 12000) {
    111     set_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND;
    112   } else if (frequency_hz <= 16000) {
    113     set_bandwidth = OPUS_BANDWIDTH_WIDEBAND;
    114   } else if (frequency_hz <= 24000) {
    115     set_bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND;
    116   } else {
    117     set_bandwidth = OPUS_BANDWIDTH_FULLBAND;
    118   }
    119   return opus_encoder_ctl(inst->encoder,
    120                           OPUS_SET_MAX_BANDWIDTH(set_bandwidth));
    121 }
    122 
    123 int16_t WebRtcOpus_EnableFec(OpusEncInst* inst) {
    124   if (inst) {
    125     return opus_encoder_ctl(inst->encoder, OPUS_SET_INBAND_FEC(1));
    126   } else {
    127     return -1;
    128   }
    129 }
    130 
    131 int16_t WebRtcOpus_DisableFec(OpusEncInst* inst) {
    132   if (inst) {
    133     return opus_encoder_ctl(inst->encoder, OPUS_SET_INBAND_FEC(0));
    134   } else {
    135     return -1;
    136   }
    137 }
    138 
    139 int16_t WebRtcOpus_SetComplexity(OpusEncInst* inst, int32_t complexity) {
    140   if (inst) {
    141     return opus_encoder_ctl(inst->encoder, OPUS_SET_COMPLEXITY(complexity));
    142   } else {
    143     return -1;
    144   }
    145 }
    146 
    147 int16_t WebRtcOpus_DecoderCreate(OpusDecInst** inst, int channels) {
    148   int error_l;
    149   int error_r;
    150   OpusDecInst* state;
    151 
    152   if (inst != NULL) {
    153     /* Create Opus decoder state. */
    154     state = (OpusDecInst*) calloc(1, sizeof(OpusDecInst));
    155     if (state == NULL) {
    156       return -1;
    157     }
    158 
    159     /* Create new memory for left and right channel, always at 48000 Hz. */
    160     state->decoder_left = opus_decoder_create(48000, channels, &error_l);
    161     state->decoder_right = opus_decoder_create(48000, channels, &error_r);
    162     if (error_l == OPUS_OK && error_r == OPUS_OK && state->decoder_left != NULL
    163         && state->decoder_right != NULL) {
    164       /* Creation of memory all ok. */
    165       state->channels = channels;
    166       state->prev_decoded_samples = kWebRtcOpusDefaultFrameSize;
    167       *inst = state;
    168       return 0;
    169     }
    170 
    171     /* If memory allocation was unsuccessful, free the entire state. */
    172     if (state->decoder_left) {
    173       opus_decoder_destroy(state->decoder_left);
    174     }
    175     if (state->decoder_right) {
    176       opus_decoder_destroy(state->decoder_right);
    177     }
    178     free(state);
    179   }
    180   return -1;
    181 }
    182 
    183 int16_t WebRtcOpus_DecoderFree(OpusDecInst* inst) {
    184   if (inst) {
    185     opus_decoder_destroy(inst->decoder_left);
    186     opus_decoder_destroy(inst->decoder_right);
    187     free(inst);
    188     return 0;
    189   } else {
    190     return -1;
    191   }
    192 }
    193 
    194 int WebRtcOpus_DecoderChannels(OpusDecInst* inst) {
    195   return inst->channels;
    196 }
    197 
    198 int16_t WebRtcOpus_DecoderInitNew(OpusDecInst* inst) {
    199   int error = opus_decoder_ctl(inst->decoder_left, OPUS_RESET_STATE);
    200   if (error == OPUS_OK) {
    201     return 0;
    202   }
    203   return -1;
    204 }
    205 
    206 int16_t WebRtcOpus_DecoderInit(OpusDecInst* inst) {
    207   int error = opus_decoder_ctl(inst->decoder_left, OPUS_RESET_STATE);
    208   if (error == OPUS_OK) {
    209     return 0;
    210   }
    211   return -1;
    212 }
    213 
    214 int16_t WebRtcOpus_DecoderInitSlave(OpusDecInst* inst) {
    215   int error = opus_decoder_ctl(inst->decoder_right, OPUS_RESET_STATE);
    216   if (error == OPUS_OK) {
    217     return 0;
    218   }
    219   return -1;
    220 }
    221 
    222 /* |frame_size| is set to maximum Opus frame size in the normal case, and
    223  * is set to the number of samples needed for PLC in case of losses.
    224  * It is up to the caller to make sure the value is correct. */
    225 static int DecodeNative(OpusDecoder* inst, const int16_t* encoded,
    226                         int16_t encoded_bytes, int frame_size,
    227                         int16_t* decoded, int16_t* audio_type) {
    228   unsigned char* coded = (unsigned char*) encoded;
    229   opus_int16* audio = (opus_int16*) decoded;
    230 
    231   int res = opus_decode(inst, coded, encoded_bytes, audio, frame_size, 0);
    232 
    233   /* TODO(tlegrand): set to DTX for zero-length packets? */
    234   *audio_type = 0;
    235 
    236   if (res > 0) {
    237     return res;
    238   }
    239   return -1;
    240 }
    241 
    242 static int DecodeFec(OpusDecoder* inst, const int16_t* encoded,
    243                      int16_t encoded_bytes, int frame_size,
    244                      int16_t* decoded, int16_t* audio_type) {
    245   unsigned char* coded = (unsigned char*) encoded;
    246   opus_int16* audio = (opus_int16*) decoded;
    247 
    248   int res = opus_decode(inst, coded, encoded_bytes, audio, frame_size, 1);
    249 
    250   /* TODO(tlegrand): set to DTX for zero-length packets? */
    251   *audio_type = 0;
    252 
    253   if (res > 0) {
    254     return res;
    255   }
    256   return -1;
    257 }
    258 
    259 int16_t WebRtcOpus_DecodeNew(OpusDecInst* inst, const uint8_t* encoded,
    260                              int16_t encoded_bytes, int16_t* decoded,
    261                              int16_t* audio_type) {
    262   int16_t* coded = (int16_t*)encoded;
    263   int decoded_samples;
    264 
    265   decoded_samples = DecodeNative(inst->decoder_left, coded, encoded_bytes,
    266                                  kWebRtcOpusMaxFrameSizePerChannel,
    267                                  decoded, audio_type);
    268   if (decoded_samples < 0) {
    269     return -1;
    270   }
    271 
    272   /* Update decoded sample memory, to be used by the PLC in case of losses. */
    273   inst->prev_decoded_samples = decoded_samples;
    274 
    275   return decoded_samples;
    276 }
    277 
    278 int16_t WebRtcOpus_Decode(OpusDecInst* inst, const int16_t* encoded,
    279                           int16_t encoded_bytes, int16_t* decoded,
    280                           int16_t* audio_type) {
    281   int decoded_samples;
    282   int i;
    283 
    284   /* If mono case, just do a regular call to the decoder.
    285    * If stereo, call to WebRtcOpus_Decode() gives left channel as output, and
    286    * calls to WebRtcOpus_Decode_slave() give right channel as output.
    287    * This is to make stereo work with the current setup of NetEQ, which
    288    * requires two calls to the decoder to produce stereo. */
    289 
    290   decoded_samples = DecodeNative(inst->decoder_left, encoded, encoded_bytes,
    291                                  kWebRtcOpusMaxFrameSizePerChannel, decoded,
    292                                  audio_type);
    293   if (decoded_samples < 0) {
    294     return -1;
    295   }
    296   if (inst->channels == 2) {
    297     /* The parameter |decoded_samples| holds the number of samples pairs, in
    298      * case of stereo. Number of samples in |decoded| equals |decoded_samples|
    299      * times 2. */
    300     for (i = 0; i < decoded_samples; i++) {
    301       /* Take every second sample, starting at the first sample. This gives
    302        * the left channel. */
    303       decoded[i] = decoded[i * 2];
    304     }
    305   }
    306 
    307   /* Update decoded sample memory, to be used by the PLC in case of losses. */
    308   inst->prev_decoded_samples = decoded_samples;
    309 
    310   return decoded_samples;
    311 }
    312 
    313 int16_t WebRtcOpus_DecodeSlave(OpusDecInst* inst, const int16_t* encoded,
    314                                int16_t encoded_bytes, int16_t* decoded,
    315                                int16_t* audio_type) {
    316   int decoded_samples;
    317   int i;
    318 
    319   decoded_samples = DecodeNative(inst->decoder_right, encoded, encoded_bytes,
    320                                  kWebRtcOpusMaxFrameSizePerChannel, decoded,
    321                                  audio_type);
    322   if (decoded_samples < 0) {
    323     return -1;
    324   }
    325   if (inst->channels == 2) {
    326     /* The parameter |decoded_samples| holds the number of samples pairs, in
    327      * case of stereo. Number of samples in |decoded| equals |decoded_samples|
    328      * times 2. */
    329     for (i = 0; i < decoded_samples; i++) {
    330       /* Take every second sample, starting at the second sample. This gives
    331        * the right channel. */
    332       decoded[i] = decoded[i * 2 + 1];
    333     }
    334   } else {
    335     /* Decode slave should never be called for mono packets. */
    336     return -1;
    337   }
    338 
    339   return decoded_samples;
    340 }
    341 
    342 int16_t WebRtcOpus_DecodePlc(OpusDecInst* inst, int16_t* decoded,
    343                              int16_t number_of_lost_frames) {
    344   int16_t audio_type = 0;
    345   int decoded_samples;
    346   int plc_samples;
    347 
    348   /* The number of samples we ask for is |number_of_lost_frames| times
    349    * |prev_decoded_samples_|. Limit the number of samples to maximum
    350    * |kWebRtcOpusMaxFrameSizePerChannel|. */
    351   plc_samples = number_of_lost_frames * inst->prev_decoded_samples;
    352   plc_samples = (plc_samples <= kWebRtcOpusMaxFrameSizePerChannel) ?
    353       plc_samples : kWebRtcOpusMaxFrameSizePerChannel;
    354   decoded_samples = DecodeNative(inst->decoder_left, NULL, 0, plc_samples,
    355                                  decoded, &audio_type);
    356   if (decoded_samples < 0) {
    357     return -1;
    358   }
    359 
    360   return decoded_samples;
    361 }
    362 
    363 int16_t WebRtcOpus_DecodePlcMaster(OpusDecInst* inst, int16_t* decoded,
    364                                    int16_t number_of_lost_frames) {
    365   int decoded_samples;
    366   int16_t audio_type = 0;
    367   int plc_samples;
    368   int i;
    369 
    370   /* If mono case, just do a regular call to the decoder.
    371    * If stereo, call to WebRtcOpus_DecodePlcMaster() gives left channel as
    372    * output, and calls to WebRtcOpus_DecodePlcSlave() give right channel as
    373    * output. This is to make stereo work with the current setup of NetEQ, which
    374    * requires two calls to the decoder to produce stereo. */
    375 
    376   /* The number of samples we ask for is |number_of_lost_frames| times
    377    * |prev_decoded_samples_|. Limit the number of samples to maximum
    378    * |kWebRtcOpusMaxFrameSizePerChannel|. */
    379   plc_samples = number_of_lost_frames * inst->prev_decoded_samples;
    380   plc_samples = (plc_samples <= kWebRtcOpusMaxFrameSizePerChannel) ?
    381       plc_samples : kWebRtcOpusMaxFrameSizePerChannel;
    382   decoded_samples = DecodeNative(inst->decoder_left, NULL, 0, plc_samples,
    383                                  decoded, &audio_type);
    384   if (decoded_samples < 0) {
    385     return -1;
    386   }
    387 
    388   if (inst->channels == 2) {
    389     /* The parameter |decoded_samples| holds the number of sample pairs, in
    390      * case of stereo. The original number of samples in |decoded| equals
    391      * |decoded_samples| times 2. */
    392     for (i = 0; i < decoded_samples; i++) {
    393       /* Take every second sample, starting at the first sample. This gives
    394        * the left channel. */
    395       decoded[i] = decoded[i * 2];
    396     }
    397   }
    398 
    399   return decoded_samples;
    400 }
    401 
    402 int16_t WebRtcOpus_DecodePlcSlave(OpusDecInst* inst, int16_t* decoded,
    403                                   int16_t number_of_lost_frames) {
    404   int decoded_samples;
    405   int16_t audio_type = 0;
    406   int plc_samples;
    407   int i;
    408 
    409   /* Calls to WebRtcOpus_DecodePlcSlave() give right channel as output.
    410    * The function should never be called in the mono case. */
    411   if (inst->channels != 2) {
    412     return -1;
    413   }
    414 
    415   /* The number of samples we ask for is |number_of_lost_frames| times
    416    *  |prev_decoded_samples_|. Limit the number of samples to maximum
    417    *  |kWebRtcOpusMaxFrameSizePerChannel|. */
    418   plc_samples = number_of_lost_frames * inst->prev_decoded_samples;
    419   plc_samples = (plc_samples <= kWebRtcOpusMaxFrameSizePerChannel)
    420       ? plc_samples : kWebRtcOpusMaxFrameSizePerChannel;
    421   decoded_samples = DecodeNative(inst->decoder_right, NULL, 0, plc_samples,
    422                                  decoded, &audio_type);
    423   if (decoded_samples < 0) {
    424     return -1;
    425   }
    426 
    427   /* The parameter |decoded_samples| holds the number of sample pairs,
    428    * The original number of samples in |decoded| equals |decoded_samples|
    429    * times 2. */
    430   for (i = 0; i < decoded_samples; i++) {
    431     /* Take every second sample, starting at the second sample. This gives
    432      * the right channel. */
    433     decoded[i] = decoded[i * 2 + 1];
    434   }
    435 
    436   return decoded_samples;
    437 }
    438 
    439 int16_t WebRtcOpus_DecodeFec(OpusDecInst* inst, const uint8_t* encoded,
    440                              int16_t encoded_bytes, int16_t* decoded,
    441                              int16_t* audio_type) {
    442   int16_t* coded = (int16_t*)encoded;
    443   int decoded_samples;
    444   int fec_samples;
    445 
    446   if (WebRtcOpus_PacketHasFec(encoded, encoded_bytes) != 1) {
    447     return 0;
    448   }
    449 
    450   fec_samples = opus_packet_get_samples_per_frame(encoded, 48000);
    451 
    452   decoded_samples = DecodeFec(inst->decoder_left, coded, encoded_bytes,
    453                               fec_samples, decoded, audio_type);
    454   if (decoded_samples < 0) {
    455     return -1;
    456   }
    457 
    458   return decoded_samples;
    459 }
    460 
    461 int WebRtcOpus_DurationEst(OpusDecInst* inst,
    462                            const uint8_t* payload,
    463                            int payload_length_bytes) {
    464   int frames, samples;
    465   frames = opus_packet_get_nb_frames(payload, payload_length_bytes);
    466   if (frames < 0) {
    467     /* Invalid payload data. */
    468     return 0;
    469   }
    470   samples = frames * opus_packet_get_samples_per_frame(payload, 48000);
    471   if (samples < 120 || samples > 5760) {
    472     /* Invalid payload duration. */
    473     return 0;
    474   }
    475   return samples;
    476 }
    477 
    478 int WebRtcOpus_FecDurationEst(const uint8_t* payload,
    479                               int payload_length_bytes) {
    480   int samples;
    481   if (WebRtcOpus_PacketHasFec(payload, payload_length_bytes) != 1) {
    482     return 0;
    483   }
    484 
    485   samples = opus_packet_get_samples_per_frame(payload, 48000);
    486   if (samples < 480 || samples > 5760) {
    487     /* Invalid payload duration. */
    488     return 0;
    489   }
    490   return samples;
    491 }
    492 
    493 int WebRtcOpus_PacketHasFec(const uint8_t* payload,
    494                             int payload_length_bytes) {
    495   int frames, channels, payload_length_ms;
    496   int n;
    497   opus_int16 frame_sizes[48];
    498   const unsigned char *frame_data[48];
    499 
    500   if (payload == NULL || payload_length_bytes <= 0)
    501     return 0;
    502 
    503   /* In CELT_ONLY mode, packets should not have FEC. */
    504   if (payload[0] & 0x80)
    505     return 0;
    506 
    507   payload_length_ms = opus_packet_get_samples_per_frame(payload, 48000) / 48;
    508   if (10 > payload_length_ms)
    509     payload_length_ms = 10;
    510 
    511   channels = opus_packet_get_nb_channels(payload);
    512 
    513   switch (payload_length_ms) {
    514     case 10:
    515     case 20: {
    516       frames = 1;
    517       break;
    518     }
    519     case 40: {
    520       frames = 2;
    521       break;
    522     }
    523     case 60: {
    524       frames = 3;
    525       break;
    526     }
    527     default: {
    528       return 0; // It is actually even an invalid packet.
    529     }
    530   }
    531 
    532   /* The following is to parse the LBRR flags. */
    533   if (opus_packet_parse(payload, payload_length_bytes, NULL, frame_data,
    534                         frame_sizes, NULL) < 0) {
    535     return 0;
    536   }
    537 
    538   if (frame_sizes[0] <= 1) {
    539     return 0;
    540   }
    541 
    542   for (n = 0; n < channels; n++) {
    543     if (frame_data[0][0] & (0x80 >> ((n + 1) * (frames + 1) - 1)))
    544       return 1;
    545   }
    546 
    547   return 0;
    548 }
    549