/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
     10 
     11 #ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_INCLUDE_NETEQ_H_
     12 #define WEBRTC_MODULES_AUDIO_CODING_NETEQ_INCLUDE_NETEQ_H_
     13 
#include <string.h>  // Provide access to size_t.

#include <string>
#include <vector>

#include "webrtc/base/constructormagic.h"
#include "webrtc/common_types.h"
#include "webrtc/modules/audio_coding/neteq/audio_decoder_impl.h"
#include "webrtc/typedefs.h"
     22 
     23 namespace webrtc {
     24 
     25 // Forward declarations.
     26 struct WebRtcRTPHeader;
     27 
     28 struct NetEqNetworkStatistics {
     29   uint16_t current_buffer_size_ms;  // Current jitter buffer size in ms.
     30   uint16_t preferred_buffer_size_ms;  // Target buffer size in ms.
     31   uint16_t jitter_peaks_found;  // 1 if adding extra delay due to peaky
     32                                 // jitter; 0 otherwise.
     33   uint16_t packet_loss_rate;  // Loss rate (network + late) in Q14.
     34   uint16_t packet_discard_rate;  // Late loss rate in Q14.
     35   uint16_t expand_rate;  // Fraction (of original stream) of synthesized
     36                          // audio inserted through expansion (in Q14).
     37   uint16_t speech_expand_rate;  // Fraction (of original stream) of synthesized
     38                                 // speech inserted through expansion (in Q14).
     39   uint16_t preemptive_rate;  // Fraction of data inserted through pre-emptive
     40                              // expansion (in Q14).
     41   uint16_t accelerate_rate;  // Fraction of data removed through acceleration
     42                              // (in Q14).
     43   uint16_t secondary_decoded_rate;  // Fraction of data coming from secondary
     44                                     // decoding (in Q14).
     45   int32_t clockdrift_ppm;  // Average clock-drift in parts-per-million
     46                            // (positive or negative).
     47   size_t added_zero_samples;  // Number of zero samples added in "off" mode.
     48   // Statistics for packet waiting times, i.e., the time between a packet
     49   // arrives until it is decoded.
     50   int mean_waiting_time_ms;
     51   int median_waiting_time_ms;
     52   int min_waiting_time_ms;
     53   int max_waiting_time_ms;
     54 };
     55 
     56 enum NetEqOutputType {
     57   kOutputNormal,
     58   kOutputPLC,
     59   kOutputCNG,
     60   kOutputPLCtoCNG,
     61   kOutputVADPassive
     62 };
     63 
     64 enum NetEqPlayoutMode {
     65   kPlayoutOn,
     66   kPlayoutOff,
     67   kPlayoutFax,
     68   kPlayoutStreaming
     69 };
     70 
     71 // This is the interface class for NetEq.
     72 class NetEq {
     73  public:
     74   enum BackgroundNoiseMode {
     75     kBgnOn,    // Default behavior with eternal noise.
     76     kBgnFade,  // Noise fades to zero after some time.
     77     kBgnOff    // Background noise is always zero.
     78   };
     79 
     80   struct Config {
     81     Config()
     82         : sample_rate_hz(16000),
     83           enable_audio_classifier(false),
     84           enable_post_decode_vad(false),
     85           max_packets_in_buffer(50),
     86           // |max_delay_ms| has the same effect as calling SetMaximumDelay().
     87           max_delay_ms(2000),
     88           background_noise_mode(kBgnOff),
     89           playout_mode(kPlayoutOn),
     90           enable_fast_accelerate(false) {}
     91 
     92     std::string ToString() const;
     93 
     94     int sample_rate_hz;  // Initial value. Will change with input data.
     95     bool enable_audio_classifier;
     96     bool enable_post_decode_vad;
     97     size_t max_packets_in_buffer;
     98     int max_delay_ms;
     99     BackgroundNoiseMode background_noise_mode;
    100     NetEqPlayoutMode playout_mode;
    101     bool enable_fast_accelerate;
    102   };
    103 
    104   enum ReturnCodes {
    105     kOK = 0,
    106     kFail = -1,
    107     kNotImplemented = -2
    108   };
    109 
    110   enum ErrorCodes {
    111     kNoError = 0,
    112     kOtherError,
    113     kInvalidRtpPayloadType,
    114     kUnknownRtpPayloadType,
    115     kCodecNotSupported,
    116     kDecoderExists,
    117     kDecoderNotFound,
    118     kInvalidSampleRate,
    119     kInvalidPointer,
    120     kAccelerateError,
    121     kPreemptiveExpandError,
    122     kComfortNoiseErrorCode,
    123     kDecoderErrorCode,
    124     kOtherDecoderError,
    125     kInvalidOperation,
    126     kDtmfParameterError,
    127     kDtmfParsingError,
    128     kDtmfInsertError,
    129     kStereoNotSupported,
    130     kSampleUnderrun,
    131     kDecodedTooMuch,
    132     kFrameSplitError,
    133     kRedundancySplitError,
    134     kPacketBufferCorruption,
    135     kSyncPacketNotAccepted
    136   };
    137 
    138   // Creates a new NetEq object, with parameters set in |config|. The |config|
    139   // object will only have to be valid for the duration of the call to this
    140   // method.
    141   static NetEq* Create(const NetEq::Config& config);
    142 
    143   virtual ~NetEq() {}
    144 
    145   // Inserts a new packet into NetEq. The |receive_timestamp| is an indication
    146   // of the time when the packet was received, and should be measured with
    147   // the same tick rate as the RTP timestamp of the current payload.
    148   // Returns 0 on success, -1 on failure.
    149   virtual int InsertPacket(const WebRtcRTPHeader& rtp_header,
    150                            rtc::ArrayView<const uint8_t> payload,
    151                            uint32_t receive_timestamp) = 0;
    152 
    153   // Inserts a sync-packet into packet queue. Sync-packets are decoded to
    154   // silence and are intended to keep AV-sync intact in an event of long packet
    155   // losses when Video NACK is enabled but Audio NACK is not. Clients of NetEq
    156   // might insert sync-packet when they observe that buffer level of NetEq is
    157   // decreasing below a certain threshold, defined by the application.
    158   // Sync-packets should have the same payload type as the last audio payload
    159   // type, i.e. they cannot have DTMF or CNG payload type, nor a codec change
    160   // can be implied by inserting a sync-packet.
    161   // Returns kOk on success, kFail on failure.
    162   virtual int InsertSyncPacket(const WebRtcRTPHeader& rtp_header,
    163                                uint32_t receive_timestamp) = 0;
    164 
    165   // Instructs NetEq to deliver 10 ms of audio data. The data is written to
    166   // |output_audio|, which can hold (at least) |max_length| elements.
    167   // The number of channels that were written to the output is provided in
    168   // the output variable |num_channels|, and each channel contains
    169   // |samples_per_channel| elements. If more than one channel is written,
    170   // the samples are interleaved.
    171   // The speech type is written to |type|, if |type| is not NULL.
    172   // Returns kOK on success, or kFail in case of an error.
    173   virtual int GetAudio(size_t max_length, int16_t* output_audio,
    174                        size_t* samples_per_channel, size_t* num_channels,
    175                        NetEqOutputType* type) = 0;
    176 
    177   // Associates |rtp_payload_type| with |codec| and |codec_name|, and stores the
    178   // information in the codec database. Returns 0 on success, -1 on failure.
    179   // The name is only used to provide information back to the caller about the
    180   // decoders. Hence, the name is arbitrary, and may be empty.
    181   virtual int RegisterPayloadType(NetEqDecoder codec,
    182                                   const std::string& codec_name,
    183                                   uint8_t rtp_payload_type) = 0;
    184 
    185   // Provides an externally created decoder object |decoder| to insert in the
    186   // decoder database. The decoder implements a decoder of type |codec| and
    187   // associates it with |rtp_payload_type| and |codec_name|. The decoder will
    188   // produce samples at the rate |sample_rate_hz|. Returns kOK on success, kFail
    189   // on failure.
    190   // The name is only used to provide information back to the caller about the
    191   // decoders. Hence, the name is arbitrary, and may be empty.
    192   virtual int RegisterExternalDecoder(AudioDecoder* decoder,
    193                                       NetEqDecoder codec,
    194                                       const std::string& codec_name,
    195                                       uint8_t rtp_payload_type,
    196                                       int sample_rate_hz) = 0;
    197 
    198   // Removes |rtp_payload_type| from the codec database. Returns 0 on success,
    199   // -1 on failure.
    200   virtual int RemovePayloadType(uint8_t rtp_payload_type) = 0;
    201 
    202   // Sets a minimum delay in millisecond for packet buffer. The minimum is
    203   // maintained unless a higher latency is dictated by channel condition.
    204   // Returns true if the minimum is successfully applied, otherwise false is
    205   // returned.
    206   virtual bool SetMinimumDelay(int delay_ms) = 0;
    207 
    208   // Sets a maximum delay in milliseconds for packet buffer. The latency will
    209   // not exceed the given value, even required delay (given the channel
    210   // conditions) is higher. Calling this method has the same effect as setting
    211   // the |max_delay_ms| value in the NetEq::Config struct.
    212   virtual bool SetMaximumDelay(int delay_ms) = 0;
    213 
    214   // The smallest latency required. This is computed bases on inter-arrival
    215   // time and internal NetEq logic. Note that in computing this latency none of
    216   // the user defined limits (applied by calling setMinimumDelay() and/or
    217   // SetMaximumDelay()) are applied.
    218   virtual int LeastRequiredDelayMs() const = 0;
    219 
    220   // Not implemented.
    221   virtual int SetTargetDelay() = 0;
    222 
    223   // Not implemented.
    224   virtual int TargetDelay() = 0;
    225 
    226   // Returns the current total delay (packet buffer and sync buffer) in ms.
    227   virtual int CurrentDelayMs() const = 0;
    228 
    229   // Sets the playout mode to |mode|.
    230   // Deprecated. Set the mode in the Config struct passed to the constructor.
    231   // TODO(henrik.lundin) Delete.
    232   virtual void SetPlayoutMode(NetEqPlayoutMode mode) = 0;
    233 
    234   // Returns the current playout mode.
    235   // Deprecated.
    236   // TODO(henrik.lundin) Delete.
    237   virtual NetEqPlayoutMode PlayoutMode() const = 0;
    238 
    239   // Writes the current network statistics to |stats|. The statistics are reset
    240   // after the call.
    241   virtual int NetworkStatistics(NetEqNetworkStatistics* stats) = 0;
    242 
    243   // Writes the current RTCP statistics to |stats|. The statistics are reset
    244   // and a new report period is started with the call.
    245   virtual void GetRtcpStatistics(RtcpStatistics* stats) = 0;
    246 
    247   // Same as RtcpStatistics(), but does not reset anything.
    248   virtual void GetRtcpStatisticsNoReset(RtcpStatistics* stats) = 0;
    249 
    250   // Enables post-decode VAD. When enabled, GetAudio() will return
    251   // kOutputVADPassive when the signal contains no speech.
    252   virtual void EnableVad() = 0;
    253 
    254   // Disables post-decode VAD.
    255   virtual void DisableVad() = 0;
    256 
    257   // Gets the RTP timestamp for the last sample delivered by GetAudio().
    258   // Returns true if the RTP timestamp is valid, otherwise false.
    259   virtual bool GetPlayoutTimestamp(uint32_t* timestamp) = 0;
    260 
    261   // Returns the sample rate in Hz of the audio produced in the last GetAudio
    262   // call. If GetAudio has not been called yet, the configured sample rate
    263   // (Config::sample_rate_hz) is returned.
    264   virtual int last_output_sample_rate_hz() const = 0;
    265 
    266   // Not implemented.
    267   virtual int SetTargetNumberOfChannels() = 0;
    268 
    269   // Not implemented.
    270   virtual int SetTargetSampleRate() = 0;
    271 
    272   // Returns the error code for the last occurred error. If no error has
    273   // occurred, 0 is returned.
    274   virtual int LastError() const = 0;
    275 
    276   // Returns the error code last returned by a decoder (audio or comfort noise).
    277   // When LastError() returns kDecoderErrorCode or kComfortNoiseErrorCode, check
    278   // this method to get the decoder's error code.
    279   virtual int LastDecoderError() = 0;
    280 
    281   // Flushes both the packet buffer and the sync buffer.
    282   virtual void FlushBuffers() = 0;
    283 
    284   // Current usage of packet-buffer and it's limits.
    285   virtual void PacketBufferStatistics(int* current_num_packets,
    286                                       int* max_num_packets) const = 0;
    287 
    288   // Enables NACK and sets the maximum size of the NACK list, which should be
    289   // positive and no larger than Nack::kNackListSizeLimit. If NACK is already
    290   // enabled then the maximum NACK list size is modified accordingly.
    291   virtual void EnableNack(size_t max_nack_list_size) = 0;
    292 
    293   virtual void DisableNack() = 0;
    294 
    295   // Returns a list of RTP sequence numbers corresponding to packets to be
    296   // retransmitted, given an estimate of the round-trip time in milliseconds.
    297   virtual std::vector<uint16_t> GetNackList(
    298       int64_t round_trip_time_ms) const = 0;
    299 
    300  protected:
    301   NetEq() {}
    302 
    303  private:
    304   RTC_DISALLOW_COPY_AND_ASSIGN(NetEq);
    305 };
    306 
    307 }  // namespace webrtc
    308 #endif  // WEBRTC_MODULES_AUDIO_CODING_NETEQ_INCLUDE_NETEQ_H_
    309