Home | History | Annotate | Download | only in interface
      1 /*
      2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #ifndef WEBRTC_MODULES_AUDIO_CODING_MAIN_INTERFACE_AUDIO_CODING_MODULE_H_
     12 #define WEBRTC_MODULES_AUDIO_CODING_MAIN_INTERFACE_AUDIO_CODING_MODULE_H_
     13 
     14 #include <vector>
     15 
     16 #include "webrtc/common_types.h"
     17 #include "webrtc/modules/audio_coding/main/acm2/acm_codec_database.h"
     18 #include "webrtc/modules/audio_coding/main/interface/audio_coding_module_typedefs.h"
     19 #include "webrtc/modules/audio_coding/neteq/interface/neteq.h"
     20 #include "webrtc/modules/interface/module.h"
     21 #include "webrtc/system_wrappers/interface/clock.h"
     22 #include "webrtc/typedefs.h"
     23 
     24 namespace webrtc {
     25 
     26 // forward declarations
     27 struct CodecInst;
     28 struct WebRtcRTPHeader;
     29 class AudioFrame;
     30 class RTPFragmentationHeader;
     31 
     32 #define WEBRTC_10MS_PCM_AUDIO 960  // 16 bits super wideband 48 kHz
     33 
     34 // Callback class used for sending data ready to be packetized.
        // An implementation is handed to ACM through
        // AudioCodingModule::RegisterTransportCallback(); as documented there,
        // SendData() is called whenever Process() is called and a bit-stream is
        // ready to deliver.
     35 class AudioPacketizationCallback {
     36  public:
     37   virtual ~AudioPacketizationCallback() {}
     38 
          // Receives one encoded payload from ACM.
          // NOTE(review): the parameter semantics are not spelled out in this
          // header; presumably |payload_data| points at |payload_len_bytes| bytes of
          // encoded audio and |fragmentation| may describe how it is split --
          // confirm against the ACM implementation. The meaning of the return value
          // is likewise not shown here.
     39   virtual int32_t SendData(
     40       FrameType frame_type,
     41       uint8_t payload_type,
     42       uint32_t timestamp,
     43       const uint8_t* payload_data,
     44       uint16_t payload_len_bytes,
     45       const RTPFragmentationHeader* fragmentation) = 0;
     46 };
     47 
     48 // Callback class used for inband DTMF detection.
     49 class AudioCodingFeedback {
     50  public:
     51   virtual ~AudioCodingFeedback() {}
     52 
          // Reports a DTMF event.
          // NOTE(review): presumably |digit_dtmf| is the detected digit and |end|
          // tells whether the tone has ended -- confirm with the caller; the return
          // value convention is not shown in this header.
     53   virtual int32_t IncomingDtmf(const uint8_t digit_dtmf,
     54                                const bool end) = 0;
     55 };
     56 
     57 // Callback class used for reporting VAD decision.
        // Registered through AudioCodingModule::RegisterVADCallback(), which documents
        // that the callback is invoked any time ACM encounters an empty frame, i.e. a
        // frame recognized as inactive.
     58 class ACMVADCallback {
     59  public:
     60   virtual ~ACMVADCallback() {}
     61 
          // NOTE(review): |frameType| presumably carries the frame type decided by
          // the VAD -- the set of values is not visible in this header; confirm.
     62   virtual int32_t InFrameType(int16_t frameType) = 0;
     63 };
     64 
     65 // Callback class used for reporting receiver statistics.
     66 class ACMVQMonCallback {
     67  public:
     68   virtual ~ACMVQMonCallback() {}
     69 
          // Reports NetEq statistics; each argument is described next to its
          // declaration. The meaning of the return value is not shown in this header.
     70   virtual int32_t NetEqStatistics(
     71       const int32_t id,  // current ACM id
     72       const uint16_t MIUsValid,  // valid voice duration in ms
     73       const uint16_t MIUsReplaced,  // concealed voice duration in ms
     74       const uint8_t eventFlags,  // concealed voice flags
     75       const uint16_t delayMS) = 0;  // average delay in ms
     76 };
     77 
     78 class AudioCodingModule: public Module {
     79  protected:
     80   AudioCodingModule() {}
     81 
     82  public:
     83   struct Config {
            // Aggregates an ACM id, a NetEq configuration and a clock. The default
            // constructor fills in the defaults noted on each member below.
     84     Config()
     85         : id(0),
     86           neteq_config(),
     87           clock(Clock::GetRealTimeClock()) {}
     88 
     89     int id;  // Set to 0 by the default constructor.
     90     NetEq::Config neteq_config;  // Value-initialized by the default constructor.
     91     Clock* clock;  // Defaults to Clock::GetRealTimeClock().
     92   };
     93 
     94   ///////////////////////////////////////////////////////////////////////////
     95   // Creation and destruction of a ACM.
     96   //
     97   // The second method is used for testing where a simulated clock can be
     98   // injected into ACM. ACM will take the ownership of the object clock and
     99   // delete it when destroyed.
    100   //
    101   static AudioCodingModule* Create(int id);
    102   static AudioCodingModule* Create(int id, Clock* clock);
    103   virtual ~AudioCodingModule() {}  // Virtual for deletion via base pointer.
    104 
    105   ///////////////////////////////////////////////////////////////////////////
    106   //   Utility functions
    107   //
    108 
    109   ///////////////////////////////////////////////////////////////////////////
    110   // int NumberOfCodecs()
    111   // Returns number of supported codecs.
    112   //
    113   // Return value:
    114   //   number of supported codecs.
    115   ///
    116   static int NumberOfCodecs();
    117 
    118   ///////////////////////////////////////////////////////////////////////////
    119   // int Codec()
    120   // Get supported codec with list number.
    121   //
    122   // Input:
    123   //   -list_id             : list number.
    124   //
    125   // Output:
    126   //   -codec              : a structure where the parameters of the codec,
    127   //                         given by list number is written to.
    128   //
    129   // Return value:
    130   //   -1 if the list number (list_id) is invalid.
    131   //    0 if succeeded.
    132   //
    133   static int Codec(int list_id, CodecInst* codec);
    134 
    135   ///////////////////////////////////////////////////////////////////////////
    136   // int Codec()
    137   // Get supported codec with the given codec name, sampling frequency, and
    138   // a given number of channels.
    139   //
    140   // Input:
    141   //   -payload_name       : name of the codec.
    142   //   -sampling_freq_hz   : sampling frequency of the codec. Note! for RED
    143   //                         a sampling frequency of -1 is a valid input.
    144   //   -channels           : number of channels ( 1 - mono, 2 - stereo).
    145   //
    146   // Output:
    147   //   -codec              : a structure where the function returns the
    148   //                         default parameters of the codec.
    149   //
    150   // Return value:
    151   //   -1 if no codec matches the given parameters.
    152   //    0 if succeeded.
    153   //
    154   static int Codec(const char* payload_name, CodecInst* codec,
    155                        int sampling_freq_hz, int channels);
    156 
    157   ///////////////////////////////////////////////////////////////////////////
    158   // int Codec()
    159   //
    160   // Returns the list number of the given codec name, sampling frequency, and
    161   // a given number of channels.
    162   //
    163   // Input:
    164   //   -payload_name        : name of the codec.
    165   //   -sampling_freq_hz    : sampling frequency of the codec. Note! for RED
    166   //                          a sampling frequency of -1 is a valid input.
    167   //   -channels            : number of channels ( 1 - mono, 2 - stereo).
    168   //
    169   // Return value:
    170   //   if the codec is found, the index of the codec in the list,
    171   //   -1 if the codec is not found.
    172   //
    173   static int Codec(const char* payload_name, int sampling_freq_hz,
    174                              int channels);
    175 
    176   ///////////////////////////////////////////////////////////////////////////
    177   // bool IsCodecValid()
    178   // Checks the validity of the parameters of the given codec.
    179   //
    180   // Input:
    181   //   -codec              : the structure which keeps the parameters of the
    182   //                         codec.
    183   //
    184   // Return value:
    185   //   true if the parameters are valid,
    186   //   false if any parameter is not valid.
    187   //
    188   static bool IsCodecValid(const CodecInst& codec);
    189 
    190   ///////////////////////////////////////////////////////////////////////////
    191   //   Sender
    192   //
    193 
    194   ///////////////////////////////////////////////////////////////////////////
    195   // int32_t InitializeSender()
    196   // Any encoder-related state of ACM will be initialized to the
    197   // same state as when ACM is created. This will not interrupt or
    198   // affect decoding functionality of ACM. ACM will lose all the
    199   // encoding-related settings by calling this function.
    200   // For instance, a send codec has to be registered again.
    201   //
    202   // Return value:
    203   //   -1 if failed to initialize,
    204   //    0 if succeeded.
    205   //
    206   virtual int32_t InitializeSender() = 0;
    207 
    208   ///////////////////////////////////////////////////////////////////////////
    209   // int32_t ResetEncoder()
    210   // This API resets the states of encoder. All the encoder settings, such as
    211   // send-codec or VAD/DTX, will be preserved.
    212   //
    213   // Return value:
    214   //   -1 if failed to initialize,
    215   //    0 if succeeded.
    216   //
    217   virtual int32_t ResetEncoder() = 0;
    218 
    219   ///////////////////////////////////////////////////////////////////////////
    220   // int32_t RegisterSendCodec()
    221   // Registers a codec, specified by |send_codec|, as sending codec.
    222   // This API can be called multiple times to register a codec. The last codec
    223   // registered overwrites the previous ones.
    224   // The API can also be used to change payload type for CNG and RED, which are
    225   // registered by default to default payload types.
    226   // Note that registering CNG and RED won't overwrite speech codecs.
    227   // This API can be called to set/change the send payload-type, frame-size
    228   // or encoding rate (if applicable for the codec).
    229   //
    230   // Note: If a stereo codec is registered as send codec, VAD/DTX will
    231   // automatically be turned off, since it is not supported for stereo sending.
    232   //
    233   // Note: If a secondary encoder is already registered, and the new send-codec
    234   // has a sampling rate that does not match the secondary encoder, the
    235   // secondary encoder will be unregistered.
    236   //
    237   // Input:
    238   //   -send_codec         : Parameters of the codec to be registered, c.f.
    239   //                         common_types.h for the definition of
    240   //                         CodecInst.
    241   //
    242   // Return value:
    243   //   -1 if failed to initialize,
    244   //    0 if succeeded.
    245   //
    246   virtual int32_t RegisterSendCodec(const CodecInst& send_codec) = 0;
    247 
    248   ///////////////////////////////////////////////////////////////////////////
    249   // int RegisterSecondarySendCodec()
    250   // Register a secondary encoder to enable dual-streaming. If a secondary
    251   // codec is already registered, it will be removed before the new one is
    252   // registered.
    253   //
    254   // Note: The secondary encoder will be unregistered if a primary codec
    255   // is set with a sampling rate which does not match that of the existing
    256   // secondary codec.
    257   //
    258   // Input:
    259   //   -send_codec         : Parameters of the codec to be registered, c.f.
    260   //                         common_types.h for the definition of
    261   //                         CodecInst.
    262   //
    263   // Return value:
    264   //   -1 if failed to register,
    265   //    0 if succeeded.
    266   //
    267   virtual int RegisterSecondarySendCodec(const CodecInst& send_codec) = 0;
    268 
    269   ///////////////////////////////////////////////////////////////////////////
    270   // void UnregisterSecondarySendCodec()
    271   // Unregister the secondary encoder to disable dual-streaming.
    272   //
    273   virtual void UnregisterSecondarySendCodec() = 0;
    274 
    275   ///////////////////////////////////////////////////////////////////////////
    276   // int32_t SendCodec()
    277   // Get parameters for the codec currently registered as send codec.
    278   //
    279   // Output:
    280   //   -current_send_codec          : parameters of the send codec.
    281   //
    282   // Return value:
    283   //   -1 if failed to get send codec,
    284   //    0 if succeeded.
    285   //
    286   virtual int32_t SendCodec(CodecInst* current_send_codec) const = 0;
    287 
    288   ///////////////////////////////////////////////////////////////////////////
    289   // int SecondarySendCodec()
    290   // Get the codec parameters for the current secondary send codec.
    291   //
    292   // Output:
    293   //   -secondary_codec          : parameters of the secondary send codec.
    294   //
    295   // Return value:
    296   //   -1 if failed to get send codec,
    297   //    0 if succeeded.
    298   //
    299   virtual int SecondarySendCodec(CodecInst* secondary_codec) const = 0;
    300 
    301   ///////////////////////////////////////////////////////////////////////////
    302   // int32_t SendFrequency()
    303   // Get the sampling frequency of the current encoder in Hertz.
    304   //
    305   // Return value:
    306   //   positive; sampling frequency [Hz] of the current encoder.
    307   //   -1 if an error has happened.
    308   //
    309   virtual int32_t SendFrequency() const = 0;
    310 
    311   ///////////////////////////////////////////////////////////////////////////
    312   // int32_t SendBitrate()
    313   // Get encoding bit-rate in bits per second.
    314   //
    315   // Return value:
    316   //   positive; encoding rate in bits/sec,
    317   //   -1 if an error has happened.
    318   //
    319   virtual int32_t SendBitrate() const = 0;
    320 
    321   ///////////////////////////////////////////////////////////////////////////
    322   // int32_t SetReceivedEstimatedBandwidth()
    323   // Set available bandwidth [bits/sec] of the up-link channel.
    324   // This information is used for traffic shaping, and is currently only
    325   // supported if iSAC is the send codec.
    326   //
    327   // Input:
    328   //   -bw                 : bandwidth in bits/sec estimated for
    329   //                         up-link.
    330   // Return value:
    331   //   -1 if error occurred in setting the bandwidth,
    332   //    0 bandwidth is set successfully.
    333   //
    334   // TODO(henrik.lundin) Unused. Remove?
    335   virtual int32_t SetReceivedEstimatedBandwidth(
    336       const int32_t bw) = 0;
    337 
    338   ///////////////////////////////////////////////////////////////////////////
    339   // int32_t RegisterTransportCallback()
    340   // Register a transport callback which will be called to deliver
    341   // the encoded buffers whenever Process() is called and a
    342   // bit-stream is ready.
    343   //
    344   // Input:
    345   //   -transport          : pointer to the callback class
    346   //                         transport->SendData() is called whenever
    347   //                         Process() is called and bit-stream is ready
    348   //                         to deliver.
    349   //
    350   // Return value:
    351   //   -1 if the transport callback could not be registered
    352   //    0 if registration is successful.
    353   //
    354   virtual int32_t RegisterTransportCallback(
    355       AudioPacketizationCallback* transport) = 0;
    356 
    357   ///////////////////////////////////////////////////////////////////////////
    358   // int32_t Add10MsData()
    359   // Add 10MS of raw (PCM) audio data to the encoder. If the sampling
    360   // frequency of the audio does not match the sampling frequency of the
    361   // current encoder ACM will resample the audio.
    362   //
    363   // Input:
    364   //   -audio_frame        : the input audio frame, containing raw audio
    365   //                         sampling frequency etc.,
    366   //                         c.f. module_common_types.h for definition of
    367   //                         AudioFrame.
    368   //
    369   // Return value:
    370   //      0   successfully added the frame.
    371   //     -1   some error occurred and data is not added.
    372   //   < -1   to add the frame to the buffer n samples had to be
    373   //          overwritten, -n is the return value in this case.
    374   //
    375   virtual int32_t Add10MsData(const AudioFrame& audio_frame) = 0;
    376 
    377   ///////////////////////////////////////////////////////////////////////////
    378   // (RED) Redundant Coding
    379   //
    380 
    381   ///////////////////////////////////////////////////////////////////////////
    382   // int32_t SetREDStatus()
    383   // configure RED status i.e. on/off.
    384   //
    385   // RFC 2198 describes a solution which has a single payload type which
    386   // signifies a packet with redundancy. That packet then becomes a container,
    387   // encapsulating multiple payloads into a single RTP packet.
    388   // Such a scheme is flexible, since any amount of redundancy may be
    389   // encapsulated within a single packet.  There is, however, a small overhead
    390   // since each encapsulated payload must be preceded by a header indicating
    391   // the type of data enclosed.
    392   //
    393   // Input:
    394   //   -enable_red         : if true RED is enabled, otherwise RED is
    395   //                         disabled.
    396   //
    397   // Return value:
    398   //   -1 if failed to set RED status,
    399   //    0 if succeeded.
    400   //
    401   virtual int32_t SetREDStatus(bool enable_red) = 0;
    402 
    403   ///////////////////////////////////////////////////////////////////////////
    404   // bool REDStatus()
    405   // Get RED status
    406   //
    407   // Return value:
    408   //   true if RED is enabled,
    409   //   false if RED is disabled.
    410   //
    411   virtual bool REDStatus() const = 0;
    412 
    413   ///////////////////////////////////////////////////////////////////////////
    414   // (FEC) Forward Error Correction (codec internal)
    415   //
    416 
    417   ///////////////////////////////////////////////////////////////////////////
    418   // int SetCodecFEC()
    419   // Configures codec internal FEC status i.e. on/off. No effect on codecs that
    420   // do not provide internal FEC.
    421   //
    422   // Input:
    423   //   -enable_codec_fec   : if true FEC will be enabled otherwise the FEC is
    424   //                         disabled.
    425   //
    426   // Return value:
    427   //   -1 if failed, or the codec does not support FEC
    428   //    0 if succeeded.
    429   //
    430   virtual int SetCodecFEC(bool enable_codec_fec) = 0;
    431 
    432   ///////////////////////////////////////////////////////////////////////////
    433   // bool CodecFEC()
    434   // Gets status of codec internal FEC.
    435   //
    436   // Return value:
    437   //   true if FEC is enabled,
    438   //   false if FEC is disabled.
    439   //
    440   virtual bool CodecFEC() const = 0;
    441 
    442   ///////////////////////////////////////////////////////////////////////////
    443   // int SetPacketLossRate()
    444   // Sets expected packet loss rate for encoding. Some encoders provide packet
    445   // loss-aware encoding to make the stream less sensitive to packet losses,
    446   // e.g. through FEC. No effect on codecs that do not provide such encoding.
    447   //
    448   // Input:
    449   //   -packet_loss_rate   : expected packet loss rate (0 -- 100 inclusive).
    450   //
    451   // Return value:
    452   //   -1 if failed to set packet loss rate,
    453   //   0 if succeeded.
    454   //
    455   virtual int SetPacketLossRate(int packet_loss_rate) = 0;
    456 
    457   ///////////////////////////////////////////////////////////////////////////
    458   //   (VAD) Voice Activity Detection
    459   //
    460 
    461   ///////////////////////////////////////////////////////////////////////////
    462   // int32_t SetVAD()
    463   // If DTX is enabled & the codec does not have internal DTX/VAD
    464   // WebRtc VAD will be automatically enabled and |enable_vad| is ignored.
    465   //
    466   // If DTX is disabled but VAD is enabled no DTX packets are sent,
    467   // regardless of whether the codec has internal DTX/VAD or not. In this
    468   // case, WebRtc VAD is running to label frames as active/in-active.
    469   //
    470   // NOTE! VAD/DTX is not supported when sending stereo.
    471   //
    472   // Inputs:
    473   //   -enable_dtx         : if true DTX is enabled,
    474   //                         otherwise DTX is disabled.
    475   //   -enable_vad         : if true VAD is enabled,
    476   //                         otherwise VAD is disabled.
    477   //   -vad_mode           : determines the aggressiveness of VAD. A more
    478   //                         aggressive mode results in more frames labeled
    479   //                         as in-active, c.f. definition of
    480   //                         ACMVADMode in audio_coding_module_typedefs.h
    481   //                         for valid values.
    482   //
    483   // Return value:
    484   //   -1 if failed to set up VAD/DTX,
    485   //    0 if succeeded.
    486   //
    487   virtual int32_t SetVAD(const bool enable_dtx = true,
    488                                const bool enable_vad = false,
    489                                const ACMVADMode vad_mode = VADNormal) = 0;
    490 
    491   ///////////////////////////////////////////////////////////////////////////
    492   // int32_t VAD()
    493   // Get VAD status.
    494   //
    495   // Outputs:
    496   //   -dtx_enabled        : is set to true if DTX is enabled, otherwise
    497   //                         is set to false.
    498   //   -vad_enabled        : is set to true if VAD is enabled, otherwise
    499   //                         is set to false.
    500   //   -vad_mode            : is set to the current aggressiveness of VAD.
    501   //
    502   // Return value:
    503   //   -1 if fails to retrieve the setting of DTX/VAD,
    504   //    0 if succeeded.
    505   //
    506   virtual int32_t VAD(bool* dtx_enabled, bool* vad_enabled,
    507                             ACMVADMode* vad_mode) const = 0;
    508 
    509   ///////////////////////////////////////////////////////////////////////////
    510   // int32_t ReplaceInternalDTXWithWebRtc()
    511   // Used to replace codec internal DTX scheme with WebRtc. This is only
    512   // supported for G729, where this call replaces AnnexB with WebRtc DTX.
    513   //
    514   // Input:
    515   //   -use_webrtc_dtx     : if false (default) the codec built-in DTX/VAD
    516   //                         scheme is used, otherwise the internal DTX is
    517   //                         replaced with WebRtc DTX/VAD.
    518   //
    519   // Return value:
    520   //   -1 if failed to replace codec internal DTX with WebRtc,
    521   //    0 if succeeded.
    522   //
    523   virtual int32_t ReplaceInternalDTXWithWebRtc(
    524       const bool use_webrtc_dtx = false) = 0;
    525 
    526   ///////////////////////////////////////////////////////////////////////////
    527   // int32_t IsInternalDTXReplacedWithWebRtc()
    528   // Get status if the codec internal DTX (when such exists) is replaced with
    529   // WebRtc DTX. This is only supported for G729.
    530   //
    531   // Output:
    532   //   -uses_webrtc_dtx    : is set to true if the codec internal DTX is
    533   //                         replaced with WebRtc DTX/VAD, otherwise it is set
    534   //                         to false.
    535   //
    536   // Return value:
    537   //   -1 if failed to determine if codec internal DTX is replaced with WebRtc,
    538   //    0 if succeeded.
    539   //
    540   virtual int32_t IsInternalDTXReplacedWithWebRtc(
    541       bool* uses_webrtc_dtx) = 0;
    542 
    543   ///////////////////////////////////////////////////////////////////////////
    544   // int32_t RegisterVADCallback()
    545   // Call this method to register a callback function which is called
    546   // any time that ACM encounters an empty frame. That is a frame which is
    547   // recognized inactive. Depending on the codec WebRtc VAD or internal codec
    548   // VAD is employed to identify a frame as active/inactive.
    549   //
    550   // Input:
    551   //   -vad_callback        : pointer to a callback function.
    552   //
    553   // Return value:
    554   //   -1 if failed to register the callback function.
    555   //    0 if the callback function is registered successfully.
    556   //
    557   virtual int32_t RegisterVADCallback(ACMVADCallback* vad_callback) = 0;
    558 
    559   ///////////////////////////////////////////////////////////////////////////
    560   //   Receiver
    561   //
    562 
    563   ///////////////////////////////////////////////////////////////////////////
    564   // int32_t InitializeReceiver()
    565   // Any decoder-related state of ACM will be initialized to the
    566   // same state as when ACM is created. This will not interrupt or
    567   // affect encoding functionality of ACM. ACM would lose all the
    568   // decoding-related settings by calling this function.
    569   // For instance, all registered codecs are deleted and have to be
    570   // registered again.
    571   //
    572   // Return value:
    573   //   -1 if failed to initialize,
    574   //    0 if succeeded.
    575   //
    576   virtual int32_t InitializeReceiver() = 0;
    577 
    578   ///////////////////////////////////////////////////////////////////////////
    579   // int32_t ResetDecoder()
    580   // This API resets the states of decoders. ACM will not lose any
    581   // decoder-related settings, such as registered codecs.
    582   //
    583   // Return value:
    584   //   -1 if failed to initialize,
    585   //    0 if succeeded.
    586   //
    587   virtual int32_t ResetDecoder() = 0;
    588 
    589   ///////////////////////////////////////////////////////////////////////////
    590   // int32_t ReceiveFrequency()
    591   // Get sampling frequency of the last received payload.
    592   //
    593   // Return value:
    594   //   non-negative the sampling frequency in Hertz.
    595   //   -1 if an error has occurred.
    596   //
    597   virtual int32_t ReceiveFrequency() const = 0;
    598 
    599   ///////////////////////////////////////////////////////////////////////////
    600   // int32_t PlayoutFrequency()
    601   // Get sampling frequency of audio played out.
    602   //
    603   // Return value:
    604   //   the sampling frequency in Hertz.
    605   //
    606   virtual int32_t PlayoutFrequency() const = 0;
    607 
    608   ///////////////////////////////////////////////////////////////////////////
    609   // int32_t RegisterReceiveCodec()
    610   // Register possible decoders, can be called multiple times for
    611   // codecs, CNG-NB, CNG-WB, CNG-SWB, AVT and RED.
    612   //
    613   // Input:
    614   //   -receive_codec      : parameters of the codec to be registered, c.f.
    615   //                         common_types.h for the definition of
    616   //                         CodecInst.
    617   //
    618   // Return value:
    619   //   -1 if failed to register the codec
    620   //    0 if the codec registered successfully.
    621   //
    622   virtual int32_t RegisterReceiveCodec(
    623       const CodecInst& receive_codec) = 0;
    624 
    625   ///////////////////////////////////////////////////////////////////////////
    626   // int UnregisterReceiveCodec()
    627   // Unregister the codec currently registered with a specific payload type
    628   // from the list of possible receive codecs.
    629   //
    630   // Input:
    631   //   -payload_type        : The number representing the payload type to
    632   //                         unregister.
    633   //
    634   // Return value:
    635   //   -1 if fails to unregister.
    636   //    0 if the given codec is successfully unregistered.
    637   //
    638   virtual int UnregisterReceiveCodec(
    639       uint8_t payload_type) = 0;
    640 
    641   ///////////////////////////////////////////////////////////////////////////
    642   // int32_t ReceiveCodec()
    643   // Get the codec associated with last received payload.
    644   //
    645   // Output:
    646   //   -curr_receive_codec : parameters of the codec associated with the last
    647   //                         received payload, c.f. common_types.h for
    648   //                         the definition of CodecInst.
    649   //
    650   // Return value:
    651   //   -1 if failed to retrieve the codec,
    652   //    0 if the codec is successfully retrieved.
    653   //
    654   virtual int32_t ReceiveCodec(CodecInst* curr_receive_codec) const = 0;
    655 
    656   ///////////////////////////////////////////////////////////////////////////
    657   // int32_t IncomingPacket()
    658   // Call this function to insert a parsed RTP packet into ACM.
    659   //
    660   // Inputs:
    661   //   -incoming_payload   : received payload.
    662   //   -payload_len_bytes  : the length of payload in bytes.
    663   //   -rtp_info           : the relevant information retrieved from RTP
    664   //                         header.
    665   //
    666   // Return value:
    667   //   -1 if failed to push in the payload
    668   //    0 if payload is successfully pushed in.
    669   //
    670   virtual int32_t IncomingPacket(const uint8_t* incoming_payload,
    671                                        const int32_t payload_len_bytes,
    672                                        const WebRtcRTPHeader& rtp_info) = 0;
    673 
    674   ///////////////////////////////////////////////////////////////////////////
    675   // int32_t IncomingPayload()
    676   // Call this API to push incoming payloads when there is no rtp-info.
    677   // The rtp-info will be created in ACM. One usage for this API is when
    678   // pre-encoded files are pushed in ACM
    679   //
    680   // Inputs:
    681   //   -incoming_payload   : received payload.
    682   //   -payload_len_byte   : the length, in bytes, of the received payload.
    683   //   -payload_type       : the payload-type. This specifies which codec has
    684   //                         to be used to decode the payload.
    685   //   -timestamp          : send timestamp of the payload. ACM starts with
    686   //                         a random value and increments it by the
    687   //                         packet-size, which is given when the codec in
    688   //                         question is registered by RegisterReceiveCodec().
    689   //                         Therefore, it is essential to have the timestamp
    690   //                         if the frame-size differs from the registered
    691   //                         value or if the incoming payload contains DTX
    692   //                         packets.
    693   //
    694   // Return value:
    695   //   -1 if failed to push in the payload
    696   //    0 if payload is successfully pushed in.
    697   //
    698   virtual int32_t IncomingPayload(const uint8_t* incoming_payload,
    699                                         const int32_t payload_len_byte,
    700                                         const uint8_t payload_type,
    701                                         const uint32_t timestamp = 0) = 0;
    702 
    703   ///////////////////////////////////////////////////////////////////////////
    704   // int SetMinimumPlayoutDelay()
    705   // Set a minimum for the playout delay, used for lip-sync. NetEq maintains
    706   // such a delay unless channel conditions call for a higher delay.
    707   //
    708   // Input:
    709   //   -time_ms            : minimum delay in milliseconds.
    710   //
    711   // Return value:
    712   //   -1 if failed to set the delay,
    713   //    0 if the minimum delay is set.
    714   //
    715   virtual int SetMinimumPlayoutDelay(int time_ms) = 0;
    716 
    717   ///////////////////////////////////////////////////////////////////////////
    718   // int SetMaximumPlayoutDelay()
    719   // Set a maximum for the playout delay.
    720   //
    721   // Input:
    722   //   -time_ms            : maximum delay in milliseconds.
    723   //
    724   // Return value:
    725   //   -1 if failed to set the delay,
    726   //    0 if the maximum delay is set.
    727   //
    728   virtual int SetMaximumPlayoutDelay(int time_ms) = 0;
    729 
    730   //
    731   // The shortest latency, in milliseconds, required by the jitter buffer. This
    732   // is computed based on inter-arrival times and playout mode of NetEq. The
    733   // actual delay is the maximum of least-required-delay and the minimum-delay
    734   // specified by SetMinimumPlayoutDelay() API.
    735   //
    736   virtual int LeastRequiredDelayMs() const = 0;
    737 
    738   ///////////////////////////////////////////////////////////////////////////
    739   // int32_t SetDtmfPlayoutStatus()
    740   // Configure DTMF playout, i.e. whether out-of-band
    741   // DTMF tones are played or not.
    742   //
    743   // Input:
    744   //   -enable             : true to enable playout of out-of-band DTMF tones,
    745   //                         false to disable.
    746   //
    747   // Return value:
    748   //   -1 if the method fails, e.g. DTMF playout is not supported.
    749   //    0 if the status is set successfully.
    750   //
    751   virtual int32_t SetDtmfPlayoutStatus(const bool enable) = 0;
    752 
    753   ///////////////////////////////////////////////////////////////////////////
    754   // bool DtmfPlayoutStatus()
    755   // Get the DTMF playout status.
    756   //
    757   // Return value:
    758   //   true if out-of-band DTMF tones are played,
    759   //   false if playout of DTMF tones is disabled.
    760   //
    761   virtual bool DtmfPlayoutStatus() const = 0;
    762 
    763   ///////////////////////////////////////////////////////////////////////////
    764   // int32_t PlayoutTimestamp()
    765   // The send timestamp of an RTP packet is associated with the decoded
    766   // audio of the packet in question. This function returns the timestamp of
    767   // the latest audio obtained by calling PlayoutData10Ms().
    768   //
    769   // Output:
    770   //   -timestamp          : a pointer to a uint32_t to receive the
    771   //                         timestamp.
    772   // Return value:
    773   //    0 if the output is a correct timestamp.
    774   //   -1 if failed to output the correct timestamp.
    775   //
    776   // TODO(tlegrand): Change function to return the timestamp.
    777   virtual int32_t PlayoutTimestamp(uint32_t* timestamp) = 0;
    778 
    779   ///////////////////////////////////////////////////////////////////////////
    780   // int32_t DecoderEstimatedBandwidth()
    781   // Get the estimate of the bandwidth, in bits/second, based on the incoming
    782   // stream. This API is useful in one-way communication scenarios, where
    783   // the bandwidth information is sent in an out-of-band fashion.
    784   // Currently only supported if iSAC is registered as a receiver.
    785   //
    786   // Return value:
    787   //   >0 bandwidth in bits/second.
    788   //   -1 if failed to get a bandwidth estimate.
    789   //
    790   virtual int32_t DecoderEstimatedBandwidth() const = 0;
    791 
    792   ///////////////////////////////////////////////////////////////////////////
    793   // int32_t SetPlayoutMode()
    794   // Call this API to set the playout mode. Playout mode could be optimized
    795   // for i) voice, ii) FAX or iii) streaming. In Voice mode, NetEQ is
    796   // optimized to deliver highest audio quality while maintaining a minimum
    797   // delay. In FAX mode, NetEQ is optimized to have as few delay changes as
    798   // possible and maintain a constant delay, perhaps large relative to voice
    799   // mode, to avoid PLC. In streaming mode, we tolerate a little more delay
    800   // to achieve better jitter robustness.
    801   //
    802   // Input:
    803   //   -mode               : playout mode. Possible inputs are:
    804   //                         "voice",
    805   //                         "fax" and
    806   //                         "streaming".
    807   //
    808   // Return value:
    809   //   -1 if failed to set the mode,
    810   //    0 if succeeding.
    811   //
    812   virtual int32_t SetPlayoutMode(const AudioPlayoutMode mode) = 0;
    813 
    814   ///////////////////////////////////////////////////////////////////////////
    815   // AudioPlayoutMode PlayoutMode()
    816   // Get playout mode, i.e. whether it is voice, FAX or streaming. See
    817   // audio_coding_module_typedefs.h for definition of AudioPlayoutMode.
    818   //
    819   // Return value:
    820   //   voice:       is for voice output,
    821   //   fax:         a mode that is optimized for receiving FAX signals.
    822   //                In this mode NetEq tries to maintain a constant high
    823   //                delay to avoid PLC if possible.
    824   //   streaming:   a mode that is suitable for streaming. In this mode we
    825   //                accept longer delay to improve jitter robustness.
    826   //
    827   virtual AudioPlayoutMode PlayoutMode() const = 0;
    828 
    829   ///////////////////////////////////////////////////////////////////////////
    830   // int32_t PlayoutData10Ms()
    831   // Get 10 milliseconds of raw audio data for playout, at the given sampling
    832   // frequency. ACM will perform a resampling if required.
    833   //
    834   // Input:
    835   //   -desired_freq_hz    : the desired sampling frequency, in Hertz, of the
    836   //                         output audio. If set to -1, the function returns
    837   //                         the audio at the current sampling frequency.
    838   //
    839   // Output:
    840   //   -audio_frame        : output audio frame which contains raw audio data
    841   //                         and other relevant parameters, c.f.
    842   //                         module_common_types.h for the definition of
    843   //                         AudioFrame.
    844   //
    845   // Return value:
    846   //   -1 if the function fails,
    847   //    0 if the function succeeds.
    848   //
    849   virtual int32_t PlayoutData10Ms(int32_t desired_freq_hz,
    850                                         AudioFrame* audio_frame) = 0;
    851 
    852   ///////////////////////////////////////////////////////////////////////////
    853   //   Codec specific
    854   //
    855 
    856   ///////////////////////////////////////////////////////////////////////////
    857   // int SetISACMaxRate()
    858   // Set the maximum instantaneous rate of iSAC. For a payload of B bits
    859   // with a frame-size of T sec the instantaneous rate is B/T bits per
    860   // second. Therefore, (B/T < |max_rate_bps|) and
    861   // (B < |max_payload_len_bytes| * 8) are always satisfied for iSAC payloads,
    862   // c.f. SetISACMaxPayloadSize().
    863   //
    864   // Input:
    865   //   -max_rate_bps       : maximum instantaneous bit-rate given in bits/sec.
    866   //
    867   // Return value:
    868   //   -1 if failed to set the maximum rate.
    869   //    0 if the maximum rate is set successfully.
    870   //
    871   virtual int SetISACMaxRate(int max_rate_bps) = 0;
    872 
    873   ///////////////////////////////////////////////////////////////////////////
    874   // int SetISACMaxPayloadSize()
    875   // Set the maximum payload size of iSAC packets. No iSAC payload,
    876   // regardless of its frame-size, may exceed the given limit. For
    877   // an iSAC payload of size B bits and frame-size T seconds we have:
    878   // (B < |max_payload_len_bytes| * 8) and (B/T < |max_rate_bps|), c.f.
    879   // SetISACMaxRate().
    880   //
    881   // Input:
    882   //   -max_payload_len_bytes : maximum payload size in bytes.
    883   //
    884   // Return value:
    885   //   -1 if failed to set the maximum payload-size.
    886   //    0 if the given length is set successfully.
    887   //
    888   virtual int SetISACMaxPayloadSize(int max_payload_len_bytes) = 0;
    889 
    890   ///////////////////////////////////////////////////////////////////////////
    891   // int32_t ConfigISACBandwidthEstimator()
    892   // Call this function to configure the bandwidth estimator of iSAC.
    893   // During the adaptation of bit-rate, iSAC automatically adjusts the
    894   // frame-size (either 30 or 60 ms) to save on RTP header. The initial
    895   // frame-size can be specified by the first argument. The configuration also
    896   // regards the initial estimate of bandwidths. The estimator starts from
    897   // this point and converges to the actual bottleneck. This is given by the
    898   // second parameter. Furthermore, it is also possible to control the
    899   // adaptation of frame-size. This is specified by the last parameter.
    900   //
    901   // Input:
    902   //   -init_frame_size_ms : initial frame-size in milliseconds. For iSAC-wb
    903   //                         30 ms and 60 ms (default) are acceptable values,
    904   //                         and for iSAC-swb 30 ms is the only acceptable
    905   //                         value. Zero indicates default value.
    906   //   -init_rate_bps      : initial estimate of the bandwidth. Values
    907   //                         between 10000 and 58000 are acceptable.
    908   //   -enforce_frame_size : if true, the frame-size will not be adapted.
    909   //
    910   // Return value:
    911   //   -1 if failed to configure the bandwidth estimator,
    912   //    0 if the configuration was successfully applied.
    913   //
    914   virtual int32_t ConfigISACBandwidthEstimator(
    915       int init_frame_size_ms,
    916       int init_rate_bps,
    917       bool enforce_frame_size = false) = 0;
    918 
    919   ///////////////////////////////////////////////////////////////////////////
    920   // int SetOpusMaxPlaybackRate()
    921   // If current send codec is Opus, informs it about maximum playback rate the
    922   // receiver will render. Opus can use this information to optimize the bit
    923   // rate and increase the computation efficiency.
    924   //
    925   // Input:
    926   //   -frequency_hz            : maximum playback rate in Hz.
    927   //
    928   // Return value:
    929   //   -1 if current send codec is not Opus or an
    930   //      error occurred in setting the maximum playback rate,
    931   //    0 if the maximum playback rate is set successfully.
    932   //
    933   virtual int SetOpusMaxPlaybackRate(int frequency_hz) = 0;
    934 
    935   ///////////////////////////////////////////////////////////////////////////
    936   //   statistics
    937   //
    938 
    939   ///////////////////////////////////////////////////////////////////////////
    940   // int32_t NetworkStatistics()
    941   // Get network statistics. Note that the internal statistics of NetEq are
    942   // reset by this call.
    943   //
    944   // Output:
    945   //   -network_statistics : a structure that receives the network statistics.
    946   //
    947   // Return value:
    948   //   -1 if failed to get the network statistics,
    949   //    0 if statistics are retrieved successfully.
    950   //
    951   virtual int32_t NetworkStatistics(
    952       ACMNetworkStatistics* network_statistics) = 0;
    953 
    954   //
    955   // Set an initial delay for playout.
    956   // An initial delay makes ACM play out silence until the equivalent of
    957   // |delay_ms| of audio payload is accumulated in the NetEq jitter buffer.
    958   // Thereafter, ACM pulls audio from NetEq in its regular fashion, and the
    959   // given delay is maintained throughout the call, unless channel conditions
    960   // yield a higher jitter buffer delay.
    961   //
    962   // Input:
    963   //   -delay_ms           : delay in milliseconds.
    964   //
    965   // Return values:
    966   //   -1 if failed to set the delay.
    967   //    0 if delay is set successfully.
    968   //
    969   virtual int SetInitialPlayoutDelay(int delay_ms) = 0;
    970 
    971   //
    972   // Enable NACK and set the maximum size of the NACK list. If NACK is already
    973   // enabled then the maximum NACK list size is modified accordingly.
    974   //
    975   // If the sequence number of last received packet is N, the sequence numbers
    976   // of NACK list are in the range of [N - |max_nack_list_size|, N).
    977   //
    978   // |max_nack_list_size| should be positive (non-zero) and less than or
    979   // equal to |Nack::kNackListSizeLimit|. Otherwise, no change is applied and -1
    980   // is returned. 0 is returned at success.
    981   //
    982   virtual int EnableNack(size_t max_nack_list_size) = 0;
    983 
    984   // Disable NACK (the counterpart of EnableNack()).
    985   virtual void DisableNack() = 0;
    986 
    987   //
    988   // Get a list of packets to be retransmitted. |round_trip_time_ms| is an
    989   // estimate of the round-trip-time (in milliseconds). Missing packets which
    990   // will be played out in a shorter time than the round-trip-time (with
    991   // respect to the time this API is called) will not be included in the list.
    992   //
    993   // Negative |round_trip_time_ms| results in an error message and an empty
    994   // list is returned.
    995   //
    996   virtual std::vector<uint16_t> GetNackList(int round_trip_time_ms) const = 0;
    997 
    998   virtual void GetDecodingCallStatistics(
    999       AudioDecodingCallStats* call_stats) const = 0;  // Output (TODO confirm).
   1000 };
   1001 
   1002 class AudioEncoder;
   1003 class ReceiverInfo;
   1004 
   1005 class AudioCoding {
   1006  public:
   1007   struct Config {
   1008     Config()
   1009         : neteq_config(),
   1010           clock(Clock::GetRealTimeClock()),
   1011           transport(NULL),
   1012           vad_callback(NULL),
   1013           play_dtmf(true),
   1014           initial_playout_delay_ms(0),
   1015           playout_channels(1),
   1016           playout_frequency_hz(32000) {}
   1017
   1018     AudioCodingModule::Config ToOldConfig() const {
   1019       AudioCodingModule::Config old_config;
   1020       old_config.id = 0;  // This Config has no id field; use 0.
   1021       old_config.neteq_config = neteq_config;
   1022       old_config.clock = clock;
   1023       return old_config;  // Only neteq_config and clock are carried over.
   1024     }
   1025
   1026     NetEq::Config neteq_config;
   1027     Clock* clock;
   1028     AudioPacketizationCallback* transport;
   1029     ACMVADCallback* vad_callback;
   1030     bool play_dtmf;
   1031     int initial_playout_delay_ms;
   1032     int playout_channels;
   1033     int playout_frequency_hz;
   1034   };
   1035
   1036   static AudioCoding* Create(const Config& config);
   1037   virtual ~AudioCoding() {}
   1038
   1039   // Registers a codec, specified by |send_codec|, as sending codec.
   1040   // This API can be called multiple times. The last codec registered overwrites
   1041   // the previous ones. Returns true if successful, false if not.
   1042   //
   1043   // Note: If a stereo codec is registered as send codec, VAD/DTX will
   1044   // automatically be turned off, since it is not supported for stereo sending.
   1045   virtual bool RegisterSendCodec(AudioEncoder* send_codec) = 0;
   1046
   1047   // Temporary solution to be used during refactoring:
   1048   // |encoder_type| should be from the anonymous enum in acm2::ACMCodecDB.
   1049   virtual bool RegisterSendCodec(int encoder_type,
   1050                                  uint8_t payload_type,
   1051                                  int frame_size_samples = 0) = 0;
   1052
   1053   // Returns the encoder object currently in use. This is the same as the
   1054   // codec that was registered in the latest call to RegisterSendCodec().
   1055   virtual const AudioEncoder* GetSenderInfo() const = 0;
   1056
   1057   // Temporary solution to be used during refactoring.
   1058   virtual const CodecInst* GetSenderCodecInst() = 0;
   1059
   1060   // Adds 10 ms of raw (PCM) audio data to the encoder. If the sampling
   1061   // frequency of the audio does not match the sampling frequency of the
   1062   // current encoder, ACM will resample the audio.
   1063   //
   1064   // Return value:
   1065   //      0   successfully added the frame.
   1066   //     -1   some error occurred and data is not added.
   1067   //   < -1   to add the frame to the buffer n samples had to be
   1068   //          overwritten, -n is the return value in this case.
   1069   // TODO(henrik.lundin): Make a better design for the return values. This one
   1070   // is just a copy of the old API.
   1071   virtual int Add10MsAudio(const AudioFrame& audio_frame) = 0;
   1072
   1073   // Returns combined info about the currently used decoder(s).
   1074   virtual const ReceiverInfo* GetReceiverInfo() const = 0;
   1075
   1076   // Registers a codec, specified by |receive_codec|, as receiving codec.
   1077   // This API can be called multiple times. If registering with a payload type
   1078   // that was already registered in a previous call, the latest call will
   1079   // override previous calls. Returns true if successful, false if not.
   1080   virtual bool RegisterReceiveCodec(AudioDecoder* receive_codec) = 0;
   1081
   1082   // Temporary solution:
   1083   // |decoder_type| should be from the anonymous enum in acm2::ACMCodecDB.
   1084   virtual bool RegisterReceiveCodec(int decoder_type, uint8_t payload_type) = 0;
   1085
   1086   // The following two methods both insert a new packet into the receiver.
   1087   // InsertPacket takes an RTP header input in |rtp_info|, while InsertPayload
   1088   // only requires a payload type and a timestamp. The latter assumes that the
   1089   // payloads come in the right order, and without any losses. In both cases,
   1090   // |incoming_payload| contains the RTP payload after the RTP header. Return
   1091   // true if successful, false if not.
   1092   virtual bool InsertPacket(const uint8_t* incoming_payload,
   1093                             int32_t payload_len_bytes,
   1094                             const WebRtcRTPHeader& rtp_info) = 0;
   1095
   1096   // TODO(henrik.lundin): Remove this method?
   1097   virtual bool InsertPayload(const uint8_t* incoming_payload,
   1098                              int32_t payload_len_byte,
   1099                              uint8_t payload_type,
   1100                              uint32_t timestamp) = 0;
   1101
   1102   // These two methods set a minimum and maximum jitter buffer delay in
   1103   // milliseconds. The purpose is mainly to adjust the delay to synchronize
   1104   // audio and video. The preferred jitter buffer size, computed by NetEq based
   1105   // on the current channel conditions, is clamped from below and above by these
   1106   // two methods. The given delay limits must be non-negative, less than
   1107   // 10000 ms, and the minimum must be strictly smaller than the maximum.
   1108   // Further, the maximum must be at least one frame duration. If these
   1109   // conditions are not met, false is returned. Giving the value 0 effectively
   1110   // unsets the minimum or maximum delay limits.
   1111   // Note that calling these methods is optional. If not called, NetEq will
   1112   // determine the optimal buffer size based on the network conditions.
   1113   virtual bool SetMinimumPlayoutDelay(int time_ms) = 0;
   1114
   1115   virtual bool SetMaximumPlayoutDelay(int time_ms) = 0;
   1116
   1117   // Returns the current value of the jitter buffer's preferred latency. This
   1118   // is computed based on inter-arrival times and playout mode of NetEq. The
   1119   // actual target delay is this value clamped from below and above by the
   1120   // values specified through SetMinimumPlayoutDelay() and
   1121   // SetMaximumPlayoutDelay(), respectively, if provided.
   1122   // TODO(henrik.lundin) Rename to PreferredDelayMs?
   1123   virtual int LeastRequiredDelayMs() const = 0;
   1124
   1125   // The send timestamp of an RTP packet is associated with the decoded
   1126   // audio of the packet in question. This function returns the timestamp of
   1127   // the latest audio delivered by Get10MsAudio(). Returns false if no timestamp
   1128   // can be provided, true otherwise.
   1129   virtual bool PlayoutTimestamp(uint32_t* timestamp) = 0;
   1130
   1131   // Delivers 10 ms of audio in |audio_frame|. Returns true if successful,
   1132   // false otherwise.
   1133   virtual bool Get10MsAudio(AudioFrame* audio_frame) = 0;
   1134
   1135   // Returns the network statistics. Note that the internal statistics of NetEq
   1136   // are reset by this call. Returns true if successful, false otherwise.
   1137   virtual bool NetworkStatistics(ACMNetworkStatistics* network_statistics) = 0;
   1138
   1139   // Enables NACK and sets the maximum size of the NACK list. If NACK is already
   1140   // enabled then the maximum NACK list size is modified accordingly. Returns
   1141   // true if successful, false otherwise.
   1142   //
   1143   // If the sequence number of last received packet is N, the sequence numbers
   1144   // of NACK list are in the range of [N - |max_nack_list_size|, N).
   1145   //
   1146   // |max_nack_list_size| should be positive and less than or equal to
   1147   // |Nack::kNackListSizeLimit|.
   1148   virtual bool EnableNack(size_t max_nack_list_size) = 0;
   1149
   1150   // Disables NACK.
   1151   virtual void DisableNack() = 0;
   1152
   1153
   1154   // Temporary solution to be used during refactoring.
   1155   // If DTX is enabled and the codec does not have internal DTX/VAD
   1156   // WebRtc VAD will be automatically enabled and |enable_vad| is ignored.
   1157   //
   1158   // If DTX is disabled but VAD is enabled no DTX packets are sent,
   1159   // regardless of whether the codec has internal DTX/VAD or not. In this
   1160   // case, WebRtc VAD is running to label frames as active/inactive.
   1161   //
   1162   // NOTE! VAD/DTX is not supported when sending stereo.
   1163   //
   1164   // Return true if successful, false otherwise.
   1165   virtual bool SetVad(bool enable_dtx,
   1166                       bool enable_vad,
   1167                       ACMVADMode vad_mode) = 0;
   1168
   1169   // Returns a list of packets to request retransmission of.
   1170   // |round_trip_time_ms| is an estimate of the round-trip-time (in
   1171   // milliseconds). Missing packets which will be decoded sooner than the
   1172   // round-trip-time (with respect to the time this API is called) will not be
   1173   // included in the list.
   1174   // |round_trip_time_ms| must be non-negative.
   1175   virtual std::vector<uint16_t> GetNackList(int round_trip_time_ms) const = 0;
   1176
   1177   // Returns the timing statistics for calls to Get10MsAudio.
   1178   virtual void GetDecodingCallStatistics(
   1179       AudioDecodingCallStats* call_stats) const = 0;
   1180 };
   1181 
   1182 }  // namespace webrtc
   1183 
   1184 #endif  // WEBRTC_MODULES_AUDIO_CODING_MAIN_INTERFACE_AUDIO_CODING_MODULE_H_
   1185