Home | History | Annotate | Download | only in acm2
      1 /*
      2  *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #ifndef WEBRTC_MODULES_AUDIO_CODING_MAIN_ACM2_ACM_RECEIVER_H_
     12 #define WEBRTC_MODULES_AUDIO_CODING_MAIN_ACM2_ACM_RECEIVER_H_
     13 
     14 #include <vector>
     15 
     16 #include "webrtc/common_audio/vad/include/webrtc_vad.h"
     17 #include "webrtc/engine_configurations.h"
     18 #include "webrtc/modules/audio_coding/main/interface/audio_coding_module.h"
     19 #include "webrtc/modules/audio_coding/main/acm2/acm_codec_database.h"
     20 #include "webrtc/modules/audio_coding/main/acm2/acm_resampler.h"
     21 #include "webrtc/modules/audio_coding/main/acm2/call_statistics.h"
     22 #include "webrtc/modules/audio_coding/main/acm2/initial_delay_manager.h"
     23 #include "webrtc/modules/audio_coding/neteq/interface/neteq.h"
     24 #include "webrtc/modules/interface/module_common_types.h"
     25 #include "webrtc/system_wrappers/interface/scoped_ptr.h"
     26 #include "webrtc/system_wrappers/interface/thread_annotations.h"
     27 #include "webrtc/typedefs.h"
     28 
     29 namespace webrtc {
     30 
     31 struct CodecInst;              // Used only as CodecInst* in this header.
     32 class CriticalSectionWrapper;  // Held via scoped_ptr (|crit_sect_|).
     33 class RWLockWrapper;           // Used only as RWLockWrapper* (|decode_lock_|).
     34 class NetEq;                   // Used only as NetEq* (|neteq_|).
     35 
     36 namespace acm2 {
     37 
     38 class Nack;  // Held via scoped_ptr<Nack> (|nack_|) in AcmReceiver.
     39 
     40 class AcmReceiver {
          // Receiving side of the audio coding module (ACM). As declared below, it
          // inserts incoming RTP payloads into NetEq, pulls decoded audio out of
          // NetEq in 10 ms frames, and manages decoder registration, playout delay,
          // playout mode, post-decoding VAD and NACK.
     41  public:
          // Registration record for one decoder; instances are stored in
          // |decoders_|.
     42   struct Decoder {
     43     bool registered;
     44     uint8_t payload_type;
     45     // This field is meaningful for codecs where both mono and
     46     // stereo versions are registered under the same ID.
     47     int channels;
     48   };
     49 
     50   // Constructor of the class
     51   explicit AcmReceiver(const AudioCodingModule::Config& config);
     52 
     53   // Destructor of the class.
     54   ~AcmReceiver();
     55 
     56   //
     57   // Inserts a payload with its associated RTP-header into NetEq.
     58   //
     59   // Input:
     60   //   - rtp_header           : RTP header for the incoming payload containing
     61   //                            information about payload type, sequence number,
     62   //                            timestamp, SSRC and marker bit.
     63   //   - incoming_payload     : Incoming audio payload.
     64   //   - length_payload       : Length of incoming audio payload in bytes.
     65   //
     66   // Return value             : 0 if OK.
     67   //                           <0 if NetEq returned an error.
     68   //
     69   int InsertPacket(const WebRtcRTPHeader& rtp_header,
     70                    const uint8_t* incoming_payload,
     71                    int length_payload);
     72 
     73   //
     74   // Asks NetEq for 10 milliseconds of decoded audio.
     75   //
     76   // Input:
     77   //   -desired_freq_hz       : specifies the sampling rate [Hz] of the output
     78   //                            audio. If set to -1, no resampling is
     79   //                            required and the audio is returned at the
     80   //                            sampling rate of the decoder.
     81   //
     82   // Output:
     83   //   -audio_frame           : an audio frame where output data and
     84   //                            associated parameters are written to.
     85   //
     86   // Return value             : 0 if OK.
     87   //                           -1 if NetEq returned an error.
     88   //
     89   int GetAudio(int desired_freq_hz, AudioFrame* audio_frame);
     90 
     91   //
     92   // Adds a new codec to the NetEq codec database.
     93   //
     94   // Input:
     95   //   - acm_codec_id        : ACM codec ID.
     96   //   - payload_type        : payload type.
          //   - channels            : number of audio channels of the codec being
          //                           added. NOTE(review): the accepted values are
          //                           not visible in this header; confirm in
          //                           acm_receiver.cc.
     97   //   - audio_decoder       : pointer to a decoder object. If it is NULL
     98   //                           then NetEq will internally create the decoder
     99   //                           object. Otherwise, NetEq will store this pointer
    100   //                           as the decoder corresponding with the given
    101   //                           payload type. NetEq won't acquire the ownership
    102   //                           of this pointer. It is up to the client of this
    103   //                           class (ACM) to delete it. By providing
    104   //                           |audio_decoder| ACM will have control over the
    105   //                           decoder instance of the codec. This is essential
    106   //                           for a codec like iSAC, whose encoder has to know
    107   //                           about the decoder (the bandwidth estimator is
    108   //                           updated at decoding time).
    109   //
    110   // Return value             : 0 if OK.
    111   //                           <0 if NetEq returned an error.
    112   //
    113   int AddCodec(int acm_codec_id,
    114                uint8_t payload_type,
    115                int channels,
    116                AudioDecoder* audio_decoder);
    117 
    118   //
    119   // Sets a minimum delay for packet buffer. The given delay is maintained,
    120   // unless channel condition dictates a higher delay.
    121   //
    122   // Input:
    123   //   - delay_ms             : minimum delay in milliseconds.
    124   //
    125   // Return value             : 0 if OK.
    126   //                           <0 if NetEq returned an error.
    127   //
    128   int SetMinimumDelay(int delay_ms);
    129 
    130   //
    131   // Sets a maximum delay [ms] for the packet buffer. The target delay does not
    132   // exceed the given value, even if channel condition requires so.
    133   //
    134   // Input:
    135   //   - delay_ms             : maximum delay in milliseconds.
    136   //
    137   // Return value             : 0 if OK.
    138   //                           <0 if NetEq returned an error.
    139   //
    140   int SetMaximumDelay(int delay_ms);
    141 
    142   //
    143   // Get least required delay computed based on channel conditions. Note that
    144   // this is before applying any user-defined limits (specified by calling
    145   // SetMinimumDelay() and/or SetMaximumDelay()).
    146   //
    147   int LeastRequiredDelayMs() const;
    148 
    149   //
    150   // Sets an initial delay of |delay_ms| milliseconds. This introduces a playout
    151   // delay. Silence (zero signal) is played out until the equivalent of
    152   // |delay_ms| milliseconds of audio is buffered. Then, NetEq maintains the delay.
    153   //
    154   // Input:
    155   //   - delay_ms             : initial delay in milliseconds.
    156   //
    157   // Return value             : 0 if OK.
    158   //                           <0 if NetEq returned an error.
    159   //
    160   int SetInitialDelay(int delay_ms);
    161 
    162   //
    163   // Resets the initial delay to zero.
    164   //
    165   void ResetInitialDelay();
    166 
    167   //
    168   // Get the current sampling frequency in Hz.
    169   //
    170   // Return value             : Sampling frequency in Hz.
    171   //
    172   int current_sample_rate_hz() const;
    173 
    174   //
    175   // Sets the playout mode.
    176   //
    177   // Input:
    178   //   - mode                 : an enumerator specifying the playout mode.
    179   //
    180   void SetPlayoutMode(AudioPlayoutMode mode);
    181 
    182   //
    183   // Get the current playout mode.
    184   //
    185   // Return value             : The current playout mode.
    186   //
    187   AudioPlayoutMode PlayoutMode() const;
    188 
    189   //
    190   // Get the current network statistics from NetEq.
    191   //
    192   // Output:
    193   //   - statistics           : The current network statistics.
    194   //
    195   void NetworkStatistics(ACMNetworkStatistics* statistics);
    196 
    197   //
    198   // Enable post-decoding VAD.
    199   //
    200   void EnableVad();
    201 
    202   //
    203   // Disable post-decoding VAD.
    204   //
    205   void DisableVad();
    206 
    207   //
    208   // Returns whether post-decoding VAD is enabled (true) or disabled (false).
    209   //
    210   bool vad_enabled() const { return vad_enabled_; }
    211 
    212   //
    213   // Get the decode lock used to protect decoder instances while decoding.
    214   //
    215   // Return value             : Pointer to the decode lock.
    216   //
    217   RWLockWrapper* DecodeLock() const { return decode_lock_; }
    218 
    219   //
    220   // Flushes the NetEq packet and speech buffers.
    221   //
    222   void FlushBuffers();
    223 
    224   //
    225   // Removes a payload-type from the NetEq codec database.
    226   //
    227   // Input:
    228   //   - payload_type         : the payload-type to be removed.
    229   //
    230   // Return value             : 0 if OK.
    231   //                           -1 if an error occurred.
    232   //
    233   int RemoveCodec(uint8_t payload_type);
    234 
    235   //
    236   // Remove all registered codecs.
    237   //
    238   int RemoveAllCodecs();
    239 
    240   //
    241   // Set ID.
    242   //
    243   void set_id(int id);  // TODO(turajs): can be inline.
    244 
    245   //
    246   // Gets the RTP timestamp of the last sample delivered by GetAudio().
    247   // Returns true if the RTP timestamp is valid, otherwise false.
    248   //
    249   bool GetPlayoutTimestamp(uint32_t* timestamp);
    250 
    251   //
    252   // Return the index of the codec associated with the last non-CNG/non-DTMF
    253   // received payload. If no non-CNG/non-DTMF payload is received -1 is
    254   // returned.
    255   //
    256   int last_audio_codec_id() const;  // TODO(turajs): can be inline.
    257 
    258   //
    259   // Return the payload-type of the last non-CNG/non-DTMF RTP packet. If no
    260   // non-CNG/non-DTMF packet is received -1 is returned.
    261   //
    262   int last_audio_payload_type() const;  // TODO(turajs): can be inline.
    263 
    264   //
    265   // Get the audio codec associated with the last non-CNG/non-DTMF received
    266   // payload. If no non-CNG/non-DTMF packet is received -1 is returned,
    267   // otherwise return 0.
    268   //
    269   int LastAudioCodec(CodecInst* codec) const;
    270 
    271   //
    272   // Return payload type of RED if it is registered, otherwise return -1.
    273   //
    274   int RedPayloadType() const;
    275 
    276   //
    277   // Get a decoder given its registered payload-type.
    278   //
    279   // Input:
    280   //    -payload_type         : the payload-type of the codec to be retrieved.
    281   //
    282   // Output:
    283   //    -codec                : codec associated with the given payload-type.
    284   //
    285   // Return value             : 0 if succeeded.
    286   //                           -1 if failed, e.g. given payload-type is not
    287   //                              registered.
    288   //
    289   int DecoderByPayloadType(uint8_t payload_type,
    290                            CodecInst* codec) const;
    291 
    292   //
    293   // Enable NACK and set the maximum size of the NACK list. If NACK is already
    294   // enabled then the maximum NACK list size is modified accordingly.
    295   //
    296   // Input:
    297   //    -max_nack_list_size  : maximum NACK list size
    298   //                           should be positive (non-zero) and less than or
    299   //                           equal to |Nack::kNackListSizeLimit|
    300   // Return value
    301   //                         : 0 if succeeded.
    302   //                          -1 if failed
    303   //
    304   int EnableNack(size_t max_nack_list_size);
    305 
    306   // Disable NACK.
    307   void DisableNack();
    308 
    309   //
    310   // Get a list of packets to be retransmitted.
    311   //
    312   // Input:
    313   //    -round_trip_time_ms : estimate of the round-trip-time (in milliseconds).
    314   // Return value           : list of packets to be retransmitted.
    315   //
    316   std::vector<uint16_t> GetNackList(int round_trip_time_ms) const;
    317 
    318   //
    319   // Returns the background noise mode. This is only for testing and ACM is not
    320   // calling this function. Used in acm_receiver_unittest.cc.
    321   //
    322   NetEqBackgroundNoiseMode BackgroundNoiseModeForTest() const;
    323 
    324   //
    325   // Get statistics of calls to GetAudio().
    326   void GetDecodingCallStatistics(AudioDecodingCallStats* stats) const;
    327 
    328  private:
          // NOTE(review): the private helpers below are not documented in this
          // header; their contracts must be confirmed in acm_receiver.cc.
    329   int PayloadType2CodecIndex(uint8_t payload_type) const;
    330 
          // Per the lock annotation, the caller must hold |crit_sect_|.
    331   bool GetSilence(int desired_sample_rate_hz, AudioFrame* frame)
    332       EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
    333 
    334   int GetNumSyncPacketToInsert(uint16_t received_squence_number);
    335 
    336   int RtpHeaderToCodecIndex(
    337       const RTPHeader& rtp_header, const uint8_t* payload) const;
    338 
    339   uint32_t NowInTimestamp(int decoder_sampling_rate) const;
    340 
    341   void InsertStreamOfSyncPackets(InitialDelayManager::SyncStream* sync_stream);
    342 
          // Lock named by the GUARDED_BY / EXCLUSIVE_LOCKS_REQUIRED annotations in
          // this class.
    343   scoped_ptr<CriticalSectionWrapper> crit_sect_;
    344   int id_;  // TODO(henrik.lundin) Make const.
    345   int last_audio_decoder_ GUARDED_BY(crit_sect_);
    346   AudioFrame::VADActivity previous_audio_activity_ GUARDED_BY(crit_sect_);
    347   int current_sample_rate_hz_ GUARDED_BY(crit_sect_);
    348   ACMResampler resampler_ GUARDED_BY(crit_sect_);
    349   // Used in GetAudio, declared as member to avoid allocating every 10ms.
    350   // TODO(henrik.lundin) Stack-allocate in GetAudio instead?
    351   int16_t audio_buffer_[AudioFrame::kMaxDataSizeSamples] GUARDED_BY(crit_sect_);
    352   scoped_ptr<Nack> nack_ GUARDED_BY(crit_sect_);
    353   bool nack_enabled_ GUARDED_BY(crit_sect_);
    354   CallStatistics call_stats_ GUARDED_BY(crit_sect_);
          // NOTE(review): the members from here on carry no GUARDED_BY annotation;
          // how (or whether) they are synchronized is not visible in this header.
          // vad_enabled() and DecodeLock() read |vad_enabled_| and |decode_lock_|
          // directly in their inline bodies.
    355   NetEq* neteq_;
    356   Decoder decoders_[ACMCodecDB::kMaxNumCodecs];
    357   RWLockWrapper* decode_lock_;
    358   bool vad_enabled_;
    359   Clock* clock_;  // TODO(henrik.lundin) Make const if possible.
    360 
    361   // Indicates if a non-zero initial delay is set, and the receiver is in
    362   // AV-sync mode.
    363   bool av_sync_;
    364   scoped_ptr<InitialDelayManager> initial_delay_manager_;
    365 
    366   // The following are defined as members to avoid creating them in every
    367   // iteration. |missing_packets_sync_stream_| is *ONLY* used in InsertPacket().
    368   // |late_packets_sync_stream_| is only used in GetAudio(). Both of these
    369   // member variables are allocated only when AV-sync is enabled, i.e.
    370   // initial delay is set.
    371   scoped_ptr<InitialDelayManager::SyncStream> missing_packets_sync_stream_;
    372   scoped_ptr<InitialDelayManager::SyncStream> late_packets_sync_stream_;
    373 };
    374 
    375 }  // namespace acm2
    376 
    377 }  // namespace webrtc
    378 
    379 #endif  // WEBRTC_MODULES_AUDIO_CODING_MAIN_ACM2_ACM_RECEIVER_H_
    380