Home | History | Annotate | Download | only in acm2
      1 /*
      2  *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #ifndef WEBRTC_MODULES_AUDIO_CODING_MAIN_ACM2_ACM_RECEIVER_H_
     12 #define WEBRTC_MODULES_AUDIO_CODING_MAIN_ACM2_ACM_RECEIVER_H_
     13 
     14 #include <vector>
     15 
     16 #include "webrtc/base/thread_annotations.h"
     17 #include "webrtc/common_audio/vad/include/webrtc_vad.h"
     18 #include "webrtc/engine_configurations.h"
     19 #include "webrtc/modules/audio_coding/main/interface/audio_coding_module.h"
     20 #include "webrtc/modules/audio_coding/main/acm2/acm_codec_database.h"
     21 #include "webrtc/modules/audio_coding/main/acm2/acm_resampler.h"
     22 #include "webrtc/modules/audio_coding/main/acm2/call_statistics.h"
     23 #include "webrtc/modules/audio_coding/main/acm2/initial_delay_manager.h"
     24 #include "webrtc/modules/audio_coding/neteq/interface/neteq.h"
     25 #include "webrtc/modules/interface/module_common_types.h"
     26 #include "webrtc/system_wrappers/interface/scoped_ptr.h"
     27 #include "webrtc/typedefs.h"
     28 
     29 namespace webrtc {
     30 
     31 struct CodecInst;
     32 class CriticalSectionWrapper;
     33 class NetEq;
     34 
     35 namespace acm2 {
     36 
     37 class Nack;
     38 // Receive side of ACM: inserts packets into NetEq and fetches decoded audio.
     39 class AcmReceiver {
     40  public:
     41   struct Decoder {  // Entry type of the |decoders_| table below.
     42     bool registered;
     43     uint8_t payload_type;  // Payload type associated with this entry.
     44     // This field is meaningful for codecs where both mono and
     45     // stereo versions are registered under the same ID.
     46     int channels;
     47   };
     48 
     49   // Constructs a receiver from the given ACM configuration.
     50   explicit AcmReceiver(const AudioCodingModule::Config& config);
     51 
     52   // Destructor of the class.
     53   ~AcmReceiver();
     54 
     55   //
     56   // Inserts a payload with its associated RTP-header into NetEq.
     57   //
     58   // Input:
     59   //   - rtp_header           : RTP header for the incoming payload containing
     60   //                            information about payload type, sequence number,
     61   //                            timestamp, SSRC and marker bit.
     62   //   - incoming_payload     : Incoming audio payload.
     63   //   - length_payload       : Length of incoming audio payload in bytes.
     64   //
     65   // Return value             : 0 if OK.
     66   //                           <0 if NetEq returned an error.
     67   //
     68   int InsertPacket(const WebRtcRTPHeader& rtp_header,
     69                    const uint8_t* incoming_payload,
     70                    int length_payload);
     71 
     72   //
     73   // Asks NetEq for 10 milliseconds of decoded audio.
     74   //
     75   // Input:
     76   //   - desired_freq_hz      : specifies the sampling rate [Hz] of the output
     77   //                            audio. If set to -1, no resampling is required
     78   //                            and the audio is returned at the sampling rate
     79   //                            of the decoder.
     80   //
     81   // Output:
     82   //   - audio_frame          : an audio frame where output data and
     83   //                            associated parameters are written to.
     84   //
     85   // Return value             : 0 if OK.
     86   //                           -1 if NetEq returned an error.
     87   //
     88   int GetAudio(int desired_freq_hz, AudioFrame* audio_frame);
     89 
     90   //
     91   // Adds a new codec to the NetEq codec database.
     92   //
     93   // Input:
     94   //   - acm_codec_id         : ACM codec ID.
     95   //   - payload_type         : payload type.
     96   //   - channels             : number of channels of the codec.
     97   //   - audio_decoder        : pointer to a decoder object. If it is NULL
     98   //                            then NetEq will internally create the decoder
     99   //                            object. Otherwise, NetEq will store this pointer
    100   //                            as the decoder for the given payload type. NetEq
    101   //                            won't take ownership of this pointer; it is up
    102   //                            to the client of this class (ACM) to delete it.
    103   //                            Providing |audio_decoder| gives ACM control over
    104   //                            the decoder instance. This is essential for a
    105   //                            codec like iSAC, whose encoder has to know about
    106   //                            the decoder (the bandwidth estimator is updated
    107   //                            at decoding time).
    108   //
    109   // Return value             : 0 if OK.
    110   //                           <0 if NetEq returned an error.
    111   //
    112   int AddCodec(int acm_codec_id,
    113                uint8_t payload_type,
    114                int channels,
    115                AudioDecoder* audio_decoder);
    116 
    117   //
    118   // Sets a minimum delay for the packet buffer. The given delay is
    119   // maintained, unless channel conditions dictate a higher delay.
    120   //
    121   // Input:
    122   //   - delay_ms             : minimum delay in milliseconds.
    123   //
    124   // Return value             : 0 if OK.
    125   //                           <0 if NetEq returned an error.
    126   //
    127   int SetMinimumDelay(int delay_ms);
    128 
    129   //
    130   // Sets a maximum delay [ms] for the packet buffer. The target delay does not
    131   // exceed the given value, even if channel conditions require it.
    132   //
    133   // Input:
    134   //   - delay_ms             : maximum delay in milliseconds.
    135   //
    136   // Return value             : 0 if OK.
    137   //                           <0 if NetEq returned an error.
    138   //
    139   int SetMaximumDelay(int delay_ms);
    140 
    141   //
    142   // Gets the least required delay [ms] computed based on channel conditions.
    143   // Note that this is before applying any user-defined limits (specified by
    144   // calling SetMinimumDelay() and/or SetMaximumDelay()).
    145   //
    146   int LeastRequiredDelayMs() const;
    147 
    148   //
    149   // Sets an initial delay of |delay_ms| milliseconds. This introduces a playout
    150   // delay: silence (zero signal) is played out until |delay_ms| milliseconds of
    151   // audio is buffered. Then, NetEq maintains the delay.
    152   //
    153   // Input:
    154   //   - delay_ms             : initial delay in milliseconds.
    155   //
    156   // Return value             : 0 if OK.
    157   //                           <0 if NetEq returned an error.
    158   //
    159   int SetInitialDelay(int delay_ms);
    160 
    161   //
    162   // Resets the initial delay to zero.
    163   //
    164   void ResetInitialDelay();
    165 
    166   //
    167   // Get the current sampling frequency in Hz.
    168   //
    169   // Return value             : Sampling frequency in Hz.
    170   //
    171   int current_sample_rate_hz() const;
    172 
    173   //
    174   // Sets the playout mode.
    175   //
    176   // Input:
    177   //   - mode                 : an enumerator specifying the playout mode.
    178   //
    179   void SetPlayoutMode(AudioPlayoutMode mode);
    180 
    181   //
    182   // Get the current playout mode.
    183   //
    184   // Return value             : The current playout mode.
    185   //
    186   AudioPlayoutMode PlayoutMode() const;
    187 
    188   //
    189   // Get the current network statistics from NetEq.
    190   //
    191   // Output:
    192   //   - statistics           : The current network statistics.
    193   //
    194   void NetworkStatistics(ACMNetworkStatistics* statistics);
    195 
    196   //
    197   // Enable post-decoding VAD.
    198   //
    199   void EnableVad();
    200 
    201   //
    202   // Disable post-decoding VAD.
    203   //
    204   void DisableVad();
    205 
    206   //
    207   // Returns whether post-decoding VAD is enabled (true) or disabled (false).
    208   //
    209   bool vad_enabled() const { return vad_enabled_; }
    210 
    211   //
    212   // Flushes the NetEq packet and speech buffers.
    213   //
    214   void FlushBuffers();
    215 
    216   //
    217   // Removes a payload-type from the NetEq codec database.
    218   //
    219   // Input:
    220   //   - payload_type         : the payload-type to be removed.
    221   //
    222   // Return value             : 0 if OK.
    223   //                           -1 if an error occurred.
    224   //
    225   int RemoveCodec(uint8_t payload_type);
    226 
    227   //
    228   // Remove all registered codecs.
    229   //
    230   int RemoveAllCodecs();
    231 
    232   //
    233   // Set ID.
    234   //
    235   void set_id(int id);  // TODO(turajs): can be inline.
    236 
    237   //
    238   // Gets the RTP timestamp of the last sample delivered by GetAudio().
    239   // Returns true if the RTP timestamp is valid, otherwise false.
    240   //
    241   bool GetPlayoutTimestamp(uint32_t* timestamp);
    242 
    243   //
    244   // Return the index of the codec associated with the last non-CNG/non-DTMF
    245   // received payload. If no non-CNG/non-DTMF payload has been received, -1 is
    246   // returned.
    247   //
    248   int last_audio_codec_id() const;  // TODO(turajs): can be inline.
    249 
    250   //
    251   // Return the payload-type of the last non-CNG/non-DTMF RTP packet. If no
    252   // non-CNG/non-DTMF packet has been received, -1 is returned.
    253   //
    254   int last_audio_payload_type() const;  // TODO(turajs): can be inline.
    255 
    256   //
    257   // Get the audio codec associated with the last non-CNG/non-DTMF received
    258   // payload. If no non-CNG/non-DTMF packet has been received, -1 is returned;
    259   // otherwise 0 is returned.
    260   //
    261   int LastAudioCodec(CodecInst* codec) const;
    262 
    263   //
    264   // Return payload type of RED if it is registered, otherwise return -1.
    265   //
    266   int RedPayloadType() const;
    267 
    268   //
    269   // Get a decoder given its registered payload-type.
    270   //
    271   // Input:
    272   //   - payload_type         : the payload-type of the codec to be retrieved.
    273   //
    274   // Output:
    275   //   - codec                : codec associated with the given payload-type.
    276   //
    277   // Return value             : 0 if succeeded.
    278   //                           -1 if failed, e.g. given payload-type is not
    279   //                              registered.
    280   //
    281   int DecoderByPayloadType(uint8_t payload_type,
    282                            CodecInst* codec) const;
    283 
    284   //
    285   // Enable NACK and set the maximum size of the NACK list. If NACK is already
    286   // enabled then the maximum NACK list size is modified accordingly.
    287   //
    288   // Input:
    289   //   - max_nack_list_size   : maximum NACK list size; should be positive
    290   //                            (non-zero) and less than or equal to
    291   //                            |Nack::kNackListSizeLimit|.
    292   //
    293   // Return value             : 0 if succeeded.
    294   //                           -1 if failed.
    295   //
    296   int EnableNack(size_t max_nack_list_size);
    297 
    298   // Disable NACK.
    299   void DisableNack();
    300 
    301   //
    302   // Get a list of packets to be retransmitted.
    303   //
    304   // Input:
    305   //   - round_trip_time_ms   : estimate of the round-trip-time (in milliseconds).
    306   // Return value             : list of packets to be retransmitted.
    307   //
    308   std::vector<uint16_t> GetNackList(int round_trip_time_ms) const;
    309 
    310   //
    311   // Get statistics of calls to GetAudio().
    312   void GetDecodingCallStatistics(AudioDecodingCallStats* stats) const;
    313 
    314  private:
    315   int PayloadType2CodecIndex(uint8_t payload_type) const;
    316   // Caller must hold |crit_sect_|; see the lock annotation on the declaration.
    317   bool GetSilence(int desired_sample_rate_hz, AudioFrame* frame)
    318       EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
    319 
    320   int GetNumSyncPacketToInsert(uint16_t received_squence_number);
    321 
    322   int RtpHeaderToCodecIndex(
    323       const RTPHeader& rtp_header, const uint8_t* payload) const;
    324 
    325   uint32_t NowInTimestamp(int decoder_sampling_rate) const;
    326 
    327   void InsertStreamOfSyncPackets(InitialDelayManager::SyncStream* sync_stream);
    328 
    329   scoped_ptr<CriticalSectionWrapper> crit_sect_;
    330   int id_;  // TODO(henrik.lundin) Make const.
    331   int last_audio_decoder_ GUARDED_BY(crit_sect_);
    332   AudioFrame::VADActivity previous_audio_activity_ GUARDED_BY(crit_sect_);
    333   int current_sample_rate_hz_ GUARDED_BY(crit_sect_);
    334   ACMResampler resampler_ GUARDED_BY(crit_sect_);
    335   // Used in GetAudio, declared as member to avoid allocating every 10ms.
    336   // TODO(henrik.lundin) Stack-allocate in GetAudio instead?
    337   int16_t audio_buffer_[AudioFrame::kMaxDataSizeSamples] GUARDED_BY(crit_sect_);
    338   scoped_ptr<Nack> nack_ GUARDED_BY(crit_sect_);
    339   bool nack_enabled_ GUARDED_BY(crit_sect_);
    340   CallStatistics call_stats_ GUARDED_BY(crit_sect_);
    341   NetEq* neteq_;
    342   Decoder decoders_[ACMCodecDB::kMaxNumCodecs];
    343   bool vad_enabled_;
    344   Clock* clock_;  // TODO(henrik.lundin) Make const if possible.
    345 
    346   // Indicates if a non-zero initial delay is set, and the receiver is in
    347   // AV-sync mode.
    348   bool av_sync_;
    349   scoped_ptr<InitialDelayManager> initial_delay_manager_;
    350 
    351   // The following are defined as members to avoid creating them in every
    352   // iteration. |missing_packets_sync_stream_| is *ONLY* used in InsertPacket().
    353   // |late_packets_sync_stream_| is only used in GetAudio(). Both of these
    354   // member variables are allocated only when AV-sync is enabled, i.e. when an
    355   // initial delay is set.
    356   scoped_ptr<InitialDelayManager::SyncStream> missing_packets_sync_stream_;
    357   scoped_ptr<InitialDelayManager::SyncStream> late_packets_sync_stream_;
    358 };
    359 
    360 }  // namespace acm2
    361 
    362 }  // namespace webrtc
    363 
    364 #endif  // WEBRTC_MODULES_AUDIO_CODING_MAIN_ACM2_ACM_RECEIVER_H_
    365