/*
 *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef WEBRTC_MODULES_AUDIO_CODING_MAIN_ACM2_ACM_RECEIVER_H_
#define WEBRTC_MODULES_AUDIO_CODING_MAIN_ACM2_ACM_RECEIVER_H_

#include <vector>

#include "webrtc/base/thread_annotations.h"
#include "webrtc/common_audio/vad/include/webrtc_vad.h"
#include "webrtc/engine_configurations.h"
#include "webrtc/modules/audio_coding/main/interface/audio_coding_module.h"
#include "webrtc/modules/audio_coding/main/acm2/acm_codec_database.h"
#include "webrtc/modules/audio_coding/main/acm2/acm_resampler.h"
#include "webrtc/modules/audio_coding/main/acm2/call_statistics.h"
#include "webrtc/modules/audio_coding/main/acm2/initial_delay_manager.h"
#include "webrtc/modules/audio_coding/neteq/interface/neteq.h"
#include "webrtc/modules/interface/module_common_types.h"
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
#include "webrtc/typedefs.h"

namespace webrtc {

struct CodecInst;
class CriticalSectionWrapper;
class NetEq;

namespace acm2 {

class Nack;

// Receive-side helper of the audio coding module. As described by the method
// comments below, it inserts incoming RTP payloads into NetEq, pulls decoded
// audio out of NetEq in 10 ms chunks, and manages the set of registered
// decoders, delay settings, post-decoding VAD and NACK.
//
// Members annotated with GUARDED_BY(crit_sect_) must only be accessed while
// |crit_sect_| is held.
class AcmReceiver {
 public:
  // One entry of the decoder table (see |decoders_|).
  struct Decoder {
    // NOTE(review): presumably true once a codec has been added for this
    // entry via AddCodec(); confirm against the implementation.
    bool registered;
    // RTP payload type associated with this entry.
    uint8_t payload_type;
    // This field is meaningful for codecs where both mono and
    // stereo versions are registered under the same ID.
    int channels;
  };

  // Constructor of the class.
  explicit AcmReceiver(const AudioCodingModule::Config& config);

  // Destructor of the class.
  ~AcmReceiver();

  //
  // Inserts a payload with its associated RTP-header into NetEq.
  //
  // Input:
  //   - rtp_header           : RTP header for the incoming payload containing
  //                            information about payload type, sequence number,
  //                            timestamp, SSRC and marker bit.
  //   - incoming_payload     : Incoming audio payload.
  //   - length_payload       : Length of incoming audio payload in bytes.
  //
  // Return value             : 0 if OK.
  //                           <0 if NetEq returned an error.
  //
  int InsertPacket(const WebRtcRTPHeader& rtp_header,
                   const uint8_t* incoming_payload,
                   int length_payload);

  //
  // Asks NetEq for 10 milliseconds of decoded audio.
  //
  // Input:
  //   -desired_freq_hz       : specifies the sampling rate [Hz] of the output
  //                            audio. If set to -1, no resampling is required
  //                            and the audio is returned at the sampling rate
  //                            of the decoder.
  //
  // Output:
  //   -audio_frame           : an audio frame where output data and
  //                            associated parameters are written to.
  //
  // Return value             : 0 if OK.
  //                           -1 if NetEq returned an error.
  //
  int GetAudio(int desired_freq_hz, AudioFrame* audio_frame);

  //
  // Adds a new codec to the NetEq codec database.
  //
  // Input:
  //   - acm_codec_id        : ACM codec ID.
  //   - payload_type        : payload type.
  //   - channels            : NOTE(review): not documented originally;
  //                           presumably the number of channels the codec is
  //                           registered with (cf. Decoder::channels). Confirm
  //                           against the implementation.
  //   - audio_decoder       : pointer to a decoder object. If it is NULL
  //                           then NetEq will internally create the decoder
  //                           object. Otherwise, NetEq will store this pointer
  //                           as the decoder corresponding with the given
  //                           payload type. NetEq won't acquire the ownership
  //                           of this pointer. It is up to the client of this
  //                           class (ACM) to delete it. By providing
  //                           |audio_decoder| ACM will have control over the
  //                           decoder instance of the codec. This is essential
  //                           for a codec like iSAC, whose encoder has to know
  //                           about the decoder (the bandwidth estimator is
  //                           updated at decoding time).
  //
  // Return value             : 0 if OK.
  //                           <0 if NetEq returned an error.
  //
  int AddCodec(int acm_codec_id,
               uint8_t payload_type,
               int channels,
               AudioDecoder* audio_decoder);

  //
  // Sets a minimum delay for packet buffer. The given delay is maintained,
  // unless channel condition dictates a higher delay.
  //
  // Input:
  //   - delay_ms             : minimum delay in milliseconds.
  //
  // Return value             : 0 if OK.
  //                           <0 if NetEq returned an error.
  //
  int SetMinimumDelay(int delay_ms);

  //
  // Sets a maximum delay [ms] for the packet buffer. The target delay does not
  // exceed the given value, even if channel condition requires so.
  //
  // Input:
  //   - delay_ms             : maximum delay in milliseconds.
  //
  // Return value             : 0 if OK.
  //                           <0 if NetEq returned an error.
  //
  int SetMaximumDelay(int delay_ms);

  //
  // Get least required delay computed based on channel conditions. Note that
  // this is before applying any user-defined limits (specified by calling
  // SetMinimumDelay() and/or SetMaximumDelay()).
  //
  int LeastRequiredDelayMs() const;

  //
  // Sets an initial delay of |delay_ms| milliseconds. This introduces a playout
  // delay. Silence (zero signal) is played out until the equivalent of
  // |delay_ms| milliseconds of audio is buffered. Then, NetEq maintains the
  // delay.
  //
  // Input:
  //   - delay_ms             : initial delay in milliseconds.
  //
  // Return value             : 0 if OK.
  //                           <0 if NetEq returned an error.
  //
  int SetInitialDelay(int delay_ms);

  //
  // Resets the initial delay to zero.
  //
  void ResetInitialDelay();

  //
  // Get the current sampling frequency in Hz.
  //
  // Return value             : Sampling frequency in Hz.
  //
  int current_sample_rate_hz() const;

  //
  // Sets the playout mode.
  //
  // Input:
  //   - mode                 : an enumerator specifying the playout mode.
  //
  void SetPlayoutMode(AudioPlayoutMode mode);

  //
  // Get the current playout mode.
  //
  // Return value             : The current playout mode.
  //
  AudioPlayoutMode PlayoutMode() const;

  //
  // Get the current network statistics from NetEq.
  //
  // Output:
  //   - statistics           : The current network statistics.
  //
  void NetworkStatistics(ACMNetworkStatistics* statistics);

  //
  // Enable post-decoding VAD.
  //
  void EnableVad();

  //
  // Disable post-decoding VAD.
  //
  void DisableVad();

  //
  // Returns whether post-decoding VAD is enabled (true) or disabled (false).
  //
  // NOTE(review): this inline accessor reads |vad_enabled_| directly, without
  // acquiring |crit_sect_|, and |vad_enabled_| carries no GUARDED_BY
  // annotation. Confirm that this is safe for the intended callers.
  //
  bool vad_enabled() const { return vad_enabled_; }

  //
  // Flushes the NetEq packet and speech buffers.
  //
  void FlushBuffers();

  //
  // Removes a payload-type from the NetEq codec database.
  //
  // Input:
  //   - payload_type         : the payload-type to be removed.
  //
  // Return value             : 0 if OK.
  //                           -1 if an error occurred.
  //
  int RemoveCodec(uint8_t payload_type);

  //
  // Remove all registered codecs.
  //
  int RemoveAllCodecs();

  //
  // Set ID.
  //
  void set_id(int id);  // TODO(turajs): can be inline.

  //
  // Gets the RTP timestamp of the last sample delivered by GetAudio().
  // Returns true if the RTP timestamp is valid, otherwise false.
  //
  bool GetPlayoutTimestamp(uint32_t* timestamp);

  //
  // Return the index of the codec associated with the last non-CNG/non-DTMF
  // received payload. If no non-CNG/non-DTMF payload is received -1 is
  // returned.
  //
  int last_audio_codec_id() const;  // TODO(turajs): can be inline.

  //
  // Return the payload-type of the last non-CNG/non-DTMF RTP packet. If no
  // non-CNG/non-DTMF packet is received -1 is returned.
  //
  int last_audio_payload_type() const;  // TODO(turajs): can be inline.

  //
  // Get the audio codec associated with the last non-CNG/non-DTMF received
  // payload. If no non-CNG/non-DTMF packet is received -1 is returned,
  // otherwise return 0.
  //
  int LastAudioCodec(CodecInst* codec) const;

  //
  // Return payload type of RED if it is registered, otherwise return -1.
  //
  int RedPayloadType() const;

  //
  // Get a decoder given its registered payload-type.
  //
  // Input:
  //    -payload_type         : the payload-type of the codec to be retrieved.
  //
  // Output:
  //    -codec                : codec associated with the given payload-type.
  //
  // Return value             : 0 if succeeded.
  //                           -1 if failed, e.g. given payload-type is not
  //                              registered.
  //
  int DecoderByPayloadType(uint8_t payload_type,
                           CodecInst* codec) const;

  //
  // Enable NACK and set the maximum size of the NACK list. If NACK is already
  // enabled then the maximum NACK list size is modified accordingly.
  //
  // Input:
  //    -max_nack_list_size  : maximum NACK list size;
  //                           should be positive (non-zero) and less than or
  //                           equal to |Nack::kNackListSizeLimit|.
  // Return value
  //                         : 0 if succeeded.
  //                          -1 if failed.
  //
  int EnableNack(size_t max_nack_list_size);

  // Disable NACK.
  void DisableNack();

  //
  // Get a list of packets to be retransmitted.
  //
  // Input:
  //    -round_trip_time_ms : estimate of the round-trip-time (in milliseconds).
  // Return value           : list of packets to be retransmitted.
  //
  std::vector<uint16_t> GetNackList(int round_trip_time_ms) const;

  //
  // Get statistics of calls to GetAudio().
  //
  // Output:
  //    -stats              : receives the statistics.
  //
  void GetDecodingCallStatistics(AudioDecodingCallStats* stats) const;

 private:
  // NOTE(review): presumably maps an RTP payload type to an index into
  // |decoders_|; the failure value is not visible from this header. Confirm
  // against the implementation.
  int PayloadType2CodecIndex(uint8_t payload_type) const;

  // Must be called with |crit_sect_| held (see the lock annotation).
  // NOTE(review): the meaning of the boolean result is not visible from this
  // header; confirm against the implementation.
  bool GetSilence(int desired_sample_rate_hz, AudioFrame* frame)
      EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);

  // NOTE(review): the parameter name is misspelled ("squence"); rename it
  // together with the definition when that is next touched.
  int GetNumSyncPacketToInsert(uint16_t received_squence_number);

  // NOTE(review): presumably resolves the codec index for a packet from its
  // RTP header and payload; confirm against the implementation.
  int RtpHeaderToCodecIndex(
      const RTPHeader& rtp_header, const uint8_t* payload) const;

  // NOTE(review): presumably converts the current time (see |clock_|) into RTP
  // timestamp units for the given decoder sampling rate; confirm.
  uint32_t NowInTimestamp(int decoder_sampling_rate) const;

  // Inserts the sync packets described by |sync_stream|.
  void InsertStreamOfSyncPackets(InitialDelayManager::SyncStream* sync_stream);

  // Lock protecting every member annotated with GUARDED_BY(crit_sect_).
  scoped_ptr<CriticalSectionWrapper> crit_sect_;
  int id_;  // TODO(henrik.lundin) Make const.
  int last_audio_decoder_ GUARDED_BY(crit_sect_);
  AudioFrame::VADActivity previous_audio_activity_ GUARDED_BY(crit_sect_);
  int current_sample_rate_hz_ GUARDED_BY(crit_sect_);
  ACMResampler resampler_ GUARDED_BY(crit_sect_);
  // Used in GetAudio, declared as member to avoid allocating every 10ms.
  // TODO(henrik.lundin) Stack-allocate in GetAudio instead?
  int16_t audio_buffer_[AudioFrame::kMaxDataSizeSamples] GUARDED_BY(crit_sect_);
  scoped_ptr<Nack> nack_ GUARDED_BY(crit_sect_);
  bool nack_enabled_ GUARDED_BY(crit_sect_);
  CallStatistics call_stats_ GUARDED_BY(crit_sect_);
  // NOTE(review): the members from here on carry no GUARDED_BY annotation;
  // confirm how access to them is synchronized. Ownership of |neteq_| (a raw
  // pointer) is likewise not visible from this header.
  NetEq* neteq_;
  Decoder decoders_[ACMCodecDB::kMaxNumCodecs];
  bool vad_enabled_;
  Clock* clock_;  // TODO(henrik.lundin) Make const if possible.

  // Indicates if a non-zero initial delay is set, and the receiver is in
  // AV-sync mode.
  bool av_sync_;
  scoped_ptr<InitialDelayManager> initial_delay_manager_;

  // The following are defined as members to avoid creating them in every
  // iteration. |missing_packets_sync_stream_| is *ONLY* used in InsertPacket().
  // |late_packets_sync_stream_| is only used in GetAudio(). Both of these
  // member variables are allocated only when AV-sync is enabled, i.e. when an
  // initial delay is set.
  scoped_ptr<InitialDelayManager::SyncStream> missing_packets_sync_stream_;
  scoped_ptr<InitialDelayManager::SyncStream> late_packets_sync_stream_;
};

}  // namespace acm2

}  // namespace webrtc

#endif  // WEBRTC_MODULES_AUDIO_CODING_MAIN_ACM2_ACM_RECEIVER_H_