1 /* 2 * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #ifndef WEBRTC_MODULES_AUDIO_CODING_MAIN_ACM2_ACM_RECEIVER_H_ 12 #define WEBRTC_MODULES_AUDIO_CODING_MAIN_ACM2_ACM_RECEIVER_H_ 13 14 #include <vector> 15 16 #include "webrtc/common_audio/vad/include/webrtc_vad.h" 17 #include "webrtc/engine_configurations.h" 18 #include "webrtc/modules/audio_coding/main/interface/audio_coding_module.h" 19 #include "webrtc/modules/audio_coding/main/acm2/acm_codec_database.h" 20 #include "webrtc/modules/audio_coding/main/acm2/acm_resampler.h" 21 #include "webrtc/modules/audio_coding/main/acm2/call_statistics.h" 22 #include "webrtc/modules/audio_coding/main/acm2/initial_delay_manager.h" 23 #include "webrtc/modules/audio_coding/neteq/interface/neteq.h" 24 #include "webrtc/modules/interface/module_common_types.h" 25 #include "webrtc/system_wrappers/interface/scoped_ptr.h" 26 #include "webrtc/system_wrappers/interface/thread_annotations.h" 27 #include "webrtc/typedefs.h" 28 29 namespace webrtc { 30 31 struct CodecInst; 32 class CriticalSectionWrapper; 33 class RWLockWrapper; 34 class NetEq; 35 36 namespace acm2 { 37 38 class Nack; 39 40 class AcmReceiver { 41 public: 42 struct Decoder { 43 bool registered; 44 uint8_t payload_type; 45 // This field is meaningful for codecs where both mono and 46 // stereo versions are registered under the same ID. 47 int channels; 48 }; 49 50 // Constructor of the class 51 explicit AcmReceiver(const AudioCodingModule::Config& config); 52 53 // Destructor of the class. 54 ~AcmReceiver(); 55 56 // 57 // Inserts a payload with its associated RTP-header into NetEq. 58 // 59 // Input: 60 // - rtp_header : RTP header for the incoming payload containing 61 // information about payload type, sequence number, 62 // timestamp, SSRC and marker bit. 63 // - incoming_payload : Incoming audio payload. 64 // - length_payload : Length of incoming audio payload in bytes. 65 // 66 // Return value : 0 if OK. 67 // <0 if NetEq returned an error. 68 // 69 int InsertPacket(const WebRtcRTPHeader& rtp_header, 70 const uint8_t* incoming_payload, 71 int length_payload); 72 73 // 74 // Asks NetEq for 10 milliseconds of decoded audio. 75 // 76 // Input: 77 // -desired_freq_hz : specifies the sampling rate [Hz] of the output 78 // audio. If set -1 indicates to resampling is 79 // is required and the audio returned at the 80 // sampling rate of the decoder. 81 // 82 // Output: 83 // -audio_frame : an audio frame were output data and 84 // associated parameters are written to. 85 // 86 // Return value : 0 if OK. 87 // -1 if NetEq returned an error. 88 // 89 int GetAudio(int desired_freq_hz, AudioFrame* audio_frame); 90 91 // 92 // Adds a new codec to the NetEq codec database. 93 // 94 // Input: 95 // - acm_codec_id : ACM codec ID. 96 // - payload_type : payload type. 97 // - audio_decoder : pointer to a decoder object. If it is NULL 98 // then NetEq will internally create the decoder 99 // object. Otherwise, NetEq will store this pointer 100 // as the decoder corresponding with the given 101 // payload type. NetEq won't acquire the ownership 102 // of this pointer. It is up to the client of this 103 // class (ACM) to delete it. By providing 104 // |audio_decoder| ACM will have control over the 105 // decoder instance of the codec. This is essential 106 // for a codec like iSAC which encoder/decoder 107 // encoder has to know about decoder (bandwidth 108 // estimator that is updated at decoding time). 109 // 110 // Return value : 0 if OK. 111 // <0 if NetEq returned an error. 112 // 113 int AddCodec(int acm_codec_id, 114 uint8_t payload_type, 115 int channels, 116 AudioDecoder* audio_decoder); 117 118 // 119 // Sets a minimum delay for packet buffer. The given delay is maintained, 120 // unless channel condition dictates a higher delay. 121 // 122 // Input: 123 // - delay_ms : minimum delay in milliseconds. 124 // 125 // Return value : 0 if OK. 126 // <0 if NetEq returned an error. 127 // 128 int SetMinimumDelay(int delay_ms); 129 130 // 131 // Sets a maximum delay [ms] for the packet buffer. The target delay does not 132 // exceed the given value, even if channel condition requires so. 133 // 134 // Input: 135 // - delay_ms : maximum delay in milliseconds. 136 // 137 // Return value : 0 if OK. 138 // <0 if NetEq returned an error. 139 // 140 int SetMaximumDelay(int delay_ms); 141 142 // 143 // Get least required delay computed based on channel conditions. Note that 144 // this is before applying any user-defined limits (specified by calling 145 // (SetMinimumDelay() and/or SetMaximumDelay()). 146 // 147 int LeastRequiredDelayMs() const; 148 149 // 150 // Sets an initial delay of |delay_ms| milliseconds. This introduces a playout 151 // delay. Silence (zero signal) is played out until equivalent of |delay_ms| 152 // millisecond of audio is buffered. Then, NetEq maintains the delay. 153 // 154 // Input: 155 // - delay_ms : initial delay in milliseconds. 156 // 157 // Return value : 0 if OK. 158 // <0 if NetEq returned an error. 159 // 160 int SetInitialDelay(int delay_ms); 161 162 // 163 // Resets the initial delay to zero. 164 // 165 void ResetInitialDelay(); 166 167 // 168 // Get the current sampling frequency in Hz. 169 // 170 // Return value : Sampling frequency in Hz. 171 // 172 int current_sample_rate_hz() const; 173 174 // 175 // Sets the playout mode. 176 // 177 // Input: 178 // - mode : an enumerator specifying the playout mode. 179 // 180 void SetPlayoutMode(AudioPlayoutMode mode); 181 182 // 183 // Get the current playout mode. 184 // 185 // Return value : The current playout mode. 186 // 187 AudioPlayoutMode PlayoutMode() const; 188 189 // 190 // Get the current network statistics from NetEq. 191 // 192 // Output: 193 // - statistics : The current network statistics. 194 // 195 void NetworkStatistics(ACMNetworkStatistics* statistics); 196 197 // 198 // Enable post-decoding VAD. 199 // 200 void EnableVad(); 201 202 // 203 // Disable post-decoding VAD. 204 // 205 void DisableVad(); 206 207 // 208 // Returns whether post-decoding VAD is enabled (true) or disabled (false). 209 // 210 bool vad_enabled() const { return vad_enabled_; } 211 212 // 213 // Get the decode lock used to protect decoder instances while decoding. 214 // 215 // Return value : Pointer to the decode lock. 216 // 217 RWLockWrapper* DecodeLock() const { return decode_lock_; } 218 219 // 220 // Flushes the NetEq packet and speech buffers. 221 // 222 void FlushBuffers(); 223 224 // 225 // Removes a payload-type from the NetEq codec database. 226 // 227 // Input: 228 // - payload_type : the payload-type to be removed. 229 // 230 // Return value : 0 if OK. 231 // -1 if an error occurred. 232 // 233 int RemoveCodec(uint8_t payload_type); 234 235 // 236 // Remove all registered codecs. 237 // 238 int RemoveAllCodecs(); 239 240 // 241 // Set ID. 242 // 243 void set_id(int id); // TODO(turajs): can be inline. 244 245 // 246 // Gets the RTP timestamp of the last sample delivered by GetAudio(). 247 // Returns true if the RTP timestamp is valid, otherwise false. 248 // 249 bool GetPlayoutTimestamp(uint32_t* timestamp); 250 251 // 252 // Return the index of the codec associated with the last non-CNG/non-DTMF 253 // received payload. If no non-CNG/non-DTMF payload is received -1 is 254 // returned. 255 // 256 int last_audio_codec_id() const; // TODO(turajs): can be inline. 257 258 // 259 // Return the payload-type of the last non-CNG/non-DTMF RTP packet. If no 260 // non-CNG/non-DTMF packet is received -1 is returned. 261 // 262 int last_audio_payload_type() const; // TODO(turajs): can be inline. 263 264 // 265 // Get the audio codec associated with the last non-CNG/non-DTMF received 266 // payload. If no non-CNG/non-DTMF packet is received -1 is returned, 267 // otherwise return 0. 268 // 269 int LastAudioCodec(CodecInst* codec) const; 270 271 // 272 // Return payload type of RED if it is registered, otherwise return -1; 273 // 274 int RedPayloadType() const; 275 276 // 277 // Get a decoder given its registered payload-type. 278 // 279 // Input: 280 // -payload_type : the payload-type of the codec to be retrieved. 281 // 282 // Output: 283 // -codec : codec associated with the given payload-type. 284 // 285 // Return value : 0 if succeeded. 286 // -1 if failed, e.g. given payload-type is not 287 // registered. 288 // 289 int DecoderByPayloadType(uint8_t payload_type, 290 CodecInst* codec) const; 291 292 // 293 // Enable NACK and set the maximum size of the NACK list. If NACK is already 294 // enabled then the maximum NACK list size is modified accordingly. 295 // 296 // Input: 297 // -max_nack_list_size : maximum NACK list size 298 // should be positive (none zero) and less than or 299 // equal to |Nack::kNackListSizeLimit| 300 // Return value 301 // : 0 if succeeded. 302 // -1 if failed 303 // 304 int EnableNack(size_t max_nack_list_size); 305 306 // Disable NACK. 307 void DisableNack(); 308 309 // 310 // Get a list of packets to be retransmitted. 311 // 312 // Input: 313 // -round_trip_time_ms : estimate of the round-trip-time (in milliseconds). 314 // Return value : list of packets to be retransmitted. 315 // 316 std::vector<uint16_t> GetNackList(int round_trip_time_ms) const; 317 318 // 319 // Returns the background noise mode. This is only for testing and ACM is not 320 // calling this function. Used in acm_receiver_unittest.cc. 321 // 322 NetEqBackgroundNoiseMode BackgroundNoiseModeForTest() const; 323 324 // 325 // Get statistics of calls to GetAudio(). 326 void GetDecodingCallStatistics(AudioDecodingCallStats* stats) const; 327 328 private: 329 int PayloadType2CodecIndex(uint8_t payload_type) const; 330 331 bool GetSilence(int desired_sample_rate_hz, AudioFrame* frame) 332 EXCLUSIVE_LOCKS_REQUIRED(crit_sect_); 333 334 int GetNumSyncPacketToInsert(uint16_t received_squence_number); 335 336 int RtpHeaderToCodecIndex( 337 const RTPHeader& rtp_header, const uint8_t* payload) const; 338 339 uint32_t NowInTimestamp(int decoder_sampling_rate) const; 340 341 void InsertStreamOfSyncPackets(InitialDelayManager::SyncStream* sync_stream); 342 343 scoped_ptr<CriticalSectionWrapper> crit_sect_; 344 int id_; // TODO(henrik.lundin) Make const. 345 int last_audio_decoder_ GUARDED_BY(crit_sect_); 346 AudioFrame::VADActivity previous_audio_activity_ GUARDED_BY(crit_sect_); 347 int current_sample_rate_hz_ GUARDED_BY(crit_sect_); 348 ACMResampler resampler_ GUARDED_BY(crit_sect_); 349 // Used in GetAudio, declared as member to avoid allocating every 10ms. 350 // TODO(henrik.lundin) Stack-allocate in GetAudio instead? 351 int16_t audio_buffer_[AudioFrame::kMaxDataSizeSamples] GUARDED_BY(crit_sect_); 352 scoped_ptr<Nack> nack_ GUARDED_BY(crit_sect_); 353 bool nack_enabled_ GUARDED_BY(crit_sect_); 354 CallStatistics call_stats_ GUARDED_BY(crit_sect_); 355 NetEq* neteq_; 356 Decoder decoders_[ACMCodecDB::kMaxNumCodecs]; 357 RWLockWrapper* decode_lock_; 358 bool vad_enabled_; 359 Clock* clock_; // TODO(henrik.lundin) Make const if possible. 360 361 // Indicates if a non-zero initial delay is set, and the receiver is in 362 // AV-sync mode. 363 bool av_sync_; 364 scoped_ptr<InitialDelayManager> initial_delay_manager_; 365 366 // The following are defined as members to avoid creating them in every 367 // iteration. |missing_packets_sync_stream_| is *ONLY* used in InsertPacket(). 368 // |late_packets_sync_stream_| is only used in GetAudio(). Both of these 369 // member variables are allocated only when we AV-sync is enabled, i.e. 370 // initial delay is set. 371 scoped_ptr<InitialDelayManager::SyncStream> missing_packets_sync_stream_; 372 scoped_ptr<InitialDelayManager::SyncStream> late_packets_sync_stream_; 373 }; 374 375 } // namespace acm2 376 377 } // namespace webrtc 378 379 #endif // WEBRTC_MODULES_AUDIO_CODING_MAIN_ACM2_ACM_RECEIVER_H_ 380