1 /* 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "webrtc/modules/rtp_rtcp/source/rtp_receiver_audio.h" 12 13 #include <assert.h> // assert 14 #include <math.h> // pow() 15 #include <string.h> // memcpy() 16 17 #include "webrtc/base/logging.h" 18 #include "webrtc/base/trace_event.h" 19 #include "webrtc/system_wrappers/include/critical_section_wrapper.h" 20 21 namespace webrtc { 22 RTPReceiverStrategy* RTPReceiverStrategy::CreateAudioStrategy( 23 RtpData* data_callback, 24 RtpAudioFeedback* incoming_messages_callback) { 25 return new RTPReceiverAudio(data_callback, incoming_messages_callback); 26 } 27 28 RTPReceiverAudio::RTPReceiverAudio(RtpData* data_callback, 29 RtpAudioFeedback* incoming_messages_callback) 30 : RTPReceiverStrategy(data_callback), 31 TelephoneEventHandler(), 32 last_received_frequency_(8000), 33 telephone_event_forward_to_decoder_(false), 34 telephone_event_payload_type_(-1), 35 cng_nb_payload_type_(-1), 36 cng_wb_payload_type_(-1), 37 cng_swb_payload_type_(-1), 38 cng_fb_payload_type_(-1), 39 cng_payload_type_(-1), 40 g722_payload_type_(-1), 41 last_received_g722_(false), 42 num_energy_(0), 43 current_remote_energy_(), 44 cb_audio_feedback_(incoming_messages_callback) { 45 last_payload_.Audio.channels = 1; 46 memset(current_remote_energy_, 0, sizeof(current_remote_energy_)); 47 } 48 49 // Outband TelephoneEvent(DTMF) detection 50 void RTPReceiverAudio::SetTelephoneEventForwardToDecoder( 51 bool forward_to_decoder) { 52 CriticalSectionScoped lock(crit_sect_.get()); 53 telephone_event_forward_to_decoder_ = forward_to_decoder; 54 } 55 56 // Is forwarding of outband telephone events turned on/off? 57 bool RTPReceiverAudio::TelephoneEventForwardToDecoder() const { 58 CriticalSectionScoped lock(crit_sect_.get()); 59 return telephone_event_forward_to_decoder_; 60 } 61 62 bool RTPReceiverAudio::TelephoneEventPayloadType( 63 int8_t payload_type) const { 64 CriticalSectionScoped lock(crit_sect_.get()); 65 return telephone_event_payload_type_ == payload_type; 66 } 67 68 bool RTPReceiverAudio::CNGPayloadType(int8_t payload_type, 69 uint32_t* frequency, 70 bool* cng_payload_type_has_changed) { 71 CriticalSectionScoped lock(crit_sect_.get()); 72 *cng_payload_type_has_changed = false; 73 74 // We can have four CNG on 8000Hz, 16000Hz, 32000Hz and 48000Hz. 75 if (cng_nb_payload_type_ == payload_type) { 76 *frequency = 8000; 77 if (cng_payload_type_ != -1 && cng_payload_type_ != cng_nb_payload_type_) 78 *cng_payload_type_has_changed = true; 79 80 cng_payload_type_ = cng_nb_payload_type_; 81 return true; 82 } else if (cng_wb_payload_type_ == payload_type) { 83 // if last received codec is G.722 we must use frequency 8000 84 if (last_received_g722_) { 85 *frequency = 8000; 86 } else { 87 *frequency = 16000; 88 } 89 if (cng_payload_type_ != -1 && cng_payload_type_ != cng_wb_payload_type_) 90 *cng_payload_type_has_changed = true; 91 cng_payload_type_ = cng_wb_payload_type_; 92 return true; 93 } else if (cng_swb_payload_type_ == payload_type) { 94 *frequency = 32000; 95 if ((cng_payload_type_ != -1) && 96 (cng_payload_type_ != cng_swb_payload_type_)) 97 *cng_payload_type_has_changed = true; 98 cng_payload_type_ = cng_swb_payload_type_; 99 return true; 100 } else if (cng_fb_payload_type_ == payload_type) { 101 *frequency = 48000; 102 if (cng_payload_type_ != -1 && cng_payload_type_ != cng_fb_payload_type_) 103 *cng_payload_type_has_changed = true; 104 cng_payload_type_ = cng_fb_payload_type_; 105 return true; 106 } else { 107 // not CNG 108 if (g722_payload_type_ == payload_type) { 109 last_received_g722_ = true; 110 } else { 111 last_received_g722_ = false; 112 } 113 } 114 return false; 115 } 116 117 bool RTPReceiverAudio::ShouldReportCsrcChanges(uint8_t payload_type) const { 118 // Don't do this for DTMF packets, otherwise it's fine. 119 return !TelephoneEventPayloadType(payload_type); 120 } 121 122 // - Sample based or frame based codecs based on RFC 3551 123 // - 124 // - NOTE! There is one error in the RFC, stating G.722 uses 8 bits/samples. 125 // - The correct rate is 4 bits/sample. 126 // - 127 // - name of sampling default 128 // - encoding sample/frame bits/sample rate ms/frame ms/packet 129 // - 130 // - Sample based audio codecs 131 // - DVI4 sample 4 var. 20 132 // - G722 sample 4 16,000 20 133 // - G726-40 sample 5 8,000 20 134 // - G726-32 sample 4 8,000 20 135 // - G726-24 sample 3 8,000 20 136 // - G726-16 sample 2 8,000 20 137 // - L8 sample 8 var. 20 138 // - L16 sample 16 var. 20 139 // - PCMA sample 8 var. 20 140 // - PCMU sample 8 var. 20 141 // - 142 // - Frame based audio codecs 143 // - G723 frame N/A 8,000 30 30 144 // - G728 frame N/A 8,000 2.5 20 145 // - G729 frame N/A 8,000 10 20 146 // - G729D frame N/A 8,000 10 20 147 // - G729E frame N/A 8,000 10 20 148 // - GSM frame N/A 8,000 20 20 149 // - GSM-EFR frame N/A 8,000 20 20 150 // - LPC frame N/A 8,000 20 20 151 // - MPA frame N/A var. var. 152 // - 153 // - G7221 frame N/A 154 int32_t RTPReceiverAudio::OnNewPayloadTypeCreated( 155 const char payload_name[RTP_PAYLOAD_NAME_SIZE], 156 int8_t payload_type, 157 uint32_t frequency) { 158 CriticalSectionScoped lock(crit_sect_.get()); 159 160 if (RtpUtility::StringCompare(payload_name, "telephone-event", 15)) { 161 telephone_event_payload_type_ = payload_type; 162 } 163 if (RtpUtility::StringCompare(payload_name, "cn", 2)) { 164 // we can have three CNG on 8000Hz, 16000Hz and 32000Hz 165 if (frequency == 8000) { 166 cng_nb_payload_type_ = payload_type; 167 } else if (frequency == 16000) { 168 cng_wb_payload_type_ = payload_type; 169 } else if (frequency == 32000) { 170 cng_swb_payload_type_ = payload_type; 171 } else if (frequency == 48000) { 172 cng_fb_payload_type_ = payload_type; 173 } else { 174 assert(false); 175 return -1; 176 } 177 } 178 return 0; 179 } 180 181 int32_t RTPReceiverAudio::ParseRtpPacket(WebRtcRTPHeader* rtp_header, 182 const PayloadUnion& specific_payload, 183 bool is_red, 184 const uint8_t* payload, 185 size_t payload_length, 186 int64_t timestamp_ms, 187 bool is_first_packet) { 188 TRACE_EVENT2(TRACE_DISABLED_BY_DEFAULT("webrtc_rtp"), "Audio::ParseRtp", 189 "seqnum", rtp_header->header.sequenceNumber, "timestamp", 190 rtp_header->header.timestamp); 191 rtp_header->type.Audio.numEnergy = rtp_header->header.numCSRCs; 192 num_energy_ = rtp_header->type.Audio.numEnergy; 193 if (rtp_header->type.Audio.numEnergy > 0 && 194 rtp_header->type.Audio.numEnergy <= kRtpCsrcSize) { 195 memcpy(current_remote_energy_, 196 rtp_header->type.Audio.arrOfEnergy, 197 rtp_header->type.Audio.numEnergy); 198 } 199 200 return ParseAudioCodecSpecific(rtp_header, 201 payload, 202 payload_length, 203 specific_payload.Audio, 204 is_red); 205 } 206 207 int RTPReceiverAudio::GetPayloadTypeFrequency() const { 208 CriticalSectionScoped lock(crit_sect_.get()); 209 if (last_received_g722_) { 210 return 8000; 211 } 212 return last_received_frequency_; 213 } 214 215 RTPAliveType RTPReceiverAudio::ProcessDeadOrAlive( 216 uint16_t last_payload_length) const { 217 218 // Our CNG is 9 bytes; if it's a likely CNG the receiver needs to check 219 // kRtpNoRtp against NetEq speech_type kOutputPLCtoCNG. 220 if (last_payload_length < 10) { // our CNG is 9 bytes 221 return kRtpNoRtp; 222 } else { 223 return kRtpDead; 224 } 225 } 226 227 void RTPReceiverAudio::CheckPayloadChanged(int8_t payload_type, 228 PayloadUnion* specific_payload, 229 bool* should_discard_changes) { 230 *should_discard_changes = false; 231 232 if (TelephoneEventPayloadType(payload_type)) { 233 // Don't do callbacks for DTMF packets. 234 *should_discard_changes = true; 235 return; 236 } 237 // frequency is updated for CNG 238 bool cng_payload_type_has_changed = false; 239 bool is_cng_payload_type = CNGPayloadType(payload_type, 240 &specific_payload->Audio.frequency, 241 &cng_payload_type_has_changed); 242 243 if (is_cng_payload_type) { 244 // Don't do callbacks for DTMF packets. 245 *should_discard_changes = true; 246 return; 247 } 248 } 249 250 int RTPReceiverAudio::Energy(uint8_t array_of_energy[kRtpCsrcSize]) const { 251 CriticalSectionScoped cs(crit_sect_.get()); 252 253 assert(num_energy_ <= kRtpCsrcSize); 254 255 if (num_energy_ > 0) { 256 memcpy(array_of_energy, current_remote_energy_, 257 sizeof(uint8_t) * num_energy_); 258 } 259 return num_energy_; 260 } 261 262 int32_t RTPReceiverAudio::InvokeOnInitializeDecoder( 263 RtpFeedback* callback, 264 int8_t payload_type, 265 const char payload_name[RTP_PAYLOAD_NAME_SIZE], 266 const PayloadUnion& specific_payload) const { 267 if (-1 == 268 callback->OnInitializeDecoder( 269 payload_type, payload_name, specific_payload.Audio.frequency, 270 specific_payload.Audio.channels, specific_payload.Audio.rate)) { 271 LOG(LS_ERROR) << "Failed to create decoder for payload type: " 272 << payload_name << "/" << static_cast<int>(payload_type); 273 return -1; 274 } 275 return 0; 276 } 277 278 // We are not allowed to have any critsects when calling data_callback. 279 int32_t RTPReceiverAudio::ParseAudioCodecSpecific( 280 WebRtcRTPHeader* rtp_header, 281 const uint8_t* payload_data, 282 size_t payload_length, 283 const AudioPayload& audio_specific, 284 bool is_red) { 285 286 if (payload_length == 0) { 287 return 0; 288 } 289 290 bool telephone_event_packet = 291 TelephoneEventPayloadType(rtp_header->header.payloadType); 292 if (telephone_event_packet) { 293 CriticalSectionScoped lock(crit_sect_.get()); 294 295 // RFC 4733 2.3 296 // 0 1 2 3 297 // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 298 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 299 // | event |E|R| volume | duration | 300 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 301 // 302 if (payload_length % 4 != 0) { 303 return -1; 304 } 305 size_t number_of_events = payload_length / 4; 306 307 // sanity 308 if (number_of_events >= MAX_NUMBER_OF_PARALLEL_TELEPHONE_EVENTS) { 309 number_of_events = MAX_NUMBER_OF_PARALLEL_TELEPHONE_EVENTS; 310 } 311 for (size_t n = 0; n < number_of_events; ++n) { 312 bool end = (payload_data[(4 * n) + 1] & 0x80) ? true : false; 313 314 std::set<uint8_t>::iterator event = 315 telephone_event_reported_.find(payload_data[4 * n]); 316 317 if (event != telephone_event_reported_.end()) { 318 // we have already seen this event 319 if (end) { 320 telephone_event_reported_.erase(payload_data[4 * n]); 321 } 322 } else { 323 if (end) { 324 // don't add if it's a end of a tone 325 } else { 326 telephone_event_reported_.insert(payload_data[4 * n]); 327 } 328 } 329 } 330 331 // RFC 4733 2.5.1.3 & 2.5.2.3 Long-Duration Events 332 // should not be a problem since we don't care about the duration 333 334 // RFC 4733 See 2.5.1.5. & 2.5.2.4. Multiple Events in a Packet 335 } 336 337 { 338 CriticalSectionScoped lock(crit_sect_.get()); 339 340 if (!telephone_event_packet) { 341 last_received_frequency_ = audio_specific.frequency; 342 } 343 344 // Check if this is a CNG packet, receiver might want to know 345 uint32_t ignored; 346 bool also_ignored; 347 if (CNGPayloadType(rtp_header->header.payloadType, 348 &ignored, 349 &also_ignored)) { 350 rtp_header->type.Audio.isCNG = true; 351 rtp_header->frameType = kAudioFrameCN; 352 } else { 353 rtp_header->frameType = kAudioFrameSpeech; 354 rtp_header->type.Audio.isCNG = false; 355 } 356 357 // check if it's a DTMF event, hence something we can playout 358 if (telephone_event_packet) { 359 if (!telephone_event_forward_to_decoder_) { 360 // don't forward event to decoder 361 return 0; 362 } 363 std::set<uint8_t>::iterator first = 364 telephone_event_reported_.begin(); 365 if (first != telephone_event_reported_.end() && *first > 15) { 366 // don't forward non DTMF events 367 return 0; 368 } 369 } 370 } 371 // TODO(holmer): Break this out to have RED parsing handled generically. 372 if (is_red && !(payload_data[0] & 0x80)) { 373 // we recive only one frame packed in a RED packet remove the RED wrapper 374 rtp_header->header.payloadType = payload_data[0]; 375 376 // only one frame in the RED strip the one byte to help NetEq 377 return data_callback_->OnReceivedPayloadData( 378 payload_data + 1, payload_length - 1, rtp_header); 379 } 380 381 rtp_header->type.Audio.channel = audio_specific.channels; 382 return data_callback_->OnReceivedPayloadData( 383 payload_data, payload_length, rtp_header); 384 } 385 } // namespace webrtc 386