/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "webrtc/modules/rtp_rtcp/source/rtp_sender_audio.h"

#include <string.h>

#include "webrtc/base/trace_event.h"
#include "webrtc/modules/rtp_rtcp/include/rtp_rtcp_defines.h"
#include "webrtc/modules/rtp_rtcp/source/byte_io.h"
#include "webrtc/system_wrappers/include/tick_util.h"

namespace webrtc {

static const int kDtmfFrequencyHz = 8000;

RTPSenderAudio::RTPSenderAudio(Clock* clock,
                               RTPSender* rtpSender,
                               RtpAudioFeedback* audio_feedback)
    : _clock(clock),
      _rtpSender(rtpSender),
      _audioFeedback(audio_feedback),
      _sendAudioCritsect(CriticalSectionWrapper::CreateCriticalSection()),
      _packetSizeSamples(160),
      _dtmfEventIsOn(false),
      _dtmfEventFirstPacketSent(false),
      _dtmfPayloadType(-1),
      _dtmfTimestamp(0),
      _dtmfKey(0),
      _dtmfLengthSamples(0),
      _dtmfLevel(0),
      _dtmfTimeLastSent(0),
      _dtmfTimestampLastSent(0),
      _REDPayloadType(-1),
      _inbandVADactive(false),
      _cngNBPayloadType(-1),
      _cngWBPayloadType(-1),
      _cngSWBPayloadType(-1),
      _cngFBPayloadType(-1),
      _lastPayloadType(-1),
      _audioLevel_dBov(0) {}

RTPSenderAudio::~RTPSenderAudio() {}

int RTPSenderAudio::AudioFrequency() const {
  return kDtmfFrequencyHz;
}

// Set the audio packet size, used to determine when it's time to send a DTMF
// packet in silence (CNG).
int32_t RTPSenderAudio::SetAudioPacketSize(uint16_t packetSizeSamples) {
  CriticalSectionScoped cs(_sendAudioCritsect.get());

  _packetSizeSamples = packetSizeSamples;
  return 0;
}

int32_t RTPSenderAudio::RegisterAudioPayload(
    const char payloadName[RTP_PAYLOAD_NAME_SIZE],
    const int8_t payloadType,
    const uint32_t frequency,
    const size_t channels,
    const uint32_t rate,
    RtpUtility::Payload** payload) {
  if (RtpUtility::StringCompare(payloadName, "cn", 2)) {
    CriticalSectionScoped cs(_sendAudioCritsect.get());
    // We can have multiple CNG payload types.
    switch (frequency) {
      case 8000:
        _cngNBPayloadType = payloadType;
        break;
      case 16000:
        _cngWBPayloadType = payloadType;
        break;
      case 32000:
        _cngSWBPayloadType = payloadType;
        break;
      case 48000:
        _cngFBPayloadType = payloadType;
        break;
      default:
        return -1;
    }
  } else if (RtpUtility::StringCompare(payloadName, "telephone-event", 15)) {
    CriticalSectionScoped cs(_sendAudioCritsect.get());
    // Don't add it to the list; we don't want to allow sending with a DTMF
    // payload type.
    _dtmfPayloadType = payloadType;
    return 0;
    // The default timestamp rate is 8000 Hz, but other rates may be defined.
  }
  *payload = new RtpUtility::Payload;
  (*payload)->typeSpecific.Audio.frequency = frequency;
  (*payload)->typeSpecific.Audio.channels = channels;
  (*payload)->typeSpecific.Audio.rate = rate;
  (*payload)->audio = true;
  (*payload)->name[RTP_PAYLOAD_NAME_SIZE - 1] = '\0';
  strncpy((*payload)->name, payloadName, RTP_PAYLOAD_NAME_SIZE - 1);
  return 0;
}

bool RTPSenderAudio::MarkerBit(FrameType frameType, int8_t payload_type) {
  CriticalSectionScoped cs(_sendAudioCritsect.get());
  // For audio, the marker bit is true for the first packet in a speech burst.
  bool markerBit = false;
  if (_lastPayloadType != payload_type) {
    if (payload_type != -1 && (_cngNBPayloadType == payload_type ||
                               _cngWBPayloadType == payload_type ||
                               _cngSWBPayloadType == payload_type ||
                               _cngFBPayloadType == payload_type)) {
      // Only set a marker bit when we change payload type to a non-CNG type.
      return false;
    }

    // payload_type differs.
    if (_lastPayloadType == -1) {
      if (frameType != kAudioFrameCN) {
        // First packet and NOT CNG.
        return true;
      } else {
        // First packet and CNG.
        _inbandVADactive = true;
        return false;
      }
    }

    // Not the first packet AND
    // not CNG AND
    // payload_type changed.

    // Set a marker bit when we change payload type.
    markerBit = true;
  }

  // For G.723, G.729, AMR etc. we can have inband VAD.
  if (frameType == kAudioFrameCN) {
    _inbandVADactive = true;
  } else if (_inbandVADactive) {
    _inbandVADactive = false;
    markerBit = true;
  }
  return markerBit;
}

int32_t RTPSenderAudio::SendAudio(FrameType frameType,
                                  int8_t payloadType,
                                  uint32_t captureTimeStamp,
                                  const uint8_t* payloadData,
                                  size_t dataSize,
                                  const RTPFragmentationHeader* fragmentation) {
  // TODO(pwestin): Break this function up into smaller functions.
  size_t payloadSize = dataSize;
  size_t maxPayloadLength = _rtpSender->MaxPayloadLength();
  bool dtmfToneStarted = false;
  uint16_t dtmfLengthMS = 0;
  uint8_t key = 0;
  int red_payload_type;
  uint8_t audio_level_dbov;
  int8_t dtmf_payload_type;
  uint16_t packet_size_samples;
  {
    CriticalSectionScoped cs(_sendAudioCritsect.get());
    red_payload_type = _REDPayloadType;
    audio_level_dbov = _audioLevel_dBov;
    dtmf_payload_type = _dtmfPayloadType;
    packet_size_samples = _packetSizeSamples;
  }

  // Check if we have pending DTMFs to send.
  if (!_dtmfEventIsOn && PendingDTMF()) {
    int64_t delaySinceLastDTMF =
        _clock->TimeInMilliseconds() - _dtmfTimeLastSent;

    if (delaySinceLastDTMF > 100) {
      // New tone to play.
      _dtmfTimestamp = captureTimeStamp;
      if (NextDTMF(&key, &dtmfLengthMS, &_dtmfLevel) >= 0) {
        _dtmfEventFirstPacketSent = false;
        _dtmfKey = key;
        _dtmfLengthSamples = (kDtmfFrequencyHz / 1000) * dtmfLengthMS;
        dtmfToneStarted = true;
        _dtmfEventIsOn = true;
      }
    }
  }
  if (dtmfToneStarted) {
    if (_audioFeedback)
      _audioFeedback->OnPlayTelephoneEvent(key, dtmfLengthMS, _dtmfLevel);
  }

  // A source MAY send events and coded audio packets for the same time,
  // but we don't support it.
  if (_dtmfEventIsOn) {
    if (frameType == kEmptyFrame) {
      // kEmptyFrame is used to drive the DTMF when in CN mode.
      // It can be triggered more frequently than we want to send the
      // DTMF packets.
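      // Note: the check below rate-limits the event packets when they are
      // driven by empty (CN) frames: another telephone-event packet is sent
      // only once at least one audio packet interval (packet_size_samples,
      // in RTP timestamp units) has passed since the last one.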
      if (packet_size_samples > (captureTimeStamp - _dtmfTimestampLastSent)) {
        // Not time to send yet.
        return 0;
      }
    }
    _dtmfTimestampLastSent = captureTimeStamp;
    uint32_t dtmfDurationSamples = captureTimeStamp - _dtmfTimestamp;
    bool ended = false;
    bool send = true;

    if (_dtmfLengthSamples > dtmfDurationSamples) {
      if (dtmfDurationSamples == 0) {
        // Skip sending a packet at the start, since we shouldn't use a
        // duration of 0.
        send = false;
      }
    } else {
      ended = true;
      _dtmfEventIsOn = false;
      _dtmfTimeLastSent = _clock->TimeInMilliseconds();
    }
    if (send) {
      if (dtmfDurationSamples > 0xffff) {
        // RFC 4733 2.5.2.3 Long-Duration Events.
        SendTelephoneEventPacket(ended, dtmf_payload_type, _dtmfTimestamp,
                                 static_cast<uint16_t>(0xffff), false);

        // Set a new timestamp for this segment.
        _dtmfTimestamp = captureTimeStamp;
        dtmfDurationSamples -= 0xffff;
        _dtmfLengthSamples -= 0xffff;

        return SendTelephoneEventPacket(
            ended, dtmf_payload_type, _dtmfTimestamp,
            static_cast<uint16_t>(dtmfDurationSamples), false);
      } else {
        if (SendTelephoneEventPacket(ended, dtmf_payload_type, _dtmfTimestamp,
                                     static_cast<uint16_t>(dtmfDurationSamples),
                                     !_dtmfEventFirstPacketSent) != 0) {
          return -1;
        }
        _dtmfEventFirstPacketSent = true;
        return 0;
      }
    }
    return 0;
  }
  if (payloadSize == 0 || payloadData == NULL) {
    if (frameType == kEmptyFrame) {
      // We don't send empty audio RTP packets.
      // No error, since we use them to drive DTMF when we use VAD.
      return 0;
    }
    return -1;
  }
  uint8_t dataBuffer[IP_PACKET_SIZE];
  bool markerBit = MarkerBit(frameType, payloadType);

  int32_t rtpHeaderLength = 0;
  uint16_t timestampOffset = 0;

  if (red_payload_type >= 0 && fragmentation && !markerBit &&
      fragmentation->fragmentationVectorSize > 1) {
    // Have we configured RED? Use its payload type.
    // We need to get the current timestamp to calculate the diff.
    uint32_t oldTimeStamp = _rtpSender->Timestamp();
    rtpHeaderLength = _rtpSender->BuildRTPheader(dataBuffer, red_payload_type,
                                                 markerBit, captureTimeStamp,
                                                 _clock->TimeInMilliseconds());

    timestampOffset = uint16_t(_rtpSender->Timestamp() - oldTimeStamp);
  } else {
    rtpHeaderLength = _rtpSender->BuildRTPheader(dataBuffer, payloadType,
                                                 markerBit, captureTimeStamp,
                                                 _clock->TimeInMilliseconds());
  }
  if (rtpHeaderLength <= 0) {
    return -1;
  }
  if (maxPayloadLength < (rtpHeaderLength + payloadSize)) {
    // Too large payload buffer.
    return -1;
  }
  if (red_payload_type >= 0 &&  // Have we configured RED?
      fragmentation && fragmentation->fragmentationVectorSize > 1 &&
      !markerBit) {
    if (timestampOffset <= 0x3fff) {
      if (fragmentation->fragmentationVectorSize != 2) {
        // We only support 2 codecs when using RED.
        return -1;
      }
      // Only set the F bit (0x80) when we have multiple blocks.
      dataBuffer[rtpHeaderLength++] =
          0x80 + fragmentation->fragmentationPlType[1];
      size_t blockLength = fragmentation->fragmentationLength[1];

      // Sanity check blockLength.
      if (blockLength > 0x3ff) {  // Block length is 10 bits: max 1023 bytes.
        return -1;
      }
      // 14-bit timestamp offset followed by the 10-bit block length, written
      // as a 3-byte big-endian field.
      uint32_t REDheader = (timestampOffset << 10) + blockLength;
      ByteWriter<uint32_t, 3>::WriteBigEndian(dataBuffer + rtpHeaderLength,
                                              REDheader);
      rtpHeaderLength += 3;

      dataBuffer[rtpHeaderLength++] = fragmentation->fragmentationPlType[0];
      // Copy the RED (redundant) data.
      memcpy(dataBuffer + rtpHeaderLength,
             payloadData + fragmentation->fragmentationOffset[1],
             fragmentation->fragmentationLength[1]);

      // Copy the normal data.
      memcpy(
          dataBuffer + rtpHeaderLength + fragmentation->fragmentationLength[1],
          payloadData + fragmentation->fragmentationOffset[0],
          fragmentation->fragmentationLength[0]);

      payloadSize = fragmentation->fragmentationLength[0] +
                    fragmentation->fragmentationLength[1];
    } else {
      // Silence for too long; send only the new data.
      dataBuffer[rtpHeaderLength++] = fragmentation->fragmentationPlType[0];
      memcpy(dataBuffer + rtpHeaderLength,
             payloadData + fragmentation->fragmentationOffset[0],
             fragmentation->fragmentationLength[0]);

      payloadSize = fragmentation->fragmentationLength[0];
    }
  } else {
    if (fragmentation && fragmentation->fragmentationVectorSize > 0) {
      // Use the fragment info if we have one.
      dataBuffer[rtpHeaderLength++] = fragmentation->fragmentationPlType[0];
      memcpy(dataBuffer + rtpHeaderLength,
             payloadData + fragmentation->fragmentationOffset[0],
             fragmentation->fragmentationLength[0]);

      payloadSize = fragmentation->fragmentationLength[0];
    } else {
      memcpy(dataBuffer + rtpHeaderLength, payloadData, payloadSize);
    }
  }
  {
    CriticalSectionScoped cs(_sendAudioCritsect.get());
    _lastPayloadType = payloadType;
  }
  // Update audio level extension, if included.
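  // The audio level extension (RFC 6464 "ssrc-audio-level") carries a 7-bit
  // level in -dBov together with a voice activity flag.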
  size_t packetSize = payloadSize + rtpHeaderLength;
  RtpUtility::RtpHeaderParser rtp_parser(dataBuffer, packetSize);
  RTPHeader rtp_header;
  rtp_parser.Parse(&rtp_header);
  _rtpSender->UpdateAudioLevel(dataBuffer, packetSize, rtp_header,
                               (frameType == kAudioFrameSpeech),
                               audio_level_dbov);
  TRACE_EVENT_ASYNC_END2("webrtc", "Audio", captureTimeStamp, "timestamp",
                         _rtpSender->Timestamp(), "seqnum",
                         _rtpSender->SequenceNumber());
  return _rtpSender->SendToNetwork(dataBuffer, payloadSize, rtpHeaderLength,
                                   TickTime::MillisecondTimestamp(),
                                   kAllowRetransmission,
                                   RtpPacketSender::kHighPriority);
}

// Audio level magnitude and voice activity flag are set for each RTP packet.
int32_t RTPSenderAudio::SetAudioLevel(uint8_t level_dBov) {
  if (level_dBov > 127) {
    return -1;
  }
  CriticalSectionScoped cs(_sendAudioCritsect.get());
  _audioLevel_dBov = level_dBov;
  return 0;
}

// Set payload type for Redundant Audio Data RFC 2198.
int32_t RTPSenderAudio::SetRED(int8_t payloadType) {
  if (payloadType < -1) {
    return -1;
  }
  CriticalSectionScoped cs(_sendAudioCritsect.get());
  _REDPayloadType = payloadType;
  return 0;
}

// Get payload type for Redundant Audio Data RFC 2198.
int32_t RTPSenderAudio::RED(int8_t* payloadType) const {
  CriticalSectionScoped cs(_sendAudioCritsect.get());
  if (_REDPayloadType == -1) {
    // Not configured.
    return -1;
  }
  *payloadType = _REDPayloadType;
  return 0;
}

// Send a TelephoneEvent tone using RFC 2833 (4733).
int32_t RTPSenderAudio::SendTelephoneEvent(uint8_t key,
                                           uint16_t time_ms,
                                           uint8_t level) {
  {
    CriticalSectionScoped lock(_sendAudioCritsect.get());
    if (_dtmfPayloadType < 0) {
      // TelephoneEvent payload type not configured.
      return -1;
    }
  }
  return AddDTMF(key, time_ms, level);
}

int32_t RTPSenderAudio::SendTelephoneEventPacket(bool ended,
                                                 int8_t dtmf_payload_type,
                                                 uint32_t dtmfTimeStamp,
                                                 uint16_t duration,
                                                 bool markerBit) {
  uint8_t dtmfbuffer[IP_PACKET_SIZE];
  uint8_t sendCount = 1;
  int32_t retVal = 0;

  if (ended) {
    // Resend last packet in an event 3 times.
    sendCount = 3;
  }
  do {
    // Send DTMF data.
    _rtpSender->BuildRTPheader(dtmfbuffer, dtmf_payload_type, markerBit,
                               dtmfTimeStamp, _clock->TimeInMilliseconds());

    // Reset CSRC count and X bit.
    dtmfbuffer[0] &= 0xe0;

    // Create DTMF data.
    /*    From RFC 2833:
     *
     *     0                   1                   2                   3
     *     0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
     *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
     *    |     event     |E|R| volume    |          duration             |
     *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
     */
    // R bit always cleared.
    uint8_t R = 0x00;
    uint8_t volume = _dtmfLevel;

    // First packet un-ended.
    uint8_t E = ended ? 0x80 : 0x00;

    // First byte is Event number, equals key number.
    dtmfbuffer[12] = _dtmfKey;
    dtmfbuffer[13] = E | R | volume;
    ByteWriter<uint16_t>::WriteBigEndian(dtmfbuffer + 14, duration);

    TRACE_EVENT_INSTANT2(TRACE_DISABLED_BY_DEFAULT("webrtc_rtp"),
                         "Audio::SendTelephoneEvent", "timestamp",
                         dtmfTimeStamp, "seqnum", _rtpSender->SequenceNumber());
    retVal = _rtpSender->SendToNetwork(
        dtmfbuffer, 4, 12, TickTime::MillisecondTimestamp(),
        kAllowRetransmission, RtpPacketSender::kHighPriority);
    sendCount--;
  } while (sendCount > 0 && retVal == 0);

  return retVal;
}
}  // namespace webrtc