/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef WEBRTC_MODULES_INCLUDE_MODULE_COMMON_TYPES_H_
#define WEBRTC_MODULES_INCLUDE_MODULE_COMMON_TYPES_H_

#include <assert.h>
#include <string.h>  // memcpy

#include <algorithm>
#include <limits>

#include "webrtc/base/constructormagic.h"
#include "webrtc/common_types.h"
#include "webrtc/common_video/rotation.h"
#include "webrtc/typedefs.h"

namespace webrtc {

struct RTPAudioHeader {
  uint8_t numEnergy;                  // Number of valid entries in arrOfEnergy.
  uint8_t arrOfEnergy[kRtpCsrcSize];  // One energy byte (0-9) per channel.
  bool isCNG;                         // True if this frame is comfort noise (CNG).
  size_t channel;                     // Number of channels; 2 = stereo.
};

const int16_t kNoPictureId = -1;
const int16_t kMaxOneBytePictureId = 0x7F;    // 7 bits
const int16_t kMaxTwoBytePictureId = 0x7FFF;  // 15 bits
const int16_t kNoTl0PicIdx = -1;
const uint8_t kNoTemporalIdx = 0xFF;
const uint8_t kNoSpatialIdx = 0xFF;
const uint8_t kNoGofIdx = 0xFF;
const uint8_t kNumVp9Buffers = 8;
const size_t kMaxVp9RefPics = 3;
const size_t kMaxVp9FramesInGof = 0xFF;  // 8 bits
const size_t kMaxVp9NumberOfSpatialLayers = 8;
const int kNoKeyIdx = -1;

struct RTPVideoHeaderVP8 {
  void InitRTPVideoHeaderVP8() {
    nonReference = false;
    pictureId = kNoPictureId;
    tl0PicIdx = kNoTl0PicIdx;
    temporalIdx = kNoTemporalIdx;
    layerSync = false;
    keyIdx = kNoKeyIdx;
    partitionId = 0;
    beginningOfPartition = false;
  }

  bool nonReference;          // Frame is discardable.
  int16_t pictureId;          // Picture ID index, 15 bits;
                              // kNoPictureId if PictureID does not exist.
  int16_t tl0PicIdx;          // TL0PIC_IDX, 8 bits;
                              // kNoTl0PicIdx means no value provided.
  uint8_t temporalIdx;        // Temporal layer index, or kNoTemporalIdx.
  bool layerSync;             // This frame is a layer sync frame.
                              // Disabled if temporalIdx == kNoTemporalIdx.
  int keyIdx;                 // 5 bits; kNoKeyIdx means not used.
  int partitionId;            // VP8 partition ID.
  bool beginningOfPartition;  // True if this packet is the first in a VP8
                              // partition; otherwise false.
};
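
// A minimal usage sketch (illustrative only, not part of the original header):
// reset the VP8 header to its defaults before filling in per-packet fields.
//   RTPVideoHeaderVP8 vp8_header;
//   vp8_header.InitRTPVideoHeaderVP8();
//   vp8_header.temporalIdx = 0;              // Base temporal layer.
//   vp8_header.beginningOfPartition = true;  // First packet of its partition.
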
enum TemporalStructureMode {
  kTemporalStructureMode1,  // 1 temporal layer structure - i.e., IPPP...
  kTemporalStructureMode2,  // 2 temporal layers 0-1-0-1...
  kTemporalStructureMode3   // 3 temporal layers 0-2-1-2-0-2-1-2...
};

struct GofInfoVP9 {
  void SetGofInfoVP9(TemporalStructureMode tm) {
    switch (tm) {
      case kTemporalStructureMode1:
        num_frames_in_gof = 1;
        temporal_idx[0] = 0;
        temporal_up_switch[0] = false;
        num_ref_pics[0] = 1;
        pid_diff[0][0] = 1;
        break;
      case kTemporalStructureMode2:
        num_frames_in_gof = 2;
        temporal_idx[0] = 0;
        temporal_up_switch[0] = false;
        num_ref_pics[0] = 1;
        pid_diff[0][0] = 2;

        temporal_idx[1] = 1;
        temporal_up_switch[1] = true;
        num_ref_pics[1] = 1;
        pid_diff[1][0] = 1;
        break;
      case kTemporalStructureMode3:
        num_frames_in_gof = 4;
        temporal_idx[0] = 0;
        temporal_up_switch[0] = false;
        num_ref_pics[0] = 1;
        pid_diff[0][0] = 4;

        temporal_idx[1] = 2;
        temporal_up_switch[1] = true;
        num_ref_pics[1] = 1;
        pid_diff[1][0] = 1;

        temporal_idx[2] = 1;
        temporal_up_switch[2] = true;
        num_ref_pics[2] = 1;
        pid_diff[2][0] = 2;

        temporal_idx[3] = 2;
        temporal_up_switch[3] = false;
        num_ref_pics[3] = 2;
        pid_diff[3][0] = 1;
        pid_diff[3][1] = 2;
        break;
      default:
        assert(false);
    }
  }

  void CopyGofInfoVP9(const GofInfoVP9& src) {
    num_frames_in_gof = src.num_frames_in_gof;
    for (size_t i = 0; i < num_frames_in_gof; ++i) {
      temporal_idx[i] = src.temporal_idx[i];
      temporal_up_switch[i] = src.temporal_up_switch[i];
      num_ref_pics[i] = src.num_ref_pics[i];
      for (uint8_t r = 0; r < num_ref_pics[i]; ++r) {
        pid_diff[i][r] = src.pid_diff[i][r];
      }
    }
  }

  size_t num_frames_in_gof;
  uint8_t temporal_idx[kMaxVp9FramesInGof];
  bool temporal_up_switch[kMaxVp9FramesInGof];
  uint8_t num_ref_pics[kMaxVp9FramesInGof];
  uint8_t pid_diff[kMaxVp9FramesInGof][kMaxVp9RefPics];
};
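
// A minimal usage sketch (illustrative only): populate the group-of-frames
// info for the three-layer structure and inspect the result.
//   GofInfoVP9 gof;
//   gof.SetGofInfoVP9(kTemporalStructureMode3);
//   // gof.num_frames_in_gof == 4; the frames' temporal indices are
//   // 0, 2, 1, 2, and the last frame references the two previous pictures.
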
struct RTPVideoHeaderVP9 {
  void InitRTPVideoHeaderVP9() {
    inter_pic_predicted = false;
    flexible_mode = false;
    beginning_of_frame = false;
    end_of_frame = false;
    ss_data_available = false;
    picture_id = kNoPictureId;
    max_picture_id = kMaxTwoBytePictureId;
    tl0_pic_idx = kNoTl0PicIdx;
    temporal_idx = kNoTemporalIdx;
    spatial_idx = kNoSpatialIdx;
    temporal_up_switch = false;
    inter_layer_predicted = false;
    gof_idx = kNoGofIdx;
    num_ref_pics = 0;
    num_spatial_layers = 1;
  }

  bool inter_pic_predicted;  // This layer frame is dependent on previously
                             // coded frame(s).
  bool flexible_mode;        // This frame is in flexible mode.
  bool beginning_of_frame;   // True if this packet is the first in a VP9 layer
                             // frame.
  bool end_of_frame;  // True if this packet is the last in a VP9 layer frame.
  bool ss_data_available;  // True if SS data is available in this payload
                           // descriptor.
  int16_t picture_id;      // PictureID index, 15 bits;
                           // kNoPictureId if PictureID does not exist.
  int16_t max_picture_id;  // Maximum picture ID index; either 0x7F or 0x7FFF.
  int16_t tl0_pic_idx;     // TL0PIC_IDX, 8 bits;
                           // kNoTl0PicIdx means no value provided.
  uint8_t temporal_idx;    // Temporal layer index, or kNoTemporalIdx.
  uint8_t spatial_idx;     // Spatial layer index, or kNoSpatialIdx.
  bool temporal_up_switch;  // True if upswitch to higher frame rate is possible
                            // starting from this frame.
  bool inter_layer_predicted;  // Frame is dependent on directly lower spatial
                               // layer frame.

  uint8_t gof_idx;  // Index to predefined temporal frame info in SS data.

  uint8_t num_ref_pics;  // Number of reference pictures used by this layer
                         // frame.
  uint8_t pid_diff[kMaxVp9RefPics];  // P_DIFF signaled to derive the PictureID
                                     // of the reference pictures.
  int16_t ref_picture_id[kMaxVp9RefPics];  // PictureID of reference pictures.

  // SS data.
  size_t num_spatial_layers;  // Always populated.
  bool spatial_layer_resolution_present;
  uint16_t width[kMaxVp9NumberOfSpatialLayers];
  uint16_t height[kMaxVp9NumberOfSpatialLayers];
  GofInfoVP9 gof;
};

// The packetization types that we support: single, aggregated, and fragmented.
enum H264PacketizationTypes {
  kH264SingleNalu,  // This packet contains a single NAL unit.
  kH264StapA,       // This packet contains STAP-A (single time
                    // aggregation) packets. If this packet has an
                    // associated NAL unit type, it'll be for the
                    // first such aggregated packet.
  kH264FuA,         // This packet contains a FU-A (fragmentation
                    // unit) packet, meaning it is a part of a frame
                    // that was too large to fit into a single packet.
};

struct RTPVideoHeaderH264 {
  uint8_t nalu_type;  // The NAL unit type. If this is a header for a
                      // fragmented packet, it's the NAL unit type of
                      // the original data. If this is the header for an
                      // aggregated packet, it's the NAL unit type of
                      // the first NAL unit in the packet.
  H264PacketizationTypes packetization_type;
};

union RTPVideoTypeHeader {
  RTPVideoHeaderVP8 VP8;
  RTPVideoHeaderVP9 VP9;
  RTPVideoHeaderH264 H264;
};

enum RtpVideoCodecTypes {
  kRtpVideoNone,
  kRtpVideoGeneric,
  kRtpVideoVp8,
  kRtpVideoVp9,
  kRtpVideoH264
};

// Since RTPVideoHeader is used as a member of a union, it can't have a
// non-trivial default constructor.
struct RTPVideoHeader {
  uint16_t width;  // Frame width.
  uint16_t height;
  VideoRotation rotation;

  bool isFirstPacket;    // First packet in frame.
  uint8_t simulcastIdx;  // Index of the simulcast encoder creating
                         // this frame, 0 if not using simulcast.
  RtpVideoCodecTypes codec;
  RTPVideoTypeHeader codecHeader;
};

union RTPTypeHeader {
  RTPAudioHeader Audio;
  RTPVideoHeader Video;
};

struct WebRtcRTPHeader {
  RTPHeader header;
  FrameType frameType;
  RTPTypeHeader type;
  // NTP time of the capture time in local timebase in milliseconds.
  int64_t ntp_time_ms;
};
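
// A minimal usage sketch (illustrative only): tag a header as VP8 and
// initialize the codec-specific member of the union accordingly.
//   RTPVideoHeader video_header;
//   video_header.codec = kRtpVideoVp8;
//   video_header.codecHeader.VP8.InitRTPVideoHeaderVP8();
//   video_header.isFirstPacket = true;
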
class RTPFragmentationHeader {
 public:
  RTPFragmentationHeader()
      : fragmentationVectorSize(0),
        fragmentationOffset(NULL),
        fragmentationLength(NULL),
        fragmentationTimeDiff(NULL),
        fragmentationPlType(NULL) {}

  ~RTPFragmentationHeader() {
    delete[] fragmentationOffset;
    delete[] fragmentationLength;
    delete[] fragmentationTimeDiff;
    delete[] fragmentationPlType;
  }

  void CopyFrom(const RTPFragmentationHeader& src) {
    if (this == &src) {
      return;
    }

    if (src.fragmentationVectorSize != fragmentationVectorSize) {
      // New vector size; delete the old arrays before allocating replacements.
      delete[] fragmentationOffset;
      fragmentationOffset = NULL;
      delete[] fragmentationLength;
      fragmentationLength = NULL;
      delete[] fragmentationTimeDiff;
      fragmentationTimeDiff = NULL;
      delete[] fragmentationPlType;
      fragmentationPlType = NULL;

      if (src.fragmentationVectorSize > 0) {
        // Allocate new arrays, mirroring which of src's arrays are present.
        if (src.fragmentationOffset) {
          fragmentationOffset = new size_t[src.fragmentationVectorSize];
        }
        if (src.fragmentationLength) {
          fragmentationLength = new size_t[src.fragmentationVectorSize];
        }
        if (src.fragmentationTimeDiff) {
          fragmentationTimeDiff = new uint16_t[src.fragmentationVectorSize];
        }
        if (src.fragmentationPlType) {
          fragmentationPlType = new uint8_t[src.fragmentationVectorSize];
        }
      }
      // Set the new size.
      fragmentationVectorSize = src.fragmentationVectorSize;
    }

    if (src.fragmentationVectorSize > 0) {
      // Copy values.
      if (src.fragmentationOffset) {
        memcpy(fragmentationOffset, src.fragmentationOffset,
               src.fragmentationVectorSize * sizeof(size_t));
      }
      if (src.fragmentationLength) {
        memcpy(fragmentationLength, src.fragmentationLength,
               src.fragmentationVectorSize * sizeof(size_t));
      }
      if (src.fragmentationTimeDiff) {
        memcpy(fragmentationTimeDiff, src.fragmentationTimeDiff,
               src.fragmentationVectorSize * sizeof(uint16_t));
      }
      if (src.fragmentationPlType) {
        memcpy(fragmentationPlType, src.fragmentationPlType,
               src.fragmentationVectorSize * sizeof(uint8_t));
      }
    }
  }
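
  // Note: VerifyAndAllocateFragmentationHeader() below only ever grows the
  // arrays; existing entries are preserved and the new tail is zeroed. A
  // minimal usage sketch (illustrative only):
  //   RTPFragmentationHeader frag;
  //   frag.VerifyAndAllocateFragmentationHeader(2);
  //   frag.fragmentationOffset[0] = 0;    // First fragment starts at 0...
  //   frag.fragmentationLength[0] = 100;  // ...and is 100 bytes long.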
  void VerifyAndAllocateFragmentationHeader(const size_t size) {
    assert(size <= std::numeric_limits<uint16_t>::max());
    const uint16_t size16 = static_cast<uint16_t>(size);
    if (fragmentationVectorSize < size16) {
      uint16_t oldVectorSize = fragmentationVectorSize;
      {
        // Offsets.
        size_t* oldOffsets = fragmentationOffset;
        fragmentationOffset = new size_t[size16];
        memset(fragmentationOffset + oldVectorSize, 0,
               sizeof(size_t) * (size16 - oldVectorSize));
        // Copy old values.
        memcpy(fragmentationOffset, oldOffsets, sizeof(size_t) * oldVectorSize);
        delete[] oldOffsets;
      }
      // Lengths.
      {
        size_t* oldLengths = fragmentationLength;
        fragmentationLength = new size_t[size16];
        memset(fragmentationLength + oldVectorSize, 0,
               sizeof(size_t) * (size16 - oldVectorSize));
        memcpy(fragmentationLength, oldLengths, sizeof(size_t) * oldVectorSize);
        delete[] oldLengths;
      }
      // Time diffs.
      {
        uint16_t* oldTimeDiffs = fragmentationTimeDiff;
        fragmentationTimeDiff = new uint16_t[size16];
        memset(fragmentationTimeDiff + oldVectorSize, 0,
               sizeof(uint16_t) * (size16 - oldVectorSize));
        memcpy(fragmentationTimeDiff, oldTimeDiffs,
               sizeof(uint16_t) * oldVectorSize);
        delete[] oldTimeDiffs;
      }
      // Payload types.
      {
        uint8_t* oldPlTypes = fragmentationPlType;
        fragmentationPlType = new uint8_t[size16];
        memset(fragmentationPlType + oldVectorSize, 0,
               sizeof(uint8_t) * (size16 - oldVectorSize));
        memcpy(fragmentationPlType, oldPlTypes,
               sizeof(uint8_t) * oldVectorSize);
        delete[] oldPlTypes;
      }
      fragmentationVectorSize = size16;
    }
  }

  uint16_t fragmentationVectorSize;  // Number of fragmentations.
  size_t* fragmentationOffset;       // Offset into the payload data for each
                                     // fragmentation.
  size_t* fragmentationLength;       // Data size for each fragmentation.
  uint16_t* fragmentationTimeDiff;   // Timestamp difference relative to "now"
                                     // for each fragmentation.
  uint8_t* fragmentationPlType;      // Payload type of each fragmentation.

 private:
  RTC_DISALLOW_COPY_AND_ASSIGN(RTPFragmentationHeader);
};

struct RTCPVoIPMetric {
  // RFC 3611, section 4.7.
  uint8_t lossRate;
  uint8_t discardRate;
  uint8_t burstDensity;
  uint8_t gapDensity;
  uint16_t burstDuration;
  uint16_t gapDuration;
  uint16_t roundTripDelay;
  uint16_t endSystemDelay;
  uint8_t signalLevel;
  uint8_t noiseLevel;
  uint8_t RERL;
  uint8_t Gmin;
  uint8_t Rfactor;
  uint8_t extRfactor;
  uint8_t MOSLQ;
  uint8_t MOSCQ;
  uint8_t RXconfig;
  uint16_t JBnominal;
  uint16_t JBmax;
  uint16_t JBabsMax;
};

// Types for the FEC packet masks. The type |kFecMaskRandom| is based on a
// random loss model. The type |kFecMaskBursty| is based on a bursty/consecutive
// loss model. The packet masks are defined in
// modules/rtp_rtcp/fec_private_tables_random(bursty).h
enum FecMaskType {
  kFecMaskRandom,
  kFecMaskBursty,
};

// Struct containing forward error correction settings.
struct FecProtectionParams {
  int fec_rate;
  bool use_uep_protection;
  int max_fec_frames;
  FecMaskType fec_mask_type;
};
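
// A minimal sketch (illustrative values only; the field meanings are as
// declared above): request FEC protection with a random-loss mask.
//   FecProtectionParams fec_params = {
//       127,             // fec_rate
//       false,           // use_uep_protection
//       30,              // max_fec_frames
//       kFecMaskRandom,  // fec_mask_type
//   };
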
// Interface used by the CallStats class to distribute call statistics.
// Callbacks will be triggered as soon as the class has been registered to a
// CallStats object using RegisterStatsObserver.
class CallStatsObserver {
 public:
  virtual void OnRttUpdate(int64_t avg_rtt_ms, int64_t max_rtt_ms) = 0;

  virtual ~CallStatsObserver() {}
};

struct VideoContentMetrics {
  VideoContentMetrics()
      : motion_magnitude(0.0f),
        spatial_pred_err(0.0f),
        spatial_pred_err_h(0.0f),
        spatial_pred_err_v(0.0f) {}

  void Reset() {
    motion_magnitude = 0.0f;
    spatial_pred_err = 0.0f;
    spatial_pred_err_h = 0.0f;
    spatial_pred_err_v = 0.0f;
  }

  float motion_magnitude;
  float spatial_pred_err;
  float spatial_pred_err_h;
  float spatial_pred_err_v;
};

/* This class holds up to 60 ms of super-wideband (32 kHz) stereo audio. It
 * allows for adding and subtracting frames while keeping track of the
 * resulting states.
 *
 * Notes
 * - The total number of samples in |data_| is
 *   samples_per_channel_ * num_channels_.
 *
 * - Stereo data is interleaved starting with the left channel.
 *
 * - The + operator assumes that you would never add exactly opposite frames
 *   when deciding the resulting state. To do this, use the - operator.
 */
class AudioFrame {
 public:
  // Stereo, 32 kHz, 60 ms (2 * 32 * 60).
  static const size_t kMaxDataSizeSamples = 3840;

  enum VADActivity {
    kVadActive = 0,
    kVadPassive = 1,
    kVadUnknown = 2
  };
  enum SpeechType {
    kNormalSpeech = 0,
    kPLC = 1,
    kCNG = 2,
    kPLCCNG = 3,
    kUndefined = 4
  };

  AudioFrame();
  virtual ~AudioFrame() {}

  // Resets all members to their default state (except that it does not modify
  // the contents of |data_|).
  void Reset();

  // |interleaved_| is not changed by this method.
  void UpdateFrame(int id, uint32_t timestamp, const int16_t* data,
                   size_t samples_per_channel, int sample_rate_hz,
                   SpeechType speech_type, VADActivity vad_activity,
                   size_t num_channels = 1, uint32_t energy = -1);

  AudioFrame& Append(const AudioFrame& rhs);

  void CopyFrom(const AudioFrame& src);

  void Mute();

  AudioFrame& operator>>=(const int rhs);
  AudioFrame& operator+=(const AudioFrame& rhs);
  AudioFrame& operator-=(const AudioFrame& rhs);

  int id_;
  // RTP timestamp of the first sample in the AudioFrame.
  uint32_t timestamp_;
  // Time since the first frame in milliseconds.
  // -1 represents an uninitialized value.
  int64_t elapsed_time_ms_;
  // NTP time of the estimated capture time in local timebase in milliseconds.
  // -1 represents an uninitialized value.
  int64_t ntp_time_ms_;
  int16_t data_[kMaxDataSizeSamples];
  size_t samples_per_channel_;
  int sample_rate_hz_;
  size_t num_channels_;
  SpeechType speech_type_;
  VADActivity vad_activity_;
  // Note that there is no guarantee that |energy_| is correct. Any user of
  // this member must verify that the value is correct.
  // TODO(henrike): Remove |energy_|.
  // See https://code.google.com/p/webrtc/issues/detail?id=3315.
  uint32_t energy_;
  bool interleaved_;

 private:
  RTC_DISALLOW_COPY_AND_ASSIGN(AudioFrame);
};
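
// A minimal usage sketch (illustrative only; |samples|, |rtp_timestamp|, and
// |other_frame| are hypothetical caller-provided values): fill a frame with
// 10 ms of mono 16 kHz audio, then mix a same-layout frame into it.
//   AudioFrame frame;
//   frame.UpdateFrame(0, rtp_timestamp, samples, 160 /* 10 ms at 16 kHz */,
//                     16000, AudioFrame::kNormalSpeech,
//                     AudioFrame::kVadActive);
//   frame += other_frame;  // Sample-wise saturating add; see operator+=.
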
inline AudioFrame::AudioFrame()
    : data_() {
  Reset();
}

inline void AudioFrame::Reset() {
  id_ = -1;
  // TODO(wu): Zero is a valid value for |timestamp_|. We should initialize
  // it to an invalid value, or add a new member to indicate invalidity.
  timestamp_ = 0;
  elapsed_time_ms_ = -1;
  ntp_time_ms_ = -1;
  samples_per_channel_ = 0;
  sample_rate_hz_ = 0;
  num_channels_ = 0;
  speech_type_ = kUndefined;
  vad_activity_ = kVadUnknown;
  energy_ = 0xffffffff;
  interleaved_ = true;
}

inline void AudioFrame::UpdateFrame(int id,
                                    uint32_t timestamp,
                                    const int16_t* data,
                                    size_t samples_per_channel,
                                    int sample_rate_hz,
                                    SpeechType speech_type,
                                    VADActivity vad_activity,
                                    size_t num_channels,
                                    uint32_t energy) {
  id_ = id;
  timestamp_ = timestamp;
  samples_per_channel_ = samples_per_channel;
  sample_rate_hz_ = sample_rate_hz;
  speech_type_ = speech_type;
  vad_activity_ = vad_activity;
  num_channels_ = num_channels;
  energy_ = energy;

  const size_t length = samples_per_channel * num_channels;
  assert(length <= kMaxDataSizeSamples);
  if (data != NULL) {
    memcpy(data_, data, sizeof(int16_t) * length);
  } else {
    memset(data_, 0, sizeof(int16_t) * length);
  }
}

inline void AudioFrame::CopyFrom(const AudioFrame& src) {
  if (this == &src) return;

  id_ = src.id_;
  timestamp_ = src.timestamp_;
  elapsed_time_ms_ = src.elapsed_time_ms_;
  ntp_time_ms_ = src.ntp_time_ms_;
  samples_per_channel_ = src.samples_per_channel_;
  sample_rate_hz_ = src.sample_rate_hz_;
  speech_type_ = src.speech_type_;
  vad_activity_ = src.vad_activity_;
  num_channels_ = src.num_channels_;
  energy_ = src.energy_;
  interleaved_ = src.interleaved_;

  const size_t length = samples_per_channel_ * num_channels_;
  assert(length <= kMaxDataSizeSamples);
  memcpy(data_, src.data_, sizeof(int16_t) * length);
}

inline void AudioFrame::Mute() {
  memset(data_, 0, samples_per_channel_ * num_channels_ * sizeof(int16_t));
}

inline AudioFrame& AudioFrame::operator>>=(const int rhs) {
  assert((num_channels_ > 0) && (num_channels_ < 3));
  if ((num_channels_ > 2) || (num_channels_ < 1)) return *this;

  for (size_t i = 0; i < samples_per_channel_ * num_channels_; i++) {
    data_[i] = static_cast<int16_t>(data_[i] >> rhs);
  }
  return *this;
}

inline AudioFrame& AudioFrame::Append(const AudioFrame& rhs) {
  // Sanity check.
  assert((num_channels_ > 0) && (num_channels_ < 3));
  assert(interleaved_ == rhs.interleaved_);
  if ((num_channels_ > 2) || (num_channels_ < 1)) return *this;
  if (num_channels_ != rhs.num_channels_) return *this;

  if ((vad_activity_ == kVadActive) || rhs.vad_activity_ == kVadActive) {
    vad_activity_ = kVadActive;
  } else if (vad_activity_ == kVadUnknown || rhs.vad_activity_ == kVadUnknown) {
    vad_activity_ = kVadUnknown;
  }
  if (speech_type_ != rhs.speech_type_) {
    speech_type_ = kUndefined;
  }

  size_t offset = samples_per_channel_ * num_channels_;
  for (size_t i = 0; i < rhs.samples_per_channel_ * rhs.num_channels_; i++) {
    data_[offset + i] = rhs.data_[i];
  }
  samples_per_channel_ += rhs.samples_per_channel_;
  return *this;
}

namespace {
inline int16_t ClampToInt16(int32_t input) {
  if (input < -0x00008000) {
    return -0x8000;
  } else if (input > 0x00007FFF) {
    return 0x7FFF;
  } else {
    return static_cast<int16_t>(input);
  }
}
}  // namespace
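
// For example, ClampToInt16(30000 + 10000) returns 32767 (0x7FFF) and
// ClampToInt16(-30000 - 10000) returns -32768 (-0x8000), so the mixing in
// operator+= below saturates instead of wrapping around in int16_t arithmetic.
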
inline AudioFrame& AudioFrame::operator+=(const AudioFrame& rhs) {
  // Sanity check.
  assert((num_channels_ > 0) && (num_channels_ < 3));
  assert(interleaved_ == rhs.interleaved_);
  if ((num_channels_ > 2) || (num_channels_ < 1)) return *this;
  if (num_channels_ != rhs.num_channels_) return *this;

  bool noPrevData = false;
  if (samples_per_channel_ != rhs.samples_per_channel_) {
    if (samples_per_channel_ == 0) {
      // Special case: we have no data to start with.
      samples_per_channel_ = rhs.samples_per_channel_;
      noPrevData = true;
    } else {
      return *this;
    }
  }

  if ((vad_activity_ == kVadActive) || rhs.vad_activity_ == kVadActive) {
    vad_activity_ = kVadActive;
  } else if (vad_activity_ == kVadUnknown || rhs.vad_activity_ == kVadUnknown) {
    vad_activity_ = kVadUnknown;
  }

  if (speech_type_ != rhs.speech_type_) speech_type_ = kUndefined;

  if (noPrevData) {
    memcpy(data_, rhs.data_,
           sizeof(int16_t) * rhs.samples_per_channel_ * num_channels_);
  } else {
    // IMPROVEMENT: this can be done very fast in assembly.
    for (size_t i = 0; i < samples_per_channel_ * num_channels_; i++) {
      int32_t wrap_guard =
          static_cast<int32_t>(data_[i]) + static_cast<int32_t>(rhs.data_[i]);
      data_[i] = ClampToInt16(wrap_guard);
    }
  }
  energy_ = 0xffffffff;
  return *this;
}

inline AudioFrame& AudioFrame::operator-=(const AudioFrame& rhs) {
  // Sanity check.
  assert((num_channels_ > 0) && (num_channels_ < 3));
  assert(interleaved_ == rhs.interleaved_);
  if ((num_channels_ > 2) || (num_channels_ < 1)) return *this;

  if ((samples_per_channel_ != rhs.samples_per_channel_) ||
      (num_channels_ != rhs.num_channels_)) {
    return *this;
  }
  if ((vad_activity_ != kVadPassive) || rhs.vad_activity_ != kVadPassive) {
    vad_activity_ = kVadUnknown;
  }
  speech_type_ = kUndefined;

  for (size_t i = 0; i < samples_per_channel_ * num_channels_; i++) {
    int32_t wrap_guard =
        static_cast<int32_t>(data_[i]) - static_cast<int32_t>(rhs.data_[i]);
    data_[i] = ClampToInt16(wrap_guard);
  }
  energy_ = 0xffffffff;
  return *this;
}

inline bool IsNewerSequenceNumber(uint16_t sequence_number,
                                  uint16_t prev_sequence_number) {
  // Distinguish between elements that are exactly 0x8000 apart.
  // If s1 > s2 and |s1 - s2| = 0x8000: IsNewer(s1, s2) = true,
  // IsNewer(s2, s1) = false
  // rather than having IsNewer(s1, s2) = IsNewer(s2, s1) = false.
  if (static_cast<uint16_t>(sequence_number - prev_sequence_number) == 0x8000) {
    return sequence_number > prev_sequence_number;
  }
  return sequence_number != prev_sequence_number &&
         static_cast<uint16_t>(sequence_number - prev_sequence_number) < 0x8000;
}

inline bool IsNewerTimestamp(uint32_t timestamp, uint32_t prev_timestamp) {
  // Distinguish between elements that are exactly 0x80000000 apart.
  // If t1 > t2 and |t1 - t2| = 0x80000000: IsNewer(t1, t2) = true,
  // IsNewer(t2, t1) = false
  // rather than having IsNewer(t1, t2) = IsNewer(t2, t1) = false.
  if (static_cast<uint32_t>(timestamp - prev_timestamp) == 0x80000000) {
    return timestamp > prev_timestamp;
  }
  return timestamp != prev_timestamp &&
         static_cast<uint32_t>(timestamp - prev_timestamp) < 0x80000000;
}

inline uint16_t LatestSequenceNumber(uint16_t sequence_number1,
                                     uint16_t sequence_number2) {
  return IsNewerSequenceNumber(sequence_number1, sequence_number2)
             ? sequence_number1
             : sequence_number2;
}

inline uint32_t LatestTimestamp(uint32_t timestamp1, uint32_t timestamp2) {
  return IsNewerTimestamp(timestamp1, timestamp2) ? timestamp1 : timestamp2;
}

// Utility class to unwrap a sequence number to a larger type, for easier
// handling of large ranges. Note that sequence numbers will never be unwrapped
// to a negative value.
class SequenceNumberUnwrapper {
 public:
  SequenceNumberUnwrapper() : last_seq_(-1) {}

  // Get the unwrapped sequence number, but don't update the internal state.
  int64_t UnwrapWithoutUpdate(uint16_t sequence_number) {
    if (last_seq_ == -1)
      return sequence_number;

    uint16_t cropped_last = static_cast<uint16_t>(last_seq_);
    int64_t delta = sequence_number - cropped_last;
    if (IsNewerSequenceNumber(sequence_number, cropped_last)) {
      if (delta < 0)
        delta += (1 << 16);  // Wrap forwards.
    } else if (delta > 0 && (last_seq_ + delta - (1 << 16)) >= 0) {
      // If sequence_number is older but delta is positive, this is a backwards
      // wrap-around. However, don't wrap backwards past 0 (unwrapped).
      delta -= (1 << 16);
    }

    return last_seq_ + delta;
  }

  // Only update the internal state to the specified last (unwrapped) sequence.
  void UpdateLast(int64_t last_sequence) { last_seq_ = last_sequence; }

  // Unwrap the sequence number and update the internal state.
  int64_t Unwrap(uint16_t sequence_number) {
    int64_t unwrapped = UnwrapWithoutUpdate(sequence_number);
    UpdateLast(unwrapped);
    return unwrapped;
  }

 private:
  int64_t last_seq_;
};
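
// A minimal usage sketch (illustrative only): unwrapping across a 16-bit
// wrap. IsNewerSequenceNumber(0x0001, 0xFFFE) is true, so the second call is
// treated as a forward wrap rather than a jump backwards.
//   SequenceNumberUnwrapper unwrapper;
//   unwrapper.Unwrap(0xFFFE);  // Returns 0xFFFE (65534).
//   unwrapper.Unwrap(0x0001);  // Returns 0x10001 (65537), not 1.
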
}  // namespace webrtc

#endif  // WEBRTC_MODULES_INCLUDE_MODULE_COMMON_TYPES_H_