1 /* 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #ifndef WEBRTC_MODULES_AUDIO_CODING_MAIN_INTERFACE_AUDIO_CODING_MODULE_H_ 12 #define WEBRTC_MODULES_AUDIO_CODING_MAIN_INTERFACE_AUDIO_CODING_MODULE_H_ 13 14 #include <vector> 15 16 #include "webrtc/common_types.h" 17 #include "webrtc/modules/audio_coding/main/acm2/acm_codec_database.h" 18 #include "webrtc/modules/audio_coding/main/interface/audio_coding_module_typedefs.h" 19 #include "webrtc/modules/audio_coding/neteq/interface/neteq.h" 20 #include "webrtc/modules/interface/module.h" 21 #include "webrtc/system_wrappers/interface/clock.h" 22 #include "webrtc/typedefs.h" 23 24 namespace webrtc { 25 26 // forward declarations 27 struct CodecInst; 28 struct WebRtcRTPHeader; 29 class AudioFrame; 30 class RTPFragmentationHeader; 31 32 #define WEBRTC_10MS_PCM_AUDIO 960 // 16 bits super wideband 48 kHz 33 34 // Callback class used for sending data ready to be packetized 35 class AudioPacketizationCallback { 36 public: 37 virtual ~AudioPacketizationCallback() {} 38 39 virtual int32_t SendData( 40 FrameType frame_type, 41 uint8_t payload_type, 42 uint32_t timestamp, 43 const uint8_t* payload_data, 44 uint16_t payload_len_bytes, 45 const RTPFragmentationHeader* fragmentation) = 0; 46 }; 47 48 // Callback class used for inband Dtmf detection 49 class AudioCodingFeedback { 50 public: 51 virtual ~AudioCodingFeedback() {} 52 53 virtual int32_t IncomingDtmf(const uint8_t digit_dtmf, 54 const bool end) = 0; 55 }; 56 57 // Callback class used for reporting VAD decision 58 class ACMVADCallback { 59 public: 60 virtual ~ACMVADCallback() {} 61 62 virtual int32_t 
InFrameType(int16_t frameType) = 0; 63 }; 64 65 // Callback class used for reporting receiver statistics 66 class ACMVQMonCallback { 67 public: 68 virtual ~ACMVQMonCallback() {} 69 70 virtual int32_t NetEqStatistics( 71 const int32_t id, // current ACM id 72 const uint16_t MIUsValid, // valid voice duration in ms 73 const uint16_t MIUsReplaced, // concealed voice duration in ms 74 const uint8_t eventFlags, // concealed voice flags 75 const uint16_t delayMS) = 0; // average delay in ms 76 }; 77 78 class AudioCodingModule: public Module { 79 protected: 80 AudioCodingModule() {} 81 82 public: 83 struct Config { 84 Config() 85 : id(0), 86 neteq_config(), 87 clock(Clock::GetRealTimeClock()) {} 88 89 int id; 90 NetEq::Config neteq_config; 91 Clock* clock; 92 }; 93 94 /////////////////////////////////////////////////////////////////////////// 95 // Creation and destruction of an ACM. 96 // 97 // The second method is used for testing where a simulated clock can be 98 // injected into ACM. ACM will take the ownership of the object clock and 99 // delete it when destroyed. 100 // 101 static AudioCodingModule* Create(int id); 102 static AudioCodingModule* Create(int id, Clock* clock); 103 virtual ~AudioCodingModule() {}; 104 105 /////////////////////////////////////////////////////////////////////////// 106 // Utility functions 107 // 108 109 /////////////////////////////////////////////////////////////////////////// 110 // int NumberOfCodecs() 111 // Returns number of supported codecs. 112 // 113 // Return value: 114 // number of supported codecs. 115 /// 116 static int NumberOfCodecs(); 117 118 /////////////////////////////////////////////////////////////////////////// 119 // int Codec() 120 // Get supported codec with list number. 121 // 122 // Input: 123 // -list_id : list number. 124 // 125 // Output: 126 // -codec : a structure where the parameters of the codec, 127 // given by list number, are written to. 
128 // 129 // Return value: 130 // -1 if the list number (list_id) is invalid. 131 // 0 if succeeded. 132 // 133 static int Codec(int list_id, CodecInst* codec); 134 135 /////////////////////////////////////////////////////////////////////////// 136 // int32_t Codec() 137 // Get supported codec with the given codec name, sampling frequency, and 138 // a given number of channels. 139 // 140 // Input: 141 // -payload_name : name of the codec. 142 // -sampling_freq_hz : sampling frequency of the codec. Note! for RED 143 // a sampling frequency of -1 is a valid input. 144 // -channels : number of channels ( 1 - mono, 2 - stereo). 145 // 146 // Output: 147 // -codec : a structure where the function returns the 148 // default parameters of the codec. 149 // 150 // Return value: 151 // -1 if no codec matches the given parameters. 152 // 0 if succeeded. 153 // 154 static int Codec(const char* payload_name, CodecInst* codec, 155 int sampling_freq_hz, int channels); 156 157 /////////////////////////////////////////////////////////////////////////// 158 // int32_t Codec() 159 // 160 // Returns the list number of the given codec name, sampling frequency, and 161 // a given number of channels. 162 // 163 // Input: 164 // -payload_name : name of the codec. 165 // -sampling_freq_hz : sampling frequency of the codec. Note! for RED 166 // a sampling frequency of -1 is a valid input. 167 // -channels : number of channels ( 1 - mono, 2 - stereo). 168 // 169 // Return value: 170 // if the codec is found, the index of the codec in the list, 171 // -1 if the codec is not found. 172 // 173 static int Codec(const char* payload_name, int sampling_freq_hz, 174 int channels); 175 176 /////////////////////////////////////////////////////////////////////////// 177 // bool IsCodecValid() 178 // Checks the validity of the parameters of the given codec. 179 // 180 // Input: 181 // -codec : the structure which keeps the parameters of the 182 // codec. 
183 // 184 // Return value: 185 // true if the parameters are valid, 186 // false if any parameter is not valid. 187 // 188 static bool IsCodecValid(const CodecInst& codec); 189 190 /////////////////////////////////////////////////////////////////////////// 191 // Sender 192 // 193 194 /////////////////////////////////////////////////////////////////////////// 195 // int32_t InitializeSender() 196 // Any encoder-related state of ACM will be initialized to the 197 // same state when ACM is created. This will not interrupt or 198 // affect decoding functionality of ACM. ACM will lose all the 199 // encoding-related settings by calling this function. 200 // For instance, a send codec has to be registered again. 201 // 202 // Return value: 203 // -1 if failed to initialize, 204 // 0 if succeeded. 205 // 206 virtual int32_t InitializeSender() = 0; 207 208 /////////////////////////////////////////////////////////////////////////// 209 // int32_t ResetEncoder() 210 // This API resets the states of encoder. All the encoder settings, such as 211 // send-codec or VAD/DTX, will be preserved. 212 // 213 // Return value: 214 // -1 if failed to initialize, 215 // 0 if succeeded. 216 // 217 virtual int32_t ResetEncoder() = 0; 218 219 /////////////////////////////////////////////////////////////////////////// 220 // int32_t RegisterSendCodec() 221 // Registers a codec, specified by |send_codec|, as sending codec. 222 // This API can be called multiple times to register a codec. The last codec 223 // registered overwrites the previous ones. 224 // The API can also be used to change payload type for CNG and RED, which are 225 // registered by default to default payload types. 226 // Note that registering CNG and RED won't overwrite speech codecs. 227 // This API can be called to set/change the send payload-type, frame-size 228 // or encoding rate (if applicable for the codec). 
229 // 230 // Note: If a stereo codec is registered as send codec, VAD/DTX will 231 // automatically be turned off, since it is not supported for stereo sending. 232 // 233 // Note: If a secondary encoder is already registered, and the new send-codec 234 // has a sampling rate that does not match the secondary encoder, the 235 // secondary encoder will be unregistered. 236 // 237 // Input: 238 // -send_codec : Parameters of the codec to be registered, c.f. 239 // common_types.h for the definition of 240 // CodecInst. 241 // 242 // Return value: 243 // -1 if failed to initialize, 244 // 0 if succeeded. 245 // 246 virtual int32_t RegisterSendCodec(const CodecInst& send_codec) = 0; 247 248 /////////////////////////////////////////////////////////////////////////// 249 // int RegisterSecondarySendCodec() 250 // Register a secondary encoder to enable dual-streaming. If a secondary 251 // codec is already registered, it will be removed before the new one is 252 // registered. 253 // 254 // Note: The secondary encoder will be unregistered if a primary codec 255 // is set with a sampling rate which does not match that of the existing 256 // secondary codec. 257 // 258 // Input: 259 // -send_codec : Parameters of the codec to be registered, c.f. 260 // common_types.h for the definition of 261 // CodecInst. 262 // 263 // Return value: 264 // -1 if failed to register, 265 // 0 if succeeded. 266 // 267 virtual int RegisterSecondarySendCodec(const CodecInst& send_codec) = 0; 268 269 /////////////////////////////////////////////////////////////////////////// 270 // void UnregisterSecondarySendCodec() 271 // Unregister the secondary encoder to disable dual-streaming. 272 // 273 virtual void UnregisterSecondarySendCodec() = 0; 274 275 /////////////////////////////////////////////////////////////////////////// 276 // int32_t SendCodec() 277 // Get parameters for the codec currently registered as send codec. 
278 // 279 // Output: 280 // -current_send_codec : parameters of the send codec. 281 // 282 // Return value: 283 // -1 if failed to get send codec, 284 // 0 if succeeded. 285 // 286 virtual int32_t SendCodec(CodecInst* current_send_codec) const = 0; 287 288 /////////////////////////////////////////////////////////////////////////// 289 // int SecondarySendCodec() 290 // Get the codec parameters for the current secondary send codec. 291 // 292 // Output: 293 // -secondary_codec : parameters of the secondary send codec. 294 // 295 // Return value: 296 // -1 if failed to get send codec, 297 // 0 if succeeded. 298 // 299 virtual int SecondarySendCodec(CodecInst* secondary_codec) const = 0; 300 301 /////////////////////////////////////////////////////////////////////////// 302 // int32_t SendFrequency() 303 // Get the sampling frequency of the current encoder in Hertz. 304 // 305 // Return value: 306 // positive; sampling frequency [Hz] of the current encoder. 307 // -1 if an error has happened. 308 // 309 virtual int32_t SendFrequency() const = 0; 310 311 /////////////////////////////////////////////////////////////////////////// 312 // int32_t SendBitrate() 313 // Get encoding bit-rate in bits per second. 314 // 315 // Return value: 316 // positive; encoding rate in bits/sec, 317 // -1 if an error has happened. 318 // 319 virtual int32_t SendBitrate() const = 0; 320 321 /////////////////////////////////////////////////////////////////////////// 322 // int32_t SetReceivedEstimatedBandwidth() 323 // Set available bandwidth [bits/sec] of the up-link channel. 324 // This information is used for traffic shaping, and is currently only 325 // supported if iSAC is the send codec. 326 // 327 // Input: 328 // -bw : bandwidth in bits/sec estimated for 329 // up-link. 330 // Return value: 331 // -1 if error occurred in setting the bandwidth, 332 // 0 bandwidth is set successfully. 333 // 334 // TODO(henrik.lundin) Unused. Remove? 
335 virtual int32_t SetReceivedEstimatedBandwidth( 336 const int32_t bw) = 0; 337 338 /////////////////////////////////////////////////////////////////////////// 339 // int32_t RegisterTransportCallback() 340 // Register a transport callback which will be called to deliver 341 // the encoded buffers whenever Process() is called and a 342 // bit-stream is ready. 343 // 344 // Input: 345 // -transport : pointer to the callback class 346 // transport->SendData() is called whenever 347 // Process() is called and bit-stream is ready 348 // to deliver. 349 // 350 // Return value: 351 // -1 if the transport callback could not be registered 352 // 0 if registration is successful. 353 // 354 virtual int32_t RegisterTransportCallback( 355 AudioPacketizationCallback* transport) = 0; 356 357 /////////////////////////////////////////////////////////////////////////// 358 // int32_t Add10MsData() 359 // Add 10MS of raw (PCM) audio data to the encoder. If the sampling 360 // frequency of the audio does not match the sampling frequency of the 361 // current encoder ACM will resample the audio. 362 // 363 // Input: 364 // -audio_frame : the input audio frame, containing raw audio 365 // sampling frequency etc., 366 // c.f. module_common_types.h for definition of 367 // AudioFrame. 368 // 369 // Return value: 370 // 0 successfully added the frame. 371 // -1 some error occurred and data is not added. 372 // < -1 to add the frame to the buffer n samples had to be 373 // overwritten, -n is the return value in this case. 374 // 375 virtual int32_t Add10MsData(const AudioFrame& audio_frame) = 0; 376 377 /////////////////////////////////////////////////////////////////////////// 378 // (RED) Redundant Coding 379 // 380 381 /////////////////////////////////////////////////////////////////////////// 382 // int32_t SetREDStatus() 383 // configure RED status i.e. on/off. 
384 // 385 // RFC 2198 describes a solution which has a single payload type which 386 // signifies a packet with redundancy. That packet then becomes a container, 387 // encapsulating multiple payloads into a single RTP packet. 388 // Such a scheme is flexible, since any amount of redundancy may be 389 // encapsulated within a single packet. There is, however, a small overhead 390 // since each encapsulated payload must be preceded by a header indicating 391 // the type of data enclosed. 392 // 393 // Input: 394 // -enable_red : if true RED is enabled, otherwise RED is 395 // disabled. 396 // 397 // Return value: 398 // -1 if failed to set RED status, 399 // 0 if succeeded. 400 // 401 virtual int32_t SetREDStatus(bool enable_red) = 0; 402 403 /////////////////////////////////////////////////////////////////////////// 404 // bool REDStatus() 405 // Get RED status. 406 // 407 // Return value: 408 // true if RED is enabled, 409 // false if RED is disabled. 410 // 411 virtual bool REDStatus() const = 0; 412 413 /////////////////////////////////////////////////////////////////////////// 414 // (FEC) Forward Error Correction (codec internal) 415 // 416 417 /////////////////////////////////////////////////////////////////////////// 418 // int SetCodecFEC() 419 // Configures codec internal FEC status i.e. on/off. No effect on codecs that 420 // do not provide internal FEC. 421 // 422 // Input: 423 // -enable_codec_fec : if true FEC will be enabled otherwise the FEC is 424 // disabled. 425 // 426 // Return value: 427 // -1 if failed, or the codec does not support FEC 428 // 0 if succeeded. 429 // 430 virtual int SetCodecFEC(bool enable_codec_fec) = 0; 431 432 /////////////////////////////////////////////////////////////////////////// 433 // bool CodecFEC() 434 // Gets status of codec internal FEC. 435 // 436 // Return value: 437 // true if FEC is enabled, 438 // false if FEC is disabled. 
439 // 440 virtual bool CodecFEC() const = 0; 441 442 /////////////////////////////////////////////////////////////////////////// 443 // int SetPacketLossRate() 444 // Sets expected packet loss rate for encoding. Some encoders provide packet 445 // loss-aware encoding to make the stream less sensitive to packet losses, 446 // through e.g., FEC. No effect on codecs that do not provide such encoding. 447 // 448 // Input: 449 // -packet_loss_rate : expected packet loss rate (0 -- 100 inclusive). 450 // 451 // Return value: 452 // -1 if failed to set packet loss rate, 453 // 0 if succeeded. 454 // 455 virtual int SetPacketLossRate(int packet_loss_rate) = 0; 456 457 /////////////////////////////////////////////////////////////////////////// 458 // (VAD) Voice Activity Detection 459 // 460 461 /////////////////////////////////////////////////////////////////////////// 462 // int32_t SetVAD() 463 // If DTX is enabled & the codec does not have internal DTX/VAD 464 // WebRtc VAD will be automatically enabled and |enable_vad| is ignored. 465 // 466 // If DTX is disabled but VAD is enabled no DTX packets are sent, 467 // regardless of whether the codec has internal DTX/VAD or not. In this 468 // case, WebRtc VAD is running to label frames as active/in-active. 469 // 470 // NOTE! VAD/DTX is not supported when sending stereo. 471 // 472 // Inputs: 473 // -enable_dtx : if true DTX is enabled, 474 // otherwise DTX is disabled. 475 // -enable_vad : if true VAD is enabled, 476 // otherwise VAD is disabled. 477 // -vad_mode : determines the aggressiveness of VAD. A more 478 // aggressive mode results in more frames labeled 479 // as in-active, c.f. definition of 480 // ACMVADMode in audio_coding_module_typedefs.h 481 // for valid values. 482 // 483 // Return value: 484 // -1 if failed to set up VAD/DTX, 485 // 0 if succeeded. 
486 // 487 virtual int32_t SetVAD(const bool enable_dtx = true, 488 const bool enable_vad = false, 489 const ACMVADMode vad_mode = VADNormal) = 0; 490 491 /////////////////////////////////////////////////////////////////////////// 492 // int32_t VAD() 493 // Get VAD status. 494 // 495 // Outputs: 496 // -dtx_enabled : is set to true if DTX is enabled, otherwise 497 // is set to false. 498 // -vad_enabled : is set to true if VAD is enabled, otherwise 499 // is set to false. 500 // -vad_mode : is set to the current aggressiveness of VAD. 501 // 502 // Return value: 503 // -1 if fails to retrieve the setting of DTX/VAD, 504 // 0 if succeeded. 505 // 506 virtual int32_t VAD(bool* dtx_enabled, bool* vad_enabled, 507 ACMVADMode* vad_mode) const = 0; 508 509 /////////////////////////////////////////////////////////////////////////// 510 // int32_t ReplaceInternalDTXWithWebRtc() 511 // Used to replace codec internal DTX scheme with WebRtc. This is only 512 // supported for G729, where this call replaces AnnexB with WebRtc DTX. 513 // 514 // Input: 515 // -use_webrtc_dtx : if false (default) the codec built-in DTX/VAD 516 // scheme is used, otherwise the internal DTX is 517 // replaced with WebRtc DTX/VAD. 518 // 519 // Return value: 520 // -1 if failed to replace codec internal DTX with WebRtc, 521 // 0 if succeeded. 522 // 523 virtual int32_t ReplaceInternalDTXWithWebRtc( 524 const bool use_webrtc_dtx = false) = 0; 525 526 /////////////////////////////////////////////////////////////////////////// 527 // int32_t IsInternalDTXReplacedWithWebRtc() 528 // Get status if the codec internal DTX (when such exists) is replaced with 529 // WebRtc DTX. This is only supported for G729. 530 // 531 // Output: 532 // -uses_webrtc_dtx : is set to true if the codec internal DTX is 533 // replaced with WebRtc DTX/VAD, otherwise it is set 534 // to false. 535 // 536 // Return value: 537 // -1 if failed to determine if codec internal DTX is replaced with WebRtc, 538 // 0 if succeeded. 
539 // 540 virtual int32_t IsInternalDTXReplacedWithWebRtc( 541 bool* uses_webrtc_dtx) = 0; 542 543 /////////////////////////////////////////////////////////////////////////// 544 // int32_t RegisterVADCallback() 545 // Call this method to register a callback function which is called 546 // any time that ACM encounters an empty frame. That is a frame which is 547 // recognized as inactive. Depending on the codec WebRtc VAD or internal codec 548 // VAD is employed to identify a frame as active/inactive. 549 // 550 // Input: 551 // -vad_callback : pointer to an ACMVADCallback object. 552 // 553 // Return value: 554 // -1 if failed to register the callback function. 555 // 0 if the callback function is registered successfully. 556 // 557 virtual int32_t RegisterVADCallback(ACMVADCallback* vad_callback) = 0; 558 559 /////////////////////////////////////////////////////////////////////////// 560 // Receiver 561 // 562 563 /////////////////////////////////////////////////////////////////////////// 564 // int32_t InitializeReceiver() 565 // Any decoder-related state of ACM will be initialized to the 566 // same state when ACM is created. This will not interrupt or 567 // affect encoding functionality of ACM. ACM would lose all the 568 // decoding-related settings by calling this function. 569 // For instance, all registered codecs are deleted and have to be 570 // registered again. 571 // 572 // Return value: 573 // -1 if failed to initialize, 574 // 0 if succeeded. 575 // 576 virtual int32_t InitializeReceiver() = 0; 577 578 /////////////////////////////////////////////////////////////////////////// 579 // int32_t ResetDecoder() 580 // This API resets the states of decoders. ACM will not lose any 581 // decoder-related settings, such as registered codecs. 582 // 583 // Return value: 584 // -1 if failed to initialize, 585 // 0 if succeeded. 
586 // 587 virtual int32_t ResetDecoder() = 0; 588 589 /////////////////////////////////////////////////////////////////////////// 590 // int32_t ReceiveFrequency() 591 // Get sampling frequency of the last received payload. 592 // 593 // Return value: 594 // non-negative; the sampling frequency in Hertz. 595 // -1 if an error has occurred. 596 // 597 virtual int32_t ReceiveFrequency() const = 0; 598 599 /////////////////////////////////////////////////////////////////////////// 600 // int32_t PlayoutFrequency() 601 // Get sampling frequency of audio played out. 602 // 603 // Return value: 604 // the sampling frequency in Hertz. 605 // 606 virtual int32_t PlayoutFrequency() const = 0; 607 608 /////////////////////////////////////////////////////////////////////////// 609 // int32_t RegisterReceiveCodec() 610 // Register possible decoders, can be called multiple times for 611 // codecs, CNG-NB, CNG-WB, CNG-SWB, AVT and RED. 612 // 613 // Input: 614 // -receive_codec : parameters of the codec to be registered, c.f. 615 // common_types.h for the definition of 616 // CodecInst. 617 // 618 // Return value: 619 // -1 if failed to register the codec 620 // 0 if the codec is registered successfully. 621 // 622 virtual int32_t RegisterReceiveCodec( 623 const CodecInst& receive_codec) = 0; 624 625 /////////////////////////////////////////////////////////////////////////// 626 // int UnregisterReceiveCodec() 627 // Unregister the codec currently registered with a specific payload type 628 // from the list of possible receive codecs. 629 // 630 // Input: 631 // -payload_type : The number representing the payload type to 632 // unregister. 633 // 634 // Return value: 635 // -1 if fails to unregister. 636 // 0 if the given codec is successfully unregistered. 
637 // 638 virtual int UnregisterReceiveCodec( 639 uint8_t payload_type) = 0; 640 641 /////////////////////////////////////////////////////////////////////////// 642 // int32_t ReceiveCodec() 643 // Get the codec associated with last received payload. 644 // 645 // Output: 646 // -curr_receive_codec : parameters of the codec associated with the last 647 // received payload, c.f. common_types.h for 648 // the definition of CodecInst. 649 // 650 // Return value: 651 // -1 if failed to retrieve the codec, 652 // 0 if the codec is successfully retrieved. 653 // 654 virtual int32_t ReceiveCodec(CodecInst* curr_receive_codec) const = 0; 655 656 /////////////////////////////////////////////////////////////////////////// 657 // int32_t IncomingPacket() 658 // Call this function to insert a parsed RTP packet into ACM. 659 // 660 // Inputs: 661 // -incoming_payload : received payload. 662 // -payload_len_bytes : the length of payload in bytes. 663 // -rtp_info : the relevant information retrieved from RTP 664 // header. 665 // 666 // Return value: 667 // -1 if failed to push in the payload 668 // 0 if payload is successfully pushed in. 669 // 670 virtual int32_t IncomingPacket(const uint8_t* incoming_payload, 671 const int32_t payload_len_bytes, 672 const WebRtcRTPHeader& rtp_info) = 0; 673 674 /////////////////////////////////////////////////////////////////////////// 675 // int32_t IncomingPayload() 676 // Call this API to push incoming payloads when there is no rtp-info. 677 // The rtp-info will be created in ACM. One usage for this API is when 678 // pre-encoded files are pushed in ACM 679 // 680 // Inputs: 681 // -incoming_payload : received payload. 682 // -payload_len_byte : the length, in bytes, of the received payload. 683 // -payload_type : the payload-type. This specifies which codec has 684 // to be used to decode the payload. 685 // -timestamp : send timestamp of the payload. 
ACM starts with 686 // a random value and increments it by the 687 // packet-size, which is given when the codec in 688 // question is registered by RegisterReceiveCodec(). 689 // Therefore, it is essential to have the timestamp 690 // if the frame-size differs from the registered 691 // value or if the incoming payload contains DTX 692 // packets. 693 // 694 // Return value: 695 // -1 if failed to push in the payload 696 // 0 if payload is successfully pushed in. 697 // 698 virtual int32_t IncomingPayload(const uint8_t* incoming_payload, 699 const int32_t payload_len_byte, 700 const uint8_t payload_type, 701 const uint32_t timestamp = 0) = 0; 702 703 /////////////////////////////////////////////////////////////////////////// 704 // int SetMinimumPlayoutDelay() 705 // Set a minimum for the playout delay, used for lip-sync. NetEq maintains 706 // such a delay unless channel condition yields to a higher delay. 707 // 708 // Input: 709 // -time_ms : minimum delay in milliseconds. 710 // 711 // Return value: 712 // -1 if failed to set the delay, 713 // 0 if the minimum delay is set. 714 // 715 virtual int SetMinimumPlayoutDelay(int time_ms) = 0; 716 717 /////////////////////////////////////////////////////////////////////////// 718 // int SetMaximumPlayoutDelay() 719 // Set a maximum for the playout delay. 720 // 721 // Input: 722 // -time_ms : maximum delay in milliseconds. 723 // 724 // Return value: 725 // -1 if failed to set the delay, 726 // 0 if the maximum delay is set. 727 // 728 virtual int SetMaximumPlayoutDelay(int time_ms) = 0; 729 730 // 731 // The shortest latency, in milliseconds, required by jitter buffer. This 732 // is computed based on inter-arrival times and playout mode of NetEq. The 733 // actual delay is the maximum of least-required-delay and the minimum-delay 734 // specified by SetMinimumPlayoutDelay() API. 
735 // 736 virtual int LeastRequiredDelayMs() const = 0; 737 738 /////////////////////////////////////////////////////////////////////////// 739 // int32_t SetDtmfPlayoutStatus() 740 // Configure DTMF playout, i.e. whether out-of-band 741 // DTMF tones are played or not. 742 // 743 // Input: 744 // -enable : true to enable playout of out-of-band DTMF tones, 745 // false to disable. 746 // 747 // Return value: 748 // -1 if the method fails, e.g. DTMF playout is not supported. 749 // 0 if the status is set successfully. 750 // 751 virtual int32_t SetDtmfPlayoutStatus(const bool enable) = 0; 752 753 /////////////////////////////////////////////////////////////////////////// 754 // bool DtmfPlayoutStatus() 755 // Get DTMF playout status. 756 // 757 // Return value: 758 // true if out-of-band DTMF tones are played, 759 // false if playout of DTMF tones is disabled. 760 // 761 virtual bool DtmfPlayoutStatus() const = 0; 762 763 /////////////////////////////////////////////////////////////////////////// 764 // int32_t PlayoutTimestamp() 765 // The send timestamp of an RTP packet is associated with the decoded 766 // audio of the packet in question. This function returns the timestamp of 767 // the latest audio obtained by calling PlayoutData10Ms(). 768 // 769 // Output: 770 // -timestamp : a pointer to a uint32_t to receive the 771 // timestamp. 772 // Return value: 773 // 0 if the output is a correct timestamp. 774 // -1 if failed to output the correct timestamp. 775 // 776 // TODO(tlegrand): Change function to return the timestamp. 777 virtual int32_t PlayoutTimestamp(uint32_t* timestamp) = 0; 778 779 /////////////////////////////////////////////////////////////////////////// 780 // int32_t DecoderEstimatedBandwidth() 781 // Get the estimate of the Bandwidth, in bits/second, based on the incoming 782 // stream. This API is useful in one-way communication scenarios, where 783 // the bandwidth information is sent in an out-of-band fashion. 
784 // Currently only supported if iSAC is registered as a receiver. 785 // 786 // Return value: 787 // >0 bandwidth in bits/second. 788 // -1 if failed to get a bandwidth estimate. 789 // 790 virtual int32_t DecoderEstimatedBandwidth() const = 0; 791 792 /////////////////////////////////////////////////////////////////////////// 793 // int32_t SetPlayoutMode() 794 // Call this API to set the playout mode. Playout mode could be optimized 795 // for i) voice, ii) FAX or iii) streaming. In Voice mode, NetEQ is 796 // optimized to deliver highest audio quality while maintaining a minimum 797 // delay. In FAX mode, NetEQ is optimized to have few delay changes as 798 // possible and maintain a constant delay, perhaps large relative to voice 799 // mode, to avoid PLC. In streaming mode, we tolerate a little more delay 800 // to achieve better jitter robustness. 801 // 802 // Input: 803 // -mode : playout mode. Possible inputs are: 804 // "voice", 805 // "fax" and 806 // "streaming". 807 // 808 // Return value: 809 // -1 if failed to set the mode, 810 // 0 if succeeding. 811 // 812 virtual int32_t SetPlayoutMode(const AudioPlayoutMode mode) = 0; 813 814 /////////////////////////////////////////////////////////////////////////// 815 // AudioPlayoutMode PlayoutMode() 816 // Get playout mode, i.e. whether it is speech, FAX or streaming. See 817 // audio_coding_module_typedefs.h for definition of AudioPlayoutMode. 818 // 819 // Return value: 820 // voice: is for voice output, 821 // fax: a mode that is optimized for receiving FAX signals. 822 // In this mode NetEq tries to maintain a constant high 823 // delay to avoid PLC if possible. 824 // streaming: a mode that is suitable for streaming. In this mode we 825 // accept longer delay to improve jitter robustness. 
826 // 827 virtual AudioPlayoutMode PlayoutMode() const = 0; 828 829 /////////////////////////////////////////////////////////////////////////// 830 // int32_t PlayoutData10Ms( 831 // Get 10 milliseconds of raw audio data for playout, at the given sampling 832 // frequency. ACM will perform a resampling if required. 833 // 834 // Input: 835 // -desired_freq_hz : the desired sampling frequency, in Hertz, of the 836 // output audio. If set to -1, the function returns 837 // the audio at the current sampling frequency. 838 // 839 // Output: 840 // -audio_frame : output audio frame which contains raw audio data 841 // and other relevant parameters, c.f. 842 // module_common_types.h for the definition of 843 // AudioFrame. 844 // 845 // Return value: 846 // -1 if the function fails, 847 // 0 if the function succeeds. 848 // 849 virtual int32_t PlayoutData10Ms(int32_t desired_freq_hz, 850 AudioFrame* audio_frame) = 0; 851 852 /////////////////////////////////////////////////////////////////////////// 853 // Codec specific 854 // 855 856 /////////////////////////////////////////////////////////////////////////// 857 // int32_t SetISACMaxRate() 858 // Set the maximum instantaneous rate of iSAC. For a payload of B bits 859 // with a frame-size of T sec the instantaneous rate is B/T bits per 860 // second. Therefore, (B/T < |max_rate_bps|) and 861 // (B < |max_payload_len_bytes| * 8) are always satisfied for iSAC payloads, 862 // c.f SetISACMaxPayloadSize(). 863 // 864 // Input: 865 // -max_rate_bps : maximum instantaneous bit-rate given in bits/sec. 866 // 867 // Return value: 868 // -1 if failed to set the maximum rate. 869 // 0 if the maximum rate is set successfully. 870 // 871 virtual int SetISACMaxRate(int max_rate_bps) = 0; 872 873 /////////////////////////////////////////////////////////////////////////// 874 // int32_t SetISACMaxPayloadSize() 875 // Set the maximum payload size of iSAC packets. 
No iSAC payload, 876 // regardless of its frame-size, may exceed the given limit. For 877 // an iSAC payload of size B bits and frame-size T seconds we have; 878 // (B < |max_payload_len_bytes| * 8) and (B/T < |max_rate_bps|), c.f. 879 // SetISACMaxRate(). 880 // 881 // Input: 882 // -max_payload_len_bytes : maximum payload size in bytes. 883 // 884 // Return value: 885 // -1 if failed to set the maximum payload-size. 886 // 0 if the given length is set successfully. 887 // 888 virtual int SetISACMaxPayloadSize(int max_payload_len_bytes) = 0; 889 890 /////////////////////////////////////////////////////////////////////////// 891 // int32_t ConfigISACBandwidthEstimator() 892 // Call this function to configure the bandwidth estimator of ISAC. 893 // During the adaptation of bit-rate, iSAC automatically adjusts the 894 // frame-size (either 30 or 60 ms) to save on RTP header. The initial 895 // frame-size can be specified by the first argument. The configuration also 896 // regards the initial estimate of bandwidths. The estimator starts from 897 // this point and converges to the actual bottleneck. This is given by the 898 // second parameter. Furthermore, it is also possible to control the 899 // adaptation of frame-size. This is specified by the last parameter. 900 // 901 // Input: 902 // -init_frame_size_ms : initial frame-size in milliseconds. For iSAC-wb 903 // 30 ms and 60 ms (default) are acceptable values, 904 // and for iSAC-swb 30 ms is the only acceptable 905 // value. Zero indicates default value. 906 // -init_rate_bps : initial estimate of the bandwidth. Values 907 // between 10000 and 58000 are acceptable. 908 // -enforce_srame_size : if true, the frame-size will not be adapted. 909 // 910 // Return value: 911 // -1 if failed to configure the bandwidth estimator, 912 // 0 if the configuration was successfully applied. 
913 // 914 virtual int32_t ConfigISACBandwidthEstimator( 915 int init_frame_size_ms, 916 int init_rate_bps, 917 bool enforce_frame_size = false) = 0; 918 919 /////////////////////////////////////////////////////////////////////////// 920 // int SetOpusMaxPlaybackRate() 921 // If current send codec is Opus, informs it about maximum playback rate the 922 // receiver will render. Opus can use this information to optimize the bit 923 // rate and increase the computation efficiency. 924 // 925 // Input: 926 // -frequency_hz : maximum playback rate in Hz. 927 // 928 // Return value: 929 // -1 if current send codec is not Opus or 930 // error occurred in setting the maximum playback rate, 931 // 0 maximum bandwidth is set successfully. 932 // 933 virtual int SetOpusMaxPlaybackRate(int frequency_hz) = 0; 934 935 /////////////////////////////////////////////////////////////////////////// 936 // statistics 937 // 938 939 /////////////////////////////////////////////////////////////////////////// 940 // int32_t NetworkStatistics() 941 // Get network statistics. Note that the internal statistics of NetEq are 942 // reset by this call. 943 // 944 // Input: 945 // -network_statistics : a structure that contains network statistics. 946 // 947 // Return value: 948 // -1 if failed to set the network statistics, 949 // 0 if statistics are set successfully. 950 // 951 virtual int32_t NetworkStatistics( 952 ACMNetworkStatistics* network_statistics) = 0; 953 954 // 955 // Set an initial delay for playout. 956 // An initial delay yields ACM playout silence until equivalent of |delay_ms| 957 // audio payload is accumulated in NetEq jitter. Thereafter, ACM pulls audio 958 // from NetEq in its regular fashion, and the given delay is maintained 959 // through out the call, unless channel conditions yield to a higher jitter 960 // buffer delay. 961 // 962 // Input: 963 // -delay_ms : delay in milliseconds. 964 // 965 // Return values: 966 // -1 if failed to set the delay. 
967 // 0 if delay is set successfully. 968 // 969 virtual int SetInitialPlayoutDelay(int delay_ms) = 0; 970 971 // 972 // Enable NACK and set the maximum size of the NACK list. If NACK is already 973 // enable then the maximum NACK list size is modified accordingly. 974 // 975 // If the sequence number of last received packet is N, the sequence numbers 976 // of NACK list are in the range of [N - |max_nack_list_size|, N). 977 // 978 // |max_nack_list_size| should be positive (none zero) and less than or 979 // equal to |Nack::kNackListSizeLimit|. Otherwise, No change is applied and -1 980 // is returned. 0 is returned at success. 981 // 982 virtual int EnableNack(size_t max_nack_list_size) = 0; 983 984 // Disable NACK. 985 virtual void DisableNack() = 0; 986 987 // 988 // Get a list of packets to be retransmitted. |round_trip_time_ms| is an 989 // estimate of the round-trip-time (in milliseconds). Missing packets which 990 // will be playout in a shorter time than the round-trip-time (with respect 991 // to the time this API is called) will not be included in the list. 992 // 993 // Negative |round_trip_time_ms| results is an error message and empty list 994 // is returned. 
995 // 996 virtual std::vector<uint16_t> GetNackList(int round_trip_time_ms) const = 0; 997 998 virtual void GetDecodingCallStatistics( 999 AudioDecodingCallStats* call_stats) const = 0; 1000 }; 1001 1002 class AudioEncoder; 1003 class ReceiverInfo; 1004 1005 class AudioCoding { 1006 public: 1007 struct Config { 1008 Config() 1009 : neteq_config(), 1010 clock(Clock::GetRealTimeClock()), 1011 transport(NULL), 1012 vad_callback(NULL), 1013 play_dtmf(true), 1014 initial_playout_delay_ms(0), 1015 playout_channels(1), 1016 playout_frequency_hz(32000) {} 1017 1018 AudioCodingModule::Config ToOldConfig() const { 1019 AudioCodingModule::Config old_config; 1020 old_config.id = 0; 1021 old_config.neteq_config = neteq_config; 1022 old_config.clock = clock; 1023 return old_config; 1024 } 1025 1026 NetEq::Config neteq_config; 1027 Clock* clock; 1028 AudioPacketizationCallback* transport; 1029 ACMVADCallback* vad_callback; 1030 bool play_dtmf; 1031 int initial_playout_delay_ms; 1032 int playout_channels; 1033 int playout_frequency_hz; 1034 }; 1035 1036 static AudioCoding* Create(const Config& config); 1037 virtual ~AudioCoding() {}; 1038 1039 // Registers a codec, specified by |send_codec|, as sending codec. 1040 // This API can be called multiple times. The last codec registered overwrites 1041 // the previous ones. Returns true if successful, false if not. 1042 // 1043 // Note: If a stereo codec is registered as send codec, VAD/DTX will 1044 // automatically be turned off, since it is not supported for stereo sending. 1045 virtual bool RegisterSendCodec(AudioEncoder* send_codec) = 0; 1046 1047 // Temporary solution to be used during refactoring: 1048 // |encoder_type| should be from the anonymous enum in acm2::ACMCodecDB. 1049 virtual bool RegisterSendCodec(int encoder_type, 1050 uint8_t payload_type, 1051 int frame_size_samples = 0) = 0; 1052 1053 // Returns the encoder object currently in use. 
This is the same as the 1054 // codec that was registered in the latest call to RegisterSendCodec(). 1055 virtual const AudioEncoder* GetSenderInfo() const = 0; 1056 1057 // Temporary solution to be used during refactoring. 1058 virtual const CodecInst* GetSenderCodecInst() = 0; 1059 1060 // Adds 10 ms of raw (PCM) audio data to the encoder. If the sampling 1061 // frequency of the audio does not match the sampling frequency of the 1062 // current encoder, ACM will resample the audio. 1063 // 1064 // Return value: 1065 // 0 successfully added the frame. 1066 // -1 some error occurred and data is not added. 1067 // < -1 to add the frame to the buffer n samples had to be 1068 // overwritten, -n is the return value in this case. 1069 // TODO(henrik.lundin): Make a better design for the return values. This one 1070 // is just a copy of the old API. 1071 virtual int Add10MsAudio(const AudioFrame& audio_frame) = 0; 1072 1073 // Returns a combined info about the currently used decoder(s). 1074 virtual const ReceiverInfo* GetReceiverInfo() const = 0; 1075 1076 // Registers a codec, specified by |receive_codec|, as receiving codec. 1077 // This API can be called multiple times. If registering with a payload type 1078 // that was already registered in a previous call, the latest call will 1079 // override previous calls. Returns true if successful, false if not. 1080 virtual bool RegisterReceiveCodec(AudioDecoder* receive_codec) = 0; 1081 1082 // Temporary solution: 1083 // |decoder_type| should be from the anonymous enum in acm2::ACMCodecDB. 1084 virtual bool RegisterReceiveCodec(int decoder_type, uint8_t payload_type) = 0; 1085 1086 // The following two methods both inserts a new packet to the receiver. 1087 // InsertPacket takes an RTP header input in |rtp_info|, while InsertPayload 1088 // only requires a payload type and a timestamp. The latter assumes that the 1089 // payloads come in the right order, and without any losses. 
In both cases, 1090 // |incoming_payload| contains the RTP payload after the RTP header. Return 1091 // true if successful, false if not. 1092 virtual bool InsertPacket(const uint8_t* incoming_payload, 1093 int32_t payload_len_bytes, 1094 const WebRtcRTPHeader& rtp_info) = 0; 1095 1096 // TODO(henrik.lundin): Remove this method? 1097 virtual bool InsertPayload(const uint8_t* incoming_payload, 1098 int32_t payload_len_byte, 1099 uint8_t payload_type, 1100 uint32_t timestamp) = 0; 1101 1102 // These two methods set a minimum and maximum jitter buffer delay in 1103 // milliseconds. The pupose is mainly to adjust the delay to synchronize 1104 // audio and video. The preferred jitter buffer size, computed by NetEq based 1105 // on the current channel conditions, is clamped from below and above by these 1106 // two methods. The given delay limits must be non-negative, less than 1107 // 10000 ms, and the minimum must be strictly smaller than the maximum. 1108 // Further, the maximum must be at lest one frame duration. If these 1109 // conditions are not met, false is returned. Giving the value 0 effectively 1110 // unsets the minimum or maximum delay limits. 1111 // Note that calling these methods is optional. If not called, NetEq will 1112 // determine the optimal buffer size based on the network conditions. 1113 virtual bool SetMinimumPlayoutDelay(int time_ms) = 0; 1114 1115 virtual bool SetMaximumPlayoutDelay(int time_ms) = 0; 1116 1117 // Returns the current value of the jitter buffer's preferred latency. This 1118 // is computed based on inter-arrival times and playout mode of NetEq. The 1119 // actual target delay is this value clamped from below and above by the 1120 // values specified through SetMinimumPlayoutDelay() and 1121 // SetMaximumPlayoutDelay(), respectively, if provided. 1122 // TODO(henrik.lundin) Rename to PreferredDelayMs? 
1123 virtual int LeastRequiredDelayMs() const = 0; 1124 1125 // The send timestamp of an RTP packet is associated with the decoded 1126 // audio of the packet in question. This function returns the timestamp of 1127 // the latest audio delivered by Get10MsAudio(). Returns false if no timestamp 1128 // can be provided, true otherwise. 1129 virtual bool PlayoutTimestamp(uint32_t* timestamp) = 0; 1130 1131 // Delivers 10 ms of audio in |audio_frame|. Returns true if successful, 1132 // false otherwise. 1133 virtual bool Get10MsAudio(AudioFrame* audio_frame) = 0; 1134 1135 // Returns the network statistics. Note that the internal statistics of NetEq 1136 // are reset by this call. Returns true if successful, false otherwise. 1137 virtual bool NetworkStatistics(ACMNetworkStatistics* network_statistics) = 0; 1138 1139 // Enables NACK and sets the maximum size of the NACK list. If NACK is already 1140 // enabled then the maximum NACK list size is modified accordingly. Returns 1141 // true if successful, false otherwise. 1142 // 1143 // If the sequence number of last received packet is N, the sequence numbers 1144 // of NACK list are in the range of [N - |max_nack_list_size|, N). 1145 // 1146 // |max_nack_list_size| should be positive and less than or equal to 1147 // |Nack::kNackListSizeLimit|. 1148 virtual bool EnableNack(size_t max_nack_list_size) = 0; 1149 1150 // Disables NACK. 1151 virtual void DisableNack() = 0; 1152 1153 1154 // Temporary solution to be used during refactoring. 1155 // If DTX is enabled and the codec does not have internal DTX/VAD 1156 // WebRtc VAD will be automatically enabled and |enable_vad| is ignored. 1157 // 1158 // If DTX is disabled but VAD is enabled no DTX packets are sent, 1159 // regardless of whether the codec has internal DTX/VAD or not. In this 1160 // case, WebRtc VAD is running to label frames as active/in-active. 1161 // 1162 // NOTE! VAD/DTX is not supported when sending stereo. 
1163 // 1164 // Return true if successful, false otherwise. 1165 virtual bool SetVad(bool enable_dtx, 1166 bool enable_vad, 1167 ACMVADMode vad_mode) = 0; 1168 1169 // Returns a list of packets to request retransmission of. 1170 // |round_trip_time_ms| is an estimate of the round-trip-time (in 1171 // milliseconds). Missing packets which will be decoded sooner than the 1172 // round-trip-time (with respect to the time this API is called) will not be 1173 // included in the list. 1174 // |round_trip_time_ms| must be non-negative. 1175 virtual std::vector<uint16_t> GetNackList(int round_trip_time_ms) const = 0; 1176 1177 // Returns the timing statistics for calls to Get10MsAudio. 1178 virtual void GetDecodingCallStatistics( 1179 AudioDecodingCallStats* call_stats) const = 0; 1180 }; 1181 1182 } // namespace webrtc 1183 1184 #endif // WEBRTC_MODULES_AUDIO_CODING_MAIN_INTERFACE_AUDIO_CODING_MODULE_H_ 1185