1 /* 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #ifndef WEBRTC_MODULES_AUDIO_CODING_CODECS_AUDIO_ENCODER_H_ 12 #define WEBRTC_MODULES_AUDIO_CODING_CODECS_AUDIO_ENCODER_H_ 13 14 #include <algorithm> 15 #include <vector> 16 17 #include "webrtc/base/array_view.h" 18 #include "webrtc/typedefs.h" 19 20 namespace webrtc { 21 22 // This is the interface class for encoders in AudioCoding module. Each codec 23 // type must have an implementation of this class. 24 class AudioEncoder { 25 public: 26 struct EncodedInfoLeaf { 27 size_t encoded_bytes = 0; 28 uint32_t encoded_timestamp = 0; 29 int payload_type = 0; 30 bool send_even_if_empty = false; 31 bool speech = true; 32 }; 33 34 // This is the main struct for auxiliary encoding information. Each encoded 35 // packet should be accompanied by one EncodedInfo struct, containing the 36 // total number of |encoded_bytes|, the |encoded_timestamp| and the 37 // |payload_type|. If the packet contains redundant encodings, the |redundant| 38 // vector will be populated with EncodedInfoLeaf structs. Each struct in the 39 // vector represents one encoding; the order of structs in the vector is the 40 // same as the order in which the actual payloads are written to the byte 41 // stream. When EncoderInfoLeaf structs are present in the vector, the main 42 // struct's |encoded_bytes| will be the sum of all the |encoded_bytes| in the 43 // vector. 44 struct EncodedInfo : public EncodedInfoLeaf { 45 EncodedInfo(); 46 ~EncodedInfo(); 47 48 std::vector<EncodedInfoLeaf> redundant; 49 }; 50 51 virtual ~AudioEncoder() = default; 52 53 // Returns the maximum number of bytes that can be produced by the encoder 54 // at each Encode() call. The caller can use the return value to determine 55 // the size of the buffer that needs to be allocated. This value is allowed 56 // to depend on encoder parameters like bitrate, frame size etc., so if 57 // any of these change, the caller of Encode() is responsible for checking 58 // that the buffer is large enough by calling MaxEncodedBytes() again. 59 virtual size_t MaxEncodedBytes() const = 0; 60 61 // Returns the input sample rate in Hz and the number of input channels. 62 // These are constants set at instantiation time. 63 virtual int SampleRateHz() const = 0; 64 virtual size_t NumChannels() const = 0; 65 66 // Returns the rate at which the RTP timestamps are updated. The default 67 // implementation returns SampleRateHz(). 68 virtual int RtpTimestampRateHz() const; 69 70 // Returns the number of 10 ms frames the encoder will put in the next 71 // packet. This value may only change when Encode() outputs a packet; i.e., 72 // the encoder may vary the number of 10 ms frames from packet to packet, but 73 // it must decide the length of the next packet no later than when outputting 74 // the preceding packet. 75 virtual size_t Num10MsFramesInNextPacket() const = 0; 76 77 // Returns the maximum value that can be returned by 78 // Num10MsFramesInNextPacket(). 79 virtual size_t Max10MsFramesInAPacket() const = 0; 80 81 // Returns the current target bitrate in bits/s. The value -1 means that the 82 // codec adapts the target automatically, and a current target cannot be 83 // provided. 84 virtual int GetTargetBitrate() const = 0; 85 86 // Accepts one 10 ms block of input audio (i.e., SampleRateHz() / 100 * 87 // NumChannels() samples). Multi-channel audio must be sample-interleaved. 88 // The encoder produces zero or more bytes of output in |encoded| and 89 // returns additional encoding information. 90 // The caller is responsible for making sure that |max_encoded_bytes| is 91 // not smaller than the number of bytes actually produced by the encoder. 92 // Encode() checks some preconditions, calls EncodeInternal() which does the 93 // actual work, and then checks some postconditions. 94 EncodedInfo Encode(uint32_t rtp_timestamp, 95 rtc::ArrayView<const int16_t> audio, 96 size_t max_encoded_bytes, 97 uint8_t* encoded); 98 99 virtual EncodedInfo EncodeInternal(uint32_t rtp_timestamp, 100 rtc::ArrayView<const int16_t> audio, 101 size_t max_encoded_bytes, 102 uint8_t* encoded) = 0; 103 104 // Resets the encoder to its starting state, discarding any input that has 105 // been fed to the encoder but not yet emitted in a packet. 106 virtual void Reset() = 0; 107 108 // Enables or disables codec-internal FEC (forward error correction). Returns 109 // true if the codec was able to comply. The default implementation returns 110 // true when asked to disable FEC and false when asked to enable it (meaning 111 // that FEC isn't supported). 112 virtual bool SetFec(bool enable); 113 114 // Enables or disables codec-internal VAD/DTX. Returns true if the codec was 115 // able to comply. The default implementation returns true when asked to 116 // disable DTX and false when asked to enable it (meaning that DTX isn't 117 // supported). 118 virtual bool SetDtx(bool enable); 119 120 // Sets the application mode. Returns true if the codec was able to comply. 121 // The default implementation just returns false. 122 enum class Application { kSpeech, kAudio }; 123 virtual bool SetApplication(Application application); 124 125 // Tells the encoder about the highest sample rate the decoder is expected to 126 // use when decoding the bitstream. The encoder would typically use this 127 // information to adjust the quality of the encoding. The default 128 // implementation does nothing. 129 virtual void SetMaxPlaybackRate(int frequency_hz); 130 131 // Tells the encoder what the projected packet loss rate is. The rate is in 132 // the range [0.0, 1.0]. The encoder would typically use this information to 133 // adjust channel coding efforts, such as FEC. The default implementation 134 // does nothing. 135 virtual void SetProjectedPacketLossRate(double fraction); 136 137 // Tells the encoder what average bitrate we'd like it to produce. The 138 // encoder is free to adjust or disregard the given bitrate (the default 139 // implementation does the latter). 140 virtual void SetTargetBitrate(int target_bps); 141 }; 142 } // namespace webrtc 143 #endif // WEBRTC_MODULES_AUDIO_CODING_CODECS_AUDIO_ENCODER_H_ 144