1 /* 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "webrtc/modules/audio_coding/neteq/background_noise.h" 12 13 #include <assert.h> 14 #include <string.h> // memcpy 15 16 #include <algorithm> // min, max 17 18 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 19 #include "webrtc/modules/audio_coding/neteq/audio_multi_vector.h" 20 #include "webrtc/modules/audio_coding/neteq/post_decode_vad.h" 21 22 namespace webrtc { 23 24 // static 25 const size_t BackgroundNoise::kMaxLpcOrder; 26 27 BackgroundNoise::BackgroundNoise(size_t num_channels) 28 : num_channels_(num_channels), 29 channel_parameters_(new ChannelParameters[num_channels_]), 30 mode_(NetEq::kBgnOn) { 31 Reset(); 32 } 33 34 BackgroundNoise::~BackgroundNoise() {} 35 36 void BackgroundNoise::Reset() { 37 initialized_ = false; 38 for (size_t channel = 0; channel < num_channels_; ++channel) { 39 channel_parameters_[channel].Reset(); 40 } 41 // Keep _bgnMode as it is. 42 } 43 44 void BackgroundNoise::Update(const AudioMultiVector& input, 45 const PostDecodeVad& vad) { 46 if (vad.running() && vad.active_speech()) { 47 // Do not update the background noise parameters if we know that the signal 48 // is active speech. 49 return; 50 } 51 52 int32_t auto_correlation[kMaxLpcOrder + 1]; 53 int16_t fiter_output[kMaxLpcOrder + kResidualLength]; 54 int16_t reflection_coefficients[kMaxLpcOrder]; 55 int16_t lpc_coefficients[kMaxLpcOrder + 1]; 56 57 for (size_t channel_ix = 0; channel_ix < num_channels_; ++channel_ix) { 58 ChannelParameters& parameters = channel_parameters_[channel_ix]; 59 int16_t temp_signal_array[kVecLen + kMaxLpcOrder] = {0}; 60 int16_t* temp_signal = &temp_signal_array[kMaxLpcOrder]; 61 memcpy(temp_signal, 62 &input[channel_ix][input.Size() - kVecLen], 63 sizeof(int16_t) * kVecLen); 64 65 int32_t sample_energy = CalculateAutoCorrelation(temp_signal, kVecLen, 66 auto_correlation); 67 68 if ((!vad.running() && 69 sample_energy < parameters.energy_update_threshold) || 70 (vad.running() && !vad.active_speech())) { 71 // Generate LPC coefficients. 72 if (auto_correlation[0] > 0) { 73 // Regardless of whether the filter is actually updated or not, 74 // update energy threshold levels, since we have in fact observed 75 // a low energy signal. 76 if (sample_energy < parameters.energy_update_threshold) { 77 // Never go under 1.0 in average sample energy. 78 parameters.energy_update_threshold = std::max(sample_energy, 1); 79 parameters.low_energy_update_threshold = 0; 80 } 81 82 // Only update BGN if filter is stable, i.e., if return value from 83 // Levinson-Durbin function is 1. 84 if (WebRtcSpl_LevinsonDurbin(auto_correlation, lpc_coefficients, 85 reflection_coefficients, 86 kMaxLpcOrder) != 1) { 87 return; 88 } 89 } else { 90 // Center value in auto-correlation is not positive. Do not update. 91 return; 92 } 93 94 // Generate the CNG gain factor by looking at the energy of the residual. 95 WebRtcSpl_FilterMAFastQ12(temp_signal + kVecLen - kResidualLength, 96 fiter_output, lpc_coefficients, 97 kMaxLpcOrder + 1, kResidualLength); 98 int32_t residual_energy = WebRtcSpl_DotProductWithScale(fiter_output, 99 fiter_output, 100 kResidualLength, 101 0); 102 103 // Check spectral flatness. 104 // Comparing the residual variance with the input signal variance tells 105 // if the spectrum is flat or not. 106 // If 20 * residual_energy >= sample_energy << 6, the spectrum is flat 107 // enough. Also ensure that the energy is non-zero. 108 if ((residual_energy * 20 >= (sample_energy << 6)) && 109 (sample_energy > 0)) { 110 // Spectrum is flat enough; save filter parameters. 111 // |temp_signal| + |kVecLen| - |kMaxLpcOrder| points at the first of the 112 // |kMaxLpcOrder| samples in the residual signal, which will form the 113 // filter state for the next noise generation. 114 SaveParameters(channel_ix, lpc_coefficients, 115 temp_signal + kVecLen - kMaxLpcOrder, sample_energy, 116 residual_energy); 117 } 118 } else { 119 // Will only happen if post-decode VAD is disabled and |sample_energy| is 120 // not low enough. Increase the threshold for update so that it increases 121 // by a factor 4 in 4 seconds. 122 IncrementEnergyThreshold(channel_ix, sample_energy); 123 } 124 } 125 return; 126 } 127 128 int32_t BackgroundNoise::Energy(size_t channel) const { 129 assert(channel < num_channels_); 130 return channel_parameters_[channel].energy; 131 } 132 133 void BackgroundNoise::SetMuteFactor(size_t channel, int16_t value) { 134 assert(channel < num_channels_); 135 channel_parameters_[channel].mute_factor = value; 136 } 137 138 int16_t BackgroundNoise::MuteFactor(size_t channel) const { 139 assert(channel < num_channels_); 140 return channel_parameters_[channel].mute_factor; 141 } 142 143 const int16_t* BackgroundNoise::Filter(size_t channel) const { 144 assert(channel < num_channels_); 145 return channel_parameters_[channel].filter; 146 } 147 148 const int16_t* BackgroundNoise::FilterState(size_t channel) const { 149 assert(channel < num_channels_); 150 return channel_parameters_[channel].filter_state; 151 } 152 153 void BackgroundNoise::SetFilterState(size_t channel, const int16_t* input, 154 size_t length) { 155 assert(channel < num_channels_); 156 length = std::min(length, kMaxLpcOrder); 157 memcpy(channel_parameters_[channel].filter_state, input, 158 length * sizeof(int16_t)); 159 } 160 161 int16_t BackgroundNoise::Scale(size_t channel) const { 162 assert(channel < num_channels_); 163 return channel_parameters_[channel].scale; 164 } 165 int16_t BackgroundNoise::ScaleShift(size_t channel) const { 166 assert(channel < num_channels_); 167 return channel_parameters_[channel].scale_shift; 168 } 169 170 int32_t BackgroundNoise::CalculateAutoCorrelation( 171 const int16_t* signal, size_t length, int32_t* auto_correlation) const { 172 int16_t signal_max = WebRtcSpl_MaxAbsValueW16(signal, length); 173 int correlation_scale = kLogVecLen - 174 WebRtcSpl_NormW32(signal_max * signal_max); 175 correlation_scale = std::max(0, correlation_scale); 176 177 static const int kCorrelationStep = -1; 178 WebRtcSpl_CrossCorrelation(auto_correlation, signal, signal, length, 179 kMaxLpcOrder + 1, correlation_scale, 180 kCorrelationStep); 181 182 // Number of shifts to normalize energy to energy/sample. 183 int energy_sample_shift = kLogVecLen - correlation_scale; 184 return auto_correlation[0] >> energy_sample_shift; 185 } 186 187 void BackgroundNoise::IncrementEnergyThreshold(size_t channel, 188 int32_t sample_energy) { 189 // TODO(hlundin): Simplify the below threshold update. What this code 190 // does is simply "threshold += (increment * threshold) >> 16", but due 191 // to the limited-width operations, it is not exactly the same. The 192 // difference should be inaudible, but bit-exactness would not be 193 // maintained. 194 assert(channel < num_channels_); 195 ChannelParameters& parameters = channel_parameters_[channel]; 196 int32_t temp_energy = 197 (kThresholdIncrement * parameters.low_energy_update_threshold) >> 16; 198 temp_energy += kThresholdIncrement * 199 (parameters.energy_update_threshold & 0xFF); 200 temp_energy += (kThresholdIncrement * 201 ((parameters.energy_update_threshold>>8) & 0xFF)) << 8; 202 parameters.low_energy_update_threshold += temp_energy; 203 204 parameters.energy_update_threshold += kThresholdIncrement * 205 (parameters.energy_update_threshold>>16); 206 parameters.energy_update_threshold += 207 parameters.low_energy_update_threshold >> 16; 208 parameters.low_energy_update_threshold = 209 parameters.low_energy_update_threshold & 0x0FFFF; 210 211 // Update maximum energy. 212 // Decrease by a factor 1/1024 each time. 213 parameters.max_energy = parameters.max_energy - 214 (parameters.max_energy >> 10); 215 if (sample_energy > parameters.max_energy) { 216 parameters.max_energy = sample_energy; 217 } 218 219 // Set |energy_update_threshold| to no less than 60 dB lower than 220 // |max_energy_|. Adding 524288 assures proper rounding. 221 int32_t energy_update_threshold = (parameters.max_energy + 524288) >> 20; 222 if (energy_update_threshold > parameters.energy_update_threshold) { 223 parameters.energy_update_threshold = energy_update_threshold; 224 } 225 } 226 227 void BackgroundNoise::SaveParameters(size_t channel, 228 const int16_t* lpc_coefficients, 229 const int16_t* filter_state, 230 int32_t sample_energy, 231 int32_t residual_energy) { 232 assert(channel < num_channels_); 233 ChannelParameters& parameters = channel_parameters_[channel]; 234 memcpy(parameters.filter, lpc_coefficients, 235 (kMaxLpcOrder+1) * sizeof(int16_t)); 236 memcpy(parameters.filter_state, filter_state, 237 kMaxLpcOrder * sizeof(int16_t)); 238 // Save energy level and update energy threshold levels. 239 // Never get under 1.0 in average sample energy. 240 parameters.energy = std::max(sample_energy, 1); 241 parameters.energy_update_threshold = parameters.energy; 242 parameters.low_energy_update_threshold = 0; 243 244 // Normalize residual_energy to 29 or 30 bits before sqrt. 245 int16_t norm_shift = WebRtcSpl_NormW32(residual_energy) - 1; 246 if (norm_shift & 0x1) { 247 norm_shift -= 1; // Even number of shifts required. 248 } 249 residual_energy = WEBRTC_SPL_SHIFT_W32(residual_energy, norm_shift); 250 251 // Calculate scale and shift factor. 252 parameters.scale = static_cast<int16_t>(WebRtcSpl_SqrtFloor(residual_energy)); 253 // Add 13 to the |scale_shift_|, since the random numbers table is in 254 // Q13. 255 // TODO(hlundin): Move the "13" to where the |scale_shift_| is used? 256 parameters.scale_shift = 257 static_cast<int16_t>(13 + ((kLogResidualLength + norm_shift) / 2)); 258 259 initialized_ = true; 260 } 261 262 } // namespace webrtc 263