1 /* 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "webrtc/modules/audio_coding/neteq/expand.h" 12 13 #include <assert.h> 14 #include <string.h> // memset 15 16 #include <algorithm> // min, max 17 #include <limits> // numeric_limits<T> 18 19 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 20 #include "webrtc/modules/audio_coding/neteq/background_noise.h" 21 #include "webrtc/modules/audio_coding/neteq/dsp_helper.h" 22 #include "webrtc/modules/audio_coding/neteq/random_vector.h" 23 #include "webrtc/modules/audio_coding/neteq/sync_buffer.h" 24 25 namespace webrtc { 26 27 void Expand::Reset() { 28 first_expand_ = true; 29 consecutive_expands_ = 0; 30 max_lag_ = 0; 31 for (size_t ix = 0; ix < num_channels_; ++ix) { 32 channel_parameters_[ix].expand_vector0.Clear(); 33 channel_parameters_[ix].expand_vector1.Clear(); 34 } 35 } 36 37 int Expand::Process(AudioMultiVector* output) { 38 int16_t random_vector[kMaxSampleRate / 8000 * 120 + 30]; 39 int16_t scaled_random_vector[kMaxSampleRate / 8000 * 125]; 40 static const int kTempDataSize = 3600; 41 int16_t temp_data[kTempDataSize]; // TODO(hlundin) Remove this. 42 int16_t* voiced_vector_storage = temp_data; 43 int16_t* voiced_vector = &voiced_vector_storage[overlap_length_]; 44 static const int kNoiseLpcOrder = BackgroundNoise::kMaxLpcOrder; 45 int16_t unvoiced_array_memory[kNoiseLpcOrder + kMaxSampleRate / 8000 * 125]; 46 int16_t* unvoiced_vector = unvoiced_array_memory + kUnvoicedLpcOrder; 47 int16_t* noise_vector = unvoiced_array_memory + kNoiseLpcOrder; 48 49 int fs_mult = fs_hz_ / 8000; 50 51 if (first_expand_) { 52 // Perform initial setup if this is the first expansion since last reset. 53 AnalyzeSignal(random_vector); 54 first_expand_ = false; 55 } else { 56 // This is not the first expansion, parameters are already estimated. 57 // Extract a noise segment. 58 int16_t rand_length = max_lag_; 59 // This only applies to SWB where length could be larger than 256. 60 assert(rand_length <= kMaxSampleRate / 8000 * 120 + 30); 61 GenerateRandomVector(2, rand_length, random_vector); 62 } 63 64 65 // Generate signal. 66 UpdateLagIndex(); 67 68 // Voiced part. 69 // Generate a weighted vector with the current lag. 70 size_t expansion_vector_length = max_lag_ + overlap_length_; 71 size_t current_lag = expand_lags_[current_lag_index_]; 72 // Copy lag+overlap data. 73 size_t expansion_vector_position = expansion_vector_length - current_lag - 74 overlap_length_; 75 size_t temp_length = current_lag + overlap_length_; 76 for (size_t channel_ix = 0; channel_ix < num_channels_; ++channel_ix) { 77 ChannelParameters& parameters = channel_parameters_[channel_ix]; 78 if (current_lag_index_ == 0) { 79 // Use only expand_vector0. 80 assert(expansion_vector_position + temp_length <= 81 parameters.expand_vector0.Size()); 82 memcpy(voiced_vector_storage, 83 ¶meters.expand_vector0[expansion_vector_position], 84 sizeof(int16_t) * temp_length); 85 } else if (current_lag_index_ == 1) { 86 // Mix 3/4 of expand_vector0 with 1/4 of expand_vector1. 87 WebRtcSpl_ScaleAndAddVectorsWithRound( 88 ¶meters.expand_vector0[expansion_vector_position], 3, 89 ¶meters.expand_vector1[expansion_vector_position], 1, 2, 90 voiced_vector_storage, static_cast<int>(temp_length)); 91 } else if (current_lag_index_ == 2) { 92 // Mix 1/2 of expand_vector0 with 1/2 of expand_vector1. 93 assert(expansion_vector_position + temp_length <= 94 parameters.expand_vector0.Size()); 95 assert(expansion_vector_position + temp_length <= 96 parameters.expand_vector1.Size()); 97 WebRtcSpl_ScaleAndAddVectorsWithRound( 98 ¶meters.expand_vector0[expansion_vector_position], 1, 99 ¶meters.expand_vector1[expansion_vector_position], 1, 1, 100 voiced_vector_storage, static_cast<int>(temp_length)); 101 } 102 103 // Get tapering window parameters. Values are in Q15. 104 int16_t muting_window, muting_window_increment; 105 int16_t unmuting_window, unmuting_window_increment; 106 if (fs_hz_ == 8000) { 107 muting_window = DspHelper::kMuteFactorStart8kHz; 108 muting_window_increment = DspHelper::kMuteFactorIncrement8kHz; 109 unmuting_window = DspHelper::kUnmuteFactorStart8kHz; 110 unmuting_window_increment = DspHelper::kUnmuteFactorIncrement8kHz; 111 } else if (fs_hz_ == 16000) { 112 muting_window = DspHelper::kMuteFactorStart16kHz; 113 muting_window_increment = DspHelper::kMuteFactorIncrement16kHz; 114 unmuting_window = DspHelper::kUnmuteFactorStart16kHz; 115 unmuting_window_increment = DspHelper::kUnmuteFactorIncrement16kHz; 116 } else if (fs_hz_ == 32000) { 117 muting_window = DspHelper::kMuteFactorStart32kHz; 118 muting_window_increment = DspHelper::kMuteFactorIncrement32kHz; 119 unmuting_window = DspHelper::kUnmuteFactorStart32kHz; 120 unmuting_window_increment = DspHelper::kUnmuteFactorIncrement32kHz; 121 } else { // fs_ == 48000 122 muting_window = DspHelper::kMuteFactorStart48kHz; 123 muting_window_increment = DspHelper::kMuteFactorIncrement48kHz; 124 unmuting_window = DspHelper::kUnmuteFactorStart48kHz; 125 unmuting_window_increment = DspHelper::kUnmuteFactorIncrement48kHz; 126 } 127 128 // Smooth the expanded if it has not been muted to a low amplitude and 129 // |current_voice_mix_factor| is larger than 0.5. 130 if ((parameters.mute_factor > 819) && 131 (parameters.current_voice_mix_factor > 8192)) { 132 size_t start_ix = sync_buffer_->Size() - overlap_length_; 133 for (size_t i = 0; i < overlap_length_; i++) { 134 // Do overlap add between new vector and overlap. 135 (*sync_buffer_)[channel_ix][start_ix + i] = 136 (((*sync_buffer_)[channel_ix][start_ix + i] * muting_window) + 137 (((parameters.mute_factor * voiced_vector_storage[i]) >> 14) * 138 unmuting_window) + 16384) >> 15; 139 muting_window += muting_window_increment; 140 unmuting_window += unmuting_window_increment; 141 } 142 } else if (parameters.mute_factor == 0) { 143 // The expanded signal will consist of only comfort noise if 144 // mute_factor = 0. Set the output length to 15 ms for best noise 145 // production. 146 // TODO(hlundin): This has been disabled since the length of 147 // parameters.expand_vector0 and parameters.expand_vector1 no longer 148 // match with expand_lags_, causing invalid reads and writes. Is it a good 149 // idea to enable this again, and solve the vector size problem? 150 // max_lag_ = fs_mult * 120; 151 // expand_lags_[0] = fs_mult * 120; 152 // expand_lags_[1] = fs_mult * 120; 153 // expand_lags_[2] = fs_mult * 120; 154 } 155 156 // Unvoiced part. 157 // Filter |scaled_random_vector| through |ar_filter_|. 158 memcpy(unvoiced_vector - kUnvoicedLpcOrder, parameters.ar_filter_state, 159 sizeof(int16_t) * kUnvoicedLpcOrder); 160 int32_t add_constant = 0; 161 if (parameters.ar_gain_scale > 0) { 162 add_constant = 1 << (parameters.ar_gain_scale - 1); 163 } 164 WebRtcSpl_AffineTransformVector(scaled_random_vector, random_vector, 165 parameters.ar_gain, add_constant, 166 parameters.ar_gain_scale, 167 static_cast<int>(current_lag)); 168 WebRtcSpl_FilterARFastQ12(scaled_random_vector, unvoiced_vector, 169 parameters.ar_filter, kUnvoicedLpcOrder + 1, 170 static_cast<int>(current_lag)); 171 memcpy(parameters.ar_filter_state, 172 &(unvoiced_vector[current_lag - kUnvoicedLpcOrder]), 173 sizeof(int16_t) * kUnvoicedLpcOrder); 174 175 // Combine voiced and unvoiced contributions. 176 177 // Set a suitable cross-fading slope. 178 // For lag = 179 // <= 31 * fs_mult => go from 1 to 0 in about 8 ms; 180 // (>= 31 .. <= 63) * fs_mult => go from 1 to 0 in about 16 ms; 181 // >= 64 * fs_mult => go from 1 to 0 in about 32 ms. 182 // temp_shift = getbits(max_lag_) - 5. 183 int temp_shift = (31 - WebRtcSpl_NormW32(max_lag_)) - 5; 184 int16_t mix_factor_increment = 256 >> temp_shift; 185 if (stop_muting_) { 186 mix_factor_increment = 0; 187 } 188 189 // Create combined signal by shifting in more and more of unvoiced part. 190 temp_shift = 8 - temp_shift; // = getbits(mix_factor_increment). 191 size_t temp_lenght = (parameters.current_voice_mix_factor - 192 parameters.voice_mix_factor) >> temp_shift; 193 temp_lenght = std::min(temp_lenght, current_lag); 194 DspHelper::CrossFade(voiced_vector, unvoiced_vector, temp_lenght, 195 ¶meters.current_voice_mix_factor, 196 mix_factor_increment, temp_data); 197 198 // End of cross-fading period was reached before end of expanded signal 199 // path. Mix the rest with a fixed mixing factor. 200 if (temp_lenght < current_lag) { 201 if (mix_factor_increment != 0) { 202 parameters.current_voice_mix_factor = parameters.voice_mix_factor; 203 } 204 int temp_scale = 16384 - parameters.current_voice_mix_factor; 205 WebRtcSpl_ScaleAndAddVectorsWithRound( 206 voiced_vector + temp_lenght, parameters.current_voice_mix_factor, 207 unvoiced_vector + temp_lenght, temp_scale, 14, 208 temp_data + temp_lenght, static_cast<int>(current_lag - temp_lenght)); 209 } 210 211 // Select muting slope depending on how many consecutive expands we have 212 // done. 213 if (consecutive_expands_ == 3) { 214 // Let the mute factor decrease from 1.0 to 0.95 in 6.25 ms. 215 // mute_slope = 0.0010 / fs_mult in Q20. 216 parameters.mute_slope = std::max(parameters.mute_slope, 217 static_cast<int16_t>(1049 / fs_mult)); 218 } 219 if (consecutive_expands_ == 7) { 220 // Let the mute factor decrease from 1.0 to 0.90 in 6.25 ms. 221 // mute_slope = 0.0020 / fs_mult in Q20. 222 parameters.mute_slope = std::max(parameters.mute_slope, 223 static_cast<int16_t>(2097 / fs_mult)); 224 } 225 226 // Mute segment according to slope value. 227 if ((consecutive_expands_ != 0) || !parameters.onset) { 228 // Mute to the previous level, then continue with the muting. 229 WebRtcSpl_AffineTransformVector(temp_data, temp_data, 230 parameters.mute_factor, 8192, 231 14, static_cast<int>(current_lag)); 232 233 if (!stop_muting_) { 234 DspHelper::MuteSignal(temp_data, parameters.mute_slope, current_lag); 235 236 // Shift by 6 to go from Q20 to Q14. 237 // TODO(hlundin): Adding 8192 before shifting 6 steps seems wrong. 238 // Legacy. 239 int16_t gain = static_cast<int16_t>(16384 - 240 (((current_lag * parameters.mute_slope) + 8192) >> 6)); 241 gain = ((gain * parameters.mute_factor) + 8192) >> 14; 242 243 // Guard against getting stuck with very small (but sometimes audible) 244 // gain. 245 if ((consecutive_expands_ > 3) && (gain >= parameters.mute_factor)) { 246 parameters.mute_factor = 0; 247 } else { 248 parameters.mute_factor = gain; 249 } 250 } 251 } 252 253 // Background noise part. 254 GenerateBackgroundNoise(random_vector, 255 channel_ix, 256 channel_parameters_[channel_ix].mute_slope, 257 TooManyExpands(), 258 current_lag, 259 unvoiced_array_memory); 260 261 // Add background noise to the combined voiced-unvoiced signal. 262 for (size_t i = 0; i < current_lag; i++) { 263 temp_data[i] = temp_data[i] + noise_vector[i]; 264 } 265 if (channel_ix == 0) { 266 output->AssertSize(current_lag); 267 } else { 268 assert(output->Size() == current_lag); 269 } 270 memcpy(&(*output)[channel_ix][0], temp_data, 271 sizeof(temp_data[0]) * current_lag); 272 } 273 274 // Increase call number and cap it. 275 consecutive_expands_ = consecutive_expands_ >= kMaxConsecutiveExpands ? 276 kMaxConsecutiveExpands : consecutive_expands_ + 1; 277 return 0; 278 } 279 280 void Expand::SetParametersForNormalAfterExpand() { 281 current_lag_index_ = 0; 282 lag_index_direction_ = 0; 283 stop_muting_ = true; // Do not mute signal any more. 284 } 285 286 void Expand::SetParametersForMergeAfterExpand() { 287 current_lag_index_ = -1; /* out of the 3 possible ones */ 288 lag_index_direction_ = 1; /* make sure we get the "optimal" lag */ 289 stop_muting_ = true; 290 } 291 292 void Expand::InitializeForAnExpandPeriod() { 293 lag_index_direction_ = 1; 294 current_lag_index_ = -1; 295 stop_muting_ = false; 296 random_vector_->set_seed_increment(1); 297 consecutive_expands_ = 0; 298 for (size_t ix = 0; ix < num_channels_; ++ix) { 299 channel_parameters_[ix].current_voice_mix_factor = 16384; // 1.0 in Q14. 300 channel_parameters_[ix].mute_factor = 16384; // 1.0 in Q14. 301 // Start with 0 gain for background noise. 302 background_noise_->SetMuteFactor(ix, 0); 303 } 304 } 305 306 bool Expand::TooManyExpands() { 307 return consecutive_expands_ >= kMaxConsecutiveExpands; 308 } 309 310 void Expand::AnalyzeSignal(int16_t* random_vector) { 311 int32_t auto_correlation[kUnvoicedLpcOrder + 1]; 312 int16_t reflection_coeff[kUnvoicedLpcOrder]; 313 int16_t correlation_vector[kMaxSampleRate / 8000 * 102]; 314 int best_correlation_index[kNumCorrelationCandidates]; 315 int16_t best_correlation[kNumCorrelationCandidates]; 316 int16_t best_distortion_index[kNumCorrelationCandidates]; 317 int16_t best_distortion[kNumCorrelationCandidates]; 318 int32_t correlation_vector2[(99 * kMaxSampleRate / 8000) + 1]; 319 int32_t best_distortion_w32[kNumCorrelationCandidates]; 320 static const int kNoiseLpcOrder = BackgroundNoise::kMaxLpcOrder; 321 int16_t unvoiced_array_memory[kNoiseLpcOrder + kMaxSampleRate / 8000 * 125]; 322 int16_t* unvoiced_vector = unvoiced_array_memory + kUnvoicedLpcOrder; 323 324 int fs_mult = fs_hz_ / 8000; 325 326 // Pre-calculate common multiplications with fs_mult. 327 int fs_mult_4 = fs_mult * 4; 328 int fs_mult_20 = fs_mult * 20; 329 int fs_mult_120 = fs_mult * 120; 330 int fs_mult_dist_len = fs_mult * kDistortionLength; 331 int fs_mult_lpc_analysis_len = fs_mult * kLpcAnalysisLength; 332 333 const size_t signal_length = 256 * fs_mult; 334 const int16_t* audio_history = 335 &(*sync_buffer_)[0][sync_buffer_->Size() - signal_length]; 336 337 // Initialize. 338 InitializeForAnExpandPeriod(); 339 340 // Calculate correlation in downsampled domain (4 kHz sample rate). 341 int16_t correlation_scale; 342 int correlation_length = 51; // TODO(hlundin): Legacy bit-exactness. 343 // If it is decided to break bit-exactness |correlation_length| should be 344 // initialized to the return value of Correlation(). 345 Correlation(audio_history, signal_length, correlation_vector, 346 &correlation_scale); 347 348 // Find peaks in correlation vector. 349 DspHelper::PeakDetection(correlation_vector, correlation_length, 350 kNumCorrelationCandidates, fs_mult, 351 best_correlation_index, best_correlation); 352 353 // Adjust peak locations; cross-correlation lags start at 2.5 ms 354 // (20 * fs_mult samples). 355 best_correlation_index[0] += fs_mult_20; 356 best_correlation_index[1] += fs_mult_20; 357 best_correlation_index[2] += fs_mult_20; 358 359 // Calculate distortion around the |kNumCorrelationCandidates| best lags. 360 int distortion_scale = 0; 361 for (int i = 0; i < kNumCorrelationCandidates; i++) { 362 int16_t min_index = std::max(fs_mult_20, 363 best_correlation_index[i] - fs_mult_4); 364 int16_t max_index = std::min(fs_mult_120 - 1, 365 best_correlation_index[i] + fs_mult_4); 366 best_distortion_index[i] = DspHelper::MinDistortion( 367 &(audio_history[signal_length - fs_mult_dist_len]), min_index, 368 max_index, fs_mult_dist_len, &best_distortion_w32[i]); 369 distortion_scale = std::max(16 - WebRtcSpl_NormW32(best_distortion_w32[i]), 370 distortion_scale); 371 } 372 // Shift the distortion values to fit in 16 bits. 373 WebRtcSpl_VectorBitShiftW32ToW16(best_distortion, kNumCorrelationCandidates, 374 best_distortion_w32, distortion_scale); 375 376 // Find the maximizing index |i| of the cost function 377 // f[i] = best_correlation[i] / best_distortion[i]. 378 int32_t best_ratio = std::numeric_limits<int32_t>::min(); 379 int best_index = -1; 380 for (int i = 0; i < kNumCorrelationCandidates; ++i) { 381 int32_t ratio; 382 if (best_distortion[i] > 0) { 383 ratio = (best_correlation[i] << 16) / best_distortion[i]; 384 } else if (best_correlation[i] == 0) { 385 ratio = 0; // No correlation set result to zero. 386 } else { 387 ratio = std::numeric_limits<int32_t>::max(); // Denominator is zero. 388 } 389 if (ratio > best_ratio) { 390 best_index = i; 391 best_ratio = ratio; 392 } 393 } 394 395 int distortion_lag = best_distortion_index[best_index]; 396 int correlation_lag = best_correlation_index[best_index]; 397 max_lag_ = std::max(distortion_lag, correlation_lag); 398 399 // Calculate the exact best correlation in the range between 400 // |correlation_lag| and |distortion_lag|. 401 correlation_length = distortion_lag + 10; 402 correlation_length = std::min(correlation_length, fs_mult_120); 403 correlation_length = std::max(correlation_length, 60 * fs_mult); 404 405 int start_index = std::min(distortion_lag, correlation_lag); 406 int correlation_lags = WEBRTC_SPL_ABS_W16((distortion_lag-correlation_lag)) 407 + 1; 408 assert(correlation_lags <= 99 * fs_mult + 1); // Cannot be larger. 409 410 for (size_t channel_ix = 0; channel_ix < num_channels_; ++channel_ix) { 411 ChannelParameters& parameters = channel_parameters_[channel_ix]; 412 // Calculate suitable scaling. 413 int16_t signal_max = WebRtcSpl_MaxAbsValueW16( 414 &audio_history[signal_length - correlation_length - start_index 415 - correlation_lags], 416 correlation_length + start_index + correlation_lags - 1); 417 correlation_scale = ((31 - WebRtcSpl_NormW32(signal_max * signal_max)) 418 + (31 - WebRtcSpl_NormW32(correlation_length))) - 31; 419 correlation_scale = std::max(static_cast<int16_t>(0), correlation_scale); 420 421 // Calculate the correlation, store in |correlation_vector2|. 422 WebRtcSpl_CrossCorrelation( 423 correlation_vector2, 424 &(audio_history[signal_length - correlation_length]), 425 &(audio_history[signal_length - correlation_length - start_index]), 426 correlation_length, correlation_lags, correlation_scale, -1); 427 428 // Find maximizing index. 429 best_index = WebRtcSpl_MaxIndexW32(correlation_vector2, correlation_lags); 430 int32_t max_correlation = correlation_vector2[best_index]; 431 // Compensate index with start offset. 432 best_index = best_index + start_index; 433 434 // Calculate energies. 435 int32_t energy1 = WebRtcSpl_DotProductWithScale( 436 &(audio_history[signal_length - correlation_length]), 437 &(audio_history[signal_length - correlation_length]), 438 correlation_length, correlation_scale); 439 int32_t energy2 = WebRtcSpl_DotProductWithScale( 440 &(audio_history[signal_length - correlation_length - best_index]), 441 &(audio_history[signal_length - correlation_length - best_index]), 442 correlation_length, correlation_scale); 443 444 // Calculate the correlation coefficient between the two portions of the 445 // signal. 446 int16_t corr_coefficient; 447 if ((energy1 > 0) && (energy2 > 0)) { 448 int energy1_scale = std::max(16 - WebRtcSpl_NormW32(energy1), 0); 449 int energy2_scale = std::max(16 - WebRtcSpl_NormW32(energy2), 0); 450 // Make sure total scaling is even (to simplify scale factor after sqrt). 451 if ((energy1_scale + energy2_scale) & 1) { 452 // If sum is odd, add 1 to make it even. 453 energy1_scale += 1; 454 } 455 int16_t scaled_energy1 = energy1 >> energy1_scale; 456 int16_t scaled_energy2 = energy2 >> energy2_scale; 457 int16_t sqrt_energy_product = WebRtcSpl_SqrtFloor( 458 scaled_energy1 * scaled_energy2); 459 // Calculate max_correlation / sqrt(energy1 * energy2) in Q14. 460 int cc_shift = 14 - (energy1_scale + energy2_scale) / 2; 461 max_correlation = WEBRTC_SPL_SHIFT_W32(max_correlation, cc_shift); 462 corr_coefficient = WebRtcSpl_DivW32W16(max_correlation, 463 sqrt_energy_product); 464 corr_coefficient = std::min(static_cast<int16_t>(16384), 465 corr_coefficient); // Cap at 1.0 in Q14. 466 } else { 467 corr_coefficient = 0; 468 } 469 470 // Extract the two vectors expand_vector0 and expand_vector1 from 471 // |audio_history|. 472 int16_t expansion_length = static_cast<int16_t>(max_lag_ + overlap_length_); 473 const int16_t* vector1 = &(audio_history[signal_length - expansion_length]); 474 const int16_t* vector2 = vector1 - distortion_lag; 475 // Normalize the second vector to the same energy as the first. 476 energy1 = WebRtcSpl_DotProductWithScale(vector1, vector1, expansion_length, 477 correlation_scale); 478 energy2 = WebRtcSpl_DotProductWithScale(vector2, vector2, expansion_length, 479 correlation_scale); 480 // Confirm that amplitude ratio sqrt(energy1 / energy2) is within 0.5 - 2.0, 481 // i.e., energy1 / energy1 is within 0.25 - 4. 482 int16_t amplitude_ratio; 483 if ((energy1 / 4 < energy2) && (energy1 > energy2 / 4)) { 484 // Energy constraint fulfilled. Use both vectors and scale them 485 // accordingly. 486 int16_t scaled_energy2 = std::max(16 - WebRtcSpl_NormW32(energy2), 0); 487 int16_t scaled_energy1 = scaled_energy2 - 13; 488 // Calculate scaled_energy1 / scaled_energy2 in Q13. 489 int32_t energy_ratio = WebRtcSpl_DivW32W16( 490 WEBRTC_SPL_SHIFT_W32(energy1, -scaled_energy1), 491 WEBRTC_SPL_RSHIFT_W32(energy2, scaled_energy2)); 492 // Calculate sqrt ratio in Q13 (sqrt of en1/en2 in Q26). 493 amplitude_ratio = WebRtcSpl_SqrtFloor(energy_ratio << 13); 494 // Copy the two vectors and give them the same energy. 495 parameters.expand_vector0.Clear(); 496 parameters.expand_vector0.PushBack(vector1, expansion_length); 497 parameters.expand_vector1.Clear(); 498 if (parameters.expand_vector1.Size() < 499 static_cast<size_t>(expansion_length)) { 500 parameters.expand_vector1.Extend( 501 expansion_length - parameters.expand_vector1.Size()); 502 } 503 WebRtcSpl_AffineTransformVector(¶meters.expand_vector1[0], 504 const_cast<int16_t*>(vector2), 505 amplitude_ratio, 506 4096, 507 13, 508 expansion_length); 509 } else { 510 // Energy change constraint not fulfilled. Only use last vector. 511 parameters.expand_vector0.Clear(); 512 parameters.expand_vector0.PushBack(vector1, expansion_length); 513 // Copy from expand_vector0 to expand_vector1. 514 parameters.expand_vector0.CopyTo(¶meters.expand_vector1); 515 // Set the energy_ratio since it is used by muting slope. 516 if ((energy1 / 4 < energy2) || (energy2 == 0)) { 517 amplitude_ratio = 4096; // 0.5 in Q13. 518 } else { 519 amplitude_ratio = 16384; // 2.0 in Q13. 520 } 521 } 522 523 // Set the 3 lag values. 524 int lag_difference = distortion_lag - correlation_lag; 525 if (lag_difference == 0) { 526 // |distortion_lag| and |correlation_lag| are equal. 527 expand_lags_[0] = distortion_lag; 528 expand_lags_[1] = distortion_lag; 529 expand_lags_[2] = distortion_lag; 530 } else { 531 // |distortion_lag| and |correlation_lag| are not equal; use different 532 // combinations of the two. 533 // First lag is |distortion_lag| only. 534 expand_lags_[0] = distortion_lag; 535 // Second lag is the average of the two. 536 expand_lags_[1] = (distortion_lag + correlation_lag) / 2; 537 // Third lag is the average again, but rounding towards |correlation_lag|. 538 if (lag_difference > 0) { 539 expand_lags_[2] = (distortion_lag + correlation_lag - 1) / 2; 540 } else { 541 expand_lags_[2] = (distortion_lag + correlation_lag + 1) / 2; 542 } 543 } 544 545 // Calculate the LPC and the gain of the filters. 546 // Calculate scale value needed for auto-correlation. 547 correlation_scale = WebRtcSpl_MaxAbsValueW16( 548 &(audio_history[signal_length - fs_mult_lpc_analysis_len]), 549 fs_mult_lpc_analysis_len); 550 551 correlation_scale = std::min(16 - WebRtcSpl_NormW32(correlation_scale), 0); 552 correlation_scale = std::max(correlation_scale * 2 + 7, 0); 553 554 // Calculate kUnvoicedLpcOrder + 1 lags of the auto-correlation function. 555 size_t temp_index = signal_length - fs_mult_lpc_analysis_len - 556 kUnvoicedLpcOrder; 557 // Copy signal to temporary vector to be able to pad with leading zeros. 558 int16_t* temp_signal = new int16_t[fs_mult_lpc_analysis_len 559 + kUnvoicedLpcOrder]; 560 memset(temp_signal, 0, 561 sizeof(int16_t) * (fs_mult_lpc_analysis_len + kUnvoicedLpcOrder)); 562 memcpy(&temp_signal[kUnvoicedLpcOrder], 563 &audio_history[temp_index + kUnvoicedLpcOrder], 564 sizeof(int16_t) * fs_mult_lpc_analysis_len); 565 WebRtcSpl_CrossCorrelation(auto_correlation, 566 &temp_signal[kUnvoicedLpcOrder], 567 &temp_signal[kUnvoicedLpcOrder], 568 fs_mult_lpc_analysis_len, kUnvoicedLpcOrder + 1, 569 correlation_scale, -1); 570 delete [] temp_signal; 571 572 // Verify that variance is positive. 573 if (auto_correlation[0] > 0) { 574 // Estimate AR filter parameters using Levinson-Durbin algorithm; 575 // kUnvoicedLpcOrder + 1 filter coefficients. 576 int16_t stability = WebRtcSpl_LevinsonDurbin(auto_correlation, 577 parameters.ar_filter, 578 reflection_coeff, 579 kUnvoicedLpcOrder); 580 581 // Keep filter parameters only if filter is stable. 582 if (stability != 1) { 583 // Set first coefficient to 4096 (1.0 in Q12). 584 parameters.ar_filter[0] = 4096; 585 // Set remaining |kUnvoicedLpcOrder| coefficients to zero. 586 WebRtcSpl_MemSetW16(parameters.ar_filter + 1, 0, kUnvoicedLpcOrder); 587 } 588 } 589 590 if (channel_ix == 0) { 591 // Extract a noise segment. 592 int16_t noise_length; 593 if (distortion_lag < 40) { 594 noise_length = 2 * distortion_lag + 30; 595 } else { 596 noise_length = distortion_lag + 30; 597 } 598 if (noise_length <= RandomVector::kRandomTableSize) { 599 memcpy(random_vector, RandomVector::kRandomTable, 600 sizeof(int16_t) * noise_length); 601 } else { 602 // Only applies to SWB where length could be larger than 603 // |kRandomTableSize|. 604 memcpy(random_vector, RandomVector::kRandomTable, 605 sizeof(int16_t) * RandomVector::kRandomTableSize); 606 assert(noise_length <= kMaxSampleRate / 8000 * 120 + 30); 607 random_vector_->IncreaseSeedIncrement(2); 608 random_vector_->Generate( 609 noise_length - RandomVector::kRandomTableSize, 610 &random_vector[RandomVector::kRandomTableSize]); 611 } 612 } 613 614 // Set up state vector and calculate scale factor for unvoiced filtering. 615 memcpy(parameters.ar_filter_state, 616 &(audio_history[signal_length - kUnvoicedLpcOrder]), 617 sizeof(int16_t) * kUnvoicedLpcOrder); 618 memcpy(unvoiced_vector - kUnvoicedLpcOrder, 619 &(audio_history[signal_length - 128 - kUnvoicedLpcOrder]), 620 sizeof(int16_t) * kUnvoicedLpcOrder); 621 WebRtcSpl_FilterMAFastQ12( 622 const_cast<int16_t*>(&audio_history[signal_length - 128]), 623 unvoiced_vector, parameters.ar_filter, kUnvoicedLpcOrder + 1, 128); 624 int16_t unvoiced_prescale; 625 if (WebRtcSpl_MaxAbsValueW16(unvoiced_vector, 128) > 4000) { 626 unvoiced_prescale = 4; 627 } else { 628 unvoiced_prescale = 0; 629 } 630 int32_t unvoiced_energy = WebRtcSpl_DotProductWithScale(unvoiced_vector, 631 unvoiced_vector, 632 128, 633 unvoiced_prescale); 634 635 // Normalize |unvoiced_energy| to 28 or 29 bits to preserve sqrt() accuracy. 636 int16_t unvoiced_scale = WebRtcSpl_NormW32(unvoiced_energy) - 3; 637 // Make sure we do an odd number of shifts since we already have 7 shifts 638 // from dividing with 128 earlier. This will make the total scale factor 639 // even, which is suitable for the sqrt. 640 unvoiced_scale += ((unvoiced_scale & 0x1) ^ 0x1); 641 unvoiced_energy = WEBRTC_SPL_SHIFT_W32(unvoiced_energy, unvoiced_scale); 642 int32_t unvoiced_gain = WebRtcSpl_SqrtFloor(unvoiced_energy); 643 parameters.ar_gain_scale = 13 644 + (unvoiced_scale + 7 - unvoiced_prescale) / 2; 645 parameters.ar_gain = unvoiced_gain; 646 647 // Calculate voice_mix_factor from corr_coefficient. 648 // Let x = corr_coefficient. Then, we compute: 649 // if (x > 0.48) 650 // voice_mix_factor = (-5179 + 19931x - 16422x^2 + 5776x^3) / 4096; 651 // else 652 // voice_mix_factor = 0; 653 if (corr_coefficient > 7875) { 654 int16_t x1, x2, x3; 655 x1 = corr_coefficient; // |corr_coefficient| is in Q14. 656 x2 = (x1 * x1) >> 14; // Shift 14 to keep result in Q14. 657 x3 = (x1 * x2) >> 14; 658 static const int kCoefficients[4] = { -5179, 19931, -16422, 5776 }; 659 int32_t temp_sum = kCoefficients[0] << 14; 660 temp_sum += kCoefficients[1] * x1; 661 temp_sum += kCoefficients[2] * x2; 662 temp_sum += kCoefficients[3] * x3; 663 parameters.voice_mix_factor = temp_sum / 4096; 664 parameters.voice_mix_factor = std::min(parameters.voice_mix_factor, 665 static_cast<int16_t>(16384)); 666 parameters.voice_mix_factor = std::max(parameters.voice_mix_factor, 667 static_cast<int16_t>(0)); 668 } else { 669 parameters.voice_mix_factor = 0; 670 } 671 672 // Calculate muting slope. Reuse value from earlier scaling of 673 // |expand_vector0| and |expand_vector1|. 674 int16_t slope = amplitude_ratio; 675 if (slope > 12288) { 676 // slope > 1.5. 677 // Calculate (1 - (1 / slope)) / distortion_lag = 678 // (slope - 1) / (distortion_lag * slope). 679 // |slope| is in Q13, so 1 corresponds to 8192. Shift up to Q25 before 680 // the division. 681 // Shift the denominator from Q13 to Q5 before the division. The result of 682 // the division will then be in Q20. 683 int16_t temp_ratio = WebRtcSpl_DivW32W16((slope - 8192) << 12, 684 (distortion_lag * slope) >> 8); 685 if (slope > 14746) { 686 // slope > 1.8. 687 // Divide by 2, with proper rounding. 688 parameters.mute_slope = (temp_ratio + 1) / 2; 689 } else { 690 // Divide by 8, with proper rounding. 691 parameters.mute_slope = (temp_ratio + 4) / 8; 692 } 693 parameters.onset = true; 694 } else { 695 // Calculate (1 - slope) / distortion_lag. 696 // Shift |slope| by 7 to Q20 before the division. The result is in Q20. 697 parameters.mute_slope = WebRtcSpl_DivW32W16((8192 - slope) << 7, 698 distortion_lag); 699 if (parameters.voice_mix_factor <= 13107) { 700 // Make sure the mute factor decreases from 1.0 to 0.9 in no more than 701 // 6.25 ms. 702 // mute_slope >= 0.005 / fs_mult in Q20. 703 parameters.mute_slope = std::max(static_cast<int16_t>(5243 / fs_mult), 704 parameters.mute_slope); 705 } else if (slope > 8028) { 706 parameters.mute_slope = 0; 707 } 708 parameters.onset = false; 709 } 710 } 711 } 712 713 int16_t Expand::Correlation(const int16_t* input, size_t input_length, 714 int16_t* output, int16_t* output_scale) const { 715 // Set parameters depending on sample rate. 716 const int16_t* filter_coefficients; 717 int16_t num_coefficients; 718 int16_t downsampling_factor; 719 if (fs_hz_ == 8000) { 720 num_coefficients = 3; 721 downsampling_factor = 2; 722 filter_coefficients = DspHelper::kDownsample8kHzTbl; 723 } else if (fs_hz_ == 16000) { 724 num_coefficients = 5; 725 downsampling_factor = 4; 726 filter_coefficients = DspHelper::kDownsample16kHzTbl; 727 } else if (fs_hz_ == 32000) { 728 num_coefficients = 7; 729 downsampling_factor = 8; 730 filter_coefficients = DspHelper::kDownsample32kHzTbl; 731 } else { // fs_hz_ == 48000. 732 num_coefficients = 7; 733 downsampling_factor = 12; 734 filter_coefficients = DspHelper::kDownsample48kHzTbl; 735 } 736 737 // Correlate from lag 10 to lag 60 in downsampled domain. 738 // (Corresponds to 20-120 for narrow-band, 40-240 for wide-band, and so on.) 739 static const int kCorrelationStartLag = 10; 740 static const int kNumCorrelationLags = 54; 741 static const int kCorrelationLength = 60; 742 // Downsample to 4 kHz sample rate. 743 static const int kDownsampledLength = kCorrelationStartLag 744 + kNumCorrelationLags + kCorrelationLength; 745 int16_t downsampled_input[kDownsampledLength]; 746 static const int kFilterDelay = 0; 747 WebRtcSpl_DownsampleFast( 748 input + input_length - kDownsampledLength * downsampling_factor, 749 kDownsampledLength * downsampling_factor, downsampled_input, 750 kDownsampledLength, filter_coefficients, num_coefficients, 751 downsampling_factor, kFilterDelay); 752 753 // Normalize |downsampled_input| to using all 16 bits. 754 int16_t max_value = WebRtcSpl_MaxAbsValueW16(downsampled_input, 755 kDownsampledLength); 756 int16_t norm_shift = 16 - WebRtcSpl_NormW32(max_value); 757 WebRtcSpl_VectorBitShiftW16(downsampled_input, kDownsampledLength, 758 downsampled_input, norm_shift); 759 760 int32_t correlation[kNumCorrelationLags]; 761 static const int kCorrelationShift = 6; 762 WebRtcSpl_CrossCorrelation( 763 correlation, 764 &downsampled_input[kDownsampledLength - kCorrelationLength], 765 &downsampled_input[kDownsampledLength - kCorrelationLength 766 - kCorrelationStartLag], 767 kCorrelationLength, kNumCorrelationLags, kCorrelationShift, -1); 768 769 // Normalize and move data from 32-bit to 16-bit vector. 770 int32_t max_correlation = WebRtcSpl_MaxAbsValueW32(correlation, 771 kNumCorrelationLags); 772 int16_t norm_shift2 = std::max(18 - WebRtcSpl_NormW32(max_correlation), 0); 773 WebRtcSpl_VectorBitShiftW32ToW16(output, kNumCorrelationLags, correlation, 774 norm_shift2); 775 // Total scale factor (right shifts) of correlation value. 776 *output_scale = 2 * norm_shift + kCorrelationShift + norm_shift2; 777 return kNumCorrelationLags; 778 } 779 780 void Expand::UpdateLagIndex() { 781 current_lag_index_ = current_lag_index_ + lag_index_direction_; 782 // Change direction if needed. 783 if (current_lag_index_ <= 0) { 784 lag_index_direction_ = 1; 785 } 786 if (current_lag_index_ >= kNumLags - 1) { 787 lag_index_direction_ = -1; 788 } 789 } 790 791 Expand* ExpandFactory::Create(BackgroundNoise* background_noise, 792 SyncBuffer* sync_buffer, 793 RandomVector* random_vector, 794 int fs, 795 size_t num_channels) const { 796 return new Expand(background_noise, sync_buffer, random_vector, fs, 797 num_channels); 798 } 799 800 // TODO(turajs): This can be moved to BackgroundNoise class. 801 void Expand::GenerateBackgroundNoise(int16_t* random_vector, 802 size_t channel, 803 int16_t mute_slope, 804 bool too_many_expands, 805 size_t num_noise_samples, 806 int16_t* buffer) { 807 static const int kNoiseLpcOrder = BackgroundNoise::kMaxLpcOrder; 808 int16_t scaled_random_vector[kMaxSampleRate / 8000 * 125]; 809 assert(static_cast<size_t>(kMaxSampleRate / 8000 * 125) >= num_noise_samples); 810 int16_t* noise_samples = &buffer[kNoiseLpcOrder]; 811 if (background_noise_->initialized()) { 812 // Use background noise parameters. 813 memcpy(noise_samples - kNoiseLpcOrder, 814 background_noise_->FilterState(channel), 815 sizeof(int16_t) * kNoiseLpcOrder); 816 817 int dc_offset = 0; 818 if (background_noise_->ScaleShift(channel) > 1) { 819 dc_offset = 1 << (background_noise_->ScaleShift(channel) - 1); 820 } 821 822 // Scale random vector to correct energy level. 823 WebRtcSpl_AffineTransformVector( 824 scaled_random_vector, random_vector, 825 background_noise_->Scale(channel), dc_offset, 826 background_noise_->ScaleShift(channel), 827 static_cast<int>(num_noise_samples)); 828 829 WebRtcSpl_FilterARFastQ12(scaled_random_vector, noise_samples, 830 background_noise_->Filter(channel), 831 kNoiseLpcOrder + 1, 832 static_cast<int>(num_noise_samples)); 833 834 background_noise_->SetFilterState( 835 channel, 836 &(noise_samples[num_noise_samples - kNoiseLpcOrder]), 837 kNoiseLpcOrder); 838 839 // Unmute the background noise. 840 int16_t bgn_mute_factor = background_noise_->MuteFactor(channel); 841 NetEq::BackgroundNoiseMode bgn_mode = background_noise_->mode(); 842 if (bgn_mode == NetEq::kBgnFade && too_many_expands && 843 bgn_mute_factor > 0) { 844 // Fade BGN to zero. 845 // Calculate muting slope, approximately -2^18 / fs_hz. 846 int16_t mute_slope; 847 if (fs_hz_ == 8000) { 848 mute_slope = -32; 849 } else if (fs_hz_ == 16000) { 850 mute_slope = -16; 851 } else if (fs_hz_ == 32000) { 852 mute_slope = -8; 853 } else { 854 mute_slope = -5; 855 } 856 // Use UnmuteSignal function with negative slope. 857 // |bgn_mute_factor| is in Q14. |mute_slope| is in Q20. 858 DspHelper::UnmuteSignal(noise_samples, 859 num_noise_samples, 860 &bgn_mute_factor, 861 mute_slope, 862 noise_samples); 863 } else if (bgn_mute_factor < 16384) { 864 // If mode is kBgnOn, or if kBgnFade has started fading, 865 // use regular |mute_slope|. 866 if (!stop_muting_ && bgn_mode != NetEq::kBgnOff && 867 !(bgn_mode == NetEq::kBgnFade && too_many_expands)) { 868 DspHelper::UnmuteSignal(noise_samples, 869 static_cast<int>(num_noise_samples), 870 &bgn_mute_factor, 871 mute_slope, 872 noise_samples); 873 } else { 874 // kBgnOn and stop muting, or 875 // kBgnOff (mute factor is always 0), or 876 // kBgnFade has reached 0. 877 WebRtcSpl_AffineTransformVector(noise_samples, noise_samples, 878 bgn_mute_factor, 8192, 14, 879 static_cast<int>(num_noise_samples)); 880 } 881 } 882 // Update mute_factor in BackgroundNoise class. 883 background_noise_->SetMuteFactor(channel, bgn_mute_factor); 884 } else { 885 // BGN parameters have not been initialized; use zero noise. 886 memset(noise_samples, 0, sizeof(int16_t) * num_noise_samples); 887 } 888 } 889 890 void Expand::GenerateRandomVector(int seed_increment, 891 size_t length, 892 int16_t* random_vector) { 893 // TODO(turajs): According to hlundin The loop should not be needed. Should be 894 // just as good to generate all of the vector in one call. 895 size_t samples_generated = 0; 896 const size_t kMaxRandSamples = RandomVector::kRandomTableSize; 897 while (samples_generated < length) { 898 size_t rand_length = std::min(length - samples_generated, kMaxRandSamples); 899 random_vector_->IncreaseSeedIncrement(seed_increment); 900 random_vector_->Generate(rand_length, &random_vector[samples_generated]); 901 samples_generated += rand_length; 902 } 903 } 904 905 } // namespace webrtc 906