1 /* 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "webrtc/modules/audio_processing/audio_buffer.h" 12 13 #include "webrtc/common_audio/include/audio_util.h" 14 #include "webrtc/common_audio/resampler/push_sinc_resampler.h" 15 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 16 17 namespace webrtc { 18 namespace { 19 20 enum { 21 kSamplesPer8kHzChannel = 80, 22 kSamplesPer16kHzChannel = 160, 23 kSamplesPer32kHzChannel = 320 24 }; 25 26 bool HasKeyboardChannel(AudioProcessing::ChannelLayout layout) { 27 switch (layout) { 28 case AudioProcessing::kMono: 29 case AudioProcessing::kStereo: 30 return false; 31 case AudioProcessing::kMonoAndKeyboard: 32 case AudioProcessing::kStereoAndKeyboard: 33 return true; 34 } 35 assert(false); 36 return false; 37 } 38 39 int KeyboardChannelIndex(AudioProcessing::ChannelLayout layout) { 40 switch (layout) { 41 case AudioProcessing::kMono: 42 case AudioProcessing::kStereo: 43 assert(false); 44 return -1; 45 case AudioProcessing::kMonoAndKeyboard: 46 return 1; 47 case AudioProcessing::kStereoAndKeyboard: 48 return 2; 49 } 50 assert(false); 51 return -1; 52 } 53 54 void StereoToMono(const float* left, const float* right, float* out, 55 int samples_per_channel) { 56 for (int i = 0; i < samples_per_channel; ++i) { 57 out[i] = (left[i] + right[i]) / 2; 58 } 59 } 60 61 void StereoToMono(const int16_t* left, const int16_t* right, int16_t* out, 62 int samples_per_channel) { 63 for (int i = 0; i < samples_per_channel; ++i) { 64 out[i] = (left[i] + right[i]) >> 1; 65 } 66 } 67 68 } // namespace 69 70 // One int16_t and one float ChannelBuffer that are kept in sync. The sync is 71 // broken when someone requests write access to either ChannelBuffer, and 72 // reestablished when someone requests the outdated ChannelBuffer. It is 73 // therefore safe to use the return value of ibuf_const() and fbuf_const() 74 // until the next call to ibuf() or fbuf(), and the return value of ibuf() and 75 // fbuf() until the next call to any of the other functions. 76 class IFChannelBuffer { 77 public: 78 IFChannelBuffer(int samples_per_channel, int num_channels) 79 : ivalid_(true), 80 ibuf_(samples_per_channel, num_channels), 81 fvalid_(true), 82 fbuf_(samples_per_channel, num_channels) {} 83 84 ChannelBuffer<int16_t>* ibuf() { return ibuf(false); } 85 ChannelBuffer<float>* fbuf() { return fbuf(false); } 86 const ChannelBuffer<int16_t>* ibuf_const() { return ibuf(true); } 87 const ChannelBuffer<float>* fbuf_const() { return fbuf(true); } 88 89 private: 90 ChannelBuffer<int16_t>* ibuf(bool readonly) { 91 RefreshI(); 92 fvalid_ = readonly; 93 return &ibuf_; 94 } 95 96 ChannelBuffer<float>* fbuf(bool readonly) { 97 RefreshF(); 98 ivalid_ = readonly; 99 return &fbuf_; 100 } 101 102 void RefreshF() { 103 if (!fvalid_) { 104 assert(ivalid_); 105 const int16_t* const int_data = ibuf_.data(); 106 float* const float_data = fbuf_.data(); 107 const int length = fbuf_.length(); 108 for (int i = 0; i < length; ++i) 109 float_data[i] = int_data[i]; 110 fvalid_ = true; 111 } 112 } 113 114 void RefreshI() { 115 if (!ivalid_) { 116 assert(fvalid_); 117 const float* const float_data = fbuf_.data(); 118 int16_t* const int_data = ibuf_.data(); 119 const int length = ibuf_.length(); 120 for (int i = 0; i < length; ++i) 121 int_data[i] = WEBRTC_SPL_SAT(std::numeric_limits<int16_t>::max(), 122 float_data[i], 123 std::numeric_limits<int16_t>::min()); 124 ivalid_ = true; 125 } 126 } 127 128 bool ivalid_; 129 ChannelBuffer<int16_t> ibuf_; 130 bool fvalid_; 131 ChannelBuffer<float> fbuf_; 132 }; 133 134 AudioBuffer::AudioBuffer(int input_samples_per_channel, 135 int num_input_channels, 136 int process_samples_per_channel, 137 int num_process_channels, 138 int output_samples_per_channel) 139 : input_samples_per_channel_(input_samples_per_channel), 140 num_input_channels_(num_input_channels), 141 proc_samples_per_channel_(process_samples_per_channel), 142 num_proc_channels_(num_process_channels), 143 output_samples_per_channel_(output_samples_per_channel), 144 samples_per_split_channel_(proc_samples_per_channel_), 145 mixed_low_pass_valid_(false), 146 reference_copied_(false), 147 activity_(AudioFrame::kVadUnknown), 148 keyboard_data_(NULL), 149 channels_(new IFChannelBuffer(proc_samples_per_channel_, 150 num_proc_channels_)) { 151 assert(input_samples_per_channel_ > 0); 152 assert(proc_samples_per_channel_ > 0); 153 assert(output_samples_per_channel_ > 0); 154 assert(num_input_channels_ > 0 && num_input_channels_ <= 2); 155 assert(num_proc_channels_ <= num_input_channels); 156 157 if (num_input_channels_ == 2 && num_proc_channels_ == 1) { 158 input_buffer_.reset(new ChannelBuffer<float>(input_samples_per_channel_, 159 num_proc_channels_)); 160 } 161 162 if (input_samples_per_channel_ != proc_samples_per_channel_ || 163 output_samples_per_channel_ != proc_samples_per_channel_) { 164 // Create an intermediate buffer for resampling. 165 process_buffer_.reset(new ChannelBuffer<float>(proc_samples_per_channel_, 166 num_proc_channels_)); 167 } 168 169 if (input_samples_per_channel_ != proc_samples_per_channel_) { 170 input_resamplers_.reserve(num_proc_channels_); 171 for (int i = 0; i < num_proc_channels_; ++i) { 172 input_resamplers_.push_back( 173 new PushSincResampler(input_samples_per_channel_, 174 proc_samples_per_channel_)); 175 } 176 } 177 178 if (output_samples_per_channel_ != proc_samples_per_channel_) { 179 output_resamplers_.reserve(num_proc_channels_); 180 for (int i = 0; i < num_proc_channels_; ++i) { 181 output_resamplers_.push_back( 182 new PushSincResampler(proc_samples_per_channel_, 183 output_samples_per_channel_)); 184 } 185 } 186 187 if (proc_samples_per_channel_ == kSamplesPer32kHzChannel) { 188 samples_per_split_channel_ = kSamplesPer16kHzChannel; 189 split_channels_low_.reset(new IFChannelBuffer(samples_per_split_channel_, 190 num_proc_channels_)); 191 split_channels_high_.reset(new IFChannelBuffer(samples_per_split_channel_, 192 num_proc_channels_)); 193 filter_states_.reset(new SplitFilterStates[num_proc_channels_]); 194 } 195 } 196 197 AudioBuffer::~AudioBuffer() {} 198 199 void AudioBuffer::CopyFrom(const float* const* data, 200 int samples_per_channel, 201 AudioProcessing::ChannelLayout layout) { 202 assert(samples_per_channel == input_samples_per_channel_); 203 assert(ChannelsFromLayout(layout) == num_input_channels_); 204 InitForNewData(); 205 206 if (HasKeyboardChannel(layout)) { 207 keyboard_data_ = data[KeyboardChannelIndex(layout)]; 208 } 209 210 // Downmix. 211 const float* const* data_ptr = data; 212 if (num_input_channels_ == 2 && num_proc_channels_ == 1) { 213 StereoToMono(data[0], 214 data[1], 215 input_buffer_->channel(0), 216 input_samples_per_channel_); 217 data_ptr = input_buffer_->channels(); 218 } 219 220 // Resample. 221 if (input_samples_per_channel_ != proc_samples_per_channel_) { 222 for (int i = 0; i < num_proc_channels_; ++i) { 223 input_resamplers_[i]->Resample(data_ptr[i], 224 input_samples_per_channel_, 225 process_buffer_->channel(i), 226 proc_samples_per_channel_); 227 } 228 data_ptr = process_buffer_->channels(); 229 } 230 231 // Convert to int16. 232 for (int i = 0; i < num_proc_channels_; ++i) { 233 ScaleAndRoundToInt16(data_ptr[i], proc_samples_per_channel_, 234 channels_->ibuf()->channel(i)); 235 } 236 } 237 238 void AudioBuffer::CopyTo(int samples_per_channel, 239 AudioProcessing::ChannelLayout layout, 240 float* const* data) { 241 assert(samples_per_channel == output_samples_per_channel_); 242 assert(ChannelsFromLayout(layout) == num_proc_channels_); 243 244 // Convert to float. 245 float* const* data_ptr = data; 246 if (output_samples_per_channel_ != proc_samples_per_channel_) { 247 // Convert to an intermediate buffer for subsequent resampling. 248 data_ptr = process_buffer_->channels(); 249 } 250 for (int i = 0; i < num_proc_channels_; ++i) { 251 ScaleToFloat(channels_->ibuf()->channel(i), 252 proc_samples_per_channel_, 253 data_ptr[i]); 254 } 255 256 // Resample. 257 if (output_samples_per_channel_ != proc_samples_per_channel_) { 258 for (int i = 0; i < num_proc_channels_; ++i) { 259 output_resamplers_[i]->Resample(data_ptr[i], 260 proc_samples_per_channel_, 261 data[i], 262 output_samples_per_channel_); 263 } 264 } 265 } 266 267 void AudioBuffer::InitForNewData() { 268 keyboard_data_ = NULL; 269 mixed_low_pass_valid_ = false; 270 reference_copied_ = false; 271 activity_ = AudioFrame::kVadUnknown; 272 } 273 274 const int16_t* AudioBuffer::data(int channel) const { 275 return channels_->ibuf_const()->channel(channel); 276 } 277 278 int16_t* AudioBuffer::data(int channel) { 279 mixed_low_pass_valid_ = false; 280 return channels_->ibuf()->channel(channel); 281 } 282 283 const float* AudioBuffer::data_f(int channel) const { 284 return channels_->fbuf_const()->channel(channel); 285 } 286 287 float* AudioBuffer::data_f(int channel) { 288 mixed_low_pass_valid_ = false; 289 return channels_->fbuf()->channel(channel); 290 } 291 292 const int16_t* AudioBuffer::low_pass_split_data(int channel) const { 293 return split_channels_low_.get() 294 ? split_channels_low_->ibuf_const()->channel(channel) 295 : data(channel); 296 } 297 298 int16_t* AudioBuffer::low_pass_split_data(int channel) { 299 mixed_low_pass_valid_ = false; 300 return split_channels_low_.get() 301 ? split_channels_low_->ibuf()->channel(channel) 302 : data(channel); 303 } 304 305 const float* AudioBuffer::low_pass_split_data_f(int channel) const { 306 return split_channels_low_.get() 307 ? split_channels_low_->fbuf_const()->channel(channel) 308 : data_f(channel); 309 } 310 311 float* AudioBuffer::low_pass_split_data_f(int channel) { 312 mixed_low_pass_valid_ = false; 313 return split_channels_low_.get() 314 ? split_channels_low_->fbuf()->channel(channel) 315 : data_f(channel); 316 } 317 318 const int16_t* AudioBuffer::high_pass_split_data(int channel) const { 319 return split_channels_high_.get() 320 ? split_channels_high_->ibuf_const()->channel(channel) 321 : NULL; 322 } 323 324 int16_t* AudioBuffer::high_pass_split_data(int channel) { 325 return split_channels_high_.get() 326 ? split_channels_high_->ibuf()->channel(channel) 327 : NULL; 328 } 329 330 const float* AudioBuffer::high_pass_split_data_f(int channel) const { 331 return split_channels_high_.get() 332 ? split_channels_high_->fbuf_const()->channel(channel) 333 : NULL; 334 } 335 336 float* AudioBuffer::high_pass_split_data_f(int channel) { 337 return split_channels_high_.get() 338 ? split_channels_high_->fbuf()->channel(channel) 339 : NULL; 340 } 341 342 const int16_t* AudioBuffer::mixed_low_pass_data() { 343 // Currently only mixing stereo to mono is supported. 344 assert(num_proc_channels_ == 1 || num_proc_channels_ == 2); 345 346 if (num_proc_channels_ == 1) { 347 return low_pass_split_data(0); 348 } 349 350 if (!mixed_low_pass_valid_) { 351 if (!mixed_low_pass_channels_.get()) { 352 mixed_low_pass_channels_.reset( 353 new ChannelBuffer<int16_t>(samples_per_split_channel_, 1)); 354 } 355 StereoToMono(low_pass_split_data(0), 356 low_pass_split_data(1), 357 mixed_low_pass_channels_->data(), 358 samples_per_split_channel_); 359 mixed_low_pass_valid_ = true; 360 } 361 return mixed_low_pass_channels_->data(); 362 } 363 364 const int16_t* AudioBuffer::low_pass_reference(int channel) const { 365 if (!reference_copied_) { 366 return NULL; 367 } 368 369 return low_pass_reference_channels_->channel(channel); 370 } 371 372 const float* AudioBuffer::keyboard_data() const { 373 return keyboard_data_; 374 } 375 376 SplitFilterStates* AudioBuffer::filter_states(int channel) { 377 assert(channel >= 0 && channel < num_proc_channels_); 378 return &filter_states_[channel]; 379 } 380 381 void AudioBuffer::set_activity(AudioFrame::VADActivity activity) { 382 activity_ = activity; 383 } 384 385 AudioFrame::VADActivity AudioBuffer::activity() const { 386 return activity_; 387 } 388 389 int AudioBuffer::num_channels() const { 390 return num_proc_channels_; 391 } 392 393 int AudioBuffer::samples_per_channel() const { 394 return proc_samples_per_channel_; 395 } 396 397 int AudioBuffer::samples_per_split_channel() const { 398 return samples_per_split_channel_; 399 } 400 401 int AudioBuffer::samples_per_keyboard_channel() const { 402 // We don't resample the keyboard channel. 403 return input_samples_per_channel_; 404 } 405 406 // TODO(andrew): Do deinterleaving and mixing in one step? 407 void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) { 408 assert(proc_samples_per_channel_ == input_samples_per_channel_); 409 assert(frame->num_channels_ == num_input_channels_); 410 assert(frame->samples_per_channel_ == proc_samples_per_channel_); 411 InitForNewData(); 412 activity_ = frame->vad_activity_; 413 414 if (num_input_channels_ == 2 && num_proc_channels_ == 1) { 415 // Downmix directly; no explicit deinterleaving needed. 416 int16_t* downmixed = channels_->ibuf()->channel(0); 417 for (int i = 0; i < input_samples_per_channel_; ++i) { 418 // HACK(ajm): The downmixing in the int16_t path is in practice never 419 // called from production code. We do this weird scaling to and from float 420 // to satisfy tests checking for bit-exactness with the float path. 421 float downmix_float = (ScaleToFloat(frame->data_[i * 2]) + 422 ScaleToFloat(frame->data_[i * 2 + 1])) / 2; 423 downmixed[i] = ScaleAndRoundToInt16(downmix_float); 424 } 425 } else { 426 assert(num_proc_channels_ == num_input_channels_); 427 int16_t* interleaved = frame->data_; 428 for (int i = 0; i < num_proc_channels_; ++i) { 429 int16_t* deinterleaved = channels_->ibuf()->channel(i); 430 int interleaved_idx = i; 431 for (int j = 0; j < proc_samples_per_channel_; ++j) { 432 deinterleaved[j] = interleaved[interleaved_idx]; 433 interleaved_idx += num_proc_channels_; 434 } 435 } 436 } 437 } 438 439 void AudioBuffer::InterleaveTo(AudioFrame* frame, bool data_changed) const { 440 assert(proc_samples_per_channel_ == output_samples_per_channel_); 441 assert(num_proc_channels_ == num_input_channels_); 442 assert(frame->num_channels_ == num_proc_channels_); 443 assert(frame->samples_per_channel_ == proc_samples_per_channel_); 444 frame->vad_activity_ = activity_; 445 446 if (!data_changed) { 447 return; 448 } 449 450 int16_t* interleaved = frame->data_; 451 for (int i = 0; i < num_proc_channels_; i++) { 452 int16_t* deinterleaved = channels_->ibuf()->channel(i); 453 int interleaved_idx = i; 454 for (int j = 0; j < proc_samples_per_channel_; j++) { 455 interleaved[interleaved_idx] = deinterleaved[j]; 456 interleaved_idx += num_proc_channels_; 457 } 458 } 459 } 460 461 void AudioBuffer::CopyLowPassToReference() { 462 reference_copied_ = true; 463 if (!low_pass_reference_channels_.get()) { 464 low_pass_reference_channels_.reset( 465 new ChannelBuffer<int16_t>(samples_per_split_channel_, 466 num_proc_channels_)); 467 } 468 for (int i = 0; i < num_proc_channels_; i++) { 469 low_pass_reference_channels_->CopyFrom(low_pass_split_data(i), i); 470 } 471 } 472 473 } // namespace webrtc 474