/*
 *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef WEBRTC_INTERNAL_BEAMFORMER_BLOCKER_H_
#define WEBRTC_INTERNAL_BEAMFORMER_BLOCKER_H_

#include "webrtc/base/scoped_ptr.h"
#include "webrtc/common_audio/audio_ring_buffer.h"
#include "webrtc/common_audio/channel_buffer.h"

namespace webrtc {

// The callback used to process audio in the time domain. Input has already
// been windowed, and output will be windowed. The number of input channels
// must be >= the number of output channels.
class BlockerCallback {
 public:
  virtual ~BlockerCallback() {}

  virtual void ProcessBlock(const float* const* input,
                            size_t num_frames,
                            size_t num_input_channels,
                            size_t num_output_channels,
                            float* const* output) = 0;
};
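
// Example implementation (a minimal sketch; |CopyBlockerCallback| is a
// hypothetical name used only for illustration). It forwards the first
// |num_output_channels| channels of the windowed input to the output
// unchanged:
//
//   class CopyBlockerCallback : public BlockerCallback {
//    public:
//     void ProcessBlock(const float* const* input,
//                       size_t num_frames,
//                       size_t num_input_channels,
//                       size_t num_output_channels,
//                       float* const* output) override {
//       for (size_t c = 0; c < num_output_channels; ++c) {
//         for (size_t i = 0; i < num_frames; ++i)
//           output[c][i] = input[c][i];
//       }
//     }
//   };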

// The main purpose of Blocker is to abstract away the fact that often we
// receive a different number of audio frames than our transform takes. For
// example, most FFTs work best when the fft-size is a power of 2, but suppose
// we receive 20ms of audio at a sample rate of 48000. That comes to 960 frames
// of audio, which is not a power of 2. Blocker allows us to specify the
// transform and all other necessary processing via the ProcessBlock()
// callback without any constraints on the transform-size
// (read: |block_size_|) or received-audio-size (read: |chunk_size_|).
// We handle this for the multichannel audio case, allowing for different
// numbers of input and output channels (for example, beamforming takes 2 or
// more input channels and returns 1 output channel). Audio signals are
// represented as deinterleaved floats in the range [-1, 1].
//
// Blocker is responsible for:
// - blocking audio while handling potential discontinuities on the edges
//   of chunks
// - windowing blocks before sending them to ProcessBlock()
// - windowing processed blocks, and overlap-adding them together before
//   sending back a processed chunk
//
// To use Blocker (see the example sketch below):
// 1. Implement a BlockerCallback object |bc|.
// 2. Instantiate a Blocker object |b|, passing in |bc|.
// 3. As you receive audio, call b.ProcessChunk() to get processed audio.
//
// A small amount of delay is added to the first received chunk to deal with
// the difference in chunk/block sizes. This delay is <= chunk_size.
//
// Ownership of window is retained by the caller.  That is, Blocker makes a
// copy of window and does not attempt to delete it.
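//
// Example (a minimal usage sketch; the concrete sizes and the callback type
// below are hypothetical and chosen only for illustration):
//
//   // 10 ms stereo chunks at 48 kHz, processed as 256-frame blocks with a
//   // Hann window and 50% overlap, producing one output channel.
//   const size_t kChunkSize = 480;
//   const size_t kBlockSize = 256;
//   float window[kBlockSize];    // Filled with a Hann window by the caller.
//   CopyBlockerCallback bc;      // E.g. the sketch above, or a beamformer.
//   Blocker b(kChunkSize, kBlockSize, 2, 1, window, kBlockSize / 2, &bc);
//
//   // For each deinterleaved 480-frame chunk received:
//   b.ProcessChunk(input, kChunkSize, 2, 1, output);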
class Blocker {
 public:
  Blocker(size_t chunk_size,
          size_t block_size,
          size_t num_input_channels,
          size_t num_output_channels,
          const float* window,
          size_t shift_amount,
          BlockerCallback* callback);

  void ProcessChunk(const float* const* input,
                    size_t chunk_size,
                    size_t num_input_channels,
                    size_t num_output_channels,
                    float* const* output);

 private:
  const size_t chunk_size_;
  const size_t block_size_;
  const size_t num_input_channels_;
  const size_t num_output_channels_;

  // The number of frames of delay to add at the beginning of the first chunk.
  const size_t initial_delay_;

  // The frame index into the input buffer where the first block should be read
  // from. This is necessary because shift_amount_ is not necessarily a
  // multiple of chunk_size_, so blocks won't line up at the start of the
  // buffer.
  size_t frame_offset_;

  // Since blocks nearly always overlap, there are certain blocks that require
  // frames from the end of one chunk and the beginning of the next chunk. The
  // input and output buffers are responsible for saving those frames between
  // calls to ProcessChunk().
  //
  // Both contain |initial_delay_| + |chunk_size_| frames. The input is a
  // fairly standard FIFO, but due to the overlap-add it's harder to use an
  // AudioRingBuffer for the output.
  AudioRingBuffer input_buffer_;
  ChannelBuffer<float> output_buffer_;

  // Space for the input block (can't wrap because of windowing).
  ChannelBuffer<float> input_block_;

  // Space for the output block (can't wrap because of overlap/add).
  ChannelBuffer<float> output_block_;

  rtc::scoped_ptr<float[]> window_;

  // The number of frames between the starts of consecutive blocks. For
  // example, |shift_amount_| = |block_size_| / 2 for a Hann window.
  size_t shift_amount_;

  BlockerCallback* callback_;
};

}  // namespace webrtc

#endif  // WEBRTC_INTERNAL_BEAMFORMER_BLOCKER_H_