Home | History | Annotate | Download | only in audio_utils
      1 /*
      2 ** Copyright 2011, The Android Open-Source Project
      3 **
      4 ** Licensed under the Apache License, Version 2.0 (the "License");
      5 ** you may not use this file except in compliance with the License.
      6 ** You may obtain a copy of the License at
      7 **
      8 **     http://www.apache.org/licenses/LICENSE-2.0
      9 **
     10 ** Unless required by applicable law or agreed to in writing, software
     11 ** distributed under the License is distributed on an "AS IS" BASIS,
     12 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 ** See the License for the specific language governing permissions and
     14 ** limitations under the License.
     15 */
     16 
     17 //#define LOG_NDEBUG 0
     18 #define LOG_TAG "echo_reference"
     19 
#include <errno.h>
#include <pthread.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <cutils/log.h>
#include <system/audio.h>
#include <audio_utils/resampler.h>
#include <audio_utils/echo_reference.h>
     27 
     28 // echo reference state: bit field indicating if read, write or both are active.
     29 enum state {
     30     ECHOREF_IDLE = 0x00,        // idle
     31     ECHOREF_READING = 0x01,     // reading is active
     32     ECHOREF_WRITING = 0x02      // writing is active
     33 };
     34 
     35 struct echo_reference {
     36     struct echo_reference_itfe itfe;
     37     int status;                     // init status
     38     uint32_t state;                 // active state: reading, writing or both
     39     audio_format_t rd_format;       // read sample format
     40     uint32_t rd_channel_count;      // read number of channels
     41     uint32_t rd_sampling_rate;      // read sampling rate in Hz
     42     size_t rd_frame_size;           // read frame size (bytes per sample)
     43     audio_format_t wr_format;       // write sample format
     44     uint32_t wr_channel_count;      // write number of channels
     45     uint32_t wr_sampling_rate;      // write sampling rate in Hz
     46     size_t wr_frame_size;           // write frame size (bytes per sample)
     47     void *buffer;                   // main buffer
     48     size_t buf_size;                // main buffer size in frames
     49     size_t frames_in;               // number of frames in main buffer
     50     void *wr_buf;                   // buffer for input conversions
     51     size_t wr_buf_size;             // size of conversion buffer in frames
     52     size_t wr_frames_in;            // number of frames in conversion buffer
     53     void *wr_src_buf;               // resampler input buf (either wr_buf or buffer used by write())
     54     struct timespec wr_render_time; // latest render time indicated by write()
     55                                     // default ALSA gettimeofday() format
     56     int32_t  playback_delay;        // playback buffer delay indicated by last write()
     57     pthread_mutex_t lock;                      // mutex protecting read/write concurrency
     58     pthread_cond_t cond;                       // condition signaled when data is ready to read
     59     struct resampler_itfe *down_sampler;       // input resampler
     60     struct resampler_buffer_provider provider; // resampler buffer provider
     61 };
     62 
     63 
     64 int echo_reference_get_next_buffer(struct resampler_buffer_provider *buffer_provider,
     65                                    struct resampler_buffer* buffer)
     66 {
     67     struct echo_reference *er;
     68 
     69     if (buffer_provider == NULL) {
     70         return -EINVAL;
     71     }
     72 
     73     er = (struct echo_reference *)((char *)buffer_provider -
     74                                       offsetof(struct echo_reference, provider));
     75 
     76     if (er->wr_src_buf == NULL || er->wr_frames_in == 0) {
     77         buffer->raw = NULL;
     78         buffer->frame_count = 0;
     79         return -ENODATA;
     80     }
     81 
     82     buffer->frame_count = (buffer->frame_count > er->wr_frames_in) ? er->wr_frames_in : buffer->frame_count;
     83     // this is er->rd_channel_count here as we resample after stereo to mono conversion if any
     84     buffer->i16 = (int16_t *)er->wr_src_buf + (er->wr_buf_size - er->wr_frames_in) * er->rd_channel_count;
     85 
     86     return 0;
     87 }
     88 
     89 void echo_reference_release_buffer(struct resampler_buffer_provider *buffer_provider,
     90                                   struct resampler_buffer* buffer)
     91 {
     92     struct echo_reference *er;
     93 
     94     if (buffer_provider == NULL) {
     95         return;
     96     }
     97 
     98     er = (struct echo_reference *)((char *)buffer_provider -
     99                                       offsetof(struct echo_reference, provider));
    100 
    101     er->wr_frames_in -= buffer->frame_count;
    102 }
    103 
    104 static void echo_reference_reset_l(struct echo_reference *er)
    105 {
    106     LOGV("echo_reference_reset_l()");
    107     free(er->buffer);
    108     er->buffer = NULL;
    109     er->buf_size = 0;
    110     er->frames_in = 0;
    111     free(er->wr_buf);
    112     er->wr_buf = NULL;
    113     er->wr_buf_size = 0;
    114     er->wr_render_time.tv_sec = 0;
    115     er->wr_render_time.tv_nsec = 0;
    116 }
    117 
    118 static int echo_reference_write(struct echo_reference_itfe *echo_reference,
    119                          struct echo_reference_buffer *buffer)
    120 {
    121     struct echo_reference *er = (struct echo_reference *)echo_reference;
    122     int status = 0;
    123 
    124     if (er == NULL) {
    125         return -EINVAL;
    126     }
    127 
    128     pthread_mutex_lock(&er->lock);
    129 
    130     if (buffer == NULL) {
    131         LOGV("echo_reference_write() stop write");
    132         er->state &= ~ECHOREF_WRITING;
    133         echo_reference_reset_l(er);
    134         goto exit;
    135     }
    136 
    137     LOGV("echo_reference_write() START trying to write %d frames", buffer->frame_count);
    138     LOGV("echo_reference_write() playbackTimestamp:[%d].[%d], er->playback_delay:[%d]",
    139             (int)buffer->time_stamp.tv_sec,
    140             (int)buffer->time_stamp.tv_nsec, er->playback_delay);
    141 
    142     //LOGV("echo_reference_write() %d frames", buffer->frame_count);
    143     // discard writes until a valid time stamp is provided.
    144 
    145     if ((buffer->time_stamp.tv_sec == 0) && (buffer->time_stamp.tv_nsec == 0) &&
    146         (er->wr_render_time.tv_sec == 0) && (er->wr_render_time.tv_nsec == 0)) {
    147         goto exit;
    148     }
    149 
    150     if ((er->state & ECHOREF_WRITING) == 0) {
    151         LOGV("echo_reference_write() start write");
    152         if (er->down_sampler != NULL) {
    153             er->down_sampler->reset(er->down_sampler);
    154         }
    155         er->state |= ECHOREF_WRITING;
    156     }
    157 
    158     if ((er->state & ECHOREF_READING) == 0) {
    159         goto exit;
    160     }
    161 
    162     er->wr_render_time.tv_sec  = buffer->time_stamp.tv_sec;
    163     er->wr_render_time.tv_nsec = buffer->time_stamp.tv_nsec;
    164 
    165     er->playback_delay = buffer->delay_ns;
    166 
    167     void *srcBuf;
    168     size_t inFrames;
    169     // do stereo to mono and down sampling if necessary
    170     if (er->rd_channel_count != er->wr_channel_count ||
    171             er->rd_sampling_rate != er->wr_sampling_rate) {
    172         if (er->wr_buf_size < buffer->frame_count) {
    173             er->wr_buf_size = buffer->frame_count;
    174             //max buffer size is normally function of read sampling rate but as write sampling rate
    175             //is always more than read sampling rate this works
    176             er->wr_buf = realloc(er->wr_buf, er->wr_buf_size * er->rd_frame_size);
    177         }
    178 
    179         inFrames = buffer->frame_count;
    180         if (er->rd_channel_count != er->wr_channel_count) {
    181             // must be stereo to mono
    182             int16_t *src16 = (int16_t *)buffer->raw;
    183             int16_t *dst16 = (int16_t *)er->wr_buf;
    184             size_t frames = buffer->frame_count;
    185             while (frames--) {
    186                 *dst16++ = (int16_t)(((int32_t)*src16 + (int32_t)*(src16 + 1)) >> 1);
    187                 src16 += 2;
    188             }
    189         }
    190         if (er->wr_sampling_rate != er->rd_sampling_rate) {
    191             if (er->down_sampler == NULL) {
    192                 int rc;
    193                 LOGV("echo_reference_write() new ReSampler(%d, %d)",
    194                       er->wr_sampling_rate, er->rd_sampling_rate);
    195                 er->provider.get_next_buffer = echo_reference_get_next_buffer;
    196                 er->provider.release_buffer = echo_reference_release_buffer;
    197                 rc = create_resampler(er->wr_sampling_rate,
    198                                  er->rd_sampling_rate,
    199                                  er->rd_channel_count,
    200                                  RESAMPLER_QUALITY_VOIP,
    201                                  &er->provider,
    202                                  &er->down_sampler);
    203                 if (rc != 0) {
    204                     er->down_sampler = NULL;
    205                     LOGV("echo_reference_write() failure to create resampler %d", rc);
    206                     status = -ENODEV;
    207                     goto exit;
    208                 }
    209             }
    210             // er->wr_src_buf and er->wr_frames_in are used by getNexBuffer() called by the resampler
    211             // to get new frames
    212             if (er->rd_channel_count != er->wr_channel_count) {
    213                 er->wr_src_buf = er->wr_buf;
    214             } else {
    215                 er->wr_src_buf = buffer->raw;
    216             }
    217             er->wr_frames_in = buffer->frame_count;
    218             // inFrames is always more than we need here to get frames remaining from previous runs
    219             // inFrames is updated by resample() with the number of frames produced
    220             LOGV("echo_reference_write() ReSampling(%d, %d)",
    221                   er->wr_sampling_rate, er->rd_sampling_rate);
    222             er->down_sampler->resample_from_provider(er->down_sampler,
    223                                                      (int16_t *)er->wr_buf, &inFrames);
    224             LOGV_IF(er->wr_frames_in != 0,
    225                     "echo_reference_write() er->wr_frames_in not 0 (%d) after resampler",
    226                     er->wr_frames_in);
    227         }
    228         srcBuf = er->wr_buf;
    229     } else {
    230         inFrames = buffer->frame_count;
    231         srcBuf = buffer->raw;
    232     }
    233 
    234     if (er->frames_in + inFrames > er->buf_size) {
    235         LOGV("echo_reference_write() increasing buffer size from %d to %d",
    236                 er->buf_size, er->frames_in + inFrames);
    237                 er->buf_size = er->frames_in + inFrames;
    238                 er->buffer = realloc(er->buffer, er->buf_size * er->rd_frame_size);
    239     }
    240     memcpy((char *)er->buffer + er->frames_in * er->rd_frame_size,
    241            srcBuf,
    242            inFrames * er->rd_frame_size);
    243     er->frames_in += inFrames;
    244 
    245     LOGV("EchoReference::write_log() inFrames:[%d], mFramesInOld:[%d], "\
    246          "mFramesInNew:[%d], er->buf_size:[%d], er->wr_render_time:[%d].[%d],"
    247          "er->playback_delay:[%d]",
    248          inFrames, er->frames_in - inFrames, er->frames_in, er->buf_size,
    249          (int)er->wr_render_time.tv_sec,
    250          (int)er->wr_render_time.tv_nsec, er->playback_delay);
    251 
    252     pthread_cond_signal(&er->cond);
    253 exit:
    254     pthread_mutex_unlock(&er->lock);
    255     LOGV("echo_reference_write() END");
    256     return status;
    257 }
    258 
    259 #define MIN_DELAY_UPDATE_NS 62500 // delay jump threshold to update ref buffer
    260                                   // 0.5 samples at 8kHz in nsecs
    261 
    262 
    263 static int echo_reference_read(struct echo_reference_itfe *echo_reference,
    264                          struct echo_reference_buffer *buffer)
    265 {
    266     struct echo_reference *er = (struct echo_reference *)echo_reference;
    267 
    268     if (er == NULL) {
    269         return -EINVAL;
    270     }
    271 
    272     pthread_mutex_lock(&er->lock);
    273 
    274     if (buffer == NULL) {
    275         LOGV("EchoReference::read() stop read");
    276         er->state &= ~ECHOREF_READING;
    277         goto exit;
    278     }
    279 
    280     LOGV("EchoReference::read() START, delayCapture:[%d],er->frames_in:[%d],buffer->frame_count:[%d]",
    281     buffer->delay_ns, er->frames_in, buffer->frame_count);
    282 
    283     if ((er->state & ECHOREF_READING) == 0) {
    284         LOGV("EchoReference::read() start read");
    285         echo_reference_reset_l(er);
    286         er->state |= ECHOREF_READING;
    287     }
    288 
    289     if ((er->state & ECHOREF_WRITING) == 0) {
    290         memset(buffer->raw, 0, er->rd_frame_size * buffer->frame_count);
    291         buffer->delay_ns = 0;
    292         goto exit;
    293     }
    294 
    295 //    LOGV("EchoReference::read() %d frames", buffer->frame_count);
    296 
    297     // allow some time for new frames to arrive if not enough frames are ready for read
    298     if (er->frames_in < buffer->frame_count) {
    299         uint32_t timeoutMs = (uint32_t)((1000 * buffer->frame_count) / er->rd_sampling_rate / 2);
    300         struct timespec ts;
    301 
    302         ts.tv_sec  = timeoutMs/1000;
    303         ts.tv_nsec = timeoutMs%1000;
    304         pthread_cond_timedwait_relative_np(&er->cond, &er->lock, &ts);
    305 
    306         if (er->frames_in < buffer->frame_count) {
    307             LOGV("EchoReference::read() waited %d ms but still not enough frames"\
    308                  " er->frames_in: %d, buffer->frame_count = %d",
    309                 timeoutMs, er->frames_in, buffer->frame_count);
    310             buffer->frame_count = er->frames_in;
    311         }
    312     }
    313 
    314     int64_t timeDiff;
    315     struct timespec tmp;
    316 
    317     if ((er->wr_render_time.tv_sec == 0 && er->wr_render_time.tv_nsec == 0) ||
    318         (buffer->time_stamp.tv_sec == 0 && buffer->time_stamp.tv_nsec == 0)) {
    319         LOGV("read: NEW:timestamp is zero---------setting timeDiff = 0, "\
    320              "not updating delay this time");
    321         timeDiff = 0;
    322     } else {
    323         if (buffer->time_stamp.tv_nsec < er->wr_render_time.tv_nsec) {
    324             tmp.tv_sec = buffer->time_stamp.tv_sec - er->wr_render_time.tv_sec - 1;
    325             tmp.tv_nsec = 1000000000 + buffer->time_stamp.tv_nsec - er->wr_render_time.tv_nsec;
    326         } else {
    327             tmp.tv_sec = buffer->time_stamp.tv_sec - er->wr_render_time.tv_sec;
    328             tmp.tv_nsec = buffer->time_stamp.tv_nsec - er->wr_render_time.tv_nsec;
    329         }
    330         timeDiff = (((int64_t)tmp.tv_sec * 1000000000 + tmp.tv_nsec));
    331 
    332         int64_t expectedDelayNs =  er->playback_delay + buffer->delay_ns - timeDiff;
    333 
    334         LOGV("expectedDelayNs[%lld] =  er->playback_delay[%d] + delayCapture[%d] - timeDiff[%lld]",
    335         expectedDelayNs, er->playback_delay, buffer->delay_ns, timeDiff);
    336 
    337         if (expectedDelayNs > 0) {
    338             int64_t delayNs = ((int64_t)er->frames_in * 1000000000) / er->rd_sampling_rate;
    339 
    340             delayNs -= expectedDelayNs;
    341 
    342             if (abs(delayNs) >= MIN_DELAY_UPDATE_NS) {
    343                 if (delayNs < 0) {
    344                     size_t previousFrameIn = er->frames_in;
    345                     er->frames_in = (expectedDelayNs * er->rd_sampling_rate)/1000000000;
    346                     int    offset = er->frames_in - previousFrameIn;
    347                     LOGV("EchoReference::readlog: delayNs = NEGATIVE and ENOUGH : "\
    348                          "setting %d frames to zero er->frames_in: %d, previousFrameIn = %d",
    349                          offset, er->frames_in, previousFrameIn);
    350 
    351                     if (er->frames_in > er->buf_size) {
    352                         er->buf_size = er->frames_in;
    353                         er->buffer  = realloc(er->buffer, er->frames_in * er->rd_frame_size);
    354                         LOGV("EchoReference::read: increasing buffer size to %d", er->buf_size);
    355                     }
    356 
    357                     if (offset > 0)
    358                         memset((char *)er->buffer + previousFrameIn * er->rd_frame_size,
    359                                0, offset * er->rd_frame_size);
    360                 } else {
    361                     size_t  previousFrameIn = er->frames_in;
    362                     int     framesInInt = (int)(((int64_t)expectedDelayNs *
    363                                            (int64_t)er->rd_sampling_rate)/1000000000);
    364                     int     offset = previousFrameIn - framesInInt;
    365 
    366                     LOGV("EchoReference::readlog: delayNs = POSITIVE/ENOUGH :previousFrameIn: %d,"\
    367                          "framesInInt: [%d], offset:[%d], buffer->frame_count:[%d]",
    368                          previousFrameIn, framesInInt, offset, buffer->frame_count);
    369 
    370                     if (framesInInt < (int)buffer->frame_count) {
    371                         if (framesInInt > 0) {
    372                             memset((char *)er->buffer + framesInInt * er->rd_frame_size,
    373                                    0, (buffer->frame_count-framesInInt) * er->rd_frame_size);
    374                             LOGV("EchoReference::read: pushing [%d] zeros into ref buffer",
    375                                  (buffer->frame_count-framesInInt));
    376                         } else {
    377                             LOGV("framesInInt = %d", framesInInt);
    378                         }
    379                         framesInInt = buffer->frame_count;
    380                     } else {
    381                         if (offset > 0) {
    382                             memcpy(er->buffer, (char *)er->buffer + (offset * er->rd_frame_size),
    383                                    framesInInt * er->rd_frame_size);
    384                             LOGV("EchoReference::read: shifting ref buffer by [%d]",framesInInt);
    385                         }
    386                     }
    387                     er->frames_in = (size_t)framesInInt;
    388                 }
    389             } else {
    390                 LOGV("EchoReference::read: NOT ENOUGH samples to update %lld", delayNs);
    391             }
    392         } else {
    393             LOGV("NEGATIVE expectedDelayNs[%lld] =  "\
    394                  "er->playback_delay[%d] + delayCapture[%d] - timeDiff[%lld]",
    395                  expectedDelayNs, er->playback_delay, buffer->delay_ns, timeDiff);
    396         }
    397     }
    398 
    399     memcpy(buffer->raw,
    400            (char *)er->buffer,
    401            buffer->frame_count * er->rd_frame_size);
    402 
    403     er->frames_in -= buffer->frame_count;
    404     memcpy(er->buffer,
    405            (char *)er->buffer + buffer->frame_count * er->rd_frame_size,
    406            er->frames_in * er->rd_frame_size);
    407 
    408     // As the reference buffer is now time aligned to the microphone signal there is a zero delay
    409     buffer->delay_ns = 0;
    410 
    411     LOGV("EchoReference::read() END %d frames, total frames in %d",
    412           buffer->frame_count, er->frames_in);
    413 
    414     pthread_cond_signal(&er->cond);
    415 
    416 exit:
    417     pthread_mutex_unlock(&er->lock);
    418     return 0;
    419 }
    420 
    421 
    422 int create_echo_reference(audio_format_t rdFormat,
    423                             uint32_t rdChannelCount,
    424                             uint32_t rdSamplingRate,
    425                             audio_format_t wrFormat,
    426                             uint32_t wrChannelCount,
    427                             uint32_t wrSamplingRate,
    428                             struct echo_reference_itfe **echo_reference)
    429 {
    430     struct echo_reference *er;
    431 
    432     LOGV("create_echo_reference()");
    433 
    434     if (echo_reference == NULL) {
    435         return -EINVAL;
    436     }
    437 
    438     *echo_reference = NULL;
    439 
    440     if (rdFormat != AUDIO_FORMAT_PCM_16_BIT ||
    441             rdFormat != wrFormat) {
    442         LOGW("create_echo_reference bad format rd %d, wr %d", rdFormat, wrFormat);
    443         return -EINVAL;
    444     }
    445     if ((rdChannelCount != 1 && rdChannelCount != 2) ||
    446             wrChannelCount != 2) {
    447         LOGW("create_echo_reference bad channel count rd %d, wr %d", rdChannelCount, wrChannelCount);
    448         return -EINVAL;
    449     }
    450 
    451     if (wrSamplingRate < rdSamplingRate) {
    452         LOGW("create_echo_reference bad smp rate rd %d, wr %d", rdSamplingRate, wrSamplingRate);
    453         return -EINVAL;
    454     }
    455 
    456     er = (struct echo_reference *)calloc(1, sizeof(struct echo_reference));
    457 
    458     er->itfe.read = echo_reference_read;
    459     er->itfe.write = echo_reference_write;
    460 
    461     er->state = ECHOREF_IDLE;
    462     er->rd_format = rdFormat;
    463     er->rd_channel_count = rdChannelCount;
    464     er->rd_sampling_rate = rdSamplingRate;
    465     er->wr_format = wrFormat;
    466     er->wr_channel_count = wrChannelCount;
    467     er->wr_sampling_rate = wrSamplingRate;
    468     er->rd_frame_size = audio_bytes_per_sample(rdFormat) * rdChannelCount;
    469     er->wr_frame_size = audio_bytes_per_sample(wrFormat) * wrChannelCount;
    470     *echo_reference = &er->itfe;
    471     return 0;
    472 }
    473 
    474 void release_echo_reference(struct echo_reference_itfe *echo_reference) {
    475     struct echo_reference *er = (struct echo_reference *)echo_reference;
    476 
    477     if (er == NULL) {
    478         return;
    479     }
    480 
    481     LOGV("EchoReference dstor");
    482     echo_reference_reset_l(er);
    483     if (er->down_sampler != NULL) {
    484         release_resampler(er->down_sampler);
    485     }
    486     free(er);
    487 }
    488 
    489