Home | History | Annotate | Download | only in audio_utils
      1 /*
      2 ** Copyright 2011, The Android Open-Source Project
      3 **
      4 ** Licensed under the Apache License, Version 2.0 (the "License");
      5 ** you may not use this file except in compliance with the License.
      6 ** You may obtain a copy of the License at
      7 **
      8 **     http://www.apache.org/licenses/LICENSE-2.0
      9 **
     10 ** Unless required by applicable law or agreed to in writing, software
     11 ** distributed under the License is distributed on an "AS IS" BASIS,
     12 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 ** See the License for the specific language governing permissions and
     14 ** limitations under the License.
     15 */
     16 
     17 //#define LOG_NDEBUG 0
     18 #define LOG_TAG "echo_reference"
     19 
#include <errno.h>
#include <pthread.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>

#include <cutils/log.h>
#include <system/audio.h>
#include <audio_utils/resampler.h>
#include <audio_utils/echo_reference.h>
     27 
     28 // echo reference state: bit field indicating if read, write or both are active.
     29 enum state {
     30     ECHOREF_IDLE = 0x00,        // idle
     31     ECHOREF_READING = 0x01,     // reading is active
     32     ECHOREF_WRITING = 0x02      // writing is active
     33 };
     34 
     35 struct echo_reference {
     36     struct echo_reference_itfe itfe;
     37     int status;                     // init status
     38     uint32_t state;                 // active state: reading, writing or both
     39     audio_format_t rd_format;       // read sample format
     40     uint32_t rd_channel_count;      // read number of channels
     41     uint32_t rd_sampling_rate;      // read sampling rate in Hz
     42     size_t rd_frame_size;           // read frame size (bytes per sample)
     43     audio_format_t wr_format;       // write sample format
     44     uint32_t wr_channel_count;      // write number of channels
     45     uint32_t wr_sampling_rate;      // write sampling rate in Hz
     46     size_t wr_frame_size;           // write frame size (bytes per sample)
     47     void *buffer;                   // main buffer
     48     size_t buf_size;                // main buffer size in frames
     49     size_t frames_in;               // number of frames in main buffer
     50     void *wr_buf;                   // buffer for input conversions
     51     size_t wr_buf_size;             // size of conversion buffer in frames
     52     size_t wr_frames_in;            // number of frames in conversion buffer
     53     size_t wr_curr_frame_size;      // number of frames given to current write() function
     54     void *wr_src_buf;               // resampler input buf (either wr_buf or buffer used by write())
     55     struct timespec wr_render_time; // latest render time indicated by write()
     56                                     // default ALSA gettimeofday() format
     57     int32_t  playback_delay;        // playback buffer delay indicated by last write()
     58     int16_t prev_delta_sign;        // sign of previous delay difference:
     59                                     //  1: positive, -1: negative, 0: unknown
     60     uint16_t delta_count;           // number of consecutive delay differences with same sign
     61     pthread_mutex_t lock;                      // mutex protecting read/write concurrency
     62     pthread_cond_t cond;                       // condition signaled when data is ready to read
     63     struct resampler_itfe *resampler;          // input resampler
     64     struct resampler_buffer_provider provider; // resampler buffer provider
     65 };
     66 
     67 
     68 int echo_reference_get_next_buffer(struct resampler_buffer_provider *buffer_provider,
     69                                    struct resampler_buffer* buffer)
     70 {
     71     struct echo_reference *er;
     72 
     73     if (buffer_provider == NULL) {
     74         return -EINVAL;
     75     }
     76 
     77     er = (struct echo_reference *)((char *)buffer_provider -
     78                                       offsetof(struct echo_reference, provider));
     79 
     80     if (er->wr_src_buf == NULL || er->wr_frames_in == 0) {
     81         buffer->raw = NULL;
     82         buffer->frame_count = 0;
     83         return -ENODATA;
     84     }
     85 
     86     buffer->frame_count = (buffer->frame_count > er->wr_frames_in) ?
     87             er->wr_frames_in : buffer->frame_count;
     88     // this is er->rd_channel_count here as we resample after stereo to mono conversion if any
     89     buffer->i16 = (int16_t *)er->wr_src_buf + (er->wr_curr_frame_size - er->wr_frames_in) *
     90             er->rd_channel_count;
     91 
     92     return 0;
     93 }
     94 
     95 void echo_reference_release_buffer(struct resampler_buffer_provider *buffer_provider,
     96                                   struct resampler_buffer* buffer)
     97 {
     98     struct echo_reference *er;
     99 
    100     if (buffer_provider == NULL) {
    101         return;
    102     }
    103 
    104     er = (struct echo_reference *)((char *)buffer_provider -
    105                                       offsetof(struct echo_reference, provider));
    106 
    107     er->wr_frames_in -= buffer->frame_count;
    108 }
    109 
    110 static void echo_reference_reset_l(struct echo_reference *er)
    111 {
    112     ALOGV("echo_reference_reset_l()");
    113     free(er->buffer);
    114     er->buffer = NULL;
    115     er->buf_size = 0;
    116     er->frames_in = 0;
    117     free(er->wr_buf);
    118     er->wr_buf = NULL;
    119     er->wr_buf_size = 0;
    120     er->wr_render_time.tv_sec = 0;
    121     er->wr_render_time.tv_nsec = 0;
    122     er->delta_count = 0;
    123     er->prev_delta_sign = 0;
    124 }
    125 
    126 /* additional space in resampler buffer allowing for extra samples to be returned
    127  * by speex resampler when sample rates ratio is not an integer.
    128  */
    129 #define RESAMPLER_HEADROOM_SAMPLES   10
    130 
    131 static int echo_reference_write(struct echo_reference_itfe *echo_reference,
    132                          struct echo_reference_buffer *buffer)
    133 {
    134     struct echo_reference *er = (struct echo_reference *)echo_reference;
    135     int status = 0;
    136 
    137     if (er == NULL) {
    138         return -EINVAL;
    139     }
    140 
    141     pthread_mutex_lock(&er->lock);
    142 
    143     if (buffer == NULL) {
    144         ALOGV("echo_reference_write() stop write");
    145         er->state &= ~ECHOREF_WRITING;
    146         echo_reference_reset_l(er);
    147         goto exit;
    148     }
    149 
    150     ALOGV("echo_reference_write() START trying to write %d frames", buffer->frame_count);
    151     ALOGV("echo_reference_write() playbackTimestamp:[%d].[%d], er->playback_delay:[%d]",
    152             (int)buffer->time_stamp.tv_sec,
    153             (int)buffer->time_stamp.tv_nsec, er->playback_delay);
    154 
    155     //ALOGV("echo_reference_write() %d frames", buffer->frame_count);
    156     // discard writes until a valid time stamp is provided.
    157 
    158     if ((buffer->time_stamp.tv_sec == 0) && (buffer->time_stamp.tv_nsec == 0) &&
    159         (er->wr_render_time.tv_sec == 0) && (er->wr_render_time.tv_nsec == 0)) {
    160         goto exit;
    161     }
    162 
    163     if ((er->state & ECHOREF_WRITING) == 0) {
    164         ALOGV("echo_reference_write() start write");
    165         if (er->resampler != NULL) {
    166             er->resampler->reset(er->resampler);
    167         }
    168         er->state |= ECHOREF_WRITING;
    169     }
    170 
    171     if ((er->state & ECHOREF_READING) == 0) {
    172         goto exit;
    173     }
    174 
    175     er->wr_render_time.tv_sec  = buffer->time_stamp.tv_sec;
    176     er->wr_render_time.tv_nsec = buffer->time_stamp.tv_nsec;
    177 
    178     er->playback_delay = buffer->delay_ns;
    179 
    180     // this will be used in the get_next_buffer, to support variable input buffer sizes
    181     er->wr_curr_frame_size = buffer->frame_count;
    182 
    183     void *srcBuf;
    184     size_t inFrames;
    185     // do stereo to mono and down sampling if necessary
    186     if (er->rd_channel_count != er->wr_channel_count ||
    187             er->rd_sampling_rate != er->wr_sampling_rate) {
    188         size_t wrBufSize = buffer->frame_count;
    189 
    190         inFrames = buffer->frame_count;
    191 
    192         if (er->rd_sampling_rate != er->wr_sampling_rate) {
    193             inFrames = (buffer->frame_count * er->rd_sampling_rate) / er->wr_sampling_rate +
    194                                                     RESAMPLER_HEADROOM_SAMPLES;
    195             // wr_buf is not only used as resampler output but also for stereo to mono conversion
    196             // output so buffer size is driven by both write and read sample rates
    197             if (inFrames > wrBufSize) {
    198                 wrBufSize = inFrames;
    199             }
    200         }
    201 
    202         if (er->wr_buf_size < wrBufSize) {
    203             ALOGV("echo_reference_write() increasing write buffer size from %d to %d",
    204                     er->wr_buf_size, wrBufSize);
    205             er->wr_buf_size = wrBufSize;
    206             er->wr_buf = realloc(er->wr_buf, er->wr_buf_size * er->rd_frame_size);
    207         }
    208 
    209         if (er->rd_channel_count != er->wr_channel_count) {
    210             // must be stereo to mono
    211             int16_t *src16 = (int16_t *)buffer->raw;
    212             int16_t *dst16 = (int16_t *)er->wr_buf;
    213             size_t frames = buffer->frame_count;
    214             while (frames--) {
    215                 *dst16++ = (int16_t)(((int32_t)*src16 + (int32_t)*(src16 + 1)) >> 1);
    216                 src16 += 2;
    217             }
    218         }
    219         if (er->wr_sampling_rate != er->rd_sampling_rate) {
    220             if (er->resampler == NULL) {
    221                 int rc;
    222                 ALOGV("echo_reference_write() new ReSampler(%d, %d)",
    223                       er->wr_sampling_rate, er->rd_sampling_rate);
    224                 er->provider.get_next_buffer = echo_reference_get_next_buffer;
    225                 er->provider.release_buffer = echo_reference_release_buffer;
    226                 rc = create_resampler(er->wr_sampling_rate,
    227                                  er->rd_sampling_rate,
    228                                  er->rd_channel_count,
    229                                  RESAMPLER_QUALITY_DEFAULT,
    230                                  &er->provider,
    231                                  &er->resampler);
    232                 if (rc != 0) {
    233                     er->resampler = NULL;
    234                     ALOGV("echo_reference_write() failure to create resampler %d", rc);
    235                     status = -ENODEV;
    236                     goto exit;
    237                 }
    238             }
    239             // er->wr_src_buf and er->wr_frames_in are used by getNexBuffer() called by the
    240             // resampler to get new frames
    241             if (er->rd_channel_count != er->wr_channel_count) {
    242                 er->wr_src_buf = er->wr_buf;
    243             } else {
    244                 er->wr_src_buf = buffer->raw;
    245             }
    246             er->wr_frames_in = buffer->frame_count;
    247             // inFrames is always more than we need here to get frames remaining from previous runs
    248             // inFrames is updated by resample() with the number of frames produced
    249             ALOGV("echo_reference_write() ReSampling(%d, %d)",
    250                   er->wr_sampling_rate, er->rd_sampling_rate);
    251             er->resampler->resample_from_provider(er->resampler,
    252                                                      (int16_t *)er->wr_buf, &inFrames);
    253             ALOGV_IF(er->wr_frames_in != 0,
    254                     "echo_reference_write() er->wr_frames_in not 0 (%d) after resampler",
    255                     er->wr_frames_in);
    256         }
    257         srcBuf = er->wr_buf;
    258     } else {
    259         inFrames = buffer->frame_count;
    260         srcBuf = buffer->raw;
    261     }
    262 
    263     if (er->frames_in + inFrames > er->buf_size) {
    264         ALOGV("echo_reference_write() increasing buffer size from %d to %d",
    265                 er->buf_size, er->frames_in + inFrames);
    266                 er->buf_size = er->frames_in + inFrames;
    267                 er->buffer = realloc(er->buffer, er->buf_size * er->rd_frame_size);
    268     }
    269     memcpy((char *)er->buffer + er->frames_in * er->rd_frame_size,
    270            srcBuf,
    271            inFrames * er->rd_frame_size);
    272     er->frames_in += inFrames;
    273 
    274     ALOGV("echo_reference_write() frames written:[%d], frames total:[%d] buffer size:[%d]\n"
    275           "                       er->wr_render_time:[%d].[%d], er->playback_delay:[%d]",
    276           inFrames, er->frames_in, er->buf_size,
    277           (int)er->wr_render_time.tv_sec, (int)er->wr_render_time.tv_nsec, er->playback_delay);
    278 
    279     pthread_cond_signal(&er->cond);
    280 exit:
    281     pthread_mutex_unlock(&er->lock);
    282     ALOGV("echo_reference_write() END");
    283     return status;
    284 }
    285 
    286 // delay jump threshold to update ref buffer: 6 samples at 8kHz in nsecs
    287 #define MIN_DELAY_DELTA_NS (375000*2)
    288 // number of consecutive delta with same sign between expected and actual delay before adjusting
    289 // the buffer
    290 #define MIN_DELTA_NUM 4
    291 
    292 
    293 static int echo_reference_read(struct echo_reference_itfe *echo_reference,
    294                          struct echo_reference_buffer *buffer)
    295 {
    296     struct echo_reference *er = (struct echo_reference *)echo_reference;
    297 
    298     if (er == NULL) {
    299         return -EINVAL;
    300     }
    301 
    302     pthread_mutex_lock(&er->lock);
    303 
    304     if (buffer == NULL) {
    305         ALOGV("echo_reference_read() stop read");
    306         er->state &= ~ECHOREF_READING;
    307         goto exit;
    308     }
    309 
    310     ALOGV("echo_reference_read() START, delayCapture:[%d], "
    311             "er->frames_in:[%d],buffer->frame_count:[%d]",
    312     buffer->delay_ns, er->frames_in, buffer->frame_count);
    313 
    314     if ((er->state & ECHOREF_READING) == 0) {
    315         ALOGV("echo_reference_read() start read");
    316         echo_reference_reset_l(er);
    317         er->state |= ECHOREF_READING;
    318     }
    319 
    320     if ((er->state & ECHOREF_WRITING) == 0) {
    321         memset(buffer->raw, 0, er->rd_frame_size * buffer->frame_count);
    322         buffer->delay_ns = 0;
    323         goto exit;
    324     }
    325 
    326 //    ALOGV("echo_reference_read() %d frames", buffer->frame_count);
    327 
    328     // allow some time for new frames to arrive if not enough frames are ready for read
    329     if (er->frames_in < buffer->frame_count) {
    330         uint32_t timeoutMs = (uint32_t)((1000 * buffer->frame_count) / er->rd_sampling_rate / 2);
    331         struct timespec ts;
    332 
    333         ts.tv_sec  = timeoutMs/1000;
    334         ts.tv_nsec = timeoutMs%1000;
    335         pthread_cond_timedwait_relative_np(&er->cond, &er->lock, &ts);
    336 
    337         ALOGV_IF((er->frames_in < buffer->frame_count),
    338                  "echo_reference_read() waited %d ms but still not enough frames"\
    339                  " er->frames_in: %d, buffer->frame_count = %d",
    340                  timeoutMs, er->frames_in, buffer->frame_count);
    341     }
    342 
    343     int64_t timeDiff;
    344     struct timespec tmp;
    345 
    346     if ((er->wr_render_time.tv_sec == 0 && er->wr_render_time.tv_nsec == 0) ||
    347         (buffer->time_stamp.tv_sec == 0 && buffer->time_stamp.tv_nsec == 0)) {
    348         ALOGV("echo_reference_read(): NEW:timestamp is zero---------setting timeDiff = 0, "\
    349              "not updating delay this time");
    350         timeDiff = 0;
    351     } else {
    352         if (buffer->time_stamp.tv_nsec < er->wr_render_time.tv_nsec) {
    353             tmp.tv_sec = buffer->time_stamp.tv_sec - er->wr_render_time.tv_sec - 1;
    354             tmp.tv_nsec = 1000000000 + buffer->time_stamp.tv_nsec - er->wr_render_time.tv_nsec;
    355         } else {
    356             tmp.tv_sec = buffer->time_stamp.tv_sec - er->wr_render_time.tv_sec;
    357             tmp.tv_nsec = buffer->time_stamp.tv_nsec - er->wr_render_time.tv_nsec;
    358         }
    359         timeDiff = (((int64_t)tmp.tv_sec * 1000000000 + tmp.tv_nsec));
    360 
    361         int64_t expectedDelayNs =  er->playback_delay + buffer->delay_ns - timeDiff;
    362 
    363         if (er->resampler != NULL) {
    364             // Resampler already compensates part of the delay
    365             int32_t rsmp_delay = er->resampler->delay_ns(er->resampler);
    366             expectedDelayNs -= rsmp_delay;
    367         }
    368 
    369         ALOGV("echo_reference_read(): expectedDelayNs[%lld] = "
    370                 "er->playback_delay[%d] + delayCapture[%d] - timeDiff[%lld]",
    371                 expectedDelayNs, er->playback_delay, buffer->delay_ns, timeDiff);
    372 
    373         if (expectedDelayNs > 0) {
    374             int64_t delayNs = ((int64_t)er->frames_in * 1000000000) / er->rd_sampling_rate;
    375 
    376             int64_t  deltaNs = delayNs - expectedDelayNs;
    377 
    378             ALOGV("echo_reference_read(): EchoPathDelayDeviation between reference and DMA [%lld]",
    379                     deltaNs);
    380             if (abs(deltaNs) >= MIN_DELAY_DELTA_NS) {
    381                 // smooth the variation and update the reference buffer only
    382                 // if a deviation in the same direction is observed for more than MIN_DELTA_NUM
    383                 // consecutive reads.
    384                 int16_t delay_sign = (deltaNs >= 0) ? 1 : -1;
    385                 if (delay_sign == er->prev_delta_sign) {
    386                     er->delta_count++;
    387                 } else {
    388                     er->delta_count = 1;
    389                 }
    390                 er->prev_delta_sign = delay_sign;
    391 
    392                 if (er->delta_count > MIN_DELTA_NUM) {
    393                     size_t previousFrameIn = er->frames_in;
    394                     er->frames_in = (size_t)((expectedDelayNs * er->rd_sampling_rate)/1000000000);
    395                     int offset = er->frames_in - previousFrameIn;
    396 
    397                     ALOGV("echo_reference_read(): deltaNs ENOUGH and %s: "
    398                             "er->frames_in: %d, previousFrameIn = %d",
    399                          delay_sign ? "positive" : "negative", er->frames_in, previousFrameIn);
    400 
    401                     if (deltaNs < 0) {
    402                         // Less data available in the reference buffer than expected
    403                         if (er->frames_in > er->buf_size) {
    404                             er->buf_size = er->frames_in;
    405                             er->buffer  = realloc(er->buffer, er->buf_size * er->rd_frame_size);
    406                             ALOGV("echo_reference_read(): increasing buffer size to %d",
    407                                   er->buf_size);
    408                         }
    409 
    410                         if (offset > 0) {
    411                             memset((char *)er->buffer + previousFrameIn * er->rd_frame_size,
    412                                    0, offset * er->rd_frame_size);
    413                             ALOGV("echo_reference_read(): pushing ref buffer by [%d]", offset);
    414                         }
    415                     } else {
    416                         // More data available in the reference buffer than expected
    417                         offset = -offset;
    418                         if (offset > 0) {
    419                             memcpy(er->buffer, (char *)er->buffer + (offset * er->rd_frame_size),
    420                                    er->frames_in * er->rd_frame_size);
    421                             ALOGV("echo_reference_read(): shifting ref buffer by [%d]",
    422                                   er->frames_in);
    423                         }
    424                     }
    425                 }
    426             } else {
    427                 er->delta_count = 0;
    428                 er->prev_delta_sign = 0;
    429                 ALOGV("echo_reference_read(): Constant EchoPathDelay - difference "
    430                         "between reference and DMA %lld", deltaNs);
    431             }
    432         } else {
    433             ALOGV("echo_reference_read(): NEGATIVE expectedDelayNs[%lld] =  "\
    434                  "er->playback_delay[%d] + delayCapture[%d] - timeDiff[%lld]",
    435                  expectedDelayNs, er->playback_delay, buffer->delay_ns, timeDiff);
    436         }
    437     }
    438 
    439     if (er->frames_in < buffer->frame_count) {
    440         if (buffer->frame_count > er->buf_size) {
    441             er->buf_size = buffer->frame_count;
    442             er->buffer  = realloc(er->buffer, er->buf_size * er->rd_frame_size);
    443             ALOGV("echo_reference_read(): increasing buffer size to %d", er->buf_size);
    444         }
    445         // filling up the reference buffer with 0s to match the expected delay.
    446         memset((char *)er->buffer + er->frames_in * er->rd_frame_size,
    447             0, (buffer->frame_count - er->frames_in) * er->rd_frame_size);
    448         er->frames_in = buffer->frame_count;
    449     }
    450 
    451     memcpy(buffer->raw,
    452            (char *)er->buffer,
    453            buffer->frame_count * er->rd_frame_size);
    454 
    455     er->frames_in -= buffer->frame_count;
    456     memcpy(er->buffer,
    457            (char *)er->buffer + buffer->frame_count * er->rd_frame_size,
    458            er->frames_in * er->rd_frame_size);
    459 
    460     // As the reference buffer is now time aligned to the microphone signal there is a zero delay
    461     buffer->delay_ns = 0;
    462 
    463     ALOGV("echo_reference_read() END %d frames, total frames in %d",
    464           buffer->frame_count, er->frames_in);
    465 
    466     pthread_cond_signal(&er->cond);
    467 
    468 exit:
    469     pthread_mutex_unlock(&er->lock);
    470     return 0;
    471 }
    472 
    473 
    474 int create_echo_reference(audio_format_t rdFormat,
    475                             uint32_t rdChannelCount,
    476                             uint32_t rdSamplingRate,
    477                             audio_format_t wrFormat,
    478                             uint32_t wrChannelCount,
    479                             uint32_t wrSamplingRate,
    480                             struct echo_reference_itfe **echo_reference)
    481 {
    482     struct echo_reference *er;
    483 
    484     ALOGV("create_echo_reference()");
    485 
    486     if (echo_reference == NULL) {
    487         return -EINVAL;
    488     }
    489 
    490     *echo_reference = NULL;
    491 
    492     if (rdFormat != AUDIO_FORMAT_PCM_16_BIT ||
    493             rdFormat != wrFormat) {
    494         ALOGW("create_echo_reference bad format rd %d, wr %d", rdFormat, wrFormat);
    495         return -EINVAL;
    496     }
    497     if ((rdChannelCount != 1 && rdChannelCount != 2) ||
    498             wrChannelCount != 2) {
    499         ALOGW("create_echo_reference bad channel count rd %d, wr %d", rdChannelCount,
    500                 wrChannelCount);
    501         return -EINVAL;
    502     }
    503 
    504     er = (struct echo_reference *)calloc(1, sizeof(struct echo_reference));
    505 
    506     er->itfe.read = echo_reference_read;
    507     er->itfe.write = echo_reference_write;
    508 
    509     er->state = ECHOREF_IDLE;
    510     er->rd_format = rdFormat;
    511     er->rd_channel_count = rdChannelCount;
    512     er->rd_sampling_rate = rdSamplingRate;
    513     er->wr_format = wrFormat;
    514     er->wr_channel_count = wrChannelCount;
    515     er->wr_sampling_rate = wrSamplingRate;
    516     er->rd_frame_size = audio_bytes_per_sample(rdFormat) * rdChannelCount;
    517     er->wr_frame_size = audio_bytes_per_sample(wrFormat) * wrChannelCount;
    518     *echo_reference = &er->itfe;
    519     return 0;
    520 }
    521 
    522 void release_echo_reference(struct echo_reference_itfe *echo_reference) {
    523     struct echo_reference *er = (struct echo_reference *)echo_reference;
    524 
    525     if (er == NULL) {
    526         return;
    527     }
    528 
    529     ALOGV("EchoReference dstor");
    530     echo_reference_reset_l(er);
    531     if (er->resampler != NULL) {
    532         release_resampler(er->resampler);
    533     }
    534     free(er);
    535 }
    536 
    537