Home | History | Annotate | Download | only in audio_utils
      1 /*
      2  * Copyright (C) 2011 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include <cutils/bitops.h>  /* for popcount() */
     18 #include <audio_utils/primitives.h>
     19 #include "private/private.h"
     20 
     21 void ditherAndClamp(int32_t *out, const int32_t *sums, size_t pairs)
     22 {
     23     for (; pairs > 0; --pairs) {
     24         const int32_t l = clamp16(*sums++ >> 12);
     25         const int32_t r = clamp16(*sums++ >> 12);
     26         *out++ = (r << 16) | (l & 0xFFFF);
     27     }
     28 }
     29 
     30 void memcpy_to_i16_from_q4_27(int16_t *dst, const int32_t *src, size_t count)
     31 {
     32     for (; count > 0; --count) {
     33         *dst++ = clamp16(*src++ >> 12);
     34     }
     35 }
     36 
     37 void memcpy_to_i16_from_u8(int16_t *dst, const uint8_t *src, size_t count)
     38 {
     39     dst += count;
     40     src += count;
     41     for (; count > 0; --count) {
     42         *--dst = (int16_t)(*--src - 0x80) << 8;
     43     }
     44 }
     45 
     46 void memcpy_to_u8_from_i16(uint8_t *dst, const int16_t *src, size_t count)
     47 {
     48     for (; count > 0; --count) {
     49         *dst++ = (*src++ >> 8) + 0x80;
     50     }
     51 }
     52 
     53 void memcpy_to_u8_from_float(uint8_t *dst, const float *src, size_t count)
     54 {
     55     for (; count > 0; --count) {
     56         *dst++ = clamp8_from_float(*src++);
     57     }
     58 }
     59 
     60 void memcpy_to_i16_from_i32(int16_t *dst, const int32_t *src, size_t count)
     61 {
     62     for (; count > 0; --count) {
     63         *dst++ = *src++ >> 16;
     64     }
     65 }
     66 
     67 void memcpy_to_i16_from_float(int16_t *dst, const float *src, size_t count)
     68 {
     69     for (; count > 0; --count) {
     70         *dst++ = clamp16_from_float(*src++);
     71     }
     72 }
     73 
     74 void memcpy_to_float_from_q4_27(float *dst, const int32_t *src, size_t count)
     75 {
     76     for (; count > 0; --count) {
     77         *dst++ = float_from_q4_27(*src++);
     78     }
     79 }
     80 
     81 void memcpy_to_float_from_i16(float *dst, const int16_t *src, size_t count)
     82 {
     83     dst += count;
     84     src += count;
     85     for (; count > 0; --count) {
     86         *--dst = float_from_i16(*--src);
     87     }
     88 }
     89 
     90 void memcpy_to_float_from_u8(float *dst, const uint8_t *src, size_t count)
     91 {
     92     dst += count;
     93     src += count;
     94     for (; count > 0; --count) {
     95         *--dst = float_from_u8(*--src);
     96     }
     97 }
     98 
     99 void memcpy_to_float_from_p24(float *dst, const uint8_t *src, size_t count)
    100 {
    101     dst += count;
    102     src += count * 3;
    103     for (; count > 0; --count) {
    104         src -= 3;
    105         *--dst = float_from_p24(src);
    106     }
    107 }
    108 
    109 void memcpy_to_i16_from_p24(int16_t *dst, const uint8_t *src, size_t count)
    110 {
    111     for (; count > 0; --count) {
    112 #if HAVE_BIG_ENDIAN
    113         *dst++ = src[1] | (src[0] << 8);
    114 #else
    115         *dst++ = src[1] | (src[2] << 8);
    116 #endif
    117         src += 3;
    118     }
    119 }
    120 
    121 void memcpy_to_i32_from_p24(int32_t *dst, const uint8_t *src, size_t count)
    122 {
    123     dst += count;
    124     src += count * 3;
    125     for (; count > 0; --count) {
    126         src -= 3;
    127 #if HAVE_BIG_ENDIAN
    128         *--dst = (src[2] << 8) | (src[1] << 16) | (src[0] << 24);
    129 #else
    130         *--dst = (src[0] << 8) | (src[1] << 16) | (src[2] << 24);
    131 #endif
    132     }
    133 }
    134 
    135 void memcpy_to_p24_from_i16(uint8_t *dst, const int16_t *src, size_t count)
    136 {
    137     dst += count * 3;
    138     src += count;
    139     for (; count > 0; --count) {
    140         dst -= 3;
    141         const int16_t sample = *--src;
    142 #if HAVE_BIG_ENDIAN
    143         dst[0] = sample >> 8;
    144         dst[1] = sample;
    145         dst[2] = 0;
    146 #else
    147         dst[0] = 0;
    148         dst[1] = sample;
    149         dst[2] = sample >> 8;
    150 #endif
    151     }
    152 }
    153 
    154 void memcpy_to_p24_from_float(uint8_t *dst, const float *src, size_t count)
    155 {
    156     for (; count > 0; --count) {
    157         int32_t ival = clamp24_from_float(*src++);
    158 
    159 #if HAVE_BIG_ENDIAN
    160         *dst++ = ival >> 16;
    161         *dst++ = ival >> 8;
    162         *dst++ = ival;
    163 #else
    164         *dst++ = ival;
    165         *dst++ = ival >> 8;
    166         *dst++ = ival >> 16;
    167 #endif
    168     }
    169 }
    170 
    171 void memcpy_to_p24_from_q8_23(uint8_t *dst, const int32_t *src, size_t count)
    172 {
    173     for (; count > 0; --count) {
    174         int32_t ival = clamp24_from_q8_23(*src++);
    175 
    176 #if HAVE_BIG_ENDIAN
    177         *dst++ = ival >> 16;
    178         *dst++ = ival >> 8;
    179         *dst++ = ival;
    180 #else
    181         *dst++ = ival;
    182         *dst++ = ival >> 8;
    183         *dst++ = ival >> 16;
    184 #endif
    185     }
    186 }
    187 
    188 void memcpy_to_p24_from_i32(uint8_t *dst, const int32_t *src, size_t count)
    189 {
    190     for (; count > 0; --count) {
    191         int32_t ival = *src++ >> 8;
    192 
    193 #if HAVE_BIG_ENDIAN
    194         *dst++ = ival >> 16;
    195         *dst++ = ival >> 8;
    196         *dst++ = ival;
    197 #else
    198         *dst++ = ival;
    199         *dst++ = ival >> 8;
    200         *dst++ = ival >> 16;
    201 #endif
    202     }
    203 }
    204 
    205 void memcpy_to_q8_23_from_i16(int32_t *dst, const int16_t *src, size_t count)
    206 {
    207     dst += count;
    208     src += count;
    209     for (; count > 0; --count) {
    210         *--dst = (int32_t)*--src << 8;
    211     }
    212 }
    213 
    214 void memcpy_to_q8_23_from_float_with_clamp(int32_t *dst, const float *src, size_t count)
    215 {
    216     for (; count > 0; --count) {
    217         *dst++ = clamp24_from_float(*src++);
    218     }
    219 }
    220 
    221 void memcpy_to_q8_23_from_p24(int32_t *dst, const uint8_t *src, size_t count)
    222 {
    223     dst += count;
    224     src += count * 3;
    225     for (; count > 0; --count) {
    226         src -= 3;
    227 #if HAVE_BIG_ENDIAN
    228         *--dst = (int8_t)src[0] << 16 | src[1] << 8 | src[2];
    229 #else
    230         *--dst = (int8_t)src[2] << 16 | src[1] << 8 | src[0];
    231 #endif
    232     }
    233 }
    234 
    235 void memcpy_to_q4_27_from_float(int32_t *dst, const float *src, size_t count)
    236 {
    237     for (; count > 0; --count) {
    238         *dst++ = clampq4_27_from_float(*src++);
    239     }
    240 }
    241 
    242 void memcpy_to_i16_from_q8_23(int16_t *dst, const int32_t *src, size_t count)
    243 {
    244     for (; count > 0; --count) {
    245         *dst++ = clamp16(*src++ >> 8);
    246     }
    247 }
    248 
    249 void memcpy_to_float_from_q8_23(float *dst, const int32_t *src, size_t count)
    250 {
    251     for (; count > 0; --count) {
    252         *dst++ = float_from_q8_23(*src++);
    253     }
    254 }
    255 
    256 void memcpy_to_i32_from_i16(int32_t *dst, const int16_t *src, size_t count)
    257 {
    258     dst += count;
    259     src += count;
    260     for (; count > 0; --count) {
    261         *--dst = (int32_t)*--src << 16;
    262     }
    263 }
    264 
    265 void memcpy_to_i32_from_float(int32_t *dst, const float *src, size_t count)
    266 {
    267     for (; count > 0; --count) {
    268         *dst++ = clamp32_from_float(*src++);
    269     }
    270 }
    271 
    272 void memcpy_to_float_from_i32(float *dst, const int32_t *src, size_t count)
    273 {
    274     for (; count > 0; --count) {
    275         *dst++ = float_from_i32(*src++);
    276     }
    277 }
    278 
    279 void memcpy_to_float_from_float_with_clamping(float *dst, const float *src, size_t count,
    280                                               float absMax) {
    281     // Note: using NEON intrinsics (vminq_f32, vld1q_f32...) did NOT accelerate
    282     // the function when benchmarked. The compiler already vectorize using FMINNM f32x4 & similar.
    283     // Note: clamping induce a ~20% overhead compared to memcpy for count in [64, 512]
    284     //       See primitives_benchmark
    285     for (; count > 0; --count) {
    286         const float sample = *src++;
    287         *dst++ = fmax(-absMax, fmin(absMax, sample));
    288     }
    289 }
    290 
    291 void downmix_to_mono_i16_from_stereo_i16(int16_t *dst, const int16_t *src, size_t count)
    292 {
    293     for (; count > 0; --count) {
    294         *dst++ = (int16_t)(((int32_t)src[0] + (int32_t)src[1]) >> 1);
    295         src += 2;
    296     }
    297 }
    298 
    299 void upmix_to_stereo_i16_from_mono_i16(int16_t *dst, const int16_t *src, size_t count)
    300 {
    301     dst += count * 2;
    302     src += count;
    303     for (; count > 0; --count) {
    304         const int32_t temp = *--src;
    305         dst -= 2;
    306         dst[0] = temp;
    307         dst[1] = temp;
    308     }
    309 }
    310 
    311 void downmix_to_mono_float_from_stereo_float(float *dst, const float *src, size_t frames)
    312 {
    313     for (; frames > 0; --frames) {
    314         *dst++ = (src[0] + src[1]) * 0.5;
    315         src += 2;
    316     }
    317 }
    318 
    319 void upmix_to_stereo_float_from_mono_float(float *dst, const float *src, size_t frames)
    320 {
    321     dst += frames * 2;
    322     src += frames;
    323     for (; frames > 0; --frames) {
    324         const float temp = *--src;
    325         dst -= 2;
    326         dst[0] = temp;
    327         dst[1] = temp;
    328     }
    329 }
    330 
    331 size_t nonZeroMono32(const int32_t *samples, size_t count)
    332 {
    333     size_t nonZero = 0;
    334     for (; count > 0; --count) {
    335         nonZero += *samples++ != 0;
    336     }
    337     return nonZero;
    338 }
    339 
    340 size_t nonZeroMono16(const int16_t *samples, size_t count)
    341 {
    342     size_t nonZero = 0;
    343     for (; count > 0; --count) {
    344         nonZero += *samples++ != 0;
    345     }
    346     return nonZero;
    347 }
    348 
    349 size_t nonZeroStereo32(const int32_t *frames, size_t count)
    350 {
    351     size_t nonZero = 0;
    352     for (; count > 0; --count) {
    353         nonZero += frames[0] != 0 || frames[1] != 0;
    354         frames += 2;
    355     }
    356     return nonZero;
    357 }
    358 
    359 size_t nonZeroStereo16(const int16_t *frames, size_t count)
    360 {
    361     size_t nonZero = 0;
    362     for (; count > 0; --count) {
    363         nonZero += frames[0] != 0 || frames[1] != 0;
    364         frames += 2;
    365     }
    366     return nonZero;
    367 }
    368 
    369 /*
    370  * C macro to do channel mask copying independent of dst/src sample type.
    371  * Don't pass in any expressions for the macro arguments here.
    372  */
    373 #define copy_frame_by_mask(dst, dmask, src, smask, count, zero) \
    374 { \
    375     uint32_t bit, ormask; \
    376     for (; (count) > 0; --(count)) { \
    377         ormask = (dmask) | (smask); \
    378         while (ormask) { \
    379             bit = ormask & -ormask; /* get lowest bit */ \
    380             ormask ^= bit; /* remove lowest bit */ \
    381             if ((dmask) & bit) { \
    382                 *(dst)++ = (smask) & bit ? *(src)++ : (zero); \
    383             } else { /* source channel only */ \
    384                 ++(src); \
    385             } \
    386         } \
    387     } \
    388 }
    389 
    390 void memcpy_by_channel_mask(void *dst, uint32_t dst_mask,
    391         const void *src, uint32_t src_mask, size_t sample_size, size_t count)
    392 {
    393 #if 0
    394     /* alternate way of handling memcpy_by_channel_mask by using the idxary */
    395     int8_t idxary[32];
    396     uint32_t src_channels = popcount(src_mask);
    397     uint32_t dst_channels =
    398             memcpy_by_index_array_initialization(idxary, 32, dst_mask, src_mask);
    399 
    400     memcpy_by_idxary(dst, dst_channels, src, src_channels, idxary, sample_size, count);
    401 #else
    402     if (dst_mask == src_mask) {
    403         memcpy(dst, src, sample_size * popcount(dst_mask) * count);
    404         return;
    405     }
    406     switch (sample_size) {
    407     case 1: {
    408         uint8_t *udst = (uint8_t*)dst;
    409         const uint8_t *usrc = (const uint8_t*)src;
    410 
    411         copy_frame_by_mask(udst, dst_mask, usrc, src_mask, count, 0);
    412     } break;
    413     case 2: {
    414         uint16_t *udst = (uint16_t*)dst;
    415         const uint16_t *usrc = (const uint16_t*)src;
    416 
    417         copy_frame_by_mask(udst, dst_mask, usrc, src_mask, count, 0);
    418     } break;
    419     case 3: { /* could be slow.  use a struct to represent 3 bytes of data. */
    420         uint8x3_t *udst = (uint8x3_t*)dst;
    421         const uint8x3_t *usrc = (const uint8x3_t*)src;
    422         static const uint8x3_t zero; /* tricky - we use this to zero out a sample */
    423 
    424         copy_frame_by_mask(udst, dst_mask, usrc, src_mask, count, zero);
    425     } break;
    426     case 4: {
    427         uint32_t *udst = (uint32_t*)dst;
    428         const uint32_t *usrc = (const uint32_t*)src;
    429 
    430         copy_frame_by_mask(udst, dst_mask, usrc, src_mask, count, 0);
    431     } break;
    432     default:
    433         abort(); /* illegal value */
    434         break;
    435     }
    436 #endif
    437 }
    438 
    439 /*
    440  * C macro to do copying by index array, to rearrange samples
    441  * within a frame.  This is independent of src/dst sample type.
    442  * Don't pass in any expressions for the macro arguments here.
    443  */
    444 #define copy_frame_by_idx(dst, dst_channels, src, src_channels, idxary, count, zero) \
    445 { \
    446     unsigned i; \
    447     int index; \
    448     for (; (count) > 0; --(count)) { \
    449         for (i = 0; i < (dst_channels); ++i) { \
    450             index = (idxary)[i]; \
    451             *(dst)++ = index < 0 ? (zero) : (src)[index]; \
    452         } \
    453         (src) += (src_channels); \
    454     } \
    455 }
    456 
    457 void memcpy_by_index_array(void *dst, uint32_t dst_channels,
    458         const void *src, uint32_t src_channels,
    459         const int8_t *idxary, size_t sample_size, size_t count)
    460 {
    461     switch (sample_size) {
    462     case 1: {
    463         uint8_t *udst = (uint8_t*)dst;
    464         const uint8_t *usrc = (const uint8_t*)src;
    465 
    466         copy_frame_by_idx(udst, dst_channels, usrc, src_channels, idxary, count, 0);
    467     } break;
    468     case 2: {
    469         uint16_t *udst = (uint16_t*)dst;
    470         const uint16_t *usrc = (const uint16_t*)src;
    471 
    472         copy_frame_by_idx(udst, dst_channels, usrc, src_channels, idxary, count, 0);
    473     } break;
    474     case 3: { /* could be slow.  use a struct to represent 3 bytes of data. */
    475         uint8x3_t *udst = (uint8x3_t*)dst;
    476         const uint8x3_t *usrc = (const uint8x3_t*)src;
    477         static const uint8x3_t zero;
    478 
    479         copy_frame_by_idx(udst, dst_channels, usrc, src_channels, idxary, count, zero);
    480     } break;
    481     case 4: {
    482         uint32_t *udst = (uint32_t*)dst;
    483         const uint32_t *usrc = (const uint32_t*)src;
    484 
    485         copy_frame_by_idx(udst, dst_channels, usrc, src_channels, idxary, count, 0);
    486     } break;
    487     default:
    488         abort(); /* illegal value */
    489         break;
    490     }
    491 }
    492 
    493 size_t memcpy_by_index_array_initialization(int8_t *idxary, size_t idxcount,
    494         uint32_t dst_mask, uint32_t src_mask)
    495 {
    496     size_t n = 0;
    497     int srcidx = 0;
    498     uint32_t bit, ormask = src_mask | dst_mask;
    499 
    500     while (ormask && n < idxcount) {
    501         bit = ormask & -ormask;          /* get lowest bit */
    502         ormask ^= bit;                   /* remove lowest bit */
    503         if (src_mask & dst_mask & bit) { /* matching channel */
    504             idxary[n++] = srcidx++;
    505         } else if (src_mask & bit) {     /* source channel only */
    506             ++srcidx;
    507         } else {                         /* destination channel only */
    508             idxary[n++] = -1;
    509         }
    510     }
    511     return n + popcount(ormask & dst_mask);
    512 }
    513 
    514 size_t memcpy_by_index_array_initialization_src_index(int8_t *idxary, size_t idxcount,
    515         uint32_t dst_mask, uint32_t src_mask) {
    516     size_t dst_count = popcount(dst_mask);
    517     if (idxcount == 0) {
    518         return dst_count;
    519     }
    520     if (dst_count > idxcount) {
    521         dst_count = idxcount;
    522     }
    523 
    524     size_t src_idx, dst_idx;
    525     for (src_idx = 0, dst_idx = 0; dst_idx < dst_count; ++dst_idx) {
    526         if (src_mask & 1) {
    527             idxary[dst_idx] = src_idx++;
    528         } else {
    529             idxary[dst_idx] = -1;
    530         }
    531         src_mask >>= 1;
    532     }
    533     return dst_idx;
    534 }
    535 
    536 size_t memcpy_by_index_array_initialization_dst_index(int8_t *idxary, size_t idxcount,
    537         uint32_t dst_mask, uint32_t src_mask) {
    538     size_t src_idx, dst_idx;
    539     size_t dst_count = __builtin_popcount(dst_mask);
    540     size_t src_count = __builtin_popcount(src_mask);
    541     if (idxcount == 0) {
    542         return dst_count;
    543     }
    544     if (dst_count > idxcount) {
    545         dst_count = idxcount;
    546     }
    547     for (src_idx = 0, dst_idx = 0; dst_idx < dst_count; ++src_idx) {
    548         if (dst_mask & 1) {
    549             idxary[dst_idx++] = src_idx < src_count ? (signed)src_idx : -1;
    550         }
    551         dst_mask >>= 1;
    552     }
    553     return dst_idx;
    554 }
    555 
    556 void accumulate_i16(int16_t *dst, const int16_t *src, size_t count) {
    557     while (count--) {
    558         *dst = clamp16((int32_t)*dst + *src++);
    559         ++dst;
    560     }
    561 }
    562 
    563 void accumulate_u8(uint8_t *dst, const uint8_t *src, size_t count) {
    564     int32_t sum;
    565     for (; count > 0; --count) {
    566         // 8-bit samples are centered around 0x80.
    567         sum = *dst + *src++ - 0x80;
    568         // Clamp to [0, 0xff].
    569         *dst++ = (sum & 0x100) ? (~sum >> 9) : sum;
    570     }
    571 }
    572 
    573 void accumulate_p24(uint8_t *dst, const uint8_t *src, size_t count) {
    574     for (; count > 0; --count) {
    575         // Unpack.
    576         int32_t dst_q8_23 = 0;
    577         int32_t src_q8_23 = 0;
    578         memcpy_to_q8_23_from_p24(&dst_q8_23, dst, 1);
    579         memcpy_to_q8_23_from_p24(&src_q8_23, src, 1);
    580 
    581         // Accumulate and overwrite.
    582         dst_q8_23 += src_q8_23;
    583         memcpy_to_p24_from_q8_23(dst, &dst_q8_23, 1);
    584 
    585         // Move on to next sample.
    586         dst += 3;
    587         src += 3;
    588   }
    589 }
    590 
    591 void accumulate_q8_23(int32_t *dst, const int32_t *src, size_t count) {
    592     for (; count > 0; --count) {
    593         *dst = clamp24_from_q8_23(*dst + *src++);
    594         ++dst;
    595     }
    596 }
    597 
    598 void accumulate_i32(int32_t *dst, const int32_t *src, size_t count) {
    599     for (; count > 0; --count) {
    600         *dst = clamp32((int64_t)*dst + *src++);
    601         ++dst;
    602     }
    603 }
    604 
    605 void accumulate_float(float *dst, const float *src, size_t count) {
    606     for (; count > 0; --count) {
    607         *dst++ += *src++;
    608     }
    609 }
    610