Home | History | Annotate | Download | only in audio_utils
      1 /*
      2  * Copyright (C) 2011 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifndef ANDROID_AUDIO_PRIMITIVES_H
     18 #define ANDROID_AUDIO_PRIMITIVES_H
     19 
     20 #include <math.h>
     21 #include <stdint.h>
     22 #include <stdlib.h>
     23 #include <sys/cdefs.h>
     24 
     25 /** \cond */
     26 __BEGIN_DECLS
     27 /** \endcond */
     28 
     29 /**
     30  * \file primitives.h
     31  * The memcpy_* conversion routines are designed to work in-place on same dst as src
     32  * buffers only if the types shrink on copy, with the exception of memcpy_to_i16_from_u8().
     33  * This allows the loops to go upwards for faster cache access (and may be more flexible
     34  * for future optimization later).
     35  */
     36 
     37 /**
     38  * Deprecated. Use memcpy_to_i16_from_q4_27() instead (double the pairs for the count).
     39  * Neither this function nor memcpy_to_i16_from_q4_27() actually dither.
     40  *
     41  * Dither and clamp pairs of 32-bit input samples (sums) to 16-bit output samples (out).
     42  * Each 32-bit input sample can be viewed as a signed fixed-point Q19.12 of which the
     43  * .12 fraction bits are dithered and the 19 integer bits are clamped to signed 16 bits.
     44  * Alternatively the input can be viewed as Q4.27, of which the lowest .12 of the fraction
     45  * is dithered and the remaining fraction is converted to the output Q.15, with clamping
     46  * on the 4 integer guard bits.
     47  *
     48  * For interleaved stereo, pairs is the number of sample pairs,
     49  * and out is an array of interleaved pairs of 16-bit samples per channel.
     50  * For mono, pairs is the number of samples / 2, and out is an array of 16-bit samples.
     51  * The name "dither" is a misnomer; the current implementation does not actually dither
     52  * but uses truncation.  This may change.
     53  * The out and sums buffers must either be completely separate (non-overlapping), or
     54  * they must both start at the same address.  Partially overlapping buffers are not supported.
     55  */
     56 void ditherAndClamp(int32_t *out, const int32_t *sums, size_t pairs);
     57 
     58 /**
     59  * Copy samples from signed fixed-point 32-bit Q4.27 to 16-bit Q0.15
     60  *
     61  *  \param dst     Destination buffer
     62  *  \param src     Source buffer
     63  *  \param count   Number of samples to copy
     64  *
     65  * The destination and source buffers must either be completely separate (non-overlapping), or
     66  * they must both start at the same address.  Partially overlapping buffers are not supported.
     67  */
     68 void memcpy_to_i16_from_q4_27(int16_t *dst, const int32_t *src, size_t count);
     69 
     70 /**
     71  * Expand and copy samples from unsigned 8-bit offset by 0x80 to signed 16-bit.
     72  *
     73  *  \param dst     Destination buffer
     74  *  \param src     Source buffer
     75  *  \param count   Number of samples to copy
     76  *
     77  * The destination and source buffers must either be completely separate (non-overlapping), or
     78  * they must both start at the same address.  Partially overlapping buffers are not supported.
     79  */
     80 void memcpy_to_i16_from_u8(int16_t *dst, const uint8_t *src, size_t count);
     81 
     82 /**
     83  * Shrink and copy samples from signed 16-bit to unsigned 8-bit offset by 0x80.
     84  *
     85  *  \param dst     Destination buffer
     86  *  \param src     Source buffer
     87  *  \param count   Number of samples to copy
     88  *
     89  * The destination and source buffers must either be completely separate (non-overlapping), or
     90  * they must both start at the same address.  Partially overlapping buffers are not supported.
     91  * The conversion is done by truncation, without dithering, so it loses resolution.
     92  */
     93 void memcpy_to_u8_from_i16(uint8_t *dst, const int16_t *src, size_t count);
     94 
     95 /**
     96  * Copy samples from float to unsigned 8-bit offset by 0x80.
     97  *
     98  *  \param dst     Destination buffer
     99  *  \param src     Source buffer
    100  *  \param count   Number of samples to copy
    101  *
    102  * The destination and source buffers must either be completely separate (non-overlapping), or
    103  * they must both start at the same address.  Partially overlapping buffers are not supported.
    104  * The conversion is done by truncation, without dithering, so it loses resolution.
    105  */
    106 void memcpy_to_u8_from_float(uint8_t *dst, const float *src, size_t count);
    107 
    108 /**
    109  * Shrink and copy samples from signed 32-bit fixed-point Q0.31 to signed 16-bit Q0.15.
    110  *
    111  *  \param dst     Destination buffer
    112  *  \param src     Source buffer
    113  *  \param count   Number of samples to copy
    114  *
    115  * The destination and source buffers must either be completely separate (non-overlapping), or
    116  * they must both start at the same address.  Partially overlapping buffers are not supported.
    117  * The conversion is done by truncation, without dithering, so it loses resolution.
    118  */
    119 void memcpy_to_i16_from_i32(int16_t *dst, const int32_t *src, size_t count);
    120 
    121 /**
    122  * Shrink and copy samples from single-precision floating-point to signed 16-bit.
    123  * Each float should be in the range -1.0 to 1.0.  Values outside that range are clamped,
    124  * refer to clamp16_from_float().
    125  *
    126  *  \param dst     Destination buffer
    127  *  \param src     Source buffer
    128  *  \param count   Number of samples to copy
    129  *
    130  * The destination and source buffers must either be completely separate (non-overlapping), or
    131  * they must both start at the same address.  Partially overlapping buffers are not supported.
    132  * The conversion is done by truncation, without dithering, so it loses resolution.
    133  */
    134 void memcpy_to_i16_from_float(int16_t *dst, const float *src, size_t count);
    135 
    136 /**
    137  * Copy samples from signed fixed-point 32-bit Q4.27 to single-precision floating-point.
    138  * The nominal output float range is [-1.0, 1.0] if the fixed-point range is
    139  * [0xf8000000, 0x07ffffff].  The full float range is [-16.0, 16.0].  Note the closed range
    140  * at 1.0 and 16.0 is due to rounding on conversion to float. See float_from_q4_27() for details.
    141  *
    142  *  \param dst     Destination buffer
    143  *  \param src     Source buffer
    144  *  \param count   Number of samples to copy
    145  *
    146  * The destination and source buffers must either be completely separate (non-overlapping), or
    147  * they must both start at the same address.  Partially overlapping buffers are not supported.
    148  */
    149 void memcpy_to_float_from_q4_27(float *dst, const int32_t *src, size_t count);
    150 
    151 /**
    152  * Copy samples from signed fixed-point 16 bit Q0.15 to single-precision floating-point.
    153  * The output float range is [-1.0, 1.0) for the fixed-point range [0x8000, 0x7fff].
    154  * No rounding is needed as the representation is exact.
    155  *
    156  *  \param dst     Destination buffer
    157  *  \param src     Source buffer
    158  *  \param count   Number of samples to copy
    159  *
    160  * The destination and source buffers must either be completely separate (non-overlapping), or
    161  * they must both start at the same address.  Partially overlapping buffers are not supported.
    162  */
    163 void memcpy_to_float_from_i16(float *dst, const int16_t *src, size_t count);
    164 
    165 /**
    166  * Copy samples from unsigned fixed-point 8 bit to single-precision floating-point.
    167  * The output float range is [-1.0, 1.0) for the fixed-point range [0x00, 0xFF].
    168  * No rounding is needed as the representation is exact.
    169  *
    170  *  \param dst     Destination buffer
    171  *  \param src     Source buffer
    172  *  \param count   Number of samples to copy
    173  *
    174  * The destination and source buffers must either be completely separate (non-overlapping), or
    175  * they must both start at the same address.  Partially overlapping buffers are not supported.
    176  */
    177 void memcpy_to_float_from_u8(float *dst, const uint8_t *src, size_t count);
    178 
    179 /**
    180  * Copy samples from signed fixed-point packed 24 bit Q0.23 to single-precision floating-point.
    181  * The packed 24 bit input is stored in native endian format in a uint8_t byte array.
    182  * The output float range is [-1.0, 1.0) for the fixed-point range [0x800000, 0x7fffff].
    183  * No rounding is needed as the representation is exact.
    184  *
    185  *  \param dst     Destination buffer
    186  *  \param src     Source buffer
    187  *  \param count   Number of samples to copy
    188  *
    189  * The destination and source buffers must either be completely separate (non-overlapping), or
    190  * they must both start at the same address.  Partially overlapping buffers are not supported.
    191  */
    192 void memcpy_to_float_from_p24(float *dst, const uint8_t *src, size_t count);
    193 
    194 /**
    195  * Copy samples from signed fixed-point packed 24 bit Q0.23 to signed fixed point 16 bit Q0.15.
    196  * The packed 24 bit input is stored in native endian format in a uint8_t byte array.
    197  * The data is truncated without rounding.
    198  *
    199  *  \param dst     Destination buffer
    200  *  \param src     Source buffer
    201  *  \param count   Number of samples to copy
    202  *
    203  * The destination and source buffers must either be completely separate (non-overlapping), or
    204  * they must both start at the same address.  Partially overlapping buffers are not supported.
    205  */
    206 void memcpy_to_i16_from_p24(int16_t *dst, const uint8_t *src, size_t count);
    207 
    208 /**
    209  * Copy samples from signed fixed-point packed 24 bit Q0.23 to signed fixed-point 32-bit Q0.31.
    210  * The packed 24 bit input is stored in native endian format in a uint8_t byte array.
    211  * The output data range is [0x80000000, 0x7fffff00] at intervals of 0x100.
    212  *
    213  *  \param dst     Destination buffer
    214  *  \param src     Source buffer
    215  *  \param count   Number of samples to copy
    216  *
    217  * The destination and source buffers must either be completely separate (non-overlapping), or
    218  * they must both start at the same address.  Partially overlapping buffers are not supported.
    219  */
    220 void memcpy_to_i32_from_p24(int32_t *dst, const uint8_t *src, size_t count);
    221 
    222 /**
    223  * Copy samples from signed fixed point 16 bit Q0.15 to signed fixed-point packed 24 bit Q0.23.
    224  * The packed 24 bit output is assumed to be a native-endian uint8_t byte array.
    225  * The output data range is [0x800000, 0x7fff00] (not full).
    226  * Nevertheless there is no DC offset on the output, if the input has no DC offset.
    227  *
    228  *  \param dst     Destination buffer
    229  *  \param src     Source buffer
    230  *  \param count   Number of samples to copy
    231  *
    232  * The destination and source buffers must either be completely separate (non-overlapping), or
    233  * they must both start at the same address.  Partially overlapping buffers are not supported.
    234  */
    235 void memcpy_to_p24_from_i16(uint8_t *dst, const int16_t *src, size_t count);
    236 
    237 /**
    238  * Copy samples from single-precision floating-point to signed fixed-point packed 24 bit Q0.23.
    239  * The packed 24 bit output is assumed to be a native-endian uint8_t byte array.
    240  * The data is clamped and rounded to nearest, ties away from zero. See clamp24_from_float()
    241  * for details.
    242  *
    243  *  \param dst     Destination buffer
    244  *  \param src     Source buffer
    245  *  \param count   Number of samples to copy
    246  *
    247  * The destination and source buffers must either be completely separate (non-overlapping), or
    248  * they must both start at the same address.  Partially overlapping buffers are not supported.
    249  */
    250 void memcpy_to_p24_from_float(uint8_t *dst, const float *src, size_t count);
    251 
    252 /**
    253  * Copy samples from signed fixed-point 32-bit Q8.23 to signed fixed-point packed 24 bit Q0.23.
    254  * The packed 24 bit output is assumed to be a native-endian uint8_t byte array.
    255  * The data is clamped to the range [0x800000, 0x7fffff].
    256  *
    257  *  \param dst     Destination buffer
    258  *  \param src     Source buffer
    259  *  \param count   Number of samples to copy
    260  *
    261  * The destination and source buffers must either be completely separate (non-overlapping), or
    262  * they must both start at the same address.  Partially overlapping buffers are not supported.
    263  */
    264 void memcpy_to_p24_from_q8_23(uint8_t *dst, const int32_t *src, size_t count);
    265 
    266 /**
    267  * Shrink and copy samples from signed 32-bit fixed-point Q0.31
    268  * to signed fixed-point packed 24 bit Q0.23.
    269  * The packed 24 bit output is assumed to be a native-endian uint8_t byte array.
    270  *
    271  *  \param dst     Destination buffer
    272  *  \param src     Source buffer
    273  *  \param count   Number of samples to copy
    274  *
    275  * The destination and source buffers must either be completely separate (non-overlapping), or
    276  * they must both start at the same address.  Partially overlapping buffers are not supported.
    277  * The conversion is done by truncation, without dithering, so it loses resolution.
    278  */
    279 void memcpy_to_p24_from_i32(uint8_t *dst, const int32_t *src, size_t count);
    280 
    281 /**
    282  * Copy samples from signed fixed point 16-bit Q0.15 to signed fixed-point 32-bit Q8.23.
    283  * The output data range is [0xff800000, 0x007fff00] at intervals of 0x100.
    284  *
    285  *  \param dst     Destination buffer
    286  *  \param src     Source buffer
    287  *  \param count   Number of samples to copy
    288  *
    289  * The destination and source buffers must either be completely separate (non-overlapping), or
    290  * they must both start at the same address.  Partially overlapping buffers are not supported.
    291  */
    292 void memcpy_to_q8_23_from_i16(int32_t *dst, const int16_t *src, size_t count);
    293 
    294 /**
    295  * Copy samples from single-precision floating-point to signed fixed-point 32-bit Q8.23.
    296  * This copy will clamp the Q8.23 representation to [0xff800000, 0x007fffff] even though there
    297  * are guard bits available. Fractional lsb is rounded to nearest, ties away from zero.
    298  * See clamp24_from_float() for details.
    299  *
    300  *  \param dst     Destination buffer
    301  *  \param src     Source buffer
    302  *  \param count   Number of samples to copy
    303  *
    304  * The destination and source buffers must either be completely separate (non-overlapping), or
    305  * they must both start at the same address.  Partially overlapping buffers are not supported.
    306  */
    307 void memcpy_to_q8_23_from_float_with_clamp(int32_t *dst, const float *src, size_t count);
    308 
    309 /**
    310  * Copy samples from signed fixed point packed 24-bit Q0.23 to signed fixed-point 32-bit Q8.23.
    311  * The output data range is [0xff800000, 0x007fffff].
    312  *
    313  *  \param dst     Destination buffer
    314  *  \param src     Source buffer
    315  *  \param count   Number of samples to copy
    316  *
    317  * The destination and source buffers must either be completely separate (non-overlapping), or
    318  * they must both start at the same address.  Partially overlapping buffers are not supported.
    319  */
    320 void memcpy_to_q8_23_from_p24(int32_t *dst, const uint8_t *src, size_t count);
    321 
    322 /**
    323  * Copy samples from single-precision floating-point to signed fixed-point 32-bit Q4.27.
    324  * The conversion will use the full available Q4.27 range, including guard bits.
    325  * Fractional lsb is rounded to nearest, ties away from zero.
    326  * See clampq4_27_from_float() for details.
    327  *
    328  *  \param dst     Destination buffer
    329  *  \param src     Source buffer
    330  *  \param count   Number of samples to copy
    331  *
    332  * The destination and source buffers must either be completely separate (non-overlapping), or
    333  * they must both start at the same address.  Partially overlapping buffers are not supported.
    334  */
    335 void memcpy_to_q4_27_from_float(int32_t *dst, const float *src, size_t count);
    336 
    337 /**
    338  * Copy samples from signed fixed-point 32-bit Q8.23 to signed fixed point 16-bit Q0.15.
    339  * The data is clamped, and truncated without rounding.
    340  *
    341  *  \param dst     Destination buffer
    342  *  \param src     Source buffer
    343  *  \param count   Number of samples to copy
    344  *
    345  * The destination and source buffers must either be completely separate (non-overlapping), or
    346  * they must both start at the same address.  Partially overlapping buffers are not supported.
    347  */
    348 void memcpy_to_i16_from_q8_23(int16_t *dst, const int32_t *src, size_t count);
    349 
    350 /**
    351  * Copy samples from signed fixed-point 32-bit Q8.23 to single-precision floating-point.
    352  * The nominal output float range is [-1.0, 1.0) for the fixed-point
    353  * range [0xff800000, 0x007fffff]. The maximum output float range is [-256.0, 256.0).
    354  * No rounding is needed as the representation is exact for nominal values.
    355  * Rounding for overflow values is to nearest, ties to even.
    356  *
    357  *  \param dst     Destination buffer
    358  *  \param src     Source buffer
    359  *  \param count   Number of samples to copy
    360  *
    361  * The destination and source buffers must either be completely separate (non-overlapping), or
    362  * they must both start at the same address.  Partially overlapping buffers are not supported.
    363  */
    364 void memcpy_to_float_from_q8_23(float *dst, const int32_t *src, size_t count);
    365 
    366 /**
    367  * Copy samples from signed fixed point 16-bit Q0.15 to signed fixed-point 32-bit Q0.31.
    368  * The output data range is [0x80000000, 0x7fff0000] at intervals of 0x10000.
    369  *
    370  *  \param dst     Destination buffer
    371  *  \param src     Source buffer
    372  *  \param count   Number of samples to copy
    373  *
    374  * The destination and source buffers must either be completely separate (non-overlapping), or
    375  * they must both start at the same address.  Partially overlapping buffers are not supported.
    376  */
    377 void memcpy_to_i32_from_i16(int32_t *dst, const int16_t *src, size_t count);
    378 
    379 /**
    380  * Copy samples from single-precision floating-point to signed fixed-point 32-bit Q0.31.
    381  * If rounding is needed on truncation, the fractional lsb is rounded to nearest,
    382  * ties away from zero. See clamp32_from_float() for details.
    383  *
    384  *  \param dst     Destination buffer
    385  *  \param src     Source buffer
    386  *  \param count   Number of samples to copy
    387  *
    388  * The destination and source buffers must either be completely separate (non-overlapping), or
    389  * they must both start at the same address.  Partially overlapping buffers are not supported.
    390  */
    391 void memcpy_to_i32_from_float(int32_t *dst, const float *src, size_t count);
    392 
    393 /**
    394  * Copy samples from signed fixed-point 32-bit Q0.31 to single-precision floating-point.
    395  * The float range is [-1.0, 1.0] for the fixed-point range [0x80000000, 0x7fffffff].
    396  * Rounding is done according to float_from_i32().
    397  *
    398  *  \param dst     Destination buffer
    399  *  \param src     Source buffer
    400  *  \param count   Number of samples to copy
    401  *
    402  * The destination and source buffers must either be completely separate (non-overlapping), or
    403  * they must both start at the same address.  Partially overlapping buffers are not supported.
    404  */
    405 void memcpy_to_float_from_i32(float *dst, const int32_t *src, size_t count);
    406 
    407 /**
    408  * Copy samples from unrestricted float to range restricted float [-absMax, absMax].
    409  * Any float sample not in the range [-absMax, absMax] will be clamped to this range.
    410  *
    411  *  \param dst     Destination buffer
    412  *  \param src     Source buffer
    413  *  \param count   Number of samples to copy
    414  *  \param absMax  Maximum of the absolute value of the copied samples.
    415  *
    416  * The destination and source buffers must either be completely separate (non-overlapping), or
    417  * they must both start at the same address.  Partially overlapping buffers are not supported.
    418  * Note: NAN is clamped to absMax and not 0 for performance reasons (~2x faster).
    419  */
    420 void memcpy_to_float_from_float_with_clamping(float *dst, const float *src, size_t count,
    421                                               float absMax);
    422 
    423 /**
    424  * Downmix pairs of interleaved stereo input 16-bit samples to mono output 16-bit samples.
    425  *
    426  *  \param dst     Destination buffer
    427  *  \param src     Source buffer
    428  *  \param count   Number of stereo frames to downmix
    429  *
    430  * The destination and source buffers must be completely separate (non-overlapping).
    431  * The current implementation truncates the mean rather than dither, but this may change.
    432  */
    433 void downmix_to_mono_i16_from_stereo_i16(int16_t *dst, const int16_t *src, size_t count);
    434 
    435 /**
    436  * Upmix mono input 16-bit samples to pairs of interleaved stereo output 16-bit samples by
    437  * duplicating.
    438  *
    439  *  \param dst     Destination buffer
    440  *  \param src     Source buffer
    441  *  \param count   Number of mono samples to upmix
    442  *
    443  * The destination and source buffers must either be completely separate (non-overlapping), or
    444  * they must both start at the same address.  Partially overlapping buffers are not supported.
    445  */
    446 void upmix_to_stereo_i16_from_mono_i16(int16_t *dst, const int16_t *src, size_t count);
    447 
    448 /**
    449  * Downmix pairs of interleaved stereo input float samples to mono output float samples
    450  * by averaging the stereo pair together.
    451  *
    452  *  \param dst     Destination buffer
    453  *  \param src     Source buffer
    454  *  \param count   Number of stereo frames to downmix
    455  *
    456  * The destination and source buffers must be completely separate (non-overlapping),
    457  * or they must both start at the same address.
    458  */
    459 void downmix_to_mono_float_from_stereo_float(float *dst, const float *src, size_t count);
    460 
    461 /**
    462  * Upmix mono input float samples to pairs of interleaved stereo output float samples by
    463  * duplicating.
    464  *
    465  *  \param dst     Destination buffer
    466  *  \param src     Source buffer
    467  *  \param count   Number of mono samples to upmix
    468  *
    469  * The destination and source buffers must either be completely separate (non-overlapping), or
    470  * they must both start at the same address.  Partially overlapping buffers are not supported.
    471  */
    472 void upmix_to_stereo_float_from_mono_float(float *dst, const float *src, size_t count);
    473 
    474 /**
    475  * \return the total number of non-zero 32-bit samples.
    476  */
    477 size_t nonZeroMono32(const int32_t *samples, size_t count);
    478 
    479 /**
    480  * \return the total number of non-zero 16-bit samples.
    481  */
    482 size_t nonZeroMono16(const int16_t *samples, size_t count);
    483 
    484 /**
    485  * \return the total number of non-zero stereo frames, where a frame is considered non-zero
    486  * if either of its constituent 32-bit samples is non-zero.
    487  */
    488 size_t nonZeroStereo32(const int32_t *frames, size_t count);
    489 
    490 /**
    491  * \return the total number of non-zero stereo frames, where a frame is considered non-zero
    492  * if either of its constituent 16-bit samples is non-zero.
    493  */
    494 size_t nonZeroStereo16(const int16_t *frames, size_t count);
    495 
    496 /**
    497  * Copy frames, selecting source samples based on a source channel mask to fit
    498  * the destination channel mask. Unmatched channels in the destination channel mask
    499  * are zero filled. Unmatched channels in the source channel mask are dropped.
    500  * Channels present in the channel mask are represented by set bits in the
    501  * uint32_t value and are matched without further interpretation.
    502  *
    503  *  \param dst         Destination buffer
    504  *  \param dst_mask    Bit mask corresponding to destination channels present
    505  *  \param src         Source buffer
    506  *  \param src_mask    Bit mask corresponding to source channels present
    507  *  \param sample_size Size of each sample in bytes.  Must be 1, 2, 3, or 4.
    508  *  \param count       Number of frames to copy
    509  *
    510  * The destination and source buffers must be completely separate (non-overlapping).
    511  * If the sample size is not in range, the function will abort.
    512  */
    513 void memcpy_by_channel_mask(void *dst, uint32_t dst_mask,
    514         const void *src, uint32_t src_mask, size_t sample_size, size_t count);
    515 
    516 /**
    517  * Copy frames, selecting source samples based on an index array (idxary).
    518  * The idxary[] consists of dst_channels number of elements.
    519  * The ith element of idxary[] corresponds to the ith destination channel.
    520  * A non-negative value is the channel index in the source frame.
    521  * A negative index (-1) represents filling with 0.
    522  *
    523  * Example: Swapping L and R channels for stereo streams
    524  * <PRE>
    525  * idxary[0] = 1;
    526  * idxary[1] = 0;
    527  * </PRE>
    528  *
    529  * Example: Copying a mono source to the front center 5.1 channel
    530  * <PRE>
    531  * idxary[0] = -1;
    532  * idxary[1] = -1;
    533  * idxary[2] = 0;
    534  * idxary[3] = -1;
    535  * idxary[4] = -1;
    536  * idxary[5] = -1;
    537  * </PRE>
    538  *
    539  * This copy allows swizzling of channels or replication of channels.
    540  *
    541  *  \param dst           Destination buffer
    542  *  \param dst_channels  Number of destination channels per frame
    543  *  \param src           Source buffer
    544  *  \param src_channels  Number of source channels per frame
    545  *  \param idxary        Array of indices representing channels in the source frame
    546  *  \param sample_size   Size of each sample in bytes.  Must be 1, 2, 3, or 4.
    547  *  \param count         Number of frames to copy
    548  *
    549  * The destination and source buffers must be completely separate (non-overlapping).
    550  * If the sample size is not in range, the function will abort.
    551  */
    552 void memcpy_by_index_array(void *dst, uint32_t dst_channels,
    553         const void *src, uint32_t src_channels,
    554         const int8_t *idxary, size_t sample_size, size_t count);
    555 
    556 /**
    557  * Prepares an index array (idxary) from channel masks, which can be later
    558  * used by memcpy_by_index_array().
    559  *
    560  * \return the number of array elements required.
    561  * This may be greater than idxcount, so the return value should be checked
    562  * if idxary size is less than 32.
    563  *
    564  * Note that idxary is a caller allocated array
    565  * of at least as many channels as present in the dst_mask.
    566  * Channels present in the channel mask are represented by set bits in the
    567  * uint32_t value and are matched without further interpretation.
    568  *
    569  * This function is typically used for converting audio data with different
    570  * channel position masks.
    571  *
    572  *  \param idxary      Updated array of indices of channels in the src frame for the dst frame
    573  *  \param idxcount    Number of caller allocated elements in idxary
    574  *  \param dst_mask    Bit mask corresponding to destination channels present
    575  *  \param src_mask    Bit mask corresponding to source channels present
    576  */
    577 size_t memcpy_by_index_array_initialization(int8_t *idxary, size_t idxcount,
    578         uint32_t dst_mask, uint32_t src_mask);
    579 
    580 /**
    581  * Prepares an index array (idxary) from channel masks, which can be later
    582  * used by memcpy_by_index_array().
    583  *
    584  * \return the number of array elements required.
    585  *
    586  * For a source channel index mask, the source channels will map to the destination
    587  * channels as if counting the set bits in dst_mask in order from lsb to msb
    588  * (zero bits are ignored). The ith bit of the src_mask corresponds to the
    589  * ith SET bit of dst_mask and the ith destination channel.  Hence, a zero ith
    590  * bit of the src_mask indicates that the ith destination channel plays silence.
    591  *
    592  *  \param idxary      Updated array of indices of channels in the src frame for the dst frame
    593  *  \param idxcount    Number of caller allocated elements in idxary
    594  *  \param dst_mask    Bit mask corresponding to destination channels present
    595  *  \param src_mask    Bit mask corresponding to source channels present
    596  */
    597 size_t memcpy_by_index_array_initialization_src_index(int8_t *idxary, size_t idxcount,
    598         uint32_t dst_mask, uint32_t src_mask);
    599 
    600 /**
    601  * Prepares an index array (idxary) from channel mask bits, which can be later
    602  * used by memcpy_by_index_array().
    603  *
    604  * \return the number of array elements required.
    605  *
    606  * This initialization is for a destination channel index mask from a positional
    607  * source mask.
    608  *
    609  * For a destination channel index mask, the input channels will map
    610  * to the destination channels, with the ith SET bit in the source bits corresponding
    611  * to the ith bit in the destination bits. If there is a zero bit in the middle
    612  * of set destination bits (unlikely), the corresponding source channel will
    613  * be dropped.
    614  *
    615  *  \param idxary      Updated array of indices of channels in the src frame for the dst frame
    616  *  \param idxcount    Number of caller allocated elements in idxary
    617  *  \param dst_mask    Bit mask corresponding to destination channels present
    618  *  \param src_mask    Bit mask corresponding to source channels present
    619  */
    620 size_t memcpy_by_index_array_initialization_dst_index(int8_t *idxary, size_t idxcount,
    621         uint32_t dst_mask, uint32_t src_mask);
    622 
    623 /**
    624  * Add and clamp signed 16-bit samples.
    625  *
    626  *  \param dst     Destination buffer
    627  *  \param src     Source buffer
    628  *  \param count   Number of samples to add
    629  *
    630  * The destination and source buffers must either be completely separate (non-overlapping), or
    631  * they must both start at the same address.  Partially overlapping buffers are not supported.
    632  */
    633 void accumulate_i16(int16_t *dst, const int16_t *src, size_t count);
    634 
    635 /**
    636  * Add and clamp unsigned 8-bit samples.
    637  *
    638  *  \param dst     Destination buffer
    639  *  \param src     Source buffer
    640  *  \param count   Number of samples to add
    641  *
    642  * The destination and source buffers must either be completely separate (non-overlapping), or
    643  * they must both start at the same address.  Partially overlapping buffers are not supported.
    644  */
    645 void accumulate_u8(uint8_t *dst, const uint8_t *src, size_t count);
    646 
    647 /**
    648  * Add and clamp packed 24-bit Q0.23 samples.
    649  *
    650  *  \param dst     Destination buffer
    651  *  \param src     Source buffer
    652  *  \param count   Number of samples to add
    653  *
    654  * The destination and source buffers must either be completely separate (non-overlapping), or
    655  * they must both start at the same address.  Partially overlapping buffers are not supported.
    656  */
    657 void accumulate_p24(uint8_t *dst, const uint8_t *src, size_t count);
    658 
    659 /**
    660  * Add and clamp 32-bit Q8.23 samples.
    661  *
    662  *  \param dst     Destination buffer
    663  *  \param src     Source buffer
    664  *  \param count   Number of samples to add
    665  *
    666  * The destination and source buffers must either be completely separate (non-overlapping), or
    667  * they must both start at the same address.  Partially overlapping buffers are not supported.
    668  */
    669 void accumulate_q8_23(int32_t *dst, const int32_t *src, size_t count);
    670 
    671 /**
    672  * Add and clamp signed 32-bit Q0.31 samples.
    673  *
    674  *  \param dst     Destination buffer
    675  *  \param src     Source buffer
    676  *  \param count   Number of samples to add
    677  *
    678  * The destination and source buffers must either be completely separate (non-overlapping), or
    679  * they must both start at the same address.  Partially overlapping buffers are not supported.
    680  */
    681 void accumulate_i32(int32_t *dst, const int32_t *src, size_t count);
    682 
    683 /**
    684  * Add float samples. Result is not clamped.
    685  *
    686  *  \param dst     Destination buffer
    687  *  \param src     Source buffer
    688  *  \param count   Number of samples to add
    689  *
    690  * The destination and source buffers must either be completely separate (non-overlapping), or
    691  * they must both start at the same address.  Partially overlapping buffers are not supported.
    692  */
    693 void accumulate_float(float *dst, const float *src, size_t count);
    694 
    695 /**
    696  * Clamp (aka hard limit or clip) a signed 32-bit sample to 16-bit range.
    697  */
    698 static inline int16_t clamp16(int32_t sample)
    699 {
    700     if ((sample>>15) ^ (sample>>31))
    701         sample = 0x7FFF ^ (sample>>31);
    702     return sample;
    703 }
    704 
    705 /**
    706  * Clamp (aka hard limit or clip) a signed 64-bit sample to 32-bit range.
    707  */
    708 static inline int32_t clamp32(int64_t sample)
    709 {
    710     if ((sample>>31) ^ (sample>>63))
    711         sample = 0x7fffffff ^ (sample>>63);
    712     return sample;
    713 }
    714 
    715 /**
    716  * Convert a IEEE 754 single precision float [-1.0, 1.0) to int16_t [-32768, 32767]
    717  * with clamping.  Note the open bound at 1.0, values within 1/65536 of 1.0 map
    718  * to 32767 instead of 32768 (early clamping due to the smaller positive integer subrange).
    719  *
    720  * Values outside the range [-1.0, 1.0) are properly clamped to -32768 and 32767,
    721  * including -Inf and +Inf. NaN will generally be treated either as -32768 or 32767,
    722  * depending on the sign bit inside NaN (whose representation is not unique).
    723  * Nevertheless, strictly speaking, NaN behavior should be considered undefined.
    724  *
    725  * OLD code disabled: Rounding of 0.5 lsb is to even (default for IEEE 754).
    726  * NEW code enabled: Rounding of 0.5 lsb is away from 0.
    727  */
    728 static inline int16_t clamp16_from_float(float f)
    729 {
    730 #if 0
    731     /* Offset is used to expand the valid range of [-1.0, 1.0) into the 16 lsbs of the
    732      * floating point significand. The normal shift is 3<<22, but the -15 offset
    733      * is used to multiply by 32768.
    734      */
    735     static const float offset = (float)(3 << (22 - 15));
    736     /* zero = (0x10f << 22) =  0x43c00000 (not directly used) */
    737     static const int32_t limneg = (0x10f << 22) /*zero*/ - 32768; /* 0x43bf8000 */
    738     static const int32_t limpos = (0x10f << 22) /*zero*/ + 32767; /* 0x43c07fff */
    739 
    740     union {
    741         float f;
    742         int32_t i;
    743     } u;
    744 
    745     u.f = f + offset; /* recenter valid range */
    746     /* Now the valid range is represented as integers between [limneg, limpos].
    747      * Clamp using the fact that float representation (as an integer) is an ordered set.
    748      */
    749     if (u.i < limneg)
    750         u.i = -32768;
    751     else if (u.i > limpos)
    752         u.i = 32767;
    753     return u.i; /* Return lower 16 bits, the part of interest in the significand. */
    754 #else
    755     static const float scale = 1 << 15;
    756     return roundf(fmaxf(fminf(f * scale, scale - 1.f), -scale));
    757 #endif
    758 }
    759 
    760 /**
    761  * Convert a IEEE 754 single precision float [-1.0, 1.0) to uint8_t [0, 0xff]
    762  * with clamping.  Note the open bound at 1.0, values within 1/128 of 1.0 map
    763  * to 255 instead of 256 (early clamping due to the smaller positive integer subrange).
    764  *
    765  * Values outside the range [-1.0, 1.0) are properly clamped to 0 and 255,
    766  * including -Inf and +Inf. NaN will generally be treated either as 0 or 255,
    767  * depending on the sign bit inside NaN (whose representation is not unique).
    768  * Nevertheless, strictly speaking, NaN behavior should be considered undefined.
    769  *
    770  * OLD code disabled: Rounding of 0.5 lsb is to even (default for IEEE 754).
    771  * NEW code enabled: Rounding of 0.5 lsb is away from 0.
    772  */
    773 static inline uint8_t clamp8_from_float(float f)
    774 {
    775 #if 0
    776     /* Offset is used to expand the valid range of [-1.0, 1.0) into the 16 lsbs of the
    777      * floating point significand. The normal shift is 3<<22, but the -7 offset
    778      * is used to multiply by 128.
    779      */
    780     static const float offset = (float)((3 << (22 - 7)) + 1 /* to cancel -1.0 */);
    781     /* zero = (0x11f << 22) =  0x47c00000 */
    782     static const int32_t limneg = (0x11f << 22) /*zero*/;
    783     static const int32_t limpos = (0x11f << 22) /*zero*/ + 255; /* 0x47c000ff */
    784 
    785     union {
    786         float f;
    787         int32_t i;
    788     } u;
    789 
    790     u.f = f + offset; /* recenter valid range */
    791     /* Now the valid range is represented as integers between [limneg, limpos].
    792      * Clamp using the fact that float representation (as an integer) is an ordered set.
    793      */
    794     if (u.i < limneg)
    795         return 0;
    796     if (u.i > limpos)
    797         return 255;
    798     return u.i; /* Return lower 8 bits, the part of interest in the significand. */
    799 #else
    800     return roundf(fmaxf(fminf(f * 128.f + 128.f, 255.f), 0.f));
    801 #endif
    802 }
    803 
    804 /**
    805  * Convert a single-precision floating point value to a Q0.23 integer value, stored in a
    806  * 32 bit signed integer (technically stored as Q8.23, but clamped to Q0.23).
    807  *
    808  * OLD code disabled: Rounds to nearest, ties away from 0.
    809  * NEW code enabled: Rounding of 0.5 lsb is away from 0.
    810  *
    811  * Values outside the range [-1.0, 1.0) are properly clamped to -8388608 and 8388607,
    812  * including -Inf and +Inf. NaN values are considered undefined, and behavior may change
    813  * depending on hardware and future implementation of this function.
    814  */
    815 static inline int32_t clamp24_from_float(float f)
    816 {
    817 #if 0
    818     static const float scale = (float)(1 << 23);
    819     static const float limpos = 0x7fffff / scale;
    820     static const float limneg = -0x800000 / scale;
    821 
    822     if (f <= limneg) {
    823         return -0x800000;
    824     } else if (f >= limpos) {
    825         return 0x7fffff;
    826     }
    827     f *= scale;
    828     /* integer conversion is through truncation (though int to float is not).
    829      * ensure that we round to nearest, ties away from 0.
    830      */
    831     return f > 0 ? f + 0.5 : f - 0.5;
    832 #else
    833     static const float scale = 1 << 23;
    834     return roundf(fmaxf(fminf(f * scale, scale - 1.f), -scale));
    835 #endif
    836 }
    837 
    838 /**
    839  * Convert a signed fixed-point 32-bit Q8.23 value to a Q0.23 integer value,
    840  * stored in a 32-bit signed integer (technically stored as Q8.23, but clamped to Q0.23).
    841  *
    842  * Values outside the range [-0x800000, 0x7fffff] are clamped to that range.
    843  */
    844 static inline int32_t clamp24_from_q8_23(int32_t ival)
    845 {
    846     static const int32_t limpos = 0x7fffff;
    847     static const int32_t limneg = -0x800000;
    848     if (ival < limneg) {
    849         return limneg;
    850     } else if (ival > limpos) {
    851         return limpos;
    852     } else {
    853         return ival;
    854     }
    855 }
    856 
    857 /**
    858  * Convert a single-precision floating point value to a Q4.27 integer value.
    859  * Rounds to nearest, ties away from 0.
    860  *
    861  * Values outside the range [-16.0, 16.0) are properly clamped to -2147483648 and 2147483647,
    862  * including -Inf and +Inf. NaN values are considered undefined, and behavior may change
    863  * depending on hardware and future implementation of this function.
    864  */
    865 static inline int32_t clampq4_27_from_float(float f)
    866 {
    867     static const float scale = (float)(1UL << 27);
    868     static const float limpos = 16.;
    869     static const float limneg = -16.;
    870 
    871     if (f <= limneg) {
    872         return -0x80000000; /* or 0x80000000 */
    873     } else if (f >= limpos) {
    874         return 0x7fffffff;
    875     }
    876     f *= scale;
    877     /* integer conversion is through truncation (though int to float is not).
    878      * ensure that we round to nearest, ties away from 0.
    879      */
    880     return f > 0 ? f + 0.5 : f - 0.5;
    881 }
    882 
    883 /**
    884  * Convert a single-precision floating point value to a Q0.31 integer value.
    885  * Rounds to nearest, ties away from 0.
    886  *
    887  * Values outside the range [-1.0, 1.0) are properly clamped to -2147483648 and 2147483647,
    888  * including -Inf and +Inf. NaN values are considered undefined, and behavior may change
    889  * depending on hardware and future implementation of this function.
    890  */
    891 static inline int32_t clamp32_from_float(float f)
    892 {
    893     static const float scale = (float)(1UL << 31);
    894     static const float limpos = 1.;
    895     static const float limneg = -1.;
    896 
    897     if (f <= limneg) {
    898         return -0x80000000; /* or 0x80000000 */
    899     } else if (f >= limpos) {
    900         return 0x7fffffff;
    901     }
    902     f *= scale;
    903     /* integer conversion is through truncation (though int to float is not).
    904      * ensure that we round to nearest, ties away from 0.
    905      */
    906     return f > 0 ? f + 0.5 : f - 0.5;
    907 }
    908 
    909 /**
    910  * Convert a signed fixed-point 32-bit Q4.27 value to single-precision floating-point.
    911  * The nominal output float range is [-1.0, 1.0] if the fixed-point range is
    912  * [0xf8000000, 0x07ffffff].  The full float range is [-16.0, 16.0].
    913  *
    914  * Note the closed range at 1.0 and 16.0 is due to rounding on conversion to float.
    915  * In more detail: if the fixed-point integer exceeds 24 bit significand of single
    916  * precision floating point, the 0.5 lsb in the significand conversion will round
    917  * towards even, as per IEEE 754 default.
    918  */
    919 static inline float float_from_q4_27(int32_t ival)
    920 {
    921     /* The scale factor is the reciprocal of the fractional bits.
    922      *
    923      * Since the scale factor is a power of 2, the scaling is exact, and there
    924      * is no rounding due to the multiplication - the bit pattern is preserved.
    925      * However, there may be rounding due to the fixed-point to float conversion,
    926      * as described above.
    927      */
    928     static const float scale = 1. / (float)(1UL << 27);
    929 
    930     return ival * scale;
    931 }
    932 
    933 /**
    934  * Convert an unsigned fixed-point 32-bit U4.28 value to single-precision floating-point.
    935  * The nominal output float range is [0.0, 1.0] if the fixed-point range is
    936  * [0x00000000, 0x10000000].  The full float range is [0.0, 16.0].
    937  *
    938  * Note the closed range at 1.0 and 16.0 is due to rounding on conversion to float.
    939  * In more detail: if the fixed-point integer exceeds 24 bit significand of single
    940  * precision floating point, the 0.5 lsb in the significand conversion will round
    941  * towards even, as per IEEE 754 default.
    942  */
    943 static inline float float_from_u4_28(uint32_t uval)
    944 {
    945     static const float scale = 1. / (float)(1UL << 28);
    946 
    947     return uval * scale;
    948 }
    949 
    950 /**
    951  * Convert an unsigned fixed-point 16-bit U4.12 value to single-precision floating-point.
    952  * The nominal output float range is [0.0, 1.0] if the fixed-point range is
    953  * [0x0000, 0x1000].  The full float range is [0.0, 16.0).
    954  */
    955 static inline float float_from_u4_12(uint16_t uval)
    956 {
    957     static const float scale = 1. / (float)(1UL << 12);
    958 
    959     return uval * scale;
    960 }
    961 
    962 /**
    963  * Convert a single-precision floating point value to a U4.28 integer value.
    964  * Rounds to nearest, ties away from 0.
    965  *
    966  * Values outside the range [0, 16.0] are properly clamped to [0, 4294967295]
    967  * including -Inf and +Inf. NaN values are considered undefined, and behavior may change
    968  * depending on hardware and future implementation of this function.
    969  */
    970 static inline uint32_t u4_28_from_float(float f)
    971 {
    972     static const float scale = (float)(1 << 28);
    973     static const float limpos = 0xffffffffUL / scale;
    974 
    975     if (f <= 0.) {
    976         return 0;
    977     } else if (f >= limpos) {
    978         return 0xffffffff;
    979     }
    980     /* integer conversion is through truncation (though int to float is not).
    981      * ensure that we round to nearest, ties away from 0.
    982      */
    983     return f * scale + 0.5;
    984 }
    985 
    986 /**
    987  * Convert a single-precision floating point value to a U4.12 integer value.
    988  * Rounds to nearest, ties away from 0.
    989  *
    990  * Values outside the range [0, 16.0) are properly clamped to [0, 65535]
    991  * including -Inf and +Inf. NaN values are considered undefined, and behavior may change
    992  * depending on hardware and future implementation of this function.
    993  */
    994 static inline uint16_t u4_12_from_float(float f)
    995 {
    996     static const float scale = (float)(1 << 12);
    997     static const float limpos = 0xffff / scale;
    998 
    999     if (f <= 0.) {
   1000         return 0;
   1001     } else if (f >= limpos) {
   1002         return 0xffff;
   1003     }
   1004     /* integer conversion is through truncation (though int to float is not).
   1005      * ensure that we round to nearest, ties away from 0.
   1006      */
   1007     return f * scale + 0.5;
   1008 }
   1009 
   1010 /**
   1011  * Convert a signed fixed-point 16-bit Q0.15 value to single-precision floating-point.
   1012  * The output float range is [-1.0, 1.0) for the fixed-point range
   1013  * [0x8000, 0x7fff].
   1014  *
   1015  * There is no rounding, the conversion and representation is exact.
   1016  */
   1017 static inline float float_from_i16(int16_t ival)
   1018 {
   1019     /* The scale factor is the reciprocal of the nominal 16 bit integer
   1020      * half-sided range (32768).
   1021      *
   1022      * Since the scale factor is a power of 2, the scaling is exact, and there
   1023      * is no rounding due to the multiplication - the bit pattern is preserved.
   1024      */
   1025     static const float scale = 1. / (float)(1UL << 15);
   1026 
   1027     return ival * scale;
   1028 }
   1029 
   1030 /**
   1031  * Convert an unsigned fixed-point 8-bit U0.8 value to single-precision floating-point.
   1032  * The nominal output float range is [-1.0, 1.0) if the fixed-point range is
   1033  * [0x00, 0xff].
   1034  */
   1035 static inline float float_from_u8(uint8_t uval)
   1036 {
   1037     static const float scale = 1. / (float)(1UL << 7);
   1038 
   1039     return ((int)uval - 128) * scale;
   1040 }
   1041 
   1042 /**
   1043  * Convert a packed 24bit Q0.23 value stored native-endian in a uint8_t ptr
   1044  * to a signed fixed-point 32 bit integer Q0.31 value. The output Q0.31 range
   1045  * is [0x80000000, 0x7fffff00] for the fixed-point range [0x800000, 0x7fffff].
   1046  * Even though the output range is limited on the positive side, there is no
   1047  * DC offset on the output, if the input has no DC offset.
   1048  *
   1049  * Avoid relying on the limited output range, as future implementations may go
   1050  * to full range.
   1051  */
   1052 static inline int32_t i32_from_p24(const uint8_t *packed24)
   1053 {
   1054     /* convert to 32b */
   1055     return (packed24[0] << 8) | (packed24[1] << 16) | (packed24[2] << 24);
   1056 }
   1057 
   1058 /**
   1059  * Convert a 32-bit Q0.31 value to single-precision floating-point.
   1060  * The output float range is [-1.0, 1.0] for the fixed-point range
   1061  * [0x80000000, 0x7fffffff].
   1062  *
   1063  * Rounding may occur in the least significant 8 bits for large fixed point
   1064  * values due to storage into the 24-bit floating-point significand.
   1065  * Rounding will be to nearest, ties to even.
   1066  */
   1067 static inline float float_from_i32(int32_t ival)
   1068 {
   1069     static const float scale = 1. / (float)(1UL << 31);
   1070 
   1071     return ival * scale;
   1072 }
   1073 
   1074 /**
   1075  * Convert a packed 24bit Q0.23 value stored native endian in a uint8_t ptr
   1076  * to single-precision floating-point. The output float range is [-1.0, 1.0)
   1077  * for the fixed-point range [0x800000, 0x7fffff].
   1078  *
   1079  * There is no rounding, the conversion and representation is exact.
   1080  */
   1081 static inline float float_from_p24(const uint8_t *packed24)
   1082 {
   1083     return float_from_i32(i32_from_p24(packed24));
   1084 }
   1085 
   1086 /**
   1087  * Convert a 24-bit Q8.23 value to single-precision floating-point.
   1088  * The nominal output float range is [-1.0, 1.0) for the fixed-point
   1089  * range [0xff800000, 0x007fffff].  The maximum float range is [-256.0, 256.0).
   1090  *
   1091  * There is no rounding in the nominal range, the conversion and representation
   1092  * is exact. For values outside the nominal range, rounding is to nearest, ties to even.
   1093  */
   1094 static inline float float_from_q8_23(int32_t ival)
   1095 {
   1096     static const float scale = 1. / (float)(1UL << 23);
   1097 
   1098     return ival * scale;
   1099 }
   1100 
   1101 /**
   1102  * Multiply-accumulate 16-bit terms with 32-bit result: return a + in*v.
   1103  */
   1104 static inline
   1105 int32_t mulAdd(int16_t in, int16_t v, int32_t a)
   1106 {
   1107 #if defined(__arm__) && !defined(__thumb__)
   1108     int32_t out;
   1109     asm( "smlabb %[out], %[in], %[v], %[a] \n"
   1110          : [out]"=r"(out)
   1111          : [in]"%r"(in), [v]"r"(v), [a]"r"(a)
   1112          : );
   1113     return out;
   1114 #else
   1115     return a + in * (int32_t)v;
   1116 #endif
   1117 }
   1118 
   1119 /**
   1120  * Multiply 16-bit terms with 32-bit result: return in*v.
   1121  */
   1122 static inline
   1123 int32_t mul(int16_t in, int16_t v)
   1124 {
   1125 #if defined(__arm__) && !defined(__thumb__)
   1126     int32_t out;
   1127     asm( "smulbb %[out], %[in], %[v] \n"
   1128          : [out]"=r"(out)
   1129          : [in]"%r"(in), [v]"r"(v)
   1130          : );
   1131     return out;
   1132 #else
   1133     return in * (int32_t)v;
   1134 #endif
   1135 }
   1136 
   1137 /**
   1138  * Similar to mulAdd, but the 16-bit terms are extracted from a 32-bit interleaved stereo pair.
   1139  */
   1140 static inline
   1141 int32_t mulAddRL(int left, uint32_t inRL, uint32_t vRL, int32_t a)
   1142 {
   1143 #if defined(__arm__) && !defined(__thumb__)
   1144     int32_t out;
   1145     if (left) {
   1146         asm( "smlabb %[out], %[inRL], %[vRL], %[a] \n"
   1147              : [out]"=r"(out)
   1148              : [inRL]"%r"(inRL), [vRL]"r"(vRL), [a]"r"(a)
   1149              : );
   1150     } else {
   1151         asm( "smlatt %[out], %[inRL], %[vRL], %[a] \n"
   1152              : [out]"=r"(out)
   1153              : [inRL]"%r"(inRL), [vRL]"r"(vRL), [a]"r"(a)
   1154              : );
   1155     }
   1156     return out;
   1157 #else
   1158     if (left) {
   1159         return a + (int16_t)(inRL&0xFFFF) * (int16_t)(vRL&0xFFFF);
   1160     } else {
   1161         return a + (int16_t)(inRL>>16) * (int16_t)(vRL>>16);
   1162     }
   1163 #endif
   1164 }
   1165 
   1166 /**
   1167  * Similar to mul, but the 16-bit terms are extracted from a 32-bit interleaved stereo pair.
   1168  */
   1169 static inline
   1170 int32_t mulRL(int left, uint32_t inRL, uint32_t vRL)
   1171 {
   1172 #if defined(__arm__) && !defined(__thumb__)
   1173     int32_t out;
   1174     if (left) {
   1175         asm( "smulbb %[out], %[inRL], %[vRL] \n"
   1176              : [out]"=r"(out)
   1177              : [inRL]"%r"(inRL), [vRL]"r"(vRL)
   1178              : );
   1179     } else {
   1180         asm( "smultt %[out], %[inRL], %[vRL] \n"
   1181              : [out]"=r"(out)
   1182              : [inRL]"%r"(inRL), [vRL]"r"(vRL)
   1183              : );
   1184     }
   1185     return out;
   1186 #else
   1187     if (left) {
   1188         return (int16_t)(inRL&0xFFFF) * (int16_t)(vRL&0xFFFF);
   1189     } else {
   1190         return (int16_t)(inRL>>16) * (int16_t)(vRL>>16);
   1191     }
   1192 #endif
   1193 }
   1194 
   1195 /** \cond */
   1196 __END_DECLS
   1197 /** \endcond */
   1198 
   1199 #endif  // ANDROID_AUDIO_PRIMITIVES_H
   1200