Home | History | Annotate | Download | only in silk
      1 /***********************************************************************
      2 Copyright (c) 2006-2011, Skype Limited. All rights reserved.
      3 Redistribution and use in source and binary forms, with or without
      4 modification, are permitted provided that the following conditions
      5 are met:
      6 - Redistributions of source code must retain the above copyright notice,
      7 this list of conditions and the following disclaimer.
      8 - Redistributions in binary form must reproduce the above copyright
      9 notice, this list of conditions and the following disclaimer in the
     10 documentation and/or other materials provided with the distribution.
     11 - Neither the name of Internet Society, IETF or IETF Trust, nor the
     12 names of specific contributors, may be used to endorse or promote
     13 products derived from this software without specific prior written
     14 permission.
     15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     16 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     17 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     18 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     19 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     20 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     21 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     22 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     23 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     24 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     25 POSSIBILITY OF SUCH DAMAGE.
     26 ***********************************************************************/
     27 
     28 #ifndef SILK_SIGPROC_FIX_H
     29 #define SILK_SIGPROC_FIX_H
     30 
     31 #ifdef  __cplusplus
     32 extern "C"
     33 {
     34 #endif
     35 
     36 /*#define silk_MACRO_COUNT */          /* Used to enable WMOPS counting */
     37 
     38 #define SILK_MAX_ORDER_LPC            16            /* max order of the LPC analysis in schur() and k2a() */
     39 
     40 #include <string.h>                                 /* for memset(), memcpy(), memmove() */
     41 #include "typedef.h"
     42 #include "resampler_structs.h"
     43 #include "macros.h"
     44 #include "cpu_support.h"
     45 
     46 #if defined(OPUS_X86_MAY_HAVE_SSE4_1)
     47 #include "x86/SigProc_FIX_sse.h"
     48 #endif
     49 
     50 /********************************************************************/
     51 /*                    SIGNAL PROCESSING FUNCTIONS                   */
     52 /********************************************************************/
     53 
     54 /*!
     55  * Initialize/reset the resampler state for a given pair of input/output sampling rates
     56 */
     57 opus_int silk_resampler_init(
     58     silk_resampler_state_struct *S,                 /* I/O  Resampler state                                             */
     59     opus_int32                  Fs_Hz_in,           /* I    Input sampling rate (Hz)                                    */
     60     opus_int32                  Fs_Hz_out,          /* I    Output sampling rate (Hz)                                   */
     61     opus_int                    forEnc              /* I    If 1: encoder; if 0: decoder                                */
     62 );
     63 
     64 /*!
     65  * Resampler: convert from one sampling rate to another
     66  */
     67 opus_int silk_resampler(
     68     silk_resampler_state_struct *S,                 /* I/O  Resampler state                                             */
     69     opus_int16                  out[],              /* O    Output signal                                               */
     70     const opus_int16            in[],               /* I    Input signal                                                */
     71     opus_int32                  inLen               /* I    Number of input samples                                     */
     72 );
     73 
     74 /*!
     75 * Downsample 2x, mediocre quality
     76 */
     77 void silk_resampler_down2(
     78     opus_int32                  *S,                 /* I/O  State vector [ 2 ]                                          */
     79     opus_int16                  *out,               /* O    Output signal [ floor(inLen/2) ]                            */
     80     const opus_int16            *in,                /* I    Input signal [ inLen ]                                      */
     81     opus_int32                  inLen               /* I    Number of input samples                                     */
     82 );
     83 
     84 /*!
     85  * Downsample by a factor 2/3, low quality
     86 */
     87 void silk_resampler_down2_3(
     88     opus_int32                  *S,                 /* I/O  State vector [ 6 ]                                          */
     89     opus_int16                  *out,               /* O    Output signal [ floor(2*inLen/3) ]                          */
     90     const opus_int16            *in,                /* I    Input signal [ inLen ]                                      */
     91     opus_int32                  inLen               /* I    Number of input samples                                     */
     92 );
     93 
     94 /*!
     95  * second order ARMA filter;
     96  * slower than biquad() but uses more precise coefficients
     97  * can handle (slowly) varying coefficients
     98  */
     99 void silk_biquad_alt(
    100     const opus_int16            *in,                /* I     input signal                                               */
    101     const opus_int32            *B_Q28,             /* I     MA coefficients [3]                                        */
    102     const opus_int32            *A_Q28,             /* I     AR coefficients [2]                                        */
    103     opus_int32                  *S,                 /* I/O   State vector [2]                                           */
    104     opus_int16                  *out,               /* O     output signal                                              */
    105     const opus_int32            len,                /* I     signal length (must be even)                               */
    106     opus_int                    stride              /* I     Operate on interleaved signal if > 1                       */
    107 );
    108 
    109 /* Variable order MA prediction error filter. */
    110 void silk_LPC_analysis_filter(
    111     opus_int16                  *out,               /* O    Output signal                                               */
    112     const opus_int16            *in,                /* I    Input signal                                                */
    113     const opus_int16            *B,                 /* I    MA prediction coefficients, Q12 [order]                     */
    114     const opus_int32            len,                /* I    Signal length                                               */
    115     const opus_int32            d,                  /* I    Filter order                                                */
    116     int                         arch                /* I    Run-time architecture                                       */
    117 );
    118 
    119 /* Chirp (bandwidth expand) LP AR filter */
    120 void silk_bwexpander(
    121     opus_int16                  *ar,                /* I/O  AR filter to be expanded (without leading 1)                */
    122     const opus_int              d,                  /* I    Length of ar                                                */
    123     opus_int32                  chirp_Q16           /* I    Chirp factor (typically in the range 0 to 1)                */
    124 );
    125 
    126 /* Chirp (bandwidth expand) LP AR filter */
    127 void silk_bwexpander_32(
    128     opus_int32                  *ar,                /* I/O  AR filter to be expanded (without leading 1)                */
    129     const opus_int              d,                  /* I    Length of ar                                                */
    130     opus_int32                  chirp_Q16           /* I    Chirp factor in Q16                                         */
    131 );
    132 
    133 /* Compute inverse of LPC prediction gain, and                           */
    134 /* test if LPC coefficients are stable (all poles within unit circle)    */
    135 opus_int32 silk_LPC_inverse_pred_gain(              /* O   Returns inverse prediction gain in energy domain, Q30        */
    136     const opus_int16            *A_Q12,             /* I   Prediction coefficients, Q12 [order]                         */
    137     const opus_int              order               /* I   Prediction order                                             */
    138 );
    139 
    140 /* For input in Q24 domain */
    141 opus_int32 silk_LPC_inverse_pred_gain_Q24(          /* O    Returns inverse prediction gain in energy domain, Q30       */
    142     const opus_int32            *A_Q24,             /* I    Prediction coefficients [order]                             */
    143     const opus_int              order               /* I    Prediction order                                            */
    144 );
    145 
    146 /* Split signal in two decimated bands using first-order allpass filters */
    147 void silk_ana_filt_bank_1(
    148     const opus_int16            *in,                /* I    Input signal [N]                                            */
    149     opus_int32                  *S,                 /* I/O  State vector [2]                                            */
    150     opus_int16                  *outL,              /* O    Low band [N/2]                                              */
    151     opus_int16                  *outH,              /* O    High band [N/2]                                             */
    152     const opus_int32            N                   /* I    Number of input samples                                     */
    153 );
    154 
    155 /********************************************************************/
    156 /*                        SCALAR FUNCTIONS                          */
    157 /********************************************************************/
    158 
    159 /* Approximation of 128 * log2() (exact inverse of approx 2^() below) */
    160 /* Convert input to a log scale    */
    161 opus_int32 silk_lin2log(
    162     const opus_int32            inLin               /* I  input in linear scale                                         */
    163 );
    164 
    165 /* Approximation of a sigmoid function */
    166 opus_int silk_sigm_Q15(
    167     opus_int                    in_Q5               /* I                                                                */
    168 );
    169 
    170 /* Approximation of 2^() (exact inverse of approx log2() above) */
    171 /* Convert input to a linear scale */
    172 opus_int32 silk_log2lin(
    173     const opus_int32            inLog_Q7            /* I  input on log scale                                            */
    174 );
    175 
    176 /* Compute number of bits to right shift the sum of squares of a vector    */
    177 /* of int16s to make it fit in an int32                                    */
    178 void silk_sum_sqr_shift(
    179     opus_int32                  *energy,            /* O   Energy of x, after shifting to the right                     */
    180     opus_int                    *shift,             /* O   Number of bits right shift applied to energy                 */
    181     const opus_int16            *x,                 /* I   Input vector                                                 */
    182     opus_int                    len                 /* I   Length of input vector                                       */
    183 );
    184 
    185 /* Calculates the reflection coefficients from the correlation sequence    */
    186 /* Faster than schur64(), but much less accurate.                          */
    187 /* uses SMLAWB(), requiring armv5E and higher.                             */
    188 opus_int32 silk_schur(                              /* O    Returns residual energy                                     */
    189     opus_int16                  *rc_Q15,            /* O    reflection coefficients [order] Q15                         */
    190     const opus_int32            *c,                 /* I    correlations [order+1]                                      */
    191     const opus_int32            order               /* I    prediction order                                            */
    192 );
    193 
    194 /* Calculates the reflection coefficients from the correlation sequence    */
    195 /* Slower than schur(), but more accurate.                                 */
    196 /* Uses SMULL(), available on armv4                                        */
    197 opus_int32 silk_schur64(                            /* O    returns residual energy                                     */
    198     opus_int32                  rc_Q16[],           /* O    Reflection coefficients [order] Q16                         */
    199     const opus_int32            c[],                /* I    Correlations [order+1]                                      */
    200     opus_int32                  order               /* I    Prediction order                                            */
    201 );
    202 
    203 /* Step up function, converts reflection coefficients to prediction coefficients */
    204 void silk_k2a(
    205     opus_int32                  *A_Q24,             /* O    Prediction coefficients [order] Q24                         */
    206     const opus_int16            *rc_Q15,            /* I    Reflection coefficients [order] Q15                         */
    207     const opus_int32            order               /* I    Prediction order                                            */
    208 );
    209 
    210 /* Step up function, converts reflection coefficients to prediction coefficients */
    211 void silk_k2a_Q16(
    212     opus_int32                  *A_Q24,             /* O    Prediction coefficients [order] Q24                         */
    213     const opus_int32            *rc_Q16,            /* I    Reflection coefficients [order] Q16                         */
    214     const opus_int32            order               /* I    Prediction order                                            */
    215 );
    216 
    217 /* Apply sine window to signal vector.                              */
    218 /* Window types:                                                    */
    219 /*    1 -> sine window from 0 to pi/2                               */
    220 /*    2 -> sine window from pi/2 to pi                              */
    221 /* every other sample of window is linearly interpolated, for speed */
    222 void silk_apply_sine_window(
    223     opus_int16                  px_win[],           /* O    Pointer to windowed signal                                  */
    224     const opus_int16            px[],               /* I    Pointer to input signal                                     */
    225     const opus_int              win_type,           /* I    Selects a window type                                       */
    226     const opus_int              length              /* I    Window length, multiple of 4                                */
    227 );
    228 
    229 /* Compute autocorrelation */
    230 void silk_autocorr(
    231     opus_int32                  *results,           /* O    Result (length correlationCount)                            */
    232     opus_int                    *scale,             /* O    Scaling of the correlation vector                           */
    233     const opus_int16            *inputData,         /* I    Input data to correlate                                     */
    234     const opus_int              inputDataSize,      /* I    Length of input                                             */
    235     const opus_int              correlationCount,   /* I    Number of correlation taps to compute                       */
    236     int                         arch                /* I    Run-time architecture                                       */
    237 );
    238 
    239 void silk_decode_pitch(
    240     opus_int16                  lagIndex,           /* I                                                                */
    241     opus_int8                   contourIndex,       /* I                                                                */
    242     opus_int                    pitch_lags[],       /* O    4 pitch values                                              */
    243     const opus_int              Fs_kHz,             /* I    sampling frequency (kHz)                                    */
    244     const opus_int              nb_subfr            /* I    number of sub frames                                        */
    245 );
    246 
    247 opus_int silk_pitch_analysis_core(                  /* O    Voicing estimate: 0 voiced, 1 unvoiced                      */
    248     const opus_int16            *frame,             /* I    Signal of length PE_FRAME_LENGTH_MS*Fs_kHz                  */
    249     opus_int                    *pitch_out,         /* O    4 pitch lag values                                          */
    250     opus_int16                  *lagIndex,          /* O    Lag Index                                                   */
    251     opus_int8                   *contourIndex,      /* O    Pitch contour Index                                         */
    252     opus_int                    *LTPCorr_Q15,       /* I/O  Normalized correlation; input: value from previous frame    */
    253     opus_int                    prevLag,            /* I    Last lag of previous frame; set to zero if unvoiced         */
    254     const opus_int32            search_thres1_Q16,  /* I    First stage threshold for lag candidates 0 - 1              */
    255     const opus_int              search_thres2_Q13,  /* I    Final threshold for lag candidates 0 - 1                    */
    256     const opus_int              Fs_kHz,             /* I    Sample frequency (kHz)                                      */
    257     const opus_int              complexity,         /* I    Complexity setting, 0-2, where 2 is highest                 */
    258     const opus_int              nb_subfr,           /* I    number of 5 ms subframes                                    */
    259     int                         arch                /* I    Run-time architecture                                       */
    260 );
    261 
    262 /* Compute Normalized Line Spectral Frequencies (NLSFs) from whitening filter coefficients      */
    263 /* If not all roots are found, the a_Q16 coefficients are bandwidth expanded until convergence. */
    264 void silk_A2NLSF(
    265     opus_int16                  *NLSF,              /* O    Normalized Line Spectral Frequencies in Q15 (0..2^15-1) [d] */
    266     opus_int32                  *a_Q16,             /* I/O  Monic whitening filter coefficients in Q16 [d]              */
    267     const opus_int              d                   /* I    Filter order (must be even)                                 */
    268 );
    269 
    270 /* compute whitening filter coefficients from normalized line spectral frequencies */
    271 void silk_NLSF2A(
    272     opus_int16                  *a_Q12,             /* O    monic whitening filter coefficients in Q12,  [ d ]          */
    273     const opus_int16            *NLSF,              /* I    normalized line spectral frequencies in Q15, [ d ]          */
    274     const opus_int              d                   /* I    filter order (should be even)                               */
    275 );
    276 
    277 void silk_insertion_sort_increasing(
    278     opus_int32                  *a,                 /* I/O   Unsorted / Sorted vector                                   */
    279     opus_int                    *idx,               /* O     Index vector for the sorted elements                       */
    280     const opus_int              L,                  /* I     Vector length                                              */
    281     const opus_int              K                   /* I     Number of correctly sorted positions                       */
    282 );
    283 
    284 void silk_insertion_sort_decreasing_int16(
    285     opus_int16                  *a,                 /* I/O   Unsorted / Sorted vector                                   */
    286     opus_int                    *idx,               /* O     Index vector for the sorted elements                       */
    287     const opus_int              L,                  /* I     Vector length                                              */
    288     const opus_int              K                   /* I     Number of correctly sorted positions                       */
    289 );
    290 
    291 void silk_insertion_sort_increasing_all_values_int16(
    292      opus_int16                 *a,                 /* I/O   Unsorted / Sorted vector                                   */
    293      const opus_int             L                   /* I     Vector length                                              */
    294 );
    295 
    296 /* NLSF stabilizer, for a single input data vector */
    297 void silk_NLSF_stabilize(
    298           opus_int16            *NLSF_Q15,          /* I/O   Unstable/stabilized normalized LSF vector in Q15 [L]       */
    299     const opus_int16            *NDeltaMin_Q15,     /* I     Min distance vector, NDeltaMin_Q15[L] must be >= 1 [L+1]   */
    300     const opus_int              L                   /* I     Number of NLSF parameters in the input vector              */
    301 );
    302 
    303 /* Laroia low complexity NLSF weights */
    304 void silk_NLSF_VQ_weights_laroia(
    305     opus_int16                  *pNLSFW_Q_OUT,      /* O     Pointer to input vector weights [D]                        */
    306     const opus_int16            *pNLSF_Q15,         /* I     Pointer to input vector         [D]                        */
    307     const opus_int              D                   /* I     Input vector dimension (even)                              */
    308 );
    309 
    310 /* Compute reflection coefficients from input signal */
    311 void silk_burg_modified_c(
    312     opus_int32                  *res_nrg,           /* O    Residual energy                                             */
    313     opus_int                    *res_nrg_Q,         /* O    Residual energy Q value                                     */
    314     opus_int32                  A_Q16[],            /* O    Prediction coefficients (length order)                      */
    315     const opus_int16            x[],                /* I    Input signal, length: nb_subfr * ( D + subfr_length )       */
    316     const opus_int32            minInvGain_Q30,     /* I    Inverse of max prediction gain                              */
    317     const opus_int              subfr_length,       /* I    Input signal subframe length (incl. D preceding samples)    */
    318     const opus_int              nb_subfr,           /* I    Number of subframes stacked in x                            */
    319     const opus_int              D,                  /* I    Order                                                       */
    320     int                         arch                /* I    Run-time architecture                                       */
    321 );
    322 
    323 /* Copy and multiply a vector by a constant */
    324 void silk_scale_copy_vector16(
    325     opus_int16                  *data_out,
    326     const opus_int16            *data_in,
    327     opus_int32                  gain_Q16,           /* I    Gain in Q16                                                 */
    328     const opus_int              dataSize            /* I    Length                                                      */
    329 );
    330 
    331 /* Some of the LTP-related functions require Q26 to work. */
    332 void silk_scale_vector32_Q26_lshift_18(
    333     opus_int32                  *data1,             /* I/O  Q0/Q18                                                      */
    334     opus_int32                  gain_Q26,           /* I    Q26                                                         */
    335     opus_int                    dataSize            /* I    length                                                      */
    336 );
    337 
    338 /********************************************************************/
    339 /*                        INLINE ARM MATH                           */
    340 /********************************************************************/
    341 
    342 /*    return sum( inVec1[i] * inVec2[i] ) */
    343 
    344 opus_int32 silk_inner_prod_aligned(
    345     const opus_int16 *const     inVec1,             /*    I input vector 1                                              */
    346     const opus_int16 *const     inVec2,             /*    I input vector 2                                              */
    347     const opus_int              len,                /*    I vector lengths                                              */
    348     int                         arch                /*    I Run-time architecture                                       */
    349 );
    350 
    351 
    352 opus_int32 silk_inner_prod_aligned_scale(
    353     const opus_int16 *const     inVec1,             /*    I input vector 1                                              */
    354     const opus_int16 *const     inVec2,             /*    I input vector 2                                              */
    355     const opus_int              scale,              /*    I number of bits to shift                                     */
    356     const opus_int              len                 /*    I vector lengths                                              */
    357 );
    358 
    359 opus_int64 silk_inner_prod16_aligned_64_c(
    360     const opus_int16            *inVec1,            /*    I input vector 1                                              */
    361     const opus_int16            *inVec2,            /*    I input vector 2                                              */
    362     const opus_int              len                 /*    I vector lengths                                              */
    363 );
    364 
    365 /********************************************************************/
    366 /*                                MACROS                            */
    367 /********************************************************************/
    368 
    369 /* Rotate a32 right by 'rot' bits. Negative rot values result in rotating
    370    left. Output is 32bit int.
    371    Note: contemporary compilers recognize the C expression below and
    372    compile it into a 'ror' instruction if available. No need for OPUS_INLINE ASM! */
    373 static OPUS_INLINE opus_int32 silk_ROR32( opus_int32 a32, opus_int rot )
    374 {
    375     opus_uint32 x = (opus_uint32) a32;
    376     opus_uint32 r = (opus_uint32) rot;
    377     opus_uint32 m = (opus_uint32) -rot;
    378     if( rot == 0 ) {
    379         return a32;
    380     } else if( rot < 0 ) {
    381         return (opus_int32) ((x << m) | (x >> (32 - m)));
    382     } else {
    383         return (opus_int32) ((x << (32 - r)) | (x >> r));
    384     }
    385 }
    386 
    /* Qualifier for allocating opus_int16 data aligned to a 4-byte memory address. */
    /* Expands to nothing unless EMBEDDED_ARM is set.                               */
    #if !EMBEDDED_ARM
    #define silk_DWORD_ALIGN
    #else
    #define silk_DWORD_ALIGN __attribute__((aligned(4)))
    #endif
    393 
    /* Memory helpers mapped onto <string.h>; adjust these when porting to other platforms */
    #define silk_memcpy(dst, from, n)           memcpy((dst), (from), (n))
    #define silk_memset(dst, value, n)          memset((dst), (value), (n))
    #define silk_memmove(dst, from, n)          memmove((dst), (from), (n))
    398 
    /* Fixed point macros */

    /* (x * y), output has to be 32bit int */
    #define silk_MUL(x, y)                      ((x) * (y))

    /* (x * y), output has to be 32bit uint */
    #define silk_MUL_uint(x, y)                 silk_MUL(x, y)

    /* acc + (x * y), output has to be 32bit int */
    #define silk_MLA(acc, x, y)                 silk_ADD32((acc),((x) * (y)))

    /* acc + (x * y), output has to be 32bit uint */
    #define silk_MLA_uint(acc, x, y)            silk_MLA(acc, x, y)

    /* ((x >> 16) * (y >> 16)), output has to be 32bit int */
    #define silk_SMULTT(x, y)                   (((x) >> 16) * ((y) >> 16))

    /* acc + ((x >> 16) * (y >> 16)), output has to be 32bit int */
    #define silk_SMLATT(acc, x, y)              silk_ADD32((acc),((x) >> 16) * ((y) >> 16))

    /* acc64 + (x16 * y16): the product is formed in 32 bits, then widened to 64 bits for the addition */
    #define silk_SMLALBB(acc64, x16, y16)       silk_ADD64((acc64),(opus_int64)((opus_int32)(x16) * (opus_int32)(y16)))

    /* (x * y) with the first operand widened to 64 bits */
    #define silk_SMULL(x, y)                    ((opus_int64)(x) * /*(opus_int64)*/(y))
    423 
    /* Adds two signed 32-bit values in a way that can overflow: the sum is formed on the
       unsigned representations, so it does not rely on undefined behaviour
       (just standard two's complement implementation-specific behaviour) */
    #define silk_ADD32_ovflw(x, y)              ((opus_int32)((opus_uint32)(x) + (opus_uint32)(y)))
    /* Subtracts two signed 32-bit values in a way that can overflow: the difference is formed on the
       unsigned representations, so it does not rely on undefined behaviour
       (just standard two's complement implementation-specific behaviour) */
    #define silk_SUB32_ovflw(x, y)              ((opus_int32)((opus_uint32)(x) - (opus_uint32)(y)))

    /* Multiply-accumulate macros that allow overflow in the addition (ie, no asserts in debug mode) */
    #define silk_MLA_ovflw(acc, x, y)           silk_ADD32_ovflw((acc), (opus_uint32)(x) * (opus_uint32)(y))
    #define silk_SMLABB_ovflw(acc, x, y)        (silk_ADD32_ovflw((acc) , ((opus_int32)((opus_int16)(x))) * (opus_int32)((opus_int16)(y))))
    434 
    /* Integer division; the quotient is cast to opus_int32 */
    #define silk_DIV32_16(num32, den16)         ((opus_int32)((num32) / (den16)))
    #define silk_DIV32(num32, den32)            ((opus_int32)((num32) / (den32)))
    437 
    /* These macros enable checking for overflow in silk_API_Debug.h */
    #define silk_ADD16(x, y)                    ((x) + (y))
    #define silk_ADD32(x, y)                    ((x) + (y))
    #define silk_ADD64(x, y)                    ((x) + (y))

    #define silk_SUB16(x, y)                    ((x) - (y))
    #define silk_SUB32(x, y)                    ((x) - (y))
    #define silk_SUB64(x, y)                    ((x) - (y))
    446 
    /* Clamp val to the range of the named integer width (upper bound tested first) */
    #define silk_SAT8(val)                      ((val) > silk_int8_MAX ? silk_int8_MAX  :     \
                                                ((val) < silk_int8_MIN ? silk_int8_MIN  : (val)))
    #define silk_SAT16(val)                     ((val) > silk_int16_MAX ? silk_int16_MAX :    \
                                                ((val) < silk_int16_MIN ? silk_int16_MIN : (val)))
    #define silk_SAT32(val)                     ((val) > silk_int32_MAX ? silk_int32_MAX :    \
                                                ((val) < silk_int32_MIN ? silk_int32_MIN : (val)))
    453 
    /* Range-check hooks. In this header they are pass-through: each expands to its
       argument unchanged. */
    #define silk_CHECK_FIT8(v)                  (v)
    #define silk_CHECK_FIT16(v)                 (v)
    #define silk_CHECK_FIT32(v)                 (v)
    457 
    /* Saturating 16-bit add: the sum is formed through silk_ADD32 with the first
       operand widened to opus_int32, clamped by silk_SAT16 and narrowed to
       opus_int16. The expansion is fully parenthesized so it composes safely in
       any surrounding expression. */
    #define silk_ADD_SAT16(a, b)                ((opus_int16)silk_SAT16( silk_ADD32( (opus_int32)(a), (b) ) ))
    /* Saturating 64-bit add.
       The sign bit of the sum is probed on an unsigned (wrapping) addition, so the
       overflow test itself never performs a signed overflow (undefined behaviour).
       - sum sign bit clear: overflow happened only if both operands are negative
         -> silk_int64_MIN
       - sum sign bit set:   overflow happened only if both operands are non-negative
         -> silk_int64_MAX
       The signed (a)+(b) is evaluated only on the branches where it cannot overflow.
       Arguments are evaluated more than once. */
    #define silk_ADD_SAT64(a, b)                ((((opus_uint64)(a) + (opus_uint64)(b)) & 0x8000000000000000ULL) == 0 ?      \
                                                ((((a) & (b)) & 0x8000000000000000ULL) != 0 ? silk_int64_MIN : (a)+(b)) :   \
                                                ((((a) | (b)) & 0x8000000000000000ULL) == 0 ? silk_int64_MAX : (a)+(b)) )
    462 
    /* Saturating 16-bit subtract: the difference is formed through silk_SUB32 with
       the first operand widened to opus_int32, clamped by silk_SAT16 and narrowed
       to opus_int16. The expansion is fully parenthesized so it composes safely in
       any surrounding expression. */
    #define silk_SUB_SAT16(a, b)                ((opus_int16)silk_SAT16( silk_SUB32( (opus_int32)(a), (b) ) ))
    /* Saturating 64-bit subtract.
       The sign bit of the difference is probed on an unsigned (wrapping)
       subtraction, so the overflow test itself never performs a signed overflow
       (undefined behaviour).
       - difference sign bit clear: overflow only if a is negative and b is
         non-negative -> silk_int64_MIN
       - difference sign bit set:   overflow only if a is non-negative and b is
         negative -> silk_int64_MAX
       The signed (a)-(b) is evaluated only on the branches where it cannot overflow.
       Arguments are evaluated more than once. */
    #define silk_SUB_SAT64(a, b)                ((((opus_uint64)(a) - (opus_uint64)(b)) & 0x8000000000000000ULL) == 0 ?                    \
                                                (( (a) & ((b)^0x8000000000000000ULL) & 0x8000000000000000ULL) ? silk_int64_MIN : (a)-(b)) : \
                                                ((((a)^0x8000000000000000ULL) & (b)  & 0x8000000000000000ULL) ? silk_int64_MAX : (a)-(b)) )
    467 
    /* Upper-bound-only saturation, intended for positive inputs: values above
       silk_int32_MAX become silk_int32_MAX, everything else passes through. */
    #define silk_POS_SAT32(v)                   ((v) > silk_int32_MAX ? silk_int32_MAX : (v))
    470 
    /* Add with saturation for positive input values.
       Each macro inspects the sign bit of the sum at the given width and returns
       the type's maximum if it is set, otherwise the sum itself.
       For the 8- and 16-bit variants the sum is formed after integer promotion.
       For the 32- and 64-bit variants the sign bit is probed on an unsigned
       (wrapping) sum so the test never performs a signed overflow (undefined
       behaviour); the signed (a)+(b) is evaluated only when the probe shows the
       sign bit clear. Arguments are evaluated more than once. */
    #define silk_ADD_POS_SAT8(a, b)             ((((a)+(b)) & 0x80)                 ? silk_int8_MAX  : ((a)+(b)))
    #define silk_ADD_POS_SAT16(a, b)            ((((a)+(b)) & 0x8000)               ? silk_int16_MAX : ((a)+(b)))
    #define silk_ADD_POS_SAT32(a, b)            ((((opus_uint32)(a)+(opus_uint32)(b)) & 0x80000000)            ? silk_int32_MAX : ((a)+(b)))
    #define silk_ADD_POS_SAT64(a, b)            ((((opus_uint64)(a)+(opus_uint64)(b)) & 0x8000000000000000ULL) ? silk_int64_MAX : ((a)+(b)))
    476 
    /* Left shifts at fixed widths. The value is first converted to the unsigned
       type of the same width, shifted, and the result converted back to the signed
       type. Callers must keep 0 <= nbits < width. */
    #define silk_LSHIFT8(v, nbits)              ((opus_int8)((opus_uint8)(v)<<(nbits)))         /* nbits >= 0, nbits < 8  */
    #define silk_LSHIFT16(v, nbits)             ((opus_int16)((opus_uint16)(v)<<(nbits)))       /* nbits >= 0, nbits < 16 */
    #define silk_LSHIFT32(v, nbits)             ((opus_int32)((opus_uint32)(v)<<(nbits)))       /* nbits >= 0, nbits < 32 */
    #define silk_LSHIFT64(v, nbits)             ((opus_int64)((opus_uint64)(v)<<(nbits)))       /* nbits >= 0, nbits < 64 */
    /* Default width is 32 bits */
    #define silk_LSHIFT(v, nbits)               silk_LSHIFT32(v, nbits)                         /* nbits >= 0, nbits < 32 */
    482 
    /* Right shifts at fixed widths. All variants expand to a plain >> on the
       argument as given; the width in the name only documents the valid shift
       range (0 <= nbits < width). */
    #define silk_RSHIFT8(v, nbits)              ((v)>>(nbits))                                  /* nbits >= 0, nbits < 8  */
    #define silk_RSHIFT16(v, nbits)             ((v)>>(nbits))                                  /* nbits >= 0, nbits < 16 */
    #define silk_RSHIFT32(v, nbits)             ((v)>>(nbits))                                  /* nbits >= 0, nbits < 32 */
    #define silk_RSHIFT64(v, nbits)             ((v)>>(nbits))                                  /* nbits >= 0, nbits < 64 */
    /* Default width is 32 bits */
    #define silk_RSHIFT(v, nbits)               silk_RSHIFT32(v, nbits)                         /* nbits >= 0, nbits < 32 */
    488 
    /* Saturating left shift: the input is first limited (silk_LIMIT) to the range
       [silk_int32_MIN >> shift, silk_int32_MAX >> shift] and only then shifted
       left, i.e. it saturates before shifting. */
    #define silk_LSHIFT_SAT32(a, shift)         (silk_LSHIFT32(                                             \
                                                    silk_LIMIT( (a),                                        \
                                                                silk_RSHIFT32( silk_int32_MIN, (shift) ),   \
                                                                silk_RSHIFT32( silk_int32_MAX, (shift) ) ), \
                                                    (shift) ))
    492 
    /* Left shift that is allowed to overflow: performed on opus_uint32 and
       converted back to opus_int32. */
    #define silk_LSHIFT_ovflw(v, nbits)         ((opus_int32)((opus_uint32)(v) << (nbits)))     /* nbits >= 0, allowed to overflow */
    /* Plain shifts on the argument as given (named for use with unsigned values) */
    #define silk_LSHIFT_uint(v, nbits)          ((v) << (nbits))                                /* nbits >= 0 */
    #define silk_RSHIFT_uint(v, nbits)          ((v) >> (nbits))                                /* nbits >= 0 */
    496 
    /* Combined shift-then-add / shift-then-subtract helpers. In every macro the
       second operand is shifted by nbits and the result is added to (or subtracted
       from) the first operand. The *32 variants go through silk_ADD32/silk_SUB32;
       the others use a plain + operator. All require nbits >= 0. */
    #define silk_ADD_LSHIFT(acc, v, nbits)          ((acc) + silk_LSHIFT((v), (nbits)))               /* nbits >= 0 */
    #define silk_ADD_LSHIFT32(acc, v, nbits)        silk_ADD32((acc), silk_LSHIFT32((v), (nbits)))    /* nbits >= 0 */
    #define silk_ADD_LSHIFT_uint(acc, v, nbits)     ((acc) + silk_LSHIFT_uint((v), (nbits)))          /* nbits >= 0 */
    #define silk_ADD_RSHIFT(acc, v, nbits)          ((acc) + silk_RSHIFT((v), (nbits)))               /* nbits >= 0 */
    #define silk_ADD_RSHIFT32(acc, v, nbits)        silk_ADD32((acc), silk_RSHIFT32((v), (nbits)))    /* nbits >= 0 */
    #define silk_ADD_RSHIFT_uint(acc, v, nbits)     ((acc) + silk_RSHIFT_uint((v), (nbits)))          /* nbits >= 0 */
    #define silk_SUB_LSHIFT32(acc, v, nbits)        silk_SUB32((acc), silk_LSHIFT32((v), (nbits)))    /* nbits >= 0 */
    #define silk_SUB_RSHIFT32(acc, v, nbits)        silk_SUB32((acc), silk_RSHIFT32((v), (nbits)))    /* nbits >= 0 */
    505 
    /* Right shift with rounding. Requires nbits > 0.
       nbits == 1 is handled separately as (v >> 1) + (v & 1); for larger shifts
       the value is shifted by nbits - 1, incremented, and shifted one more bit. */
    #define silk_RSHIFT_ROUND(v, nbits)         ((nbits) == 1 ? ((v) >> 1) + ((v) & 1) : (((v) >> ((nbits) - 1)) + 1) >> 1)
    /* Same expression, named for 64-bit operands */
    #define silk_RSHIFT_ROUND64(v, nbits)       ((nbits) == 1 ? ((v) >> 1) + ((v) & 1) : (((v) >> ((nbits) - 1)) + 1) >> 1)
    509 
    /* Number of right shifts required to fit the multiplication.
       Computed as -(W-1 - (bits(x) + bits(y))), where W is 32 or 16 and
       bits(v) = W - silk_CLZ{W}(silk_abs(v)). */
    #define silk_NSHIFT_MUL_32_32(x, y)         ( -(31- (32-silk_CLZ32(silk_abs(x)) + (32-silk_CLZ32(silk_abs(y))))) )
    #define silk_NSHIFT_MUL_16_16(x, y)         ( -(15- (16-silk_CLZ16(silk_abs(x)) + (16-silk_CLZ16(silk_abs(y))))) )
    513 
    514 
    /* Generic minimum / maximum. When the operands compare equal the second one is
       returned. The selected operand is evaluated twice. */
    #define silk_min(x, y)                      (((x) < (y)) ? (x) : (y))
    #define silk_max(x, y)                      (((x) > (y)) ? (x) : (y))
    517 
    /* Convert a floating-point constant C to fixed point with Q fractional bits:
       C is scaled by 2^Q (the scale is built as a 64-bit shift), 0.5 is added, and
       the result is cast to opus_int32. */
    #define SILK_FIX_CONST( C, Q )              ((opus_int32)((C) * ((opus_int64)1 << (Q)) + 0.5))
    520 
    521 /* silk_min() versions with typecast in the function call */
    522 static OPUS_INLINE opus_int silk_min_int(opus_int a, opus_int b)
    523 {
    524     return (((a) < (b)) ? (a) : (b));
    525 }
    526 static OPUS_INLINE opus_int16 silk_min_16(opus_int16 a, opus_int16 b)
    527 {
    528     return (((a) < (b)) ? (a) : (b));
    529 }
    530 static OPUS_INLINE opus_int32 silk_min_32(opus_int32 a, opus_int32 b)
    531 {
    532     return (((a) < (b)) ? (a) : (b));
    533 }
    534 static OPUS_INLINE opus_int64 silk_min_64(opus_int64 a, opus_int64 b)
    535 {
    536     return (((a) < (b)) ? (a) : (b));
    537 }
    538 
    539 /* silk_min() versions with typecast in the function call */
    540 static OPUS_INLINE opus_int silk_max_int(opus_int a, opus_int b)
    541 {
    542     return (((a) > (b)) ? (a) : (b));
    543 }
    544 static OPUS_INLINE opus_int16 silk_max_16(opus_int16 a, opus_int16 b)
    545 {
    546     return (((a) > (b)) ? (a) : (b));
    547 }
    548 static OPUS_INLINE opus_int32 silk_max_32(opus_int32 a, opus_int32 b)
    549 {
    550     return (((a) > (b)) ? (a) : (b));
    551 }
    552 static OPUS_INLINE opus_int64 silk_max_64(opus_int64 a, opus_int64 b)
    553 {
    554     return (((a) > (b)) ? (a) : (b));
    555 }
    556 
    /* Clamp v between two bounds given in either order.
       If bound1 > bound2 the range is [bound2, bound1], otherwise it is
       [bound1, bound2]; the upper bound is checked before the lower one.
       Arguments are evaluated more than once. */
    #define silk_LIMIT( v, bound1, bound2)      ((bound1) > (bound2)                                                                \
                                                    ? ((v) > (bound1) ? (bound1) : ((v) < (bound2) ? (bound2) : (v)))               \
                                                    : ((v) > (bound2) ? (bound2) : ((v) < (bound1) ? (bound1) : (v))))

    /* Type-named aliases; all map to the generic macro */
    #define silk_LIMIT_int                      silk_LIMIT
    #define silk_LIMIT_16                       silk_LIMIT
    #define silk_LIMIT_32                       silk_LIMIT
    563 
    /* Absolute value helpers.
       silk_abs / silk_abs_int64 negate non-positive inputs with unary minus.
       Be careful: the result is wrong when the input equals silk_intXX_MIN.
       silk_abs_int / silk_abs_int32 are branch-free: the value is XORed with its
       own sign word (v >> (bits-1)) and that sign word is then subtracted. */
    #define silk_abs(v)                         (((v) >  0)  ? (v) : -(v))
    #define silk_abs_int(v)                     (((v) ^ ((v) >> (8 * sizeof(v) - 1))) - ((v) >> (8 * sizeof(v) - 1)))
    #define silk_abs_int32(v)                   (((v) ^ ((v) >> 31)) - ((v) >> 31))
    #define silk_abs_int64(v)                   (((v) >  0)  ? (v) : -(v))
    568 
    /* Sign of v: 1 for positive, -1 for negative, 0 for zero */
    #define silk_sign(v)                        ((v) > 0 ? 1 : ( (v) < 0 ? -1 : 0 ))
    570 
    /* PSEUDO-RANDOM GENERATOR                                                          */
    /* Computes 907633515 + seed * 196314165 with wrap-around (silk_MLA_ovflw).         */
    /* Store the result and pass it back in as the seed of the next call (also in       */
    /* between frames), otherwise the result won't be random at all. When only some of  */
    /* the bits are needed, take the most significant bits by right-shifting.           */
    #define silk_RAND(seed)                     (silk_MLA_ovflw(907633515, (seed), 196314165))
    576 
    /*  Multiplication functions that can be easily mapped to ARM. */

    /*    silk_SMMUL: Signed top word multiply.
              ARMv6        2 instruction cycles.
              ARMv3M+      3 instruction cycles. use SMULL and ignore LSB registers.(except xM)*/
    /* Alternative formulation, kept disabled:
       #define silk_SMMUL(a32, b32)  (opus_int32)silk_RSHIFT(silk_SMLAL(silk_SMULWB((a32), (b32)), (a32), silk_RSHIFT_ROUND((b32), 16)), 16) */
    /* The active version below takes the silk_SMULL product and keeps its upper
       32 bits (right shift by 32, then cast to opus_int32); it seems faster on x86 */
    #define silk_SMMUL(x32, y32)                (opus_int32)silk_RSHIFT64(silk_SMULL((x32), (y32)), 32)
    585 
    /* When OPUS_X86_MAY_HAVE_SSE4_1 is not defined, route both entry points straight
       to their *_c implementations. The arch argument is evaluated and discarded via
       a (void) cast; silk_burg_modified_c still receives arch, while
       silk_inner_prod16_aligned_64_c is called without it. */
    #if !defined(OPUS_X86_MAY_HAVE_SSE4_1)
    #define silk_burg_modified(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch)  \
        ((void)(arch),                                                                                         \
         silk_burg_modified_c(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch))

    #define silk_inner_prod16_aligned_64(inVec1, inVec2, len, arch)  \
        ((void)(arch),                                               \
         silk_inner_prod16_aligned_64_c(inVec1, inVec2, len))
    #endif
    593 
    594 #include "Inlines.h"
    595 #include "MacroCount.h"
    596 #include "MacroDebug.h"
    597 
    598 #ifdef OPUS_ARM_INLINE_ASM
    599 #include "arm/SigProc_FIX_armv4.h"
    600 #endif
    601 
    602 #ifdef OPUS_ARM_INLINE_EDSP
    603 #include "arm/SigProc_FIX_armv5e.h"
    604 #endif
    605 
    606 #if defined(MIPSr1_ASM)
    607 #include "mips/sigproc_fix_mipsr1.h"
    608 #endif
    609 
    610 
    611 #ifdef  __cplusplus
    612 }
    613 #endif
    614 
    615 #endif /* SILK_SIGPROC_FIX_H */
    616