Home | History | Annotate | Download | only in libaudioprocessing
      1 /*
      2  * Copyright (C) 2007 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #define LOG_TAG "AudioResamplerSinc"
     18 //#define LOG_NDEBUG 0
     19 
     20 #define __STDC_CONSTANT_MACROS
     21 #include <malloc.h>
     22 #include <pthread.h>
     23 #include <string.h>
     24 #include <stdlib.h>
     25 #include <dlfcn.h>
     26 
     27 #include <cutils/compiler.h>
     28 #include <cutils/properties.h>
     29 
     30 #include <utils/Log.h>
     31 #include <audio_utils/primitives.h>
     32 
     33 #include "AudioResamplerSinc.h"
     34 
     35 #if defined(__clang__) && !__has_builtin(__builtin_assume_aligned)
        // Fallback for clang builds that lack the builtin: evaluates to (p) and
        // marks the misaligned case as unreachable. Note that (p) is evaluated
        // more than once by this macro.
     36 #define __builtin_assume_aligned(p, a) \
     37 	(((uintptr_t(p) % (a)) == 0) ? (p) : (__builtin_unreachable(), (p)))
     38 #endif
     39 
        // Inline assembly is only enabled for 32-bit ARM when not compiling in
        // Thumb mode; every other target takes the portable C++ path.
     40 #if defined(__arm__) && !defined(__thumb__)
     41 #define USE_INLINE_ASSEMBLY (true)
     42 #else
     43 #define USE_INLINE_ASSEMBLY (false)
     44 #endif
     45 
        // USE_NEON defaults to true on AArch64 / NEON-capable ARM unless the build
        // already defined it, and is defined as false on every other target.
     46 #if defined(__aarch64__) || defined(__ARM_NEON__)
     47 #ifndef USE_NEON
     48 #define USE_NEON (true)
     49 #endif
     50 #else
     51 #define USE_NEON (false)
     52 #endif
     53 #if USE_NEON
     54 #include <arm_neon.h>
     55 #endif
     56 
        // Silences unused-parameter/variable warnings for x.
     57 #define UNUSED(x) ((void)(x))
     58 
     59 namespace android {
     60 // ----------------------------------------------------------------------------
     61 
     62 
     63 /*
     64  * These coefficients are computed with the "fir" utility found in
     65  * tools/resampler_tools
     66  * cmd-line: fir -l 7 -s 48000 -c 20478
         *
         * resample() selects this table when the input sample rate does not
         * exceed the output sample rate. The values themselves come from the
         * included header.
     67  */
     68 const uint32_t AudioResamplerSinc::mFirCoefsUp[] __attribute__ ((aligned (32))) = {
     69 #include "AudioResamplerSincUp.h"
     70 };
     71 
     72 /*
     73  * These coefficients are optimized for 48KHz -> 44.1KHz
     74  * cmd-line: fir -l 7 -s 48000 -c 17189
         *
         * resample() selects this table when the input sample rate is higher
         * than the output sample rate.
     75  */
     76 const uint32_t AudioResamplerSinc::mFirCoefsDown[] __attribute__ ((aligned (32))) = {
     77 #include "AudioResamplerSincDown.h"
     78 };
     79 
     80 // We use 15 bits to interpolate between these samples.
     81 // This cannot change because the multiplies below rely on it.
     82 static const int pLerpBits = 15;
     83 
        // Guards init_routine() so that it runs once per process (see the
        // pthread_once() call in the constructor).
     84 static pthread_once_t once_control = PTHREAD_ONCE_INIT;
        // Coefficient accessor resolved from libaudio-resampler.so by
        // init_routine(); stays NULL when the library or a symbol is missing.
     85 static readCoefficientsFn readResampleCoefficients = NULL;
     86 
        // Filter parameters for the two quality levels, filled in by init_routine().
     87 /*static*/ AudioResamplerSinc::Constants AudioResamplerSinc::highQualityConstants;
     88 /*static*/ AudioResamplerSinc::Constants AudioResamplerSinc::veryHighQualityConstants;
     89 
     90 void AudioResamplerSinc::init_routine()
     91 {
     92     // for high quality resampler, the parameters for coefficients are compile-time constants
     93     Constants *c = &highQualityConstants;
     94     c->coefsBits = RESAMPLE_FIR_LERP_INT_BITS;
     95     c->cShift = kNumPhaseBits - c->coefsBits;
     96     c->cMask = ((1<< c->coefsBits)-1) << c->cShift;
     97     c->pShift = kNumPhaseBits - c->coefsBits - pLerpBits;
     98     c->pMask = ((1<< pLerpBits)-1) << c->pShift;
     99     c->halfNumCoefs = RESAMPLE_FIR_NUM_COEF;
    100 
    101     // for very high quality resampler, the parameters are load-time constants
    102     veryHighQualityConstants = highQualityConstants;
    103 
    104     // Open the dll to get the coefficients for VERY_HIGH_QUALITY
    105     void *resampleCoeffLib = dlopen("libaudio-resampler.so", RTLD_NOW);
    106     ALOGV("Open libaudio-resampler library = %p", resampleCoeffLib);
    107     if (resampleCoeffLib == NULL) {
    108         ALOGE("Could not open audio-resampler library: %s", dlerror());
    109         return;
    110     }
    111 
    112     readResampleFirNumCoeffFn readResampleFirNumCoeff;
    113     readResampleFirLerpIntBitsFn readResampleFirLerpIntBits;
    114 
    115     readResampleCoefficients = (readCoefficientsFn)
    116             dlsym(resampleCoeffLib, "readResamplerCoefficients");
    117     readResampleFirNumCoeff = (readResampleFirNumCoeffFn)
    118             dlsym(resampleCoeffLib, "readResampleFirNumCoeff");
    119     readResampleFirLerpIntBits = (readResampleFirLerpIntBitsFn)
    120             dlsym(resampleCoeffLib, "readResampleFirLerpIntBits");
    121 
    122     if (!readResampleCoefficients || !readResampleFirNumCoeff || !readResampleFirLerpIntBits) {
    123         readResampleCoefficients = NULL;
    124         dlclose(resampleCoeffLib);
    125         resampleCoeffLib = NULL;
    126         ALOGE("Could not find symbol: %s", dlerror());
    127         return;
    128     }
    129 
    130     c = &veryHighQualityConstants;
    131     c->coefsBits = readResampleFirLerpIntBits();
    132     c->cShift = kNumPhaseBits - c->coefsBits;
    133     c->cMask = ((1<<c->coefsBits)-1) << c->cShift;
    134     c->pShift = kNumPhaseBits - c->coefsBits - pLerpBits;
    135     c->pMask = ((1<<pLerpBits)-1) << c->pShift;
    136     // number of zero-crossing on each side
    137     c->halfNumCoefs = readResampleFirNumCoeff();
    138     ALOGV("coefsBits = %d", c->coefsBits);
    139     ALOGV("halfNumCoefs = %d", c->halfNumCoefs);
    140     // note that we "leak" resampleCoeffLib until the process exits
    141 }
    142 
    143 // ----------------------------------------------------------------------------
    144 
    145 #if !USE_NEON
    146 
    147 static inline
    148 int32_t mulRL(int left, int32_t in, uint32_t vRL)
    149 {
    150 #if USE_INLINE_ASSEMBLY
    151     int32_t out;
    152     if (left) {
    153         asm( "smultb %[out], %[in], %[vRL] \n"
    154              : [out]"=r"(out)
    155              : [in]"%r"(in), [vRL]"r"(vRL)
    156              : );
    157     } else {
    158         asm( "smultt %[out], %[in], %[vRL] \n"
    159              : [out]"=r"(out)
    160              : [in]"%r"(in), [vRL]"r"(vRL)
    161              : );
    162     }
    163     return out;
    164 #else
    165     int16_t v = left ? int16_t(vRL) : int16_t(vRL>>16);
    166     return int32_t((int64_t(in) * v) >> 16);
    167 #endif
    168 }
    169 
    170 static inline
    171 int32_t mulAdd(int16_t in, int32_t v, int32_t a)
    172 {
    173 #if USE_INLINE_ASSEMBLY
    174     int32_t out;
    175     asm( "smlawb %[out], %[v], %[in], %[a] \n"
    176          : [out]"=r"(out)
    177          : [in]"%r"(in), [v]"r"(v), [a]"r"(a)
    178          : );
    179     return out;
    180 #else
    181     return a + int32_t((int64_t(v) * in) >> 16);
    182 #endif
    183 }
    184 
    185 static inline
    186 int32_t mulAddRL(int left, uint32_t inRL, int32_t v, int32_t a)
    187 {
    188 #if USE_INLINE_ASSEMBLY
    189     int32_t out;
    190     if (left) {
    191         asm( "smlawb %[out], %[v], %[inRL], %[a] \n"
    192              : [out]"=r"(out)
    193              : [inRL]"%r"(inRL), [v]"r"(v), [a]"r"(a)
    194              : );
    195     } else {
    196         asm( "smlawt %[out], %[v], %[inRL], %[a] \n"
    197              : [out]"=r"(out)
    198              : [inRL]"%r"(inRL), [v]"r"(v), [a]"r"(a)
    199              : );
    200     }
    201     return out;
    202 #else
    203     int16_t s = left ? int16_t(inRL) : int16_t(inRL>>16);
    204     return a + int32_t((int64_t(v) * s) >> 16);
    205 #endif
    206 }
    207 
    208 #endif // !USE_NEON
    209 
    210 // ----------------------------------------------------------------------------
    211 
    212 AudioResamplerSinc::AudioResamplerSinc(
    213         int inChannelCount, int32_t sampleRate, src_quality quality)
    214     : AudioResampler(inChannelCount, sampleRate, quality),
    215     mState(0), mImpulse(0), mRingFull(0), mFirCoefs(0)
    216 {
    217     /*
    218      * Layout of the state buffer for 32 tap:
    219      *
    220      * "present" sample            beginning of 2nd buffer
    221      *                 v                v
    222      *  0              01               2              23              3
    223      *  0              F0               0              F0              F
    224      * [pppppppppppppppInnnnnnnnnnnnnnnnpppppppppppppppInnnnnnnnnnnnnnnn]
    225      *                 ^               ^ head
    226      *
    227      * p = past samples, convoluted with the (p)ositive side of sinc()
    228      * n = future samples, convoluted with the (n)egative side of sinc()
    229      * r = extra space for implementing the ring buffer
    230      *
    231      */
    232 
    233     mVolumeSIMD[0] = 0;
    234     mVolumeSIMD[1] = 0;
    235 
    236     // Load the constants for coefficients
    237     int ok = pthread_once(&once_control, init_routine);
    238     if (ok != 0) {
    239         ALOGE("%s pthread_once failed: %d", __func__, ok);
    240     }
    241     mConstants = (quality == VERY_HIGH_QUALITY) ?
    242             &veryHighQualityConstants : &highQualityConstants;
    243 }
    244 
    245 
    246 AudioResamplerSinc::~AudioResamplerSinc() {
            // mState is either still the null value set by the constructor or the
            // buffer obtained from memalign() in init(); free() accepts both.
            // mImpulse and mRingFull point into the same buffer and need no release.
    247     free(mState);
    248 }
    249 
    250 void AudioResamplerSinc::init() {
    251     const Constants& c(*mConstants);
    252     const size_t numCoefs = 2 * c.halfNumCoefs;
    253     const size_t stateSize = numCoefs * mChannelCount * 2;
    254     mState = (int16_t*)memalign(32, stateSize*sizeof(int16_t));
    255     memset(mState, 0, sizeof(int16_t)*stateSize);
    256     mImpulse  = mState   + (c.halfNumCoefs-1)*mChannelCount;
    257     mRingFull = mImpulse + (numCoefs+1)*mChannelCount;
    258 }
    259 
    260 void AudioResamplerSinc::setVolume(float left, float right) {
    261     AudioResampler::setVolume(left, right);
    262     // convert to U4_28 (rounding down).
    263     // integer volume values are clamped to 0 to UNITY_GAIN.
    264     mVolumeSIMD[0] = u4_28_from_float(clampFloatVol(left));
    265     mVolumeSIMD[1] = u4_28_from_float(clampFloatVol(right));
    266 }
    267 
    268 size_t AudioResamplerSinc::resample(int32_t* out, size_t outFrameCount,
    269             AudioBufferProvider* provider)
    270 {
    271     // FIXME store current state (up or down sample) and only load the coefs when the state
    272     // changes. Or load two pointers one for up and one for down in the init function.
    273     // Not critical now since the read functions are fast, but would be important if read was slow.
    274     if (mConstants == &veryHighQualityConstants && readResampleCoefficients) {
    275         mFirCoefs = readResampleCoefficients( mInSampleRate <= mSampleRate );
    276     } else {
    277         mFirCoefs = (const int32_t *)
    278                 ((mInSampleRate <= mSampleRate) ? mFirCoefsUp : mFirCoefsDown);
    279     }
    280 
    281     // select the appropriate resampler
    282     switch (mChannelCount) {
    283     case 1:
    284         return resample<1>(out, outFrameCount, provider);
    285     case 2:
    286         return resample<2>(out, outFrameCount, provider);
    287     default:
    288         LOG_ALWAYS_FATAL("invalid channel count: %d", mChannelCount);
    289         return 0;
    290     }
    291 }
    292 
    293 
    294 template<int CHANNELS>
    295 size_t AudioResamplerSinc::resample(int32_t* out, size_t outFrameCount,
    296         AudioBufferProvider* provider)
    297 {
    298     const Constants& c(*mConstants);
    299     const size_t headOffset = c.halfNumCoefs*CHANNELS;
    300     int16_t* impulse = mImpulse;
    301     uint32_t vRL = mVolumeRL;
    302     size_t inputIndex = mInputIndex;
    303     uint32_t phaseFraction = mPhaseFraction;
    304     uint32_t phaseIncrement = mPhaseIncrement;
    305     size_t outputIndex = 0;
    306     size_t outputSampleCount = outFrameCount * 2;
    307     size_t inFrameCount = getInFrameCountRequired(outFrameCount);
    308 
    309     while (outputIndex < outputSampleCount) {
    310         // buffer is empty, fetch a new one
    311         while (mBuffer.frameCount == 0) {
    312             mBuffer.frameCount = inFrameCount;
    313             provider->getNextBuffer(&mBuffer);
    314             if (mBuffer.raw == NULL) {
    315                 goto resample_exit;
    316             }
    317             const uint32_t phaseIndex = phaseFraction >> kNumPhaseBits;
    318             if (phaseIndex == 1) {
    319                 // read one frame
    320                 read<CHANNELS>(impulse, phaseFraction, mBuffer.i16, inputIndex);
    321             } else if (phaseIndex == 2) {
    322                 // read 2 frames
    323                 read<CHANNELS>(impulse, phaseFraction, mBuffer.i16, inputIndex);
    324                 inputIndex++;
    325                 if (inputIndex >= mBuffer.frameCount) {
    326                     inputIndex -= mBuffer.frameCount;
    327                     provider->releaseBuffer(&mBuffer);
    328                 } else {
    329                     read<CHANNELS>(impulse, phaseFraction, mBuffer.i16, inputIndex);
    330                 }
    331             }
    332         }
    333         int16_t const * const in = mBuffer.i16;
    334         const size_t frameCount = mBuffer.frameCount;
    335 
    336         // Always read-in the first samples from the input buffer
    337         int16_t* head = impulse + headOffset;
    338         for (size_t i=0 ; i<CHANNELS ; i++) {
    339             head[i] = in[inputIndex*CHANNELS + i];
    340         }
    341 
    342         // handle boundary case
    343         while (CC_LIKELY(outputIndex < outputSampleCount)) {
    344             filterCoefficient<CHANNELS>(&out[outputIndex], phaseFraction, impulse, vRL);
    345             outputIndex += 2;
    346 
    347             phaseFraction += phaseIncrement;
    348             const size_t phaseIndex = phaseFraction >> kNumPhaseBits;
    349             for (size_t i=0 ; i<phaseIndex ; i++) {
    350                 inputIndex++;
    351                 if (inputIndex >= frameCount) {
    352                     goto done;  // need a new buffer
    353                 }
    354                 read<CHANNELS>(impulse, phaseFraction, in, inputIndex);
    355             }
    356         }
    357 done:
    358         // if done with buffer, save samples
    359         if (inputIndex >= frameCount) {
    360             inputIndex -= frameCount;
    361             provider->releaseBuffer(&mBuffer);
    362         }
    363     }
    364 
    365 resample_exit:
    366     mImpulse = impulse;
    367     mInputIndex = inputIndex;
    368     mPhaseFraction = phaseFraction;
    369     return outputIndex / CHANNELS;
    370 }
    371 
    372 template<int CHANNELS>
    373 /***
    374 * read()
    375 *
    376 * This function reads only one frame from input buffer and writes it in
    377 * state buffer
    378 *
    379 **/
    380 void AudioResamplerSinc::read(
    381         int16_t*& impulse, uint32_t& phaseFraction,
    382         const int16_t* in, size_t inputIndex)
    383 {
    384     impulse += CHANNELS;
    385     phaseFraction -= 1LU<<kNumPhaseBits;
    386 
    387     const Constants& c(*mConstants);
    388     if (CC_UNLIKELY(impulse >= mRingFull)) {
    389         const size_t stateSize = (c.halfNumCoefs*2)*CHANNELS;
    390         memcpy(mState, mState+stateSize, sizeof(int16_t)*stateSize);
    391         impulse -= stateSize;
    392     }
    393 
    394     int16_t* head = impulse + c.halfNumCoefs*CHANNELS;
    395     for (size_t i=0 ; i<CHANNELS ; i++) {
    396         head[i] = in[inputIndex*CHANNELS + i];
    397     }
    398 }
    399 
    400 template<int CHANNELS>
        /*
         * filterCoefficient()
         *
         * Produces one output frame for the fractional position `phase` and adds
         * it to out[0] and out[1] (the output is accumulated, not overwritten).
         *
         * out:     two accumulators for the frame.
         * phase:   fractional phase; cMask/cShift extract a coefficient-table
         *          index from it and pMask/pShift an interpolation weight.
         * samples: position in the state buffer the filter is centred on; frames
         *          are consumed downwards from `samples` and upwards from
         *          `samples + CHANNELS`.
         * vRL:     packed volume word passed to mulRL(); unused by the NEON
         *          build, which reads mVolumeSIMD instead.
         */
    401 void AudioResamplerSinc::filterCoefficient(int32_t* out, uint32_t phase,
    402          const int16_t *samples, uint32_t vRL)
    403 {
    404     // NOTE: be very careful when modifying the code here. register
    405     // pressure is very high and a small change might cause the compiler
    406     // to generate far less efficient code.
    407     // Always sanity check the result with objdump or test-resample.
    408 
    409     // compute the index of the coefficient on the positive side and
    410     // negative side
            // (the negative side uses the mirrored phase, ONE - phase)
    411     const Constants& c(*mConstants);
    412     const int32_t ONE = c.cMask | c.pMask;
    413     uint32_t indexP = ( phase & c.cMask) >> c.cShift;
    414     uint32_t lerpP  = ( phase & c.pMask) >> c.pShift;
    415     uint32_t indexN = ((ONE-phase) & c.cMask) >> c.cShift;
    416     uint32_t lerpN  = ((ONE-phase) & c.pMask) >> c.pShift;
    417 
            // turn the table indices into element offsets: consecutive table
            // entries are `offset` (= halfNumCoefs) coefficients apart
    418     const size_t offset = c.halfNumCoefs;
    419     indexP *= offset;
    420     indexN *= offset;
    421 
    422     int32_t const* coefsP = mFirCoefs + indexP;
    423     int32_t const* coefsN = mFirCoefs + indexN;
    424     int16_t const* sP = samples;
    425     int16_t const* sN = samples + CHANNELS;
    426 
    427     size_t count = offset;
    428 
    429 #if !USE_NEON
            // Scalar path: one tap per side per iteration. sP walks backwards
            // through the state buffer, sN walks forwards.
    430     int32_t l = 0;
    431     int32_t r = 0;
    432     for (size_t i=0 ; i<count ; i++) {
    433         interpolate<CHANNELS>(l, r, coefsP++, offset, lerpP, sP);
    434         sP -= CHANNELS;
    435         interpolate<CHANNELS>(l, r, coefsN++, offset, lerpN, sN);
    436         sN += CHANNELS;
    437     }
            // scale l / r by the two halves of vRL and accumulate into the output
    438     out[0] += 2 * mulRL(1, l, vRL);
    439     out[1] += 2 * mulRL(0, r, vRL);
    440 #else
    441     UNUSED(vRL);
    442     if (CHANNELS == 1) {
                // coefsP1 / coefsN1 address the same taps in the next table entry;
                // they are the second point of the linear interpolation below
    443         int32_t const* coefsP1 = coefsP + offset;
    444         int32_t const* coefsN1 = coefsN + offset;
                // vld1_s16 loads 4 consecutive samples, so back up 3 frames to make
                // the load end at `samples`; vrev64_s16 later reverses the lanes
    445         sP -= CHANNELS*3;
    446 
    447         int32x4_t sum;
    448         int32x2_t lerpPN;
                // lerpPN = { lerpP << 16, lerpN << 16 }
    449         lerpPN = vdup_n_s32(0);
    450         lerpPN = vld1_lane_s32((int32_t *)&lerpP, lerpPN, 0);
    451         lerpPN = vld1_lane_s32((int32_t *)&lerpN, lerpPN, 1);
    452         lerpPN = vshl_n_s32(lerpPN, 16);
    453         sum = vdupq_n_s32(0);
    454 
    455         int16x4_t sampleP, sampleN;
    456         int32x4_t samplePExt, sampleNExt;
    457         int32x4_t coefsPV0, coefsPV1, coefsNV0, coefsNV1;
    458 
                // promise the compiler that all coefficient pointers are 16-byte aligned
    459         coefsP = (const int32_t*)__builtin_assume_aligned(coefsP, 16);
    460         coefsN = (const int32_t*)__builtin_assume_aligned(coefsN, 16);
    461         coefsP1 = (const int32_t*)__builtin_assume_aligned(coefsP1, 16);
    462         coefsN1 = (const int32_t*)__builtin_assume_aligned(coefsN1, 16);
                // 4 taps per side per iteration. count is unsigned, so the loop only
                // terminates when halfNumCoefs is a multiple of 4.
                // NOTE(review): confirm every coefficient set guarantees that.
    463         for (; count > 0; count -= 4) {
    464             sampleP = vld1_s16(sP);
    465             sampleN = vld1_s16(sN);
    466             coefsPV0 = vld1q_s32(coefsP);
    467             coefsNV0 = vld1q_s32(coefsN);
    468             coefsPV1 = vld1q_s32(coefsP1);
    469             coefsNV1 = vld1q_s32(coefsN1);
    470             sP -= 4;
    471             sN += 4;
    472             coefsP += 4;
    473             coefsN += 4;
    474             coefsP1 += 4;
    475             coefsN1 += 4;
    476 
    477             sampleP = vrev64_s16(sampleP);
    478 
                    // steps 1-3 compute coef = c0 + (c1 - c0) * lerp for both sides
    479             // interpolate (step1)
    480             coefsPV1 = vsubq_s32(coefsPV1, coefsPV0);
    481             coefsNV1 = vsubq_s32(coefsNV1, coefsNV0);
    482             samplePExt = vshll_n_s16(sampleP, 15);
    483             // interpolate (step2)
    484             coefsPV1 = vqrdmulhq_lane_s32(coefsPV1, lerpPN, 0);
    485             coefsNV1 = vqrdmulhq_lane_s32(coefsNV1, lerpPN, 1);
    486             sampleNExt = vshll_n_s16(sampleN, 15);
    487             // interpolate (step3)
    488             coefsPV0 = vaddq_s32(coefsPV0, coefsPV1);
    489             coefsNV0 = vaddq_s32(coefsNV0, coefsNV1);
    490 
                    // multiply the widened samples by the coefficients and accumulate
    491             samplePExt = vqrdmulhq_s32(samplePExt, coefsPV0);
    492             sampleNExt = vqrdmulhq_s32(sampleNExt, coefsNV0);
    493             sum = vaddq_s32(sum, samplePExt);
    494             sum = vaddq_s32(sum, sampleNExt);
    495         }
    496         int32x2_t volumesV, outV;
    497         volumesV = vld1_s32(mVolumeSIMD);
    498         outV = vld1_s32(out);
    499 
    500         //add all 4 partial sums
                // (after the two pairwise adds both lanes hold the full sum)
    501         int32x2_t sumLow, sumHigh;
    502         sumLow = vget_low_s32(sum);
    503         sumHigh = vget_high_s32(sum);
    504         sumLow = vpadd_s32(sumLow, sumHigh);
    505         sumLow = vpadd_s32(sumLow, sumLow);
    506 
                // the single mono sum is scaled by each of the two volumes and
                // added to out[0] and out[1]
    507         sumLow = vqrdmulh_s32(sumLow, volumesV);
    508         outV = vadd_s32(outV, sumLow);
    509         vst1_s32(out, outV);
    510     } else if (CHANNELS == 2) {
                // Same scheme as the mono branch, with one accumulator per channel.
    511         int32_t const* coefsP1 = coefsP + offset;
    512         int32_t const* coefsN1 = coefsN + offset;
    513         sP -= CHANNELS*3;
    514 
    515         int32x4_t sum0, sum1;
    516         int32x2_t lerpPN;
    517 
    518         lerpPN = vdup_n_s32(0);
    519         lerpPN = vld1_lane_s32((int32_t *)&lerpP, lerpPN, 0);
    520         lerpPN = vld1_lane_s32((int32_t *)&lerpN, lerpPN, 1);
    521         lerpPN = vshl_n_s32(lerpPN, 16);
    522         sum0 = vdupq_n_s32(0);
    523         sum1 = vdupq_n_s32(0);
    524 
    525         int16x4x2_t sampleP, sampleN;
    526         int32x4x2_t samplePExt, sampleNExt;
    527         int32x4_t coefsPV0, coefsPV1, coefsNV0, coefsNV1;
    528 
    529         coefsP = (const int32_t*)__builtin_assume_aligned(coefsP, 16);
    530         coefsN = (const int32_t*)__builtin_assume_aligned(coefsN, 16);
    531         coefsP1 = (const int32_t*)__builtin_assume_aligned(coefsP1, 16);
    532         coefsN1 = (const int32_t*)__builtin_assume_aligned(coefsN1, 16);
                // 4 frames per side per iteration; same multiple-of-4 requirement on
                // halfNumCoefs as in the mono branch
    533         for (; count > 0; count -= 4) {
                    // vld2_s16 de-interleaves 4 frames: val[0] receives the first
                    // sample of every frame, val[1] the second
    534             sampleP = vld2_s16(sP);
    535             sampleN = vld2_s16(sN);
    536             coefsPV0 = vld1q_s32(coefsP);
    537             coefsNV0 = vld1q_s32(coefsN);
    538             coefsPV1 = vld1q_s32(coefsP1);
    539             coefsNV1 = vld1q_s32(coefsN1);
    540             sP -= 8;
    541             sN += 8;
    542             coefsP += 4;
    543             coefsN += 4;
    544             coefsP1 += 4;
    545             coefsN1 += 4;
    546 
    547             sampleP.val[0] = vrev64_s16(sampleP.val[0]);
    548             sampleP.val[1] = vrev64_s16(sampleP.val[1]);
    549 
    550             // interpolate (step1)
    551             coefsPV1 = vsubq_s32(coefsPV1, coefsPV0);
    552             coefsNV1 = vsubq_s32(coefsNV1, coefsNV0);
    553             samplePExt.val[0] = vshll_n_s16(sampleP.val[0], 15);
    554             samplePExt.val[1] = vshll_n_s16(sampleP.val[1], 15);
    555             // interpolate (step2)
    556             coefsPV1 = vqrdmulhq_lane_s32(coefsPV1, lerpPN, 0);
    557             coefsNV1 = vqrdmulhq_lane_s32(coefsNV1, lerpPN, 1);
    558             sampleNExt.val[0] = vshll_n_s16(sampleN.val[0], 15);
    559             sampleNExt.val[1] = vshll_n_s16(sampleN.val[1], 15);
    560             // interpolate (step3)
    561             coefsPV0 = vaddq_s32(coefsPV0, coefsPV1);
    562             coefsNV0 = vaddq_s32(coefsNV0, coefsNV1);
    563 
                    // both channels share the same interpolated coefficients
    564             samplePExt.val[0] = vqrdmulhq_s32(samplePExt.val[0], coefsPV0);
    565             samplePExt.val[1] = vqrdmulhq_s32(samplePExt.val[1], coefsPV0);
    566             sampleNExt.val[0] = vqrdmulhq_s32(sampleNExt.val[0], coefsNV0);
    567             sampleNExt.val[1] = vqrdmulhq_s32(sampleNExt.val[1], coefsNV0);
    568             sum0 = vaddq_s32(sum0, samplePExt.val[0]);
    569             sum1 = vaddq_s32(sum1, samplePExt.val[1]);
    570             sum0 = vaddq_s32(sum0, sampleNExt.val[0]);
    571             sum1 = vaddq_s32(sum1, sampleNExt.val[1]);
    572         }
    573         int32x2_t volumesV, outV;
    574         volumesV = vld1_s32(mVolumeSIMD);
    575         outV = vld1_s32(out);
    576 
    577         //add all 4 partial sums
    578         int32x2_t sumLow0, sumHigh0, sumLow1, sumHigh1;
    579         sumLow0 = vget_low_s32(sum0);
    580         sumHigh0 = vget_high_s32(sum0);
    581         sumLow1 = vget_low_s32(sum1);
    582         sumHigh1 = vget_high_s32(sum1);
    583         sumLow0 = vpadd_s32(sumLow0, sumHigh0);
    584         sumLow0 = vpadd_s32(sumLow0, sumLow0);
    585         sumLow1 = vpadd_s32(sumLow1, sumHigh1);
    586         sumLow1 = vpadd_s32(sumLow1, sumLow1);
    587 
                // pack { total of sum0, total of sum1 } into one vector, apply the
                // per-channel volumes and accumulate into out[0], out[1]
    588         sumLow0 = vtrn_s32(sumLow0, sumLow1).val[0];
    589         sumLow0 = vqrdmulh_s32(sumLow0, volumesV);
    590         outV = vadd_s32(outV, sumLow0);
    591         vst1_s32(out, outV);
    592     }
    593 #endif
    594 }
    595 
    596 template<int CHANNELS>
    597 void AudioResamplerSinc::interpolate(
    598         int32_t& l, int32_t& r,
    599         const int32_t* coefs, size_t offset,
    600         int32_t lerp, const int16_t* samples)
    601 {
    602     int32_t c0 = coefs[0];
    603     int32_t c1 = coefs[offset];
    604     int32_t sinc = mulAdd(lerp, (c1-c0)<<1, c0);
    605     if (CHANNELS == 2) {
    606         uint32_t rl = *reinterpret_cast<const uint32_t*>(samples);
    607         l = mulAddRL(1, rl, sinc, l);
    608         r = mulAddRL(0, rl, sinc, r);
    609     } else {
    610         r = l = mulAdd(samples[0], sinc, l);
    611     }
    612 }
    613 // ----------------------------------------------------------------------------
    614 } // namespace android
    615