/*
 * Copyright (C) 2007 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#define LOG_TAG "AudioResamplerSinc"
//#define LOG_NDEBUG 0

#define __STDC_CONSTANT_MACROS
#include <malloc.h>
#include <string.h>
#include <stdlib.h>
#include <dlfcn.h>

#include <cutils/compiler.h>
#include <cutils/properties.h>

#include <utils/Log.h>
#include <audio_utils/primitives.h>

#include "AudioResamplerSinc.h"

// Fallback for clang versions that lack __builtin_assume_aligned: the
// expression still evaluates to p, and the misaligned case is marked
// unreachable so the optimizer may assume the alignment holds.
#if defined(__clang__) && !__has_builtin(__builtin_assume_aligned)
#define __builtin_assume_aligned(p, a) \
    (((uintptr_t(p) % (a)) == 0) ? (p) : (__builtin_unreachable(), (p)))
#endif

// The inline-assembly helpers are only enabled when building for ARM in
// non-Thumb mode; every other target uses the portable C++ expressions.
#if defined(__arm__) && !defined(__thumb__)
#define USE_INLINE_ASSEMBLY (true)
#else
#define USE_INLINE_ASSEMBLY (false)
#endif

// NEON kernels are enabled on aarch64 or whenever the compiler advertises
// __ARM_NEON__, unless the build already provided its own USE_NEON value.
// On all other targets USE_NEON is defined as false.
#if defined(__aarch64__) || defined(__ARM_NEON__)
#ifndef USE_NEON
#define USE_NEON (true)
#endif
#else
#define USE_NEON (false)
#endif
#if USE_NEON
#include <arm_neon.h>
#endif

// Marks a variable as intentionally unused to silence compiler warnings.
#define UNUSED(x) ((void)(x))

namespace android {
// ----------------------------------------------------------------------------


/*
 * These coefficients are computed with the "fir" utility found in
 * tools/resampler_tools
 * cmd-line: fir -l 7 -s 48000 -c 20478
 *
 * The table contents are pulled in from a generated header and the array is
 * aligned to 32 bytes.
 */
const uint32_t AudioResamplerSinc::mFirCoefsUp[] __attribute__ ((aligned (32))) = {
#include "AudioResamplerSincUp.h"
};

/*
 * These coefficients are optimized for 48KHz -> 44.1KHz
 * cmd-line: fir -l 7 -s 48000 -c 17189
 *
 * The table contents are pulled in from a generated header and the array is
 * aligned to 32 bytes.
 */
const uint32_t AudioResamplerSinc::mFirCoefsDown[] __attribute__ ((aligned (32))) = {
#include "AudioResamplerSincDown.h"
};

// We use 15 bits to interpolate between these samples.
// This cannot change because the multiplies below rely on it.
81 static const int pLerpBits = 15; 82 83 static pthread_once_t once_control = PTHREAD_ONCE_INIT; 84 static readCoefficientsFn readResampleCoefficients = NULL; 85 86 /*static*/ AudioResamplerSinc::Constants AudioResamplerSinc::highQualityConstants; 87 /*static*/ AudioResamplerSinc::Constants AudioResamplerSinc::veryHighQualityConstants; 88 89 void AudioResamplerSinc::init_routine() 90 { 91 // for high quality resampler, the parameters for coefficients are compile-time constants 92 Constants *c = &highQualityConstants; 93 c->coefsBits = RESAMPLE_FIR_LERP_INT_BITS; 94 c->cShift = kNumPhaseBits - c->coefsBits; 95 c->cMask = ((1<< c->coefsBits)-1) << c->cShift; 96 c->pShift = kNumPhaseBits - c->coefsBits - pLerpBits; 97 c->pMask = ((1<< pLerpBits)-1) << c->pShift; 98 c->halfNumCoefs = RESAMPLE_FIR_NUM_COEF; 99 100 // for very high quality resampler, the parameters are load-time constants 101 veryHighQualityConstants = highQualityConstants; 102 103 // Open the dll to get the coefficients for VERY_HIGH_QUALITY 104 void *resampleCoeffLib = dlopen("libaudio-resampler.so", RTLD_NOW); 105 ALOGV("Open libaudio-resampler library = %p", resampleCoeffLib); 106 if (resampleCoeffLib == NULL) { 107 ALOGE("Could not open audio-resampler library: %s", dlerror()); 108 return; 109 } 110 111 readResampleFirNumCoeffFn readResampleFirNumCoeff; 112 readResampleFirLerpIntBitsFn readResampleFirLerpIntBits; 113 114 readResampleCoefficients = (readCoefficientsFn) 115 dlsym(resampleCoeffLib, "readResamplerCoefficients"); 116 readResampleFirNumCoeff = (readResampleFirNumCoeffFn) 117 dlsym(resampleCoeffLib, "readResampleFirNumCoeff"); 118 readResampleFirLerpIntBits = (readResampleFirLerpIntBitsFn) 119 dlsym(resampleCoeffLib, "readResampleFirLerpIntBits"); 120 121 if (!readResampleCoefficients || !readResampleFirNumCoeff || !readResampleFirLerpIntBits) { 122 readResampleCoefficients = NULL; 123 dlclose(resampleCoeffLib); 124 resampleCoeffLib = NULL; 125 ALOGE("Could not find symbol: %s", 
dlerror()); 126 return; 127 } 128 129 c = &veryHighQualityConstants; 130 c->coefsBits = readResampleFirLerpIntBits(); 131 c->cShift = kNumPhaseBits - c->coefsBits; 132 c->cMask = ((1<<c->coefsBits)-1) << c->cShift; 133 c->pShift = kNumPhaseBits - c->coefsBits - pLerpBits; 134 c->pMask = ((1<<pLerpBits)-1) << c->pShift; 135 // number of zero-crossing on each side 136 c->halfNumCoefs = readResampleFirNumCoeff(); 137 ALOGV("coefsBits = %d", c->coefsBits); 138 ALOGV("halfNumCoefs = %d", c->halfNumCoefs); 139 // note that we "leak" resampleCoeffLib until the process exits 140 } 141 142 // ---------------------------------------------------------------------------- 143 144 #if !USE_NEON 145 146 static inline 147 int32_t mulRL(int left, int32_t in, uint32_t vRL) 148 { 149 #if USE_INLINE_ASSEMBLY 150 int32_t out; 151 if (left) { 152 asm( "smultb %[out], %[in], %[vRL] \n" 153 : [out]"=r"(out) 154 : [in]"%r"(in), [vRL]"r"(vRL) 155 : ); 156 } else { 157 asm( "smultt %[out], %[in], %[vRL] \n" 158 : [out]"=r"(out) 159 : [in]"%r"(in), [vRL]"r"(vRL) 160 : ); 161 } 162 return out; 163 #else 164 int16_t v = left ? 
int16_t(vRL) : int16_t(vRL>>16); 165 return int32_t((int64_t(in) * v) >> 16); 166 #endif 167 } 168 169 static inline 170 int32_t mulAdd(int16_t in, int32_t v, int32_t a) 171 { 172 #if USE_INLINE_ASSEMBLY 173 int32_t out; 174 asm( "smlawb %[out], %[v], %[in], %[a] \n" 175 : [out]"=r"(out) 176 : [in]"%r"(in), [v]"r"(v), [a]"r"(a) 177 : ); 178 return out; 179 #else 180 return a + int32_t((int64_t(v) * in) >> 16); 181 #endif 182 } 183 184 static inline 185 int32_t mulAddRL(int left, uint32_t inRL, int32_t v, int32_t a) 186 { 187 #if USE_INLINE_ASSEMBLY 188 int32_t out; 189 if (left) { 190 asm( "smlawb %[out], %[v], %[inRL], %[a] \n" 191 : [out]"=r"(out) 192 : [inRL]"%r"(inRL), [v]"r"(v), [a]"r"(a) 193 : ); 194 } else { 195 asm( "smlawt %[out], %[v], %[inRL], %[a] \n" 196 : [out]"=r"(out) 197 : [inRL]"%r"(inRL), [v]"r"(v), [a]"r"(a) 198 : ); 199 } 200 return out; 201 #else 202 int16_t s = left ? int16_t(inRL) : int16_t(inRL>>16); 203 return a + int32_t((int64_t(v) * s) >> 16); 204 #endif 205 } 206 207 #endif // !USE_NEON 208 209 // ---------------------------------------------------------------------------- 210 211 AudioResamplerSinc::AudioResamplerSinc( 212 int inChannelCount, int32_t sampleRate, src_quality quality) 213 : AudioResampler(inChannelCount, sampleRate, quality), 214 mState(0), mImpulse(0), mRingFull(0), mFirCoefs(0) 215 { 216 /* 217 * Layout of the state buffer for 32 tap: 218 * 219 * "present" sample beginning of 2nd buffer 220 * v v 221 * 0 01 2 23 3 222 * 0 F0 0 F0 F 223 * [pppppppppppppppInnnnnnnnnnnnnnnnpppppppppppppppInnnnnnnnnnnnnnnn] 224 * ^ ^ head 225 * 226 * p = past samples, convoluted with the (p)ositive side of sinc() 227 * n = future samples, convoluted with the (n)egative side of sinc() 228 * r = extra space for implementing the ring buffer 229 * 230 */ 231 232 mVolumeSIMD[0] = 0; 233 mVolumeSIMD[1] = 0; 234 235 // Load the constants for coefficients 236 int ok = pthread_once(&once_control, init_routine); 237 if (ok != 0) { 238 ALOGE("%s 
pthread_once failed: %d", __func__, ok); 239 } 240 mConstants = (quality == VERY_HIGH_QUALITY) ? 241 &veryHighQualityConstants : &highQualityConstants; 242 } 243 244 245 AudioResamplerSinc::~AudioResamplerSinc() { 246 free(mState); 247 } 248 249 void AudioResamplerSinc::init() { 250 const Constants& c(*mConstants); 251 const size_t numCoefs = 2 * c.halfNumCoefs; 252 const size_t stateSize = numCoefs * mChannelCount * 2; 253 mState = (int16_t*)memalign(32, stateSize*sizeof(int16_t)); 254 memset(mState, 0, sizeof(int16_t)*stateSize); 255 mImpulse = mState + (c.halfNumCoefs-1)*mChannelCount; 256 mRingFull = mImpulse + (numCoefs+1)*mChannelCount; 257 } 258 259 void AudioResamplerSinc::setVolume(float left, float right) { 260 AudioResampler::setVolume(left, right); 261 // convert to U4_28 (rounding down). 262 // integer volume values are clamped to 0 to UNITY_GAIN. 263 mVolumeSIMD[0] = u4_28_from_float(clampFloatVol(left)); 264 mVolumeSIMD[1] = u4_28_from_float(clampFloatVol(right)); 265 } 266 267 size_t AudioResamplerSinc::resample(int32_t* out, size_t outFrameCount, 268 AudioBufferProvider* provider) 269 { 270 // FIXME store current state (up or down sample) and only load the coefs when the state 271 // changes. Or load two pointers one for up and one for down in the init function. 272 // Not critical now since the read functions are fast, but would be important if read was slow. 273 if (mConstants == &veryHighQualityConstants && readResampleCoefficients) { 274 mFirCoefs = readResampleCoefficients( mInSampleRate <= mSampleRate ); 275 } else { 276 mFirCoefs = (const int32_t *) 277 ((mInSampleRate <= mSampleRate) ? 
mFirCoefsUp : mFirCoefsDown); 278 } 279 280 // select the appropriate resampler 281 switch (mChannelCount) { 282 case 1: 283 return resample<1>(out, outFrameCount, provider); 284 case 2: 285 return resample<2>(out, outFrameCount, provider); 286 default: 287 LOG_ALWAYS_FATAL("invalid channel count: %d", mChannelCount); 288 return 0; 289 } 290 } 291 292 293 template<int CHANNELS> 294 size_t AudioResamplerSinc::resample(int32_t* out, size_t outFrameCount, 295 AudioBufferProvider* provider) 296 { 297 const Constants& c(*mConstants); 298 const size_t headOffset = c.halfNumCoefs*CHANNELS; 299 int16_t* impulse = mImpulse; 300 uint32_t vRL = mVolumeRL; 301 size_t inputIndex = mInputIndex; 302 uint32_t phaseFraction = mPhaseFraction; 303 uint32_t phaseIncrement = mPhaseIncrement; 304 size_t outputIndex = 0; 305 size_t outputSampleCount = outFrameCount * 2; 306 size_t inFrameCount = getInFrameCountRequired(outFrameCount); 307 308 while (outputIndex < outputSampleCount) { 309 // buffer is empty, fetch a new one 310 while (mBuffer.frameCount == 0) { 311 mBuffer.frameCount = inFrameCount; 312 provider->getNextBuffer(&mBuffer); 313 if (mBuffer.raw == NULL) { 314 goto resample_exit; 315 } 316 const uint32_t phaseIndex = phaseFraction >> kNumPhaseBits; 317 if (phaseIndex == 1) { 318 // read one frame 319 read<CHANNELS>(impulse, phaseFraction, mBuffer.i16, inputIndex); 320 } else if (phaseIndex == 2) { 321 // read 2 frames 322 read<CHANNELS>(impulse, phaseFraction, mBuffer.i16, inputIndex); 323 inputIndex++; 324 if (inputIndex >= mBuffer.frameCount) { 325 inputIndex -= mBuffer.frameCount; 326 provider->releaseBuffer(&mBuffer); 327 } else { 328 read<CHANNELS>(impulse, phaseFraction, mBuffer.i16, inputIndex); 329 } 330 } 331 } 332 int16_t const * const in = mBuffer.i16; 333 const size_t frameCount = mBuffer.frameCount; 334 335 // Always read-in the first samples from the input buffer 336 int16_t* head = impulse + headOffset; 337 for (size_t i=0 ; i<CHANNELS ; i++) { 338 head[i] = 
in[inputIndex*CHANNELS + i]; 339 } 340 341 // handle boundary case 342 while (CC_LIKELY(outputIndex < outputSampleCount)) { 343 filterCoefficient<CHANNELS>(&out[outputIndex], phaseFraction, impulse, vRL); 344 outputIndex += 2; 345 346 phaseFraction += phaseIncrement; 347 const size_t phaseIndex = phaseFraction >> kNumPhaseBits; 348 for (size_t i=0 ; i<phaseIndex ; i++) { 349 inputIndex++; 350 if (inputIndex >= frameCount) { 351 goto done; // need a new buffer 352 } 353 read<CHANNELS>(impulse, phaseFraction, in, inputIndex); 354 } 355 } 356 done: 357 // if done with buffer, save samples 358 if (inputIndex >= frameCount) { 359 inputIndex -= frameCount; 360 provider->releaseBuffer(&mBuffer); 361 } 362 } 363 364 resample_exit: 365 mImpulse = impulse; 366 mInputIndex = inputIndex; 367 mPhaseFraction = phaseFraction; 368 return outputIndex / CHANNELS; 369 } 370 371 template<int CHANNELS> 372 /*** 373 * read() 374 * 375 * This function reads only one frame from input buffer and writes it in 376 * state buffer 377 * 378 **/ 379 void AudioResamplerSinc::read( 380 int16_t*& impulse, uint32_t& phaseFraction, 381 const int16_t* in, size_t inputIndex) 382 { 383 impulse += CHANNELS; 384 phaseFraction -= 1LU<<kNumPhaseBits; 385 386 const Constants& c(*mConstants); 387 if (CC_UNLIKELY(impulse >= mRingFull)) { 388 const size_t stateSize = (c.halfNumCoefs*2)*CHANNELS; 389 memcpy(mState, mState+stateSize, sizeof(int16_t)*stateSize); 390 impulse -= stateSize; 391 } 392 393 int16_t* head = impulse + c.halfNumCoefs*CHANNELS; 394 for (size_t i=0 ; i<CHANNELS ; i++) { 395 head[i] = in[inputIndex*CHANNELS + i]; 396 } 397 } 398 399 template<int CHANNELS> 400 void AudioResamplerSinc::filterCoefficient(int32_t* out, uint32_t phase, 401 const int16_t *samples, uint32_t vRL) 402 { 403 // NOTE: be very careful when modifying the code here. register 404 // pressure is very high and a small change might cause the compiler 405 // to generate far less efficient code. 
406 // Always sanity check the result with objdump or test-resample. 407 408 // compute the index of the coefficient on the positive side and 409 // negative side 410 const Constants& c(*mConstants); 411 const int32_t ONE = c.cMask | c.pMask; 412 uint32_t indexP = ( phase & c.cMask) >> c.cShift; 413 uint32_t lerpP = ( phase & c.pMask) >> c.pShift; 414 uint32_t indexN = ((ONE-phase) & c.cMask) >> c.cShift; 415 uint32_t lerpN = ((ONE-phase) & c.pMask) >> c.pShift; 416 417 const size_t offset = c.halfNumCoefs; 418 indexP *= offset; 419 indexN *= offset; 420 421 int32_t const* coefsP = mFirCoefs + indexP; 422 int32_t const* coefsN = mFirCoefs + indexN; 423 int16_t const* sP = samples; 424 int16_t const* sN = samples + CHANNELS; 425 426 size_t count = offset; 427 428 #if !USE_NEON 429 int32_t l = 0; 430 int32_t r = 0; 431 for (size_t i=0 ; i<count ; i++) { 432 interpolate<CHANNELS>(l, r, coefsP++, offset, lerpP, sP); 433 sP -= CHANNELS; 434 interpolate<CHANNELS>(l, r, coefsN++, offset, lerpN, sN); 435 sN += CHANNELS; 436 } 437 out[0] += 2 * mulRL(1, l, vRL); 438 out[1] += 2 * mulRL(0, r, vRL); 439 #else 440 UNUSED(vRL); 441 if (CHANNELS == 1) { 442 int32_t const* coefsP1 = coefsP + offset; 443 int32_t const* coefsN1 = coefsN + offset; 444 sP -= CHANNELS*3; 445 446 int32x4_t sum; 447 int32x2_t lerpPN; 448 lerpPN = vdup_n_s32(0); 449 lerpPN = vld1_lane_s32((int32_t *)&lerpP, lerpPN, 0); 450 lerpPN = vld1_lane_s32((int32_t *)&lerpN, lerpPN, 1); 451 lerpPN = vshl_n_s32(lerpPN, 16); 452 sum = vdupq_n_s32(0); 453 454 int16x4_t sampleP, sampleN; 455 int32x4_t samplePExt, sampleNExt; 456 int32x4_t coefsPV0, coefsPV1, coefsNV0, coefsNV1; 457 458 coefsP = (const int32_t*)__builtin_assume_aligned(coefsP, 16); 459 coefsN = (const int32_t*)__builtin_assume_aligned(coefsN, 16); 460 coefsP1 = (const int32_t*)__builtin_assume_aligned(coefsP1, 16); 461 coefsN1 = (const int32_t*)__builtin_assume_aligned(coefsN1, 16); 462 for (; count > 0; count -= 4) { 463 sampleP = vld1_s16(sP); 464 
sampleN = vld1_s16(sN); 465 coefsPV0 = vld1q_s32(coefsP); 466 coefsNV0 = vld1q_s32(coefsN); 467 coefsPV1 = vld1q_s32(coefsP1); 468 coefsNV1 = vld1q_s32(coefsN1); 469 sP -= 4; 470 sN += 4; 471 coefsP += 4; 472 coefsN += 4; 473 coefsP1 += 4; 474 coefsN1 += 4; 475 476 sampleP = vrev64_s16(sampleP); 477 478 // interpolate (step1) 479 coefsPV1 = vsubq_s32(coefsPV1, coefsPV0); 480 coefsNV1 = vsubq_s32(coefsNV1, coefsNV0); 481 samplePExt = vshll_n_s16(sampleP, 15); 482 // interpolate (step2) 483 coefsPV1 = vqrdmulhq_lane_s32(coefsPV1, lerpPN, 0); 484 coefsNV1 = vqrdmulhq_lane_s32(coefsNV1, lerpPN, 1); 485 sampleNExt = vshll_n_s16(sampleN, 15); 486 // interpolate (step3) 487 coefsPV0 = vaddq_s32(coefsPV0, coefsPV1); 488 coefsNV0 = vaddq_s32(coefsNV0, coefsNV1); 489 490 samplePExt = vqrdmulhq_s32(samplePExt, coefsPV0); 491 sampleNExt = vqrdmulhq_s32(sampleNExt, coefsNV0); 492 sum = vaddq_s32(sum, samplePExt); 493 sum = vaddq_s32(sum, sampleNExt); 494 } 495 int32x2_t volumesV, outV; 496 volumesV = vld1_s32(mVolumeSIMD); 497 outV = vld1_s32(out); 498 499 //add all 4 partial sums 500 int32x2_t sumLow, sumHigh; 501 sumLow = vget_low_s32(sum); 502 sumHigh = vget_high_s32(sum); 503 sumLow = vpadd_s32(sumLow, sumHigh); 504 sumLow = vpadd_s32(sumLow, sumLow); 505 506 sumLow = vqrdmulh_s32(sumLow, volumesV); 507 outV = vadd_s32(outV, sumLow); 508 vst1_s32(out, outV); 509 } else if (CHANNELS == 2) { 510 int32_t const* coefsP1 = coefsP + offset; 511 int32_t const* coefsN1 = coefsN + offset; 512 sP -= CHANNELS*3; 513 514 int32x4_t sum0, sum1; 515 int32x2_t lerpPN; 516 517 lerpPN = vdup_n_s32(0); 518 lerpPN = vld1_lane_s32((int32_t *)&lerpP, lerpPN, 0); 519 lerpPN = vld1_lane_s32((int32_t *)&lerpN, lerpPN, 1); 520 lerpPN = vshl_n_s32(lerpPN, 16); 521 sum0 = vdupq_n_s32(0); 522 sum1 = vdupq_n_s32(0); 523 524 int16x4x2_t sampleP, sampleN; 525 int32x4x2_t samplePExt, sampleNExt; 526 int32x4_t coefsPV0, coefsPV1, coefsNV0, coefsNV1; 527 528 coefsP = (const 
int32_t*)__builtin_assume_aligned(coefsP, 16); 529 coefsN = (const int32_t*)__builtin_assume_aligned(coefsN, 16); 530 coefsP1 = (const int32_t*)__builtin_assume_aligned(coefsP1, 16); 531 coefsN1 = (const int32_t*)__builtin_assume_aligned(coefsN1, 16); 532 for (; count > 0; count -= 4) { 533 sampleP = vld2_s16(sP); 534 sampleN = vld2_s16(sN); 535 coefsPV0 = vld1q_s32(coefsP); 536 coefsNV0 = vld1q_s32(coefsN); 537 coefsPV1 = vld1q_s32(coefsP1); 538 coefsNV1 = vld1q_s32(coefsN1); 539 sP -= 8; 540 sN += 8; 541 coefsP += 4; 542 coefsN += 4; 543 coefsP1 += 4; 544 coefsN1 += 4; 545 546 sampleP.val[0] = vrev64_s16(sampleP.val[0]); 547 sampleP.val[1] = vrev64_s16(sampleP.val[1]); 548 549 // interpolate (step1) 550 coefsPV1 = vsubq_s32(coefsPV1, coefsPV0); 551 coefsNV1 = vsubq_s32(coefsNV1, coefsNV0); 552 samplePExt.val[0] = vshll_n_s16(sampleP.val[0], 15); 553 samplePExt.val[1] = vshll_n_s16(sampleP.val[1], 15); 554 // interpolate (step2) 555 coefsPV1 = vqrdmulhq_lane_s32(coefsPV1, lerpPN, 0); 556 coefsNV1 = vqrdmulhq_lane_s32(coefsNV1, lerpPN, 1); 557 sampleNExt.val[0] = vshll_n_s16(sampleN.val[0], 15); 558 sampleNExt.val[1] = vshll_n_s16(sampleN.val[1], 15); 559 // interpolate (step3) 560 coefsPV0 = vaddq_s32(coefsPV0, coefsPV1); 561 coefsNV0 = vaddq_s32(coefsNV0, coefsNV1); 562 563 samplePExt.val[0] = vqrdmulhq_s32(samplePExt.val[0], coefsPV0); 564 samplePExt.val[1] = vqrdmulhq_s32(samplePExt.val[1], coefsPV0); 565 sampleNExt.val[0] = vqrdmulhq_s32(sampleNExt.val[0], coefsNV0); 566 sampleNExt.val[1] = vqrdmulhq_s32(sampleNExt.val[1], coefsNV0); 567 sum0 = vaddq_s32(sum0, samplePExt.val[0]); 568 sum1 = vaddq_s32(sum1, samplePExt.val[1]); 569 sum0 = vaddq_s32(sum0, sampleNExt.val[0]); 570 sum1 = vaddq_s32(sum1, sampleNExt.val[1]); 571 } 572 int32x2_t volumesV, outV; 573 volumesV = vld1_s32(mVolumeSIMD); 574 outV = vld1_s32(out); 575 576 //add all 4 partial sums 577 int32x2_t sumLow0, sumHigh0, sumLow1, sumHigh1; 578 sumLow0 = vget_low_s32(sum0); 579 sumHigh0 = 
vget_high_s32(sum0); 580 sumLow1 = vget_low_s32(sum1); 581 sumHigh1 = vget_high_s32(sum1); 582 sumLow0 = vpadd_s32(sumLow0, sumHigh0); 583 sumLow0 = vpadd_s32(sumLow0, sumLow0); 584 sumLow1 = vpadd_s32(sumLow1, sumHigh1); 585 sumLow1 = vpadd_s32(sumLow1, sumLow1); 586 587 sumLow0 = vtrn_s32(sumLow0, sumLow1).val[0]; 588 sumLow0 = vqrdmulh_s32(sumLow0, volumesV); 589 outV = vadd_s32(outV, sumLow0); 590 vst1_s32(out, outV); 591 } 592 #endif 593 } 594 595 template<int CHANNELS> 596 void AudioResamplerSinc::interpolate( 597 int32_t& l, int32_t& r, 598 const int32_t* coefs, size_t offset, 599 int32_t lerp, const int16_t* samples) 600 { 601 int32_t c0 = coefs[0]; 602 int32_t c1 = coefs[offset]; 603 int32_t sinc = mulAdd(lerp, (c1-c0)<<1, c0); 604 if (CHANNELS == 2) { 605 uint32_t rl = *reinterpret_cast<const uint32_t*>(samples); 606 l = mulAddRL(1, rl, sinc, l); 607 r = mulAddRL(0, rl, sinc, r); 608 } else { 609 r = l = mulAdd(samples[0], sinc, l); 610 } 611 } 612 // ---------------------------------------------------------------------------- 613 } // namespace android 614