/*
 * Copyright (C) 2007 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#define LOG_TAG "AudioResamplerSinc"
//#define LOG_NDEBUG 0

#define __STDC_CONSTANT_MACROS
#include <malloc.h>
#include <pthread.h>
#include <string.h>
#include <stdlib.h>
#include <dlfcn.h>

#include <cutils/compiler.h>
#include <cutils/properties.h>

#include <utils/Log.h>
#include <audio_utils/primitives.h>

#include "AudioResamplerSinc.h"

// Fallback for clang builds that lack __builtin_assume_aligned: the expression
// evaluates to (p) and marks the case where (p) is not a multiple of (a) as
// unreachable.
#if defined(__clang__) && !__has_builtin(__builtin_assume_aligned)
#define __builtin_assume_aligned(p, a) \
    (((uintptr_t(p) % (a)) == 0) ? (p) : (__builtin_unreachable(), (p)))
#endif

// The hand-written ARM assembly helpers are only selected when compiling for
// ARM and not for Thumb.
#if defined(__arm__) && !defined(__thumb__)
#define USE_INLINE_ASSEMBLY (true)
#else
#define USE_INLINE_ASSEMBLY (false)
#endif

// The NEON filter is selected on AArch64 or when __ARM_NEON__ is defined; in
// that case a pre-existing definition of USE_NEON is left untouched.
#if defined(__aarch64__) || defined(__ARM_NEON__)
#ifndef USE_NEON
#define USE_NEON (true)
#endif
#else
#define USE_NEON (false)
#endif
#if USE_NEON
#include <arm_neon.h>
#endif

// Evaluates x and discards the result (used on parameters that a given build
// configuration does not read).
#define UNUSED(x) ((void)(x))

namespace android {
// ----------------------------------------------------------------------------


/*
 * These coefficients are computed with the "fir" utility found in
 * tools/resampler_tools
 * cmd-line: fir -l 7 -s 48000 -c 20478
 */
const uint32_t AudioResamplerSinc::mFirCoefsUp[] __attribute__ ((aligned (32))) = {
#include "AudioResamplerSincUp.h"
};

/*
 * These coefficients are optimized for 48KHz -> 44.1KHz
 * cmd-line: fir -l 7 -s 48000 -c 17189
 */
const uint32_t AudioResamplerSinc::mFirCoefsDown[] __attribute__ ((aligned (32))) = {
#include "AudioResamplerSincDown.h"
};

// we use 15 bits to interpolate between these samples
// this cannot change because the multiplications below rely on it.
82 static const int pLerpBits = 15; 83 84 static pthread_once_t once_control = PTHREAD_ONCE_INIT; 85 static readCoefficientsFn readResampleCoefficients = NULL; 86 87 /*static*/ AudioResamplerSinc::Constants AudioResamplerSinc::highQualityConstants; 88 /*static*/ AudioResamplerSinc::Constants AudioResamplerSinc::veryHighQualityConstants; 89 90 void AudioResamplerSinc::init_routine() 91 { 92 // for high quality resampler, the parameters for coefficients are compile-time constants 93 Constants *c = &highQualityConstants; 94 c->coefsBits = RESAMPLE_FIR_LERP_INT_BITS; 95 c->cShift = kNumPhaseBits - c->coefsBits; 96 c->cMask = ((1<< c->coefsBits)-1) << c->cShift; 97 c->pShift = kNumPhaseBits - c->coefsBits - pLerpBits; 98 c->pMask = ((1<< pLerpBits)-1) << c->pShift; 99 c->halfNumCoefs = RESAMPLE_FIR_NUM_COEF; 100 101 // for very high quality resampler, the parameters are load-time constants 102 veryHighQualityConstants = highQualityConstants; 103 104 // Open the dll to get the coefficients for VERY_HIGH_QUALITY 105 void *resampleCoeffLib = dlopen("libaudio-resampler.so", RTLD_NOW); 106 ALOGV("Open libaudio-resampler library = %p", resampleCoeffLib); 107 if (resampleCoeffLib == NULL) { 108 ALOGE("Could not open audio-resampler library: %s", dlerror()); 109 return; 110 } 111 112 readResampleFirNumCoeffFn readResampleFirNumCoeff; 113 readResampleFirLerpIntBitsFn readResampleFirLerpIntBits; 114 115 readResampleCoefficients = (readCoefficientsFn) 116 dlsym(resampleCoeffLib, "readResamplerCoefficients"); 117 readResampleFirNumCoeff = (readResampleFirNumCoeffFn) 118 dlsym(resampleCoeffLib, "readResampleFirNumCoeff"); 119 readResampleFirLerpIntBits = (readResampleFirLerpIntBitsFn) 120 dlsym(resampleCoeffLib, "readResampleFirLerpIntBits"); 121 122 if (!readResampleCoefficients || !readResampleFirNumCoeff || !readResampleFirLerpIntBits) { 123 readResampleCoefficients = NULL; 124 dlclose(resampleCoeffLib); 125 resampleCoeffLib = NULL; 126 ALOGE("Could not find symbol: %s", 
dlerror()); 127 return; 128 } 129 130 c = &veryHighQualityConstants; 131 c->coefsBits = readResampleFirLerpIntBits(); 132 c->cShift = kNumPhaseBits - c->coefsBits; 133 c->cMask = ((1<<c->coefsBits)-1) << c->cShift; 134 c->pShift = kNumPhaseBits - c->coefsBits - pLerpBits; 135 c->pMask = ((1<<pLerpBits)-1) << c->pShift; 136 // number of zero-crossing on each side 137 c->halfNumCoefs = readResampleFirNumCoeff(); 138 ALOGV("coefsBits = %d", c->coefsBits); 139 ALOGV("halfNumCoefs = %d", c->halfNumCoefs); 140 // note that we "leak" resampleCoeffLib until the process exits 141 } 142 143 // ---------------------------------------------------------------------------- 144 145 #if !USE_NEON 146 147 static inline 148 int32_t mulRL(int left, int32_t in, uint32_t vRL) 149 { 150 #if USE_INLINE_ASSEMBLY 151 int32_t out; 152 if (left) { 153 asm( "smultb %[out], %[in], %[vRL] \n" 154 : [out]"=r"(out) 155 : [in]"%r"(in), [vRL]"r"(vRL) 156 : ); 157 } else { 158 asm( "smultt %[out], %[in], %[vRL] \n" 159 : [out]"=r"(out) 160 : [in]"%r"(in), [vRL]"r"(vRL) 161 : ); 162 } 163 return out; 164 #else 165 int16_t v = left ? 
int16_t(vRL) : int16_t(vRL>>16); 166 return int32_t((int64_t(in) * v) >> 16); 167 #endif 168 } 169 170 static inline 171 int32_t mulAdd(int16_t in, int32_t v, int32_t a) 172 { 173 #if USE_INLINE_ASSEMBLY 174 int32_t out; 175 asm( "smlawb %[out], %[v], %[in], %[a] \n" 176 : [out]"=r"(out) 177 : [in]"%r"(in), [v]"r"(v), [a]"r"(a) 178 : ); 179 return out; 180 #else 181 return a + int32_t((int64_t(v) * in) >> 16); 182 #endif 183 } 184 185 static inline 186 int32_t mulAddRL(int left, uint32_t inRL, int32_t v, int32_t a) 187 { 188 #if USE_INLINE_ASSEMBLY 189 int32_t out; 190 if (left) { 191 asm( "smlawb %[out], %[v], %[inRL], %[a] \n" 192 : [out]"=r"(out) 193 : [inRL]"%r"(inRL), [v]"r"(v), [a]"r"(a) 194 : ); 195 } else { 196 asm( "smlawt %[out], %[v], %[inRL], %[a] \n" 197 : [out]"=r"(out) 198 : [inRL]"%r"(inRL), [v]"r"(v), [a]"r"(a) 199 : ); 200 } 201 return out; 202 #else 203 int16_t s = left ? int16_t(inRL) : int16_t(inRL>>16); 204 return a + int32_t((int64_t(v) * s) >> 16); 205 #endif 206 } 207 208 #endif // !USE_NEON 209 210 // ---------------------------------------------------------------------------- 211 212 AudioResamplerSinc::AudioResamplerSinc( 213 int inChannelCount, int32_t sampleRate, src_quality quality) 214 : AudioResampler(inChannelCount, sampleRate, quality), 215 mState(0), mImpulse(0), mRingFull(0), mFirCoefs(0) 216 { 217 /* 218 * Layout of the state buffer for 32 tap: 219 * 220 * "present" sample beginning of 2nd buffer 221 * v v 222 * 0 01 2 23 3 223 * 0 F0 0 F0 F 224 * [pppppppppppppppInnnnnnnnnnnnnnnnpppppppppppppppInnnnnnnnnnnnnnnn] 225 * ^ ^ head 226 * 227 * p = past samples, convoluted with the (p)ositive side of sinc() 228 * n = future samples, convoluted with the (n)egative side of sinc() 229 * r = extra space for implementing the ring buffer 230 * 231 */ 232 233 mVolumeSIMD[0] = 0; 234 mVolumeSIMD[1] = 0; 235 236 // Load the constants for coefficients 237 int ok = pthread_once(&once_control, init_routine); 238 if (ok != 0) { 239 ALOGE("%s 
pthread_once failed: %d", __func__, ok); 240 } 241 mConstants = (quality == VERY_HIGH_QUALITY) ? 242 &veryHighQualityConstants : &highQualityConstants; 243 } 244 245 246 AudioResamplerSinc::~AudioResamplerSinc() { 247 free(mState); 248 } 249 250 void AudioResamplerSinc::init() { 251 const Constants& c(*mConstants); 252 const size_t numCoefs = 2 * c.halfNumCoefs; 253 const size_t stateSize = numCoefs * mChannelCount * 2; 254 mState = (int16_t*)memalign(32, stateSize*sizeof(int16_t)); 255 memset(mState, 0, sizeof(int16_t)*stateSize); 256 mImpulse = mState + (c.halfNumCoefs-1)*mChannelCount; 257 mRingFull = mImpulse + (numCoefs+1)*mChannelCount; 258 } 259 260 void AudioResamplerSinc::setVolume(float left, float right) { 261 AudioResampler::setVolume(left, right); 262 // convert to U4_28 (rounding down). 263 // integer volume values are clamped to 0 to UNITY_GAIN. 264 mVolumeSIMD[0] = u4_28_from_float(clampFloatVol(left)); 265 mVolumeSIMD[1] = u4_28_from_float(clampFloatVol(right)); 266 } 267 268 size_t AudioResamplerSinc::resample(int32_t* out, size_t outFrameCount, 269 AudioBufferProvider* provider) 270 { 271 // FIXME store current state (up or down sample) and only load the coefs when the state 272 // changes. Or load two pointers one for up and one for down in the init function. 273 // Not critical now since the read functions are fast, but would be important if read was slow. 274 if (mConstants == &veryHighQualityConstants && readResampleCoefficients) { 275 mFirCoefs = readResampleCoefficients( mInSampleRate <= mSampleRate ); 276 } else { 277 mFirCoefs = (const int32_t *) 278 ((mInSampleRate <= mSampleRate) ? 
mFirCoefsUp : mFirCoefsDown); 279 } 280 281 // select the appropriate resampler 282 switch (mChannelCount) { 283 case 1: 284 return resample<1>(out, outFrameCount, provider); 285 case 2: 286 return resample<2>(out, outFrameCount, provider); 287 default: 288 LOG_ALWAYS_FATAL("invalid channel count: %d", mChannelCount); 289 return 0; 290 } 291 } 292 293 294 template<int CHANNELS> 295 size_t AudioResamplerSinc::resample(int32_t* out, size_t outFrameCount, 296 AudioBufferProvider* provider) 297 { 298 const Constants& c(*mConstants); 299 const size_t headOffset = c.halfNumCoefs*CHANNELS; 300 int16_t* impulse = mImpulse; 301 uint32_t vRL = mVolumeRL; 302 size_t inputIndex = mInputIndex; 303 uint32_t phaseFraction = mPhaseFraction; 304 uint32_t phaseIncrement = mPhaseIncrement; 305 size_t outputIndex = 0; 306 size_t outputSampleCount = outFrameCount * 2; 307 size_t inFrameCount = getInFrameCountRequired(outFrameCount); 308 309 while (outputIndex < outputSampleCount) { 310 // buffer is empty, fetch a new one 311 while (mBuffer.frameCount == 0) { 312 mBuffer.frameCount = inFrameCount; 313 provider->getNextBuffer(&mBuffer); 314 if (mBuffer.raw == NULL) { 315 goto resample_exit; 316 } 317 const uint32_t phaseIndex = phaseFraction >> kNumPhaseBits; 318 if (phaseIndex == 1) { 319 // read one frame 320 read<CHANNELS>(impulse, phaseFraction, mBuffer.i16, inputIndex); 321 } else if (phaseIndex == 2) { 322 // read 2 frames 323 read<CHANNELS>(impulse, phaseFraction, mBuffer.i16, inputIndex); 324 inputIndex++; 325 if (inputIndex >= mBuffer.frameCount) { 326 inputIndex -= mBuffer.frameCount; 327 provider->releaseBuffer(&mBuffer); 328 } else { 329 read<CHANNELS>(impulse, phaseFraction, mBuffer.i16, inputIndex); 330 } 331 } 332 } 333 int16_t const * const in = mBuffer.i16; 334 const size_t frameCount = mBuffer.frameCount; 335 336 // Always read-in the first samples from the input buffer 337 int16_t* head = impulse + headOffset; 338 for (size_t i=0 ; i<CHANNELS ; i++) { 339 head[i] = 
in[inputIndex*CHANNELS + i]; 340 } 341 342 // handle boundary case 343 while (CC_LIKELY(outputIndex < outputSampleCount)) { 344 filterCoefficient<CHANNELS>(&out[outputIndex], phaseFraction, impulse, vRL); 345 outputIndex += 2; 346 347 phaseFraction += phaseIncrement; 348 const size_t phaseIndex = phaseFraction >> kNumPhaseBits; 349 for (size_t i=0 ; i<phaseIndex ; i++) { 350 inputIndex++; 351 if (inputIndex >= frameCount) { 352 goto done; // need a new buffer 353 } 354 read<CHANNELS>(impulse, phaseFraction, in, inputIndex); 355 } 356 } 357 done: 358 // if done with buffer, save samples 359 if (inputIndex >= frameCount) { 360 inputIndex -= frameCount; 361 provider->releaseBuffer(&mBuffer); 362 } 363 } 364 365 resample_exit: 366 mImpulse = impulse; 367 mInputIndex = inputIndex; 368 mPhaseFraction = phaseFraction; 369 return outputIndex / CHANNELS; 370 } 371 372 template<int CHANNELS> 373 /*** 374 * read() 375 * 376 * This function reads only one frame from input buffer and writes it in 377 * state buffer 378 * 379 **/ 380 void AudioResamplerSinc::read( 381 int16_t*& impulse, uint32_t& phaseFraction, 382 const int16_t* in, size_t inputIndex) 383 { 384 impulse += CHANNELS; 385 phaseFraction -= 1LU<<kNumPhaseBits; 386 387 const Constants& c(*mConstants); 388 if (CC_UNLIKELY(impulse >= mRingFull)) { 389 const size_t stateSize = (c.halfNumCoefs*2)*CHANNELS; 390 memcpy(mState, mState+stateSize, sizeof(int16_t)*stateSize); 391 impulse -= stateSize; 392 } 393 394 int16_t* head = impulse + c.halfNumCoefs*CHANNELS; 395 for (size_t i=0 ; i<CHANNELS ; i++) { 396 head[i] = in[inputIndex*CHANNELS + i]; 397 } 398 } 399 400 template<int CHANNELS> 401 void AudioResamplerSinc::filterCoefficient(int32_t* out, uint32_t phase, 402 const int16_t *samples, uint32_t vRL) 403 { 404 // NOTE: be very careful when modifying the code here. register 405 // pressure is very high and a small change might cause the compiler 406 // to generate far less efficient code. 
407 // Always sanity check the result with objdump or test-resample. 408 409 // compute the index of the coefficient on the positive side and 410 // negative side 411 const Constants& c(*mConstants); 412 const int32_t ONE = c.cMask | c.pMask; 413 uint32_t indexP = ( phase & c.cMask) >> c.cShift; 414 uint32_t lerpP = ( phase & c.pMask) >> c.pShift; 415 uint32_t indexN = ((ONE-phase) & c.cMask) >> c.cShift; 416 uint32_t lerpN = ((ONE-phase) & c.pMask) >> c.pShift; 417 418 const size_t offset = c.halfNumCoefs; 419 indexP *= offset; 420 indexN *= offset; 421 422 int32_t const* coefsP = mFirCoefs + indexP; 423 int32_t const* coefsN = mFirCoefs + indexN; 424 int16_t const* sP = samples; 425 int16_t const* sN = samples + CHANNELS; 426 427 size_t count = offset; 428 429 #if !USE_NEON 430 int32_t l = 0; 431 int32_t r = 0; 432 for (size_t i=0 ; i<count ; i++) { 433 interpolate<CHANNELS>(l, r, coefsP++, offset, lerpP, sP); 434 sP -= CHANNELS; 435 interpolate<CHANNELS>(l, r, coefsN++, offset, lerpN, sN); 436 sN += CHANNELS; 437 } 438 out[0] += 2 * mulRL(1, l, vRL); 439 out[1] += 2 * mulRL(0, r, vRL); 440 #else 441 UNUSED(vRL); 442 if (CHANNELS == 1) { 443 int32_t const* coefsP1 = coefsP + offset; 444 int32_t const* coefsN1 = coefsN + offset; 445 sP -= CHANNELS*3; 446 447 int32x4_t sum; 448 int32x2_t lerpPN; 449 lerpPN = vdup_n_s32(0); 450 lerpPN = vld1_lane_s32((int32_t *)&lerpP, lerpPN, 0); 451 lerpPN = vld1_lane_s32((int32_t *)&lerpN, lerpPN, 1); 452 lerpPN = vshl_n_s32(lerpPN, 16); 453 sum = vdupq_n_s32(0); 454 455 int16x4_t sampleP, sampleN; 456 int32x4_t samplePExt, sampleNExt; 457 int32x4_t coefsPV0, coefsPV1, coefsNV0, coefsNV1; 458 459 coefsP = (const int32_t*)__builtin_assume_aligned(coefsP, 16); 460 coefsN = (const int32_t*)__builtin_assume_aligned(coefsN, 16); 461 coefsP1 = (const int32_t*)__builtin_assume_aligned(coefsP1, 16); 462 coefsN1 = (const int32_t*)__builtin_assume_aligned(coefsN1, 16); 463 for (; count > 0; count -= 4) { 464 sampleP = vld1_s16(sP); 465 
sampleN = vld1_s16(sN); 466 coefsPV0 = vld1q_s32(coefsP); 467 coefsNV0 = vld1q_s32(coefsN); 468 coefsPV1 = vld1q_s32(coefsP1); 469 coefsNV1 = vld1q_s32(coefsN1); 470 sP -= 4; 471 sN += 4; 472 coefsP += 4; 473 coefsN += 4; 474 coefsP1 += 4; 475 coefsN1 += 4; 476 477 sampleP = vrev64_s16(sampleP); 478 479 // interpolate (step1) 480 coefsPV1 = vsubq_s32(coefsPV1, coefsPV0); 481 coefsNV1 = vsubq_s32(coefsNV1, coefsNV0); 482 samplePExt = vshll_n_s16(sampleP, 15); 483 // interpolate (step2) 484 coefsPV1 = vqrdmulhq_lane_s32(coefsPV1, lerpPN, 0); 485 coefsNV1 = vqrdmulhq_lane_s32(coefsNV1, lerpPN, 1); 486 sampleNExt = vshll_n_s16(sampleN, 15); 487 // interpolate (step3) 488 coefsPV0 = vaddq_s32(coefsPV0, coefsPV1); 489 coefsNV0 = vaddq_s32(coefsNV0, coefsNV1); 490 491 samplePExt = vqrdmulhq_s32(samplePExt, coefsPV0); 492 sampleNExt = vqrdmulhq_s32(sampleNExt, coefsNV0); 493 sum = vaddq_s32(sum, samplePExt); 494 sum = vaddq_s32(sum, sampleNExt); 495 } 496 int32x2_t volumesV, outV; 497 volumesV = vld1_s32(mVolumeSIMD); 498 outV = vld1_s32(out); 499 500 //add all 4 partial sums 501 int32x2_t sumLow, sumHigh; 502 sumLow = vget_low_s32(sum); 503 sumHigh = vget_high_s32(sum); 504 sumLow = vpadd_s32(sumLow, sumHigh); 505 sumLow = vpadd_s32(sumLow, sumLow); 506 507 sumLow = vqrdmulh_s32(sumLow, volumesV); 508 outV = vadd_s32(outV, sumLow); 509 vst1_s32(out, outV); 510 } else if (CHANNELS == 2) { 511 int32_t const* coefsP1 = coefsP + offset; 512 int32_t const* coefsN1 = coefsN + offset; 513 sP -= CHANNELS*3; 514 515 int32x4_t sum0, sum1; 516 int32x2_t lerpPN; 517 518 lerpPN = vdup_n_s32(0); 519 lerpPN = vld1_lane_s32((int32_t *)&lerpP, lerpPN, 0); 520 lerpPN = vld1_lane_s32((int32_t *)&lerpN, lerpPN, 1); 521 lerpPN = vshl_n_s32(lerpPN, 16); 522 sum0 = vdupq_n_s32(0); 523 sum1 = vdupq_n_s32(0); 524 525 int16x4x2_t sampleP, sampleN; 526 int32x4x2_t samplePExt, sampleNExt; 527 int32x4_t coefsPV0, coefsPV1, coefsNV0, coefsNV1; 528 529 coefsP = (const 
int32_t*)__builtin_assume_aligned(coefsP, 16); 530 coefsN = (const int32_t*)__builtin_assume_aligned(coefsN, 16); 531 coefsP1 = (const int32_t*)__builtin_assume_aligned(coefsP1, 16); 532 coefsN1 = (const int32_t*)__builtin_assume_aligned(coefsN1, 16); 533 for (; count > 0; count -= 4) { 534 sampleP = vld2_s16(sP); 535 sampleN = vld2_s16(sN); 536 coefsPV0 = vld1q_s32(coefsP); 537 coefsNV0 = vld1q_s32(coefsN); 538 coefsPV1 = vld1q_s32(coefsP1); 539 coefsNV1 = vld1q_s32(coefsN1); 540 sP -= 8; 541 sN += 8; 542 coefsP += 4; 543 coefsN += 4; 544 coefsP1 += 4; 545 coefsN1 += 4; 546 547 sampleP.val[0] = vrev64_s16(sampleP.val[0]); 548 sampleP.val[1] = vrev64_s16(sampleP.val[1]); 549 550 // interpolate (step1) 551 coefsPV1 = vsubq_s32(coefsPV1, coefsPV0); 552 coefsNV1 = vsubq_s32(coefsNV1, coefsNV0); 553 samplePExt.val[0] = vshll_n_s16(sampleP.val[0], 15); 554 samplePExt.val[1] = vshll_n_s16(sampleP.val[1], 15); 555 // interpolate (step2) 556 coefsPV1 = vqrdmulhq_lane_s32(coefsPV1, lerpPN, 0); 557 coefsNV1 = vqrdmulhq_lane_s32(coefsNV1, lerpPN, 1); 558 sampleNExt.val[0] = vshll_n_s16(sampleN.val[0], 15); 559 sampleNExt.val[1] = vshll_n_s16(sampleN.val[1], 15); 560 // interpolate (step3) 561 coefsPV0 = vaddq_s32(coefsPV0, coefsPV1); 562 coefsNV0 = vaddq_s32(coefsNV0, coefsNV1); 563 564 samplePExt.val[0] = vqrdmulhq_s32(samplePExt.val[0], coefsPV0); 565 samplePExt.val[1] = vqrdmulhq_s32(samplePExt.val[1], coefsPV0); 566 sampleNExt.val[0] = vqrdmulhq_s32(sampleNExt.val[0], coefsNV0); 567 sampleNExt.val[1] = vqrdmulhq_s32(sampleNExt.val[1], coefsNV0); 568 sum0 = vaddq_s32(sum0, samplePExt.val[0]); 569 sum1 = vaddq_s32(sum1, samplePExt.val[1]); 570 sum0 = vaddq_s32(sum0, sampleNExt.val[0]); 571 sum1 = vaddq_s32(sum1, sampleNExt.val[1]); 572 } 573 int32x2_t volumesV, outV; 574 volumesV = vld1_s32(mVolumeSIMD); 575 outV = vld1_s32(out); 576 577 //add all 4 partial sums 578 int32x2_t sumLow0, sumHigh0, sumLow1, sumHigh1; 579 sumLow0 = vget_low_s32(sum0); 580 sumHigh0 = 
vget_high_s32(sum0); 581 sumLow1 = vget_low_s32(sum1); 582 sumHigh1 = vget_high_s32(sum1); 583 sumLow0 = vpadd_s32(sumLow0, sumHigh0); 584 sumLow0 = vpadd_s32(sumLow0, sumLow0); 585 sumLow1 = vpadd_s32(sumLow1, sumHigh1); 586 sumLow1 = vpadd_s32(sumLow1, sumLow1); 587 588 sumLow0 = vtrn_s32(sumLow0, sumLow1).val[0]; 589 sumLow0 = vqrdmulh_s32(sumLow0, volumesV); 590 outV = vadd_s32(outV, sumLow0); 591 vst1_s32(out, outV); 592 } 593 #endif 594 } 595 596 template<int CHANNELS> 597 void AudioResamplerSinc::interpolate( 598 int32_t& l, int32_t& r, 599 const int32_t* coefs, size_t offset, 600 int32_t lerp, const int16_t* samples) 601 { 602 int32_t c0 = coefs[0]; 603 int32_t c1 = coefs[offset]; 604 int32_t sinc = mulAdd(lerp, (c1-c0)<<1, c0); 605 if (CHANNELS == 2) { 606 uint32_t rl = *reinterpret_cast<const uint32_t*>(samples); 607 l = mulAddRL(1, rl, sinc, l); 608 r = mulAddRL(0, rl, sinc, r); 609 } else { 610 r = l = mulAdd(samples[0], sinc, l); 611 } 612 } 613 // ---------------------------------------------------------------------------- 614 } // namespace android 615