1 b0VIM 7.4UaADdgreidhojo20.mtv.corp.google.com~dgreid/src/android/ryu-mnc-dev/device/google/dragon/audio/hal/dsp/dsp_util.cutf-8 3210#"! UtpfPad-rTPO98,gNGF+ T eF'wQ% 27 28 29 30 31 32 e 33 T 34 A 35 36 37 39 41 \ [ [)}R2zX1 42 zZ:sY?%~sb)gdb65.-#endif#define interleave_stereo interleave_stereo} } *output++ = max(-32768, min(32767, (int)(f * 32768.0f))); f += (f > 0) ? (0.5f / 32768.0f) : (-0.5f / 32768.0f); f = *input2++; *output++ = max(-32768, min(32767, (int)(f * 32768.0f))); f += (f > 0) ? (0.5f / 32768.0f) : (-0.5f / 32768.0f); f = *input1++; float f; while (frames--) { /* The remaining samples */ } ); "q0", "q1", "q2", "q3", "q4", "memory", "cc" : /* clobber */ [neg]"w"(neg) [pos]"w"(pos), [output]"3"(output), [input2]"2"(input2), [input1]"1"(input1), [chunk]"0"(chunk), : /* input */ "=r"(output) "=r"(input2), "=r"(input1), "=r"(chunk), : /* output */ "bne 1b \n" "vst2.16 {d2-d3}, [%[output]]! \n" "vqmovn.s32 d3, q2 \n" "vqmovn.s32 d2, q1 \n" "vcvt.s32.f32 q2, q2, #15 \n" "vcvt.s32.f32 q1, q1, #15 \n" "vadd.f32 q2, q2, q4 \n" "vadd.f32 q1, q1, q3 \n" "vbsl q4, %q[pos], %q[neg] \n" "vbsl q3, %q[pos], %q[neg] \n" "vcgt.f32 q4, q2, q0 \n" "vcgt.f32 q3, q1, q0 \n" */ * input, then truncate. * to positive input, and adding -0.5 to the negative /* We try to round to the nearest number by adding 0.5 "subs %[chunk], #1 \n" "vld1.32 {d4-d5}, [%[input2]]! \n" "vld1.32 {d2-d3}, [%[input1]]! \n" "1: \n" "veor q0, q0, q0 \n" __asm__ __volatile__ ( if (chunk) { frames &= 3; int chunk = frames >> 2; float32x4_t neg = vdupq_n_f32(-0.5f / 32768.0f); float32x4_t pos = vdupq_n_f32(0.5f / 32768.0f); /* L0 L1 L2 L3, R0 R1 R2 R3 -> L0 R0 L1 R1, L2 R2 L3 R3 */ /* Process 4 frames (8 samples) each loop. */{ int16_t *output, int frames)static void interleave_stereo(float *input1, float *input2,#define deinterleave_stereo deinterleave_stereo} } *output2++ = *input++ / 32768.0f; *output1++ = *input++ / 32768.0f; while (frames--) { /* The remaining samples. */ } ); "q0", "q1", "q2", "q3", "memory", "cc" : /* clobber */ : /* input */ [output2]"+r"(output2) [output1]"+r"(output1), [input]"+r"(input), [chunk]"+r"(chunk), : /* output */ "bne 1b \n" "vst1.32 {d0-d3}, [%[output1]]! \n" "vst1.32 {d4-d7}, [%[output2]]! \n" "vcvt.f32.s32 q0, q0, #15 \n" "vcvt.f32.s32 q1, q1, #15 \n" "vcvt.f32.s32 q2, q2, #15 \n" "vcvt.f32.s32 q3, q3, #15 \n" "vmovl.s16 q0, d0 \n" "vmovl.s16 q1, d1 \n" "vmovl.s16 q2, d2 \n" "vmovl.s16 q3, d3 \n" "subs %[chunk], #1 \n" "vld2.16 {d0-d3}, [%[input]]! \n" "1: \n" __asm__ __volatile__ ( if (chunk) { frames &= 7; int chunk = frames >> 3; /* L0 R0 L1 R1 L2 R2 L3 R3... -> L0 L1 L2 L3... R0 R1 R2 R3... */ /* Process 8 frames (16 samples) each loop. */{ float *output2, int frames)static void deinterleave_stereo(int16_t *input, float *output1,#include <arm_neon.h>#ifdef __ARM_NEON__#undef interleave_stereo#undef deinterleave_stereo#endif _a < _b ? _a : _b; }) __typeof__(b) _b = (b); \#define min(a, b) ({ __typeof__(a) _a = (a); \#ifndef min#endif _a > _b ? _a : _b; }) __typeof__(b) _b = (b); \#define max(a, b) ({ __typeof__(a) _a = (a); \#ifndef max#include "dsp_util.h" */ * found in the LICENSE file. * Use of this source code is governed by a BSD-style license that can be/* Copyright (c) 2013 The Chromium OS Authors. All rights reserved.ad?Pz>-~/ z w p o O 2 1 uecF;:!c@0 82 83 84 85 86 } 87 i 88 e 89 c 90 b 91 9 92 7 93 94 95 ? }#endif#warning "Don}}}}#endif#warning "Don't know how to disable denorms. Performace may suffer."#else __asm__ __volatile__ ("mcr p10, 7, %0, cr1, cr0, 0" : : "r" (cw | (1 << 24))); __asm__ __volatile__ ("mrc p10, 7, %0, cr1, cr0, 0" : "=r" (cw)); int cw;#elif defined(__arm__) __builtin_ia32_ldmxcsr(mxcsr | 0x8040); mxcsr = __builtin_ia32_stmxcsr(); unsigned int mxcsr;#if defined(__i386__) || defined(__x86_64__){void dsp_enable_flush_denormal_to_zero()} } *output++ = i16; i16 = (int16_t) (f > 0 ? f + 0.5f : f - 0.5f); else i16 = -32768; else if (f < -32768) i16 = 32767; if (f > 32767) float f = *(input_ptr[j]++) * 32768.0f; int16_t i16; for (j = 0; j < channels; j++) { for (i = 0; i < frames; i++) input_ptr[i] = input[i]; for (i = 0; i < channels; i++)#endif } return; interleave_stereo(input[0], input[1], output, frames); if (channels == 2) {#ifdef interleave_stereo int i, j; float *input_ptr[channels];{ int frames)void dsp_util_interleave(float *const *input, int16_t *output, int channels,} *(output_ptr[j]++) = *input++ / 32768.0f; for (j = 0; j < channels; j++) for (i = 0; i < frames; i++) output_ptr[i] = output[i]; for (i = 0; i < channels; i++)#endif } return; deinterleave_stereo(input, output[0], output[1], frames); if (channels == 2) {#ifdef deinterleave_stereo int i, j; float *output_ptr[channels];{ int frames)void dsp_util_deinterleave(int16_t *input, float *const *output, int channels,#endif#define interleave_stereo interleave_stereo} } *output++ = max(-32768, min(32767, (int)(f * 32768.0f))); f += (f > 0) ? (0.5f / 32768.0f) : (-0.5f / 32768.0f); f = *input2++; *output++ = max(-32768, min(32767, (int)(f * 32768.0f))); f += (f > 0) ? (0.5f / 32768.0f) : (-0.5f / 32768.0f); f = *input1++; float f; while (frames--) { /* The remaining samples */ad 96 fxvFN O PQ 112 113 114 R 115 116 t [ B % zF@=< 117 P*(m:n;o< p= 118 kQ7 } ); "xmm0", "xmm1", "xmm2", "memory", "cc" : /* clobber */ [scale_2_15]"x"(_mm_set1_ps(1.0f*(1<<15))) [output]"3"(output), [input2]"2"(input2), [input1]"1"(input1), [chunk]"0"(chunk), : /* input */ "=r"(output) "=r"(input2), "=r"(input1), "=r"(chunk), : /* output */ "jnz 1b \n" "sub $1, %[chunk] \n" "add $16, %[output] \n" "movdqu %%xmm0, (%[output]) \n" "packssdw %%xmm1, %%xmm0 \n" "cvtps2dq %%xmm1, %%xmm1 \n" "cvtps2dq %%xmm0, %%xmm0 \n" "mulps %[scale_2_15], %%xmm1 \n" "mulps %[scale_2_15], %%xmm0 \n" "add $16, %[input2] \n" "add $16, %[input1] \n" "unpckhps %%xmm2, %%xmm1 \n" "unpcklps %%xmm2, %%xmm0 \n" "movaps %%xmm0, %%xmm1 \n" "lddqu (%[input2]), %%xmm2 \n" "lddqu (%[input1]), %%xmm0 \n" "1: \n" __asm__ __volatile__ ( if (chunk) { frames &= 3; int chunk = frames >> 2; /* L0 L1 L2 L3, R0 R1 R2 R3 -> L0 R0 L1 R1, L2 R2 L3 R3 */ /* Process 4 frames (8 samples) each loop. */{ int16_t *output, int frames)static void interleave_stereo(float *input1, float *input2,#define deinterleave_stereo deinterleave_stereo} } *output2++ = *input++ / 32768.0f; *output1++ = *input++ / 32768.0f; while (frames--) { /* The remaining samples. */ } ); "xmm0", "xmm1", "xmm2", "xmm3", "memory", "cc" : /* clobber */ [scale_2_n15]"x"(_mm_set1_ps(1.0f/(1<<15))) [scale_2_n31]"x"(_mm_set1_ps(1.0f/(1<<15)/(1<<16))), : /* input */ [output2]"+r"(output2) [output1]"+r"(output1), [input]"+r"(input), [chunk]"+r"(chunk), : /* output */ "jnz 1b \n" "sub $1, %[chunk] \n" "add $32, %[output2] \n" "add $32, %[output1] \n" "movdqu %%xmm3, 16(%[output2]) \n" "movdqu %%xmm2, (%[output2]) \n" "movdqu %%xmm1, 16(%[output1]) \n" "movdqu %%xmm0, (%[output1]) \n" "mulps %[scale_2_n15], %%xmm3 \n" "mulps %[scale_2_n15], %%xmm2 \n" "mulps %[scale_2_n31], %%xmm1 \n" "mulps %[scale_2_n31], %%xmm0 \n" "cvtdq2ps %%xmm3, %%xmm3 \n" "cvtdq2ps %%xmm2, %%xmm2 \n" "cvtdq2ps %%xmm1, %%xmm1 \n" "cvtdq2ps %%xmm0, %%xmm0 \n" "psrad $16, %%xmm3 \n" "psrad $16, %%xmm2 \n" "pslld $16, %%xmm1 \n" "pslld $16, %%xmm0 \n" "movdqa %%xmm1, %%xmm3 \n" "movdqa %%xmm0, %%xmm2 \n" "add $32, %[input] \n" "lddqu 16(%[input]), %%xmm1 \n" "lddqu (%[input]), %%xmm0 \n" "1: \n" __asm__ __volatile__ ( if (chunk) { frames &= 7; int chunk = frames >> 3; /* L0 R0 L1 R1 L2 R2 L3 R3... -> L0 L1 L2 L3... R0 R1 R2 R3... */ /* Process 8 frames (16 samples) each loop. */{ float *output2, int frames)static void deinterleave_stereo(int16_t *input, float *output1,#include <emmintrin.h>#ifdef __SSE3__