1 /* Copyright (c) 2013 The Chromium OS Authors. All rights reserved. 2 * Use of this source code is governed by a BSD-style license that can be 3 * found in the LICENSE file. 4 */ 5 6 #include <stdlib.h> 7 #include "eq2.h" 8 9 struct eq2 { 10 int n[2]; 11 struct biquad biquad[MAX_BIQUADS_PER_EQ2][2]; 12 }; 13 14 struct eq2 *eq2_new() 15 { 16 struct eq2 *eq2 = (struct eq2 *)calloc(1, sizeof(*eq2)); 17 int i, j; 18 19 /* Initialize all biquads to identity filter, so if two channels have 20 * different numbers of biquads, it still works. */ 21 for (i = 0; i < MAX_BIQUADS_PER_EQ2; i++) 22 for (j = 0; j < 2; j++) 23 biquad_set(&eq2->biquad[i][j], BQ_NONE, 0, 0, 0); 24 25 return eq2; 26 } 27 28 void eq2_free(struct eq2 *eq2) 29 { 30 free(eq2); 31 } 32 33 int eq2_append_biquad(struct eq2 *eq2, int channel, 34 enum biquad_type type, float freq, float Q, float gain) 35 { 36 if (eq2->n[channel] >= MAX_BIQUADS_PER_EQ2) 37 return -1; 38 biquad_set(&eq2->biquad[eq2->n[channel]++][channel], type, freq, Q, 39 gain); 40 return 0; 41 } 42 43 int eq2_append_biquad_direct(struct eq2 *eq2, int channel, 44 const struct biquad *biquad) 45 { 46 if (eq2->n[channel] >= MAX_BIQUADS_PER_EQ2) 47 return -1; 48 eq2->biquad[eq2->n[channel]++][channel] = *biquad; 49 return 0; 50 } 51 52 static inline void eq2_process_one(struct biquad (*bq)[2], 53 float *data0, float *data1, int count) 54 { 55 struct biquad *qL = &bq[0][0]; 56 struct biquad *qR = &bq[0][1]; 57 58 float x1L = qL->x1; 59 float x2L = qL->x2; 60 float y1L = qL->y1; 61 float y2L = qL->y2; 62 float b0L = qL->b0; 63 float b1L = qL->b1; 64 float b2L = qL->b2; 65 float a1L = qL->a1; 66 float a2L = qL->a2; 67 68 float x1R = qR->x1; 69 float x2R = qR->x2; 70 float y1R = qR->y1; 71 float y2R = qR->y2; 72 float b0R = qR->b0; 73 float b1R = qR->b1; 74 float b2R = qR->b2; 75 float a1R = qR->a1; 76 float a2R = qR->a2; 77 78 int j; 79 for (j = 0; j < count; j++) { 80 float xL = data0[j]; 81 float xR = data1[j]; 82 83 float yL = b0L*xL 84 + b1L*x1L + b2L*x2L 85 - a1L*y1L - a2L*y2L; 86 x2L = x1L; 87 x1L = xL; 88 y2L = y1L; 89 y1L = yL; 90 91 float yR = b0R*xR 92 + b1R*x1R + b2R*x2R 93 - a1R*y1R - a2R*y2R; 94 x2R = x1R; 95 x1R = xR; 96 y2R = y1R; 97 y1R = yR; 98 99 data0[j] = yL; 100 data1[j] = yR; 101 } 102 103 qL->x1 = x1L; 104 qL->x2 = x2L; 105 qL->y1 = y1L; 106 qL->y2 = y2L; 107 qR->x1 = x1R; 108 qR->x2 = x2R; 109 qR->y1 = y1R; 110 qR->y2 = y2R; 111 } 112 113 #ifdef __ARM_NEON__ 114 #include <arm_neon.h> 115 static inline void eq2_process_two_neon(struct biquad (*bq)[2], 116 float *data0, float *data1, int count) 117 { 118 struct biquad *qL = &bq[0][0]; 119 struct biquad *rL = &bq[1][0]; 120 struct biquad *qR = &bq[0][1]; 121 struct biquad *rR = &bq[1][1]; 122 123 float32x2_t x1 = {qL->x1, qR->x1}; 124 float32x2_t x2 = {qL->x2, qR->x2}; 125 float32x2_t y1 = {qL->y1, qR->y1}; 126 float32x2_t y2 = {qL->y2, qR->y2}; 127 float32x2_t qb0 = {qL->b0, qR->b0}; 128 float32x2_t qb1 = {qL->b1, qR->b1}; 129 float32x2_t qb2 = {qL->b2, qR->b2}; 130 float32x2_t qa1 = {qL->a1, qR->a1}; 131 float32x2_t qa2 = {qL->a2, qR->a2}; 132 133 float32x2_t z1 = {rL->y1, rR->y1}; 134 float32x2_t z2 = {rL->y2, rR->y2}; 135 float32x2_t rb0 = {rL->b0, rR->b0}; 136 float32x2_t rb1 = {rL->b1, rR->b1}; 137 float32x2_t rb2 = {rL->b2, rR->b2}; 138 float32x2_t ra1 = {rL->a1, rR->a1}; 139 float32x2_t ra2 = {rL->a2, rR->a2}; 140 141 __asm__ __volatile__( 142 /* d0 = x, d1 = y, d2 = z */ 143 "1: \n" 144 "vmul.f32 d1, %P[qb1], %P[x1] \n" 145 "vld1.32 d0[0], [%[data0]] \n" 146 "vld1.32 d0[1], [%[data1]] \n" 147 "subs %[count], #1 \n" 148 "vmul.f32 d2, %P[rb1], %P[y1] \n" 149 "vmla.f32 d1, %P[qb0], d0 \n" 150 "vmla.f32 d1, %P[qb2], %P[x2] \n" 151 "vmov.f32 %P[x2], %P[x1] \n" 152 "vmov.f32 %P[x1], d0 \n" 153 "vmls.f32 d1, %P[qa1], %P[y1] \n" 154 "vmls.f32 d1, %P[qa2], %P[y2] \n" 155 "vmla.f32 d2, %P[rb0], d1 \n" 156 "vmla.f32 d2, %P[rb2], %P[y2] \n" 157 "vmov.f32 %P[y2], %P[y1] \n" 158 "vmov.f32 %P[y1], d1 \n" 159 "vmls.f32 d2, %P[ra1], %P[z1] \n" 160 "vmls.f32 d2, %P[ra2], %P[z2] \n" 161 "vmov.f32 %P[z2], %P[z1] \n" 162 "vmov.f32 %P[z1], d2 \n" 163 "vst1.f32 d2[0], [%[data0]]! \n" 164 "vst1.f32 d2[1], [%[data1]]! \n" 165 "bne 1b \n" 166 : /* output */ 167 [data0]"+r"(data0), 168 [data1]"+r"(data1), 169 [count]"+r"(count), 170 [x1]"+w"(x1), 171 [x2]"+w"(x2), 172 [y1]"+w"(y1), 173 [y2]"+w"(y2), 174 [z1]"+w"(z1), 175 [z2]"+w"(z2) 176 : /* input */ 177 [qb0]"w"(qb0), 178 [qb1]"w"(qb1), 179 [qb2]"w"(qb2), 180 [qa1]"w"(qa1), 181 [qa2]"w"(qa2), 182 [rb0]"w"(rb0), 183 [rb1]"w"(rb1), 184 [rb2]"w"(rb2), 185 [ra1]"w"(ra1), 186 [ra2]"w"(ra2) 187 : /* clobber */ 188 "d0", "d1", "d2", "memory", "cc" 189 ); 190 191 qL->x1 = x1[0]; 192 qL->x2 = x2[0]; 193 qL->y1 = y1[0]; 194 qL->y2 = y2[0]; 195 rL->y1 = z1[0]; 196 rL->y2 = z2[0]; 197 qR->x1 = x1[1]; 198 qR->x2 = x2[1]; 199 qR->y1 = y1[1]; 200 qR->y2 = y2[1]; 201 rR->y1 = z1[1]; 202 rR->y2 = z2[1]; 203 } 204 #endif 205 206 #if defined(__SSE3__) && defined(__x86_64__) 207 #include <emmintrin.h> 208 static inline void eq2_process_two_sse3(struct biquad (*bq)[2], 209 float *data0, float *data1, int count) 210 { 211 struct biquad *qL = &bq[0][0]; 212 struct biquad *rL = &bq[1][0]; 213 struct biquad *qR = &bq[0][1]; 214 struct biquad *rR = &bq[1][1]; 215 216 __m128 x1 = {qL->x1, qR->x1}; 217 __m128 x2 = {qL->x2, qR->x2}; 218 __m128 y1 = {qL->y1, qR->y1}; 219 __m128 y2 = {qL->y2, qR->y2}; 220 __m128 qb0 = {qL->b0, qR->b0}; 221 __m128 qb1 = {qL->b1, qR->b1}; 222 __m128 qb2 = {qL->b2, qR->b2}; 223 __m128 qa1 = {qL->a1, qR->a1}; 224 __m128 qa2 = {qL->a2, qR->a2}; 225 226 __m128 z1 = {rL->y1, rR->y1}; 227 __m128 z2 = {rL->y2, rR->y2}; 228 __m128 rb0 = {rL->b0, rR->b0}; 229 __m128 rb1 = {rL->b1, rR->b1}; 230 __m128 rb2 = {rL->b2, rR->b2}; 231 __m128 ra1 = {rL->a1, rR->a1}; 232 __m128 ra2 = {rL->a2, rR->a2}; 233 234 __asm__ __volatile__( 235 "1: \n" 236 "movss (%[data0]), %%xmm2 \n" 237 "movss (%[data1]), %%xmm1 \n" 238 "unpcklps %%xmm1, %%xmm2 \n" 239 "mulps %[qb2],%[x2] \n" 240 "lddqu %[qb0],%%xmm0 \n" 241 "mulps %[ra2],%[z2] \n" 242 "lddqu %[qb1],%%xmm1 \n" 243 "mulps %%xmm2,%%xmm0 \n" 244 "mulps %[x1],%%xmm1 \n" 245 "addps %%xmm1,%%xmm0 \n" 246 "movaps %[qa1],%%xmm1 \n" 247 "mulps %[y1],%%xmm1 \n" 248 "addps %[x2],%%xmm0 \n" 249 "movaps %[rb1],%[x2] \n" 250 "mulps %[y1],%[x2] \n" 251 "subps %%xmm1,%%xmm0 \n" 252 "movaps %[qa2],%%xmm1 \n" 253 "mulps %[y2],%%xmm1 \n" 254 "mulps %[rb2],%[y2] \n" 255 "subps %%xmm1,%%xmm0 \n" 256 "movaps %[rb0],%%xmm1 \n" 257 "mulps %%xmm0,%%xmm1 \n" 258 "addps %[x2],%%xmm1 \n" 259 "movaps %[x1],%[x2] \n" 260 "movaps %%xmm2,%[x1] \n" 261 "addps %[y2],%%xmm1 \n" 262 "movaps %[ra1],%[y2] \n" 263 "mulps %[z1],%[y2] \n" 264 "subps %[y2],%%xmm1 \n" 265 "movaps %[y1],%[y2] \n" 266 "movaps %%xmm0,%[y1] \n" 267 "subps %[z2],%%xmm1 \n" 268 "movaps %[z1],%[z2] \n" 269 "movaps %%xmm1,%[z1] \n" 270 "movss %%xmm1, (%[data0]) \n" 271 "shufps $1, %%xmm1, %%xmm1 \n" 272 "movss %%xmm1, (%[data1]) \n" 273 "add $4, %[data0] \n" 274 "add $4, %[data1] \n" 275 "sub $1, %[count] \n" 276 "jnz 1b \n" 277 : /* output */ 278 [data0]"+r"(data0), 279 [data1]"+r"(data1), 280 [count]"+r"(count), 281 [x1]"+x"(x1), 282 [x2]"+x"(x2), 283 [y1]"+x"(y1), 284 [y2]"+x"(y2), 285 [z1]"+x"(z1), 286 [z2]"+x"(z2) 287 : /* input */ 288 [qb0]"m"(qb0), 289 [qb1]"m"(qb1), 290 [qb2]"m"(qb2), 291 [qa1]"x"(qa1), 292 [qa2]"x"(qa2), 293 [rb0]"x"(rb0), 294 [rb1]"x"(rb1), 295 [rb2]"x"(rb2), 296 [ra1]"x"(ra1), 297 [ra2]"x"(ra2) 298 : /* clobber */ 299 "xmm0", "xmm1", "xmm2", "memory", "cc" 300 ); 301 302 qL->x1 = x1[0]; 303 qL->x2 = x2[0]; 304 qL->y1 = y1[0]; 305 qL->y2 = y2[0]; 306 rL->y1 = z1[0]; 307 rL->y2 = z2[0]; 308 qR->x1 = x1[1]; 309 qR->x2 = x2[1]; 310 qR->y1 = y1[1]; 311 qR->y2 = y2[1]; 312 rR->y1 = z1[1]; 313 rR->y2 = z2[1]; 314 } 315 #endif 316 317 void eq2_process(struct eq2 *eq2, float *data0, float *data1, int count) 318 { 319 int i; 320 int n; 321 if (!count) 322 return; 323 n = eq2->n[0]; 324 if (eq2->n[1] > n) 325 n = eq2->n[1]; 326 for (i = 0; i < n; i += 2) { 327 if (i + 1 == n) { 328 eq2_process_one(&eq2->biquad[i], data0, data1, count); 329 } else { 330 #if defined(__ARM_NEON__) 331 eq2_process_two_neon(&eq2->biquad[i], data0, data1, 332 count); 333 #elif defined(__SSE3__) && defined(__x86_64__) 334 eq2_process_two_sse3(&eq2->biquad[i], data0, data1, 335 count); 336 #else 337 eq2_process_one(&eq2->biquad[i], data0, data1, count); 338 eq2_process_one(&eq2->biquad[i+1], data0, data1, count); 339 #endif 340 } 341 } 342 } 343