Home | History | Annotate | Download | only in dsp
      1 /* Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
      2  * Use of this source code is governed by a BSD-style license that can be
      3  * found in the LICENSE file.
      4  */
      5 
      6 #include <stdlib.h>
      7 #include "eq2.h"
      8 
/* State of a two-channel equalizer: a chain of biquads for each channel. */
struct eq2 {
	/* n[c] is the number of biquads appended to channel c so far; it is
	 * also the index of the next free slot for that channel. */
	int n[2];
	/* biquad[i][c] is the i-th biquad of channel c. */
	struct biquad biquad[MAX_BIQUADS_PER_EQ2][2];
};
     13 
     14 struct eq2 *eq2_new()
     15 {
     16 	struct eq2 *eq2 = (struct eq2 *)calloc(1, sizeof(*eq2));
     17 	int i, j;
     18 
     19 	/* Initialize all biquads to identity filter, so if two channels have
     20 	 * different numbers of biquads, it still works. */
     21 	for (i = 0; i < MAX_BIQUADS_PER_EQ2; i++)
     22 		for (j = 0; j < 2; j++)
     23 			biquad_set(&eq2->biquad[i][j], BQ_NONE, 0, 0, 0);
     24 
     25 	return eq2;
     26 }
     27 
     28 void eq2_free(struct eq2 *eq2)
     29 {
     30 	free(eq2);
     31 }
     32 
     33 int eq2_append_biquad(struct eq2 *eq2, int channel,
     34 		      enum biquad_type type, float freq, float Q, float gain)
     35 {
     36 	if (eq2->n[channel] >= MAX_BIQUADS_PER_EQ2)
     37 		return -1;
     38 	biquad_set(&eq2->biquad[eq2->n[channel]++][channel], type, freq, Q,
     39 		   gain);
     40 	return 0;
     41 }
     42 
     43 int eq2_append_biquad_direct(struct eq2 *eq2, int channel,
     44 			     const struct biquad *biquad)
     45 {
     46 	if (eq2->n[channel] >= MAX_BIQUADS_PER_EQ2)
     47 		return -1;
     48 	eq2->biquad[eq2->n[channel]++][channel] = *biquad;
     49 	return 0;
     50 }
     51 
     52 static inline void eq2_process_one(struct biquad (*bq)[2],
     53 				   float *data0, float *data1, int count)
     54 {
     55 	struct biquad *qL = &bq[0][0];
     56 	struct biquad *qR = &bq[0][1];
     57 
     58 	float x1L = qL->x1;
     59 	float x2L = qL->x2;
     60 	float y1L = qL->y1;
     61 	float y2L = qL->y2;
     62 	float b0L = qL->b0;
     63 	float b1L = qL->b1;
     64 	float b2L = qL->b2;
     65 	float a1L = qL->a1;
     66 	float a2L = qL->a2;
     67 
     68 	float x1R = qR->x1;
     69 	float x2R = qR->x2;
     70 	float y1R = qR->y1;
     71 	float y2R = qR->y2;
     72 	float b0R = qR->b0;
     73 	float b1R = qR->b1;
     74 	float b2R = qR->b2;
     75 	float a1R = qR->a1;
     76 	float a2R = qR->a2;
     77 
     78 	int j;
     79 	for (j = 0; j < count; j++) {
     80 		float xL = data0[j];
     81 		float xR = data1[j];
     82 
     83 		float yL = b0L*xL
     84 			+ b1L*x1L + b2L*x2L
     85 			- a1L*y1L - a2L*y2L;
     86 		x2L = x1L;
     87 		x1L = xL;
     88 		y2L = y1L;
     89 		y1L = yL;
     90 
     91 		float yR = b0R*xR
     92 			+ b1R*x1R + b2R*x2R
     93 			- a1R*y1R - a2R*y2R;
     94 		x2R = x1R;
     95 		x1R = xR;
     96 		y2R = y1R;
     97 		y1R = yR;
     98 
     99 		data0[j] = yL;
    100 		data1[j] = yR;
    101 	}
    102 
    103 	qL->x1 = x1L;
    104 	qL->x2 = x2L;
    105 	qL->y1 = y1L;
    106 	qL->y2 = y2L;
    107 	qR->x1 = x1R;
    108 	qR->x2 = x2R;
    109 	qR->y1 = y1R;
    110 	qR->y2 = y2R;
    111 }
    112 
    113 #ifdef __ARM_NEON__
    114 #include <arm_neon.h>
/* NEON kernel: runs two cascaded biquad stages on both channels, in place.
 *
 * bq[0] is the first stage ("q") and bq[1] the second stage ("r"); within a
 * stage, element [0] filters data0 and element [1] filters data1. Each 64-bit
 * vector holds the channel-0 value in lane 0 and the channel-1 value in
 * lane 1, so both channels are computed by the same instructions.
 *
 * Per sample:
 *    y = qb0*x + qb1*x1 + qb2*x2 - qa1*y1 - qa2*y2     (stage q)
 *    z = rb0*y + rb1*y1 + rb2*y2 - ra1*z1 - ra2*z2     (stage r)
 * The output history of stage q (y1, y2) doubles as the input history of
 * stage r, so the x1/x2 fields of the stage-r biquads are neither read nor
 * written here.
 *
 * The loop counter is tested only at the bottom of the loop, so the body runs
 * at least once: count must be >= 1.
 */
static inline void eq2_process_two_neon(struct biquad (*bq)[2],
					float *data0, float *data1, int count)
{
	struct biquad *qL = &bq[0][0];
	struct biquad *rL = &bq[1][0];
	struct biquad *qR = &bq[0][1];
	struct biquad *rR = &bq[1][1];

	/* Stage q: input history, output history and coefficients. */
	float32x2_t x1 = {qL->x1, qR->x1};
	float32x2_t x2 = {qL->x2, qR->x2};
	float32x2_t y1 = {qL->y1, qR->y1};
	float32x2_t y2 = {qL->y2, qR->y2};
	float32x2_t qb0 = {qL->b0, qR->b0};
	float32x2_t qb1 = {qL->b1, qR->b1};
	float32x2_t qb2 = {qL->b2, qR->b2};
	float32x2_t qa1 = {qL->a1, qR->a1};
	float32x2_t qa2 = {qL->a2, qR->a2};

	/* Stage r: output history and coefficients. */
	float32x2_t z1 = {rL->y1, rR->y1};
	float32x2_t z2 = {rL->y2, rR->y2};
	float32x2_t rb0 = {rL->b0, rR->b0};
	float32x2_t rb1 = {rL->b1, rR->b1};
	float32x2_t rb2 = {rL->b2, rR->b2};
	float32x2_t ra1 = {rL->a1, rR->a1};
	float32x2_t ra2 = {rL->a2, rR->a2};

	__asm__ __volatile__(
		/* d0 = x, d1 = y, d2 = z */
		"1:                                     \n"
		/* Start y with qb1*x1 while the input pair is loaded into d0
		 * (lane 0 from data0, lane 1 from data1). */
		"vmul.f32 d1, %P[qb1], %P[x1]           \n"
		"vld1.32 d0[0], [%[data0]]              \n"
		"vld1.32 d0[1], [%[data1]]              \n"
		/* Decrement the counter here; the flags it sets are consumed
		 * by the bne at the bottom of the loop. */
		"subs %[count], #1                      \n"
		/* Start z with rb1*y1, using y1 before it is replaced. */
		"vmul.f32 d2, %P[rb1], %P[y1]           \n"
		/* y += qb0*x + qb2*x2, then shift the input history. */
		"vmla.f32 d1, %P[qb0], d0               \n"
		"vmla.f32 d1, %P[qb2], %P[x2]           \n"
		"vmov.f32 %P[x2], %P[x1]                \n"
		"vmov.f32 %P[x1], d0                    \n"
		/* y -= qa1*y1 + qa2*y2: d1 now holds the stage-q output. */
		"vmls.f32 d1, %P[qa1], %P[y1]           \n"
		"vmls.f32 d1, %P[qa2], %P[y2]           \n"
		/* z += rb0*y + rb2*y2, then shift the stage-q output
		 * history. */
		"vmla.f32 d2, %P[rb0], d1               \n"
		"vmla.f32 d2, %P[rb2], %P[y2]           \n"
		"vmov.f32 %P[y2], %P[y1]                \n"
		"vmov.f32 %P[y1], d1                    \n"
		/* z -= ra1*z1 + ra2*z2, then shift the stage-r output
		 * history. */
		"vmls.f32 d2, %P[ra1], %P[z1]           \n"
		"vmls.f32 d2, %P[ra2], %P[z2]           \n"
		"vmov.f32 %P[z2], %P[z1]                \n"
		"vmov.f32 %P[z1], d2                    \n"
		/* Store z over the inputs; the "!" writeback advances each
		 * pointer to the next sample. */
		"vst1.f32 d2[0], [%[data0]]!            \n"
		"vst1.f32 d2[1], [%[data1]]!            \n"
		"bne 1b                                 \n"
		: /* output */
		  [data0]"+r"(data0),
		  [data1]"+r"(data1),
		  [count]"+r"(count),
		  [x1]"+w"(x1),
		  [x2]"+w"(x2),
		  [y1]"+w"(y1),
		  [y2]"+w"(y2),
		  [z1]"+w"(z1),
		  [z2]"+w"(z2)
		: /* input */
		  [qb0]"w"(qb0),
		  [qb1]"w"(qb1),
		  [qb2]"w"(qb2),
		  [qa1]"w"(qa1),
		  [qa2]"w"(qa2),
		  [rb0]"w"(rb0),
		  [rb1]"w"(rb1),
		  [rb2]"w"(rb2),
		  [ra1]"w"(ra1),
		  [ra2]"w"(ra2)
		: /* clobber */
		  "d0", "d1", "d2", "memory", "cc"
		);

	/* Write the updated history back: lane 0 belongs to channel 0 and
	 * lane 1 to channel 1. */
	qL->x1 = x1[0];
	qL->x2 = x2[0];
	qL->y1 = y1[0];
	qL->y2 = y2[0];
	rL->y1 = z1[0];
	rL->y2 = z2[0];
	qR->x1 = x1[1];
	qR->x2 = x2[1];
	qR->y1 = y1[1];
	qR->y2 = y2[1];
	rR->y1 = z1[1];
	rR->y2 = z2[1];
}
    204 #endif
    205 
    206 #if defined(__SSE3__) && defined(__x86_64__)
    207 #include <emmintrin.h>
/* SSE3 kernel: runs two cascaded biquad stages on both channels, in place.
 *
 * bq[0] is the first stage ("q") and bq[1] the second stage ("r"); within a
 * stage, element [0] filters data0 and element [1] filters data1. Each vector
 * holds the channel-0 value in lane 0 and the channel-1 value in lane 1; only
 * those two lanes are initialized from the biquads and read back afterwards.
 *
 * Per sample:
 *    y = qb0*x + qb1*x1 + qb2*x2 - qa1*y1 - qa2*y2     (stage q)
 *    z = rb0*y + rb1*y1 + rb2*y2 - ra1*z1 - ra2*z2     (stage r)
 * The output history of stage q (y1, y2) doubles as the input history of
 * stage r, so the x1/x2 fields of the stage-r biquads are neither read nor
 * written here.
 *
 * The loop counter is tested only at the bottom of the loop, so the body runs
 * at least once: count must be >= 1.
 */
static inline void eq2_process_two_sse3(struct biquad (*bq)[2],
					float *data0, float *data1, int count)
{
	struct biquad *qL = &bq[0][0];
	struct biquad *rL = &bq[1][0];
	struct biquad *qR = &bq[0][1];
	struct biquad *rR = &bq[1][1];

	/* Stage q: input history, output history and coefficients. */
	__m128 x1 = {qL->x1, qR->x1};
	__m128 x2 = {qL->x2, qR->x2};
	__m128 y1 = {qL->y1, qR->y1};
	__m128 y2 = {qL->y2, qR->y2};
	__m128 qb0 = {qL->b0, qR->b0};
	__m128 qb1 = {qL->b1, qR->b1};
	__m128 qb2 = {qL->b2, qR->b2};
	__m128 qa1 = {qL->a1, qR->a1};
	__m128 qa2 = {qL->a2, qR->a2};

	/* Stage r: output history and coefficients. */
	__m128 z1 = {rL->y1, rR->y1};
	__m128 z2 = {rL->y2, rR->y2};
	__m128 rb0 = {rL->b0, rR->b0};
	__m128 rb1 = {rL->b1, rR->b1};
	__m128 rb2 = {rL->b2, rR->b2};
	__m128 ra1 = {rL->a1, rR->a1};
	__m128 ra2 = {rL->a2, rR->a2};

	/* Register roles inside the loop: xmm2 = x (input pair), xmm0 = y
	 * (stage-q output), xmm1 = scratch and finally z (stage-r output).
	 * The x2, y2 and z2 operands are multiplied in place and reused as
	 * scratch before they receive their shifted history value. */
	__asm__ __volatile__(
		"1:                                     \n"
		/* xmm2 = {*data0, *data1} */
		"movss (%[data0]), %%xmm2               \n"
		"movss (%[data1]), %%xmm1               \n"
		"unpcklps %%xmm1, %%xmm2                \n"
		/* x2 = qb2*x2; xmm0 = qb0; z2 = ra2*z2; xmm1 = qb1 */
		"mulps %[qb2],%[x2]                     \n"
		"lddqu %[qb0],%%xmm0                    \n"
		"mulps %[ra2],%[z2]                     \n"
		"lddqu %[qb1],%%xmm1                    \n"
		/* xmm0 = qb0*x + qb1*x1 */
		"mulps %%xmm2,%%xmm0                    \n"
		"mulps %[x1],%%xmm1                     \n"
		"addps %%xmm1,%%xmm0                    \n"
		/* xmm1 = qa1*y1; xmm0 += qb2*x2 */
		"movaps %[qa1],%%xmm1                   \n"
		"mulps %[y1],%%xmm1                     \n"
		"addps %[x2],%%xmm0                     \n"
		/* x2 (now scratch) = rb1*y1; xmm0 -= qa1*y1 */
		"movaps %[rb1],%[x2]                    \n"
		"mulps %[y1],%[x2]                      \n"
		"subps %%xmm1,%%xmm0                    \n"
		/* xmm1 = qa2*y2; y2 = rb2*y2; xmm0 -= qa2*y2, giving y */
		"movaps %[qa2],%%xmm1                   \n"
		"mulps %[y2],%%xmm1                     \n"
		"mulps %[rb2],%[y2]                     \n"
		"subps %%xmm1,%%xmm0                    \n"
		/* xmm1 = rb0*y + rb1*y1 */
		"movaps %[rb0],%%xmm1                   \n"
		"mulps %%xmm0,%%xmm1                    \n"
		"addps %[x2],%%xmm1                     \n"
		/* Shift the input history: x2 = x1, x1 = x */
		"movaps %[x1],%[x2]                     \n"
		"movaps %%xmm2,%[x1]                    \n"
		/* xmm1 += rb2*y2; y2 (now scratch) = ra1*z1; xmm1 -= ra1*z1 */
		"addps %[y2],%%xmm1                     \n"
		"movaps %[ra1],%[y2]                    \n"
		"mulps %[z1],%[y2]                      \n"
		"subps %[y2],%%xmm1                     \n"
		/* Shift the stage-q output history: y2 = y1, y1 = y */
		"movaps %[y1],%[y2]                     \n"
		"movaps %%xmm0,%[y1]                    \n"
		/* xmm1 -= ra2*z2, giving z; then z2 = z1, z1 = z */
		"subps %[z2],%%xmm1                     \n"
		"movaps %[z1],%[z2]                     \n"
		"movaps %%xmm1,%[z1]                    \n"
		/* Store lane 0 to data0, bring lane 1 down to lane 0 and
		 * store it to data1. */
		"movss %%xmm1, (%[data0])               \n"
		"shufps $1, %%xmm1, %%xmm1              \n"
		"movss %%xmm1, (%[data1])               \n"
		/* Advance both pointers by one float and loop until the
		 * counter reaches zero. */
		"add $4, %[data0]                       \n"
		"add $4, %[data1]                       \n"
		"sub $1, %[count]                       \n"
		"jnz 1b                                 \n"
		: /* output */
		  [data0]"+r"(data0),
		  [data1]"+r"(data1),
		  [count]"+r"(count),
		  [x1]"+x"(x1),
		  [x2]"+x"(x2),
		  [y1]"+x"(y1),
		  [y2]"+x"(y2),
		  [z1]"+x"(z1),
		  [z2]"+x"(z2)
		: /* input */
		  /* qb0..qb2 are memory operands, the other coefficients live
		   * in XMM registers. NOTE(review): presumably this keeps the
		   * operand count within the 16 XMM registers - confirm. */
		  [qb0]"m"(qb0),
		  [qb1]"m"(qb1),
		  [qb2]"m"(qb2),
		  [qa1]"x"(qa1),
		  [qa2]"x"(qa2),
		  [rb0]"x"(rb0),
		  [rb1]"x"(rb1),
		  [rb2]"x"(rb2),
		  [ra1]"x"(ra1),
		  [ra2]"x"(ra2)
		: /* clobber */
		  "xmm0", "xmm1", "xmm2", "memory", "cc"
		);

	/* Write the updated history back: lane 0 belongs to channel 0 and
	 * lane 1 to channel 1. */
	qL->x1 = x1[0];
	qL->x2 = x2[0];
	qL->y1 = y1[0];
	qL->y2 = y2[0];
	rL->y1 = z1[0];
	rL->y2 = z2[0];
	qR->x1 = x1[1];
	qR->x2 = x2[1];
	qR->y1 = y1[1];
	qR->y2 = y2[1];
	rR->y1 = z1[1];
	rR->y2 = z2[1];
}
    315 #endif
    316 
    317 void eq2_process(struct eq2 *eq2, float *data0, float *data1, int count)
    318 {
    319 	int i;
    320 	int n;
    321 	if (!count)
    322 		return;
    323 	n = eq2->n[0];
    324 	if (eq2->n[1] > n)
    325 		n = eq2->n[1];
    326 	for (i = 0; i < n; i += 2) {
    327 		if (i + 1 == n) {
    328 			eq2_process_one(&eq2->biquad[i], data0, data1, count);
    329 		} else {
    330 #if defined(__ARM_NEON__)
    331 			eq2_process_two_neon(&eq2->biquad[i], data0, data1,
    332 					     count);
    333 #elif defined(__SSE3__) && defined(__x86_64__)
    334 			eq2_process_two_sse3(&eq2->biquad[i], data0, data1,
    335 					     count);
    336 #else
    337 			eq2_process_one(&eq2->biquad[i], data0, data1, count);
    338 			eq2_process_one(&eq2->biquad[i+1], data0, data1, count);
    339 #endif
    340 		}
    341 	}
    342 }
    343