Home | History | Annotate | Download | only in tests
      1 /* Copyright 2016 The Chromium OS Authors. All rights reserved.
      2  * Use of this source code is governed by a BSD-style license that can be
      3  * found in the LICENSE file.
      4  */
      6 #include <math.h>  /* for abs() */
      7 #include <stdio.h>  /* for printf() */
      8 #include <string.h> /* for memset() */
      9 #include <stdint.h> /* for uint64 definition */
     10 #include <stdlib.h> /* for exit() definition */
     11 #include <time.h> /* for clock_gettime */
     13 #include "../drc_math.h"
     14 #include "../dsp_util.h"
/* Nanoseconds per second; scales timespec seconds to nanoseconds. */
     18 #define BILLION 1000000000LL
     19 /* Number of iterations for performance testing. */
     20 #define ITERATIONS 400000
     22 #if defined(__aarch64__)
     23 int16_t float_to_short(float a) {
     24 	int32_t ret;
     25 	asm volatile ("fcvtas %s[ret], %s[a]\n"
     26 		      "sqxtn %h[ret], %s[ret]\n"
     27 		      : [ret] "=w" (ret)
     28 		      : [a] "w" (a)
     29 		      :);
     30 	return (int16_t)(ret);
     31 }
     32 #else
     33 int16_t float_to_short(float a) {
     34 	a += (a >= 0) ? 0.5f : -0.5f;
     35 	return (int16_t)(max(-32768, min(32767, a)));
     36 }
     37 #endif
     39 void dsp_util_deinterleave_reference(int16_t *input, float *const *output,
     40 				     int channels, int frames)
     41 {
     42 	float *output_ptr[channels];
     43 	int i, j;
     45 	for (i = 0; i < channels; i++)
     46 		output_ptr[i] = output[i];
     48 	for (i = 0; i < frames; i++)
     49 		for (j = 0; j < channels; j++)
     50 			*(output_ptr[j]++) = *input++ / 32768.0f;
     51 }
     53 void dsp_util_interleave_reference(float *const *input, int16_t *output,
     54 				   int channels, int frames)
     55 {
     56 	float *input_ptr[channels];
     57 	int i, j;
     59 	for (i = 0; i < channels; i++)
     60 		input_ptr[i] = input[i];
     62 	for (i = 0; i < frames; i++)
     63 		for (j = 0; j < channels; j++) {
     64 			float f = *(input_ptr[j]++) * 32768.0f;
     65 			*output++ = float_to_short(f);
     66 		}
     67 }
     69 /* Use fixed size allocation to avoid performance fluctuation of allocation. */
     70 #define MAXSAMPLES 4096
     71 #define MINSAMPLES 256
     72 /* PAD buffer to check for overflows. */
     73 #define PAD 4096
     75 void TestRounding(float in, int16_t expected, int samples)
     76 {
     77 	int i;
     78 	int max_diff;
     79 	int d;
     81 	short* in_shorts = (short*) malloc(MAXSAMPLES * 2 * 2 + PAD);
     82 	float* out_floats_left_c = (float*) malloc(MAXSAMPLES * 4 + PAD);
     83 	float* out_floats_right_c = (float*) malloc(MAXSAMPLES * 4 + PAD);
     84 	float* out_floats_left_opt = (float*) malloc(MAXSAMPLES * 4 + PAD);
     85 	float* out_floats_right_opt = (float*) malloc(MAXSAMPLES * 4 + PAD);
     86 	short* out_shorts_c = (short*) malloc(MAXSAMPLES * 2 * 2 + PAD);
     87 	short* out_shorts_opt = (short*) malloc(MAXSAMPLES * 2 * 2 + PAD);
     89 	memset(in_shorts, 0xfb, MAXSAMPLES * 2 * 2 + PAD);
     90 	memset(out_floats_left_c, 0xfb, MAXSAMPLES * 4 + PAD);
     91 	memset(out_floats_right_c, 0xfb, MAXSAMPLES * 4 + PAD);
     92 	memset(out_floats_left_opt, 0xfb, MAXSAMPLES * 4 + PAD);
     93 	memset(out_floats_right_opt, 0xfb, MAXSAMPLES * 4 + PAD);
     94 	memset(out_shorts_c, 0xfb, MAXSAMPLES * 2 * 2 + PAD);
     95 	memset(out_shorts_opt, 0xfb, MAXSAMPLES * 2 * 2 + PAD);
     97 	float *out_floats_ptr_c[2];
     98 	float *out_floats_ptr_opt[2];
    100 	out_floats_ptr_c[0] = out_floats_left_c;
    101 	out_floats_ptr_c[1] = out_floats_right_c;
    102 	out_floats_ptr_opt[0] = out_floats_left_opt;
    103 	out_floats_ptr_opt[1] = out_floats_right_opt;
    105 	for (i = 0; i < MAXSAMPLES; ++i) {
    106 		out_floats_left_c[i] = in;
    107 		out_floats_right_c[i] = in;
    108 	}
    110 	/*  reference C interleave */
    111 	dsp_util_interleave_reference(out_floats_ptr_c, out_shorts_c, 2,
    112 				      samples);
    114 	/* measure optimized interleave */
    115 	for (i = 0; i < ITERATIONS; ++i) {
    116 		dsp_util_interleave(out_floats_ptr_c, (uint8_t *)out_shorts_opt,
    117 				    2, SND_PCM_FORMAT_S16_LE, samples);
    118 	}
    120 	max_diff = 0;
    121 	for (i = 0; i < (MAXSAMPLES * 2 + PAD / 2); ++i) {
    122 		d = abs(out_shorts_c[i] - out_shorts_opt[i]);
    123 		if (d > max_diff) {
    124 			max_diff = d;
    125 		}
    126 	}
    127 	printf("test interleave compare %6d, %10f %13f %6d %6d %6d %s\n",
    128 		max_diff, in, in * 32768.0f, out_shorts_c[0], out_shorts_opt[0],
    129 		expected,
    130 		max_diff == 0 ? "PASS" : (out_shorts_opt[0] == expected ?
    133 	/* measure reference C deinterleave */
    134 	dsp_util_deinterleave_reference(in_shorts, out_floats_ptr_c, 2,
    135 					samples);
    137 	/* measure optimized deinterleave */
    138 	dsp_util_deinterleave((uint8_t *)in_shorts, out_floats_ptr_opt, 2,
    139 			      SND_PCM_FORMAT_S16_LE, samples);
    141 	d = memcmp(out_floats_ptr_c[0], out_floats_ptr_opt[0], samples * 4);
    142 	if (d) printf("left compare %d, %f %f\n", d, out_floats_ptr_c[0][0],
    143 		      out_floats_ptr_opt[0][0]);
    144 	d = memcmp(out_floats_ptr_c[1], out_floats_ptr_opt[1], samples * 4);
    145 	if (d) printf("right compare %d, %f %f\n", d, out_floats_ptr_c[1][0],
    146 		      out_floats_ptr_opt[1][0]);
    148 	free(in_shorts);
    149 	free(out_floats_left_c);
    150 	free(out_floats_right_c);
    151 	free(out_floats_left_opt);
    152 	free(out_floats_right_opt);
    153 	free(out_shorts_c);
    154 	free(out_shorts_opt);
    155 }
    157 int main(int argc, char **argv)
    158 {
    159 	float e = 0.000000001f;
    160 	int samples = 16;
    162 	dsp_enable_flush_denormal_to_zero();
    164 	// Print headings for TestRounding output.
    165 	printf("test interleave compare maxdif,     float,   float * 32k      "
    166 	       "C   SIMD expect pass\n");
    168 	// test clamping
    169 	TestRounding(1.0f, 32767, samples);
    170 	TestRounding(-1.0f, -32768, samples);
    171 	TestRounding(1.1f, 32767, samples);
    172 	TestRounding(-1.1f, -32768, samples);
    173 	TestRounding(2000000000.f / 32768.f, 32767, samples);
    174 	TestRounding(-2000000000.f / 32768.f, -32768, samples);
    176 	/* Infinity produces zero on arm64. */
    177 #if defined(__aarch64__)
    178 #define EXPECTED_INF_RESULT 0
    179 #define EXPECTED_NEGINF_RESULT 0
    180 #elif defined(__i386__) || defined(__x86_64__)
    181 #define EXPECTED_INF_RESULT -32768
    182 #define EXPECTED_NEGINF_RESULT 0
    183 #else
    184 #define EXPECTED_INF_RESULT 32767
    185 #define EXPECTED_NEGINF_RESULT -32768
    186 #endif
    188 	TestRounding(5000000000.f / 32768.f, EXPECTED_INF_RESULT, samples);
    189 	TestRounding(-5000000000.f / 32768.f, EXPECTED_NEGINF_RESULT, samples);
    191 	// test infinity
    192 	union ieee754_float inf;
    193 	inf.ieee.negative = 0;
    194 	inf.ieee.exponent = 0xfe;
    195 	inf.ieee.mantissa = 0x7fffff;
    196 	TestRounding(inf.f, EXPECTED_INF_RESULT, samples);  // expect fail
    197 	inf.ieee.negative = 1;
    198 	inf.ieee.exponent = 0xfe;
    199 	inf.ieee.mantissa = 0x7fffff;
    200 	TestRounding(inf.f, EXPECTED_NEGINF_RESULT, samples);  // expect fail
    202 	// test rounding
    203 	TestRounding(0.25f, 8192, samples);
    204 	TestRounding(-0.25f, -8192, samples);
    205 	TestRounding(0.50f, 16384, samples);
    206 	TestRounding(-0.50f, -16384, samples);
    207 	TestRounding(1.0f / 32768.0f, 1, samples);
    208 	TestRounding(-1.0f / 32768.0f, -1, samples);
    209 	TestRounding(1.0f / 32768.0f + e, 1, samples);
    210 	TestRounding(-1.0f / 32768.0f - e, -1, samples);
    211 	TestRounding(1.0f / 32768.0f - e, 1, samples);
    212 	TestRounding(-1.0f / 32768.0f + e, -1, samples);
    214 	/* Rounding on 'tie' is different for Intel. */
    215 #if defined(__i386__) || defined(__x86_64__)
    216 	TestRounding(0.5f / 32768.0f, 0, samples);  /* Expect round to even */
    217 	TestRounding(-0.5f / 32768.0f, 0, samples);
    218 #else
    219 	TestRounding(0.5f / 32768.0f, 1, samples);  /* Expect round away */
    220 	TestRounding(-0.5f / 32768.0f, -1, samples);
    221 #endif
    223 	TestRounding(0.5f / 32768.0f + e, 1, samples);
    224 	TestRounding(-0.5f / 32768.0f - e, 1, samples);
    225 	TestRounding(0.5f / 32768.0f - e, 0, samples);
    226 	TestRounding(-0.5f / 32768.0f + e, 0, samples);
    228 	TestRounding(1.5f / 32768.0f, 2, samples);
    229 	TestRounding(-1.5f / 32768.0f, -2, samples);
    230 	TestRounding(1.5f / 32768.0f + e, 2, samples);
    231 	TestRounding(-1.5f / 32768.0f - e, -2, samples);
    232 	TestRounding(1.5f / 32768.0f - e, 1, samples);
    233 	TestRounding(-1.5f / 32768.0f + e, -1, samples);
    235 	/* Test denormals */
    236 	union ieee754_float denorm;
    237 	denorm.ieee.negative = 0;
    238 	denorm.ieee.exponent = 0;
    239 	denorm.ieee.mantissa = 1;
    240 	TestRounding(denorm.f, 0, samples);
    241 	denorm.ieee.negative = 1;
    242 	denorm.ieee.exponent = 0;
    243 	denorm.ieee.mantissa = 1;
    244 	TestRounding(denorm.f, 0, samples);
    246 	/* Test NaNs. Caveat Results vary by implementation. */
    247 #if defined(__i386__) || defined(__x86_64__)
    248 #define EXPECTED_NAN_RESULT -32768
    249 #else
    250 #define EXPECTED_NAN_RESULT 0
    251 #endif
    252 	union ieee754_float nan;  /* Quiet NaN */
    253 	nan.ieee.negative = 0;
    254 	nan.ieee.exponent = 0xff;
    255 	nan.ieee.mantissa = 0x400001;
    256 	TestRounding(nan.f, EXPECTED_NAN_RESULT, samples);
    257 	nan.ieee.negative = 0;
    258 	nan.ieee.exponent = 0xff;
    259 	nan.ieee.mantissa = 0x000001;  /* Signalling NaN */
    260 	TestRounding(nan.f, EXPECTED_NAN_RESULT, samples);
    262 	/* Test Performance */
    263 	uint64_t diff;
    264 	struct timespec start, end;
    265 	int i;
    266 	int d;
    268 	short* in_shorts = (short*) malloc(MAXSAMPLES * 2 * 2 + PAD);
    269 	float* out_floats_left_c = (float*) malloc(MAXSAMPLES * 4 + PAD);
    270 	float* out_floats_right_c = (float*) malloc(MAXSAMPLES * 4 + PAD);
    271 	float* out_floats_left_opt = (float*) malloc(MAXSAMPLES * 4 + PAD);
    272 	float* out_floats_right_opt = (float*) malloc(MAXSAMPLES * 4 + PAD);
    273 	short* out_shorts_c = (short*) malloc(MAXSAMPLES * 2 * 2 + PAD);
    274 	short* out_shorts_opt = (short*) malloc(MAXSAMPLES * 2 * 2 + PAD);
    276 	memset(in_shorts, 0x11, MAXSAMPLES * 2 * 2 + PAD);
    277 	memset(out_floats_left_c, 0x22, MAXSAMPLES * 4 + PAD);
    278 	memset(out_floats_right_c, 0x33, MAXSAMPLES * 4 + PAD);
    279 	memset(out_floats_left_opt, 0x44, MAXSAMPLES * 4 + PAD);
    280 	memset(out_floats_right_opt, 0x55, MAXSAMPLES * 4 + PAD);
    281 	memset(out_shorts_c, 0x66, MAXSAMPLES * 2 * 2 + PAD);
    282 	memset(out_shorts_opt, 0x66, MAXSAMPLES * 2 * 2 + PAD);
    284 	float *out_floats_ptr_c[2];
    285 	float *out_floats_ptr_opt[2];
    287 	out_floats_ptr_c[0] = out_floats_left_c;
    288 	out_floats_ptr_c[1] = out_floats_right_c;
    289 	out_floats_ptr_opt[0] = out_floats_left_opt;
    290 	out_floats_ptr_opt[1] = out_floats_right_opt;
    292 	/* Benchmark dsp_util_interleave */
    293 	for (samples = MAXSAMPLES; samples >= MINSAMPLES; samples /= 2) {
    295 		/* measure original C interleave */
    296 		clock_gettime(CLOCK_MONOTONIC, &start); /* mark start time */
    297 		for (i = 0; i < ITERATIONS; ++i) {
    298 			dsp_util_interleave_reference(out_floats_ptr_c,
    299 						      out_shorts_c,
    300 						      2, samples);
    301 		}
    302 		clock_gettime(CLOCK_MONOTONIC, &end); /* mark the end time */
    303 		diff = (BILLION * (end.tv_sec - start.tv_sec) +
    304 			end.tv_nsec - start.tv_nsec) / 1000000;
    305 		printf("interleave   ORIG size = %6d, elapsed time = %llu ms\n",
    306 		       samples, (long long unsigned int) diff);
    308 		/* measure optimized interleave */
    309 		clock_gettime(CLOCK_MONOTONIC, &start); /* mark start time */
    310 		for (i = 0; i < ITERATIONS; ++i) {
    311 			dsp_util_interleave(out_floats_ptr_c,
    312 					    (uint8_t *)out_shorts_opt, 2,
    313 					    SND_PCM_FORMAT_S16_LE, samples);
    314 		}
    315 		clock_gettime(CLOCK_MONOTONIC, &end); /* mark the end time */
    316 		diff = (BILLION * (end.tv_sec - start.tv_sec) +
    317 			end.tv_nsec - start.tv_nsec) / 1000000;
    318 		printf("interleave   SIMD size = %6d, elapsed time = %llu ms\n",
    319 		       samples, (long long unsigned int) diff);
    321 		/* Test C and SIMD output match */
    322 		d = memcmp(out_shorts_c, out_shorts_opt,
    323 			   MAXSAMPLES * 2 * 2 + PAD);
    324 		if (d) printf("interleave compare %d, %d %d, %d %d\n", d,
    325 			      out_shorts_c[0], out_shorts_c[1],
    326 			      out_shorts_opt[0], out_shorts_opt[1]);
    327 	}
    329 	/* Benchmark dsp_util_deinterleave */
    330 	for (samples = MAXSAMPLES; samples >= MINSAMPLES; samples /= 2) {
    332 		/* Measure original C deinterleave */
    333 		clock_gettime(CLOCK_MONOTONIC, &start); /* mark start time */
    334 		for (i = 0; i < ITERATIONS; ++i) {
    335 			dsp_util_deinterleave_reference(in_shorts,
    336 							out_floats_ptr_c,
    337 							2, samples);
    338 		}
    339 		clock_gettime(CLOCK_MONOTONIC, &end); /* mark the end time */
    340 		diff = (BILLION * (end.tv_sec - start.tv_sec) +
    341 			end.tv_nsec - start.tv_nsec) / 1000000;
    342 			printf("deinterleave ORIG size = %6d, "
    343 			       "elapsed time = %llu ms\n",
    344 			       samples, (long long unsigned int) diff);
    346 		/* Measure optimized deinterleave */
    347 		clock_gettime(CLOCK_MONOTONIC, &start); /* mark start time */
    348 		for (i = 0; i < ITERATIONS; ++i) {
    349 			dsp_util_deinterleave((uint8_t *)in_shorts,
    350 					      out_floats_ptr_opt, 2,
    351 					      SND_PCM_FORMAT_S16_LE, samples);
    352 		}
    353 		clock_gettime(CLOCK_MONOTONIC, &end); /* mark the end time */
    354 		diff = (BILLION * (end.tv_sec - start.tv_sec) +
    355 			end.tv_nsec - start.tv_nsec) / 1000000;
    356 		printf("deinterleave SIMD size = %6d, elapsed time = %llu ms\n",
    357 			samples, (long long unsigned int) diff);
    359 		/* Test C and SIMD output match */
    360 		d = memcmp(out_floats_ptr_c[0], out_floats_ptr_opt[0],
    361 			   samples * 4);
    362 		if (d) printf("left compare %d, %f %f\n", d,
    363 			      out_floats_ptr_c[0][0], out_floats_ptr_opt[0][0]);
    364 		d = memcmp(out_floats_ptr_c[1], out_floats_ptr_opt[1],
    365 			   samples * 4);
    366 		if (d) printf("right compare %d, %f %f\n", d,
    367 			      out_floats_ptr_c[1][0], out_floats_ptr_opt[1][0]);
    368 	}
    370 	free(in_shorts);
    371 	free(out_floats_left_c);
    372 	free(out_floats_right_c);
    373 	free(out_floats_left_opt);
    374 	free(out_floats_right_opt);
    375 	free(out_shorts_c);
    376 	free(out_shorts_opt);
    378 	return 0;
    379 }