1 /* Copyright 2016 The Chromium OS Authors. All rights reserved. 2 * Use of this source code is governed by a BSD-style license that can be 3 * found in the LICENSE file. 4 */ 5 6 #include <math.h> /* for abs() */ 7 #include <stdio.h> /* for printf() */ 8 #include <string.h> /* for memset() */ 9 #include <stdint.h> /* for uint64 definition */ 10 #include <stdlib.h> /* for exit() definition */ 11 #include <time.h> /* for clock_gettime */ 12 13 #include "../drc_math.h" 14 #include "../dsp_util.h" 15 16 17 /* Constant for converting time to milliseconds. */ 18 #define BILLION 1000000000LL 19 /* Number of iterations for performance testing. */ 20 #define ITERATIONS 400000 21 22 #if defined(__aarch64__) 23 int16_t float_to_short(float a) { 24 int32_t ret; 25 asm volatile ("fcvtas %s[ret], %s[a]\n" 26 "sqxtn %h[ret], %s[ret]\n" 27 : [ret] "=w" (ret) 28 : [a] "w" (a) 29 :); 30 return (int16_t)(ret); 31 } 32 #else 33 int16_t float_to_short(float a) { 34 a += (a >= 0) ? 0.5f : -0.5f; 35 return (int16_t)(max(-32768, min(32767, a))); 36 } 37 #endif 38 39 void dsp_util_deinterleave_reference(int16_t *input, float *const *output, 40 int channels, int frames) 41 { 42 float *output_ptr[channels]; 43 int i, j; 44 45 for (i = 0; i < channels; i++) 46 output_ptr[i] = output[i]; 47 48 for (i = 0; i < frames; i++) 49 for (j = 0; j < channels; j++) 50 *(output_ptr[j]++) = *input++ / 32768.0f; 51 } 52 53 void dsp_util_interleave_reference(float *const *input, int16_t *output, 54 int channels, int frames) 55 { 56 float *input_ptr[channels]; 57 int i, j; 58 59 for (i = 0; i < channels; i++) 60 input_ptr[i] = input[i]; 61 62 for (i = 0; i < frames; i++) 63 for (j = 0; j < channels; j++) { 64 float f = *(input_ptr[j]++) * 32768.0f; 65 *output++ = float_to_short(f); 66 } 67 } 68 69 /* Use fixed size allocation to avoid performance fluctuation of allocation. */ 70 #define MAXSAMPLES 4096 71 #define MINSAMPLES 256 72 /* PAD buffer to check for overflows. */ 73 #define PAD 4096 74 75 void TestRounding(float in, int16_t expected, int samples) 76 { 77 int i; 78 int max_diff; 79 int d; 80 81 short* in_shorts = (short*) malloc(MAXSAMPLES * 2 * 2 + PAD); 82 float* out_floats_left_c = (float*) malloc(MAXSAMPLES * 4 + PAD); 83 float* out_floats_right_c = (float*) malloc(MAXSAMPLES * 4 + PAD); 84 float* out_floats_left_opt = (float*) malloc(MAXSAMPLES * 4 + PAD); 85 float* out_floats_right_opt = (float*) malloc(MAXSAMPLES * 4 + PAD); 86 short* out_shorts_c = (short*) malloc(MAXSAMPLES * 2 * 2 + PAD); 87 short* out_shorts_opt = (short*) malloc(MAXSAMPLES * 2 * 2 + PAD); 88 89 memset(in_shorts, 0xfb, MAXSAMPLES * 2 * 2 + PAD); 90 memset(out_floats_left_c, 0xfb, MAXSAMPLES * 4 + PAD); 91 memset(out_floats_right_c, 0xfb, MAXSAMPLES * 4 + PAD); 92 memset(out_floats_left_opt, 0xfb, MAXSAMPLES * 4 + PAD); 93 memset(out_floats_right_opt, 0xfb, MAXSAMPLES * 4 + PAD); 94 memset(out_shorts_c, 0xfb, MAXSAMPLES * 2 * 2 + PAD); 95 memset(out_shorts_opt, 0xfb, MAXSAMPLES * 2 * 2 + PAD); 96 97 float *out_floats_ptr_c[2]; 98 float *out_floats_ptr_opt[2]; 99 100 out_floats_ptr_c[0] = out_floats_left_c; 101 out_floats_ptr_c[1] = out_floats_right_c; 102 out_floats_ptr_opt[0] = out_floats_left_opt; 103 out_floats_ptr_opt[1] = out_floats_right_opt; 104 105 for (i = 0; i < MAXSAMPLES; ++i) { 106 out_floats_left_c[i] = in; 107 out_floats_right_c[i] = in; 108 } 109 110 /* reference C interleave */ 111 dsp_util_interleave_reference(out_floats_ptr_c, out_shorts_c, 2, 112 samples); 113 114 /* measure optimized interleave */ 115 for (i = 0; i < ITERATIONS; ++i) { 116 dsp_util_interleave(out_floats_ptr_c, (uint8_t *)out_shorts_opt, 117 2, SND_PCM_FORMAT_S16_LE, samples); 118 } 119 120 max_diff = 0; 121 for (i = 0; i < (MAXSAMPLES * 2 + PAD / 2); ++i) { 122 d = abs(out_shorts_c[i] - out_shorts_opt[i]); 123 if (d > max_diff) { 124 max_diff = d; 125 } 126 } 127 printf("test interleave compare %6d, %10f %13f %6d %6d %6d %s\n", 128 max_diff, in, in * 32768.0f, out_shorts_c[0], out_shorts_opt[0], 129 expected, 130 max_diff == 0 ? "PASS" : (out_shorts_opt[0] == expected ? 131 "EXPECTED DIFFERENCE" : "UNEXPECTED DIFFERENCE")); 132 133 /* measure reference C deinterleave */ 134 dsp_util_deinterleave_reference(in_shorts, out_floats_ptr_c, 2, 135 samples); 136 137 /* measure optimized deinterleave */ 138 dsp_util_deinterleave((uint8_t *)in_shorts, out_floats_ptr_opt, 2, 139 SND_PCM_FORMAT_S16_LE, samples); 140 141 d = memcmp(out_floats_ptr_c[0], out_floats_ptr_opt[0], samples * 4); 142 if (d) printf("left compare %d, %f %f\n", d, out_floats_ptr_c[0][0], 143 out_floats_ptr_opt[0][0]); 144 d = memcmp(out_floats_ptr_c[1], out_floats_ptr_opt[1], samples * 4); 145 if (d) printf("right compare %d, %f %f\n", d, out_floats_ptr_c[1][0], 146 out_floats_ptr_opt[1][0]); 147 148 free(in_shorts); 149 free(out_floats_left_c); 150 free(out_floats_right_c); 151 free(out_floats_left_opt); 152 free(out_floats_right_opt); 153 free(out_shorts_c); 154 free(out_shorts_opt); 155 } 156 157 int main(int argc, char **argv) 158 { 159 float e = 0.000000001f; 160 int samples = 16; 161 162 dsp_enable_flush_denormal_to_zero(); 163 164 // Print headings for TestRounding output. 165 printf("test interleave compare maxdif, float, float * 32k " 166 "C SIMD expect pass\n"); 167 168 // test clamping 169 TestRounding(1.0f, 32767, samples); 170 TestRounding(-1.0f, -32768, samples); 171 TestRounding(1.1f, 32767, samples); 172 TestRounding(-1.1f, -32768, samples); 173 TestRounding(2000000000.f / 32768.f, 32767, samples); 174 TestRounding(-2000000000.f / 32768.f, -32768, samples); 175 176 /* Infinity produces zero on arm64. */ 177 #if defined(__aarch64__) 178 #define EXPECTED_INF_RESULT 0 179 #define EXPECTED_NEGINF_RESULT 0 180 #elif defined(__i386__) || defined(__x86_64__) 181 #define EXPECTED_INF_RESULT -32768 182 #define EXPECTED_NEGINF_RESULT 0 183 #else 184 #define EXPECTED_INF_RESULT 32767 185 #define EXPECTED_NEGINF_RESULT -32768 186 #endif 187 188 TestRounding(5000000000.f / 32768.f, EXPECTED_INF_RESULT, samples); 189 TestRounding(-5000000000.f / 32768.f, EXPECTED_NEGINF_RESULT, samples); 190 191 // test infinity 192 union ieee754_float inf; 193 inf.ieee.negative = 0; 194 inf.ieee.exponent = 0xfe; 195 inf.ieee.mantissa = 0x7fffff; 196 TestRounding(inf.f, EXPECTED_INF_RESULT, samples); // expect fail 197 inf.ieee.negative = 1; 198 inf.ieee.exponent = 0xfe; 199 inf.ieee.mantissa = 0x7fffff; 200 TestRounding(inf.f, EXPECTED_NEGINF_RESULT, samples); // expect fail 201 202 // test rounding 203 TestRounding(0.25f, 8192, samples); 204 TestRounding(-0.25f, -8192, samples); 205 TestRounding(0.50f, 16384, samples); 206 TestRounding(-0.50f, -16384, samples); 207 TestRounding(1.0f / 32768.0f, 1, samples); 208 TestRounding(-1.0f / 32768.0f, -1, samples); 209 TestRounding(1.0f / 32768.0f + e, 1, samples); 210 TestRounding(-1.0f / 32768.0f - e, -1, samples); 211 TestRounding(1.0f / 32768.0f - e, 1, samples); 212 TestRounding(-1.0f / 32768.0f + e, -1, samples); 213 214 /* Rounding on 'tie' is different for Intel. */ 215 #if defined(__i386__) || defined(__x86_64__) 216 TestRounding(0.5f / 32768.0f, 0, samples); /* Expect round to even */ 217 TestRounding(-0.5f / 32768.0f, 0, samples); 218 #else 219 TestRounding(0.5f / 32768.0f, 1, samples); /* Expect round away */ 220 TestRounding(-0.5f / 32768.0f, -1, samples); 221 #endif 222 223 TestRounding(0.5f / 32768.0f + e, 1, samples); 224 TestRounding(-0.5f / 32768.0f - e, 1, samples); 225 TestRounding(0.5f / 32768.0f - e, 0, samples); 226 TestRounding(-0.5f / 32768.0f + e, 0, samples); 227 228 TestRounding(1.5f / 32768.0f, 2, samples); 229 TestRounding(-1.5f / 32768.0f, -2, samples); 230 TestRounding(1.5f / 32768.0f + e, 2, samples); 231 TestRounding(-1.5f / 32768.0f - e, -2, samples); 232 TestRounding(1.5f / 32768.0f - e, 1, samples); 233 TestRounding(-1.5f / 32768.0f + e, -1, samples); 234 235 /* Test denormals */ 236 union ieee754_float denorm; 237 denorm.ieee.negative = 0; 238 denorm.ieee.exponent = 0; 239 denorm.ieee.mantissa = 1; 240 TestRounding(denorm.f, 0, samples); 241 denorm.ieee.negative = 1; 242 denorm.ieee.exponent = 0; 243 denorm.ieee.mantissa = 1; 244 TestRounding(denorm.f, 0, samples); 245 246 /* Test NaNs. Caveat Results vary by implementation. */ 247 #if defined(__i386__) || defined(__x86_64__) 248 #define EXPECTED_NAN_RESULT -32768 249 #else 250 #define EXPECTED_NAN_RESULT 0 251 #endif 252 union ieee754_float nan; /* Quiet NaN */ 253 nan.ieee.negative = 0; 254 nan.ieee.exponent = 0xff; 255 nan.ieee.mantissa = 0x400001; 256 TestRounding(nan.f, EXPECTED_NAN_RESULT, samples); 257 nan.ieee.negative = 0; 258 nan.ieee.exponent = 0xff; 259 nan.ieee.mantissa = 0x000001; /* Signalling NaN */ 260 TestRounding(nan.f, EXPECTED_NAN_RESULT, samples); 261 262 /* Test Performance */ 263 uint64_t diff; 264 struct timespec start, end; 265 int i; 266 int d; 267 268 short* in_shorts = (short*) malloc(MAXSAMPLES * 2 * 2 + PAD); 269 float* out_floats_left_c = (float*) malloc(MAXSAMPLES * 4 + PAD); 270 float* out_floats_right_c = (float*) malloc(MAXSAMPLES * 4 + PAD); 271 float* out_floats_left_opt = (float*) malloc(MAXSAMPLES * 4 + PAD); 272 float* out_floats_right_opt = (float*) malloc(MAXSAMPLES * 4 + PAD); 273 short* out_shorts_c = (short*) malloc(MAXSAMPLES * 2 * 2 + PAD); 274 short* out_shorts_opt = (short*) malloc(MAXSAMPLES * 2 * 2 + PAD); 275 276 memset(in_shorts, 0x11, MAXSAMPLES * 2 * 2 + PAD); 277 memset(out_floats_left_c, 0x22, MAXSAMPLES * 4 + PAD); 278 memset(out_floats_right_c, 0x33, MAXSAMPLES * 4 + PAD); 279 memset(out_floats_left_opt, 0x44, MAXSAMPLES * 4 + PAD); 280 memset(out_floats_right_opt, 0x55, MAXSAMPLES * 4 + PAD); 281 memset(out_shorts_c, 0x66, MAXSAMPLES * 2 * 2 + PAD); 282 memset(out_shorts_opt, 0x66, MAXSAMPLES * 2 * 2 + PAD); 283 284 float *out_floats_ptr_c[2]; 285 float *out_floats_ptr_opt[2]; 286 287 out_floats_ptr_c[0] = out_floats_left_c; 288 out_floats_ptr_c[1] = out_floats_right_c; 289 out_floats_ptr_opt[0] = out_floats_left_opt; 290 out_floats_ptr_opt[1] = out_floats_right_opt; 291 292 /* Benchmark dsp_util_interleave */ 293 for (samples = MAXSAMPLES; samples >= MINSAMPLES; samples /= 2) { 294 295 /* measure original C interleave */ 296 clock_gettime(CLOCK_MONOTONIC, &start); /* mark start time */ 297 for (i = 0; i < ITERATIONS; ++i) { 298 dsp_util_interleave_reference(out_floats_ptr_c, 299 out_shorts_c, 300 2, samples); 301 } 302 clock_gettime(CLOCK_MONOTONIC, &end); /* mark the end time */ 303 diff = (BILLION * (end.tv_sec - start.tv_sec) + 304 end.tv_nsec - start.tv_nsec) / 1000000; 305 printf("interleave ORIG size = %6d, elapsed time = %llu ms\n", 306 samples, (long long unsigned int) diff); 307 308 /* measure optimized interleave */ 309 clock_gettime(CLOCK_MONOTONIC, &start); /* mark start time */ 310 for (i = 0; i < ITERATIONS; ++i) { 311 dsp_util_interleave(out_floats_ptr_c, 312 (uint8_t *)out_shorts_opt, 2, 313 SND_PCM_FORMAT_S16_LE, samples); 314 } 315 clock_gettime(CLOCK_MONOTONIC, &end); /* mark the end time */ 316 diff = (BILLION * (end.tv_sec - start.tv_sec) + 317 end.tv_nsec - start.tv_nsec) / 1000000; 318 printf("interleave SIMD size = %6d, elapsed time = %llu ms\n", 319 samples, (long long unsigned int) diff); 320 321 /* Test C and SIMD output match */ 322 d = memcmp(out_shorts_c, out_shorts_opt, 323 MAXSAMPLES * 2 * 2 + PAD); 324 if (d) printf("interleave compare %d, %d %d, %d %d\n", d, 325 out_shorts_c[0], out_shorts_c[1], 326 out_shorts_opt[0], out_shorts_opt[1]); 327 } 328 329 /* Benchmark dsp_util_deinterleave */ 330 for (samples = MAXSAMPLES; samples >= MINSAMPLES; samples /= 2) { 331 332 /* Measure original C deinterleave */ 333 clock_gettime(CLOCK_MONOTONIC, &start); /* mark start time */ 334 for (i = 0; i < ITERATIONS; ++i) { 335 dsp_util_deinterleave_reference(in_shorts, 336 out_floats_ptr_c, 337 2, samples); 338 } 339 clock_gettime(CLOCK_MONOTONIC, &end); /* mark the end time */ 340 diff = (BILLION * (end.tv_sec - start.tv_sec) + 341 end.tv_nsec - start.tv_nsec) / 1000000; 342 printf("deinterleave ORIG size = %6d, " 343 "elapsed time = %llu ms\n", 344 samples, (long long unsigned int) diff); 345 346 /* Measure optimized deinterleave */ 347 clock_gettime(CLOCK_MONOTONIC, &start); /* mark start time */ 348 for (i = 0; i < ITERATIONS; ++i) { 349 dsp_util_deinterleave((uint8_t *)in_shorts, 350 out_floats_ptr_opt, 2, 351 SND_PCM_FORMAT_S16_LE, samples); 352 } 353 clock_gettime(CLOCK_MONOTONIC, &end); /* mark the end time */ 354 diff = (BILLION * (end.tv_sec - start.tv_sec) + 355 end.tv_nsec - start.tv_nsec) / 1000000; 356 printf("deinterleave SIMD size = %6d, elapsed time = %llu ms\n", 357 samples, (long long unsigned int) diff); 358 359 /* Test C and SIMD output match */ 360 d = memcmp(out_floats_ptr_c[0], out_floats_ptr_opt[0], 361 samples * 4); 362 if (d) printf("left compare %d, %f %f\n", d, 363 out_floats_ptr_c[0][0], out_floats_ptr_opt[0][0]); 364 d = memcmp(out_floats_ptr_c[1], out_floats_ptr_opt[1], 365 samples * 4); 366 if (d) printf("right compare %d, %f %f\n", d, 367 out_floats_ptr_c[1][0], out_floats_ptr_opt[1][0]); 368 } 369 370 free(in_shorts); 371 free(out_floats_left_c); 372 free(out_floats_right_c); 373 free(out_floats_left_opt); 374 free(out_floats_right_opt); 375 free(out_shorts_c); 376 free(out_shorts_opt); 377 378 return 0; 379 } 380