1 /* 2 * Copyright (C) 2011 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include <cutils/bitops.h> /* for popcount() */ 18 #include <audio_utils/primitives.h> 19 #include "private/private.h" 20 21 void ditherAndClamp(int32_t *out, const int32_t *sums, size_t pairs) 22 { 23 for (; pairs > 0; --pairs) { 24 const int32_t l = clamp16(*sums++ >> 12); 25 const int32_t r = clamp16(*sums++ >> 12); 26 *out++ = (r << 16) | (l & 0xFFFF); 27 } 28 } 29 30 void memcpy_to_i16_from_q4_27(int16_t *dst, const int32_t *src, size_t count) 31 { 32 for (; count > 0; --count) { 33 *dst++ = clamp16(*src++ >> 12); 34 } 35 } 36 37 void memcpy_to_i16_from_u8(int16_t *dst, const uint8_t *src, size_t count) 38 { 39 dst += count; 40 src += count; 41 for (; count > 0; --count) { 42 *--dst = (int16_t)(*--src - 0x80) << 8; 43 } 44 } 45 46 void memcpy_to_u8_from_i16(uint8_t *dst, const int16_t *src, size_t count) 47 { 48 for (; count > 0; --count) { 49 *dst++ = (*src++ >> 8) + 0x80; 50 } 51 } 52 53 void memcpy_to_u8_from_float(uint8_t *dst, const float *src, size_t count) 54 { 55 for (; count > 0; --count) { 56 *dst++ = clamp8_from_float(*src++); 57 } 58 } 59 60 void memcpy_to_i16_from_i32(int16_t *dst, const int32_t *src, size_t count) 61 { 62 for (; count > 0; --count) { 63 *dst++ = *src++ >> 16; 64 } 65 } 66 67 void memcpy_to_i16_from_float(int16_t *dst, const float *src, size_t count) 68 { 69 for (; count > 0; --count) { 70 *dst++ = clamp16_from_float(*src++); 71 } 72 } 73 74 void memcpy_to_float_from_q4_27(float *dst, const int32_t *src, size_t count) 75 { 76 for (; count > 0; --count) { 77 *dst++ = float_from_q4_27(*src++); 78 } 79 } 80 81 void memcpy_to_float_from_i16(float *dst, const int16_t *src, size_t count) 82 { 83 dst += count; 84 src += count; 85 for (; count > 0; --count) { 86 *--dst = float_from_i16(*--src); 87 } 88 } 89 90 void memcpy_to_float_from_u8(float *dst, const uint8_t *src, size_t count) 91 { 92 dst += count; 93 src += count; 94 for (; count > 0; --count) { 95 *--dst = float_from_u8(*--src); 96 } 97 } 98 99 void memcpy_to_float_from_p24(float *dst, const uint8_t *src, size_t count) 100 { 101 dst += count; 102 src += count * 3; 103 for (; count > 0; --count) { 104 src -= 3; 105 *--dst = float_from_p24(src); 106 } 107 } 108 109 void memcpy_to_i16_from_p24(int16_t *dst, const uint8_t *src, size_t count) 110 { 111 for (; count > 0; --count) { 112 #if HAVE_BIG_ENDIAN 113 *dst++ = src[1] | (src[0] << 8); 114 #else 115 *dst++ = src[1] | (src[2] << 8); 116 #endif 117 src += 3; 118 } 119 } 120 121 void memcpy_to_i32_from_p24(int32_t *dst, const uint8_t *src, size_t count) 122 { 123 dst += count; 124 src += count * 3; 125 for (; count > 0; --count) { 126 src -= 3; 127 #if HAVE_BIG_ENDIAN 128 *--dst = (src[2] << 8) | (src[1] << 16) | (src[0] << 24); 129 #else 130 *--dst = (src[0] << 8) | (src[1] << 16) | (src[2] << 24); 131 #endif 132 } 133 } 134 135 void memcpy_to_p24_from_i16(uint8_t *dst, const int16_t *src, size_t count) 136 { 137 dst += count * 3; 138 src += count; 139 for (; count > 0; --count) { 140 dst -= 3; 141 const int16_t sample = *--src; 142 #if HAVE_BIG_ENDIAN 143 dst[0] = sample >> 8; 144 dst[1] = sample; 145 dst[2] = 0; 146 #else 147 dst[0] = 0; 148 dst[1] = sample; 149 dst[2] = sample >> 8; 150 #endif 151 } 152 } 153 154 void memcpy_to_p24_from_float(uint8_t *dst, const float *src, size_t count) 155 { 156 for (; count > 0; --count) { 157 int32_t ival = clamp24_from_float(*src++); 158 159 #if HAVE_BIG_ENDIAN 160 *dst++ = ival >> 16; 161 *dst++ = ival >> 8; 162 *dst++ = ival; 163 #else 164 *dst++ = ival; 165 *dst++ = ival >> 8; 166 *dst++ = ival >> 16; 167 #endif 168 } 169 } 170 171 void memcpy_to_p24_from_q8_23(uint8_t *dst, const int32_t *src, size_t count) 172 { 173 for (; count > 0; --count) { 174 int32_t ival = clamp24_from_q8_23(*src++); 175 176 #if HAVE_BIG_ENDIAN 177 *dst++ = ival >> 16; 178 *dst++ = ival >> 8; 179 *dst++ = ival; 180 #else 181 *dst++ = ival; 182 *dst++ = ival >> 8; 183 *dst++ = ival >> 16; 184 #endif 185 } 186 } 187 188 void memcpy_to_p24_from_i32(uint8_t *dst, const int32_t *src, size_t count) 189 { 190 for (; count > 0; --count) { 191 int32_t ival = *src++ >> 8; 192 193 #if HAVE_BIG_ENDIAN 194 *dst++ = ival >> 16; 195 *dst++ = ival >> 8; 196 *dst++ = ival; 197 #else 198 *dst++ = ival; 199 *dst++ = ival >> 8; 200 *dst++ = ival >> 16; 201 #endif 202 } 203 } 204 205 void memcpy_to_q8_23_from_i16(int32_t *dst, const int16_t *src, size_t count) 206 { 207 dst += count; 208 src += count; 209 for (; count > 0; --count) { 210 *--dst = (int32_t)*--src << 8; 211 } 212 } 213 214 void memcpy_to_q8_23_from_float_with_clamp(int32_t *dst, const float *src, size_t count) 215 { 216 for (; count > 0; --count) { 217 *dst++ = clamp24_from_float(*src++); 218 } 219 } 220 221 void memcpy_to_q8_23_from_p24(int32_t *dst, const uint8_t *src, size_t count) 222 { 223 dst += count; 224 src += count * 3; 225 for (; count > 0; --count) { 226 src -= 3; 227 #if HAVE_BIG_ENDIAN 228 *--dst = (int8_t)src[0] << 16 | src[1] << 8 | src[2]; 229 #else 230 *--dst = (int8_t)src[2] << 16 | src[1] << 8 | src[0]; 231 #endif 232 } 233 } 234 235 void memcpy_to_q4_27_from_float(int32_t *dst, const float *src, size_t count) 236 { 237 for (; count > 0; --count) { 238 *dst++ = clampq4_27_from_float(*src++); 239 } 240 } 241 242 void memcpy_to_i16_from_q8_23(int16_t *dst, const int32_t *src, size_t count) 243 { 244 for (; count > 0; --count) { 245 *dst++ = clamp16(*src++ >> 8); 246 } 247 } 248 249 void memcpy_to_float_from_q8_23(float *dst, const int32_t *src, size_t count) 250 { 251 for (; count > 0; --count) { 252 *dst++ = float_from_q8_23(*src++); 253 } 254 } 255 256 void memcpy_to_i32_from_i16(int32_t *dst, const int16_t *src, size_t count) 257 { 258 dst += count; 259 src += count; 260 for (; count > 0; --count) { 261 *--dst = (int32_t)*--src << 16; 262 } 263 } 264 265 void memcpy_to_i32_from_float(int32_t *dst, const float *src, size_t count) 266 { 267 for (; count > 0; --count) { 268 *dst++ = clamp32_from_float(*src++); 269 } 270 } 271 272 void memcpy_to_float_from_i32(float *dst, const int32_t *src, size_t count) 273 { 274 for (; count > 0; --count) { 275 *dst++ = float_from_i32(*src++); 276 } 277 } 278 279 void memcpy_to_float_from_float_with_clamping(float *dst, const float *src, size_t count, 280 float absMax) { 281 // Note: using NEON intrinsics (vminq_f32, vld1q_f32...) did NOT accelerate 282 // the function when benchmarked. The compiler already vectorize using FMINNM f32x4 & similar. 283 // Note: clamping induce a ~20% overhead compared to memcpy for count in [64, 512] 284 // See primitives_benchmark 285 for (; count > 0; --count) { 286 const float sample = *src++; 287 *dst++ = fmax(-absMax, fmin(absMax, sample)); 288 } 289 } 290 291 void downmix_to_mono_i16_from_stereo_i16(int16_t *dst, const int16_t *src, size_t count) 292 { 293 for (; count > 0; --count) { 294 *dst++ = (int16_t)(((int32_t)src[0] + (int32_t)src[1]) >> 1); 295 src += 2; 296 } 297 } 298 299 void upmix_to_stereo_i16_from_mono_i16(int16_t *dst, const int16_t *src, size_t count) 300 { 301 dst += count * 2; 302 src += count; 303 for (; count > 0; --count) { 304 const int32_t temp = *--src; 305 dst -= 2; 306 dst[0] = temp; 307 dst[1] = temp; 308 } 309 } 310 311 void downmix_to_mono_float_from_stereo_float(float *dst, const float *src, size_t frames) 312 { 313 for (; frames > 0; --frames) { 314 *dst++ = (src[0] + src[1]) * 0.5; 315 src += 2; 316 } 317 } 318 319 void upmix_to_stereo_float_from_mono_float(float *dst, const float *src, size_t frames) 320 { 321 dst += frames * 2; 322 src += frames; 323 for (; frames > 0; --frames) { 324 const float temp = *--src; 325 dst -= 2; 326 dst[0] = temp; 327 dst[1] = temp; 328 } 329 } 330 331 size_t nonZeroMono32(const int32_t *samples, size_t count) 332 { 333 size_t nonZero = 0; 334 for (; count > 0; --count) { 335 nonZero += *samples++ != 0; 336 } 337 return nonZero; 338 } 339 340 size_t nonZeroMono16(const int16_t *samples, size_t count) 341 { 342 size_t nonZero = 0; 343 for (; count > 0; --count) { 344 nonZero += *samples++ != 0; 345 } 346 return nonZero; 347 } 348 349 size_t nonZeroStereo32(const int32_t *frames, size_t count) 350 { 351 size_t nonZero = 0; 352 for (; count > 0; --count) { 353 nonZero += frames[0] != 0 || frames[1] != 0; 354 frames += 2; 355 } 356 return nonZero; 357 } 358 359 size_t nonZeroStereo16(const int16_t *frames, size_t count) 360 { 361 size_t nonZero = 0; 362 for (; count > 0; --count) { 363 nonZero += frames[0] != 0 || frames[1] != 0; 364 frames += 2; 365 } 366 return nonZero; 367 } 368 369 /* 370 * C macro to do channel mask copying independent of dst/src sample type. 371 * Don't pass in any expressions for the macro arguments here. 372 */ 373 #define copy_frame_by_mask(dst, dmask, src, smask, count, zero) \ 374 { \ 375 uint32_t bit, ormask; \ 376 for (; (count) > 0; --(count)) { \ 377 ormask = (dmask) | (smask); \ 378 while (ormask) { \ 379 bit = ormask & -ormask; /* get lowest bit */ \ 380 ormask ^= bit; /* remove lowest bit */ \ 381 if ((dmask) & bit) { \ 382 *(dst)++ = (smask) & bit ? *(src)++ : (zero); \ 383 } else { /* source channel only */ \ 384 ++(src); \ 385 } \ 386 } \ 387 } \ 388 } 389 390 void memcpy_by_channel_mask(void *dst, uint32_t dst_mask, 391 const void *src, uint32_t src_mask, size_t sample_size, size_t count) 392 { 393 #if 0 394 /* alternate way of handling memcpy_by_channel_mask by using the idxary */ 395 int8_t idxary[32]; 396 uint32_t src_channels = popcount(src_mask); 397 uint32_t dst_channels = 398 memcpy_by_index_array_initialization(idxary, 32, dst_mask, src_mask); 399 400 memcpy_by_idxary(dst, dst_channels, src, src_channels, idxary, sample_size, count); 401 #else 402 if (dst_mask == src_mask) { 403 memcpy(dst, src, sample_size * popcount(dst_mask) * count); 404 return; 405 } 406 switch (sample_size) { 407 case 1: { 408 uint8_t *udst = (uint8_t*)dst; 409 const uint8_t *usrc = (const uint8_t*)src; 410 411 copy_frame_by_mask(udst, dst_mask, usrc, src_mask, count, 0); 412 } break; 413 case 2: { 414 uint16_t *udst = (uint16_t*)dst; 415 const uint16_t *usrc = (const uint16_t*)src; 416 417 copy_frame_by_mask(udst, dst_mask, usrc, src_mask, count, 0); 418 } break; 419 case 3: { /* could be slow. use a struct to represent 3 bytes of data. */ 420 uint8x3_t *udst = (uint8x3_t*)dst; 421 const uint8x3_t *usrc = (const uint8x3_t*)src; 422 static const uint8x3_t zero; /* tricky - we use this to zero out a sample */ 423 424 copy_frame_by_mask(udst, dst_mask, usrc, src_mask, count, zero); 425 } break; 426 case 4: { 427 uint32_t *udst = (uint32_t*)dst; 428 const uint32_t *usrc = (const uint32_t*)src; 429 430 copy_frame_by_mask(udst, dst_mask, usrc, src_mask, count, 0); 431 } break; 432 default: 433 abort(); /* illegal value */ 434 break; 435 } 436 #endif 437 } 438 439 /* 440 * C macro to do copying by index array, to rearrange samples 441 * within a frame. This is independent of src/dst sample type. 442 * Don't pass in any expressions for the macro arguments here. 443 */ 444 #define copy_frame_by_idx(dst, dst_channels, src, src_channels, idxary, count, zero) \ 445 { \ 446 unsigned i; \ 447 int index; \ 448 for (; (count) > 0; --(count)) { \ 449 for (i = 0; i < (dst_channels); ++i) { \ 450 index = (idxary)[i]; \ 451 *(dst)++ = index < 0 ? (zero) : (src)[index]; \ 452 } \ 453 (src) += (src_channels); \ 454 } \ 455 } 456 457 void memcpy_by_index_array(void *dst, uint32_t dst_channels, 458 const void *src, uint32_t src_channels, 459 const int8_t *idxary, size_t sample_size, size_t count) 460 { 461 switch (sample_size) { 462 case 1: { 463 uint8_t *udst = (uint8_t*)dst; 464 const uint8_t *usrc = (const uint8_t*)src; 465 466 copy_frame_by_idx(udst, dst_channels, usrc, src_channels, idxary, count, 0); 467 } break; 468 case 2: { 469 uint16_t *udst = (uint16_t*)dst; 470 const uint16_t *usrc = (const uint16_t*)src; 471 472 copy_frame_by_idx(udst, dst_channels, usrc, src_channels, idxary, count, 0); 473 } break; 474 case 3: { /* could be slow. use a struct to represent 3 bytes of data. */ 475 uint8x3_t *udst = (uint8x3_t*)dst; 476 const uint8x3_t *usrc = (const uint8x3_t*)src; 477 static const uint8x3_t zero; 478 479 copy_frame_by_idx(udst, dst_channels, usrc, src_channels, idxary, count, zero); 480 } break; 481 case 4: { 482 uint32_t *udst = (uint32_t*)dst; 483 const uint32_t *usrc = (const uint32_t*)src; 484 485 copy_frame_by_idx(udst, dst_channels, usrc, src_channels, idxary, count, 0); 486 } break; 487 default: 488 abort(); /* illegal value */ 489 break; 490 } 491 } 492 493 size_t memcpy_by_index_array_initialization(int8_t *idxary, size_t idxcount, 494 uint32_t dst_mask, uint32_t src_mask) 495 { 496 size_t n = 0; 497 int srcidx = 0; 498 uint32_t bit, ormask = src_mask | dst_mask; 499 500 while (ormask && n < idxcount) { 501 bit = ormask & -ormask; /* get lowest bit */ 502 ormask ^= bit; /* remove lowest bit */ 503 if (src_mask & dst_mask & bit) { /* matching channel */ 504 idxary[n++] = srcidx++; 505 } else if (src_mask & bit) { /* source channel only */ 506 ++srcidx; 507 } else { /* destination channel only */ 508 idxary[n++] = -1; 509 } 510 } 511 return n + popcount(ormask & dst_mask); 512 } 513 514 size_t memcpy_by_index_array_initialization_src_index(int8_t *idxary, size_t idxcount, 515 uint32_t dst_mask, uint32_t src_mask) { 516 size_t dst_count = popcount(dst_mask); 517 if (idxcount == 0) { 518 return dst_count; 519 } 520 if (dst_count > idxcount) { 521 dst_count = idxcount; 522 } 523 524 size_t src_idx, dst_idx; 525 for (src_idx = 0, dst_idx = 0; dst_idx < dst_count; ++dst_idx) { 526 if (src_mask & 1) { 527 idxary[dst_idx] = src_idx++; 528 } else { 529 idxary[dst_idx] = -1; 530 } 531 src_mask >>= 1; 532 } 533 return dst_idx; 534 } 535 536 size_t memcpy_by_index_array_initialization_dst_index(int8_t *idxary, size_t idxcount, 537 uint32_t dst_mask, uint32_t src_mask) { 538 size_t src_idx, dst_idx; 539 size_t dst_count = __builtin_popcount(dst_mask); 540 size_t src_count = __builtin_popcount(src_mask); 541 if (idxcount == 0) { 542 return dst_count; 543 } 544 if (dst_count > idxcount) { 545 dst_count = idxcount; 546 } 547 for (src_idx = 0, dst_idx = 0; dst_idx < dst_count; ++src_idx) { 548 if (dst_mask & 1) { 549 idxary[dst_idx++] = src_idx < src_count ? (signed)src_idx : -1; 550 } 551 dst_mask >>= 1; 552 } 553 return dst_idx; 554 } 555 556 void accumulate_i16(int16_t *dst, const int16_t *src, size_t count) { 557 while (count--) { 558 *dst = clamp16((int32_t)*dst + *src++); 559 ++dst; 560 } 561 } 562 563 void accumulate_u8(uint8_t *dst, const uint8_t *src, size_t count) { 564 int32_t sum; 565 for (; count > 0; --count) { 566 // 8-bit samples are centered around 0x80. 567 sum = *dst + *src++ - 0x80; 568 // Clamp to [0, 0xff]. 569 *dst++ = (sum & 0x100) ? (~sum >> 9) : sum; 570 } 571 } 572 573 void accumulate_p24(uint8_t *dst, const uint8_t *src, size_t count) { 574 for (; count > 0; --count) { 575 // Unpack. 576 int32_t dst_q8_23 = 0; 577 int32_t src_q8_23 = 0; 578 memcpy_to_q8_23_from_p24(&dst_q8_23, dst, 1); 579 memcpy_to_q8_23_from_p24(&src_q8_23, src, 1); 580 581 // Accumulate and overwrite. 582 dst_q8_23 += src_q8_23; 583 memcpy_to_p24_from_q8_23(dst, &dst_q8_23, 1); 584 585 // Move on to next sample. 586 dst += 3; 587 src += 3; 588 } 589 } 590 591 void accumulate_q8_23(int32_t *dst, const int32_t *src, size_t count) { 592 for (; count > 0; --count) { 593 *dst = clamp24_from_q8_23(*dst + *src++); 594 ++dst; 595 } 596 } 597 598 void accumulate_i32(int32_t *dst, const int32_t *src, size_t count) { 599 for (; count > 0; --count) { 600 *dst = clamp32((int64_t)*dst + *src++); 601 ++dst; 602 } 603 } 604 605 void accumulate_float(float *dst, const float *src, size_t count) { 606 for (; count > 0; --count) { 607 *dst++ += *src++; 608 } 609 } 610