1 #include "rs_core.rsh" 2 #include "rs_f16_util.h" 3 4 extern float2 __attribute__((overloadable)) convert_float2(int2 c); 5 extern float3 __attribute__((overloadable)) convert_float3(int3 c); 6 extern float4 __attribute__((overloadable)) convert_float4(int4 c); 7 8 extern int2 __attribute__((overloadable)) convert_int2(float2 c); 9 extern int3 __attribute__((overloadable)) convert_int3(float3 c); 10 extern int4 __attribute__((overloadable)) convert_int4(float4 c); 11 12 13 extern float __attribute__((overloadable)) fmin(float v, float v2); 14 extern float2 __attribute__((overloadable)) fmin(float2 v, float v2); 15 extern float3 __attribute__((overloadable)) fmin(float3 v, float v2); 16 extern float4 __attribute__((overloadable)) fmin(float4 v, float v2); 17 18 extern float __attribute__((overloadable)) fmax(float v, float v2); 19 extern float2 __attribute__((overloadable)) fmax(float2 v, float v2); 20 extern float3 __attribute__((overloadable)) fmax(float3 v, float v2); 21 extern float4 __attribute__((overloadable)) fmax(float4 v, float v2); 22 23 // Float ops, 6.11.2 24 25 #define FN_FUNC_FN(fnc) \ 26 extern float2 __attribute__((overloadable)) fnc(float2 v) { \ 27 float2 r; \ 28 r.x = fnc(v.x); \ 29 r.y = fnc(v.y); \ 30 return r; \ 31 } \ 32 extern float3 __attribute__((overloadable)) fnc(float3 v) { \ 33 float3 r; \ 34 r.x = fnc(v.x); \ 35 r.y = fnc(v.y); \ 36 r.z = fnc(v.z); \ 37 return r; \ 38 } \ 39 extern float4 __attribute__((overloadable)) fnc(float4 v) { \ 40 float4 r; \ 41 r.x = fnc(v.x); \ 42 r.y = fnc(v.y); \ 43 r.z = fnc(v.z); \ 44 r.w = fnc(v.w); \ 45 return r; \ 46 } 47 48 #define IN_FUNC_FN(fnc) \ 49 extern int2 __attribute__((overloadable)) fnc(float2 v) { \ 50 int2 r; \ 51 r.x = fnc(v.x); \ 52 r.y = fnc(v.y); \ 53 return r; \ 54 } \ 55 extern int3 __attribute__((overloadable)) fnc(float3 v) { \ 56 int3 r; \ 57 r.x = fnc(v.x); \ 58 r.y = fnc(v.y); \ 59 r.z = fnc(v.z); \ 60 return r; \ 61 } \ 62 extern int4 __attribute__((overloadable)) 
fnc(float4 v) { \ 63 int4 r; \ 64 r.x = fnc(v.x); \ 65 r.y = fnc(v.y); \ 66 r.z = fnc(v.z); \ 67 r.w = fnc(v.w); \ 68 return r; \ 69 } 70 71 #define FN_FUNC_FN_FN(fnc) \ 72 extern float2 __attribute__((overloadable)) fnc(float2 v1, float2 v2) { \ 73 float2 r; \ 74 r.x = fnc(v1.x, v2.x); \ 75 r.y = fnc(v1.y, v2.y); \ 76 return r; \ 77 } \ 78 extern float3 __attribute__((overloadable)) fnc(float3 v1, float3 v2) { \ 79 float3 r; \ 80 r.x = fnc(v1.x, v2.x); \ 81 r.y = fnc(v1.y, v2.y); \ 82 r.z = fnc(v1.z, v2.z); \ 83 return r; \ 84 } \ 85 extern float4 __attribute__((overloadable)) fnc(float4 v1, float4 v2) { \ 86 float4 r; \ 87 r.x = fnc(v1.x, v2.x); \ 88 r.y = fnc(v1.y, v2.y); \ 89 r.z = fnc(v1.z, v2.z); \ 90 r.w = fnc(v1.w, v2.w); \ 91 return r; \ 92 } 93 94 #define FN_FUNC_FN_F(fnc) \ 95 extern float2 __attribute__((overloadable)) fnc(float2 v1, float v2) { \ 96 float2 r; \ 97 r.x = fnc(v1.x, v2); \ 98 r.y = fnc(v1.y, v2); \ 99 return r; \ 100 } \ 101 extern float3 __attribute__((overloadable)) fnc(float3 v1, float v2) { \ 102 float3 r; \ 103 r.x = fnc(v1.x, v2); \ 104 r.y = fnc(v1.y, v2); \ 105 r.z = fnc(v1.z, v2); \ 106 return r; \ 107 } \ 108 extern float4 __attribute__((overloadable)) fnc(float4 v1, float v2) { \ 109 float4 r; \ 110 r.x = fnc(v1.x, v2); \ 111 r.y = fnc(v1.y, v2); \ 112 r.z = fnc(v1.z, v2); \ 113 r.w = fnc(v1.w, v2); \ 114 return r; \ 115 } 116 117 #define FN_FUNC_FN_IN(fnc) \ 118 extern float2 __attribute__((overloadable)) fnc(float2 v1, int2 v2) { \ 119 float2 r; \ 120 r.x = fnc(v1.x, v2.x); \ 121 r.y = fnc(v1.y, v2.y); \ 122 return r; \ 123 } \ 124 extern float3 __attribute__((overloadable)) fnc(float3 v1, int3 v2) { \ 125 float3 r; \ 126 r.x = fnc(v1.x, v2.x); \ 127 r.y = fnc(v1.y, v2.y); \ 128 r.z = fnc(v1.z, v2.z); \ 129 return r; \ 130 } \ 131 extern float4 __attribute__((overloadable)) fnc(float4 v1, int4 v2) { \ 132 float4 r; \ 133 r.x = fnc(v1.x, v2.x); \ 134 r.y = fnc(v1.y, v2.y); \ 135 r.z = fnc(v1.z, v2.z); \ 136 r.w = fnc(v1.w, 
v2.w); \ 137 return r; \ 138 } 139 140 #define FN_FUNC_FN_I(fnc) \ 141 extern float2 __attribute__((overloadable)) fnc(float2 v1, int v2) { \ 142 float2 r; \ 143 r.x = fnc(v1.x, v2); \ 144 r.y = fnc(v1.y, v2); \ 145 return r; \ 146 } \ 147 extern float3 __attribute__((overloadable)) fnc(float3 v1, int v2) { \ 148 float3 r; \ 149 r.x = fnc(v1.x, v2); \ 150 r.y = fnc(v1.y, v2); \ 151 r.z = fnc(v1.z, v2); \ 152 return r; \ 153 } \ 154 extern float4 __attribute__((overloadable)) fnc(float4 v1, int v2) { \ 155 float4 r; \ 156 r.x = fnc(v1.x, v2); \ 157 r.y = fnc(v1.y, v2); \ 158 r.z = fnc(v1.z, v2); \ 159 r.w = fnc(v1.w, v2); \ 160 return r; \ 161 } 162 163 #define FN_FUNC_FN_PFN(fnc) \ 164 extern float2 __attribute__((overloadable)) \ 165 fnc(float2 v1, float2 *v2) { \ 166 float2 r; \ 167 float t[2]; \ 168 r.x = fnc(v1.x, &t[0]); \ 169 r.y = fnc(v1.y, &t[1]); \ 170 v2->x = t[0]; \ 171 v2->y = t[1]; \ 172 return r; \ 173 } \ 174 extern float3 __attribute__((overloadable)) \ 175 fnc(float3 v1, float3 *v2) { \ 176 float3 r; \ 177 float t[3]; \ 178 r.x = fnc(v1.x, &t[0]); \ 179 r.y = fnc(v1.y, &t[1]); \ 180 r.z = fnc(v1.z, &t[2]); \ 181 v2->x = t[0]; \ 182 v2->y = t[1]; \ 183 v2->z = t[2]; \ 184 return r; \ 185 } \ 186 extern float4 __attribute__((overloadable)) \ 187 fnc(float4 v1, float4 *v2) { \ 188 float4 r; \ 189 float t[4]; \ 190 r.x = fnc(v1.x, &t[0]); \ 191 r.y = fnc(v1.y, &t[1]); \ 192 r.z = fnc(v1.z, &t[2]); \ 193 r.w = fnc(v1.w, &t[3]); \ 194 v2->x = t[0]; \ 195 v2->y = t[1]; \ 196 v2->z = t[2]; \ 197 v2->w = t[3]; \ 198 return r; \ 199 } 200 201 #define FN_FUNC_FN_PIN(fnc) \ 202 extern float2 __attribute__((overloadable)) fnc(float2 v1, int2 *v2) { \ 203 float2 r; \ 204 int t[2]; \ 205 r.x = fnc(v1.x, &t[0]); \ 206 r.y = fnc(v1.y, &t[1]); \ 207 v2->x = t[0]; \ 208 v2->y = t[1]; \ 209 return r; \ 210 } \ 211 extern float3 __attribute__((overloadable)) fnc(float3 v1, int3 *v2) { \ 212 float3 r; \ 213 int t[3]; \ 214 r.x = fnc(v1.x, &t[0]); \ 215 r.y = fnc(v1.y, 
&t[1]); \ 216 r.z = fnc(v1.z, &t[2]); \ 217 v2->x = t[0]; \ 218 v2->y = t[1]; \ 219 v2->z = t[2]; \ 220 return r; \ 221 } \ 222 extern float4 __attribute__((overloadable)) fnc(float4 v1, int4 *v2) { \ 223 float4 r; \ 224 int t[4]; \ 225 r.x = fnc(v1.x, &t[0]); \ 226 r.y = fnc(v1.y, &t[1]); \ 227 r.z = fnc(v1.z, &t[2]); \ 228 r.w = fnc(v1.w, &t[3]); \ 229 v2->x = t[0]; \ 230 v2->y = t[1]; \ 231 v2->z = t[2]; \ 232 v2->w = t[3]; \ 233 return r; \ 234 } 235 236 #define FN_FUNC_FN_FN_FN(fnc) \ 237 extern float2 __attribute__((overloadable)) \ 238 fnc(float2 v1, float2 v2, float2 v3) { \ 239 float2 r; \ 240 r.x = fnc(v1.x, v2.x, v3.x); \ 241 r.y = fnc(v1.y, v2.y, v3.y); \ 242 return r; \ 243 } \ 244 extern float3 __attribute__((overloadable)) \ 245 fnc(float3 v1, float3 v2, float3 v3) { \ 246 float3 r; \ 247 r.x = fnc(v1.x, v2.x, v3.x); \ 248 r.y = fnc(v1.y, v2.y, v3.y); \ 249 r.z = fnc(v1.z, v2.z, v3.z); \ 250 return r; \ 251 } \ 252 extern float4 __attribute__((overloadable)) \ 253 fnc(float4 v1, float4 v2, float4 v3) { \ 254 float4 r; \ 255 r.x = fnc(v1.x, v2.x, v3.x); \ 256 r.y = fnc(v1.y, v2.y, v3.y); \ 257 r.z = fnc(v1.z, v2.z, v3.z); \ 258 r.w = fnc(v1.w, v2.w, v3.w); \ 259 return r; \ 260 } 261 262 #define FN_FUNC_FN_FN_PIN(fnc) \ 263 extern float2 __attribute__((overloadable)) \ 264 fnc(float2 v1, float2 v2, int2 *v3) { \ 265 float2 r; \ 266 int t[2]; \ 267 r.x = fnc(v1.x, v2.x, &t[0]); \ 268 r.y = fnc(v1.y, v2.y, &t[1]); \ 269 v3->x = t[0]; \ 270 v3->y = t[1]; \ 271 return r; \ 272 } \ 273 extern float3 __attribute__((overloadable)) \ 274 fnc(float3 v1, float3 v2, int3 *v3) { \ 275 float3 r; \ 276 int t[3]; \ 277 r.x = fnc(v1.x, v2.x, &t[0]); \ 278 r.y = fnc(v1.y, v2.y, &t[1]); \ 279 r.z = fnc(v1.z, v2.z, &t[2]); \ 280 v3->x = t[0]; \ 281 v3->y = t[1]; \ 282 v3->z = t[2]; \ 283 return r; \ 284 } \ 285 extern float4 __attribute__((overloadable)) \ 286 fnc(float4 v1, float4 v2, int4 *v3) { \ 287 float4 r; \ 288 int t[4]; \ 289 r.x = fnc(v1.x, v2.x, &t[0]); \ 290 
r.y = fnc(v1.y, v2.y, &t[1]); \ 291 r.z = fnc(v1.z, v2.z, &t[2]); \ 292 r.w = fnc(v1.w, v2.w, &t[3]); \ 293 v3->x = t[0]; \ 294 v3->y = t[1]; \ 295 v3->z = t[2]; \ 296 v3->w = t[3]; \ 297 return r; \ 298 } 299 300 static const unsigned int iposinf = 0x7f800000; 301 static const unsigned int ineginf = 0xff800000; 302 303 static float posinf() { 304 float f = *((float*)&iposinf); 305 return f; 306 } 307 308 static unsigned int float_bits(float f) { 309 /* TODO(jeanluc) Use this better approach once the Mac(SDK) build issues are fixed. 310 // Get the bits while following the strict aliasing rules. 311 unsigned int result; 312 memcpy(&result, &f, sizeof(f)); 313 return result; 314 */ 315 return *(unsigned int*)(char*)(&f); 316 } 317 318 static bool isinf(float f) { 319 unsigned int i = float_bits(f); 320 return (i == iposinf) || (i == ineginf); 321 } 322 323 static bool isnan(float f) { 324 unsigned int i = float_bits(f); 325 return (((i & 0x7f800000) == 0x7f800000) && (i & 0x007fffff)); 326 } 327 328 static bool isposzero(float f) { 329 return (float_bits(f) == 0x00000000); 330 } 331 332 static bool isnegzero(float f) { 333 return (float_bits(f) == 0x80000000); 334 } 335 336 static bool iszero(float f) { 337 return isposzero(f) || isnegzero(f); 338 } 339 340 341 extern float __attribute__((overloadable)) SC_acosf(float); 342 float __attribute__((overloadable)) acos(float v) { 343 return SC_acosf(v); 344 } 345 FN_FUNC_FN(acos) 346 347 extern float __attribute__((overloadable)) SC_acoshf(float); 348 float __attribute__((overloadable)) acosh(float v) { 349 return SC_acoshf(v); 350 } 351 FN_FUNC_FN(acosh) 352 353 354 extern float __attribute__((overloadable)) acospi(float v) { 355 return acos(v) / M_PI; 356 } 357 FN_FUNC_FN(acospi) 358 359 extern float __attribute__((overloadable)) SC_asinf(float); 360 float __attribute__((overloadable)) asin(float v) { 361 return SC_asinf(v); 362 } 363 FN_FUNC_FN(asin) 364 365 extern float __attribute__((overloadable)) SC_asinhf(float); 
366 float __attribute__((overloadable)) asinh(float v) { 367 return SC_asinhf(v); 368 } 369 FN_FUNC_FN(asinh) 370 371 extern float __attribute__((overloadable)) asinpi(float v) { 372 return asin(v) / M_PI; 373 } 374 FN_FUNC_FN(asinpi) 375 376 extern float __attribute__((overloadable)) SC_atanf(float); 377 float __attribute__((overloadable)) atan(float v) { 378 return SC_atanf(v); 379 } 380 FN_FUNC_FN(atan) 381 382 extern float __attribute__((overloadable)) SC_atan2f(float, float); 383 float __attribute__((overloadable)) atan2(float v1, float v2) { 384 return SC_atan2f(v1, v2); 385 } 386 FN_FUNC_FN_FN(atan2) 387 388 extern float __attribute__((overloadable)) SC_atanhf(float); 389 float __attribute__((overloadable)) atanh(float v) { 390 return SC_atanhf(v); 391 } 392 FN_FUNC_FN(atanh) 393 394 extern float __attribute__((overloadable)) atanpi(float v) { 395 return atan(v) / M_PI; 396 } 397 FN_FUNC_FN(atanpi) 398 399 400 extern float __attribute__((overloadable)) atan2pi(float y, float x) { 401 return atan2(y, x) / M_PI; 402 } 403 FN_FUNC_FN_FN(atan2pi) 404 405 extern float __attribute__((overloadable)) SC_cbrtf(float); 406 float __attribute__((overloadable)) cbrt(float v) { 407 return SC_cbrtf(v); 408 } 409 FN_FUNC_FN(cbrt) 410 411 extern float __attribute__((overloadable)) SC_ceilf(float); 412 float __attribute__((overloadable)) ceil(float v) { 413 return SC_ceilf(v); 414 } 415 FN_FUNC_FN(ceil) 416 417 extern float __attribute__((overloadable)) SC_copysignf(float, float); 418 float __attribute__((overloadable)) copysign(float v1, float v2) { 419 return SC_copysignf(v1, v2); 420 } 421 FN_FUNC_FN_FN(copysign) 422 423 extern float __attribute__((overloadable)) SC_cosf(float); 424 float __attribute__((overloadable)) cos(float v) { 425 return SC_cosf(v); 426 } 427 FN_FUNC_FN(cos) 428 429 extern float __attribute__((overloadable)) SC_coshf(float); 430 float __attribute__((overloadable)) cosh(float v) { 431 return SC_coshf(v); 432 } 433 FN_FUNC_FN(cosh) 434 435 extern float 
__attribute__((overloadable)) cospi(float v) { 436 return cos(v * M_PI); 437 } 438 FN_FUNC_FN(cospi) 439 440 extern float __attribute__((overloadable)) SC_erfcf(float); 441 float __attribute__((overloadable)) erfc(float v) { 442 return SC_erfcf(v); 443 } 444 FN_FUNC_FN(erfc) 445 446 extern float __attribute__((overloadable)) SC_erff(float); 447 float __attribute__((overloadable)) erf(float v) { 448 return SC_erff(v); 449 } 450 FN_FUNC_FN(erf) 451 452 extern float __attribute__((overloadable)) SC_expf(float); 453 float __attribute__((overloadable)) exp(float v) { 454 return SC_expf(v); 455 } 456 FN_FUNC_FN(exp) 457 458 extern float __attribute__((overloadable)) SC_exp2f(float); 459 float __attribute__((overloadable)) exp2(float v) { 460 return SC_exp2f(v); 461 } 462 FN_FUNC_FN(exp2) 463 464 extern float __attribute__((overloadable)) pow(float, float); 465 466 extern float __attribute__((overloadable)) exp10(float v) { 467 return exp2(v * 3.321928095f); 468 } 469 FN_FUNC_FN(exp10) 470 471 extern float __attribute__((overloadable)) SC_expm1f(float); 472 float __attribute__((overloadable)) expm1(float v) { 473 return SC_expm1f(v); 474 } 475 FN_FUNC_FN(expm1) 476 477 extern float __attribute__((overloadable)) fabs(float v) { 478 int i = *((int*)(void*)&v) & 0x7fffffff; 479 return *((float*)(void*)&i); 480 } 481 FN_FUNC_FN(fabs) 482 483 extern float __attribute__((overloadable)) SC_fdimf(float, float); 484 float __attribute__((overloadable)) fdim(float v1, float v2) { 485 return SC_fdimf(v1, v2); 486 } 487 FN_FUNC_FN_FN(fdim) 488 489 extern float __attribute__((overloadable)) SC_floorf(float); 490 float __attribute__((overloadable)) floor(float v) { 491 return SC_floorf(v); 492 } 493 FN_FUNC_FN(floor) 494 495 extern float __attribute__((overloadable)) SC_fmaf(float, float, float); 496 float __attribute__((overloadable)) fma(float v1, float v2, float v3) { 497 return SC_fmaf(v1, v2, v3); 498 } 499 FN_FUNC_FN_FN_FN(fma) 500 501 extern float __attribute__((overloadable)) 
SC_fminf(float, float); 502 503 extern float __attribute__((overloadable)) SC_fmodf(float, float); 504 float __attribute__((overloadable)) fmod(float v1, float v2) { 505 return SC_fmodf(v1, v2); 506 } 507 FN_FUNC_FN_FN(fmod) 508 509 extern float __attribute__((overloadable)) fract(float v, float *iptr) { 510 int i = (int)floor(v); 511 if (iptr) { 512 iptr[0] = i; 513 } 514 return fmin(v - i, 0x1.fffffep-1f); 515 } 516 FN_FUNC_FN_PFN(fract) 517 518 extern float __attribute__((const, overloadable)) fract(float v) { 519 float unused; 520 return fract(v, &unused); 521 } 522 FN_FUNC_FN(fract) 523 524 extern float __attribute__((overloadable)) SC_frexpf(float, int *); 525 float __attribute__((overloadable)) frexp(float v1, int* v2) { 526 return SC_frexpf(v1, v2); 527 } 528 FN_FUNC_FN_PIN(frexp) 529 530 extern float __attribute__((overloadable)) SC_hypotf(float, float); 531 float __attribute__((overloadable)) hypot(float v1, float v2) { 532 return SC_hypotf(v1, v2); 533 } 534 FN_FUNC_FN_FN(hypot) 535 536 extern int __attribute__((overloadable)) SC_ilogbf(float); 537 int __attribute__((overloadable)) ilogb(float v) { 538 return SC_ilogbf(v); 539 } 540 IN_FUNC_FN(ilogb) 541 542 extern float __attribute__((overloadable)) SC_ldexpf(float, int); 543 float __attribute__((overloadable)) ldexp(float v1, int v2) { 544 return SC_ldexpf(v1, v2); 545 } 546 FN_FUNC_FN_IN(ldexp) 547 FN_FUNC_FN_I(ldexp) 548 549 extern float __attribute__((overloadable)) SC_lgammaf(float); 550 float __attribute__((overloadable)) lgamma(float v) { 551 return SC_lgammaf(v); 552 } 553 FN_FUNC_FN(lgamma) 554 extern float __attribute__((overloadable)) SC_lgammaf_r(float, int*); 555 float __attribute__((overloadable)) lgamma(float v, int* ptr) { 556 return SC_lgammaf_r(v, ptr); 557 } 558 FN_FUNC_FN_PIN(lgamma) 559 560 extern float __attribute__((overloadable)) SC_logf(float); 561 float __attribute__((overloadable)) log(float v) { 562 return SC_logf(v); 563 } 564 FN_FUNC_FN(log) 565 566 extern float 
__attribute__((overloadable)) SC_log10f(float); 567 float __attribute__((overloadable)) log10(float v) { 568 return SC_log10f(v); 569 } 570 FN_FUNC_FN(log10) 571 572 573 extern float __attribute__((overloadable)) log2(float v) { 574 return log10(v) * 3.321928095f; 575 } 576 FN_FUNC_FN(log2) 577 578 extern float __attribute__((overloadable)) SC_log1pf(float); 579 float __attribute__((overloadable)) log1p(float v) { 580 return SC_log1pf(v); 581 } 582 FN_FUNC_FN(log1p) 583 584 extern float __attribute__((overloadable)) SC_logbf(float); 585 float __attribute__((overloadable)) logb(float v) { 586 return SC_logbf(v); 587 } 588 FN_FUNC_FN(logb) 589 590 extern float __attribute__((overloadable)) mad(float a, float b, float c) { 591 return a * b + c; 592 } 593 extern float2 __attribute__((overloadable)) mad(float2 a, float2 b, float2 c) { 594 return a * b + c; 595 } 596 extern float3 __attribute__((overloadable)) mad(float3 a, float3 b, float3 c) { 597 return a * b + c; 598 } 599 extern float4 __attribute__((overloadable)) mad(float4 a, float4 b, float4 c) { 600 return a * b + c; 601 } 602 603 extern float __attribute__((overloadable)) SC_modff(float, float *); 604 float __attribute__((overloadable)) modf(float v1, float *v2) { 605 return SC_modff(v1, v2); 606 } 607 FN_FUNC_FN_PFN(modf); 608 609 extern float __attribute__((overloadable)) nan(uint v) { 610 float f[1]; 611 uint32_t *ip = (uint32_t *)f; 612 *ip = v | 0x7fc00000; 613 return f[0]; 614 } 615 616 extern float __attribute__((overloadable)) SC_nextafterf(float, float); 617 float __attribute__((overloadable)) nextafter(float v1, float v2) { 618 return SC_nextafterf(v1, v2); 619 } 620 FN_FUNC_FN_FN(nextafter) 621 622 // This function must be defined here if we're compiling with debug info 623 // (libclcore_g.bc), because we need a C source to get debug information. 624 // Otherwise the implementation can be found in IR. 
625 #if defined(RS_G_RUNTIME) 626 extern float __attribute__((overloadable)) SC_powf(float, float); 627 float __attribute__((overloadable)) pow(float v1, float v2) { 628 return SC_powf(v1, v2); 629 } 630 #endif // defined(RS_G_RUNTIME) 631 FN_FUNC_FN_FN(pow) 632 633 extern float __attribute__((overloadable)) pown(float v, int p) { 634 /* The mantissa of a float has fewer bits than an int (24 effective vs. 31). 635 * For very large ints, we'll lose whether the exponent is even or odd, making 636 * the selection of a correct sign incorrect. We correct this. Use copysign 637 * to handle the negative zero case. 638 */ 639 float sign = (p & 0x1) ? copysign(1.f, v) : 1.f; 640 float f = pow(v, (float)p); 641 return copysign(f, sign); 642 } 643 FN_FUNC_FN_IN(pown) 644 645 extern float __attribute__((overloadable)) powr(float v, float p) { 646 return pow(v, p); 647 } 648 extern float2 __attribute__((overloadable)) powr(float2 v, float2 p) { 649 return pow(v, p); 650 } 651 extern float3 __attribute__((overloadable)) powr(float3 v, float3 p) { 652 return pow(v, p); 653 } 654 extern float4 __attribute__((overloadable)) powr(float4 v, float4 p) { 655 return pow(v, p); 656 } 657 658 extern float __attribute__((overloadable)) SC_remainderf(float, float); 659 float __attribute__((overloadable)) remainder(float v1, float v2) { 660 return SC_remainderf(v1, v2); 661 } 662 FN_FUNC_FN_FN(remainder) 663 664 extern float __attribute__((overloadable)) SC_remquof(float, float, int *); 665 float __attribute__((overloadable)) remquo(float v1, float v2, int *v3) { 666 return SC_remquof(v1, v2, v3); 667 } 668 FN_FUNC_FN_FN_PIN(remquo) 669 670 extern float __attribute__((overloadable)) SC_rintf(float); 671 float __attribute__((overloadable)) rint(float v) { 672 return SC_rintf(v); 673 } 674 FN_FUNC_FN(rint) 675 676 extern float __attribute__((overloadable)) rootn(float v, int r) { 677 if (r == 0) { 678 return posinf(); 679 } 680 681 if (iszero(v)) { 682 if (r < 0) { 683 if (r & 1) { 684 return 
copysign(posinf(), v); 685 } else { 686 return posinf(); 687 } 688 } else { 689 if (r & 1) { 690 return copysign(0.f, v); 691 } else { 692 return 0.f; 693 } 694 } 695 } 696 697 if (!isinf(v) && !isnan(v) && (v < 0.f)) { 698 if (r & 1) { 699 return (-1.f * pow(-1.f * v, 1.f / r)); 700 } else { 701 return nan(0); 702 } 703 } 704 705 return pow(v, 1.f / r); 706 } 707 FN_FUNC_FN_IN(rootn); 708 709 extern float __attribute__((overloadable)) SC_roundf(float); 710 float __attribute__((overloadable)) round(float v) { 711 return SC_roundf(v); 712 } 713 FN_FUNC_FN(round) 714 715 extern float __attribute__((overloadable)) SC_randf2(float, float); 716 float __attribute__((overloadable)) rsRand(float min, float max) { 717 return SC_randf2(min, max); 718 } 719 720 721 extern float __attribute__((overloadable)) rsqrt(float v) { 722 return 1.f / sqrt(v); 723 } 724 725 #if !defined(ARCH_X86_HAVE_SSSE3) || defined(RS_DEBUG_RUNTIME) || defined(RS_G_RUNTIME) 726 // These functions must be defined here if we are not using the SSE 727 // implementation, which includes when we are built as part of the 728 // debug runtime (libclcore_debug.bc) or compiling with debug info. 
729 #if defined(RS_G_RUNTIME) 730 extern float __attribute__((overloadable)) SC_sqrtf(float); 731 float __attribute__((overloadable)) sqrt(float v) { 732 return SC_sqrtf(v); 733 } 734 #endif // defined(RS_G_RUNTIME) 735 736 FN_FUNC_FN(sqrt) 737 #else 738 extern float2 __attribute__((overloadable)) sqrt(float2); 739 extern float3 __attribute__((overloadable)) sqrt(float3); 740 extern float4 __attribute__((overloadable)) sqrt(float4); 741 #endif // !defined(ARCH_X86_HAVE_SSSE3) || defined(RS_DEBUG_RUNTIME) || defined(RS_G_RUNTIME) 742 743 FN_FUNC_FN(rsqrt) 744 745 extern float __attribute__((overloadable)) SC_sinf(float); 746 float __attribute__((overloadable)) sin(float v) { 747 return SC_sinf(v); 748 } 749 FN_FUNC_FN(sin) 750 751 extern float __attribute__((overloadable)) sincos(float v, float *cosptr) { 752 *cosptr = cos(v); 753 return sin(v); 754 } 755 extern float2 __attribute__((overloadable)) sincos(float2 v, float2 *cosptr) { 756 *cosptr = cos(v); 757 return sin(v); 758 } 759 extern float3 __attribute__((overloadable)) sincos(float3 v, float3 *cosptr) { 760 *cosptr = cos(v); 761 return sin(v); 762 } 763 extern float4 __attribute__((overloadable)) sincos(float4 v, float4 *cosptr) { 764 *cosptr = cos(v); 765 return sin(v); 766 } 767 768 extern float __attribute__((overloadable)) SC_sinhf(float); 769 float __attribute__((overloadable)) sinh(float v) { 770 return SC_sinhf(v); 771 } 772 FN_FUNC_FN(sinh) 773 774 extern float __attribute__((overloadable)) sinpi(float v) { 775 return sin(v * M_PI); 776 } 777 FN_FUNC_FN(sinpi) 778 779 extern float __attribute__((overloadable)) SC_tanf(float); 780 float __attribute__((overloadable)) tan(float v) { 781 return SC_tanf(v); 782 } 783 FN_FUNC_FN(tan) 784 785 extern float __attribute__((overloadable)) SC_tanhf(float); 786 float __attribute__((overloadable)) tanh(float v) { 787 return SC_tanhf(v); 788 } 789 FN_FUNC_FN(tanh) 790 791 extern float __attribute__((overloadable)) tanpi(float v) { 792 return tan(v * M_PI); 793 } 794 
FN_FUNC_FN(tanpi) 795 796 797 extern float __attribute__((overloadable)) SC_tgammaf(float); 798 float __attribute__((overloadable)) tgamma(float v) { 799 return SC_tgammaf(v); 800 } 801 FN_FUNC_FN(tgamma) 802 803 extern float __attribute__((overloadable)) SC_truncf(float); 804 float __attribute__((overloadable)) trunc(float v) { 805 return SC_truncf(v); 806 } 807 FN_FUNC_FN(trunc) 808 809 // Int ops (partial), 6.11.3 810 811 #define XN_FUNC_YN(typeout, fnc, typein) \ 812 extern typeout __attribute__((overloadable)) fnc(typein); \ 813 extern typeout##2 __attribute__((overloadable)) fnc(typein##2 v) { \ 814 typeout##2 r; \ 815 r.x = fnc(v.x); \ 816 r.y = fnc(v.y); \ 817 return r; \ 818 } \ 819 extern typeout##3 __attribute__((overloadable)) fnc(typein##3 v) { \ 820 typeout##3 r; \ 821 r.x = fnc(v.x); \ 822 r.y = fnc(v.y); \ 823 r.z = fnc(v.z); \ 824 return r; \ 825 } \ 826 extern typeout##4 __attribute__((overloadable)) fnc(typein##4 v) { \ 827 typeout##4 r; \ 828 r.x = fnc(v.x); \ 829 r.y = fnc(v.y); \ 830 r.z = fnc(v.z); \ 831 r.w = fnc(v.w); \ 832 return r; \ 833 } 834 835 836 #define UIN_FUNC_IN(fnc) \ 837 XN_FUNC_YN(uchar, fnc, char) \ 838 XN_FUNC_YN(ushort, fnc, short) \ 839 XN_FUNC_YN(uint, fnc, int) 840 841 #define IN_FUNC_IN(fnc) \ 842 XN_FUNC_YN(uchar, fnc, uchar) \ 843 XN_FUNC_YN(char, fnc, char) \ 844 XN_FUNC_YN(ushort, fnc, ushort) \ 845 XN_FUNC_YN(short, fnc, short) \ 846 XN_FUNC_YN(uint, fnc, uint) \ 847 XN_FUNC_YN(int, fnc, int) 848 849 850 #define XN_FUNC_XN_XN_BODY(type, fnc, body) \ 851 extern type __attribute__((overloadable)) \ 852 fnc(type v1, type v2) { \ 853 return body; \ 854 } \ 855 extern type##2 __attribute__((overloadable)) \ 856 fnc(type##2 v1, type##2 v2) { \ 857 type##2 r; \ 858 r.x = fnc(v1.x, v2.x); \ 859 r.y = fnc(v1.y, v2.y); \ 860 return r; \ 861 } \ 862 extern type##3 __attribute__((overloadable)) \ 863 fnc(type##3 v1, type##3 v2) { \ 864 type##3 r; \ 865 r.x = fnc(v1.x, v2.x); \ 866 r.y = fnc(v1.y, v2.y); \ 867 r.z = fnc(v1.z, 
v2.z); \ 868 return r; \ 869 } \ 870 extern type##4 __attribute__((overloadable)) \ 871 fnc(type##4 v1, type##4 v2) { \ 872 type##4 r; \ 873 r.x = fnc(v1.x, v2.x); \ 874 r.y = fnc(v1.y, v2.y); \ 875 r.z = fnc(v1.z, v2.z); \ 876 r.w = fnc(v1.w, v2.w); \ 877 return r; \ 878 } 879 880 #define IN_FUNC_IN_IN_BODY(fnc, body) \ 881 XN_FUNC_XN_XN_BODY(uchar, fnc, body) \ 882 XN_FUNC_XN_XN_BODY(char, fnc, body) \ 883 XN_FUNC_XN_XN_BODY(ushort, fnc, body) \ 884 XN_FUNC_XN_XN_BODY(short, fnc, body) \ 885 XN_FUNC_XN_XN_BODY(uint, fnc, body) \ 886 XN_FUNC_XN_XN_BODY(int, fnc, body) \ 887 XN_FUNC_XN_XN_BODY(float, fnc, body) 888 889 890 /** 891 * abs 892 */ 893 extern uint32_t __attribute__((overloadable)) abs(int32_t v) { 894 if (v < 0) 895 return -v; 896 return v; 897 } 898 extern uint16_t __attribute__((overloadable)) abs(int16_t v) { 899 if (v < 0) 900 return -v; 901 return v; 902 } 903 extern uint8_t __attribute__((overloadable)) abs(int8_t v) { 904 if (v < 0) 905 return -v; 906 return v; 907 } 908 909 /** 910 * clz 911 * __builtin_clz only accepts a 32-bit unsigned int, so every input will be 912 * expanded to 32 bits. For our smaller data types, we need to subtract off 913 * these unused top bits (that will be always be composed of zeros). 
914 */ 915 extern uint32_t __attribute__((overloadable)) clz(uint32_t v) { 916 return __builtin_clz(v); 917 } 918 extern uint16_t __attribute__((overloadable)) clz(uint16_t v) { 919 return __builtin_clz(v) - 16; 920 } 921 extern uint8_t __attribute__((overloadable)) clz(uint8_t v) { 922 return __builtin_clz(v) - 24; 923 } 924 extern int32_t __attribute__((overloadable)) clz(int32_t v) { 925 return __builtin_clz(v); 926 } 927 extern int16_t __attribute__((overloadable)) clz(int16_t v) { 928 return __builtin_clz(((uint32_t)v) & 0x0000ffff) - 16; 929 } 930 extern int8_t __attribute__((overloadable)) clz(int8_t v) { 931 return __builtin_clz(((uint32_t)v) & 0x000000ff) - 24; 932 } 933 934 935 UIN_FUNC_IN(abs) 936 IN_FUNC_IN(clz) 937 938 939 // 6.11.4 940 941 942 extern float __attribute__((overloadable)) degrees(float radians) { 943 return radians * (180.f / M_PI); 944 } 945 extern float2 __attribute__((overloadable)) degrees(float2 radians) { 946 return radians * (180.f / M_PI); 947 } 948 extern float3 __attribute__((overloadable)) degrees(float3 radians) { 949 return radians * (180.f / M_PI); 950 } 951 extern float4 __attribute__((overloadable)) degrees(float4 radians) { 952 return radians * (180.f / M_PI); 953 } 954 955 extern float __attribute__((overloadable)) mix(float start, float stop, float amount) { 956 return start + (stop - start) * amount; 957 } 958 extern float2 __attribute__((overloadable)) mix(float2 start, float2 stop, float2 amount) { 959 return start + (stop - start) * amount; 960 } 961 extern float3 __attribute__((overloadable)) mix(float3 start, float3 stop, float3 amount) { 962 return start + (stop - start) * amount; 963 } 964 extern float4 __attribute__((overloadable)) mix(float4 start, float4 stop, float4 amount) { 965 return start + (stop - start) * amount; 966 } 967 extern float2 __attribute__((overloadable)) mix(float2 start, float2 stop, float amount) { 968 return start + (stop - start) * amount; 969 } 970 extern float3 
__attribute__((overloadable)) mix(float3 start, float3 stop, float amount) { 971 return start + (stop - start) * amount; 972 } 973 extern float4 __attribute__((overloadable)) mix(float4 start, float4 stop, float amount) { 974 return start + (stop - start) * amount; 975 } 976 977 extern float __attribute__((overloadable)) radians(float degrees) { 978 return degrees * (M_PI / 180.f); 979 } 980 extern float2 __attribute__((overloadable)) radians(float2 degrees) { 981 return degrees * (M_PI / 180.f); 982 } 983 extern float3 __attribute__((overloadable)) radians(float3 degrees) { 984 return degrees * (M_PI / 180.f); 985 } 986 extern float4 __attribute__((overloadable)) radians(float4 degrees) { 987 return degrees * (M_PI / 180.f); 988 } 989 990 extern float __attribute__((overloadable)) step(float edge, float v) { 991 return (v < edge) ? 0.f : 1.f; 992 } 993 extern float2 __attribute__((overloadable)) step(float2 edge, float2 v) { 994 float2 r; 995 r.x = (v.x < edge.x) ? 0.f : 1.f; 996 r.y = (v.y < edge.y) ? 0.f : 1.f; 997 return r; 998 } 999 extern float3 __attribute__((overloadable)) step(float3 edge, float3 v) { 1000 float3 r; 1001 r.x = (v.x < edge.x) ? 0.f : 1.f; 1002 r.y = (v.y < edge.y) ? 0.f : 1.f; 1003 r.z = (v.z < edge.z) ? 0.f : 1.f; 1004 return r; 1005 } 1006 extern float4 __attribute__((overloadable)) step(float4 edge, float4 v) { 1007 float4 r; 1008 r.x = (v.x < edge.x) ? 0.f : 1.f; 1009 r.y = (v.y < edge.y) ? 0.f : 1.f; 1010 r.z = (v.z < edge.z) ? 0.f : 1.f; 1011 r.w = (v.w < edge.w) ? 0.f : 1.f; 1012 return r; 1013 } 1014 extern float2 __attribute__((overloadable)) step(float2 edge, float v) { 1015 float2 r; 1016 r.x = (v < edge.x) ? 0.f : 1.f; 1017 r.y = (v < edge.y) ? 0.f : 1.f; 1018 return r; 1019 } 1020 extern float3 __attribute__((overloadable)) step(float3 edge, float v) { 1021 float3 r; 1022 r.x = (v < edge.x) ? 0.f : 1.f; 1023 r.y = (v < edge.y) ? 0.f : 1.f; 1024 r.z = (v < edge.z) ? 
0.f : 1.f; 1025 return r; 1026 } 1027 extern float4 __attribute__((overloadable)) step(float4 edge, float v) { 1028 float4 r; 1029 r.x = (v < edge.x) ? 0.f : 1.f; 1030 r.y = (v < edge.y) ? 0.f : 1.f; 1031 r.z = (v < edge.z) ? 0.f : 1.f; 1032 r.w = (v < edge.w) ? 0.f : 1.f; 1033 return r; 1034 } 1035 extern float2 __attribute__((overloadable)) step(float edge, float2 v) { 1036 float2 r; 1037 r.x = (v.x < edge) ? 0.f : 1.f; 1038 r.y = (v.y < edge) ? 0.f : 1.f; 1039 return r; 1040 } 1041 extern float3 __attribute__((overloadable)) step(float edge, float3 v) { 1042 float3 r; 1043 r.x = (v.x < edge) ? 0.f : 1.f; 1044 r.y = (v.y < edge) ? 0.f : 1.f; 1045 r.z = (v.z < edge) ? 0.f : 1.f; 1046 return r; 1047 } 1048 extern float4 __attribute__((overloadable)) step(float edge, float4 v) { 1049 float4 r; 1050 r.x = (v.x < edge) ? 0.f : 1.f; 1051 r.y = (v.y < edge) ? 0.f : 1.f; 1052 r.z = (v.z < edge) ? 0.f : 1.f; 1053 r.w = (v.w < edge) ? 0.f : 1.f; 1054 return r; 1055 } 1056 1057 extern float __attribute__((overloadable)) sign(float v) { 1058 if (v > 0) return 1.f; 1059 if (v < 0) return -1.f; 1060 return v; 1061 } 1062 FN_FUNC_FN(sign) 1063 1064 1065 // 6.11.5 1066 extern float3 __attribute__((overloadable)) cross(float3 lhs, float3 rhs) { 1067 float3 r; 1068 r.x = lhs.y * rhs.z - lhs.z * rhs.y; 1069 r.y = lhs.z * rhs.x - lhs.x * rhs.z; 1070 r.z = lhs.x * rhs.y - lhs.y * rhs.x; 1071 return r; 1072 } 1073 1074 extern float4 __attribute__((overloadable)) cross(float4 lhs, float4 rhs) { 1075 float4 r; 1076 r.x = lhs.y * rhs.z - lhs.z * rhs.y; 1077 r.y = lhs.z * rhs.x - lhs.x * rhs.z; 1078 r.z = lhs.x * rhs.y - lhs.y * rhs.x; 1079 r.w = 0.f; 1080 return r; 1081 } 1082 1083 #if !defined(ARCH_X86_HAVE_SSSE3) || defined(RS_DEBUG_RUNTIME) || defined(RS_G_RUNTIME) 1084 // These functions must be defined here if we are not using the SSE 1085 // implementation, which includes when we are built as part of the 1086 // debug runtime (libclcore_debug.bc) or compiling with debug info. 
1087 1088 extern float __attribute__((overloadable)) dot(float lhs, float rhs) { 1089 return lhs * rhs; 1090 } 1091 extern float __attribute__((overloadable)) dot(float2 lhs, float2 rhs) { 1092 return lhs.x*rhs.x + lhs.y*rhs.y; 1093 } 1094 extern float __attribute__((overloadable)) dot(float3 lhs, float3 rhs) { 1095 return lhs.x*rhs.x + lhs.y*rhs.y + lhs.z*rhs.z; 1096 } 1097 extern float __attribute__((overloadable)) dot(float4 lhs, float4 rhs) { 1098 return lhs.x*rhs.x + lhs.y*rhs.y + lhs.z*rhs.z + lhs.w*rhs.w; 1099 } 1100 1101 extern float __attribute__((overloadable)) length(float v) { 1102 return fabs(v); 1103 } 1104 extern float __attribute__((overloadable)) length(float2 v) { 1105 return sqrt(v.x*v.x + v.y*v.y); 1106 } 1107 extern float __attribute__((overloadable)) length(float3 v) { 1108 return sqrt(v.x*v.x + v.y*v.y + v.z*v.z); 1109 } 1110 extern float __attribute__((overloadable)) length(float4 v) { 1111 return sqrt(v.x*v.x + v.y*v.y + v.z*v.z + v.w*v.w); 1112 } 1113 1114 #else 1115 1116 extern float __attribute__((overloadable)) length(float v); 1117 extern float __attribute__((overloadable)) length(float2 v); 1118 extern float __attribute__((overloadable)) length(float3 v); 1119 extern float __attribute__((overloadable)) length(float4 v); 1120 1121 #endif // !defined(ARCH_X86_HAVE_SSSE3) || defined(RS_DEBUG_RUNTIME) || defined(RS_G_RUNTIME) 1122 1123 extern float __attribute__((overloadable)) distance(float lhs, float rhs) { 1124 return length(lhs - rhs); 1125 } 1126 extern float __attribute__((overloadable)) distance(float2 lhs, float2 rhs) { 1127 return length(lhs - rhs); 1128 } 1129 extern float __attribute__((overloadable)) distance(float3 lhs, float3 rhs) { 1130 return length(lhs - rhs); 1131 } 1132 extern float __attribute__((overloadable)) distance(float4 lhs, float4 rhs) { 1133 return length(lhs - rhs); 1134 } 1135 1136 /* For the normalization functions, vectors of length 0 should simply be 1137 * returned (i.e. 
all the components of that vector are 0). 1138 */ 1139 extern float __attribute__((overloadable)) normalize(float v) { 1140 if (v == 0.0f) { 1141 return 0.0f; 1142 } else if (v < 0.0f) { 1143 return -1.0f; 1144 } else { 1145 return 1.0f; 1146 } 1147 } 1148 extern float2 __attribute__((overloadable)) normalize(float2 v) { 1149 float l = length(v); 1150 return l == 0.0f ? v : v / l; 1151 } 1152 extern float3 __attribute__((overloadable)) normalize(float3 v) { 1153 float l = length(v); 1154 return l == 0.0f ? v : v / l; 1155 } 1156 extern float4 __attribute__((overloadable)) normalize(float4 v) { 1157 float l = length(v); 1158 return l == 0.0f ? v : v / l; 1159 } 1160 1161 extern float __attribute__((overloadable)) half_sqrt(float v) { 1162 return sqrt(v); 1163 } 1164 FN_FUNC_FN(half_sqrt) 1165 1166 extern float __attribute__((overloadable)) fast_length(float v) { 1167 return fabs(v); 1168 } 1169 extern float __attribute__((overloadable)) fast_length(float2 v) { 1170 return half_sqrt(v.x*v.x + v.y*v.y); 1171 } 1172 extern float __attribute__((overloadable)) fast_length(float3 v) { 1173 return half_sqrt(v.x*v.x + v.y*v.y + v.z*v.z); 1174 } 1175 extern float __attribute__((overloadable)) fast_length(float4 v) { 1176 return half_sqrt(v.x*v.x + v.y*v.y + v.z*v.z + v.w*v.w); 1177 } 1178 1179 extern float __attribute__((overloadable)) fast_distance(float lhs, float rhs) { 1180 return fast_length(lhs - rhs); 1181 } 1182 extern float __attribute__((overloadable)) fast_distance(float2 lhs, float2 rhs) { 1183 return fast_length(lhs - rhs); 1184 } 1185 extern float __attribute__((overloadable)) fast_distance(float3 lhs, float3 rhs) { 1186 return fast_length(lhs - rhs); 1187 } 1188 extern float __attribute__((overloadable)) fast_distance(float4 lhs, float4 rhs) { 1189 return fast_length(lhs - rhs); 1190 } 1191 1192 extern float __attribute__((overloadable)) half_rsqrt(float); 1193 1194 /* For the normalization functions, vectors of length 0 should simply be 1195 * returned (i.e. 
all the components of that vector are 0). 1196 */ 1197 extern float __attribute__((overloadable)) fast_normalize(float v) { 1198 if (v == 0.0f) { 1199 return 0.0f; 1200 } else if (v < 0.0f) { 1201 return -1.0f; 1202 } else { 1203 return 1.0f; 1204 } 1205 } 1206 // If the length is 0, then rlength should be NaN. 1207 extern float2 __attribute__((overloadable)) fast_normalize(float2 v) { 1208 float rlength = half_rsqrt(v.x*v.x + v.y*v.y); 1209 return (rlength == rlength) ? v * rlength : v; 1210 } 1211 extern float3 __attribute__((overloadable)) fast_normalize(float3 v) { 1212 float rlength = half_rsqrt(v.x*v.x + v.y*v.y + v.z*v.z); 1213 return (rlength == rlength) ? v * rlength : v; 1214 } 1215 extern float4 __attribute__((overloadable)) fast_normalize(float4 v) { 1216 float rlength = half_rsqrt(v.x*v.x + v.y*v.y + v.z*v.z + v.w*v.w); 1217 return (rlength == rlength) ? v * rlength : v; 1218 } 1219 1220 extern float __attribute__((overloadable)) half_recip(float v) { 1221 return 1.f / v; 1222 } 1223 1224 /* 1225 extern float __attribute__((overloadable)) approx_atan(float x) { 1226 if (x == 0.f) 1227 return 0.f; 1228 if (x < 0.f) 1229 return -1.f * approx_atan(-1.f * x); 1230 if (x > 1.f) 1231 return M_PI_2 - approx_atan(approx_recip(x)); 1232 return x * approx_recip(1.f + 0.28f * x*x); 1233 } 1234 FN_FUNC_FN(approx_atan) 1235 */ 1236 1237 typedef union 1238 { 1239 float fv; 1240 int32_t iv; 1241 } ieee_float_shape_type; 1242 1243 /* Get a 32 bit int from a float. */ 1244 1245 #define GET_FLOAT_WORD(i,d) \ 1246 do { \ 1247 ieee_float_shape_type gf_u; \ 1248 gf_u.fv = (d); \ 1249 (i) = gf_u.iv; \ 1250 } while (0) 1251 1252 /* Set a float from a 32 bit int. 
*/ 1253 1254 #define SET_FLOAT_WORD(d,i) \ 1255 do { \ 1256 ieee_float_shape_type sf_u; \ 1257 sf_u.iv = (i); \ 1258 (d) = sf_u.fv; \ 1259 } while (0) 1260 1261 1262 1263 // Valid -125 to 125 1264 extern float __attribute__((overloadable)) native_exp2(float v) { 1265 int32_t iv = (int)v; 1266 int32_t x = iv + (iv >> 31); // ~floor(v) 1267 float r = (v - x); 1268 1269 float fo; 1270 SET_FLOAT_WORD(fo, (x + 127) << 23); 1271 1272 r *= 0.694f; // ~ log(e) / log(2) 1273 float r2 = r*r; 1274 float adj = 1.f + r + (r2 * 0.5f) + (r2*r * 0.166666f) + (r2*r2 * 0.0416666f); 1275 return fo * adj; 1276 } 1277 1278 extern float2 __attribute__((overloadable)) native_exp2(float2 v) { 1279 int2 iv = convert_int2(v); 1280 int2 x = iv + (iv >> (int2)31);//floor(v); 1281 float2 r = (v - convert_float2(x)); 1282 1283 x += 127; 1284 1285 float2 fo = (float2)(x << (int2)23); 1286 1287 r *= 0.694f; // ~ log(e) / log(2) 1288 float2 r2 = r*r; 1289 float2 adj = 1.f + r + (r2 * 0.5f) + (r2*r * 0.166666f) + (r2*r2 * 0.0416666f); 1290 return fo * adj; 1291 } 1292 1293 extern float4 __attribute__((overloadable)) native_exp2(float4 v) { 1294 int4 iv = convert_int4(v); 1295 int4 x = iv + (iv >> (int4)31);//floor(v); 1296 float4 r = (v - convert_float4(x)); 1297 1298 x += 127; 1299 1300 float4 fo = (float4)(x << (int4)23); 1301 1302 r *= 0.694f; // ~ log(e) / log(2) 1303 float4 r2 = r*r; 1304 float4 adj = 1.f + r + (r2 * 0.5f) + (r2*r * 0.166666f) + (r2*r2 * 0.0416666f); 1305 return fo * adj; 1306 } 1307 1308 extern float3 __attribute__((overloadable)) native_exp2(float3 v) { 1309 float4 t = 1.f; 1310 t.xyz = v; 1311 return native_exp2(t).xyz; 1312 } 1313 1314 1315 extern float __attribute__((overloadable)) native_exp(float v) { 1316 return native_exp2(v * 1.442695041f); 1317 } 1318 extern float2 __attribute__((overloadable)) native_exp(float2 v) { 1319 return native_exp2(v * 1.442695041f); 1320 } 1321 extern float3 __attribute__((overloadable)) native_exp(float3 v) { 1322 return native_exp2(v * 
1.442695041f); 1323 } 1324 extern float4 __attribute__((overloadable)) native_exp(float4 v) { 1325 return native_exp2(v * 1.442695041f); 1326 } 1327 1328 extern float __attribute__((overloadable)) native_exp10(float v) { 1329 return native_exp2(v * 3.321928095f); 1330 } 1331 extern float2 __attribute__((overloadable)) native_exp10(float2 v) { 1332 return native_exp2(v * 3.321928095f); 1333 } 1334 extern float3 __attribute__((overloadable)) native_exp10(float3 v) { 1335 return native_exp2(v * 3.321928095f); 1336 } 1337 extern float4 __attribute__((overloadable)) native_exp10(float4 v) { 1338 return native_exp2(v * 3.321928095f); 1339 } 1340 1341 extern float __attribute__((overloadable)) native_log2(float v) { 1342 int32_t ibits; 1343 GET_FLOAT_WORD(ibits, v); 1344 1345 int32_t e = (ibits >> 23) & 0xff; 1346 1347 ibits &= 0x7fffff; 1348 ibits |= 127 << 23; 1349 1350 float ir; 1351 SET_FLOAT_WORD(ir, ibits); 1352 ir -= 1.5f; 1353 float ir2 = ir*ir; 1354 float adj2 = (0.405465108f / 0.693147181f) + 1355 ((0.666666667f / 0.693147181f) * ir) - 1356 ((0.222222222f / 0.693147181f) * ir2) + 1357 ((0.098765432f / 0.693147181f) * ir*ir2) - 1358 ((0.049382716f / 0.693147181f) * ir2*ir2) + 1359 ((0.026337449f / 0.693147181f) * ir*ir2*ir2) - 1360 ((0.014631916f / 0.693147181f) * ir2*ir2*ir2); 1361 return (float)(e - 127) + adj2; 1362 } 1363 extern float2 __attribute__((overloadable)) native_log2(float2 v) { 1364 float2 v2 = {native_log2(v.x), native_log2(v.y)}; 1365 return v2; 1366 } 1367 extern float3 __attribute__((overloadable)) native_log2(float3 v) { 1368 float3 v2 = {native_log2(v.x), native_log2(v.y), native_log2(v.z)}; 1369 return v2; 1370 } 1371 extern float4 __attribute__((overloadable)) native_log2(float4 v) { 1372 float4 v2 = {native_log2(v.x), native_log2(v.y), native_log2(v.z), native_log2(v.w)}; 1373 return v2; 1374 } 1375 1376 extern float __attribute__((overloadable)) native_log(float v) { 1377 return native_log2(v) * (1.f / 1.442695041f); 1378 } 1379 extern 
float2 __attribute__((overloadable)) native_log(float2 v) { 1380 return native_log2(v) * (1.f / 1.442695041f); 1381 } 1382 extern float3 __attribute__((overloadable)) native_log(float3 v) { 1383 return native_log2(v) * (1.f / 1.442695041f); 1384 } 1385 extern float4 __attribute__((overloadable)) native_log(float4 v) { 1386 return native_log2(v) * (1.f / 1.442695041f); 1387 } 1388 1389 extern float __attribute__((overloadable)) native_log10(float v) { 1390 return native_log2(v) * (1.f / 3.321928095f); 1391 } 1392 extern float2 __attribute__((overloadable)) native_log10(float2 v) { 1393 return native_log2(v) * (1.f / 3.321928095f); 1394 } 1395 extern float3 __attribute__((overloadable)) native_log10(float3 v) { 1396 return native_log2(v) * (1.f / 3.321928095f); 1397 } 1398 extern float4 __attribute__((overloadable)) native_log10(float4 v) { 1399 return native_log2(v) * (1.f / 3.321928095f); 1400 } 1401 1402 1403 extern float __attribute__((overloadable)) native_powr(float v, float y) { 1404 float v2 = native_log2(v); 1405 v2 = fmax(v2 * y, -125.f); 1406 return native_exp2(v2); 1407 } 1408 extern float2 __attribute__((overloadable)) native_powr(float2 v, float2 y) { 1409 float2 v2 = native_log2(v); 1410 v2 = fmax(v2 * y, -125.f); 1411 return native_exp2(v2); 1412 } 1413 extern float3 __attribute__((overloadable)) native_powr(float3 v, float3 y) { 1414 float3 v2 = native_log2(v); 1415 v2 = fmax(v2 * y, -125.f); 1416 return native_exp2(v2); 1417 } 1418 extern float4 __attribute__((overloadable)) native_powr(float4 v, float4 y) { 1419 float4 v2 = native_log2(v); 1420 v2 = fmax(v2 * y, -125.f); 1421 return native_exp2(v2); 1422 } 1423 1424 extern double __attribute__((overloadable)) min(double v1, double v2) { 1425 return v1 < v2 ? v1 : v2; 1426 } 1427 1428 extern double2 __attribute__((overloadable)) min(double2 v1, double2 v2) { 1429 double2 r; 1430 r.x = v1.x < v2.x ? v1.x : v2.x; 1431 r.y = v1.y < v2.y ? 
v1.y : v2.y; 1432 return r; 1433 } 1434 1435 extern double3 __attribute__((overloadable)) min(double3 v1, double3 v2) { 1436 double3 r; 1437 r.x = v1.x < v2.x ? v1.x : v2.x; 1438 r.y = v1.y < v2.y ? v1.y : v2.y; 1439 r.z = v1.z < v2.z ? v1.z : v2.z; 1440 return r; 1441 } 1442 1443 extern double4 __attribute__((overloadable)) min(double4 v1, double4 v2) { 1444 double4 r; 1445 r.x = v1.x < v2.x ? v1.x : v2.x; 1446 r.y = v1.y < v2.y ? v1.y : v2.y; 1447 r.z = v1.z < v2.z ? v1.z : v2.z; 1448 r.w = v1.w < v2.w ? v1.w : v2.w; 1449 return r; 1450 } 1451 1452 extern long __attribute__((overloadable)) min(long v1, long v2) { 1453 return v1 < v2 ? v1 : v2; 1454 } 1455 extern long2 __attribute__((overloadable)) min(long2 v1, long2 v2) { 1456 long2 r; 1457 r.x = v1.x < v2.x ? v1.x : v2.x; 1458 r.y = v1.y < v2.y ? v1.y : v2.y; 1459 return r; 1460 } 1461 extern long3 __attribute__((overloadable)) min(long3 v1, long3 v2) { 1462 long3 r; 1463 r.x = v1.x < v2.x ? v1.x : v2.x; 1464 r.y = v1.y < v2.y ? v1.y : v2.y; 1465 r.z = v1.z < v2.z ? v1.z : v2.z; 1466 return r; 1467 } 1468 extern long4 __attribute__((overloadable)) min(long4 v1, long4 v2) { 1469 long4 r; 1470 r.x = v1.x < v2.x ? v1.x : v2.x; 1471 r.y = v1.y < v2.y ? v1.y : v2.y; 1472 r.z = v1.z < v2.z ? v1.z : v2.z; 1473 r.w = v1.w < v2.w ? v1.w : v2.w; 1474 return r; 1475 } 1476 1477 extern ulong __attribute__((overloadable)) min(ulong v1, ulong v2) { 1478 return v1 < v2 ? v1 : v2; 1479 } 1480 extern ulong2 __attribute__((overloadable)) min(ulong2 v1, ulong2 v2) { 1481 ulong2 r; 1482 r.x = v1.x < v2.x ? v1.x : v2.x; 1483 r.y = v1.y < v2.y ? v1.y : v2.y; 1484 return r; 1485 } 1486 extern ulong3 __attribute__((overloadable)) min(ulong3 v1, ulong3 v2) { 1487 ulong3 r; 1488 r.x = v1.x < v2.x ? v1.x : v2.x; 1489 r.y = v1.y < v2.y ? v1.y : v2.y; 1490 r.z = v1.z < v2.z ? v1.z : v2.z; 1491 return r; 1492 } 1493 extern ulong4 __attribute__((overloadable)) min(ulong4 v1, ulong4 v2) { 1494 ulong4 r; 1495 r.x = v1.x < v2.x ? 
v1.x : v2.x; 1496 r.y = v1.y < v2.y ? v1.y : v2.y; 1497 r.z = v1.z < v2.z ? v1.z : v2.z; 1498 r.w = v1.w < v2.w ? v1.w : v2.w; 1499 return r; 1500 } 1501 1502 extern double __attribute__((overloadable)) max(double v1, double v2) { 1503 return v1 > v2 ? v1 : v2; 1504 } 1505 1506 extern double2 __attribute__((overloadable)) max(double2 v1, double2 v2) { 1507 double2 r; 1508 r.x = v1.x > v2.x ? v1.x : v2.x; 1509 r.y = v1.y > v2.y ? v1.y : v2.y; 1510 return r; 1511 } 1512 1513 extern double3 __attribute__((overloadable)) max(double3 v1, double3 v2) { 1514 double3 r; 1515 r.x = v1.x > v2.x ? v1.x : v2.x; 1516 r.y = v1.y > v2.y ? v1.y : v2.y; 1517 r.z = v1.z > v2.z ? v1.z : v2.z; 1518 return r; 1519 } 1520 1521 extern double4 __attribute__((overloadable)) max(double4 v1, double4 v2) { 1522 double4 r; 1523 r.x = v1.x > v2.x ? v1.x : v2.x; 1524 r.y = v1.y > v2.y ? v1.y : v2.y; 1525 r.z = v1.z > v2.z ? v1.z : v2.z; 1526 r.w = v1.w > v2.w ? v1.w : v2.w; 1527 return r; 1528 } 1529 1530 extern long __attribute__((overloadable)) max(long v1, long v2) { 1531 return v1 > v2 ? v1 : v2; 1532 } 1533 extern long2 __attribute__((overloadable)) max(long2 v1, long2 v2) { 1534 long2 r; 1535 r.x = v1.x > v2.x ? v1.x : v2.x; 1536 r.y = v1.y > v2.y ? v1.y : v2.y; 1537 return r; 1538 } 1539 extern long3 __attribute__((overloadable)) max(long3 v1, long3 v2) { 1540 long3 r; 1541 r.x = v1.x > v2.x ? v1.x : v2.x; 1542 r.y = v1.y > v2.y ? v1.y : v2.y; 1543 r.z = v1.z > v2.z ? v1.z : v2.z; 1544 return r; 1545 } 1546 extern long4 __attribute__((overloadable)) max(long4 v1, long4 v2) { 1547 long4 r; 1548 r.x = v1.x > v2.x ? v1.x : v2.x; 1549 r.y = v1.y > v2.y ? v1.y : v2.y; 1550 r.z = v1.z > v2.z ? v1.z : v2.z; 1551 r.w = v1.w > v2.w ? v1.w : v2.w; 1552 return r; 1553 } 1554 1555 extern ulong __attribute__((overloadable)) max(ulong v1, ulong v2) { 1556 return v1 > v2 ? v1 : v2; 1557 } 1558 extern ulong2 __attribute__((overloadable)) max(ulong2 v1, ulong2 v2) { 1559 ulong2 r; 1560 r.x = v1.x > v2.x ? 
v1.x : v2.x; 1561 r.y = v1.y > v2.y ? v1.y : v2.y; 1562 return r; 1563 } 1564 extern ulong3 __attribute__((overloadable)) max(ulong3 v1, ulong3 v2) { 1565 ulong3 r; 1566 r.x = v1.x > v2.x ? v1.x : v2.x; 1567 r.y = v1.y > v2.y ? v1.y : v2.y; 1568 r.z = v1.z > v2.z ? v1.z : v2.z; 1569 return r; 1570 } 1571 extern ulong4 __attribute__((overloadable)) max(ulong4 v1, ulong4 v2) { 1572 ulong4 r; 1573 r.x = v1.x > v2.x ? v1.x : v2.x; 1574 r.y = v1.y > v2.y ? v1.y : v2.y; 1575 r.z = v1.z > v2.z ? v1.z : v2.z; 1576 r.w = v1.w > v2.w ? v1.w : v2.w; 1577 return r; 1578 } 1579 1580 #define THUNK_NATIVE_F(fn) \ 1581 float __attribute__((overloadable)) native_##fn(float v) { return fn(v);} \ 1582 float2 __attribute__((overloadable)) native_##fn(float2 v) { return fn(v);} \ 1583 float3 __attribute__((overloadable)) native_##fn(float3 v) { return fn(v);} \ 1584 float4 __attribute__((overloadable)) native_##fn(float4 v) { return fn(v);} 1585 1586 #define THUNK_NATIVE_F_F(fn) \ 1587 float __attribute__((overloadable)) native_##fn(float v1, float v2) { return fn(v1, v2);} \ 1588 float2 __attribute__((overloadable)) native_##fn(float2 v1, float2 v2) { return fn(v1, v2);} \ 1589 float3 __attribute__((overloadable)) native_##fn(float3 v1, float3 v2) { return fn(v1, v2);} \ 1590 float4 __attribute__((overloadable)) native_##fn(float4 v1, float4 v2) { return fn(v1, v2);} 1591 1592 #define THUNK_NATIVE_F_FP(fn) \ 1593 float __attribute__((overloadable)) native_##fn(float v1, float *v2) { return fn(v1, v2);} \ 1594 float2 __attribute__((overloadable)) native_##fn(float2 v1, float2 *v2) { return fn(v1, v2);} \ 1595 float3 __attribute__((overloadable)) native_##fn(float3 v1, float3 *v2) { return fn(v1, v2);} \ 1596 float4 __attribute__((overloadable)) native_##fn(float4 v1, float4 *v2) { return fn(v1, v2);} 1597 1598 #define THUNK_NATIVE_F_I(fn) \ 1599 float __attribute__((overloadable)) native_##fn(float v1, int v2) { return fn(v1, v2);} \ 1600 float2 __attribute__((overloadable)) 
native_##fn(float2 v1, int2 v2) { return fn(v1, v2);} \ 1601 float3 __attribute__((overloadable)) native_##fn(float3 v1, int3 v2) { return fn(v1, v2);} \ 1602 float4 __attribute__((overloadable)) native_##fn(float4 v1, int4 v2) { return fn(v1, v2);} 1603 1604 THUNK_NATIVE_F(acos) 1605 THUNK_NATIVE_F(acosh) 1606 THUNK_NATIVE_F(acospi) 1607 THUNK_NATIVE_F(asin) 1608 THUNK_NATIVE_F(asinh) 1609 THUNK_NATIVE_F(asinpi) 1610 THUNK_NATIVE_F(atan) 1611 THUNK_NATIVE_F_F(atan2) 1612 THUNK_NATIVE_F(atanh) 1613 THUNK_NATIVE_F(atanpi) 1614 THUNK_NATIVE_F_F(atan2pi) 1615 THUNK_NATIVE_F(cbrt) 1616 THUNK_NATIVE_F(cos) 1617 THUNK_NATIVE_F(cosh) 1618 THUNK_NATIVE_F(cospi) 1619 THUNK_NATIVE_F(expm1) 1620 THUNK_NATIVE_F_F(hypot) 1621 THUNK_NATIVE_F(log1p) 1622 THUNK_NATIVE_F_I(rootn) 1623 THUNK_NATIVE_F(rsqrt) 1624 THUNK_NATIVE_F(sqrt) 1625 THUNK_NATIVE_F(sin) 1626 THUNK_NATIVE_F_FP(sincos) 1627 THUNK_NATIVE_F(sinh) 1628 THUNK_NATIVE_F(sinpi) 1629 THUNK_NATIVE_F(tan) 1630 THUNK_NATIVE_F(tanh) 1631 THUNK_NATIVE_F(tanpi) 1632 1633 #undef THUNK_NATIVE_F 1634 #undef THUNK_NATIVE_F_F 1635 #undef THUNK_NATIVE_F_I 1636 #undef THUNK_NATIVE_F_FP 1637 1638 float __attribute__((overloadable)) native_normalize(float v) { return fast_normalize(v);} 1639 float2 __attribute__((overloadable)) native_normalize(float2 v) { return fast_normalize(v);} 1640 float3 __attribute__((overloadable)) native_normalize(float3 v) { return fast_normalize(v);} 1641 float4 __attribute__((overloadable)) native_normalize(float4 v) { return fast_normalize(v);} 1642 1643 float __attribute__((overloadable)) native_distance(float v1, float v2) { return fast_distance(v1, v2);} 1644 float __attribute__((overloadable)) native_distance(float2 v1, float2 v2) { return fast_distance(v1, v2);} 1645 float __attribute__((overloadable)) native_distance(float3 v1, float3 v2) { return fast_distance(v1, v2);} 1646 float __attribute__((overloadable)) native_distance(float4 v1, float4 v2) { return fast_distance(v1, v2);} 1647 1648 float 
__attribute__((overloadable)) native_length(float v) { return fast_length(v);} 1649 float __attribute__((overloadable)) native_length(float2 v) { return fast_length(v);} 1650 float __attribute__((overloadable)) native_length(float3 v) { return fast_length(v);} 1651 float __attribute__((overloadable)) native_length(float4 v) { return fast_length(v);} 1652 1653 float __attribute__((overloadable)) native_divide(float v1, float v2) { return v1 / v2;} 1654 float2 __attribute__((overloadable)) native_divide(float2 v1, float2 v2) { return v1 / v2;} 1655 float3 __attribute__((overloadable)) native_divide(float3 v1, float3 v2) { return v1 / v2;} 1656 float4 __attribute__((overloadable)) native_divide(float4 v1, float4 v2) { return v1 / v2;} 1657 1658 float __attribute__((overloadable)) native_recip(float v) { return 1.f / v;} 1659 float2 __attribute__((overloadable)) native_recip(float2 v) { return ((float2)1.f) / v;} 1660 float3 __attribute__((overloadable)) native_recip(float3 v) { return ((float3)1.f) / v;} 1661 float4 __attribute__((overloadable)) native_recip(float4 v) { return ((float4)1.f) / v;} 1662 1663 1664 1665 1666 1667 #undef FN_FUNC_FN 1668 #undef IN_FUNC_FN 1669 #undef FN_FUNC_FN_FN 1670 #undef FN_FUNC_FN_F 1671 #undef FN_FUNC_FN_IN 1672 #undef FN_FUNC_FN_I 1673 #undef FN_FUNC_FN_PFN 1674 #undef FN_FUNC_FN_PIN 1675 #undef FN_FUNC_FN_FN_FN 1676 #undef FN_FUNC_FN_FN_PIN 1677 #undef XN_FUNC_YN 1678 #undef UIN_FUNC_IN 1679 #undef IN_FUNC_IN 1680 #undef XN_FUNC_XN_XN_BODY 1681 #undef IN_FUNC_IN_IN_BODY 1682 1683 static const unsigned short kHalfPositiveInfinity = 0x7c00; 1684 1685 /* Define f16 functions of the form 1686 * HN output = fn(HN input) 1687 * where HN is scalar or vector half type 1688 */ 1689 #define HN_FUNC_HN(fn) \ 1690 extern half __attribute__((overloadable)) fn(half h) { \ 1691 return (half) fn((float) h); \ 1692 } \ 1693 extern half2 __attribute__((overloadable)) fn(half2 v) { \ 1694 return convert_half2(fn(convert_float2(v))); \ 1695 } \ 1696 
extern half3 __attribute__((overloadable)) fn(half3 v) { \
        return convert_half3(fn(convert_float3(v))); \
    } \
    extern half4 __attribute__((overloadable)) fn(half4 v) { \
        return convert_half4(fn(convert_float4(v))); \
    }

/* HN_FUNC_HN_HN(fn): define
 *     HN fn(HN, HN)
 * for half, half2, half3 and half4. Both arguments are widened to the
 * matching float type, the float overload of fn is called, and the result is
 * converted back to half.
 */
#define HN_FUNC_HN_HN(fn) \
    extern half __attribute__((overloadable)) fn(half h1, half h2) { \
        return (half) fn((float) h1, (float) h2); \
    } \
    extern half2 __attribute__((overloadable)) fn(half2 v1, half2 v2) { \
        return convert_half2(fn(convert_float2(v1), convert_float2(v2))); \
    } \
    extern half3 __attribute__((overloadable)) fn(half3 v1, half3 v2) { \
        return convert_half3(fn(convert_float3(v1), convert_float3(v2))); \
    } \
    extern half4 __attribute__((overloadable)) fn(half4 v1, half4 v2) { \
        return convert_half4(fn(convert_float4(v1), convert_float4(v2))); \
    }

/* HN_FUNC_HN_H(fn): define
 *     HN fn(HN, half)
 * for the vector types half2, half3 and half4 only (no scalar overload is
 * generated here). The second argument is a single half that is widened to
 * float and passed to the (floatN, float) overload of fn.
 */
#define HN_FUNC_HN_H(fn) \
    extern half2 __attribute__((overloadable)) fn(half2 v1, half v2) { \
        return convert_half2(fn(convert_float2(v1), (float) v2)); \
    } \
    extern half3 __attribute__((overloadable)) fn(half3 v1, half v2) { \
        return convert_half3(fn(convert_float3(v1), (float) v2)); \
    } \
    extern half4 __attribute__((overloadable)) fn(half4 v1, half v2) { \
        return convert_half4(fn(convert_float4(v1), (float) v2)); \
    }

/* HN_FUNC_HN_HN_HN(fn): define
 *     HN fn(HN, HN, HN)
 * for half, half2, half3 and half4 via the float overload of fn.
 */
#define HN_FUNC_HN_HN_HN(fn) \
    extern half __attribute__((overloadable)) fn(half h1, half h2, half h3) { \
        return (half) fn((float) h1, (float) h2, (float) h3); \
    } \
\ 1747 extern half2 __attribute__((overloadable)) fn(half2 v1, half2 v2, half2 v3) { \ 1748 return convert_half2(fn(convert_float2(v1), \ 1749 convert_float2(v2), \ 1750 convert_float2(v3))); \ 1751 } \ 1752 extern half3 __attribute__((overloadable)) fn(half3 v1, half3 v2, half3 v3) { \ 1753 return convert_half3(fn(convert_float3(v1), \ 1754 convert_float3(v2), \ 1755 convert_float3(v3))); \ 1756 } \ 1757 extern half4 __attribute__((overloadable)) fn(half4 v1, half4 v2, half4 v3) { \ 1758 return convert_half4(fn(convert_float4(v1), \ 1759 convert_float4(v2), \ 1760 convert_float4(v3))); \ 1761 } 1762 1763 /* Define f16 functions of the form 1764 * HN output = fn(HN input1, IN input2) 1765 * where HN is scalar or vector half type and IN the equivalent integer type 1766 * of same vector length. 1767 */ 1768 #define HN_FUNC_HN_IN(fn) \ 1769 extern half __attribute__((overloadable)) fn(half h1, int v) { \ 1770 return (half) fn((float) h1, v); \ 1771 } \ 1772 extern half2 __attribute__((overloadable)) fn(half2 v1, int2 v2) { \ 1773 return convert_half2(fn(convert_float2(v1), v2)); \ 1774 } \ 1775 extern half3 __attribute__((overloadable)) fn(half3 v1, int3 v2) { \ 1776 return convert_half3(fn(convert_float3(v1), v2)); \ 1777 } \ 1778 extern half4 __attribute__((overloadable)) fn(half4 v1, int4 v2) { \ 1779 return convert_half4(fn(convert_float4(v1), v2)); \ 1780 } 1781 1782 /* Define f16 functions of the form 1783 * half output = fn(HN input1) 1784 * where HN is a scalar or vector half type. 
1785 */ 1786 #define H_FUNC_HN(fn) \ 1787 extern half __attribute__((overloadable)) fn(half h) { \ 1788 return (half) fn((float) h); \ 1789 } \ 1790 extern half __attribute__((overloadable)) fn(half2 v) { \ 1791 return fn(convert_float2(v)); \ 1792 } \ 1793 extern half __attribute__((overloadable)) fn(half3 v) { \ 1794 return fn(convert_float3(v)); \ 1795 } \ 1796 extern half __attribute__((overloadable)) fn(half4 v) { \ 1797 return fn(convert_float4(v)); \ 1798 } 1799 1800 /* Define f16 functions of the form 1801 * half output = fn(HN input1, HN input2) 1802 * where HN is a scalar or vector half type. 1803 */ 1804 #define H_FUNC_HN_HN(fn) \ 1805 extern half __attribute__((overloadable)) fn(half h1, half h2) { \ 1806 return (half) fn((float) h1, (float) h2); \ 1807 } \ 1808 extern half __attribute__((overloadable)) fn(half2 v1, half2 v2) { \ 1809 return fn(convert_float2(v1), convert_float2(v2)); \ 1810 } \ 1811 extern half __attribute__((overloadable)) fn(half3 v1, half3 v2) { \ 1812 return fn(convert_float3(v1), convert_float3(v2)); \ 1813 } \ 1814 extern half __attribute__((overloadable)) fn(half4 v1, half4 v2) { \ 1815 return fn(convert_float4(v1), convert_float4(v2)); \ 1816 } 1817 1818 #define SCALARIZE_HN_FUNC_HN_PHN(fnc) \ 1819 extern half2 __attribute__((overloadable)) fnc(half2 v1, half2 *v2) { \ 1820 half2 ret; \ 1821 half t[2]; \ 1822 ret.x = fnc(v1.x, &t[0]); \ 1823 ret.y = fnc(v1.y, &t[1]); \ 1824 v2->x = t[0]; \ 1825 v2->y = t[1]; \ 1826 return ret; \ 1827 } \ 1828 extern half3 __attribute__((overloadable)) fnc(half3 v1, half3 *v2) { \ 1829 half3 ret; \ 1830 half t[3]; \ 1831 ret.x = fnc(v1.x, &t[0]); \ 1832 ret.y = fnc(v1.y, &t[1]); \ 1833 ret.z = fnc(v1.z, &t[2]); \ 1834 v2->x = t[0]; \ 1835 v2->y = t[1]; \ 1836 v2->z = t[2]; \ 1837 return ret; \ 1838 } \ 1839 extern half4 __attribute__((overloadable)) fnc(half4 v1, half4 *v2) { \ 1840 half4 ret; \ 1841 half t[4]; \ 1842 ret.x = fnc(v1.x, &t[0]); \ 1843 ret.y = fnc(v1.y, &t[1]); \ 1844 ret.z = 
fnc(v1.z, &t[2]); \ 1845 ret.w = fnc(v1.w, &t[3]); \ 1846 v2->x = t[0]; \ 1847 v2->y = t[1]; \ 1848 v2->z = t[2]; \ 1849 v2->w = t[3]; \ 1850 return ret; \ 1851 } 1852 1853 /* Define f16 functions of the form 1854 * HN output = fn(HN input1, HN input2) 1855 * where HN is a vector half type. The functions are defined to call the 1856 * scalar function of the same name. 1857 */ 1858 #define SCALARIZE_HN_FUNC_HN_HN(fn) \ 1859 extern half2 __attribute__((overloadable)) fn(half2 v1, half2 v2) { \ 1860 half2 ret; \ 1861 ret.x = fn(v1.x, v2.x); \ 1862 ret.y = fn(v1.y, v2.y); \ 1863 return ret; \ 1864 } \ 1865 extern half3 __attribute__((overloadable)) fn(half3 v1, half3 v2) { \ 1866 half3 ret; \ 1867 ret.x = fn(v1.x, v2.x); \ 1868 ret.y = fn(v1.y, v2.y); \ 1869 ret.z = fn(v1.z, v2.z); \ 1870 return ret; \ 1871 } \ 1872 extern half4 __attribute__((overloadable)) fn(half4 v1, half4 v2) { \ 1873 half4 ret; \ 1874 ret.x = fn(v1.x, v2.x); \ 1875 ret.y = fn(v1.y, v2.y); \ 1876 ret.z = fn(v1.z, v2.z); \ 1877 ret.w = fn(v1.w, v2.w); \ 1878 return ret; \ 1879 } \ 1880 1881 HN_FUNC_HN(acos); 1882 HN_FUNC_HN(acosh); 1883 HN_FUNC_HN(acospi); 1884 HN_FUNC_HN(asin); 1885 HN_FUNC_HN(asinh); 1886 HN_FUNC_HN(asinpi); 1887 HN_FUNC_HN(atan); 1888 HN_FUNC_HN(atanh); 1889 HN_FUNC_HN(atanpi); 1890 HN_FUNC_HN_HN(atan2); 1891 HN_FUNC_HN_HN(atan2pi); 1892 1893 HN_FUNC_HN(cbrt); 1894 HN_FUNC_HN(ceil); 1895 1896 extern half __attribute__((overloadable)) copysign(half x, half y); 1897 SCALARIZE_HN_FUNC_HN_HN(copysign); 1898 1899 HN_FUNC_HN(cos); 1900 HN_FUNC_HN(cosh); 1901 HN_FUNC_HN(cospi); 1902 1903 extern half3 __attribute__((overloadable)) cross(half3 lhs, half3 rhs) { 1904 half3 r; 1905 r.x = lhs.y * rhs.z - lhs.z * rhs.y; 1906 r.y = lhs.z * rhs.x - lhs.x * rhs.z; 1907 r.z = lhs.x * rhs.y - lhs.y * rhs.x; 1908 return r; 1909 } 1910 1911 extern half4 __attribute__((overloadable)) cross(half4 lhs, half4 rhs) { 1912 half4 r; 1913 r.x = lhs.y * rhs.z - lhs.z * rhs.y; 1914 r.y = lhs.z * rhs.x - 
lhs.x * rhs.z; 1915 r.z = lhs.x * rhs.y - lhs.y * rhs.x; 1916 r.w = 0.f; 1917 return r; 1918 } 1919 1920 HN_FUNC_HN(degrees); 1921 H_FUNC_HN_HN(distance); 1922 H_FUNC_HN_HN(dot); 1923 1924 HN_FUNC_HN(erf); 1925 HN_FUNC_HN(erfc); 1926 HN_FUNC_HN(exp); 1927 HN_FUNC_HN(exp10); 1928 HN_FUNC_HN(exp2); 1929 HN_FUNC_HN(expm1); 1930 1931 HN_FUNC_HN(fabs); 1932 HN_FUNC_HN_HN(fdim); 1933 HN_FUNC_HN(floor); 1934 HN_FUNC_HN_HN_HN(fma); 1935 HN_FUNC_HN_HN(fmax); 1936 HN_FUNC_HN_H(fmax); 1937 HN_FUNC_HN_HN(fmin); 1938 HN_FUNC_HN_H(fmin); 1939 HN_FUNC_HN_HN(fmod); 1940 1941 extern half __attribute__((overloadable)) fract(half v, half *iptr) { 1942 // maxLessThanOne = 0.99951171875, the largest value < 1.0 1943 half maxLessThanOne; 1944 SET_HALF_WORD(maxLessThanOne, 0x3bff); 1945 1946 int i = (int) floor(v); 1947 if (iptr) { 1948 *iptr = i; 1949 } 1950 // return v - floor(v), if strictly less than one 1951 return fmin(v - i, maxLessThanOne); 1952 } 1953 1954 SCALARIZE_HN_FUNC_HN_PHN(fract); 1955 1956 extern half __attribute__((const, overloadable)) fract(half v) { 1957 half unused; 1958 return fract(v, &unused); 1959 } 1960 1961 extern half2 __attribute__((const, overloadable)) fract(half2 v) { 1962 half2 unused; 1963 return fract(v, &unused); 1964 } 1965 1966 extern half3 __attribute__((const, overloadable)) fract(half3 v) { 1967 half3 unused; 1968 return fract(v, &unused); 1969 } 1970 1971 extern half4 __attribute__((const, overloadable)) fract(half4 v) { 1972 half4 unused; 1973 return fract(v, &unused); 1974 } 1975 1976 extern half __attribute__((overloadable)) frexp(half x, int *eptr); 1977 1978 extern half2 __attribute__((overloadable)) frexp(half2 v1, int2 *eptr) { 1979 half2 ret; 1980 int e[2]; 1981 ret.x = frexp(v1.x, &e[0]); 1982 ret.y = frexp(v1.y, &e[1]); 1983 eptr->x = e[0]; 1984 eptr->y = e[1]; 1985 return ret; 1986 } 1987 1988 extern half3 __attribute__((overloadable)) frexp(half3 v1, int3 *eptr) { 1989 half3 ret; 1990 int e[3]; 1991 ret.x = frexp(v1.x, &e[0]); 1992 
ret.y = frexp(v1.y, &e[1]); 1993 ret.z = frexp(v1.z, &e[2]); 1994 eptr->x = e[0]; 1995 eptr->y = e[1]; 1996 eptr->z = e[2]; 1997 return ret; 1998 } 1999 2000 extern half4 __attribute__((overloadable)) frexp(half4 v1, int4 *eptr) { 2001 half4 ret; 2002 int e[4]; 2003 ret.x = frexp(v1.x, &e[0]); 2004 ret.y = frexp(v1.y, &e[1]); 2005 ret.z = frexp(v1.z, &e[2]); 2006 ret.w = frexp(v1.w, &e[3]); 2007 eptr->x = e[0]; 2008 eptr->y = e[1]; 2009 eptr->z = e[2]; 2010 eptr->w = e[3]; 2011 return ret; 2012 } 2013 2014 HN_FUNC_HN_HN(hypot); 2015 2016 extern int __attribute__((overloadable)) ilogb(half x); 2017 2018 extern int2 __attribute__((overloadable)) ilogb(half2 v) { 2019 int2 ret; 2020 ret.x = ilogb(v.x); 2021 ret.y = ilogb(v.y); 2022 return ret; 2023 } 2024 extern int3 __attribute__((overloadable)) ilogb(half3 v) { 2025 int3 ret; 2026 ret.x = ilogb(v.x); 2027 ret.y = ilogb(v.y); 2028 ret.z = ilogb(v.z); 2029 return ret; 2030 } 2031 extern int4 __attribute__((overloadable)) ilogb(half4 v) { 2032 int4 ret; 2033 ret.x = ilogb(v.x); 2034 ret.y = ilogb(v.y); 2035 ret.z = ilogb(v.z); 2036 ret.w = ilogb(v.w); 2037 return ret; 2038 } 2039 2040 HN_FUNC_HN_IN(ldexp); 2041 extern half2 __attribute__((overloadable)) ldexp(half2 v, int exponent) { 2042 return convert_half2(ldexp(convert_float2(v), exponent)); 2043 } 2044 extern half3 __attribute__((overloadable)) ldexp(half3 v, int exponent) { 2045 return convert_half3(ldexp(convert_float3(v), exponent)); 2046 } 2047 extern half4 __attribute__((overloadable)) ldexp(half4 v, int exponent) { 2048 return convert_half4(ldexp(convert_float4(v), exponent)); 2049 } 2050 2051 H_FUNC_HN(length); 2052 HN_FUNC_HN(lgamma); 2053 2054 extern half __attribute__((overloadable)) lgamma(half h, int *signp) { 2055 return (half) lgamma((float) h, signp); 2056 } 2057 extern half2 __attribute__((overloadable)) lgamma(half2 v, int2 *signp) { 2058 return convert_half2(lgamma(convert_float2(v), signp)); 2059 } 2060 extern half3 __attribute__((overloadable)) 
lgamma(half3 v, int3 *signp) { 2061 return convert_half3(lgamma(convert_float3(v), signp)); 2062 } 2063 extern half4 __attribute__((overloadable)) lgamma(half4 v, int4 *signp) { 2064 return convert_half4(lgamma(convert_float4(v), signp)); 2065 } 2066 2067 HN_FUNC_HN(log); 2068 HN_FUNC_HN(log10); 2069 HN_FUNC_HN(log1p); 2070 HN_FUNC_HN(log2); 2071 HN_FUNC_HN(logb); 2072 2073 HN_FUNC_HN_HN_HN(mad); 2074 HN_FUNC_HN_HN(max); 2075 HN_FUNC_HN_H(max); // TODO can this be arch-specific similar to _Z3maxDv2_ff? 2076 HN_FUNC_HN_HN(min); 2077 HN_FUNC_HN_H(min); // TODO can this be arch-specific similar to _Z3minDv2_ff? 2078 2079 extern half __attribute__((overloadable)) mix(half start, half stop, half amount) { 2080 return start + (stop - start) * amount; 2081 } 2082 extern half2 __attribute__((overloadable)) mix(half2 start, half2 stop, half2 amount) { 2083 return start + (stop - start) * amount; 2084 } 2085 extern half3 __attribute__((overloadable)) mix(half3 start, half3 stop, half3 amount) { 2086 return start + (stop - start) * amount; 2087 } 2088 extern half4 __attribute__((overloadable)) mix(half4 start, half4 stop, half4 amount) { 2089 return start + (stop - start) * amount; 2090 } 2091 extern half2 __attribute__((overloadable)) mix(half2 start, half2 stop, half amount) { 2092 return start + (stop - start) * amount; 2093 } 2094 extern half3 __attribute__((overloadable)) mix(half3 start, half3 stop, half amount) { 2095 return start + (stop - start) * amount; 2096 } 2097 extern half4 __attribute__((overloadable)) mix(half4 start, half4 stop, half amount) { 2098 return start + (stop - start) * amount; 2099 } 2100 2101 extern half __attribute__((overloadable)) modf(half x, half *iptr); 2102 SCALARIZE_HN_FUNC_HN_PHN(modf); 2103 2104 half __attribute__((overloadable)) nan_half() { 2105 unsigned short nan_short = kHalfPositiveInfinity | 0x0200; 2106 half nan; 2107 SET_HALF_WORD(nan, nan_short); 2108 return nan; 2109 } 2110 2111 HN_FUNC_HN(normalize); 2112 2113 extern half 
__attribute__((overloadable)) nextafter(half x, half y); 2114 SCALARIZE_HN_FUNC_HN_HN(nextafter); 2115 2116 HN_FUNC_HN_HN(pow); 2117 HN_FUNC_HN_IN(pown); 2118 HN_FUNC_HN_HN(powr); 2119 HN_FUNC_HN(radians); 2120 HN_FUNC_HN_HN(remainder); 2121 2122 extern half __attribute__((overloadable)) remquo(half n, half d, int *quo) { 2123 return (float) remquo((float) n, (float) d, quo); 2124 } 2125 extern half2 __attribute__((overloadable)) remquo(half2 n, half2 d, int2 *quo) { 2126 return convert_half2(remquo(convert_float2(d), convert_float2(n), quo)); 2127 } 2128 extern half3 __attribute__((overloadable)) remquo(half3 n, half3 d, int3 *quo) { 2129 return convert_half3(remquo(convert_float3(d), convert_float3(n), quo)); 2130 } 2131 extern half4 __attribute__((overloadable)) remquo(half4 n, half4 d, int4 *quo) { 2132 return convert_half4(remquo(convert_float4(d), convert_float4(n), quo)); 2133 } 2134 2135 HN_FUNC_HN(rint); 2136 HN_FUNC_HN_IN(rootn); 2137 HN_FUNC_HN(round); 2138 HN_FUNC_HN(rsqrt); 2139 2140 extern half __attribute__((overloadable)) sign(half h) { 2141 if (h > 0) return (half) 1.f; 2142 if (h < 0) return (half) -1.f; 2143 return h; 2144 } 2145 extern half2 __attribute__((overloadable)) sign(half2 v) { 2146 half2 ret; 2147 ret.x = sign(v.x); 2148 ret.y = sign(v.y); 2149 return ret; 2150 } 2151 extern half3 __attribute__((overloadable)) sign(half3 v) { 2152 half3 ret; 2153 ret.x = sign(v.x); 2154 ret.y = sign(v.y); 2155 ret.z = sign(v.z); 2156 return ret; 2157 } 2158 extern half4 __attribute__((overloadable)) sign(half4 v) { 2159 half4 ret; 2160 ret.x = sign(v.x); 2161 ret.y = sign(v.y); 2162 ret.z = sign(v.z); 2163 ret.w = sign(v.w); 2164 return ret; 2165 } 2166 2167 HN_FUNC_HN(sin); 2168 2169 extern half __attribute__((overloadable)) sincos(half v, half *cosptr) { 2170 *cosptr = cos(v); 2171 return sin(v); 2172 } 2173 // TODO verify if LLVM eliminates the duplicate convert_float2 2174 extern half2 __attribute__((overloadable)) sincos(half2 v, half2 *cosptr) { 
2175 *cosptr = cos(v); 2176 return sin(v); 2177 } 2178 extern half3 __attribute__((overloadable)) sincos(half3 v, half3 *cosptr) { 2179 *cosptr = cos(v); 2180 return sin(v); 2181 } 2182 extern half4 __attribute__((overloadable)) sincos(half4 v, half4 *cosptr) { 2183 *cosptr = cos(v); 2184 return sin(v); 2185 } 2186 2187 HN_FUNC_HN(sinh); 2188 HN_FUNC_HN(sinpi); 2189 HN_FUNC_HN(sqrt); 2190 2191 extern half __attribute__((overloadable)) step(half edge, half v) { 2192 return (v < edge) ? 0.f : 1.f; 2193 } 2194 extern half2 __attribute__((overloadable)) step(half2 edge, half2 v) { 2195 half2 r; 2196 r.x = (v.x < edge.x) ? 0.f : 1.f; 2197 r.y = (v.y < edge.y) ? 0.f : 1.f; 2198 return r; 2199 } 2200 extern half3 __attribute__((overloadable)) step(half3 edge, half3 v) { 2201 half3 r; 2202 r.x = (v.x < edge.x) ? 0.f : 1.f; 2203 r.y = (v.y < edge.y) ? 0.f : 1.f; 2204 r.z = (v.z < edge.z) ? 0.f : 1.f; 2205 return r; 2206 } 2207 extern half4 __attribute__((overloadable)) step(half4 edge, half4 v) { 2208 half4 r; 2209 r.x = (v.x < edge.x) ? 0.f : 1.f; 2210 r.y = (v.y < edge.y) ? 0.f : 1.f; 2211 r.z = (v.z < edge.z) ? 0.f : 1.f; 2212 r.w = (v.w < edge.w) ? 0.f : 1.f; 2213 return r; 2214 } 2215 extern half2 __attribute__((overloadable)) step(half2 edge, half v) { 2216 half2 r; 2217 r.x = (v < edge.x) ? 0.f : 1.f; 2218 r.y = (v < edge.y) ? 0.f : 1.f; 2219 return r; 2220 } 2221 extern half3 __attribute__((overloadable)) step(half3 edge, half v) { 2222 half3 r; 2223 r.x = (v < edge.x) ? 0.f : 1.f; 2224 r.y = (v < edge.y) ? 0.f : 1.f; 2225 r.z = (v < edge.z) ? 0.f : 1.f; 2226 return r; 2227 } 2228 extern half4 __attribute__((overloadable)) step(half4 edge, half v) { 2229 half4 r; 2230 r.x = (v < edge.x) ? 0.f : 1.f; 2231 r.y = (v < edge.y) ? 0.f : 1.f; 2232 r.z = (v < edge.z) ? 0.f : 1.f; 2233 r.w = (v < edge.w) ? 0.f : 1.f; 2234 return r; 2235 } 2236 extern half2 __attribute__((overloadable)) step(half edge, half2 v) { 2237 half2 r; 2238 r.x = (v.x < edge) ? 
0.f : 1.f; 2239 r.y = (v.y < edge) ? 0.f : 1.f; 2240 return r; 2241 } 2242 extern half3 __attribute__((overloadable)) step(half edge, half3 v) { 2243 half3 r; 2244 r.x = (v.x < edge) ? 0.f : 1.f; 2245 r.y = (v.y < edge) ? 0.f : 1.f; 2246 r.z = (v.z < edge) ? 0.f : 1.f; 2247 return r; 2248 } 2249 extern half4 __attribute__((overloadable)) step(half edge, half4 v) { 2250 half4 r; 2251 r.x = (v.x < edge) ? 0.f : 1.f; 2252 r.y = (v.y < edge) ? 0.f : 1.f; 2253 r.z = (v.z < edge) ? 0.f : 1.f; 2254 r.w = (v.w < edge) ? 0.f : 1.f; 2255 return r; 2256 } 2257 2258 HN_FUNC_HN(tan); 2259 HN_FUNC_HN(tanh); 2260 HN_FUNC_HN(tanpi); 2261 HN_FUNC_HN(tgamma); 2262 HN_FUNC_HN(trunc); // TODO: rethink: needs half-specific implementation? 2263 2264 HN_FUNC_HN(native_acos); 2265 HN_FUNC_HN(native_acosh); 2266 HN_FUNC_HN(native_acospi); 2267 HN_FUNC_HN(native_asin); 2268 HN_FUNC_HN(native_asinh); 2269 HN_FUNC_HN(native_asinpi); 2270 HN_FUNC_HN(native_atan); 2271 HN_FUNC_HN(native_atanh); 2272 HN_FUNC_HN(native_atanpi); 2273 HN_FUNC_HN_HN(native_atan2); 2274 HN_FUNC_HN_HN(native_atan2pi); 2275 2276 HN_FUNC_HN(native_cbrt); 2277 HN_FUNC_HN(native_cos); 2278 HN_FUNC_HN(native_cosh); 2279 HN_FUNC_HN(native_cospi); 2280 2281 H_FUNC_HN_HN(native_distance); 2282 HN_FUNC_HN_HN(native_divide); 2283 2284 HN_FUNC_HN(native_exp); 2285 HN_FUNC_HN(native_exp10); 2286 HN_FUNC_HN(native_exp2); 2287 HN_FUNC_HN(native_expm1); 2288 2289 HN_FUNC_HN_HN(native_hypot); 2290 H_FUNC_HN(native_length); 2291 2292 HN_FUNC_HN(native_log); 2293 HN_FUNC_HN(native_log10); 2294 HN_FUNC_HN(native_log1p); 2295 HN_FUNC_HN(native_log2); 2296 2297 HN_FUNC_HN(native_normalize); 2298 2299 HN_FUNC_HN_HN(native_powr); // TODO are parameter limits different for half? 
2300 2301 HN_FUNC_HN(native_recip); 2302 HN_FUNC_HN_IN(native_rootn); 2303 HN_FUNC_HN(native_rsqrt); 2304 2305 HN_FUNC_HN(native_sin); 2306 2307 extern half __attribute__((overloadable)) native_sincos(half v, half *cosptr) { 2308 return sincos(v, cosptr); 2309 } 2310 extern half2 __attribute__((overloadable)) native_sincos(half2 v, half2 *cosptr) { 2311 return sincos(v, cosptr); 2312 } 2313 extern half3 __attribute__((overloadable)) native_sincos(half3 v, half3 *cosptr) { 2314 return sincos(v, cosptr); 2315 } 2316 extern half4 __attribute__((overloadable)) native_sincos(half4 v, half4 *cosptr) { 2317 return sincos(v, cosptr); 2318 } 2319 2320 HN_FUNC_HN(native_sinh); 2321 HN_FUNC_HN(native_sinpi); 2322 HN_FUNC_HN(native_sqrt); 2323 2324 HN_FUNC_HN(native_tan); 2325 HN_FUNC_HN(native_tanh); 2326 HN_FUNC_HN(native_tanpi); 2327 2328 #undef HN_FUNC_HN 2329 #undef HN_FUNC_HN_HN 2330 #undef HN_FUNC_HN_H 2331 #undef HN_FUNC_HN_HN_HN 2332 #undef HN_FUNC_HN_IN 2333 #undef H_FUNC_HN 2334 #undef H_FUNC_HN_HN 2335 #undef SCALARIZE_HN_FUNC_HN_HN 2336 2337 // exports unavailable mathlib functions to compat lib 2338 2339 #ifdef RS_COMPATIBILITY_LIB 2340 2341 // !!! DANGER !!! 2342 // These functions are potentially missing on older Android versions. 2343 // Work around the issue by supplying our own variants. 2344 // !!! DANGER !!! 2345 2346 // The logbl() implementation is taken from the latest bionic/, since 2347 // double == long double on Android. 2348 extern "C" long double logbl(long double x) { return logb(x); } 2349 2350 // __aeabi_idiv0 is a missing function in libcompiler_rt.so, so we just 2351 // pick the simplest implementation based on the ARM EABI doc. 2352 extern "C" int __aeabi_idiv0(int v) { return v; } 2353 2354 #endif // compatibility lib 2355