/*
Copyright (c) 2014, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/******************************************************************************/
//                     ALGORITHM DESCRIPTION
//                     ---------------------
//
// Assume x=2^{3*k+j} * 1.b1 b2 ... b5 b6 ... b52, where j = 0,1,2.
// Let r=(x*2^{-3k-j} - 1.b1 b2 ... b5 1)* rcp[b1 b2 ..b5],
// where rcp[b1 b2 .. b5]=1/(1.b1 b2 b3 b4 b5 1) in double precision
// cbrt(2^j * 1. b1 b2 .. b5 1) is approximated as T[j][b1..b5]+D[j][b1..b5]
// (T stores the high 53 bits, D stores the low order bits)
// Result=2^k*T+(2^k*T*r)*P+2^k*D
// where P=p1+p2*r+..+p8*r^7
//
// Special cases:
//  cbrt(NaN) = quiet NaN (computed as x+x, so the invalid exception is
//              signalled only when x is a signaling NaN)
//  cbrt(INF) = that INF
//  cbrt(+/-0) = +/-0
//
/******************************************************************************/

#include <private/bionic_asm.h>

//------------------------------------------------------------------------------
// double cbrt(double x)
//
// Syntax:   GNU as, AT&T operand order, preprocessed by cpp.
// In:       %xmm0 = x
// Out:      %xmm0 = cbrt(x)
// Clobbers: %rax, %rcx, %rdx, %r8, %r9, %xmm1-%xmm7, flags
// Stack:    24-byte frame; x is spilled to (%rsp) so that the Inf/NaN path can
//           look at its two 32-bit halves. The routine makes no calls.
//
// The ..___tag_value_cbrt.N labels mark the points where the stack pointer
// changes; the hand-written .eh_frame record at the end of this file refers to
// them, so they must stay attached to the subq/addq/ret they bracket.
//------------------------------------------------------------------------------
# -- Begin  cbrt
ENTRY(cbrt)
# parameter 1: %xmm0
..B1.1:
..___tag_value_cbrt.1:
        subq      $24, %rsp
..___tag_value_cbrt.3:
        movsd     %xmm0, (%rsp)                 // keep x for the Inf/NaN path
..B1.2:
        movq      %xmm0, %xmm7
        movl      $524032, %edx                 // 0x7ff00: exponent field of (x >> 44)
        movsd     EXP_MSK3(%rip), %xmm5         // 52-bit mantissa mask
        movsd     EXP_MSK2(%rip), %xmm3         // -(1 + 2^-6)
        psrlq     $44, %xmm7                    // low 20 bits: sign, exponent, b1..b8
        pextrw    $0, %xmm7, %ecx
        movd      %xmm7, %eax
        movsd     EXP_MASK(%rip), %xmm1         // -1.0
        movsd     SIG_MASK(%rip), %xmm2         // mask of mantissa bits b1..b6
        andl      $248, %ecx                    // ecx = 8 * (b1..b5): byte offset in a table row
        lea       rcp_table(%rip), %r8
        movsd     (%rcx,%r8), %xmm4             // xmm4 = rcp_table[b1..b5]
        movq      %rax, %r9
        andl      %eax, %edx                    // edx = exponent field, still shifted left by 8
        testl     %edx, %edx
        je        .L_2TAG_PACKET_0.0.1          // exponent == 0: zero or subnormal
        cmpl      $524032, %edx
        je        .L_2TAG_PACKET_1.0.1          // exponent all ones: Inf or NaN
        shrl      $8, %edx                      // edx = biased exponent e
        shrq      $8, %r9                       // r9  = sign:e (12 bits)
        andpd     %xmm0, %xmm2                  // xmm2 = bits b1..b6 of x
        andpd     %xmm5, %xmm0                  // xmm0 = mantissa bits of x
        orpd      %xmm2, %xmm3                  // xmm3 = -(1.b1 b2 ... b5 1)
        orpd      %xmm0, %xmm1                  // xmm1 = -(1.b1 b2 ... b52)
        movapd    coeff_table(%rip), %xmm5
        movl      $5462, %eax
        movapd    16+coeff_table(%rip), %xmm6
        mull      %edx                          // edx:eax = 5462 * e
        movq      %r9, %rdx
        andq      $2047, %r9                    // r9 = e
        shrl      $14, %eax                     // eax = (5462 * e) >> 14 = e / 3 (integer)
        andl      $2048, %edx                   // edx = sign bit, at bit 11
        subq      %rax, %r9
        subq      %rax, %r9
        subq      %rax, %r9                     // r9 = e - 3 * (e / 3) = j
        shlq      $8, %r9                       // j * 256: byte offset of table row j
        addl      $682, %eax                    // biased exponent of 2^k (682 = 1023 - 1023/3)
        orl       %edx, %eax                    // put the sign of x on top
        movd      %eax, %xmm7
        addq      %r9, %rcx                     // rcx = byte offset of entry [j][b1..b5]
        psllq     $52, %xmm7                    // xmm7 = +/-2^k as a double

// Common evaluation. On entry:
//   xmm1 = -(1.b1 ... b52)      xmm3 = -(1.b1 ... b5 1)      xmm4 = rcp entry
//   xmm5 = coeff_table[0..1]    xmm6 = coeff_table[2..3]     xmm7 = +/-2^k
//   rcx  = byte offset of entry [j][b1..b5] in cbrt_table and D_table
.L_2TAG_PACKET_2.0.1:
        movapd    32+coeff_table(%rip), %xmm2
        movapd    48+coeff_table(%rip), %xmm0
        subsd     %xmm3, %xmm1                  // (1.b1 ... b5 1) - (1.b1 ... b52)
        movq      %xmm7, %xmm3
        lea       cbrt_table(%rip), %r8
        mulsd     (%rcx,%r8), %xmm7             // xmm7 = 2^k * T
        mulsd     %xmm4, %xmm1                  // xmm1 = r (table entry is negative)
        lea       D_table(%rip), %r8
        mulsd     (%rcx,%r8), %xmm3             // xmm3 = 2^k * D
        movapd    %xmm1, %xmm4
        unpcklpd  %xmm1, %xmm1                  // xmm1 = (r, r)
        mulpd     %xmm1, %xmm5
        mulpd     %xmm1, %xmm6
        mulpd     %xmm1, %xmm1                  // xmm1 = (r^2, r^2)
        addpd     %xmm5, %xmm2
        addpd     %xmm6, %xmm0
        mulpd     %xmm1, %xmm2
        mulpd     %xmm1, %xmm1                  // xmm1 = (r^4, r^4)
        mulsd     %xmm7, %xmm4                  // xmm4 = 2^k * T * r
        addpd     %xmm2, %xmm0                  // two cubic halves of P, one per lane
        mulsd     %xmm0, %xmm1                  // low half times r^4
        unpckhpd  %xmm0, %xmm0                  // bring the high lane down
        addsd     %xmm1, %xmm0                  // xmm0 = P(r)
        mulsd     %xmm4, %xmm0                  // (2^k * T * r) * P
        addsd     %xmm3, %xmm0                  // + 2^k * D
        addsd     %xmm7, %xmm0                  // + 2^k * T
        jmp       ..B1.4

// Exponent field of x is zero: x is +/-0 or subnormal. Scale by 2^63 and redo
// the decomposition. The mask registers loaded above (xmm1, xmm2, xmm3, xmm5)
// are still intact here.
.L_2TAG_PACKET_0.0.1:
        mulsd     SCALE63(%rip), %xmm0          // x * 2^63
        movq      %xmm0, %xmm7
        movl      $524032, %edx
        psrlq     $44, %xmm7
        pextrw    $0, %xmm7, %ecx
        movd      %xmm7, %eax
        andl      $248, %ecx
        lea       rcp_table(%rip), %r8
        movsd     (%rcx,%r8), %xmm4
        movq      %rax, %r9
        andl      %eax, %edx
        shrl      $8, %edx                      // edx = biased exponent of the scaled value
        shrq      $8, %r9                       // r9  = sign:exponent
        testl     %edx, %edx
        je        .L_2TAG_PACKET_3.0.1          // still zero: x was +/-0
        andpd     %xmm0, %xmm2
        andpd     %xmm5, %xmm0
        orpd      %xmm2, %xmm3
        orpd      %xmm0, %xmm1
        movapd    coeff_table(%rip), %xmm5
        movl      $5462, %eax
        movapd    16+coeff_table(%rip), %xmm6
        mull      %edx
        movq      %r9, %rdx
        andq      $2047, %r9
        shrl      $14, %eax
        andl      $2048, %edx
        subq      %rax, %r9
        subq      %rax, %r9
        subq      %rax, %r9
        shlq      $8, %r9
        addl      $661, %eax                    // 682 - 21: undo the 2^63 scaling (cbrt(2^63) = 2^21)
        orl       %edx, %eax
        movd      %eax, %xmm7
        addq      %r9, %rcx
        psllq     $52, %xmm7
        jmp       .L_2TAG_PACKET_2.0.1

// x is +/-0: return a zero with the sign of x (r9 holds only the sign bit here).
.L_2TAG_PACKET_3.0.1:
        testq     %r9, %r9
        jne       .L_2TAG_PACKET_4.0.1
        xorpd     %xmm0, %xmm0                  // +0.0
        jmp       ..B1.4
.L_2TAG_PACKET_4.0.1:
        movsd     ZERON(%rip), %xmm0            // -0.0
        jmp       ..B1.4

// Exponent field of x is all ones: tell Inf from NaN using the spilled copy.
.L_2TAG_PACKET_1.0.1:
        movl      4(%rsp), %eax                 // high 32 bits of x
        movl      (%rsp), %edx                  // low 32 bits of x
        movl      %eax, %ecx
        andl      $2147483647, %ecx             // drop the sign bit
        cmpl      $2146435072, %ecx             // 0x7ff00000
        ja        .L_2TAG_PACKET_5.0.1          // high mantissa bits set: NaN
        testl     %edx, %edx
        jne       .L_2TAG_PACKET_5.0.1          // low mantissa bits set: NaN
        cmpl      $2146435072, %eax
        jne       .L_2TAG_PACKET_6.0.1
        movsd     INF(%rip), %xmm0              // x == +Inf
        jmp       ..B1.4
.L_2TAG_PACKET_6.0.1:
        movsd     NEG_INF(%rip), %xmm0          // x == -Inf
        jmp       ..B1.4

// NaN: return x + x, which yields a quiet NaN.
.L_2TAG_PACKET_5.0.1:
        movsd     (%rsp), %xmm0
        addsd     %xmm0, %xmm0

// Common exit: result is in %xmm0.
..B1.4:
        addq      $24, %rsp
..___tag_value_cbrt.4:
        ret
..___tag_value_cbrt.5:
END(cbrt)
# -- End  cbrt
        .section .rodata, "a"
        .align 16
// Eight doubles, loaded two at a time with movapd (hence the 16-byte
// alignment). The last entry, 0x3fd5555555555555, is 1/3.
coeff_table:
        .long   1553778919
        .long   3213899486
        .long   3534952507
        .long   3215266280
        .long   1646371399
        .long   3214412045
        .long   477218588
        .long   3216798151
        .long   3582521621
        .long   1066628362
        .long   1007461464
        .long   1068473053
        .long   889629714
        .long   1067378449
        .long   1431655765
        .long   1070945621
        .type   coeff_table,@object
        .size   coeff_table,64
        .align 8
// 0x000fffffffffffff: the 52 mantissa bits.
EXP_MSK3:
        .long   4294967295
        .long   1048575
        .type   EXP_MSK3,@object
        .size   EXP_MSK3,8
        .align 8
// 0xbff0400000000000: -(1 + 2^-6).
EXP_MSK2:
        .long   0
        .long   3220193280
        .type   EXP_MSK2,@object
        .size   EXP_MSK2,8
        .align 8
// 0xbff0000000000000: -1.0.
EXP_MASK:
        .long   0
        .long   3220176896
        .type   EXP_MASK,@object
        .size   EXP_MASK,8
        .align 8
// 0x000fc00000000000: mantissa bits b1..b6.
SIG_MASK:
        .long   0
        .long   1032192
        .type   SIG_MASK,@object
        .size   SIG_MASK,8
        .align 8
// 32 doubles indexed by b1..b5. Every entry has its sign bit set, matching the
// negated operands built from EXP_MASK and EXP_MSK2.
rcp_table:
        .long   528611360
        .long   3220144632
        .long   2884679527
        .long   3220082993
        .long   1991868891
        .long   3220024928
        .long   2298714891
        .long   3219970134
        .long   58835168
        .long   3219918343
        .long   3035110223
        .long   3219869313
        .long   1617585086
        .long   3219822831
        .long   2500867033
        .long   3219778702
        .long   4241943008
        .long   3219736752
        .long   258732970
        .long   3219696825
        .long   404232216
        .long   3219658776
        .long   2172167368
        .long   3219622476
        .long   1544257904
        .long   3219587808
        .long   377579543
        .long   3219554664
        .long   1616385542
        .long   3219522945
        .long   813783277
        .long   3219492562
        .long   3940743189
        .long   3219463431
        .long   2689777499
        .long   3219435478
        .long   1700977147
        .long   3219408632
        .long   3169102082
        .long   3219382828
        .long   327235604
        .long   3219358008
        .long   1244336319
        .long   3219334115
        .long   1300311200
        .long   3219311099
        .long   3095471925
        .long   3219288912
        .long   2166487928
        .long   3219267511
        .long   2913108253
        .long   3219246854
        .long   293672978
        .long   3219226904
        .long   288737297
        .long   3219207624
        .long   1810275472
        .long   3219188981
        .long   174592167
        .long   3219170945
        .long   3539053052
        .long   3219153485
        .long   2164392968
        .long   3219136576
        .type   rcp_table,@object
        .size   rcp_table,256
        .align 8
// T[j][b1..b5]: 3 rows (j = 0,1,2) of 32 doubles, 256 bytes per row.
cbrt_table:
        .long   572345495
        .long   1072698681
        .long   1998204467
        .long   1072709382
        .long   3861501553
        .long   1072719872
        .long   2268192434
        .long   1072730162
        .long   2981979308
        .long   1072740260
        .long   270859143
        .long   1072750176
        .long   2958651392
        .long   1072759916
        .long   313113243
        .long   1072769490
        .long   919449400
        .long   1072778903
        .long   2809328903
        .long   1072788162
        .long   2222981587
        .long   1072797274
        .long   2352530781
        .long   1072806244
        .long   594152517
        .long   1072815078
        .long   1555767199
        .long   1072823780
        .long   4282421314
        .long   1072832355
        .long   2355578597
        .long   1072840809
        .long   1162590619
        .long   1072849145
        .long   797864051
        .long   1072857367
        .long   431273680
        .long   1072865479
        .long   2669831148
        .long   1072873484
        .long   733477752
        .long   1072881387
        .long   4280220604
        .long   1072889189
        .long   801961634
        .long   1072896896
        .long   2915370760
        .long   1072904508
        .long   1159613482
        .long   1072912030
        .long   2689944798
        .long   1072919463
        .long   1248687822
        .long   1072926811
        .long   2967951030
        .long   1072934075
        .long   630170432
        .long   1072941259
        .long   3760898254
        .long   1072948363
        .long   0
        .long   1072955392
        .long   2370273294
        .long   1072962345
        .long   1261754802
        .long   1072972640
        .long   546334065
        .long   1072986123
        .long   1054893830
        .long   1072999340
        .long   1571187597
        .long   1073012304
        .long   1107975175
        .long   1073025027
        .long   3606909377
        .long   1073037519
        .long   1113616747
        .long   1073049792
        .long   4154744632
        .long   1073061853
        .long   3358931423
        .long   1073073713
        .long   4060702372
        .long   1073085379
        .long   747576176
        .long   1073096860
        .long   3023138255
        .long   1073108161
        .long   1419988548
        .long   1073119291
        .long   1914185305
        .long   1073130255
        .long   294389948
        .long   1073141060
        .long   3761802570
        .long   1073151710
        .long   978281566
        .long   1073162213
        .long   823148820
        .long   1073172572
        .long   2420954441
        .long   1073182792
        .long   3815449908
        .long   1073192878
        .long   2046058587
        .long   1073202835
        .long   1807524753
        .long   1073212666
        .long   2628681401
        .long   1073222375
        .long   3225667357
        .long   1073231966
        .long   1555307421
        .long   1073241443
        .long   3454043099
        .long   1073250808
        .long   1208137896
        .long   1073260066
        .long   3659916772
        .long   1073269218
        .long   1886261264
        .long   1073278269
        .long   3593647839
        .long   1073287220
        .long   3086012205
        .long   1073296075
        .long   2769796922
        .long   1073304836
        .long   888716057
        .long   1073317807
        .long   2201465623
        .long   1073334794
        .long   164369365
        .long   1073351447
        .long   3462666733
        .long   1073367780
        .long   2773905457
        .long   1073383810
        .long   1342879088
        .long   1073399550
        .long   2543933975
        .long   1073415012
        .long   1684477781
        .long   1073430209
        .long   3532178543
        .long   1073445151
        .long   1147747300
        .long   1073459850
        .long   1928031793
        .long   1073474314
        .long   2079717015
        .long   1073488553
        .long   4016765315
        .long   1073502575
        .long   3670431139
        .long   1073516389
        .long   3549227225
        .long   1073530002
        .long   11637607
        .long   1073543422
        .long   588220169
        .long   1073556654
        .long   2635407503
        .long   1073569705
        .long   2042029317
        .long   1073582582
        .long   1925128962
        .long   1073595290
        .long   4136375664
        .long   1073607834
        .long   759964600
        .long   1073620221
        .long   4257606771
        .long   1073632453
        .long   297278907
        .long   1073644538
        .long   3655053093
        .long   1073656477
        .long   2442253172
        .long   1073668277
        .long   1111876799
        .long   1073679941
        .long   3330973139
        .long   1073691472
        .long   3438879452
        .long   1073702875
        .long   3671565478
        .long   1073714153
        .long   1317849547
        .long   1073725310
        .long   1642364115
        .long   1073736348
        .type   cbrt_table,@object
        .size   cbrt_table,768
        .align 8
// D[j][b1..b5]: low-order correction terms, same layout as cbrt_table.
D_table:
        .long   4050900474
        .long   1014427190
        .long   1157977860
        .long   1016444461
        .long   1374568199
        .long   1017271387
        .long   2809163288
        .long   1016882676
        .long   3742377377
        .long   1013168191
        .long   3101606597
        .long   1017541672
        .long   65224358
        .long   1017217597
        .long   2691591250
        .long   1017266643
        .long   4020758549
        .long   1017689313
        .long   1316310992
        .long   1018030788
        .long   1031537856
        .long   1014090882
        .long   3261395239
        .long   1016413641
        .long   886424999
        .long   1016313335
        .long   3114776834
        .long   1014195875
        .long   1681120620
        .long   1017825416
        .long   1329600273
        .long   1016625740
        .long   465474623
        .long   1017097119
        .long   4251633980
        .long   1017169077
        .long   1986990133
        .long   1017710645
        .long   752958613
        .long   1017159641
        .long   2216216792
        .long   1018020163
        .long   4282860129
        .long   1015924861
        .long   1557627859
        .long   1016039538
        .long   3889219754
        .long   1018086237
        .long   3684996408
        .long   1017353275
        .long   723532103
        .long   1017717141
        .long   2951149676
        .long   1012528470
        .long   831890937
        .long   1017830553
        .long   1031212645
        .long   1017387331
        .long   2741737450
        .long   1017604974
        .long   2863311531
        .long   1003776682
        .long   4276736099
        .long   1013153088
        .long   4111778382
        .long   1015673686
        .long   1728065769
        .long   1016413986
        .long   2708718031
        .long   1018078833
        .long   1069335005
        .long   1015291224
        .long   700037144
        .long   1016482032
        .long   2904566452
        .long   1017226861
        .long   4074156649
        .long   1017622651
        .long   25019565
        .long   1015245366
        .long   3601952608
        .long   1015771755
        .long   3267129373
        .long   1017904664
        .long   503203103
        .long   1014921629
        .long   2122011730
        .long   1018027866
        .long   3927295461
        .long   1014189456
        .long   2790625147
        .long   1016024251
        .long   1330460186
        .long   1016940346
        .long   4033568463
        .long   1015538390
        .long   3695818227
        .long   1017509621
        .long   257573361
        .long   1017208868
        .long   3227697852
        .long   1017337964
        .long   234118548
        .long   1017169577
        .long   4009025803
        .long   1017278524
        .long   1948343394
        .long   1017749310
        .long   678398162
        .long   1018144239
        .long   3083864863
        .long   1016669086
        .long   2415453452
        .long   1017890370
        .long   175467344
        .long   1017330033
        .long   3197359580
        .long   1010339928
        .long   2071276951
        .long   1015941358
        .long   268372543
        .long   1016737773
        .long   938132959
        .long   1017389108
        .long   1816750559
        .long   1017337448
        .long   4119203749
        .long   1017152174
        .long   2578653878
        .long   1013108497
        .long   2470331096
        .long   1014678606
        .long   123855735
        .long   1016553320
        .long   1265650889
        .long   1014782687
        .long   3414398172
        .long   1017182638
        .long   1040773369
        .long   1016158401
        .long   3483628886
        .long   1016886550
        .long   4140499405
        .long   1016191425
        .long   3893477850
        .long   1016964495
        .long   3935319771
        .long   1009634717
        .long   2978982660
        .long   1015027112
        .long   2452709923
        .long   1017990229
        .long   3190365712
        .long   1015835149
        .long   4237588139
        .long   1015832925
        .long   2610678389
        .long   1017962711
        .long   2127316774
        .long   1017405770
        .long   824267502
        .long   1017959463
        .long   2165924042
        .long   1017912225
        .long   2774007076
        .long   1013257418
        .long   4123916326
        .long   1017582284
        .long   1976417958
        .long   1016959909
        .long   4092806412
        .long   1017711279
        .long   119251817
        .long   1015363631
        .long   3475418768
        .long   1017675415
        .long   1972580503
        .long   1015470684
        .long   815541017
        .long   1017517969
        .long   2429917451
        .long   1017397776
        .long   4062888482
        .long   1016749897
        .long   68284153
        .long   1017925678
        .long   2207779246
        .long   1016320298
        .long   1183466520
        .long   1017408657
        .long   143326427
        .long   1017060403
        .type   D_table,@object
        .size   D_table,768
        .align 8
// 0x43e0000000000000: 2^63, used to normalise subnormal inputs.
SCALE63:
        .long   0
        .long   1138753536
        .type   SCALE63,@object
        .size   SCALE63,8
        .align 8
// 0x8000000000000000: -0.0.
ZERON:
        .long   0
        .long   2147483648
        .type   ZERON,@object
        .size   ZERON,8
        .align 8
// 0x7ff0000000000000: +Inf.
INF:
        .long   0
        .long   2146435072
        .type   INF,@object
        .size   INF,8
        .align 8
// 0xfff0000000000000: -Inf.
NEG_INF:
        .long   0
        .long   4293918720
        .type   NEG_INF,@object
        .size   NEG_INF,8
        .data
        .section .note.GNU-stack, ""
// -- Begin DWARF2 SEGMENT .eh_frame
// Hand-encoded unwind record for cbrt. The three label differences below tie
// it to the ..___tag_value_cbrt.N labels in the function body.
        .section .eh_frame,"a",@progbits
.eh_frame_seg:
        .align 1
        .4byte 0x00000014
        .8byte 0x00527a0100000000
        .8byte 0x08070c1b01107801
        .4byte 0x00000190
        .4byte 0x0000001c
        .4byte 0x0000001c
        .4byte ..___tag_value_cbrt.1-.
        .4byte ..___tag_value_cbrt.5-..___tag_value_cbrt.1
        .2byte 0x0400
        .4byte ..___tag_value_cbrt.3-..___tag_value_cbrt.1
        .2byte 0x200e                           // bytes 0e 20: CFA offset 32 after the subq
        .byte 0x04
        .4byte ..___tag_value_cbrt.4-..___tag_value_cbrt.3
        .2byte 0x080e                           // bytes 0e 08: CFA offset 8 after the addq
        .byte 0x00
# End