Home | History | Annotate | Download | only in x86_64
      1 /*
      2 Copyright (c) 2014, Intel Corporation
      3 All rights reserved.
      4 
      5 Redistribution and use in source and binary forms, with or without
      6 modification, are permitted provided that the following conditions are met:
      7 
      8     * Redistributions of source code must retain the above copyright notice,
      9     * this list of conditions and the following disclaimer.
     10 
     11     * Redistributions in binary form must reproduce the above copyright notice,
     12     * this list of conditions and the following disclaimer in the documentation
     13     * and/or other materials provided with the distribution.
     14 
     15     * Neither the name of Intel Corporation nor the names of its contributors
     16     * may be used to endorse or promote products derived from this software
     17     * without specific prior written permission.
     18 
     19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
     20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     22 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
     23 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     24 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     25 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
     26 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     28 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29 */
     30 
     31 /******************************************************************************/
     32 //                     ALGORITHM DESCRIPTION
     33 //                     ---------------------
     34 //
     35 // Description:
     36 //  Let K = 64 (table size).
     37 //        x    x/log(2)     n
     38 //       e  = 2          = 2 * T[j] * (1 + P(y))
     39 //  where
     40 //       x = m*log(2)/K + y,    y in [-log(2)/K..log(2)/K]
     41 //       m = n*K + j,           m,n,j - signed integer, j in [-K/2..K/2]
     42 //                  j/K
     43 //       values of 2   are tabulated as T[j] = T_hi[j] ( 1 + T_lo[j]).
     44 //
     45 //       P(y) is a minimax polynomial approximation of exp(y)-1
     46 //       on the small interval [-log(2)/K..log(2)/K] (coefficients were calculated with Maple V).
     47 //
     48 //  To avoid problems with arithmetic overflow and underflow,
     49 //            n                        n1  n2
     50 //  value of 2  is safely computed as 2 * 2 where n1 in [-BIAS/2..BIAS/2]
     51 //  and BIAS is the value of the exponent bias.
     52 //
     53 // Special cases:
     54 //  exp(NaN) = NaN
     55 //  exp(+INF) = +INF
     56 //  exp(-INF) = 0
     57 //  exp(x) = 1 for subnormals
     58 //  for finite argument, only exp(0)=1 is exact
     59 //  For IEEE double
     60 //    if x >  709.782712893383973096 then exp(x) overflow
     61 //    if x < -745.133219101941108420 then exp(x) underflow
     62 //
     63 /******************************************************************************/
     64 
     65 #include <private/bionic_asm.h>
     66 # -- Begin  exp
     67 ENTRY(exp)  # exp: computes e^x for the double in %xmm0 and leaves the result in %xmm0
     68 # parameter 1: %xmm0 = x (the low double lane is the argument)
     69 ..B1.1:
     70 ..___tag_value_exp.1:  # unwind marker used by the .eh_frame data: start of the function
     71         subq      $24, %rsp  # scratch frame: 0(%rsp) status word, 8(%rsp) copy of x, 16(%rsp) result spill
     72 ..___tag_value_exp.3:  # unwind marker: frame is allocated from here on
     73         movsd     %xmm0, 8(%rsp)  # keep the original x in memory for the special-case paths
     74 ..B1.2:
     75         unpcklpd  %xmm0, %xmm0  # xmm0 = {x, x}
     76         movapd    cv(%rip), %xmm1  # xmm1 = K/log(2) in both lanes (K = 64)
     77         movapd    Shifter(%rip), %xmm6  # xmm6 = 1.5*2^52, used to round to an integer via add and subtract
     78         movapd    16+cv(%rip), %xmm2  # xmm2 = high part of log(2)/K
     79         movapd    32+cv(%rip), %xmm3  # xmm3 = low part of log(2)/K
     80         pextrw    $3, %xmm0, %eax  # eax = bits 48..63 of x: sign, exponent, top 4 mantissa bits
     81         andl      $32767, %eax  # drop the sign bit
     82         movl      $16527, %edx
     83         subl      %eax, %edx  # negative when eax > 0x408f, i.e. |x| >= 1024
     84         subl      $15504, %eax  # negative when eax < 0x3c90, i.e. |x| < 2^-54
     85         orl       %eax, %edx  # sign bit set if either range test failed
     86         cmpl      $-2147483648, %edx
     87         jae       .L_2TAG_PACKET_0.0.2  # unsigned compare: taken when the sign bit is set -> tiny/huge/Inf/NaN handling
     88         mulpd     %xmm0, %xmm1  # x*K/log(2)
     89         addpd     %xmm6, %xmm1  # add the shifter: m = round(x*K/log(2)) now sits in the low mantissa bits
     90         movapd    %xmm1, %xmm7  # xmm7 keeps the shifted value (integer m in its low bits)
     91         subpd     %xmm6, %xmm1  # xmm1 = m as a double
     92         mulpd     %xmm1, %xmm2  # m * high part of log(2)/K
     93         movapd    64+cv(%rip), %xmm4  # polynomial coefficient pair from cv+64
     94         mulpd     %xmm1, %xmm3  # m * low part of log(2)/K
     95         movapd    80+cv(%rip), %xmm5  # polynomial coefficient pair from cv+80
     96         subpd     %xmm2, %xmm0  # first reduction step: x - m*hi
     97         movd      %xmm7, %eax  # eax = low 32 bits of the shifted value = m
     98         movl      %eax, %ecx
     99         andl      $63, %ecx  # j = m & 63, the table index
    100         shll      $4, %ecx  # byte offset: 16 bytes per table entry
    101         sarl      $6, %eax  # n = m >> 6 (arithmetic shift keeps the sign)
    102         movl      %eax, %edx  # edx = n, used for the range test below
    103         movdqa    mmask(%rip), %xmm6
    104         pand      %xmm6, %xmm7  # keep only n*64 in the low dword of each lane
    105         movdqa    bias(%rip), %xmm6
    106         paddq     %xmm6, %xmm7  # (n + 1023)*64
    107         psllq     $46, %xmm7  # move n + 1023 into the exponent field: bit pattern of 2^n
    108         subpd     %xmm3, %xmm0  # second reduction step: y = x - m*log(2)/K
    109         lea       Tbl_addr(%rip), %r8  # table base; RIP-relative addressing cannot take an index register
    110         movapd    (%rcx,%r8), %xmm2  # xmm2 = table entry j (two qwords)
    111         mulpd     %xmm0, %xmm4  # (cv+64 pair) * y
    112         movapd    %xmm0, %xmm6
    113         movapd    %xmm0, %xmm1  # xmm1 = y
    114         mulpd     %xmm6, %xmm6  # xmm6 = y^2
    115         mulpd     %xmm6, %xmm0  # xmm0 = y^3
    116         addpd     %xmm4, %xmm5  # xmm5 = (cv+80 pair) + (cv+64 pair)*y
    117         mulsd     %xmm6, %xmm0  # low lane becomes y^5, high lane stays y^3
    118         mulpd     48+cv(%rip), %xmm6  # y^2 times the cv+48 constant (just below 0.5)
    119         addsd     %xmm2, %xmm1  # y + low double of the table entry (presumably the T_lo term - TODO confirm)
    120         unpckhpd  %xmm2, %xmm2  # broadcast the high qword of the table entry (fraction bits of 2^(j/K))
    121         mulpd     %xmm5, %xmm0  # low lane: y^5*(...), high lane: y^3*(...)
    122         addsd     %xmm0, %xmm1  # accumulate the low-lane polynomial term
    123         orpd      %xmm7, %xmm2  # merge in the exponent bits: xmm2 = 2^n * T_hi[j]
    124         unpckhpd  %xmm0, %xmm0  # bring the high-lane polynomial term down to the low lane
    125         addsd     %xmm1, %xmm0
    126         addsd     %xmm6, %xmm0  # xmm0 = complete correction sum p
    127         addl      $894, %edx
    128         cmpl      $1916, %edx
    129         ja        .L_2TAG_PACKET_1.0.2  # unsigned test: taken when n < -894 or n > 1022 -> two-step scaling
    130         mulsd     %xmm2, %xmm0  # T*p
    131         addsd     %xmm2, %xmm0  # result = T + T*p
    132         jmp       ..B1.5  # common return
    133 .L_2TAG_PACKET_1.0.2:  # extreme n: scale in two steps so 2^n never has to be formed directly
    134         xorpd     %xmm3, %xmm3  # xmm3 = 0
    135         movapd    ALLONES(%rip), %xmm4
    136         movl      $-1022, %edx
    137         subl      %eax, %edx  # edx = -1022 - n
    138         movd      %edx, %xmm5  # shift count for the mask below
    139         psllq     %xmm5, %xmm4  # all-ones shifted left by edx bits; a count above 63 gives zero
    140         movl      %eax, %ecx  # ecx = n
    141         sarl      $1, %eax  # n1 = n >> 1
    142         pinsrw    $3, %eax, %xmm3  # low 16 bits of n1 into bits 48..63 of xmm3
    143         movapd    ebias(%rip), %xmm6  # bit pattern of 1.0 in each lane
    144         psllq     $4, %xmm3  # align n1 with the exponent field
    145         psubd     %xmm3, %xmm2  # take n1 out of the exponent of T: xmm2 = 2^(n-n1) * T_hi[j]
    146         mulsd     %xmm2, %xmm0  # partially scaled T times p
    147         cmpl      $52, %edx
    148         jg        .L_2TAG_PACKET_2.0.2  # taken when -1022 - n > 52, i.e. n < -1074
    149         andpd     %xmm2, %xmm4  # xmm4 = leading bits of the partially scaled T selected by the mask
    150         paddd     %xmm6, %xmm3  # add the bias: low lane of xmm3 = 2^n1
    151         subsd     %xmm4, %xmm2  # xmm2 = remaining trailing bits of T
    152         addsd     %xmm2, %xmm0  # trailing part of T plus T*p
    153         cmpl      $1023, %ecx
    154         jge       .L_2TAG_PACKET_3.0.2  # n >= 1023: result may overflow
    155         pextrw    $3, %xmm0, %ecx
    156         andl      $32768, %ecx  # sign bit of the low-order sum
    157         orl       %ecx, %edx
    158         cmpl      $0, %edx
    159         je        .L_2TAG_PACKET_4.0.2  # taken when n == -1022 and the low-order sum is not negative
    160         movapd    %xmm0, %xmm6  # keep the low-order sum for the fix-up path
    161         addsd     %xmm4, %xmm0  # add the leading part of T
    162         mulsd     %xmm3, %xmm0  # final scaling by 2^n1
    163         pextrw    $3, %xmm0, %ecx
    164         andl      $32752, %ecx  # isolate the exponent field of the result
    165         cmpl      $0, %ecx
    166         je        .L_2TAG_PACKET_5.0.2  # exponent field is zero: result is subnormal or zero
    167         jmp       ..B1.5
    168 .L_2TAG_PACKET_5.0.2:  # result has a zero exponent field: rebuild it with integer operations on the bit patterns
    169         mulsd     %xmm3, %xmm6  # low-order sum * 2^n1
    170         mulsd     %xmm3, %xmm4  # leading part * 2^n1
    171         movq      %xmm6, %xmm0
    172         pxor      %xmm4, %xmm6  # sign bit set where the two parts differ in sign
    173         psrad     $31, %xmm6  # turn each dword into a sign mask
    174         pshufd    $85, %xmm6, %xmm6  # broadcast dword 1 (the one holding the sign of the low double)
    175         psllq     $1, %xmm0
    176         psrlq     $1, %xmm0  # shift pair clears the sign bit of the scaled low-order sum
    177         pxor      %xmm6, %xmm0  # complement when the signs differ
    178         psrlq     $63, %xmm6  # 1 when the signs differ, else 0
    179         paddq     %xmm6, %xmm0  # complement plus one: negated magnitude when the signs differ
    180         paddq     %xmm4, %xmm0  # integer add onto the bit pattern of the scaled leading part
    181         movl      $15, (%rsp)  # status word 15; never read back in this function (presumably a leftover underflow code - TODO confirm)
    182         jmp       .L_2TAG_PACKET_6.0.2
    183 .L_2TAG_PACKET_4.0.2:  # n == -1022 with a non-negative low-order sum
    184         addsd     %xmm4, %xmm0  # add the leading part of T
    185         mulsd     %xmm3, %xmm0  # final scaling by 2^n1
    186         jmp       ..B1.5
    187 .L_2TAG_PACKET_3.0.2:  # n >= 1023
    188         addsd     %xmm4, %xmm0  # add the leading part of T
    189         mulsd     %xmm3, %xmm0  # final scaling by 2^n1
    190         pextrw    $3, %xmm0, %ecx
    191         andl      $32752, %ecx  # isolate the exponent field of the result
    192         cmpl      $32752, %ecx
    193         jnb       .L_2TAG_PACKET_7.0.2  # exponent field all ones: the result overflowed
    194         jmp       ..B1.5
    195 .L_2TAG_PACKET_2.0.2:  # n < -1074
    196         paddd     %xmm6, %xmm3  # add the bias: low lane of xmm3 = 2^n1
    197         addpd     %xmm2, %xmm0  # T + T*p with T still partially scaled
    198         mulsd     %xmm3, %xmm0  # final scaling by 2^n1
    199         movl      $15, (%rsp)  # status word 15 (see the note at the other store of 15)
    200         jmp       .L_2TAG_PACKET_6.0.2
    201 .L_2TAG_PACKET_8.0.2:  # |x| >= 1024; eax = high dword of x with the sign cleared
    202         cmpl      $2146435072, %eax
    203         jae       .L_2TAG_PACKET_9.0.2  # exponent field all ones: x is Inf or NaN
    204         movl      12(%rsp), %eax  # high dword of x including the sign
    205         cmpl      $-2147483648, %eax
    206         jae       .L_2TAG_PACKET_10.0.2  # sign bit set: large negative x
    207         movsd     XMAX(%rip), %xmm0
    208         mulsd     %xmm0, %xmm0  # XMAX*XMAX: produce the overflow result by arithmetic
    209 .L_2TAG_PACKET_7.0.2:
    210         movl      $14, (%rsp)  # status word 14; never read back in this function (presumably a leftover overflow code - TODO confirm)
    211         jmp       .L_2TAG_PACKET_6.0.2
    212 .L_2TAG_PACKET_10.0.2:
    213         movsd     XMIN(%rip), %xmm0
    214         mulsd     %xmm0, %xmm0  # XMIN*XMIN: produce the underflow result by arithmetic
    215         movl      $15, (%rsp)  # status word 15 (see the note at the other store of 15)
    216         jmp       .L_2TAG_PACKET_6.0.2
    217 .L_2TAG_PACKET_9.0.2:  # x is Inf or NaN
    218         movl      8(%rsp), %edx  # low dword of x
    219         cmpl      $2146435072, %eax
    220         ja        .L_2TAG_PACKET_11.0.2  # nonzero mantissa bits in the high dword: NaN
    221         cmpl      $0, %edx
    222         jne       .L_2TAG_PACKET_11.0.2  # nonzero mantissa bits in the low dword: NaN
    223         movl      12(%rsp), %eax  # high dword of x including the sign
    224         cmpl      $2146435072, %eax
    225         jne       .L_2TAG_PACKET_12.0.2  # sign bit set: x is -Inf
    226         movsd     INF(%rip), %xmm0  # x is +Inf: return +Inf
    227         jmp       ..B1.5
    228 .L_2TAG_PACKET_12.0.2:
    229         movsd     ZERO(%rip), %xmm0  # x is -Inf: return 0
    230         jmp       ..B1.5
    231 .L_2TAG_PACKET_11.0.2:
    232         movsd     8(%rsp), %xmm0
    233         addsd     %xmm0, %xmm0  # x is NaN: return x + x
    234         jmp       ..B1.5
    235 .L_2TAG_PACKET_0.0.2:  # failed the main range test: |x| < 2^-54 or |x| >= 1024 (includes Inf and NaN)
    236         movl      12(%rsp), %eax
    237         andl      $2147483647, %eax  # high dword of x with the sign cleared
    238         cmpl      $1083179008, %eax
    239         jae       .L_2TAG_PACKET_8.0.2  # high dword >= 0x40900000: |x| >= 1024
    240         movsd     8(%rsp), %xmm0
    241         addsd     ONE_val(%rip), %xmm0  # tiny x: return 1 + x
    242         jmp       ..B1.5
    243 .L_2TAG_PACKET_6.0.2:  # exit used by the paths that stored a status word
    244         movq      %xmm0, 16(%rsp)  # spill the result
    245 ..B1.3:
    246         movq      16(%rsp), %xmm0  # and reload it as the return value
    247 .L_2TAG_PACKET_13.0.2:
    248 ..B1.5:  # common return
    249         addq      $24, %rsp  # release the scratch frame
    250 ..___tag_value_exp.4:  # unwind marker: frame is released from here on
    251         ret
    252 ..___tag_value_exp.5:  # unwind marker: end of the function
    253 END(exp)
    254 # -- End  exp
    255 	.section .rodata, "a"  # read-only constants used by exp
    256 	.align 16
    257 	.align 16
    258 cv:  # six 16-byte constant vectors; each double is stored as low dword then high dword
    259 	.long	1697350398  # cv+0: 0x40571547652b82fe = 64/log(2), same value in both lanes
    260 	.long	1079448903
    261 	.long	1697350398
    262 	.long	1079448903
    263 	.long	4277796864  # cv+16: 0x3f862e42fefa0000 = high part of log(2)/64, both lanes
    264 	.long	1065758274
    265 	.long	4277796864
    266 	.long	1065758274
    267 	.long	3164486458  # cv+32: 0x3d1cf79abc9e3b3a = low part of log(2)/64, both lanes
    268 	.long	1025308570
    269 	.long	3164486458
    270 	.long	1025308570
    271 	.long	4294967294  # cv+48: 0x3fdffffffffffffe, just below 0.5, both lanes (multiplies y^2)
    272 	.long	1071644671
    273 	.long	4294967294
    274 	.long	1071644671
    275 	.long	3811088480  # cv+64 low lane: polynomial coefficient close to 1/720
    276 	.long	1062650204
    277 	.long	1432067621  # cv+64 high lane: polynomial coefficient close to 1/24
    278 	.long	1067799893
    279 	.long	3230715663  # cv+80 low lane: polynomial coefficient close to 1/120
    280 	.long	1065423125
    281 	.long	1431604129  # cv+80 high lane: polynomial coefficient close to 1/6
    282 	.long	1069897045
    283 	.type	cv,@object
    284 	.size	cv,96
    285 	.align 16
    286 Shifter:  # 0x4338000000000000 = 1.5*2^52 in both lanes; exp adds then subtracts it to round to an integer
    287 	.long	0
    288 	.long	1127743488
    289 	.long	0
    290 	.long	1127743488
    291 	.type	Shifter,@object
    292 	.size	Shifter,16
    293 	.align 16
    294 mmask:  # per-qword mask 0x00000000ffffffc0: clears the 6 table-index bits and the upper dword
    295 	.long	4294967232
    296 	.long	0
    297 	.long	4294967232
    298 	.long	0
    299 	.type	mmask,@object
    300 	.size	mmask,16
    301 	.align 16
    302 bias:  # 65472 = 1023*64 per qword: the exponent bias pre-scaled to match n*64 before the shift by 46
    303 	.long	65472
    304 	.long	0
    305 	.long	65472
    306 	.long	0
    307 	.type	bias,@object
    308 	.size	bias,16
    309 	.align 16
    310 Tbl_addr:  # 64 entries of 16 bytes, indexed by j = m & 63; exp loads one whole entry with movapd
    311 	.long	0  # entry 0, low qword: a double added to y (presumably the T_lo correction - TODO confirm)
    312 	.long	0
    313 	.long	0  # entry 0, high qword: fraction bits of 2^(0/64) with a zero exponent field
    314 	.long	0
    315 	.long	235107661  # entry 1, low qword (correction double)
    316 	.long	1018002367
    317 	.long	1048019040  # entry 1, high qword 0x00002c9a3e778060: fraction bits of 2^(1/64)
    318 	.long	11418
    319 	.long	896005651
    320 	.long	1015861842
    321 	.long	3541402996
    322 	.long	22960
    323 	.long	1642514529
    324 	.long	1012987726
    325 	.long	410360776
    326 	.long	34629
    327 	.long	1568897900
    328 	.long	1016568486
    329 	.long	1828292879
    330 	.long	46424
    331 	.long	1882168529
    332 	.long	1010744893
    333 	.long	852742562
    334 	.long	58348
    335 	.long	509852888
    336 	.long	1017336174
    337 	.long	3490863952
    338 	.long	70401
    339 	.long	653277307
    340 	.long	1017431380
    341 	.long	2930322911
    342 	.long	82586
    343 	.long	1649557430
    344 	.long	1017729363
    345 	.long	1014845818
    346 	.long	94904
    347 	.long	1058231231
    348 	.long	1015777676
    349 	.long	3949972341
    350 	.long	107355
    351 	.long	1044000607
    352 	.long	1016786167
    353 	.long	828946858
    354 	.long	119943
    355 	.long	1151779725
    356 	.long	1015705409
    357 	.long	2288159958
    358 	.long	132667
    359 	.long	3819481236
    360 	.long	1016499965
    361 	.long	1853186616
    362 	.long	145530
    363 	.long	2552227826
    364 	.long	1015039787
    365 	.long	1709341917
    366 	.long	158533
    367 	.long	1829350193
    368 	.long	1015216097
    369 	.long	4112506593
    370 	.long	171677
    371 	.long	1913391795
    372 	.long	1015756674
    373 	.long	2799960843
    374 	.long	184965
    375 	.long	1303423926
    376 	.long	1015238005
    377 	.long	171030293
    378 	.long	198398
    379 	.long	1574172746
    380 	.long	1016061241
    381 	.long	2992903935
    382 	.long	211976
    383 	.long	3424156969
    384 	.long	1017196428
    385 	.long	926591434
    386 	.long	225703
    387 	.long	1938513547
    388 	.long	1017631273
    389 	.long	887463926
    390 	.long	239579
    391 	.long	2804567149
    392 	.long	1015390024
    393 	.long	1276261410
    394 	.long	253606
    395 	.long	631083525
    396 	.long	1017690182
    397 	.long	569847337
    398 	.long	267786
    399 	.long	1623370770
    400 	.long	1011049453
    401 	.long	1617004845
    402 	.long	282120
    403 	.long	3667985273
    404 	.long	1013894369
    405 	.long	3049340112
    406 	.long	296610
    407 	.long	3145379760
    408 	.long	1014403278
    409 	.long	3577096743
    410 	.long	311258
    411 	.long	2603100681
    412 	.long	1017152460
    413 	.long	1990012070
    414 	.long	326066
    415 	.long	3249202951
    416 	.long	1017448880
    417 	.long	1453150081
    418 	.long	341035
    419 	.long	419288974
    420 	.long	1016280325
    421 	.long	917841882
    422 	.long	356167
    423 	.long	3793507337
    424 	.long	1016095713
    425 	.long	3712504873
    426 	.long	371463
    427 	.long	728023093
    428 	.long	1016345318
    429 	.long	363667784
    430 	.long	386927
    431 	.long	2582678538
    432 	.long	1017123460
    433 	.long	2956612996
    434 	.long	402558
    435 	.long	7592966
    436 	.long	1016721543
    437 	.long	2186617380
    438 	.long	418360
    439 	.long	228611441
    440 	.long	1016696141
    441 	.long	1719614412
    442 	.long	434334
    443 	.long	2261665670
    444 	.long	1017457593
    445 	.long	1013258798
    446 	.long	450482
    447 	.long	544148907
    448 	.long	1017323666
    449 	.long	3907805043
    450 	.long	466805
    451 	.long	2383914918
    452 	.long	1017143586
    453 	.long	1447192520
    454 	.long	483307
    455 	.long	1176412038
    456 	.long	1017267372
    457 	.long	1944781190
    458 	.long	499988
    459 	.long	2882956373
    460 	.long	1013312481
    461 	.long	919555682
    462 	.long	516851
    463 	.long	3154077648
    464 	.long	1016528543
    465 	.long	2571947538
    466 	.long	533897
    467 	.long	348651999
    468 	.long	1016405780
    469 	.long	2604962540
    470 	.long	551129
    471 	.long	3253791412
    472 	.long	1015920431
    473 	.long	1110089947
    474 	.long	568549
    475 	.long	1509121860
    476 	.long	1014756995
    477 	.long	2568320822
    478 	.long	586158
    479 	.long	2617649212
    480 	.long	1017340090
    481 	.long	2966275556
    482 	.long	603959
    483 	.long	553214634
    484 	.long	1016457425
    485 	.long	2682146383
    486 	.long	621954
    487 	.long	730975783
    488 	.long	1014083580
    489 	.long	2191782032
    490 	.long	640145
    491 	.long	1486499517
    492 	.long	1016818996
    493 	.long	2069751140
    494 	.long	658534
    495 	.long	2595788928
    496 	.long	1016407932
    497 	.long	2990417244
    498 	.long	677123
    499 	.long	1853053619
    500 	.long	1015310724
    501 	.long	1434058175
    502 	.long	695915
    503 	.long	2462790535
    504 	.long	1015814775
    505 	.long	2572866477
    506 	.long	714911
    507 	.long	3693944214
    508 	.long	1017259110
    509 	.long	3092190714
    510 	.long	734114
    511 	.long	2979333550
    512 	.long	1017188654
    513 	.long	4076559942
    514 	.long	753526
    515 	.long	174054861
    516 	.long	1014300631
    517 	.long	2420883922
    518 	.long	773150
    519 	.long	816778419
    520 	.long	1014197934
    521 	.long	3716502172
    522 	.long	792987
    523 	.long	3507050924
    524 	.long	1015341199
    525 	.long	777507147
    526 	.long	813041
    527 	.long	1821514088
    528 	.long	1013410604
    529 	.long	3706687593
    530 	.long	833312
    531 	.long	920623539
    532 	.long	1016295433
    533 	.long	1242007931
    534 	.long	853805
    535 	.long	2789017511
    536 	.long	1014276997
    537 	.long	3707479175
    538 	.long	874520
    539 	.long	3586233004
    540 	.long	1015962192
    541 	.long	64696965
    542 	.long	895462
    543 	.long	474650514
    544 	.long	1016642419
    545 	.long	863738718
    546 	.long	916631
    547 	.long	1614448851
    548 	.long	1014281732
    549 	.long	3884662774
    550 	.long	938030
    551 	.long	2450082086
    552 	.long	1016164135
    553 	.long	2728693977
    554 	.long	959663
    555 	.long	1101668360
    556 	.long	1015989180
    557 	.long	3999357479
    558 	.long	981531
    559 	.long	835814894
    560 	.long	1015702697
    561 	.long	1533953344
    562 	.long	1003638
    563 	.long	1301400989  # entry 63, low qword (correction double)
    564 	.long	1014466875
    565 	.long	2174652632  # entry 63, high qword: fraction bits of 2^(63/64)
    566 	.long	1025985
    567 	.type	Tbl_addr,@object
    568 	.size	Tbl_addr,1024  # 64 entries * 16 bytes
    569 	.align 16
    570 ALLONES:  # 128 set bits; exp shifts this left to build a mask over the leading bits of T
    571 	.long	4294967295
    572 	.long	4294967295
    573 	.long	4294967295
    574 	.long	4294967295
    575 	.type	ALLONES,@object
    576 	.size	ALLONES,16
    577 	.align 16
    578 ebias:  # 0x3ff0000000000000 (bit pattern of 1.0) in both lanes; added with paddd to turn n1 into 2^n1
    579 	.long	0
    580 	.long	1072693248
    581 	.long	0
    582 	.long	1072693248
    583 	.type	ebias,@object
    584 	.size	ebias,16
    585 	.align 4
    586 XMAX:  # 0x7fefffffffffffff, the largest finite double; squared by exp on the overflow path
    587 	.long	4294967295
    588 	.long	2146435071
    589 	.type	XMAX,@object
    590 	.size	XMAX,8
    591 	.align 4
    592 XMIN:  # 0x0010000000000000, the smallest normal double; squared by exp on the underflow path
    593 	.long	0
    594 	.long	1048576
    595 	.type	XMIN,@object
    596 	.size	XMIN,8
    597 	.align 4
    598 INF:  # 0x7ff0000000000000 = +Inf, returned for an argument of +Inf
    599 	.long	0
    600 	.long	2146435072
    601 	.type	INF,@object
    602 	.size	INF,8
    603 	.align 4
    604 ZERO:  # +0.0, returned for an argument of -Inf
    605 	.long	0
    606 	.long	0
    607 	.type	ZERO,@object
    608 	.size	ZERO,8
    609 	.align 4
    610 ONE_val:  # 0x3ff0000000000000 = 1.0, added to x when |x| < 2^-54
    611 	.long	0
    612 	.long	1072693248
    613 	.type	ONE_val,@object
    614 	.size	ONE_val,8
    615 	.data
    616 	.section .note.GNU-stack, ""  # empty flags: no executable stack is requested
    617 // -- Begin DWARF2 SEGMENT .eh_frame (hand-encoded unwind data for exp)
    618 	.section .eh_frame,"a",@progbits
    619 .eh_frame_seg:
    620 	.align 1
    621 	.4byte 0x00000014  # length of the first record (presumably the CIE - TODO confirm against the DWARF spec)
    622 	.8byte 0x00527a0100000000
    623 	.8byte 0x08070c1b01107801
    624 	.4byte 0x00000190
    625 	.4byte 0x0000001c  # length of the second record, which describes exp
    626 	.4byte 0x0000001c
    627 	.4byte ..___tag_value_exp.1-.  # start of exp, relative to this field
    628 	.4byte ..___tag_value_exp.5-..___tag_value_exp.1  # size of exp in bytes
    629 	.2byte 0x0400  # bytes 0x00 0x04; 0x04 looks like DW_CFA_advance_loc4 - TODO confirm
    630 	.4byte ..___tag_value_exp.3-..___tag_value_exp.1  # distance from entry to just after the subq
    631 	.2byte 0x200e  # bytes 0x0e 0x20; looks like DW_CFA_def_cfa_offset 32 (8 + the 24-byte frame) - TODO confirm
    632 	.byte 0x04
    633 	.4byte ..___tag_value_exp.4-..___tag_value_exp.3  # distance to just after the addq that frees the frame
    634 	.2byte 0x080e  # bytes 0x0e 0x08; looks like DW_CFA_def_cfa_offset 8 - TODO confirm
    635 	.byte 0x00
    636 # End
    637