Home | History | Annotate | Download | only in x86
      1 /*
      2 Copyright (c) 2014, Intel Corporation
      3 All rights reserved.
      4 
      5 Redistribution and use in source and binary forms, with or without
      6 modification, are permitted provided that the following conditions are met:
      7 
      8     * Redistributions of source code must retain the above copyright notice,
      9     * this list of conditions and the following disclaimer.
     10 
     11     * Redistributions in binary form must reproduce the above copyright notice,
     12     * this list of conditions and the following disclaimer in the documentation
     13     * and/or other materials provided with the distribution.
     14 
     15     * Neither the name of Intel Corporation nor the names of its contributors
     16     * may be used to endorse or promote products derived from this software
     17     * without specific prior written permission.
     18 
     19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
     20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     22 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
     23 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     24 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     25 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
     26 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     28 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29 */
     30 
     31 /******************************************************************************/
     32 //                     ALGORITHM DESCRIPTION
     33 //                     ---------------------
     34 //
     35 // Description:
     36 //  Let K = 64 (table size).
     37 //        x    x/log(2)     n
     38 //       e  = 2          = 2 * T[j] * (1 + P(y))
     39 //  where
     40 //       x = m*log(2)/K + y,    y in [-log(2)/K..log(2)/K]
     41 //       m = n*K + j,           m,n,j - signed integer, j in [-K/2..K/2]
     42 //                  j/K
     43 //       values of 2   are tabulated as T[j] = T_hi[j] ( 1 + T_lo[j]).
     44 //
      45 //       P(y) is a minimax polynomial approximation of exp(y)-1
      46 //       on the small interval [-log(2)/K..log(2)/K] (coefficients were calculated by Maple V).
     47 //
     48 //  To avoid problems with arithmetic overflow and underflow,
     49 //            n                        n1  n2
     50 //  value of 2  is safely computed as 2 * 2 where n1 in [-BIAS/2..BIAS/2]
     51 //  where BIAS is a value of exponent bias.
     52 //
     53 // Special cases:
     54 //  exp(NaN) = NaN
     55 //  exp(+INF) = +INF
     56 //  exp(-INF) = 0
     57 //  exp(x) = 1 for subnormals
     58 //  for finite argument, only exp(0)=1 is exact
     59 //  For IEEE double
     60 //    if x >  709.782712893383973096 then exp(x) overflow
     61 //    if x < -745.133219101941108420 then exp(x) underflow
     62 //
     63 /******************************************************************************/
     64 
     65 #include <private/bionic_asm.h>
      66 # -- Begin  static_func
         # Position-independent helper: returns the run-time address of
         # static_const_table in %eax.  Takes no arguments.  Apart from %eax
         # (and the transient push/pop on the stack) no register is written.
      67         .text
      68         .align __bionic_asm_align
      69         .type static_func, @function
      70 static_func:
      71 ..B1.1:
      72         call      ..L2                  # pushes the address of ..L2 as a return address
      73 ..L2:
      74         popl      %eax                  # eax = run-time address of ..L2
      75         lea       _GLOBAL_OFFSET_TABLE_+[. - ..L2](%eax), %eax   # eax = GOT base; [. - ..L2] compensates for this instruction sitting past ..L2
      76         lea       static_const_table@GOTOFF(%eax), %eax          # eax = GOT base + GOT-relative offset of the table
      77         ret
      78         .size   static_func,.-static_func
      79 # -- End  static_func
     80 
      81 # -- Begin  exp
         # Computes e**x for the double stored at 8(%ebp) and leaves the result
         # on top of the x87 register stack when it returns.
         #
         # Register roles on the main path:
         #   ebx  = address of static_const_table (caller value parked in 64(%esp))
         #   xmm0 = x in both lanes, then reduced argument y, then polynomial sum p
         #   eax  = m = round(x * 64/log(2)), later n = m >> 6
         #   ecx  = 16 * (m & 63), byte offset of the table entry for j
         #   xmm7 = exponent bits of 2**n, xmm2 = table entry for j
         # Stack frame: 120 bytes below the saved ebp; slots touched are
         # 0, 8, 16, 24, 28, 48 and 64 (%esp).
      82 ENTRY(exp)
      83 # parameter 1: 8 + %ebp
      84 ..B2.1:
      85 ..B2.2:
      86         pushl     %ebp
      87         movl      %esp, %ebp
      88         subl      $120, %esp
      89         movl      %ebx, 64(%esp)        # preserve ebx, it is used as the table base below
      90         call      static_func           # eax = address of static_const_table
      91         movl      %eax, %ebx
      92         movsd     128(%esp), %xmm0      # x; 128(%esp) is the same slot as 8(%ebp)
      93         unpcklpd  %xmm0, %xmm0          # x in both lanes
      94         movapd    64(%ebx), %xmm1       # 64/log(2) in both lanes
      95         movapd    48(%ebx), %xmm6       # 1.5 * 2**52, shifter used to round to an integer
      96         movapd    80(%ebx), %xmm2       # high part of log(2)/64
      97         movapd    96(%ebx), %xmm3       # low part of log(2)/64
      98         pextrw    $3, %xmm0, %eax       # top 16 bits of x: sign, exponent, 4 mantissa bits
      99         andl      $32767, %eax          # drop the sign bit
     100         movl      $16527, %edx
     101         subl      %eax, %edx            # negative when eax > 16527 (0x408F), i.e. |x| >= 1024, Inf or NaN
     102         subl      $15504, %eax          # negative when eax < 15504 (0x3C90), i.e. |x| < 2**-54
     103         orl       %eax, %edx            # bit 31 set if either difference is negative
     104         cmpl      $-2147483648, %edx
     105         jae       .L_2TAG_PACKET_0.0.2  # out of the main range: tiny, huge, Inf or NaN
     106         mulpd     %xmm0, %xmm1          # x * 64/log(2)
     107         addpd     %xmm6, %xmm1          # adding the shifter leaves m = round(...) in the low mantissa bits
     108         movapd    %xmm1, %xmm7          # keep the shifted value, its low dword is m
     109         subpd     %xmm6, %xmm1          # m as a double
     110         mulpd     %xmm1, %xmm2          # m * (high part of log(2)/64)
     111         movapd    128(%ebx), %xmm4      # polynomial coefficients (approx 1/720 | approx 1/24)
     112         mulpd     %xmm1, %xmm3          # m * (low part of log(2)/64)
     113         movapd    144(%ebx), %xmm5      # polynomial coefficients (approx 1/120 | approx 1/6)
     114         subpd     %xmm2, %xmm0          # x - m*hi
     115         movd      %xmm7, %eax           # eax = m
     116         movl      %eax, %ecx
     117         andl      $63, %ecx             # j = m & 63
     118         shll      $4, %ecx              # 16 bytes per table entry
     119         sarl      $6, %eax              # n = m >> 6 (arithmetic shift keeps the sign)
     120         movl      %eax, %edx
     121         movdqa    16(%ebx), %xmm6       # mask 0x00000000FFFFFFC0 per qword
     122         pand      %xmm6, %xmm7          # keep bits 6..31 of m, i.e. 64*n as a 32-bit value
     123         movdqa    32(%ebx), %xmm6       # 65472 = 64 * 1023
     124         paddq     %xmm6, %xmm7          # 64 * (n + 1023)
     125         psllq     $46, %xmm7            # bit 6 moves to bit 52: biased exponent field of 2**n
     126         subpd     %xmm3, %xmm0          # y = x - m*hi - m*lo
     127         movapd    160(%ebx,%ecx), %xmm2 # table entry j: (small correction | mantissa bits of 2**(j/64))
     128         mulpd     %xmm0, %xmm4          # y * coefficients from table+128
     129         movapd    %xmm0, %xmm6
     130         movapd    %xmm0, %xmm1
     131         mulpd     %xmm6, %xmm6          # y**2
     132         mulpd     %xmm6, %xmm0          # y**3
     133         addpd     %xmm4, %xmm5          # coefficients from table+144 plus y * coefficients from table+128
     134         mulsd     %xmm6, %xmm0          # low lane becomes y**5, high lane stays y**3
     135         mulpd     112(%ebx), %xmm6      # y**2 times the constant at table+112 (just below 0.5)
     136         addsd     %xmm2, %xmm1          # y + low double of the table entry
     137         unpckhpd  %xmm2, %xmm2          # bring the mantissa bits of the entry to the low lane
     138         mulpd     %xmm5, %xmm0          # high-order terms: (y**5 * (..) | y**3 * (..))
     139         addsd     %xmm0, %xmm1          # accumulate the low-lane term
     140         orpd      %xmm7, %xmm2          # T = exponent bits of 2**n combined with the entry mantissa
     141         unpckhpd  %xmm0, %xmm0          # bring the high-lane term to the low lane
     142         addsd     %xmm1, %xmm0
     143         addsd     %xmm6, %xmm0          # p = sum of all terms
     144         addl      $894, %edx
     145         cmpl      $1916, %edx
     146         ja        .L_2TAG_PACKET_1.0.2  # unsigned test: taken when n < -894 or n > 1022
     147         mulsd     %xmm2, %xmm0
     148         addsd     %xmm2, %xmm0          # result = T*p + T
     149         jmp       .L_2TAG_PACKET_2.0.2
     150 .L_2TAG_PACKET_1.0.2:
         # Extreme exponent: split n into n1 + n2 and finish on the x87 unit so the
         # intermediate product is not rounded to double before the final scaling.
     151         fstcw     24(%esp)              # save the current x87 control word
     152         movzwl    24(%esp), %edx
     153         orl       $768, %edx            # set bits 8 and 9, the precision-control field
     154         movw      %dx, 28(%esp)
     155         fldcw     28(%esp)
     156         movl      %eax, %edx
     157         sarl      $1, %eax              # n1 = n >> 1
     158         subl      %eax, %edx            # n2 = n - n1
     159         movdqa    (%ebx), %xmm6         # mask 0xFFF0000000000000 (sign and exponent bits)
     160         pandn     %xmm2, %xmm6          # T with sign and exponent cleared
     161         addl      $1023, %eax
     162         movd      %eax, %xmm3
     163         psllq     $52, %xmm3            # exponent bits of 2**n1
     164         orpd      %xmm3, %xmm6          # T1 = 2**n1 times the table mantissa
     165         addl      $1023, %edx
     166         movd      %edx, %xmm4
     167         psllq     $52, %xmm4            # 2**n2
     168         movsd     %xmm0, 8(%esp)
     169         fldl      8(%esp)               # st(0) = p
     170         movsd     %xmm6, 16(%esp)
     171         fldl      16(%esp)              # st(0) = T1, st(1) = p
     172         fmul      %st, %st(1)           # st(1) = p * T1
     173         faddp     %st, %st(1)           # st(0) = p*T1 + T1
     174         movsd     %xmm4, 8(%esp)
     175         fldl      8(%esp)               # st(0) = 2**n2
     176         fmulp     %st, %st(1)           # st(0) = (p*T1 + T1) * 2**n2
     177         fstpl     8(%esp)               # store as a double
     178         movsd     8(%esp), %xmm0
     179         fldcw     24(%esp)              # restore the saved control word
     180         pextrw    $3, %xmm0, %ecx
     181         andl      $32752, %ecx          # exponent field of the result
     182         cmpl      $32752, %ecx
     183         jae       .L_2TAG_PACKET_3.0.2  # exponent all ones: result overflowed
     184         cmpl      $0, %ecx
     185         je        .L_2TAG_PACKET_4.0.2  # exponent zero: result is zero or subnormal
     186         jmp       .L_2TAG_PACKET_2.0.2
         # NOTE(review): the instructions from here to the next label carry no label and
         # follow an unconditional jmp, so no path visible in this file reaches them.
     187         cmpl      $-2147483648, %ecx
     188         jb        .L_2TAG_PACKET_3.0.2
     189         cmpl      $-1064950997, %ecx
     190         jb        .L_2TAG_PACKET_2.0.2
     191         ja        .L_2TAG_PACKET_4.0.2
     192         movl      128(%esp), %edx
     193         cmpl      $-17155601, %edx
     194         jb        .L_2TAG_PACKET_2.0.2
     195         jmp       .L_2TAG_PACKET_4.0.2
     196 .L_2TAG_PACKET_3.0.2:
     197         movl      $14, %edx             # reached on overflow; see the note at the next label but one
     198         jmp       .L_2TAG_PACKET_5.0.2
     199 .L_2TAG_PACKET_4.0.2:
     200         movl      $15, %edx             # reached on underflow
     201 .L_2TAG_PACKET_5.0.2:
         # NOTE(review): edx holds 14 or 15 here and xmm0 is reloaded with x, but nothing
         # between this label and ret reads either; presumably left over from an
         # error-reporting hook - confirm before relying on it.
     202         movsd     %xmm0, (%esp)
     203         movsd     128(%esp), %xmm0
     204         fldl      (%esp)                # result goes onto the x87 stack
     205         jmp       .L_2TAG_PACKET_6.0.2
     206 .L_2TAG_PACKET_7.0.2:
         # |x| >= 1024, Inf or NaN; eax = high dword of x with the sign cleared.
     207         cmpl      $2146435072, %eax     # 0x7FF00000: exponent all ones
     208         jae       .L_2TAG_PACKET_8.0.2  # Inf or NaN
     209         movl      132(%esp), %eax       # high dword of x including the sign
     210         cmpl      $-2147483648, %eax
     211         jae       .L_2TAG_PACKET_9.0.2  # sign bit set: large negative x
     212         movsd     1208(%ebx), %xmm0     # largest finite double
     213         mulsd     %xmm0, %xmm0          # squaring it overflows
     214         movl      $14, %edx
     215         jmp       .L_2TAG_PACKET_5.0.2
     216 .L_2TAG_PACKET_9.0.2:
     217         movsd     1216(%ebx), %xmm0     # smallest normal double
     218         mulsd     %xmm0, %xmm0          # squaring it underflows
     219         movl      $15, %edx
     220         jmp       .L_2TAG_PACKET_5.0.2
     221 .L_2TAG_PACKET_8.0.2:
     222         movl      128(%esp), %edx       # low dword of x
     223         cmpl      $2146435072, %eax
     224         ja        .L_2TAG_PACKET_10.0.2 # high mantissa bits set: NaN
     225         cmpl      $0, %edx
     226         jne       .L_2TAG_PACKET_10.0.2 # low mantissa bits set: NaN
     227         movl      132(%esp), %eax
     228         cmpl      $2146435072, %eax
     229         jne       .L_2TAG_PACKET_11.0.2 # sign bit set: x is -Inf
     230         movsd     1192(%ebx), %xmm0     # x is +Inf: return +Inf
     231         jmp       .L_2TAG_PACKET_2.0.2
     232 .L_2TAG_PACKET_11.0.2:
     233         movsd     1200(%ebx), %xmm0     # x is -Inf: return 0.0
     234         jmp       .L_2TAG_PACKET_2.0.2
     235 .L_2TAG_PACKET_10.0.2:
     236         movsd     128(%esp), %xmm0
     237         addsd     %xmm0, %xmm0          # NaN input: return x + x
     238         jmp       .L_2TAG_PACKET_2.0.2
     239 .L_2TAG_PACKET_0.0.2:
         # Outside the main range: either |x| < 2**-54 or |x| >= 1024 (including Inf and NaN).
     240         movl      132(%esp), %eax
     241         andl      $2147483647, %eax     # high dword of |x|
     242         cmpl      $1083179008, %eax     # 0x40900000, high dword of 1024.0
     243         jae       .L_2TAG_PACKET_7.0.2
     244         movsd     128(%esp), %xmm0
     245         addsd     1184(%ebx), %xmm0     # tiny x: return x + 1.0
     246         jmp       .L_2TAG_PACKET_2.0.2
     247 .L_2TAG_PACKET_2.0.2:
     248         movsd     %xmm0, 48(%esp)
     249         fldl      48(%esp)              # move the SSE result onto the x87 stack
     250 .L_2TAG_PACKET_6.0.2:
     251         movl      64(%esp), %ebx        # restore ebx saved in the prologue
     252         movl      %ebp, %esp
     253         popl      %ebp
     254         ret
     255 ..B2.3:
     256 END(exp)
     257 # -- End  exp
    258 
     259 # Start file scope ASM
         # NOTE(review): presumably publishes expl as a second name for the exp entry point;
         # the macro is defined in private/bionic_asm.h, which is not visible here - confirm.
     260 ALIAS_SYMBOL(expl, exp);
     261 # End file scope ASM
     262 	.section .rodata, "a"
         # Constant pool for exp, 1224 bytes, addressed by byte offset from the label.
         # Each pair of .long lines is one 64-bit value, low dword first.
         # Offsets 0..159 hold two-lane constants read with 16-byte aligned loads,
         # offsets 160..1183 hold 64 table entries of 16 bytes each (indexed by j = m & 63),
         # offsets 1184..1223 hold five scalar doubles for the special cases.
     263 	.align 16
     264 	.align 16
     265 static_const_table:
     266 	.long	0	# offset 0: mask 0xFFF0000000000000 (sign and exponent bits), both lanes
     267 	.long	4293918720
     268 	.long	0
     269 	.long	4293918720
     270 	.long	4294967232	# offset 16: mask 0x00000000FFFFFFC0, both lanes
     271 	.long	0
     272 	.long	4294967232
     273 	.long	0
     274 	.long	65472	# offset 32: integer 65472 = 64 * 1023, both lanes
     275 	.long	0
     276 	.long	65472
     277 	.long	0
     278 	.long	0	# offset 48: 1.5 * 2**52 (0x4338000000000000), both lanes
     279 	.long	1127743488
     280 	.long	0
     281 	.long	1127743488
     282 	.long	1697350398	# offset 64: 64/log(2), both lanes
     283 	.long	1079448903
     284 	.long	1697350398
     285 	.long	1079448903
     286 	.long	4277796864	# offset 80: high part of log(2)/64 (low 16 mantissa bits zero), both lanes
     287 	.long	1065758274
     288 	.long	4277796864
     289 	.long	1065758274
     290 	.long	3164486458	# offset 96: low part of log(2)/64, both lanes
     291 	.long	1025308570
     292 	.long	3164486458
     293 	.long	1025308570
     294 	.long	4294967294	# offset 112: 0x3FDFFFFFFFFFFFFE, just below 0.5, both lanes
     295 	.long	1071644671
     296 	.long	4294967294
     297 	.long	1071644671
     298 	.long	3811088480	# offset 128: polynomial coefficient, approx 1/720
     299 	.long	1062650204
     300 	.long	1432067621	# offset 136: polynomial coefficient, approx 1/24
     301 	.long	1067799893
     302 	.long	3230715663	# offset 144: polynomial coefficient, approx 1/120
     303 	.long	1065423125
     304 	.long	1431604129	# offset 152: polynomial coefficient, approx 1/6
     305 	.long	1069897045
     306 	.long	0	# offset 160: table entry j = 0; low qword is a small correction term added to y
     307 	.long	0
     308 	.long	0	# high qword: mantissa bits of 2**(j/64), exponent field zero
     309 	.long	0
     310 	.long	235107661	# offset 176: table entry j = 1
     311 	.long	1018002367
     312 	.long	1048019040
     313 	.long	11418
     314 	.long	896005651
     315 	.long	1015861842
     316 	.long	3541402996
     317 	.long	22960
     318 	.long	1642514529
     319 	.long	1012987726
     320 	.long	410360776
     321 	.long	34629
     322 	.long	1568897900
     323 	.long	1016568486
     324 	.long	1828292879
     325 	.long	46424
     326 	.long	1882168529
     327 	.long	1010744893
     328 	.long	852742562
     329 	.long	58348
     330 	.long	509852888
     331 	.long	1017336174
     332 	.long	3490863952
     333 	.long	70401
     334 	.long	653277307
     335 	.long	1017431380
     336 	.long	2930322911
     337 	.long	82586
     338 	.long	1649557430
     339 	.long	1017729363
     340 	.long	1014845818
     341 	.long	94904
     342 	.long	1058231231
     343 	.long	1015777676
     344 	.long	3949972341
     345 	.long	107355
     346 	.long	1044000607
     347 	.long	1016786167
     348 	.long	828946858
     349 	.long	119943
     350 	.long	1151779725
     351 	.long	1015705409
     352 	.long	2288159958
     353 	.long	132667
     354 	.long	3819481236
     355 	.long	1016499965
     356 	.long	1853186616
     357 	.long	145530
     358 	.long	2552227826
     359 	.long	1015039787
     360 	.long	1709341917
     361 	.long	158533
     362 	.long	1829350193
     363 	.long	1015216097
     364 	.long	4112506593
     365 	.long	171677
     366 	.long	1913391795
     367 	.long	1015756674
     368 	.long	2799960843
     369 	.long	184965
     370 	.long	1303423926
     371 	.long	1015238005
     372 	.long	171030293
     373 	.long	198398
     374 	.long	1574172746
     375 	.long	1016061241
     376 	.long	2992903935
     377 	.long	211976
     378 	.long	3424156969
     379 	.long	1017196428
     380 	.long	926591434
     381 	.long	225703
     382 	.long	1938513547
     383 	.long	1017631273
     384 	.long	887463926
     385 	.long	239579
     386 	.long	2804567149
     387 	.long	1015390024
     388 	.long	1276261410
     389 	.long	253606
     390 	.long	631083525
     391 	.long	1017690182
     392 	.long	569847337
     393 	.long	267786
     394 	.long	1623370770
     395 	.long	1011049453
     396 	.long	1617004845
     397 	.long	282120
     398 	.long	3667985273
     399 	.long	1013894369
     400 	.long	3049340112
     401 	.long	296610
     402 	.long	3145379760
     403 	.long	1014403278
     404 	.long	3577096743
     405 	.long	311258
     406 	.long	2603100681
     407 	.long	1017152460
     408 	.long	1990012070
     409 	.long	326066
     410 	.long	3249202951
     411 	.long	1017448880
     412 	.long	1453150081
     413 	.long	341035
     414 	.long	419288974
     415 	.long	1016280325
     416 	.long	917841882
     417 	.long	356167
     418 	.long	3793507337
     419 	.long	1016095713
     420 	.long	3712504873
     421 	.long	371463
     422 	.long	728023093
     423 	.long	1016345318
     424 	.long	363667784
     425 	.long	386927
     426 	.long	2582678538
     427 	.long	1017123460
     428 	.long	2956612996
     429 	.long	402558
     430 	.long	7592966
     431 	.long	1016721543
     432 	.long	2186617380
     433 	.long	418360
     434 	.long	228611441
     435 	.long	1016696141
     436 	.long	1719614412
     437 	.long	434334
     438 	.long	2261665670
     439 	.long	1017457593
     440 	.long	1013258798
     441 	.long	450482
     442 	.long	544148907
     443 	.long	1017323666
     444 	.long	3907805043
     445 	.long	466805
     446 	.long	2383914918
     447 	.long	1017143586
     448 	.long	1447192520
     449 	.long	483307
     450 	.long	1176412038
     451 	.long	1017267372
     452 	.long	1944781190
     453 	.long	499988
     454 	.long	2882956373
     455 	.long	1013312481
     456 	.long	919555682
     457 	.long	516851
     458 	.long	3154077648
     459 	.long	1016528543
     460 	.long	2571947538
     461 	.long	533897
     462 	.long	348651999
     463 	.long	1016405780
     464 	.long	2604962540
     465 	.long	551129
     466 	.long	3253791412
     467 	.long	1015920431
     468 	.long	1110089947
     469 	.long	568549
     470 	.long	1509121860
     471 	.long	1014756995
     472 	.long	2568320822
     473 	.long	586158
     474 	.long	2617649212
     475 	.long	1017340090
     476 	.long	2966275556
     477 	.long	603959
     478 	.long	553214634
     479 	.long	1016457425
     480 	.long	2682146383
     481 	.long	621954
     482 	.long	730975783
     483 	.long	1014083580
     484 	.long	2191782032
     485 	.long	640145
     486 	.long	1486499517
     487 	.long	1016818996
     488 	.long	2069751140
     489 	.long	658534
     490 	.long	2595788928
     491 	.long	1016407932
     492 	.long	2990417244
     493 	.long	677123
     494 	.long	1853053619
     495 	.long	1015310724
     496 	.long	1434058175
     497 	.long	695915
     498 	.long	2462790535
     499 	.long	1015814775
     500 	.long	2572866477
     501 	.long	714911
     502 	.long	3693944214
     503 	.long	1017259110
     504 	.long	3092190714
     505 	.long	734114
     506 	.long	2979333550
     507 	.long	1017188654
     508 	.long	4076559942
     509 	.long	753526
     510 	.long	174054861
     511 	.long	1014300631
     512 	.long	2420883922
     513 	.long	773150
     514 	.long	816778419
     515 	.long	1014197934
     516 	.long	3716502172
     517 	.long	792987
     518 	.long	3507050924
     519 	.long	1015341199
     520 	.long	777507147
     521 	.long	813041
     522 	.long	1821514088
     523 	.long	1013410604
     524 	.long	3706687593
     525 	.long	833312
     526 	.long	920623539
     527 	.long	1016295433
     528 	.long	1242007931
     529 	.long	853805
     530 	.long	2789017511
     531 	.long	1014276997
     532 	.long	3707479175
     533 	.long	874520
     534 	.long	3586233004
     535 	.long	1015962192
     536 	.long	64696965
     537 	.long	895462
     538 	.long	474650514
     539 	.long	1016642419
     540 	.long	863738718
     541 	.long	916631
     542 	.long	1614448851
     543 	.long	1014281732
     544 	.long	3884662774
     545 	.long	938030
     546 	.long	2450082086
     547 	.long	1016164135
     548 	.long	2728693977
     549 	.long	959663
     550 	.long	1101668360
     551 	.long	1015989180
     552 	.long	3999357479
     553 	.long	981531
     554 	.long	835814894
     555 	.long	1015702697
     556 	.long	1533953344
     557 	.long	1003638
     558 	.long	1301400989	# offset 1168: table entry j = 63 (last entry)
     559 	.long	1014466875
     560 	.long	2174652632
     561 	.long	1025985
     562 	.long	0	# offset 1184: 1.0, added to x on the tiny-argument path
     563 	.long	1072693248
     564 	.long	0	# offset 1192: +Inf, returned for x = +Inf
     565 	.long	2146435072
     566 	.long	0	# offset 1200: 0.0, returned for x = -Inf
     567 	.long	0
     568 	.long	4294967295	# offset 1208: largest finite double, squared to force overflow
     569 	.long	2146435071
     570 	.long	0	# offset 1216: smallest normal double, squared to force underflow
     571 	.long	1048576
     572 	.type	static_const_table,@object
     573 	.size	static_const_table,1224
     574 	.data
     575 	.section .note.GNU-stack, ""	# empty note section; by GNU toolchain convention it marks the object as not needing an executable stack
     576 # End
    577