Home | History | Annotate | Download | only in x86_64
      1 /*
      2 Copyright (c) 2014, Intel Corporation
      3 All rights reserved.
      4 
      5 Redistribution and use in source and binary forms, with or without
      6 modification, are permitted provided that the following conditions are met:
      7 
      8     * Redistributions of source code must retain the above copyright notice,
      9     * this list of conditions and the following disclaimer.
     10 
     11     * Redistributions in binary form must reproduce the above copyright notice,
     12     * this list of conditions and the following disclaimer in the documentation
     13     * and/or other materials provided with the distribution.
     14 
     15     * Neither the name of Intel Corporation nor the names of its contributors
     16     * may be used to endorse or promote products derived from this software
     17     * without specific prior written permission.
     18 
     19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
     20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     22 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
     23 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     24 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     25 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
     26 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     28 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29 */
     30 
     31 /******************************************************************************/
     32 //                     ALGORITHM DESCRIPTION
     33 //                     ---------------------
     34 //
     35 //     1. RANGE REDUCTION
     36 //
     37 //     We perform an initial range reduction from X to r with
     38 //
     39 //          X =~= N * pi/32 + r
     40 //
     41 //     so that |r| <= pi/64 + epsilon. We restrict inputs to those
     42 //     where |N| <= 932560. Beyond this, the range reduction is
     43 //     insufficiently accurate. For extremely small inputs,
     44 //     denormalization can occur internally, impacting performance.
     45 //     This means that the main path is actually only taken for
     46 //     2^-252 <= |X| < 90112.
     47 //
     48 //     To avoid branches, we perform the range reduction to full
     49 //     accuracy each time.
     50 //
     51 //          X - N * (P_1 + P_2 + P_3)
     52 //
     53 //     where P_1 and P_2 are 32-bit numbers (so multiplication by N
     54 //     is exact) and P_3 is a 53-bit number. Together, these
     55 //     approximate pi/32 well enough for all cases in the restricted
     56 //     range.
     57 //
     58 //     The main reduction sequence is:
     59 //
     60 //             y = 32/pi * x
     61 //             N = integer(y)
     62 //     (computed by adding and subtracting off SHIFTER)
     63 //
     64 //             m_1 = N * P_1
     65 //             m_2 = N * P_2
     66 //             r_1 = x - m_1
     67 //             r = r_1 - m_2
     68 //     (this r can be used for most of the calculation)
     69 //
     70 //             c_1 = r_1 - r
     71 //             m_3 = N * P_3
     72 //             c_2 = c_1 - m_2
     73 //             c = c_2 - m_3
     74 //
     75 //     2. MAIN ALGORITHM
     76 //
     77 //     The algorithm uses a table lookup based on B = M * pi / 32
     78 //     where M = N mod 64. The stored values are:
     79 //       sigma             closest power of 2 to cos(B)
     80 //       C_hl              53-bit cos(B) - sigma
     81 //       S_hi + S_lo       2 * 53-bit sin(B)
     82 //
     83 //     The computation is organized as follows:
     84 //
     85 //          sin(B + r + c) = [sin(B) + sigma * r] +
     86 //                           r * (cos(B) - sigma) +
     87 //                           sin(B) * [cos(r + c) - 1] +
     88 //                           cos(B) * [sin(r + c) - r]
     89 //
     90 //     which is approximately:
     91 //
     92 //          [S_hi + sigma * r] +
     93 //          C_hl * r +
     94 //          S_lo + S_hi * [(cos(r) - 1) - r * c] +
     95 //          (C_hl + sigma) * [(sin(r) - r) + c]
     96 //
     97 //     and this is what is actually computed. We separate this sum
     98 //     into four parts:
     99 //
    100 //          hi + med + pols + corr
    101 //
    102 //     where
    103 //
    104 //          hi       = S_hi + sigma r
    105 //          med      = C_hl * r
    106 //          pols     = S_hi * (cos(r) - 1) + (C_hl + sigma) * (sin(r) - r)
    107 //          corr     = S_lo + c * ((C_hl + sigma) - S_hi * r)
    108 //
    109 //     3. POLYNOMIAL
    110 //
    111 //     The polynomial S_hi * (cos(r) - 1) + (C_hl + sigma) *
    112 //     (sin(r) - r) can be rearranged freely, since it is quite
    113 //     small, so we exploit parallelism to the fullest.
    114 //
    115 //          psc4       =   SC_4 * r_1
    116 //          msc4       =   psc4 * r
    117 //          r2         =   r * r
    118 //          msc2       =   SC_2 * r2
    119 //          r4         =   r2 * r2
    120 //          psc3       =   SC_3 + msc4
    121 //          psc1       =   SC_1 + msc2
    122 //          msc3       =   r4 * psc3
    123 //          sincospols =   psc1 + msc3
    124 //          pols       =   sincospols *
    125 //                         <S_hi * r^2 | (C_hl + sigma) * r^3>
    126 //
    127 //     4. CORRECTION TERM
    128 //
    129 //     This is where the "c" component of the range reduction is
    130 //     taken into account; recall that just "r" is used for most of
    131 //     the calculation.
    132 //
    133 //          -c   = m_3 - c_2
    134 //          -d   = S_hi * r - (C_hl + sigma)
    135 //          corr = -c * -d + S_lo
    136 //
    137 //     5. COMPENSATED SUMMATIONS
    138 //
    139 //     The two successive compensated summations add up the high
    140 //     and medium parts, leaving just the low parts to add up at
    141 //     the end.
    142 //
    143 //          rs        =  sigma * r
    144 //          res_int   =  S_hi + rs
    145 //          k_0       =  S_hi - res_int
    146 //          k_2       =  k_0 + rs
    147 //          med       =  C_hl * r
    148 //          res_hi    =  res_int + med
    149 //          k_1       =  res_int - res_hi
    150 //          k_3       =  k_1 + med
    151 //
    152 //     6. FINAL SUMMATION
    153 //
    154 //     We now add up all the small parts:
    155 //
    156 //          res_lo = pols(hi) + pols(lo) + corr + k_2 + k_3
    157 //
    158 //     Now the overall result is just:
    159 //
    160 //          res_hi + res_lo
    161 //
    162 //     7. SMALL ARGUMENTS
    163 //
    164 //     Inputs with |X| < 2^-252 are treated specially as
    165 //     1 - |x|.
    166 //
    167 // Special cases:
    168 //  cos(NaN) = quiet NaN, and raise invalid exception
    169 //  cos(INF) = NaN and raise invalid exception
    170 //  cos(0) = 1
    171 //
    172 /******************************************************************************/
    173 
    174 #include <private/bionic_asm.h>
    175 # -- Begin  cos
    176 ENTRY(cos)
    177 # parameter 1: %xmm0
        # double cos(double x): x arrives in %xmm0, the result is returned in %xmm0.
        # Path selection uses the exponent bits of x:
        #   main path  - reduce by pi/32 inline, index Ctable, then evaluate the
        #                sums laid out in the ALGORITHM DESCRIPTION header
        #   tiny |x|   - .L_2TAG_PACKET_0.0.1 returns ONE - |x|
        #   large |x|  - .L_2TAG_PACKET_1.0.1 reduces with PI_INV_TABLE using
        #                integer arithmetic, then runs a second copy of the kernel
        #   Inf / NaN  - .L_2TAG_PACKET_2.0.1
        # Stack: 16 scratch bytes.  %rbx is saved and restored; the large path
        # uses it as scratch.
        # Ctable entry layout as read below (32 bytes per entry):
        #   +0 C_hl, +8 S_hi, +16 S_lo, +24 sigma
    178 ..B1.1:
    179 ..___tag_value_cos.1:
    180         pushq     %rbx
    181 ..___tag_value_cos.3:
    182         subq      $16, %rsp
    183 ..___tag_value_cos.5:
    184         movsd     %xmm0, 8(%rsp)                # spill x so its bits can be read as integers
    185 ..B1.2:
    186         movl      12(%rsp), %eax                # high 32 bits of x
    187         movq      PI32INV(%rip), %xmm1          # PI32INV is defined outside this view; presumably 32/pi
    188         andl      $2147418112, %eax             # 0x7FFF0000: drop sign, keep exponent + top 4 mantissa bits
    189         subl      $808452096, %eax              # 0x30300000: lower bound (2^-252 per the header)
    190         cmpl      $281346048, %eax              # 0x10C50000: width of the accepted range
    191         ja        .L_2TAG_PACKET_0.0.1          # unsigned above: x is too small or too large for this path
    192         mulsd     %xmm0, %xmm1                  # y = x * PI32INV
    193         movapd    ONEHALF(%rip), %xmm5          # 0.5 in both lanes
    194         movq      SIGN_MASK(%rip), %xmm4        # SIGN_MASK is defined outside this view
    195         andpd     %xmm0, %xmm4                  # presumably isolates the sign of x
    196         orps      %xmm4, %xmm5                  # 0.5 carrying that sign
    197         addpd     %xmm5, %xmm1                  # y biased by the signed half
    198         cvttsd2si %xmm1, %edx                   # truncate: N = y rounded to nearest
    199         cvtsi2sd  %edx, %xmm1                   # N as a double
    200         movapd    P_2(%rip), %xmm2
    201         movq      P_1(%rip), %xmm3
    202         mulsd     %xmm1, %xmm3                  # m_1 = N * P_1
    203         unpcklpd  %xmm1, %xmm1                  # N in both lanes
    204         addq      $1865232, %rdx                # 1865232 = 16 (mod 64): index offset of 16 steps of pi/32
    205         movq      %xmm0, %xmm4                  # second copy of x
    206         andq      $63, %rdx                     # table index M = (N + 16) mod 64
    207         movapd    SC_4(%rip), %xmm5
    208         lea       Ctable(%rip), %rax
    209         shlq      $5, %rdx                      # 32 bytes per Ctable entry
    210         addq      %rdx, %rax                    # rax = address of the selected entry
    211         mulpd     %xmm1, %xmm2                  # m_2 = N * P_2 (both lanes)
    212         subsd     %xmm3, %xmm0                  # r_1 = x - m_1
    213         mulsd     P_3(%rip), %xmm1              # m_3 = N * P_3
    214         subsd     %xmm3, %xmm4                  # r_1 (scalar copy)
    215         movq      8(%rax), %xmm7                # S_hi
    216         unpcklpd  %xmm0, %xmm0                  # r_1 in both lanes
    217         movq      %xmm4, %xmm3                  # keep r_1 for c_1
    218         subsd     %xmm2, %xmm4                  # r = r_1 - m_2
    219         mulpd     %xmm0, %xmm5                  # psc4 = SC_4 * r_1
    220         subpd     %xmm2, %xmm0                  # r in both lanes
    221         movapd    SC_2(%rip), %xmm6
    222         mulsd     %xmm4, %xmm7                  # S_hi * r
    223         subsd     %xmm4, %xmm3                  # c_1 = r_1 - r
    224         mulpd     %xmm0, %xmm5                  # msc4 = psc4 * r
    225         mulpd     %xmm0, %xmm0                  # r2 = r * r
    226         subsd     %xmm2, %xmm3                  # c_2 = c_1 - m_2
    227         movapd    (%rax), %xmm2                 # low lane C_hl, high lane S_hi
    228         subsd     %xmm3, %xmm1                  # -c = m_3 - c_2
    229         movq      24(%rax), %xmm3               # sigma
    230         addsd     %xmm3, %xmm2                  # low lane becomes C_hl + sigma
    231         subsd     %xmm2, %xmm7                  # -d = S_hi * r - (C_hl + sigma)
    232         mulsd     %xmm4, %xmm2                  # low lane (C_hl + sigma) * r
    233         mulpd     %xmm0, %xmm6                  # msc2 = SC_2 * r2
    234         mulsd     %xmm4, %xmm3                  # rs = sigma * r
    235         mulpd     %xmm0, %xmm2                  # low (C_hl + sigma) * r^3, high S_hi * r^2
    236         mulpd     %xmm0, %xmm0                  # r4 = r2 * r2
    237         addpd     SC_3(%rip), %xmm5             # psc3 = SC_3 + msc4
    238         mulsd     (%rax), %xmm4                 # med = C_hl * r
    239         addpd     SC_1(%rip), %xmm6             # psc1 = SC_1 + msc2
    240         mulpd     %xmm0, %xmm5                  # msc3 = r4 * psc3
    241         movq      %xmm3, %xmm0                  # copy of rs
    242         addsd     8(%rax), %xmm3                # res_int = S_hi + rs
    243         mulpd     %xmm7, %xmm1                  # (-c) * (-d); only the low lane is consumed below
    244         movq      %xmm4, %xmm7                  # copy of med
    245         addsd     %xmm3, %xmm4                  # res_hi = res_int + med
    246         addpd     %xmm5, %xmm6                  # sincospols = psc1 + msc3
    247         movq      8(%rax), %xmm5                # S_hi again
    248         subsd     %xmm3, %xmm5                  # k_0 = S_hi - res_int
    249         subsd     %xmm4, %xmm3                  # k_1 = res_int - res_hi
    250         addsd     16(%rax), %xmm1               # corr = S_lo + (-c) * (-d)
    251         mulpd     %xmm2, %xmm6                  # pols (one partial sum per lane)
    252         addsd     %xmm5, %xmm0                  # k_2 = k_0 + rs
    253         addsd     %xmm7, %xmm3                  # k_3 = k_1 + med
    254         addsd     %xmm1, %xmm0                  # res_lo accumulation: k_2 + corr
    255         addsd     %xmm3, %xmm0                  #   + k_3
    256         addsd     %xmm6, %xmm0                  #   + pols low lane
    257         unpckhpd  %xmm6, %xmm6                  # bring pols high lane down
    258         addsd     %xmm6, %xmm0                  #   + pols high lane
    259         addsd     %xmm4, %xmm0                  # result = res_lo + res_hi
    260         jmp       ..B1.4
    261 .L_2TAG_PACKET_0.0.1:
        # Out-of-range dispatch.  The flags are still those of the cmpl above
        # (ja does not modify them): signed greater means the biased value
        # exceeded the range from above (large x); otherwise it wrapped
        # negative, meaning x is below the lower bound.
    262         jg        .L_2TAG_PACKET_1.0.1
    263         pextrw    $3, %xmm0, %eax               # top 16 bits of x
    264         andw      $32767, %ax                   # 0x7FFF: clear the sign bit
    265         pinsrw    $3, %eax, %xmm0               # xmm0 = |x|
    266         movq      ONE(%rip), %xmm1              # ONE is defined outside this view
    267         subsd     %xmm0, %xmm1                  # ONE - |x|
    268         movq      %xmm1, %xmm0
    269         jmp       ..B1.4
    270 .L_2TAG_PACKET_1.0.1:
    271         pextrw    $3, %xmm0, %eax
    272         andl      $32752, %eax                  # 0x7FF0: exponent field
    273         cmpl      $32752, %eax                  # all ones means Inf or NaN
    274         je        .L_2TAG_PACKET_2.0.1
        # Large finite x.  Select the 32-bit words of PI_INV_TABLE that matter
        # for this exponent, then multiply the 53-bit significand (split into a
        # 21-bit high part in rax and a 32-bit low part in rdx) by seven table
        # words using 32x32 partial products with explicit carry propagation.
        # The product ends up in three 64-bit words: r9 (high), r10, r8 (low).
        # NOTE(review): PI_INV_TABLE is outside this view; by its name it
        # presumably holds the bits of a multiple of 1/pi - confirm.
    275         pextrw    $3, %xmm0, %ecx
    276         andl      $32752, %ecx                  # exponent field << 4
    277         subl      $16224, %ecx                  # 0x3F60: rebase the exponent by 1014
    278         shrl      $7, %ecx
    279         andl      $65532, %ecx                  # 0xFFFC: byte offset rounded down to a 4-byte word
    280         lea       PI_INV_TABLE(%rip), %r11
    281         addq      %r11, %rcx                    # rcx = address of the first table word used
    282         movd      %xmm0, %rax                   # raw bits of x
    283         movl      20(%rcx), %r10d
    284         movl      24(%rcx), %r8d
    285         movl      %eax, %edx                    # rdx = low 32 bits of the significand
    286         shrq      $21, %rax
    287         orl       $-2147483648, %eax            # set the implicit leading one (32-bit op clears sign/exponent)
    288         shrl      $11, %eax                     # rax = high 21 bits of the significand
    289         movl      %r10d, %r9d
    290         imulq     %rdx, %r10                    # word20 * low
    291         imulq     %rax, %r9                     # word20 * high
    292         imulq     %rax, %r8                     # word24 * high
    293         movl      16(%rcx), %esi
    294         movl      12(%rcx), %edi
    295         movl      %r10d, %r11d
    296         shrq      $32, %r10
    297         addq      %r10, %r9                     # carry the upper half into the next column
    298         addq      %r8, %r11
    299         movl      %r11d, %r8d                   # lowest 32 bits of the product
    300         shrq      $32, %r11
    301         addq      %r11, %r9
    302         movl      %esi, %r10d
    303         imulq     %rdx, %rsi                    # word16 * low
    304         imulq     %rax, %r10                    # word16 * high
    305         movl      %edi, %r11d
    306         imulq     %rdx, %rdi                    # word12 * low
    307         movl      %esi, %ebx
    308         shrq      $32, %rsi
    309         addq      %rbx, %r9
    310         movl      %r9d, %ebx
    311         shrq      $32, %r9
    312         addq      %rsi, %r10
    313         addq      %r9, %r10
    314         shlq      $32, %rbx
    315         orq       %rbx, %r8                     # r8 = low 64-bit word of the product
    316         imulq     %rax, %r11                    # word12 * high
    317         movl      8(%rcx), %r9d
    318         movl      4(%rcx), %esi
    319         movl      %edi, %ebx
    320         shrq      $32, %rdi
    321         addq      %rbx, %r10
    322         movl      %r10d, %ebx
    323         shrq      $32, %r10
    324         addq      %rdi, %r11
    325         addq      %r10, %r11
    326         movq      %r9, %rdi
    327         imulq     %rdx, %r9                     # word8 * low
    328         imulq     %rax, %rdi                    # word8 * high
    329         movl      %r9d, %r10d
    330         shrq      $32, %r9
    331         addq      %r10, %r11
    332         movl      %r11d, %r10d
    333         shrq      $32, %r11
    334         addq      %r9, %rdi
    335         addq      %r11, %rdi
    336         movq      %rsi, %r9
    337         imulq     %rdx, %rsi                    # word4 * low
    338         imulq     %rax, %r9                     # word4 * high
    339         shlq      $32, %r10
    340         orq       %rbx, %r10                    # r10 = middle 64-bit word of the product
    341         movl      (%rcx), %eax
    342         movl      %esi, %r11d
    343         shrq      $32, %rsi
    344         addq      %r11, %rdi
    345         movl      %edi, %r11d                   # r11d = low half of the top word
    346         shrq      $32, %rdi
    347         addq      %rsi, %r9
    348         addq      %rdi, %r9
    349         imulq     %rax, %rdx                    # word0 * low (added into r9 below)
        # Work out where the integer/fraction boundary of the product lies:
        # ecx = 8 * (table byte offset) + 19 - (unbiased exponent of x).
        # esi keeps the sign bit of x (0x8000 within the top 16-bit word).
    350         pextrw    $3, %xmm0, %ebx
    351         lea       PI_INV_TABLE(%rip), %rdi
    352         subq      %rdi, %rcx                    # back to the byte offset into the table
    353         addl      %ecx, %ecx
    354         addl      %ecx, %ecx
    355         addl      %ecx, %ecx                    # times 8: bit offset
    356         addl      $19, %ecx
    357         movl      $32768, %esi
    358         andl      %ebx, %esi                    # sign bit of x
    359         shrl      $4, %ebx
    360         andl      $2047, %ebx                   # biased exponent
    361         subl      $1023, %ebx                   # unbiased exponent
    362         subl      %ebx, %ecx
    363         addq      %rdx, %r9
    364         movl      %ecx, %edx
    365         addl      $32, %edx                     # edx tracks the binary scale of the fraction
    366         cmpl      $1, %ecx
    367         jl        .L_2TAG_PACKET_3.0.1          # ecx < 1: handled on the combined 64-bit top word
    368         negl      %ecx
    369         addl      $29, %ecx
    370         shll      %cl, %r9d                     # discard bits above the kept integer bits
    371         movl      %r9d, %edi                    # edi: its top 3 bits are extracted later (shrl $29)
    372         andl      $536870911, %r9d              # 0x1FFFFFFF: fraction bits only
    373         testl     $268435456, %r9d              # 0x10000000: top fraction bit set?
    374         jne       .L_2TAG_PACKET_4.0.1          # yes: take the complement path instead
    375         shrl      %cl, %r9d
    376         movl      $0, %ebx                      # ebx = 0: no sign flip applied
    377         shlq      $32, %r9
    378         orq       %r11, %r9                     # r9 = 64-bit top word of the fraction
    379 .L_2TAG_PACKET_5.0.1:
    380 .L_2TAG_PACKET_6.0.1:
    381         cmpq      $0, %r9
    382         je        .L_2TAG_PACKET_7.0.1          # top word empty: shift whole words up first
    383 .L_2TAG_PACKET_8.0.1:
        # Normalise so the leading one of r9 sits at bit 29, shifting bits
        # between r9, r10 and r8 and adjusting the scale in edx.
    384         bsr       %r9, %r11
    385         movl      $29, %ecx
    386         subl      %r11d, %ecx
    387         jle       .L_2TAG_PACKET_9.0.1          # leading one already at or above bit 29
    388         shlq      %cl, %r9
    389         movq      %r10, %rax
    390         shlq      %cl, %r10
    391         addl      %ecx, %edx
    392         negl      %ecx
    393         addl      $64, %ecx
    394         shrq      %cl, %rax
    395         shrq      %cl, %r8
    396         orq       %rax, %r9
    397         orq       %r8, %r10
    398 .L_2TAG_PACKET_10.0.1:
        # Convert the normalised fraction to doubles, scale by powers of two
        # built directly in the exponent field, and multiply by the two
        # doubles stored at PI_4 (defined outside this view).
    399         cvtsi2sdq %r9, %xmm0                    # high part of the fraction
    400         shrq      $1, %r10                      # keep the value non-negative for the signed convert
    401         cvtsi2sdq %r10, %xmm3                   # low part of the fraction
    402         xorpd     %xmm4, %xmm4
    403         shll      $4, %edx
    404         negl      %edx
    405         addl      $16368, %edx                  # 0x3FF0: exponent bias within the top 16-bit word
    406         orl       %esi, %edx                    # apply the sign of x
    407         xorl      %ebx, %edx                    # flip it when the complement path was taken
    408         pinsrw    $3, %edx, %xmm4               # xmm4 = signed power-of-two scale for the high part
    409         movq      PI_4(%rip), %xmm2
    410         movq      8+PI_4(%rip), %xmm6
    411         xorpd     %xmm5, %xmm5
    412         subl      $1008, %edx                   # 63 << 4: exponent 63 lower
    413         pinsrw    $3, %edx, %xmm5               # xmm5 = scale for the low part
    414         mulsd     %xmm4, %xmm0
    415         shll      $16, %esi
    416         sarl      $31, %esi                     # esi = all ones when x is negative, else 0
    417         mulsd     %xmm5, %xmm3
    418         movq      %xmm0, %xmm1
    419         mulsd     %xmm2, %xmm0                  # high part * first PI_4 double
    420         shrl      $29, %edi                     # top 3 bits saved earlier
    421         addsd     %xmm3, %xmm1                  # high + low
    422         mulsd     %xmm2, %xmm3                  # low part * first PI_4 double
    423         addl      %esi, %edi
    424         xorl      %esi, %edi                    # negate edi when x is negative
    425         mulsd     %xmm1, %xmm6                  # (high + low) * second PI_4 double
    426         movl      %edi, %eax                    # eax is scaled by 8 and added to the table index below
    427         addsd     %xmm3, %xmm6
    428         movq      %xmm0, %xmm2
    429         addsd     %xmm6, %xmm0                  # xmm0 = reduced argument
    430         subsd     %xmm0, %xmm2
    431         addsd     %xmm2, %xmm6                  # xmm6 = rounding error of that sum
    432 .L_2TAG_PACKET_11.0.1:
        # Second copy of the main-path kernel, run on the reduced argument in
        # xmm0.  Differences from the first copy: the conversion is 64-bit, the
        # table index also receives 8 * eax, and xmm6 is subtracted from -c.
    433         movq      PI32INV(%rip), %xmm1
    434         mulsd     %xmm0, %xmm1                  # y = x * PI32INV
    435         movq      ONEHALF(%rip), %xmm5
    436         movq      SIGN_MASK(%rip), %xmm4
    437         andpd     %xmm0, %xmm4
    438         orps      %xmm4, %xmm5                  # signed one half
    439         addpd     %xmm5, %xmm1
    440         cvttsd2si %xmm1, %rdx                   # N
    441         cvtsi2sdq %rdx, %xmm1
    442         movq      P_1(%rip), %xmm3
    443         movapd    P_2(%rip), %xmm2
    444         mulsd     %xmm1, %xmm3                  # m_1 = N * P_1
    445         unpcklpd  %xmm1, %xmm1
    446         shll      $3, %eax                      # 8 * eax
    447         addl      $1865232, %edx                # same offset as the main path (16 mod 64)
    448         movq      %xmm0, %xmm4
    449         addl      %eax, %edx
    450         andl      $63, %edx                     # table index
    451         movapd    SC_4(%rip), %xmm5
    452         lea       Ctable(%rip), %rax
    453         shll      $5, %edx                      # 32 bytes per entry
    454         addq      %rdx, %rax
    455         mulpd     %xmm1, %xmm2                  # m_2 = N * P_2
    456         subsd     %xmm3, %xmm0                  # r_1
    457         mulsd     P_3(%rip), %xmm1              # m_3 = N * P_3
    458         subsd     %xmm3, %xmm4                  # r_1 (scalar copy)
    459         movq      8(%rax), %xmm7                # S_hi
    460         unpcklpd  %xmm0, %xmm0
    461         movq      %xmm4, %xmm3
    462         subsd     %xmm2, %xmm4                  # r = r_1 - m_2
    463         mulpd     %xmm0, %xmm5                  # psc4
    464         subpd     %xmm2, %xmm0                  # r in both lanes
    465         mulsd     %xmm4, %xmm7                  # S_hi * r
    466         subsd     %xmm4, %xmm3                  # c_1
    467         mulpd     %xmm0, %xmm5                  # msc4
    468         mulpd     %xmm0, %xmm0                  # r2
    469         subsd     %xmm2, %xmm3                  # c_2
    470         movapd    (%rax), %xmm2                 # low lane C_hl, high lane S_hi
    471         subsd     %xmm3, %xmm1                  # -c = m_3 - c_2
    472         movq      24(%rax), %xmm3               # sigma
    473         addsd     %xmm3, %xmm2
    474         subsd     %xmm2, %xmm7                  # -d
    475         subsd     %xmm6, %xmm1                  # fold the reduction error (xmm6) into -c
    476         movapd    SC_2(%rip), %xmm6             # xmm6 is now free to hold SC_2
    477         mulsd     %xmm4, %xmm2
    478         mulpd     %xmm0, %xmm6                  # msc2
    479         mulsd     %xmm4, %xmm3                  # rs = sigma * r
    480         mulpd     %xmm0, %xmm2
    481         mulpd     %xmm0, %xmm0                  # r4
    482         addpd     SC_3(%rip), %xmm5             # psc3
    483         mulsd     (%rax), %xmm4                 # med = C_hl * r
    484         addpd     SC_1(%rip), %xmm6             # psc1
    485         mulpd     %xmm0, %xmm5                  # msc3
    486         movq      %xmm3, %xmm0                  # copy of rs
    487         addsd     8(%rax), %xmm3                # res_int
    488         mulpd     %xmm7, %xmm1                  # (-c) * (-d)
    489         movq      %xmm4, %xmm7                  # copy of med
    490         addsd     %xmm3, %xmm4                  # res_hi
    491         addpd     %xmm5, %xmm6                  # sincospols
    492         movq      8(%rax), %xmm5
    493         subsd     %xmm3, %xmm5                  # k_0
    494         subsd     %xmm4, %xmm3                  # k_1
    495         addsd     16(%rax), %xmm1               # corr
    496         mulpd     %xmm2, %xmm6                  # pols
    497         addsd     %xmm0, %xmm5                  # k_2 = k_0 + rs
    498         addsd     %xmm7, %xmm3                  # k_3 = k_1 + med
    499         addsd     %xmm5, %xmm1                  # res_lo accumulation: corr + k_2
    500         addsd     %xmm3, %xmm1                  #   + k_3
    501         addsd     %xmm6, %xmm1                  #   + pols low lane
    502         unpckhpd  %xmm6, %xmm6
    503         movq      %xmm4, %xmm0                  # res_hi
    504         addsd     %xmm6, %xmm1                  #   + pols high lane
    505         addsd     %xmm1, %xmm0                  # result = res_hi + res_lo
    506         jmp       ..B1.4
    507 .L_2TAG_PACKET_7.0.1:
        # The top word is zero: move the lower words up by 64 bits (up to two
        # times), adding 64 to the scale each time.  When every word is zero
        # the reduced argument and its error term are both set to zero.
    508         addl      $64, %edx
    509         movq      %r10, %r9
    510         movq      %r8, %r10
    511         movq      $0, %r8
    512         cmpq      $0, %r9
    513         jne       .L_2TAG_PACKET_8.0.1
    514         addl      $64, %edx
    515         movq      %r10, %r9
    516         movq      %r8, %r10
    517         cmpq      $0, %r9
    518         jne       .L_2TAG_PACKET_8.0.1
    519         xorpd     %xmm0, %xmm0
    520         xorpd     %xmm6, %xmm6
    521         jmp       .L_2TAG_PACKET_11.0.1
    522 .L_2TAG_PACKET_9.0.1:
        # Reached with the flags of "29 - bsr" still live: zero means the
        # leading one is already at bit 29; otherwise shift right by the excess.
    523         je        .L_2TAG_PACKET_10.0.1
    524         negl      %ecx
    525         shrq      %cl, %r10
    526         movq      %r9, %rax
    527         shrq      %cl, %r9
    528         subl      %ecx, %edx
    529         negl      %ecx
    530         addl      $64, %ecx
    531         shlq      %cl, %rax
    532         orq       %rax, %r10                    # bits shifted out of r9 enter the top of r10
    533         jmp       .L_2TAG_PACKET_10.0.1
    534 .L_2TAG_PACKET_3.0.1:
        # ecx < 1 variant: combine r9 and r11 into one 64-bit word first, then
        # shift left by -ecx and test bit 31 of the low half.
    535         negl      %ecx
    536         shlq      $32, %r9
    537         orq       %r11, %r9
    538         shlq      %cl, %r9
    539         movq      %r9, %rdi
    540         testl     $-2147483648, %r9d
    541         jne       .L_2TAG_PACKET_12.0.1         # bit set: complement path
    542         shrl      %cl, %r9d
    543         movl      $0, %ebx                      # no sign flip
    544         shrq      $3, %rdi
    545         jmp       .L_2TAG_PACKET_6.0.1
    546 .L_2TAG_PACKET_4.0.1:
        # Complement path: replace the three-word fraction r9:r10:r8 by
        # (rbx:0:0 - r9:r10:r8) with a sub/sbb borrow chain, bump edi by
        # 0x20000000, and set ebx = 0x8000 so the sign is flipped later.
    547         shrl      %cl, %r9d
    548         movl      $536870912, %ebx
    549         shrl      %cl, %ebx
    550         shlq      $32, %r9
    551         orq       %r11, %r9
    552         shlq      $32, %rbx
    553         addl      $536870912, %edi
    554         movq      $0, %rcx
    555         movq      $0, %r11
    556         subq      %r8, %rcx
    557         sbbq      %r10, %r11
    558         sbbq      %r9, %rbx
    559         movq      %rcx, %r8
    560         movq      %r11, %r10
    561         movq      %rbx, %r9
    562         movl      $32768, %ebx                  # 0x8000: sign-flip flag
    563         jmp       .L_2TAG_PACKET_5.0.1
    564 .L_2TAG_PACKET_12.0.1:
        # Complement path for the ecx < 1 variant; same borrow-chain negation.
    565         shrl      %cl, %r9d
    566         movq      $0x100000000, %rbx
    567         shrq      %cl, %rbx
    568         movq      $0, %rcx
    569         movq      $0, %r11
    570         subq      %r8, %rcx
    571         sbbq      %r10, %r11
    572         sbbq      %r9, %rbx
    573         movq      %rcx, %r8
    574         movq      %r11, %r10
    575         movq      %rbx, %r9
    576         movl      $32768, %ebx                  # 0x8000: sign-flip flag
    577         shrq      $3, %rdi
    578         addl      $536870912, %edi
    579         jmp       .L_2TAG_PACKET_6.0.1
    580 .L_2TAG_PACKET_2.0.1:
        # Inf or NaN input.  The header lists NaN (with invalid raised) as the
        # result for both.  NOTE(review): NEG_ZERO is defined outside this
        # view; by its name presumably -0.0, so Inf * NEG_ZERO gives NaN - confirm.
    581         movsd     8(%rsp), %xmm0                # reload the original x
    582         mulsd     NEG_ZERO(%rip), %xmm0
    583         movq      %xmm0, (%rsp)                 # stored to scratch; nothing visible here reads it back
    584 .L_2TAG_PACKET_13.0.1:
    585 ..B1.4:
    586         addq      $16, %rsp                     # common exit: release scratch, result is in xmm0
    587 ..___tag_value_cos.6:
    588         popq      %rbx
    589 ..___tag_value_cos.8:
    590         ret
    591 ..___tag_value_cos.9:
    592 END(cos)
    593 # -- End  cos
    594 	.section .rodata, "a"
    595 	.align 16
    596 	.align 16
    597 ONEHALF:
	# Two identical doubles, each 0x3FE0000000000000 = 0.5 (little-endian
	# low word first).  cos ORs the sign of x into it and adds the result
	# before truncating, which rounds to the nearest integer.  It is loaded
	# with movapd, so the 16-byte alignment is required.
    598 	.long	0
    599 	.long	1071644672	# 0x3FE00000: high word of 0.5
    600 	.long	0
    601 	.long	1071644672	# 0x3FE00000: high word of 0.5
    602 	.type	ONEHALF,@object
    603 	.size	ONEHALF,16
    604 	.align 16
    605 P_2:
	# The P_2 term of the P_1 + P_2 + P_3 range-reduction constant from the
	# algorithm description, stored as two identical doubles
	# (0x3D90B4611A600000).  cos loads it with movapd and multiplies both
	# lanes by N, so the 16-byte alignment is required.
    606 	.long	442499072	# 0x1A600000: low word
    607 	.long	1032893537	# 0x3D90B461: high word
    608 	.long	442499072
    609 	.long	1032893537
    610 	.type	P_2,@object
    611 	.size	P_2,16
    612 	.align 16
    613 SC_4:
	# Packed pair of SC_4 polynomial coefficients (see section 3 of the
	# algorithm description), loaded with movapd.  In cos the low lane
	# feeds the (C_hl + sigma) * r^3 product and the high lane feeds the
	# S_hi * r^2 product.
	# NOTE(review): the bit patterns look like 1/9! (low lane) and 1/8!
	# (high lane) - confirm against the generator of these constants.
    614 	.long	2773927732	# low lane, low word  (0xA556C734)
    615 	.long	1053236707	# low lane, high word (0x3EC71DE3)
    616 	.long	436314138	# high lane, low word  (0x1A01A01A)
    617 	.long	1056571808	# high lane, high word (0x3EFA01A0)
    618 	.type	SC_4,@object
    619 	.size	SC_4,16
    620 	.align 16
    621 Ctable:
    622 	.long	0
    623 	.long	0
    624 	.long	0
    625 	.long	0
    626 	.long	0
    627 	.long	0
    628 	.long	0
    629 	.long	1072693248
    630 	.long	393047345
    631 	.long	3212032302
    632 	.long	3156849708
    633 	.long	1069094822
    634 	.long	3758096384
    635 	.long	3158189848
    636 	.long	0
    637 	.long	1072693248
    638 	.long	18115067
    639 	.long	3214126342
    640 	.long	1013556747
    641 	.long	1070135480
    642 	.long	3221225472
    643 	.long	3160567065
    644 	.long	0
    645 	.long	1072693248
    646 	.long	2476548698
    647 	.long	3215330282
    648 	.long	785751814
    649 	.long	1070765062
    650 	.long	2684354560
    651 	.long	3161838221
    652 	.long	0
    653 	.long	1072693248
    654 	.long	2255197647
    655 	.long	3216211105
    656 	.long	2796464483
    657 	.long	1071152610
    658 	.long	3758096384
    659 	.long	3160878317
    660 	.long	0
    661 	.long	1072693248
    662 	.long	1945768569
    663 	.long	3216915048
    664 	.long	939980347
    665 	.long	1071524701
    666 	.long	536870912
    667 	.long	1012796809
    668 	.long	0
    669 	.long	1072693248
    670 	.long	1539668340
    671 	.long	3217396327
    672 	.long	967731400
    673 	.long	1071761211
    674 	.long	536870912
    675 	.long	1015752157
    676 	.long	0
    677 	.long	1072693248
    678 	.long	1403757309
    679 	.long	3217886718
    680 	.long	621354454
    681 	.long	1071926515
    682 	.long	536870912
    683 	.long	1013450602
    684 	.long	0
    685 	.long	1072693248
    686 	.long	2583490354
    687 	.long	1070236281
    688 	.long	1719614413
    689 	.long	1072079006
    690 	.long	536870912
    691 	.long	3163282740
    692 	.long	0
    693 	.long	1071644672
    694 	.long	2485417816
    695 	.long	1069626316
    696 	.long	1796544321
    697 	.long	1072217216
    698 	.long	536870912
    699 	.long	3162686945
    700 	.long	0
    701 	.long	1071644672
    702 	.long	2598800519
    703 	.long	1068266419
    704 	.long	688824739
    705 	.long	1072339814
    706 	.long	3758096384
    707 	.long	1010431536
    708 	.long	0
    709 	.long	1071644672
    710 	.long	2140183630
    711 	.long	3214756396
    712 	.long	4051746225
    713 	.long	1072445618
    714 	.long	2147483648
    715 	.long	3161907377
    716 	.long	0
    717 	.long	1071644672
    718 	.long	1699043957
    719 	.long	3216902261
    720 	.long	3476196678
    721 	.long	1072533611
    722 	.long	536870912
    723 	.long	1014257638
    724 	.long	0
    725 	.long	1071644672
    726 	.long	1991047213
    727 	.long	1067753521
    728 	.long	1455828442
    729 	.long	1072602945
    730 	.long	3758096384
    731 	.long	1015505073
    732 	.long	0
    733 	.long	1070596096
    734 	.long	240740309
    735 	.long	3215727903
    736 	.long	3489094832
    737 	.long	1072652951
    738 	.long	536870912
    739 	.long	1014325783
    740 	.long	0
    741 	.long	1070596096
    742 	.long	257503056
    743 	.long	3214647653
    744 	.long	2748392742
    745 	.long	1072683149
    746 	.long	1073741824
    747 	.long	3163061750
    748 	.long	0
    749 	.long	1069547520
    750 	.long	0
    751 	.long	0
    752 	.long	0
    753 	.long	1072693248
    754 	.long	0
    755 	.long	0
    756 	.long	0
    757 	.long	0
    758 	.long	257503056
    759 	.long	1067164005
    760 	.long	2748392742
    761 	.long	1072683149
    762 	.long	1073741824
    763 	.long	3163061750
    764 	.long	0
    765 	.long	3217031168
    766 	.long	240740309
    767 	.long	1068244255
    768 	.long	3489094832
    769 	.long	1072652951
    770 	.long	536870912
    771 	.long	1014325783
    772 	.long	0
    773 	.long	3218079744
    774 	.long	1991047213
    775 	.long	3215237169
    776 	.long	1455828442
    777 	.long	1072602945
    778 	.long	3758096384
    779 	.long	1015505073
    780 	.long	0
    781 	.long	3218079744
    782 	.long	1699043957
    783 	.long	1069418613
    784 	.long	3476196678
    785 	.long	1072533611
    786 	.long	536870912
    787 	.long	1014257638
    788 	.long	0
    789 	.long	3219128320
    790 	.long	2140183630
    791 	.long	1067272748
    792 	.long	4051746225
    793 	.long	1072445618
    794 	.long	2147483648
    795 	.long	3161907377
    796 	.long	0
    797 	.long	3219128320
    798 	.long	2598800519
    799 	.long	3215750067
    800 	.long	688824739
    801 	.long	1072339814
    802 	.long	3758096384
    803 	.long	1010431536
    804 	.long	0
    805 	.long	3219128320
    806 	.long	2485417816
    807 	.long	3217109964
    808 	.long	1796544321
    809 	.long	1072217216
    810 	.long	536870912
    811 	.long	3162686945
    812 	.long	0
    813 	.long	3219128320
    814 	.long	2583490354
    815 	.long	3217719929
    816 	.long	1719614413
    817 	.long	1072079006
    818 	.long	536870912
    819 	.long	3163282740
    820 	.long	0
    821 	.long	3219128320
    822 	.long	1403757309
    823 	.long	1070403070
    824 	.long	621354454
    825 	.long	1071926515
    826 	.long	536870912
    827 	.long	1013450602
    828 	.long	0
    829 	.long	3220176896
    830 	.long	1539668340
    831 	.long	1069912679
    832 	.long	967731400
    833 	.long	1071761211
    834 	.long	536870912
    835 	.long	1015752157
    836 	.long	0
    837 	.long	3220176896
    838 	.long	1945768569
    839 	.long	1069431400
    840 	.long	939980347
    841 	.long	1071524701
    842 	.long	536870912
    843 	.long	1012796809
    844 	.long	0
    845 	.long	3220176896
    846 	.long	2255197647
    847 	.long	1068727457
    848 	.long	2796464483
    849 	.long	1071152610
    850 	.long	3758096384
    851 	.long	3160878317
    852 	.long	0
    853 	.long	3220176896
    854 	.long	2476548698
    855 	.long	1067846634
    856 	.long	785751814
    857 	.long	1070765062
    858 	.long	2684354560
    859 	.long	3161838221
    860 	.long	0
    861 	.long	3220176896
    862 	.long	18115067
    863 	.long	1066642694
    864 	.long	1013556747
    865 	.long	1070135480
    866 	.long	3221225472
    867 	.long	3160567065
    868 	.long	0
    869 	.long	3220176896
    870 	.long	393047345
    871 	.long	1064548654
    872 	.long	3156849708
    873 	.long	1069094822
    874 	.long	3758096384
    875 	.long	3158189848
    876 	.long	0
    877 	.long	3220176896
    878 	.long	0
    879 	.long	0
    880 	.long	0
    881 	.long	0
    882 	.long	0
    883 	.long	0
    884 	.long	0
    885 	.long	3220176896
    886 	.long	393047345
    887 	.long	1064548654
    888 	.long	3156849708
    889 	.long	3216578470
    890 	.long	3758096384
    891 	.long	1010706200
    892 	.long	0
    893 	.long	3220176896
    894 	.long	18115067
    895 	.long	1066642694
    896 	.long	1013556747
    897 	.long	3217619128
    898 	.long	3221225472
    899 	.long	1013083417
    900 	.long	0
    901 	.long	3220176896
    902 	.long	2476548698
    903 	.long	1067846634
    904 	.long	785751814
    905 	.long	3218248710
    906 	.long	2684354560
    907 	.long	1014354573
    908 	.long	0
    909 	.long	3220176896
    910 	.long	2255197647
    911 	.long	1068727457
    912 	.long	2796464483
    913 	.long	3218636258
    914 	.long	3758096384
    915 	.long	1013394669
    916 	.long	0
    917 	.long	3220176896
    918 	.long	1945768569
    919 	.long	1069431400
    920 	.long	939980347
    921 	.long	3219008349
    922 	.long	536870912
    923 	.long	3160280457
    924 	.long	0
    925 	.long	3220176896
    926 	.long	1539668340
    927 	.long	1069912679
    928 	.long	967731400
    929 	.long	3219244859
    930 	.long	536870912
    931 	.long	3163235805
    932 	.long	0
    933 	.long	3220176896
    934 	.long	1403757309
    935 	.long	1070403070
    936 	.long	621354454
    937 	.long	3219410163
    938 	.long	536870912
    939 	.long	3160934250
    940 	.long	0
    941 	.long	3220176896
    942 	.long	2583490354
    943 	.long	3217719929
    944 	.long	1719614413
    945 	.long	3219562654
    946 	.long	536870912
    947 	.long	1015799092
    948 	.long	0
    949 	.long	3219128320
    950 	.long	2485417816
    951 	.long	3217109964
    952 	.long	1796544321
    953 	.long	3219700864
    954 	.long	536870912
    955 	.long	1015203297
    956 	.long	0
    957 	.long	3219128320
    958 	.long	2598800519
    959 	.long	3215750067
    960 	.long	688824739
    961 	.long	3219823462
    962 	.long	3758096384
    963 	.long	3157915184
    964 	.long	0
    965 	.long	3219128320
    966 	.long	2140183630
    967 	.long	1067272748
    968 	.long	4051746225
    969 	.long	3219929266
    970 	.long	2147483648
    971 	.long	1014423729
    972 	.long	0
    973 	.long	3219128320
    974 	.long	1699043957
    975 	.long	1069418613
    976 	.long	3476196678
    977 	.long	3220017259
    978 	.long	536870912
    979 	.long	3161741286
    980 	.long	0
    981 	.long	3219128320
    982 	.long	1991047213
    983 	.long	3215237169
    984 	.long	1455828442
    985 	.long	3220086593
    986 	.long	3758096384
    987 	.long	3162988721
    988 	.long	0
    989 	.long	3218079744
    990 	.long	240740309
    991 	.long	1068244255
    992 	.long	3489094832
    993 	.long	3220136599
    994 	.long	536870912
    995 	.long	3161809431
    996 	.long	0
    997 	.long	3218079744
    998 	.long	257503056
    999 	.long	1067164005
   1000 	.long	2748392742
   1001 	.long	3220166797
   1002 	.long	1073741824
   1003 	.long	1015578102
   1004 	.long	0
   1005 	.long	3217031168
   1006 	.long	0
   1007 	.long	0
   1008 	.long	0
   1009 	.long	3220176896
   1010 	.long	0
   1011 	.long	0
   1012 	.long	0
   1013 	.long	0
   1014 	.long	257503056
   1015 	.long	3214647653
   1016 	.long	2748392742
   1017 	.long	3220166797
   1018 	.long	1073741824
   1019 	.long	1015578102
   1020 	.long	0
   1021 	.long	1069547520
   1022 	.long	240740309
   1023 	.long	3215727903
   1024 	.long	3489094832
   1025 	.long	3220136599
   1026 	.long	536870912
   1027 	.long	3161809431
   1028 	.long	0
   1029 	.long	1070596096
   1030 	.long	1991047213
   1031 	.long	1067753521
   1032 	.long	1455828442
   1033 	.long	3220086593
   1034 	.long	3758096384
   1035 	.long	3162988721
   1036 	.long	0
   1037 	.long	1070596096
   1038 	.long	1699043957
   1039 	.long	3216902261
   1040 	.long	3476196678
   1041 	.long	3220017259
   1042 	.long	536870912
   1043 	.long	3161741286
   1044 	.long	0
   1045 	.long	1071644672
   1046 	.long	2140183630
   1047 	.long	3214756396
   1048 	.long	4051746225
   1049 	.long	3219929266
   1050 	.long	2147483648
   1051 	.long	1014423729
   1052 	.long	0
   1053 	.long	1071644672
   1054 	.long	2598800519
   1055 	.long	1068266419
   1056 	.long	688824739
   1057 	.long	3219823462
   1058 	.long	3758096384
   1059 	.long	3157915184
   1060 	.long	0
   1061 	.long	1071644672
   1062 	.long	2485417816
   1063 	.long	1069626316
   1064 	.long	1796544321
   1065 	.long	3219700864
   1066 	.long	536870912
   1067 	.long	1015203297
   1068 	.long	0
   1069 	.long	1071644672
   1070 	.long	2583490354
   1071 	.long	1070236281
   1072 	.long	1719614413
   1073 	.long	3219562654
   1074 	.long	536870912
   1075 	.long	1015799092
   1076 	.long	0
   1077 	.long	1071644672
   1078 	.long	1403757309
   1079 	.long	3217886718
   1080 	.long	621354454
   1081 	.long	3219410163
   1082 	.long	536870912
   1083 	.long	3160934250
   1084 	.long	0
   1085 	.long	1072693248
   1086 	.long	1539668340
   1087 	.long	3217396327
   1088 	.long	967731400
   1089 	.long	3219244859
   1090 	.long	536870912
   1091 	.long	3163235805
   1092 	.long	0
   1093 	.long	1072693248
   1094 	.long	1945768569
   1095 	.long	3216915048
   1096 	.long	939980347
   1097 	.long	3219008349
   1098 	.long	536870912
   1099 	.long	3160280457
   1100 	.long	0
   1101 	.long	1072693248
   1102 	.long	2255197647
   1103 	.long	3216211105
   1104 	.long	2796464483
   1105 	.long	3218636258
   1106 	.long	3758096384
   1107 	.long	1013394669
   1108 	.long	0
   1109 	.long	1072693248
   1110 	.long	2476548698
   1111 	.long	3215330282
   1112 	.long	785751814
   1113 	.long	3218248710
   1114 	.long	2684354560
   1115 	.long	1014354573
   1116 	.long	0
   1117 	.long	1072693248
   1118 	.long	18115067
   1119 	.long	3214126342
   1120 	.long	1013556747
   1121 	.long	3217619128
   1122 	.long	3221225472
   1123 	.long	1013083417
   1124 	.long	0
   1125 	.long	1072693248
   1126 	.long	393047345
   1127 	.long	3212032302
   1128 	.long	3156849708
   1129 	.long	3216578470
   1130 	.long	3758096384
   1131 	.long	1010706200
   1132 	.long	0
   1133 	.long	1072693248
   1134 	.type	Ctable,@object
   1135 	.size	Ctable,2048
   1136 	.align 16
// SC_2: 16-byte-aligned, 16-byte object made of two 64-bit values, each
// written as two .long words (low word first on little-endian x86-64).
// Read as IEEE-754 doubles the contents are:
//   offset 0: 0x3F81111111111111 ~= 1/120
//   offset 8: 0x3FA5555555555555 ~= 1/24
// NOTE(review): these look like the x^5 (sine) and x^4 (cosine) Taylor
// coefficients packed as a pair; the code that loads them is outside this
// chunk -- confirm against the consumer.
   1137 SC_2:
   1138 	.long	286331153
   1139 	.long	1065423121
   1140 	.long	1431655765
   1141 	.long	1067799893
   1142 	.type	SC_2,@object
   1143 	.size	SC_2,16
   1144 	.align 16
// SC_3: 16-byte-aligned, 16-byte object made of two 64-bit values, each
// written as two .long words (low word first on little-endian x86-64).
// Read as IEEE-754 doubles the contents are:
//   offset 0: 0xBF2A01A01A01A01A ~= -1/5040
//   offset 8: 0xBF56C16C16C16C17 ~= -1/720
// NOTE(review): these look like the x^7 (sine) and x^6 (cosine) Taylor
// coefficients packed as a pair; the code that loads them is outside this
// chunk -- confirm against the consumer.
   1145 SC_3:
   1146 	.long	436314138
   1147 	.long	3207201184
   1148 	.long	381774871
   1149 	.long	3210133868
   1150 	.type	SC_3,@object
   1151 	.size	SC_3,16
   1152 	.align 16
// SC_1: 16-byte-aligned, 16-byte object made of two 64-bit values, each
// written as two .long words (low word first on little-endian x86-64).
// Read as IEEE-754 doubles the contents are:
//   offset 0: 0xBFC5555555555555 ~= -1/6
//   offset 8: 0xBFE0000000000000  = -0.5
// NOTE(review): these look like the x^3 (sine) and x^2 (cosine) Taylor
// coefficients packed as a pair; the code that loads them is outside this
// chunk -- confirm against the consumer.
   1153 SC_1:
   1154 	.long	1431655765
   1155 	.long	3217380693
   1156 	.long	0
   1157 	.long	3219128320
   1158 	.type	SC_1,@object
   1159 	.size	SC_1,16
   1160 	.align 16
// PI_INV_TABLE: 41 32-bit words (164 bytes, matching the .size below).
// The first two words are zero; the words that follow begin
//   0xA2F9836E, 0x4E441529, 0xFC2757D1, 0xF534DDC0, 0xDB629599, ...
// which are the leading hexadecimal digits of the fraction of 2/pi
// (2/pi = 0.A2F9836E4E441529FC2757D1... in base 16), most significant
// word first.
// NOTE(review): presumably consumed by a large-argument range reduction
// that multiplies the input by a window of these bits; that code is
// outside this chunk -- confirm.
// The trailing .space directive emits 12 zero bytes after the object,
// bringing the 164 bytes up to 176 (a multiple of 16); those pad bytes are
// not counted in the symbol's .size.
   1161 PI_INV_TABLE:
   1162 	.long	0
   1163 	.long	0
   1164 	.long	2734261102
   1165 	.long	1313084713
   1166 	.long	4230436817
   1167 	.long	4113882560
   1168 	.long	3680671129
   1169 	.long	1011060801
   1170 	.long	4266746795
   1171 	.long	3736847713
   1172 	.long	3072618042
   1173 	.long	1112396512
   1174 	.long	105459434
   1175 	.long	164729372
   1176 	.long	4263373596
   1177 	.long	2972297022
   1178 	.long	3900847605
   1179 	.long	784024708
   1180 	.long	3919343654
   1181 	.long	3026157121
   1182 	.long	965858873
   1183 	.long	2203269620
   1184 	.long	2625920907
   1185 	.long	3187222587
   1186 	.long	536385535
   1187 	.long	3724908559
   1188 	.long	4012839307
   1189 	.long	1510632735
   1190 	.long	1832287951
   1191 	.long	667617719
   1192 	.long	1330003814
   1193 	.long	2657085997
   1194 	.long	1965537991
   1195 	.long	3957715323
   1196 	.long	1023883767
   1197 	.long	2320667370
   1198 	.long	1811636145
   1199 	.long	529358088
   1200 	.long	1443049542
   1201 	.long	4235946923
   1202 	.long	4040145953
   1203 	.type	PI_INV_TABLE,@object
   1204 	.size	PI_INV_TABLE,164
   1205 	.space 12, 0x00 	# pad
   1206 	.align 16
// PI_4: 16-byte-aligned pair of 64-bit values, each written as two .long
// words (low word first). Read as IEEE-754 doubles:
//   offset 0: 0x3FE921FB40000000 ~= 0.785398 (pi/4 with the low 30
//             mantissa bits cleared)
//   offset 8: 0x3E64442D18469899 ~= 3.77e-08 (the bits of pi/4 that the
//             first value leaves out)
// Together the two values add up to pi/4 (0x3FE921FB54442D18...) with more
// precision than a single double carries.
// NOTE(review): how the two parts are combined is decided by code outside
// this chunk.
   1207 PI_4:
   1208 	.long	1073741824
   1209 	.long	1072243195
   1210 	.long	407279769
   1211 	.long	1046758445
   1212 	.type	PI_4,@object
   1213 	.size	PI_4,16
   1214 	.align 8
// PI32INV: one 64-bit value written as two .long words (low word first).
// Read as an IEEE-754 double it is 0x40245F306DC9C883 ~= 10.18591636,
// i.e. 32/pi.
   1215 PI32INV:
   1216 	.long	1841940611
   1217 	.long	1076125488
   1218 	.type	PI32INV,@object
   1219 	.size	PI32INV,8
   1220 	.align 8
// SIGN_MASK: one 64-bit value written as two .long words (low word first):
// 0x8000000000000000, i.e. only bit 63 set. In an IEEE-754 double that bit
// is the sign bit.
   1221 SIGN_MASK:
   1222 	.long	0
   1223 	.long	2147483648
   1224 	.type	SIGN_MASK,@object
   1225 	.size	SIGN_MASK,8
   1226 	.align 8
// P_1: one 64-bit value written as two .long words (low word first).
// Read as an IEEE-754 double it is 0x3FB921FB54400000 ~= 0.0981748, which
// matches the leading bits of pi/32 (0x3FB921FB54442D18) with the low 22
// mantissa bits cleared.
// NOTE(review): presumably the first term of a multi-part pi/32 split used
// for range reduction; the consuming code is outside this chunk -- confirm.
   1227 P_1:
   1228 	.long	1413480448
   1229 	.long	1069097467
   1230 	.type	P_1,@object
   1231 	.size	P_1,8
   1232 	.align 8
// P_3: one 64-bit value written as two .long words (low word first).
// Read as an IEEE-754 double it is 0x3B63198A2E037073 ~= 1.26e-22.
// NOTE(review): the name and magnitude suggest a low-order correction term
// of a multi-part pi/32 split (its mantissa digits 3198A2E03707... occur in
// the hexadecimal expansion of pi); a P_2 term is not defined in this
// chunk -- confirm against the rest of the file.
   1233 P_3:
   1234 	.long	771977331
   1235 	.long	996350346
   1236 	.type	P_3,@object
   1237 	.size	P_3,8
   1238 	.align 8
// ONE: one 64-bit value written as two .long words (low word first):
// 0x3FF0000000000000, which is 1.0 as an IEEE-754 double.
   1239 ONE:
   1240 	.long	0
   1241 	.long	1072693248
   1242 	.type	ONE,@object
   1243 	.size	ONE,8
   1244 	.align 8
// NEG_ZERO: one 64-bit value written as two .long words (low word first):
// 0x8000000000000000, which is -0.0 as an IEEE-754 double. The bit pattern
// is identical to the SIGN_MASK object defined earlier in this table area.
   1245 NEG_ZERO:
   1246 	.long	0
   1247 	.long	2147483648
   1248 	.type	NEG_ZERO,@object
   1249 	.size	NEG_ZERO,8
   1250 	.data
// The .data directive above switches to the data section, but nothing is
// emitted into it before the next section switch. The empty
// .note.GNU-stack section below is the GNU toolchain's marker that this
// object does not require an executable stack.
   1251 	.section .note.GNU-stack, ""
   1252 // -- Begin DWARF2 SEGMENT .eh_frame
// Hand-encoded call-frame information: one CIE followed by one FDE that
// covers the code between the ..___tag_value_cos.1 and ..___tag_value_cos.9
// labels (defined outside this chunk). Multi-byte constants are emitted
// little-endian, so the byte order in the stream is the reverse of the hex
// literal as written. Register numbers follow the x86-64 DWARF mapping
// (3 = rbx, 7 = rsp, 16 = return address).
   1253 	.section .eh_frame,"a",@progbits
   1254 .eh_frame_seg:
   1255 	.align 1
// ---- CIE ----
// Length of the CIE body: 0x14 = 20 bytes.
   1256 	.4byte 0x00000014
// Bytes 00 00 00 00 | 01 | 7a 52 00: CIE id 0, version 1, augmentation
// string "zR".
   1257 	.8byte 0x00527a0100000000
// Bytes 01 78 10 01 1b 0c 07 08: code alignment factor 1, data alignment
// factor -8 (SLEB128 0x78), return-address register 16, augmentation data
// length 1, FDE pointer encoding 0x1b (pc-relative, signed 4-byte), then
// DW_CFA_def_cfa register 7 offset 8 (CFA = rsp + 8 on entry).
   1258 	.8byte 0x08070c1b01107801
// Bytes 90 01 00 00: DW_CFA_offset register 16 at CFA-8 (return address),
// followed by two DW_CFA_nop padding bytes.
   1259 	.4byte 0x00000190
// ---- FDE ----
// Length of the FDE body: 0x2c = 44 bytes.
   1260 	.4byte 0x0000002c
// CIE pointer: the CIE starts 0x1c = 28 bytes before this field.
   1261 	.4byte 0x0000001c
// PC-relative start address of the covered code.
   1262 	.4byte ..___tag_value_cos.1-.
// Size in bytes of the covered code range.
   1263 	.4byte ..___tag_value_cos.9-..___tag_value_cos.1
// Bytes 00 04: augmentation data length 0, then DW_CFA_advance_loc4 whose
// 4-byte delta is the next directive.
   1264 	.2byte 0x0400
   1265 	.4byte ..___tag_value_cos.3-..___tag_value_cos.1
// Bytes 0e 10 83 02: DW_CFA_def_cfa_offset 16; DW_CFA_offset register 3
// (rbx) saved at CFA-16.
   1266 	.4byte 0x0283100e
// DW_CFA_advance_loc4; delta follows.
   1267 	.byte 0x04
   1268 	.4byte ..___tag_value_cos.5-..___tag_value_cos.3
// Bytes 0e 20: DW_CFA_def_cfa_offset 32.
   1269 	.2byte 0x200e
// DW_CFA_advance_loc4; delta follows.
   1270 	.byte 0x04
   1271 	.4byte ..___tag_value_cos.6-..___tag_value_cos.5
// Bytes 0e 10 c3 04: DW_CFA_def_cfa_offset 16; DW_CFA_restore register 3
// (rbx); DW_CFA_advance_loc4 whose delta is the next directive.
   1272 	.4byte 0x04c3100e
   1273 	.4byte ..___tag_value_cos.8-..___tag_value_cos.6
// Bytes 0e 08: DW_CFA_def_cfa_offset 8 (stack back to its entry state).
   1274 	.2byte 0x080e
   1275 # End
   1276