Home | History | Annotate | Download | only in x86
      1 /*
      2 Copyright (c) 2014, Intel Corporation
      3 All rights reserved.
      4 
      5 Redistribution and use in source and binary forms, with or without
      6 modification, are permitted provided that the following conditions are met:
      7 
      8     * Redistributions of source code must retain the above copyright notice,
      9     * this list of conditions and the following disclaimer.
     10 
     11     * Redistributions in binary form must reproduce the above copyright notice,
     12     * this list of conditions and the following disclaimer in the documentation
     13     * and/or other materials provided with the distribution.
     14 
     15     * Neither the name of Intel Corporation nor the names of its contributors
     16     * may be used to endorse or promote products derived from this software
     17     * without specific prior written permission.
     18 
     19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
     20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     22 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
     23 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     24 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     25 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
     26 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     28 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29 */
     30 
     31 /******************************************************************************/
     32 //                     ALGORITHM DESCRIPTION
     33 //                     ---------------------
     34 //
     35 // X87 version:
     36 // Use 80-bit FPU precision fmul, fsqrt to compute square and sqrt.
     37 //
     38 // SSE version:
     39 // Swap x, y if |x|<|y|
     40 // For x=2^k*x, get y=y*2^(-k)
     41 // Get S ~ sqrt(x^2+y^2)  (leading 1 + leading 25 mantissa bits)
     42 //
     43 // Get D = ( RN(x^2+y^2) - S^2 ) + ( x^2 - RN(x^2) ) +
     44 //                               + ( y^2 - ((RN(x^2+y^2)-RN(x^2)) )
     45 //
     46 // Result is 2^k*(S + Se),  where Se = S*e
     47 //        S*e is approximated as (D/2S)*( 1 - (D/2S)^2*1.0/S )
     48 //
     49 // Return 2^k*(S+Se)
     50 //
     51 // For |y/x|<2^(-64), return x
     52 //
     53 // For cases where maximum biased exponent is either greater than 7fdh or
     54 // below 32, take a special path to check for special cases (0, NaN, Inf),
     55 // possible overflow, and more accurate computation for denormal results
     56 //
     57 // Special cases:
     58 //  hypot(x,y), hypot(y,x), and hypot(x,-y) are equivalent
     59 //  hypot(x,+-0) is equivalent to fabs(x)
     60 //  hypot(x,y) = y if (x==NaN or x==INF) and y==INF
     61 //  hypot(x,y) = x if (x==NaN or x==INF) and y!=INF (even if y==NaN!)
     62 //  hypot(x,y) = y if (x!=NaN and x!=INF) and (y==NaN or y==INF)
     63 //
     64 /******************************************************************************/
     65 
     66 #include <private/bionic_asm.h>
     67 # -- Begin  static_func
     68         .text
     69         .align __bionic_asm_align
     70         .type static_func, @function
     71 static_func:
     72 ..B1.1:
     73         call      ..L2
     74 ..L2:
     75         popl      %eax
     76         lea       _GLOBAL_OFFSET_TABLE_+[. - ..L2](%eax), %eax
     77         lea       static_const_table@GOTOFF(%eax), %eax
     78         ret
     79         .size   static_func,.-static_func
     80 # -- End  static_func
     81 
     82 # -- Begin  hypot
     83 ENTRY(hypot)
     84 # parameter 1: 8 + %ebp
     85 # parameter 2: 16 + %ebp
     86 ..B2.1:
     87 ..B2.2:
     88         pushl     %ebp
     89         movl      %esp, %ebp
     90         subl      $152, %esp
     91         movl      %ebx, 96(%esp)
     92         call      static_func
     93         movl      %eax, %ebx
     94         movapd    (%ebx), %xmm3
     95         movsd     160(%esp), %xmm0
     96         movsd     168(%esp), %xmm1
     97         andpd     %xmm3, %xmm0
     98         andpd     %xmm3, %xmm1
     99         pextrw    $3, %xmm0, %eax
    100         pextrw    $3, %xmm1, %edx
    101         cmpl      $24528, %eax
    102         ja        .L_2TAG_PACKET_0.0.2
    103         cmpl      $24528, %edx
    104         ja        .L_2TAG_PACKET_0.0.2
    105 .L_2TAG_PACKET_1.0.2:
    106         fldl      160(%esp)
    107         fldl      168(%esp)
    108         fxch      %st(1)
    109         fmul      %st(0), %st
    110         fxch      %st(1)
    111         nop
    112         fmul      %st(0), %st
    113         faddp     %st, %st(1)
    114         fsqrt
    115         jmp       .L_2TAG_PACKET_2.0.2
    116 .L_2TAG_PACKET_0.0.2:
    117         cmpl      $32752, %eax
    118         movl      %eax, %ecx
    119         jae       .L_2TAG_PACKET_3.0.2
    120         subl      %edx, %ecx
    121         cmpl      $32752, %edx
    122         jae       .L_2TAG_PACKET_3.0.2
    123         addl      $928, %ecx
    124         addl      %edx, %eax
    125         cmpl      $1856, %ecx
    126         ja        .L_2TAG_PACKET_4.0.2
    127         cmpl      $49056, %eax
    128         jb        .L_2TAG_PACKET_1.0.2
    129         fldl      160(%esp)
    130         fldl      168(%esp)
    131         fxch      %st(1)
    132         fmul      %st(0), %st
    133         fxch      %st(1)
    134         nop
    135         fmul      %st(0), %st
    136         faddp     %st, %st(1)
    137         fsqrt
    138 .L_2TAG_PACKET_5.0.2:
    139         fstl      (%esp)
    140         fstpt     16(%esp)
    141         xorl      %eax, %eax
    142         movw      24(%esp), %ax
    143         cmpl      $17407, %eax
    144         jae       .L_2TAG_PACKET_6.0.2
    145         fldl      (%esp)
    146         jmp       .L_2TAG_PACKET_7.0.2
    147 .L_2TAG_PACKET_4.0.2:
    148         movsd     %xmm0, 32(%esp)
    149         movsd     %xmm1, 40(%esp)
    150         fldl      32(%esp)
    151         faddl     40(%esp)
    152         jmp       .L_2TAG_PACKET_5.0.2
    153 .L_2TAG_PACKET_6.0.2:
    154         movl      $46, %edx
    155 .L_2TAG_PACKET_8.0.2:
    156         movsd     160(%esp), %xmm0
    157         movsd     168(%esp), %xmm1
    158         fldl      (%esp)
    159         jmp       .L_2TAG_PACKET_7.0.2
    160 .L_2TAG_PACKET_3.0.2:
    161         shufpd    $0, %xmm1, %xmm0
    162         movdqa    %xmm0, %xmm2
    163         movdqa    16(%ebx), %xmm3
    164         movsd     %xmm0, 32(%esp)
    165         movsd     %xmm1, 40(%esp)
    166         cmppd     $3, %xmm0, %xmm2
    167         cmppd     $0, %xmm0, %xmm3
    168         movmskpd  %xmm2, %edx
    169         movmskpd  %xmm3, %eax
    170         testl     %edx, %edx
    171         je        .L_2TAG_PACKET_9.0.2
    172         fldl      32(%esp)
    173         fmull     40(%esp)
    174         testl     $1, %eax
    175         jne       .L_2TAG_PACKET_10.0.2
    176         testl     $2, %eax
    177         jne       .L_2TAG_PACKET_11.0.2
    178         jmp       .L_2TAG_PACKET_2.0.2
    179 .L_2TAG_PACKET_9.0.2:
    180         fldl      32(%esp)
    181         faddl     40(%esp)
    182         jmp       .L_2TAG_PACKET_2.0.2
    183 .L_2TAG_PACKET_10.0.2:
    184         fstpl     40(%esp)
    185         fldl      32(%esp)
    186         jmp       .L_2TAG_PACKET_7.0.2
    187 .L_2TAG_PACKET_11.0.2:
    188         fstpl     32(%esp)
    189         fldl      40(%esp)
    190         jmp       .L_2TAG_PACKET_7.0.2
    191 .L_2TAG_PACKET_2.0.2:
    192 .L_2TAG_PACKET_7.0.2:
    193         movl      96(%esp), %ebx
    194         movl      %ebp, %esp
    195         popl      %ebp
    196         ret
    197 ..B2.3:
    198 END(hypot)
    199 # -- End  hypot
    200 
    201 # Start file scope ASM
    202 ALIAS_SYMBOL(hypotl, hypot);
    203 # End file scope ASM
    204 	.section .rodata, "a"
    205 	.align 16
    206 	.align 16
    207 static_const_table:
    208 	.long	4294967295
    209 	.long	2147483647
    210 	.long	4294967295
    211 	.long	2147483647
    212 	.long	0
    213 	.long	2146435072
    214 	.long	0
    215 	.long	2146435072
    216 	.type	static_const_table,@object
    217 	.size	static_const_table,32
    218 	.data
    219 	.section .note.GNU-stack, ""
    220 # End
    221