Home | History | Annotate | Download | only in math
      1 // Copyright 2017 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 #include "textflag.h"
      6 
      7 #define PosInf		0x7FF0000000000000
      8 #define NegInf		0xFFF0000000000000
      9 #define NegZero		0x8000000000000000
     10 #define Pi		0x400921FB54442D18
     11 #define NegPi		0xC00921FB54442D18
     12 #define Pi3Div4		0x4002D97C7F3321D2	// 3Pi/4
     13 #define NegPi3Div4	0xC002D97C7F3321D2	// -3Pi/4
     14 #define PiDiv4		0x3FE921FB54442D18	// Pi/4
     15 #define NegPiDiv4	0xBFE921FB54442D18	// -Pi/4
     16 
     17 // Minimax polynomial coefficients and other constants
     18 DATA atan2rodataL25<> + 0(SB)/8, $0.199999999999554423E+00
     19 DATA atan2rodataL25<> + 8(SB)/8, $-.333333333333330928E+00
     20 DATA atan2rodataL25<> + 16(SB)/8, $0.111111110136634272E+00
     21 DATA atan2rodataL25<> + 24(SB)/8, $-.142857142828026806E+00
     22 DATA atan2rodataL25<> + 32(SB)/8, $0.769228118888682505E-01
     23 DATA atan2rodataL25<> + 40(SB)/8, $0.588059263575587687E-01
     24 DATA atan2rodataL25<> + 48(SB)/8, $-.909090711945939878E-01
     25 DATA atan2rodataL25<> + 56(SB)/8, $-.666641501287528609E-01
     26 DATA atan2rodataL25<> + 64(SB)/8, $0.472329433805024762E-01
     27 DATA atan2rodataL25<> + 72(SB)/8, $-.525380587584426406E-01
     28 DATA atan2rodataL25<> + 80(SB)/8, $-.422172007412067035E-01
     29 DATA atan2rodataL25<> + 88(SB)/8, $0.366935664549587481E-01
     30 DATA atan2rodataL25<> + 96(SB)/8, $0.220852012160300086E-01
     31 DATA atan2rodataL25<> + 104(SB)/8, $-.299856214685512712E-01
     32 DATA atan2rodataL25<> + 112(SB)/8, $0.726338160757602439E-02
     33 DATA atan2rodataL25<> + 120(SB)/8, $0.134893651284712515E-04
     34 DATA atan2rodataL25<> + 128(SB)/8, $-.291935324869629616E-02
     35 DATA atan2rodataL25<> + 136(SB)/8, $-.154797890856877418E-03
     36 DATA atan2rodataL25<> + 144(SB)/8, $0.843488472994227321E-03
     37 DATA atan2rodataL25<> + 152(SB)/8, $-.139950258898989925E-01
     38 GLOBL atan2rodataL25<> + 0(SB), RODATA, $160
     39 
     40 DATA atan2xpi2h<> + 0(SB)/8, $0x3ff330e4e4fa7b1b
     41 DATA atan2xpi2h<> + 8(SB)/8, $0xbff330e4e4fa7b1b
     42 DATA atan2xpi2h<> + 16(SB)/8, $0x400330e4e4fa7b1b
     43 DATA atan2xpi2h<> + 24(SB)/8, $0xc00330e4e4fa7b1b
     44 GLOBL atan2xpi2h<> + 0(SB), RODATA, $32
     45 DATA atan2xpim<> + 0(SB)/8, $0x3ff4f42b00000000
     46 GLOBL atan2xpim<> + 0(SB), RODATA, $8
     47 
     48 // Atan2 returns the arc tangent of y/x, using
     49 // the signs of the two to determine the quadrant
     50 // of the return value.
     51 //
     52 // Special cases are (in order):
     53 //      Atan2(y, NaN) = NaN
     54 //      Atan2(NaN, x) = NaN
     55 //      Atan2(+0, x>=0) = +0
     56 //      Atan2(-0, x>=0) = -0
     57 //      Atan2(+0, x<=-0) = +Pi
     58 //      Atan2(-0, x<=-0) = -Pi
     59 //      Atan2(y>0, 0) = +Pi/2
     60 //      Atan2(y<0, 0) = -Pi/2
     61 //      Atan2(+Inf, +Inf) = +Pi/4
     62 //      Atan2(-Inf, +Inf) = -Pi/4
     63 //      Atan2(+Inf, -Inf) = 3Pi/4
     64 //      Atan2(-Inf, -Inf) = -3Pi/4
     65 //      Atan2(y, +Inf) = 0
     66 //      Atan2(y>0, -Inf) = +Pi
     67 //      Atan2(y<0, -Inf) = -Pi
     68 //      Atan2(+Inf, x) = +Pi/2
     69 //      Atan2(-Inf, x) = -Pi/2
     70 // The algorithm used is minimax polynomial approximation
     71 // with coefficients determined with a Remez exchange algorithm.
     72 
     73 TEXT	atan2Asm(SB), NOSPLIT, $0-24
     74 	// special case
     75 	MOVD	x+0(FP), R1
     76 	MOVD	y+8(FP), R2
     77 
     78 	// special case Atan2(NaN, y) = NaN
     79 	MOVD	$~(1<<63), R5
     80 	AND	R1, R5		// x = |x|
     81 	MOVD	$PosInf, R3
     82 	CMPUBLT	R3, R5, returnX
     83 
     84 	// special case Atan2(x, NaN) = NaN
     85 	MOVD	$~(1<<63), R5
     86 	AND	R2, R5
     87 	CMPUBLT R3, R5, returnY
     88 
     89 	MOVD	$NegZero, R3
     90 	CMPUBEQ	R3, R1, xIsNegZero
     91 
     92 	MOVD	$0, R3
     93 	CMPUBEQ	R3, R1, xIsPosZero
     94 
     95 	MOVD	$PosInf, R4
     96 	CMPUBEQ	R4, R2, yIsPosInf
     97 
     98 	MOVD	$NegInf, R4
     99 	CMPUBEQ	R4, R2, yIsNegInf
    100 	BR	Normal
    101 xIsNegZero:
    102 	// special case Atan(-0, y>=0) = -0
    103 	MOVD	$0, R4
    104 	CMPBLE	R4, R2, returnX
    105 
    106 	//special case Atan2(-0, y<=-0) = -Pi
    107 	MOVD	$NegZero, R4
    108 	CMPBGE	R4, R2, returnNegPi
    109 	BR	Normal
    110 xIsPosZero:
    111 	//special case Atan2(0, 0) = 0
    112 	MOVD	$0, R4
    113 	CMPUBEQ	R4, R2, returnX
    114 
    115 	//special case Atan2(0, y<=-0) = Pi
    116 	MOVD	$NegZero, R4
    117 	CMPBGE	R4, R2, returnPi
    118 	BR Normal
    119 yIsNegInf:
    120 	//special case Atan2(+Inf, -Inf) = 3Pi/4
    121 	MOVD	$PosInf, R3
    122 	CMPUBEQ	R3, R1, posInfNegInf
    123 
    124 	//special case Atan2(-Inf, -Inf) = -3Pi/4
    125 	MOVD	$NegInf, R3
    126 	CMPUBEQ	R3, R1, negInfNegInf
    127 	BR Normal
    128 yIsPosInf:
    129 	//special case Atan2(+Inf, +Inf) = Pi/4
    130 	MOVD	$PosInf, R3
    131 	CMPUBEQ	R3, R1, posInfPosInf
    132 
    133 	//special case Atan2(-Inf, +Inf) = -Pi/4
    134 	MOVD	$NegInf, R3
    135 	CMPUBEQ	R3, R1, negInfPosInf
    136 
    137 	//special case Atan2(-Pi, +Inf) = Pi
    138 	MOVD	$NegPi, R3
    139 	CMPUBEQ	R3, R1, negPiPosInf
    140 
    141 Normal:
    142 	FMOVD	x+0(FP), F0
    143 	FMOVD	y+8(FP), F2
    144 	MOVD	$atan2rodataL25<>+0(SB), R9
    145 	WORD	$0xB3CD0020	//lgdr	%r2,%f0
    146 	WORD	$0xB3CD0012	//lgdr	%r1,%f2
    147 	WORD	$0xEC2220BF	//risbgn	%r2,%r2,64-32,128+63,64+0+32
    148 	BYTE	$0x60
    149 	BYTE	$0x59
    150 	WORD	$0xEC1120BF	//risbgn	%r1,%r1,64-32,128+63,64+0+32
    151 	BYTE	$0x60
    152 	BYTE	$0x59
    153 	WORD	$0xB9170032	//llgtr	%r3,%r2
    154 	WORD	$0xEC523FBF	//risbg	%r5,%r2,64-1,128+63,64+32+1
    155 	BYTE	$0x61
    156 	BYTE	$0x55
    157 	WORD	$0xB9170041	//llgtr	%r4,%r1
    158 	WFLCDB	V0, V20
    159 	MOVW	R4, R6
    160 	MOVW	R3, R7
    161 	CMPUBLT	R6, R7, L17
    162 	WFDDB	V2, V0, V3
    163 	ADDW	$2, R5, R2
    164 	MOVW	R4, R6
    165 	MOVW	R3, R7
    166 	CMPUBLE	R6, R7, L20
    167 L3:
    168 	WFMDB	V3, V3, V4
    169 	VLEG	$0, 152(R9), V18
    170 	VLEG	$0, 144(R9), V16
    171 	FMOVD	136(R9), F1
    172 	FMOVD	128(R9), F5
    173 	FMOVD	120(R9), F6
    174 	WFMADB	V4, V16, V5, V16
    175 	WFMADB	V4, V6, V1, V6
    176 	FMOVD	112(R9), F7
    177 	WFMDB	V4, V4, V1
    178 	WFMADB	V4, V7, V18, V7
    179 	VLEG	$0, 104(R9), V18
    180 	WFMADB	V1, V6, V16, V6
    181 	CMPWU	R4, R3
    182 	FMOVD	96(R9), F5
    183 	VLEG	$0, 88(R9), V16
    184 	WFMADB	V4, V5, V18, V5
    185 	VLEG	$0, 80(R9), V18
    186 	VLEG	$0, 72(R9), V22
    187 	WFMADB	V4, V16, V18, V16
    188 	VLEG	$0, 64(R9), V18
    189 	WFMADB	V1, V7, V5, V7
    190 	WFMADB	V4, V18, V22, V18
    191 	WFMDB	V1, V1, V5
    192 	WFMADB	V1, V16, V18, V16
    193 	VLEG	$0, 56(R9), V18
    194 	WFMADB	V5, V6, V7, V6
    195 	VLEG	$0, 48(R9), V22
    196 	FMOVD	40(R9), F7
    197 	WFMADB	V4, V7, V18, V7
    198 	VLEG	$0, 32(R9), V18
    199 	WFMADB	V5, V6, V16, V6
    200 	WFMADB	V4, V18, V22, V18
    201 	VLEG	$0, 24(R9), V16
    202 	WFMADB	V1, V7, V18, V7
    203 	VLEG	$0, 16(R9), V18
    204 	VLEG	$0, 8(R9), V22
    205 	WFMADB	V4, V18, V16, V18
    206 	VLEG	$0, 0(R9), V16
    207 	WFMADB	V5, V6, V7, V6
    208 	WFMADB	V4, V16, V22, V16
    209 	FMUL	F3, F4
    210 	WFMADB	V1, V18, V16, V1
    211 	FMADD	F6, F5, F1
    212 	WFMADB	V4, V1, V3, V4
    213 	BLT	L18
    214 	BGT	L7
    215 	WORD	$0xB3120022	//ltdbr	%f2,%f2
    216 	BLTU	L21
    217 L8:
    218 	WORD	$0xB3120000	//ltdbr	%f0,%f0
    219 	BLTU	L22
    220 L9:
    221 	WFCHDBS	V2, V0, V0
    222 	BNE	L18
    223 L7:
    224 	MOVW	R1, R6
    225 	CMPBGE	R6, $0, L1
    226 L18:
    227 	WORD	$0xEC223ABC	//risbg	%r2,%r2,58,128+60,3
    228 	BYTE	$0x03
    229 	BYTE	$0x55
    230 	MOVD	$atan2xpi2h<>+0(SB), R1
    231 	MOVD	atan2xpim<>+0(SB), R3
    232 	WORD	$0xB3C10003	//ldgr	%f0,%r3
    233 	WORD	$0xED021000	//madb	%f4,%f0,0(%r2,%r1)
    234 	BYTE	$0x40
    235 	BYTE	$0x1E
    236 L1:
    237 	FMOVD	F4, ret+16(FP)
    238 	RET
    239 
    240 L20:
    241 	WORD	$0xB3120022	//ltdbr	%f2,%f2
    242 	BLTU	L23
    243 	FMOVD	F2, F6
    244 L4:
    245 	WORD	$0xB3120000	//ltdbr	%f0,%f0
    246 	BLTU	L24
    247 	FMOVD	F0, F4
    248 L5:
    249 	WFCHDBS	V6, V4, V4
    250 	BEQ	L3
    251 L17:
    252 	WFDDB	V0, V2, V4
    253 	BYTE	$0x18	//lr	%r2,%r5
    254 	BYTE	$0x25
    255 	WORD	$0xB3130034	//lcdbr	%f3,%f4
    256 	BR	L3
    257 L23:
    258 	WORD	$0xB3130062	//lcdbr	%f6,%f2
    259 	BR	L4
    260 L22:
    261 	VLR	V20, V0
    262 	BR	L9
    263 L21:
    264 	WORD	$0xB3130022	//lcdbr	%f2,%f2
    265 	BR	L8
    266 L24:
    267 	VLR	V20, V4
    268 	BR	L5
    269 returnX:	//the result is same as the first argument
    270 	MOVD	R1, ret+16(FP)
    271 	RET
    272 returnY:	//the result is same as the second argument
    273 	MOVD	R2, ret+16(FP)
    274 	RET
    275 returnPi:
    276 	MOVD	$Pi, R1
    277 	MOVD	R1, ret+16(FP)
    278 	RET
    279 returnNegPi:
    280 	MOVD	$NegPi, R1
    281 	MOVD	R1, ret+16(FP)
    282 	RET
    283 posInfNegInf:
    284 	MOVD	$Pi3Div4, R1
    285 	MOVD	R1, ret+16(FP)
    286 	RET
    287 negInfNegInf:
    288 	MOVD	$NegPi3Div4, R1
    289 	MOVD	R1, ret+16(FP)
    290 	RET
    291 posInfPosInf:
    292 	MOVD	$PiDiv4, R1
    293 	MOVD	R1, ret+16(FP)
    294 	RET
    295 negInfPosInf:
    296 	MOVD	$NegPiDiv4, R1
    297 	MOVD	R1, ret+16(FP)
    298 	RET
    299 negPiPosInf:
    300 	MOVD	$NegZero, R1
    301 	MOVD	R1, ret+16(FP)
    302 	RET
    303