# Code-browser navigation text (Home | History | Annotate | Download | only in arm) was captured on this line; it is not part of the assembly source.
      1 	.macro f16_dq_ifsu reg0 reg1 reg2
      2 	.irp op, vabd.f16, vmax.f16, vmin.f16
      3 		\op d\reg0, d\reg1, d\reg2
      4 		\op q\reg0, q\reg1, q\reg2
      5 	.endr
      6 	.endm
      7 
      8 	.macro f16_q_ifsu reg0 reg1 reg2
      9 	.irp op, vabdq.f16, vmaxq.f16, vminq.f16
     10 		\op q\reg0, q\reg1, q\reg2
     11 	.endr
     12 	.endm
     13 
     14 	.macro f16_dq_abs_neg reg0 reg1
     15 	.irp op, vabs.f16, vneg.f16
     16 		\op d\reg0, d\reg1
     17 		\op q\reg0, q\reg1
     18 	.endr
     19 	.endm
     20 
     21 	.macro f16_q_abs_neg reg0 reg1
     22 	.irp op, vabsq.f16, vnegq.f16
     23 		\op q\reg0, q\reg1
     24 	.endr
     25 	.endm
     26 
     27 	.macro f16_dq_fcmp reg0 reg1 reg2
     28 	.irp op, vacge.f16, vacgt.f16, vaclt.f16, vacle.f16, vceq.f16, vcge.f16, vcgt.f16, vcle.f16, vclt.f16
     29 		\op d\reg0, d\reg1, d\reg2
     30 		\op q\reg0, q\reg1, q\reg2
     31 	.endr
     32 	.endm
     33 
     34 	.macro f16_dq_fcmp_imm0 reg0 reg1
     35 	.irp op, vceq.f16, vcge.f16, vcgt.f16, vcle.f16, vclt.f16
     36 		\op d\reg0, d\reg1, #0
     37 		\op q\reg0, q\reg1, #0
     38 	.endr
     39 	.endm
     40 
     41 	.macro f16_q_fcmp reg0 reg1 reg2
     42 	.irp op, vacgeq.f16, vacgtq.f16, vacltq.f16, vacleq.f16, vceqq.f16, vcgeq.f16, vcgtq.f16, vcleq.f16, vcltq.f16
     43 		\op q\reg0, q\reg1, q\reg2
     44 	.endr
     45 	.endm
     46 
     47 	.macro f16_dq_addsub reg0 reg1 reg2
     48 	.irp op, vadd.f16, vsub.f16
     49 		\op d\reg0, d\reg1, d\reg2
     50 		\op q\reg0, q\reg1, q\reg2
     51 	.endr
     52 	.endm
     53 
     54 	.macro f16_q_addsub reg0 reg1 reg2
     55 	.irp op, vaddq.f16, vsubq.f16
     56 		\op q\reg0, q\reg1, q\reg2
     57 	.endr
     58 	.endm
     59 
     60 	.macro f16_dq_vmaxnm reg0 reg1 reg2
     61 	.irp op, vmaxnm.f16, vminnm.f16
     62 		\op d\reg0, d\reg1, d\reg2
     63 		\op q\reg0, q\reg1, q\reg2
     64 	.endr
     65 	.endm
     66 
     67 	.macro f16_dq_fmac reg0 reg1 reg2
     68 	.irp op, vfma.f16, vfms.f16
     69 		\op d\reg0, d\reg1, d\reg2
     70 		\op q\reg0, q\reg1, q\reg2
     71 	.endr
     72 	.endm
     73 
     74 	.macro f16_dq_fmacmaybe reg0 reg1 reg2
     75 	.irp op, vmla.f16, vmls.f16
     76 		\op d\reg0, d\reg1, d\reg2
     77 		\op q\reg0, q\reg1, q\reg2
     78 	.endr
     79 	.endm
     80 
     81 	.macro f16_dq_vrint reg0 reg1
     82 	.irp op, vrintz.f16, vrintx.f16, vrinta.f16, vrintn.f16, vrintp.f16, vrintm.f16
     83 		\op d\reg0, d\reg1
     84 		\op q\reg0, q\reg1
     85 	.endr
     86 	.endm
     87 
     88 	.macro f16_dq_recip reg0 reg1
     89 	.irp op, vrecpe.f16, vrsqrte.f16
     90 		\op d\reg0, d\reg1
     91 		\op q\reg0, q\reg1
     92 	.endr
     93 	.endm
     94 
     95 	.macro f16_q_recip reg0 reg1
     96 	.irp op, vrecpeq.f16, vrsqrteq.f16
     97 		\op q\reg0, q\reg1
     98 	.endr
     99 	.endm
    100 
    101 	.macro f16_dq_step reg0 reg1 reg2
    102 	.irp op, vrecps.f16, vrsqrts.f16
    103 		\op d\reg0, d\reg1, d\reg2
    104 		\op q\reg0, q\reg1, q\reg2
    105 	.endr
    106 	.endm
    107 
    108 	.macro f16_q_step reg0 reg1 reg2
    109 	.irp op, vrecpsq.f16, vrsqrtsq.f16
    110 		\op q\reg0, q\reg1, q\reg2
    111 	.endr
    112 	.endm
    113 
    114 	.macro f16_dq_cvt reg0 reg1
    115 	.irp op, vcvta.s16.f16, vcvtm.s16.f16, vcvtn.s16.f16, vcvtp.s16.f16, vcvta.u16.f16, vcvtm.u16.f16, vcvtn.u16.f16, vcvtp.u16.f16,
    116 		\op d\reg0, d\reg1
    117 		\op q\reg0, q\reg1
    118 	.endr
    119 	.endm
    120 
    121 	.macro f16_dq_cvtz reg0 reg1
    122 	.irp op, vcvt.s16.f16, vcvt.u16.f16, vcvt.f16.s16, vcvt.f16.u16,
    123 		\op d\reg0, d\reg1
    124 		\op q\reg0, q\reg1
    125 	.endr
    126 	.endm
    127 
    128 	.macro f16_dq_cvtz_fixed reg0 reg1 imm
    129 	.irp op, vcvt.s16.f16, vcvt.u16.f16, vcvt.f16.s16, vcvt.f16.u16,
    130 		\op d\reg0, d\reg1, #\imm
    131 		\op q\reg0, q\reg1, #\imm
    132 	.endr
    133 	.endm
    134 
    135 	.macro f16_dq op reg0 reg1 reg2
    136 		\op d\reg0, d\reg1, d\reg2
    137 		\op q\reg0, q\reg1, q\reg2
    138 	.endm
    139 
    140 	.macro f16_d op reg0 reg1 reg2
    141 		\op d\reg0, d\reg1, d\reg2
    142 	.endm
    143 
    144 	.macro f16_q op reg0 reg1 reg2
    145 		\op q\reg0, q\reg1, q\reg2
    146 	.endm
    147 
    148 	.macro f16_dq_2 op reg0 reg1
    149 		\op d\reg0, d\reg1
    150 		\op q\reg0, q\reg1
    151 	.endm
    152 
    153 	.macro f16_d_2 op reg0 reg1
    154 		\op d\reg0, d\reg1
    155 	.endm
    156 
    157 	.macro f16_q_2 op reg0 reg1
    158 		\op q\reg0, q\reg1
    159 	.endm
    160 
    161 func:
    162 	# neon_dyadic_if_su
    163 	f16_dq_ifsu 2 4 14
    164 	f16_q_ifsu 0 8 14
    165 	f16_d  vabd.f16 1 3 15
    166 	f16_d  vabd.f16 0 1 8
    167 
    168 	# neon_abs_neg
    169 	f16_dq_abs_neg 0 8
    170 	f16_q_abs_neg 2 6
    171 	f16_d_2  vabs.f16 7 3
    172 	f16_d_2  vneg.f16 9 1
    173 
    174 	# neon_fcmp
    175 	f16_dq_fcmp 2 4 14
    176 	f16_q_fcmp 0 8 14
    177 
    178 	# neon_addsub_if_i
    179 	f16_dq_addsub 2 4 14
    180 	f16_q_addsub 0 8 14
    181 
    182 	# neon_vmaxnm
    183 	f16_dq_vmaxnm 2 4 14
    184 
    185 	# neon_fmac
    186 	f16_dq_fmac 2 4 14
    187 
    188 	# neon_mac_maybe_scalar
    189 	f16_dq_fmacmaybe 2 4 14
    190 
    191 	# vrint
    192 	f16_dq_vrint 4 14
    193 
    194 	# neon_dyadic_if_i_d
    195 	f16_d vpadd.f16 4 8 14
    196 
    197 	# neon_recip_est
    198 	f16_dq_recip 4 8
    199 	f16_q_recip 0 10
    200 
    201 	# neon_step
    202 	f16_dq_step 8 10 12
    203 	f16_q_step 2 0 4
    204 
    205 	# neon_dyadic_if_su_d
    206 	f16_d vpmax.f16 4 8 14
    207 	f16_d vpmin.f16 10 8 2
    208 
    209 	# neon_mul
    210 	f16_d vmul.f16 4 8 14
    211 	f16_d vmul.f16 7 0 1
    212 	f16_q vmul.f16 2 8 0
    213 
    214 	# neon_cvt
    215 	f16_dq_cvt 6 12
    216 
    217 	# neon_cvtz
    218 	f16_dq_cvtz 14, 0
    219 
    220 	# neon_cvtz_fixed
    221 	f16_dq_cvtz_fixed 14, 0, 3
    222 
    223 	# neon_fcmp_imm0
    224 	f16_dq_fcmp_imm0 14, 2
    225 
    226 	.macro f16_d_by_scalar op reg0 reg1 reg2 idx
    227 		\op d\reg0, d\reg1, d\reg2[\idx]
    228 	.endm
    229 
    230 	.macro f16_q_by_scalar op reg0 reg1 reg2 idx
    231 		\op q\reg0, q\reg1, d\reg2[\idx]
    232 	.endm
    233 
    234 	.macro f16_dq_fmacmaybe_by_scalar reg0 reg1 reg2 idx
    235 	.irp op, vmla.f16, vmls.f16
    236 		\op d\reg0, d\reg1, d\reg2[\idx]
    237 		\op q\reg0, q\reg1, d\reg2[\idx]
    238 	.endr
    239 	.endm
    240 
    241 	# neon_mul (by scalar)
    242 	f16_d_by_scalar vmul.f16 7 0 1 0
    243 	f16_d_by_scalar vmul.f16 4 8 6 2
    244 	f16_q_by_scalar vmul.f16 2 8 0 1
    245 	f16_q_by_scalar vmul.f16 2 8 7 3
    246 
    247 	# neon_mac_maybe_scalar (by scalar)
    248 	f16_dq_fmacmaybe_by_scalar 2 4 1 0
    249 	f16_dq_fmacmaybe_by_scalar 1 8 7 3
    250