@ ---------------------------------------------------------------------------
@ Helper macros.
@
@ Every regN argument is a bare register number; the macro body prepends
@ "d" or "q" to form the operand.  Macros named *_dq_* emit the D-register
@ form followed by the Q-register form of each mnemonic; macros named *_q_*
@ emit only the Q-register form, using the q-suffixed mnemonic spelling.
@ ---------------------------------------------------------------------------

@ vabd / vmax / vmin: D form then Q form of each.
.macro f16_dq_ifsu reg0, reg1, reg2
    .irp insn, vabd.f16, vmax.f16, vmin.f16
    \insn d\reg0, d\reg1, d\reg2
    \insn q\reg0, q\reg1, q\reg2
    .endr
.endm

@ vabdq / vmaxq / vminq: Q form only.
.macro f16_q_ifsu reg0, reg1, reg2
    .irp insn, vabdq.f16, vmaxq.f16, vminq.f16
    \insn q\reg0, q\reg1, q\reg2
    .endr
.endm

@ vabs / vneg (two operands): D form then Q form of each.
.macro f16_dq_abs_neg reg0, reg1
    .irp insn, vabs.f16, vneg.f16
    \insn d\reg0, d\reg1
    \insn q\reg0, q\reg1
    .endr
.endm

@ vabsq / vnegq (two operands): Q form only.
.macro f16_q_abs_neg reg0, reg1
    .irp insn, vabsq.f16, vnegq.f16
    \insn q\reg0, q\reg1
    .endr
.endm

@ Register-register compares (vac* and vc*): D form then Q form of each.
.macro f16_dq_fcmp reg0, reg1, reg2
    .irp insn, vacge.f16, vacgt.f16, vaclt.f16, vacle.f16, vceq.f16, vcge.f16, vcgt.f16, vcle.f16, vclt.f16
    \insn d\reg0, d\reg1, d\reg2
    \insn q\reg0, q\reg1, q\reg2
    .endr
.endm

@ Compares against the immediate #0: D form then Q form of each.
.macro f16_dq_fcmp_imm0 reg0, reg1
    .irp insn, vceq.f16, vcge.f16, vcgt.f16, vcle.f16, vclt.f16
    \insn d\reg0, d\reg1, #0
    \insn q\reg0, q\reg1, #0
    .endr
.endm

@ Register-register compares, q-suffixed mnemonics: Q form only.
.macro f16_q_fcmp reg0, reg1, reg2
    .irp insn, vacgeq.f16, vacgtq.f16, vacltq.f16, vacleq.f16, vceqq.f16, vcgeq.f16, vcgtq.f16, vcleq.f16, vcltq.f16
    \insn q\reg0, q\reg1, q\reg2
    .endr
.endm

@ vadd / vsub: D form then Q form of each.
.macro f16_dq_addsub reg0, reg1, reg2
    .irp insn, vadd.f16, vsub.f16
    \insn d\reg0, d\reg1, d\reg2
    \insn q\reg0, q\reg1, q\reg2
    .endr
.endm

@ vaddq / vsubq: Q form only.
.macro f16_q_addsub reg0, reg1, reg2
    .irp insn, vaddq.f16, vsubq.f16
    \insn q\reg0, q\reg1, q\reg2
    .endr
.endm

@ vmaxnm / vminnm: D form then Q form of each.
.macro f16_dq_vmaxnm reg0, reg1, reg2
    .irp insn, vmaxnm.f16, vminnm.f16
    \insn d\reg0, d\reg1, d\reg2
    \insn q\reg0, q\reg1, q\reg2
    .endr
.endm

@ vfma / vfms: D form then Q form of each.
.macro f16_dq_fmac reg0, reg1, reg2
    .irp insn, vfma.f16, vfms.f16
    \insn d\reg0, d\reg1, d\reg2
    \insn q\reg0, q\reg1, q\reg2
    .endr
.endm

@ vmla / vmls with a full register as third operand: D form then Q form.
.macro f16_dq_fmacmaybe reg0, reg1, reg2
    .irp insn, vmla.f16, vmls.f16
    \insn d\reg0, d\reg1, d\reg2
    \insn q\reg0, q\reg1, q\reg2
    .endr
.endm

@ vrint{z,x,a,n,p,m} (two operands): D form then Q form of each.
.macro f16_dq_vrint reg0, reg1
    .irp insn, vrintz.f16, vrintx.f16, vrinta.f16, vrintn.f16, vrintp.f16, vrintm.f16
    \insn d\reg0, d\reg1
    \insn q\reg0, q\reg1
    .endr
.endm

@ vrecpe / vrsqrte (two operands): D form then Q form of each.
.macro f16_dq_recip reg0, reg1
    .irp insn, vrecpe.f16, vrsqrte.f16
    \insn d\reg0, d\reg1
    \insn q\reg0, q\reg1
    .endr
.endm

@ vrecpeq / vrsqrteq (two operands): Q form only.
.macro f16_q_recip reg0, reg1
    .irp insn, vrecpeq.f16, vrsqrteq.f16
    \insn q\reg0, q\reg1
    .endr
.endm

@ vrecps / vrsqrts: D form then Q form of each.
.macro f16_dq_step reg0, reg1, reg2
    .irp insn, vrecps.f16, vrsqrts.f16
    \insn d\reg0, d\reg1, d\reg2
    \insn q\reg0, q\reg1, q\reg2
    .endr
.endm

@ vrecpsq / vrsqrtsq: Q form only.
.macro f16_q_step reg0, reg1, reg2
    .irp insn, vrecpsq.f16, vrsqrtsq.f16
    \insn q\reg0, q\reg1, q\reg2
    .endr
.endm

@ vcvt{a,m,n,p} from .f16 to .s16 and to .u16: D form then Q form of each.
.macro f16_dq_cvt reg0, reg1
    .irp insn, vcvta.s16.f16, vcvtm.s16.f16, vcvtn.s16.f16, vcvtp.s16.f16, vcvta.u16.f16, vcvtm.u16.f16, vcvtn.u16.f16, vcvtp.u16.f16,
    \insn d\reg0, d\reg1
    \insn q\reg0, q\reg1
    .endr
.endm

@ vcvt between .f16 and .s16/.u16 in both directions, two-operand form:
@ D form then Q form of each.
.macro f16_dq_cvtz reg0, reg1
    .irp insn, vcvt.s16.f16, vcvt.u16.f16, vcvt.f16.s16, vcvt.f16.u16,
    \insn d\reg0, d\reg1
    \insn q\reg0, q\reg1
    .endr
.endm

@ Same vcvt mnemonics as f16_dq_cvtz, with a trailing immediate operand
@ #imm: D form then Q form of each.
.macro f16_dq_cvtz_fixed reg0, reg1, imm
    .irp insn, vcvt.s16.f16, vcvt.u16.f16, vcvt.f16.s16, vcvt.f16.u16,
    \insn d\reg0, d\reg1, #\imm
    \insn q\reg0, q\reg1, #\imm
    .endr
.endm

@ Generic three-operand emitters: the caller supplies the mnemonic in op.
.macro f16_dq op, reg0, reg1, reg2
    \op d\reg0, d\reg1, d\reg2
    \op q\reg0, q\reg1, q\reg2
.endm

.macro f16_d op, reg0, reg1, reg2
    \op d\reg0, d\reg1, d\reg2
.endm

.macro f16_q op, reg0, reg1, reg2
    \op q\reg0, q\reg1, q\reg2
.endm

@ Generic two-operand emitters: the caller supplies the mnemonic in op.
.macro f16_dq_2 op, reg0, reg1
    \op d\reg0, d\reg1
    \op q\reg0, q\reg1
.endm

.macro f16_d_2 op, reg0, reg1
    \op d\reg0, d\reg1
.endm

.macro f16_q_2 op, reg0, reg1
    \op q\reg0, q\reg1
.endm

@ By-scalar emitters: the last operand is the indexed element d<reg2>[idx].
@ The Q variants keep a D register for that indexed operand.
.macro f16_d_by_scalar op, reg0, reg1, reg2, idx
    \op d\reg0, d\reg1, d\reg2[\idx]
.endm

.macro f16_q_by_scalar op, reg0, reg1, reg2, idx
    \op q\reg0, q\reg1, d\reg2[\idx]
.endm

@ vmla / vmls by scalar: D form then Q form of each.
.macro f16_dq_fmacmaybe_by_scalar reg0, reg1, reg2, idx
    .irp insn, vmla.f16, vmls.f16
    \insn d\reg0, d\reg1, d\reg2[\idx]
    \insn q\reg0, q\reg1, d\reg2[\idx]
    .endr
.endm

@ ---------------------------------------------------------------------------
@ Instruction stream.  The order of the invocations below determines the
@ order of the emitted instructions.
@ ---------------------------------------------------------------------------
func:
    @ neon_dyadic_if_su
    f16_dq_ifsu 2, 4, 14
    f16_q_ifsu 0, 8, 14
    f16_d vabd.f16, 1, 3, 15
    f16_d vabd.f16, 0, 1, 8

    @ neon_abs_neg
    f16_dq_abs_neg 0, 8
    f16_q_abs_neg 2, 6
    f16_d_2 vabs.f16, 7, 3
    f16_d_2 vneg.f16, 9, 1

    @ neon_fcmp
    f16_dq_fcmp 2, 4, 14
    f16_q_fcmp 0, 8, 14

    @ neon_addsub_if_i
    f16_dq_addsub 2, 4, 14
    f16_q_addsub 0, 8, 14

    @ neon_vmaxnm
    f16_dq_vmaxnm 2, 4, 14

    @ neon_fmac
    f16_dq_fmac 2, 4, 14

    @ neon_mac_maybe_scalar
    f16_dq_fmacmaybe 2, 4, 14

    @ vrint
    f16_dq_vrint 4, 14

    @ neon_dyadic_if_i_d
    f16_d vpadd.f16, 4, 8, 14

    @ neon_recip_est
    f16_dq_recip 4, 8
    f16_q_recip 0, 10

    @ neon_step
    f16_dq_step 8, 10, 12
    f16_q_step 2, 0, 4

    @ neon_dyadic_if_su_d
    f16_d vpmax.f16, 4, 8, 14
    f16_d vpmin.f16, 10, 8, 2

    @ neon_mul
    f16_d vmul.f16, 4, 8, 14
    f16_d vmul.f16, 7, 0, 1
    f16_q vmul.f16, 2, 8, 0

    @ neon_cvt
    f16_dq_cvt 6, 12

    @ neon_cvtz
    f16_dq_cvtz 14, 0

    @ neon_cvtz_fixed
    f16_dq_cvtz_fixed 14, 0, 3

    @ neon_fcmp_imm0
    f16_dq_fcmp_imm0 14, 2

    @ neon_mul (by scalar)
    f16_d_by_scalar vmul.f16, 7, 0, 1, 0
    f16_d_by_scalar vmul.f16, 4, 8, 6, 2
    f16_q_by_scalar vmul.f16, 2, 8, 0, 1
    f16_q_by_scalar vmul.f16, 2, 8, 7, 3

    @ neon_mac_maybe_scalar (by scalar)
    f16_dq_fmacmaybe_by_scalar 2, 4, 1, 0
    f16_dq_fmacmaybe_by_scalar 1, 8, 7, 3