; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
; Tests AArch64 instruction selection for the NEON absolute-difference and
; absolute-value families: [su]abd(l)(2), [su]aba(l)(2), fabd, abs, sqabs, sqneg.
; Widening forms ([su]abdl / [su]abal) are expressed as the base intrinsic
; followed by a zext (and an add for the accumulating forms); the "2" variants
; extract the high half via shufflevector first.


define <8 x i16> @sabdl8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: sabdl8h:
;CHECK: sabdl.8h
  %tmp1 = load <8 x i8>* %A
  %tmp2 = load <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  %tmp4 = zext <8 x i8> %tmp3 to <8 x i16>
  ret <8 x i16> %tmp4
}

define <4 x i32> @sabdl4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: sabdl4s:
;CHECK: sabdl.4s
  %tmp1 = load <4 x i16>* %A
  %tmp2 = load <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  %tmp4 = zext <4 x i16> %tmp3 to <4 x i32>
  ret <4 x i32> %tmp4
}

define <2 x i64> @sabdl2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: sabdl2d:
;CHECK: sabdl.2d
  %tmp1 = load <2 x i32>* %A
  %tmp2 = load <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  %tmp4 = zext <2 x i32> %tmp3 to <2 x i64>
  ret <2 x i64> %tmp4
}

define <8 x i16> @sabdl2_8h(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: sabdl2_8h:
;CHECK: sabdl2.8h
  %load1 = load <16 x i8>* %A
  %load2 = load <16 x i8>* %B
  %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %tmp2 = shufflevector <16 x i8> %load2, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  %tmp4 = zext <8 x i8> %tmp3 to <8 x i16>
  ret <8 x i16> %tmp4
}

define <4 x i32> @sabdl2_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: sabdl2_4s:
;CHECK: sabdl2.4s
  %load1 = load <8 x i16>* %A
  %load2 = load <8 x i16>* %B
  %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  %tmp4 = zext <4 x i16> %tmp3 to <4 x i32>
  ret <4 x i32> %tmp4
}

define <2 x i64> @sabdl2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: sabdl2_2d:
;CHECK: sabdl2.2d
  %load1 = load <4 x i32>* %A
  %load2 = load <4 x i32>* %B
  %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  %tmp4 = zext <2 x i32> %tmp3 to <2 x i64>
  ret <2 x i64> %tmp4
}

define <8 x i16> @uabdl8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: uabdl8h:
;CHECK: uabdl.8h
  %tmp1 = load <8 x i8>* %A
  %tmp2 = load <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  %tmp4 = zext <8 x i8> %tmp3 to <8 x i16>
  ret <8 x i16> %tmp4
}

define <4 x i32> @uabdl4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: uabdl4s:
;CHECK: uabdl.4s
  %tmp1 = load <4 x i16>* %A
  %tmp2 = load <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  %tmp4 = zext <4 x i16> %tmp3 to <4 x i32>
  ret <4 x i32> %tmp4
}

define <2 x i64> @uabdl2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: uabdl2d:
;CHECK: uabdl.2d
  %tmp1 = load <2 x i32>* %A
  %tmp2 = load <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  %tmp4 = zext <2 x i32> %tmp3 to <2 x i64>
  ret <2 x i64> %tmp4
}

define <8 x i16> @uabdl2_8h(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: uabdl2_8h:
;CHECK: uabdl2.8h
  %load1 = load <16 x i8>* %A
  %load2 = load <16 x i8>* %B
  %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %tmp2 = shufflevector <16 x i8> %load2, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>

  %tmp3 = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  %tmp4 = zext <8 x i8> %tmp3 to <8 x i16>
  ret <8 x i16> %tmp4
}

define <4 x i32> @uabdl2_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: uabdl2_4s:
;CHECK: uabdl2.4s
  %load1 = load <8 x i16>* %A
  %load2 = load <8 x i16>* %B
  %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  %tmp4 = zext <4 x i16> %tmp3 to <4 x i32>
  ret <4 x i32> %tmp4
}

define <2 x i64> @uabdl2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: uabdl2_2d:
;CHECK: uabdl2.2d
  %load1 = load <4 x i32>* %A
  %load2 = load <4 x i32>* %B
  %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  %tmp4 = zext <2 x i32> %tmp3 to <2 x i64>
  ret <2 x i64> %tmp4
}

define <2 x float> @fabd_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: fabd_2s:
;CHECK: fabd.2s
  %tmp1 = load <2 x float>* %A
  %tmp2 = load <2 x float>* %B
  %tmp3 = call <2 x float> @llvm.aarch64.neon.fabd.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
  ret <2 x float> %tmp3
}

define <4 x float> @fabd_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: fabd_4s:
;CHECK: fabd.4s
  %tmp1 = load <4 x float>* %A
  %tmp2 = load <4 x float>* %B
  %tmp3 = call <4 x float> @llvm.aarch64.neon.fabd.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
  ret <4 x float> %tmp3
}

define <2 x double> @fabd_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
;CHECK-LABEL: fabd_2d:
;CHECK: fabd.2d
  %tmp1 = load <2 x double>* %A
  %tmp2 = load <2 x double>* %B
  %tmp3 = call <2 x double> @llvm.aarch64.neon.fabd.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
  ret <2 x double> %tmp3
}

declare <2 x float> @llvm.aarch64.neon.fabd.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.aarch64.neon.fabd.v4f32(<4 x float>, <4 x float>) nounwind readnone
declare <2 x double> @llvm.aarch64.neon.fabd.v2f64(<2 x double>, <2 x double>) nounwind readnone

define <8 x i8> @sabd_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: sabd_8b:
;CHECK: sabd.8b
  %tmp1 = load <8 x i8>* %A
  %tmp2 = load <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <16 x i8> @sabd_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: sabd_16b:
;CHECK: sabd.16b
  %tmp1 = load <16 x i8>* %A
  %tmp2 = load <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <4 x i16> @sabd_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: sabd_4h:
;CHECK: sabd.4h
  %tmp1 = load <4 x i16>* %A
  %tmp2 = load <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <8 x i16> @sabd_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: sabd_8h:
;CHECK: sabd.8h
  %tmp1 = load <8 x i16>* %A
  %tmp2 = load <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <2 x i32> @sabd_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: sabd_2s:
;CHECK: sabd.2s
  %tmp1 = load <2 x i32>* %A
  %tmp2 = load <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <4 x i32> @sabd_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: sabd_4s:
;CHECK: sabd.4s
  %tmp1 = load <4 x i32>* %A
  %tmp2 = load <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

declare <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

define <8 x i8> @uabd_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: uabd_8b:
;CHECK: uabd.8b
  %tmp1 = load <8 x i8>* %A
  %tmp2 = load <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <16 x i8> @uabd_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: uabd_16b:
;CHECK: uabd.16b
  %tmp1 = load <16 x i8>* %A
  %tmp2 = load <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <4 x i16> @uabd_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: uabd_4h:
;CHECK: uabd.4h
  %tmp1 = load <4 x i16>* %A
  %tmp2 = load <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <8 x i16> @uabd_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: uabd_8h:
;CHECK: uabd.8h
  %tmp1 = load <8 x i16>* %A
  %tmp2 = load <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <2 x i32> @uabd_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: uabd_2s:
;CHECK: uabd.2s
  %tmp1 = load <2 x i32>* %A
  %tmp2 = load <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <4 x i32> @uabd_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: uabd_4s:
;CHECK: uabd.4s
  %tmp1 = load <4 x i32>* %A
  %tmp2 = load <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

declare <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

define <8 x i8> @sqabs_8b(<8 x i8>* %A) nounwind {
;CHECK-LABEL: sqabs_8b:
;CHECK: sqabs.8b
  %tmp1 = load <8 x i8>* %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqabs.v8i8(<8 x i8> %tmp1)
  ret <8 x i8> %tmp3
}

define <16 x i8> @sqabs_16b(<16 x i8>* %A) nounwind {
;CHECK-LABEL: sqabs_16b:
;CHECK: sqabs.16b
  %tmp1 = load <16 x i8>* %A
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqabs.v16i8(<16 x i8> %tmp1)
  ret <16 x i8> %tmp3
}

define <4 x i16> @sqabs_4h(<4 x i16>* %A) nounwind {
;CHECK-LABEL: sqabs_4h:
;CHECK: sqabs.4h
  %tmp1 = load <4 x i16>* %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqabs.v4i16(<4 x i16> %tmp1)
  ret <4 x i16> %tmp3
}

define <8 x i16> @sqabs_8h(<8 x i16>* %A) nounwind {
;CHECK-LABEL: sqabs_8h:
;CHECK: sqabs.8h
  %tmp1 = load <8 x i16>* %A
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqabs.v8i16(<8 x i16> %tmp1)
  ret <8 x i16> %tmp3
}

define <2 x i32> @sqabs_2s(<2 x i32>* %A) nounwind {
;CHECK-LABEL: sqabs_2s:
;CHECK: sqabs.2s
  %tmp1 = load <2 x i32>* %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqabs.v2i32(<2 x i32> %tmp1)
  ret <2 x i32> %tmp3
}

define <4 x i32> @sqabs_4s(<4 x i32>* %A) nounwind {
;CHECK-LABEL: sqabs_4s:
;CHECK: sqabs.4s
  %tmp1 = load <4 x i32>* %A
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqabs.v4i32(<4 x i32> %tmp1)
  ret <4 x i32> %tmp3
}

declare <8 x i8> @llvm.aarch64.neon.sqabs.v8i8(<8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.sqabs.v16i8(<16 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sqabs.v4i16(<4 x i16>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.sqabs.v8i16(<8 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sqabs.v2i32(<2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.sqabs.v4i32(<4 x i32>) nounwind readnone

define <8 x i8> @sqneg_8b(<8 x i8>* %A) nounwind {
;CHECK-LABEL: sqneg_8b:
;CHECK: sqneg.8b
  %tmp1 = load <8 x i8>* %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqneg.v8i8(<8 x i8> %tmp1)
  ret <8 x i8> %tmp3
}

define <16 x i8> @sqneg_16b(<16 x i8>* %A) nounwind {
;CHECK-LABEL: sqneg_16b:
;CHECK: sqneg.16b
  %tmp1 = load <16 x i8>* %A
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqneg.v16i8(<16 x i8> %tmp1)
  ret <16 x i8> %tmp3
}

define <4 x i16> @sqneg_4h(<4 x i16>* %A) nounwind {
;CHECK-LABEL: sqneg_4h:
;CHECK: sqneg.4h
  %tmp1 = load <4 x i16>* %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqneg.v4i16(<4 x i16> %tmp1)
  ret <4 x i16> %tmp3
}

define <8 x i16> @sqneg_8h(<8 x i16>* %A) nounwind {
;CHECK-LABEL: sqneg_8h:
;CHECK: sqneg.8h
  %tmp1 = load <8 x i16>* %A
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqneg.v8i16(<8 x i16> %tmp1)
  ret <8 x i16> %tmp3
}

define <2 x i32> @sqneg_2s(<2 x i32>* %A) nounwind {
;CHECK-LABEL: sqneg_2s:
;CHECK: sqneg.2s
  %tmp1 = load <2 x i32>* %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqneg.v2i32(<2 x i32> %tmp1)
  ret <2 x i32> %tmp3
}

define <4 x i32> @sqneg_4s(<4 x i32>* %A) nounwind {
;CHECK-LABEL: sqneg_4s:
;CHECK: sqneg.4s
  %tmp1 = load <4 x i32>* %A
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqneg.v4i32(<4 x i32> %tmp1)
  ret <4 x i32> %tmp3
}

declare <8 x i8> @llvm.aarch64.neon.sqneg.v8i8(<8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.sqneg.v16i8(<16 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sqneg.v4i16(<4 x i16>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.sqneg.v8i16(<8 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sqneg.v2i32(<2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.sqneg.v4i32(<4 x i32>) nounwind readnone

define <8 x i8> @abs_8b(<8 x i8>* %A) nounwind {
;CHECK-LABEL: abs_8b:
;CHECK: abs.8b
  %tmp1 = load <8 x i8>* %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.abs.v8i8(<8 x i8> %tmp1)
  ret <8 x i8> %tmp3
}

define <16 x i8> @abs_16b(<16 x i8>* %A) nounwind {
;CHECK-LABEL: abs_16b:
;CHECK: abs.16b
  %tmp1 = load <16 x i8>* %A
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.abs.v16i8(<16 x i8> %tmp1)
  ret <16 x i8> %tmp3
}

define <4 x i16> @abs_4h(<4 x i16>* %A) nounwind {
;CHECK-LABEL: abs_4h:
;CHECK: abs.4h
  %tmp1 = load <4 x i16>* %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.abs.v4i16(<4 x i16> %tmp1)
  ret <4 x i16> %tmp3
}

define <8 x i16> @abs_8h(<8 x i16>* %A) nounwind {
;CHECK-LABEL: abs_8h:
;CHECK: abs.8h
  %tmp1 = load <8 x i16>* %A
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.abs.v8i16(<8 x i16> %tmp1)
  ret <8 x i16> %tmp3
}

define <2 x i32> @abs_2s(<2 x i32>* %A) nounwind {
;CHECK-LABEL: abs_2s:
;CHECK: abs.2s
  %tmp1 = load <2 x i32>* %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.abs.v2i32(<2 x i32> %tmp1)
  ret <2 x i32> %tmp3
}

define <4 x i32> @abs_4s(<4 x i32>* %A) nounwind {
;CHECK-LABEL: abs_4s:
;CHECK: abs.4s
  %tmp1 = load <4 x i32>* %A
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.abs.v4i32(<4 x i32> %tmp1)
  ret <4 x i32> %tmp3
}

define <1 x i64> @abs_1d(<1 x i64> %A) nounwind {
; CHECK-LABEL: abs_1d:
; CHECK: abs d0, d0
  %abs = call <1 x i64> @llvm.aarch64.neon.abs.v1i64(<1 x i64> %A)
  ret <1 x i64> %abs
}

define i64 @abs_1d_honestly(i64 %A) nounwind {
; CHECK-LABEL: abs_1d_honestly:
; CHECK: abs d0, d0
  %abs = call i64 @llvm.aarch64.neon.abs.i64(i64 %A)
  ret i64 %abs
}

declare <8 x i8> @llvm.aarch64.neon.abs.v8i8(<8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.abs.v16i8(<16 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.abs.v4i16(<4 x i16>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.abs.v8i16(<8 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.abs.v2i32(<2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.abs.v4i32(<4 x i32>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.abs.v1i64(<1 x i64>) nounwind readnone
declare i64 @llvm.aarch64.neon.abs.i64(i64) nounwind readnone

define <8 x i16> @sabal8h(<8 x i8>* %A, <8 x i8>* %B, <8 x i16>* %C) nounwind {
;CHECK-LABEL: sabal8h:
;CHECK: sabal.8h
  %tmp1 = load <8 x i8>* %A
  %tmp2 = load <8 x i8>* %B
  %tmp3 = load <8 x i16>* %C
  %tmp4 = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  %tmp4.1 = zext <8 x i8> %tmp4 to <8 x i16>
  %tmp5 = add <8 x i16> %tmp3, %tmp4.1
  ret <8 x i16> %tmp5
}

define <4 x i32> @sabal4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
;CHECK-LABEL: sabal4s:
;CHECK: sabal.4s
  %tmp1 = load <4 x i16>* %A
  %tmp2 = load <4 x i16>* %B
  %tmp3 = load <4 x i32>* %C
  %tmp4 = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  %tmp4.1 = zext <4 x i16> %tmp4 to <4 x i32>
  %tmp5 = add <4 x i32> %tmp3, %tmp4.1
  ret <4 x i32> %tmp5
}

define <2 x i64> @sabal2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
;CHECK-LABEL: sabal2d:
;CHECK: sabal.2d
  %tmp1 = load <2 x i32>* %A
  %tmp2 = load <2 x i32>* %B
  %tmp3 = load <2 x i64>* %C
  %tmp4 = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  %tmp4.1 = zext <2 x i32> %tmp4 to <2 x i64>
  %tmp5 = add <2 x i64> %tmp3, %tmp4.1
  ret <2 x i64> %tmp5
}

define <8 x i16> @sabal2_8h(<16 x i8>* %A, <16 x i8>* %B, <8 x i16>* %C) nounwind {
;CHECK-LABEL: sabal2_8h:
;CHECK: sabal2.8h
  %load1 = load <16 x i8>* %A
  %load2 = load <16 x i8>* %B
  %tmp3 = load <8 x i16>* %C
  %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %tmp2 = shufflevector <16 x i8> %load2, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %tmp4 = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  %tmp4.1 = zext <8 x i8> %tmp4 to <8 x i16>
  %tmp5 = add <8 x i16> %tmp3, %tmp4.1
  ret <8 x i16> %tmp5
}

define <4 x i32> @sabal2_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounwind {
;CHECK-LABEL: sabal2_4s:
;CHECK: sabal2.4s
  %load1 = load <8 x i16>* %A
  %load2 = load <8 x i16>* %B
  %tmp3 = load <4 x i32>* %C
  %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %tmp4 = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  %tmp4.1 = zext <4 x i16> %tmp4 to <4 x i32>
  %tmp5 = add <4 x i32> %tmp3, %tmp4.1
  ret <4 x i32> %tmp5
}

define <2 x i64> @sabal2_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) nounwind {
;CHECK-LABEL: sabal2_2d:
;CHECK: sabal2.2d
  %load1 = load <4 x i32>* %A
  %load2 = load <4 x i32>* %B
  %tmp3 = load <2 x i64>* %C
  %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %tmp4 = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  %tmp4.1 = zext <2 x i32> %tmp4 to <2 x i64>
  %tmp5 = add <2 x i64> %tmp3, %tmp4.1
  ret <2 x i64> %tmp5
}

define <8 x i16> @uabal8h(<8 x i8>* %A, <8 x i8>* %B, <8 x i16>* %C) nounwind {
;CHECK-LABEL: uabal8h:
;CHECK: uabal.8h
  %tmp1 = load <8 x i8>* %A
  %tmp2 = load <8 x i8>* %B
  %tmp3 = load <8 x i16>* %C
  %tmp4 = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  %tmp4.1 = zext <8 x i8> %tmp4 to <8 x i16>
  %tmp5 = add <8 x i16> %tmp3, %tmp4.1
  ret <8 x i16> %tmp5
}

define <4 x i32> @uabal4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
;CHECK-LABEL: uabal4s:
;CHECK: uabal.4s
  %tmp1 = load <4 x i16>* %A
  %tmp2 = load <4 x i16>* %B
  %tmp3 = load <4 x i32>* %C
  %tmp4 = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  %tmp4.1 = zext <4 x i16> %tmp4 to <4 x i32>
  %tmp5 = add <4 x i32> %tmp3, %tmp4.1
  ret <4 x i32> %tmp5
}

define <2 x i64> @uabal2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
;CHECK-LABEL: uabal2d:
;CHECK: uabal.2d
  %tmp1 = load <2 x i32>* %A
  %tmp2 = load <2 x i32>* %B
  %tmp3 = load <2 x i64>* %C
  %tmp4 = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  %tmp4.1 = zext <2 x i32> %tmp4 to <2 x i64>
  %tmp5 = add <2 x i64> %tmp3, %tmp4.1
  ret <2 x i64> %tmp5
}

define <8 x i16> @uabal2_8h(<16 x i8>* %A, <16 x i8>* %B, <8 x i16>* %C) nounwind {
;CHECK-LABEL: uabal2_8h:
;CHECK: uabal2.8h
  %load1 = load <16 x i8>* %A
  %load2 = load <16 x i8>* %B
  %tmp3 = load <8 x i16>* %C
  %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %tmp2 = shufflevector <16 x i8> %load2, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %tmp4 = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  %tmp4.1 = zext <8 x i8> %tmp4 to <8 x i16>
  %tmp5 = add <8 x i16> %tmp3, %tmp4.1
  ret <8 x i16> %tmp5
}

define <4 x i32> @uabal2_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounwind {
;CHECK-LABEL: uabal2_4s:
;CHECK: uabal2.4s
  %load1 = load <8 x i16>* %A
  %load2 = load <8 x i16>* %B
  %tmp3 = load <4 x i32>* %C
  %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %tmp4 = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  %tmp4.1 = zext <4 x i16> %tmp4 to <4 x i32>
  %tmp5 = add <4 x i32> %tmp3, %tmp4.1
  ret <4 x i32> %tmp5
}

define <2 x i64> @uabal2_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) nounwind {
;CHECK-LABEL: uabal2_2d:
;CHECK: uabal2.2d
  %load1 = load <4 x i32>* %A
  %load2 = load <4 x i32>* %B
  %tmp3 = load <2 x i64>* %C
  %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %tmp4 = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  %tmp4.1 = zext <2 x i32> %tmp4 to <2 x i64>
  %tmp5 = add <2 x i64> %tmp3, %tmp4.1
  ret <2 x i64> %tmp5
}

define <8 x i8> @saba_8b(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
;CHECK-LABEL: saba_8b:
;CHECK: saba.8b
  %tmp1 = load <8 x i8>* %A
  %tmp2 = load <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  %tmp4 = load <8 x i8>* %C
  %tmp5 = add <8 x i8> %tmp3, %tmp4
  ret <8 x i8> %tmp5
}

define <16 x i8> @saba_16b(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind {
;CHECK-LABEL: saba_16b:
;CHECK: saba.16b
  %tmp1 = load <16 x i8>* %A
  %tmp2 = load <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  %tmp4 = load <16 x i8>* %C
  %tmp5 = add <16 x i8> %tmp3, %tmp4
  ret <16 x i8> %tmp5
}

define <4 x i16> @saba_4h(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
;CHECK-LABEL: saba_4h:
;CHECK: saba.4h
  %tmp1 = load <4 x i16>* %A
  %tmp2 = load <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  %tmp4 = load <4 x i16>* %C
  %tmp5 = add <4 x i16> %tmp3, %tmp4
  ret <4 x i16> %tmp5
}

define <8 x i16> @saba_8h(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
;CHECK-LABEL: saba_8h:
;CHECK: saba.8h
  %tmp1 = load <8 x i16>* %A
  %tmp2 = load <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  %tmp4 = load <8 x i16>* %C
  %tmp5 = add <8 x i16> %tmp3, %tmp4
  ret <8 x i16> %tmp5
}

define <2 x i32> @saba_2s(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
;CHECK-LABEL: saba_2s:
;CHECK: saba.2s
  %tmp1 = load <2 x i32>* %A
  %tmp2 = load <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  %tmp4 = load <2 x i32>* %C
  %tmp5 = add <2 x i32> %tmp3, %tmp4
  ret <2 x i32> %tmp5
}

define <4 x i32> @saba_4s(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
;CHECK-LABEL: saba_4s:
;CHECK: saba.4s
  %tmp1 = load <4 x i32>* %A
  %tmp2 = load <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  %tmp4 = load <4 x i32>* %C
  %tmp5 = add <4 x i32> %tmp3, %tmp4
  ret <4 x i32> %tmp5
}

define <8 x i8> @uaba_8b(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
;CHECK-LABEL: uaba_8b:
;CHECK: uaba.8b
  %tmp1 = load <8 x i8>* %A
  %tmp2 = load <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  %tmp4 = load <8 x i8>* %C
  %tmp5 = add <8 x i8> %tmp3, %tmp4
  ret <8 x i8> %tmp5
}

define <16 x i8> @uaba_16b(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind {
;CHECK-LABEL: uaba_16b:
;CHECK: uaba.16b
  %tmp1 = load <16 x i8>* %A
  %tmp2 = load <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  %tmp4 = load <16 x i8>* %C
  %tmp5 = add <16 x i8> %tmp3, %tmp4
  ret <16 x i8> %tmp5
}

define <4 x i16> @uaba_4h(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
;CHECK-LABEL: uaba_4h:
;CHECK: uaba.4h
  %tmp1 = load <4 x i16>* %A
  %tmp2 = load <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  %tmp4 = load <4 x i16>* %C
  %tmp5 = add <4 x i16> %tmp3, %tmp4
  ret <4 x i16> %tmp5
}

define <8 x i16> @uaba_8h(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
;CHECK-LABEL: uaba_8h:
;CHECK: uaba.8h
  %tmp1 = load <8 x i16>* %A
  %tmp2 = load <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  %tmp4 = load <8 x i16>* %C
  %tmp5 = add <8 x i16> %tmp3, %tmp4
  ret <8 x i16> %tmp5
}

define <2 x i32> @uaba_2s(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
;CHECK-LABEL: uaba_2s:
;CHECK: uaba.2s
  %tmp1 = load <2 x i32>* %A
  %tmp2 = load <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  %tmp4 = load <2 x i32>* %C
  %tmp5 = add <2 x i32> %tmp3, %tmp4
  ret <2 x i32> %tmp5
}

define <4 x i32> @uaba_4s(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
;CHECK-LABEL: uaba_4s:
;CHECK: uaba.4s
  %tmp1 = load <4 x i32>* %A
  %tmp2 = load <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  %tmp4 = load <4 x i32>* %C
  %tmp5 = add <4 x i32> %tmp3, %tmp4
  ret <4 x i32> %tmp5
}

; Scalar FABD
define float @fabds(float %a, float %b) nounwind {
; CHECK-LABEL: fabds:
; CHECK: fabd s0, s0, s1
  %vabd.i = tail call float @llvm.aarch64.sisd.fabd.f32(float %a, float %b) nounwind
  ret float %vabd.i
}

define double @fabdd(double %a, double %b) nounwind {
; CHECK-LABEL: fabdd:
; CHECK: fabd d0, d0, d1
  %vabd.i = tail call double @llvm.aarch64.sisd.fabd.f64(double %a, double %b) nounwind
  ret double %vabd.i
}

declare double @llvm.aarch64.sisd.fabd.f64(double, double) nounwind readnone
declare float @llvm.aarch64.sisd.fabd.f32(float, float) nounwind readnone

; A dup of the scalar RHS combined with a high-half extract of the LHS must
; select the "2" (high-half) form directly, without an ext.16b shuffle.
define <2 x i64> @uabdl_from_extract_dup(<4 x i32> %lhs, i32 %rhs) {
; CHECK-LABEL: uabdl_from_extract_dup:
; CHECK-NOT: ext.16b
; CHECK: uabdl2.2d
  %rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0
  %rhsvec = insertelement <2 x i32> %rhsvec.tmp, i32 %rhs, i32 1

  %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>

  %res = tail call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %lhs.high, <2 x i32> %rhsvec) nounwind
  %res1 = zext <2 x i32> %res to <2 x i64>
  ret <2 x i64> %res1
}

define <2 x i64> @sabdl_from_extract_dup(<4 x i32> %lhs, i32 %rhs) {
; CHECK-LABEL: sabdl_from_extract_dup:
; CHECK-NOT: ext.16b
; CHECK: sabdl2.2d
  %rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0
  %rhsvec = insertelement <2 x i32> %rhsvec.tmp, i32 %rhs, i32 1

  %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>

  %res = tail call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %lhs.high, <2 x i32> %rhsvec) nounwind
  %res1 = zext <2 x i32> %res to <2 x i64>
  ret <2 x i64> %res1
}