1 ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s 2 3 declare <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double>, <2 x double>) 4 5 declare <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float>, <4 x float>) 6 7 declare <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float>, <2 x float>) 8 9 declare <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32>, <4 x i32>) 10 11 declare <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32>, <2 x i32>) 12 13 declare <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16>, <8 x i16>) 14 15 declare <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16>, <4 x i16>) 16 17 declare <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32>, <4 x i32>) 18 19 declare <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32>, <2 x i32>) 20 21 declare <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16>, <8 x i16>) 22 23 declare <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16>, <4 x i16>) 24 25 declare <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>) 26 27 declare <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>) 28 29 declare <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>) 30 31 declare <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>) 32 33 declare <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>) 34 35 declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>) 36 37 declare <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32>, <2 x i32>) 38 39 declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>) 40 41 declare <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32>, <2 x i32>) 42 43 declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>) 44 45 define <4 x i16> @test_vmla_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) { 46 ; CHECK-LABEL: test_vmla_lane_s16: 47 ; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] 48 ; CHECK-NEXT: ret 49 entry: 50 
%shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 51 %mul = mul <4 x i16> %shuffle, %b 52 %add = add <4 x i16> %mul, %a 53 ret <4 x i16> %add 54 } 55 56 define <8 x i16> @test_vmlaq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) { 57 ; CHECK-LABEL: test_vmlaq_lane_s16: 58 ; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] 59 ; CHECK-NEXT: ret 60 entry: 61 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> 62 %mul = mul <8 x i16> %shuffle, %b 63 %add = add <8 x i16> %mul, %a 64 ret <8 x i16> %add 65 } 66 67 define <2 x i32> @test_vmla_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) { 68 ; CHECK-LABEL: test_vmla_lane_s32: 69 ; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] 70 ; CHECK-NEXT: ret 71 entry: 72 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 73 %mul = mul <2 x i32> %shuffle, %b 74 %add = add <2 x i32> %mul, %a 75 ret <2 x i32> %add 76 } 77 78 define <4 x i32> @test_vmlaq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) { 79 ; CHECK-LABEL: test_vmlaq_lane_s32: 80 ; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] 81 ; CHECK-NEXT: ret 82 entry: 83 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 84 %mul = mul <4 x i32> %shuffle, %b 85 %add = add <4 x i32> %mul, %a 86 ret <4 x i32> %add 87 } 88 89 define <4 x i16> @test_vmla_laneq_s16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) { 90 ; CHECK-LABEL: test_vmla_laneq_s16: 91 ; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] 92 ; CHECK-NEXT: ret 93 entry: 94 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 95 %mul = mul <4 x i16> %shuffle, %b 96 %add = add <4 x i16> %mul, %a 97 ret <4 x i16> %add 98 } 99 100 define <8 x i16> @test_vmlaq_laneq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) { 101 ; CHECK-LABEL: 
test_vmlaq_laneq_s16: 102 ; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] 103 ; CHECK-NEXT: ret 104 entry: 105 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> 106 %mul = mul <8 x i16> %shuffle, %b 107 %add = add <8 x i16> %mul, %a 108 ret <8 x i16> %add 109 } 110 111 define <2 x i32> @test_vmla_laneq_s32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) { 112 ; CHECK-LABEL: test_vmla_laneq_s32: 113 ; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] 114 ; CHECK-NEXT: ret 115 entry: 116 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> 117 %mul = mul <2 x i32> %shuffle, %b 118 %add = add <2 x i32> %mul, %a 119 ret <2 x i32> %add 120 } 121 122 define <4 x i32> @test_vmlaq_laneq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) { 123 ; CHECK-LABEL: test_vmlaq_laneq_s32: 124 ; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] 125 ; CHECK-NEXT: ret 126 entry: 127 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 128 %mul = mul <4 x i32> %shuffle, %b 129 %add = add <4 x i32> %mul, %a 130 ret <4 x i32> %add 131 } 132 133 define <4 x i16> @test_vmls_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) { 134 ; CHECK-LABEL: test_vmls_lane_s16: 135 ; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] 136 ; CHECK-NEXT: ret 137 entry: 138 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 139 %mul = mul <4 x i16> %shuffle, %b 140 %sub = sub <4 x i16> %a, %mul 141 ret <4 x i16> %sub 142 } 143 144 define <8 x i16> @test_vmlsq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) { 145 ; CHECK-LABEL: test_vmlsq_lane_s16: 146 ; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] 147 ; CHECK-NEXT: ret 148 entry: 149 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> 150 %mul = mul <8 x 
i16> %shuffle, %b 151 %sub = sub <8 x i16> %a, %mul 152 ret <8 x i16> %sub 153 } 154 155 define <2 x i32> @test_vmls_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) { 156 ; CHECK-LABEL: test_vmls_lane_s32: 157 ; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] 158 ; CHECK-NEXT: ret 159 entry: 160 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 161 %mul = mul <2 x i32> %shuffle, %b 162 %sub = sub <2 x i32> %a, %mul 163 ret <2 x i32> %sub 164 } 165 166 define <4 x i32> @test_vmlsq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) { 167 ; CHECK-LABEL: test_vmlsq_lane_s32: 168 ; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] 169 ; CHECK-NEXT: ret 170 entry: 171 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 172 %mul = mul <4 x i32> %shuffle, %b 173 %sub = sub <4 x i32> %a, %mul 174 ret <4 x i32> %sub 175 } 176 177 define <4 x i16> @test_vmls_laneq_s16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) { 178 ; CHECK-LABEL: test_vmls_laneq_s16: 179 ; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] 180 ; CHECK-NEXT: ret 181 entry: 182 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 183 %mul = mul <4 x i16> %shuffle, %b 184 %sub = sub <4 x i16> %a, %mul 185 ret <4 x i16> %sub 186 } 187 188 define <8 x i16> @test_vmlsq_laneq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) { 189 ; CHECK-LABEL: test_vmlsq_laneq_s16: 190 ; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] 191 ; CHECK-NEXT: ret 192 entry: 193 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> 194 %mul = mul <8 x i16> %shuffle, %b 195 %sub = sub <8 x i16> %a, %mul 196 ret <8 x i16> %sub 197 } 198 199 define <2 x i32> @test_vmls_laneq_s32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) { 200 ; CHECK-LABEL: test_vmls_laneq_s32: 201 ; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, 
{{v[0-9]+}}.s[3] 202 ; CHECK-NEXT: ret 203 entry: 204 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> 205 %mul = mul <2 x i32> %shuffle, %b 206 %sub = sub <2 x i32> %a, %mul 207 ret <2 x i32> %sub 208 } 209 210 define <4 x i32> @test_vmlsq_laneq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) { 211 ; CHECK-LABEL: test_vmlsq_laneq_s32: 212 ; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] 213 ; CHECK-NEXT: ret 214 entry: 215 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 216 %mul = mul <4 x i32> %shuffle, %b 217 %sub = sub <4 x i32> %a, %mul 218 ret <4 x i32> %sub 219 } 220 221 define <4 x i16> @test_vmul_lane_s16(<4 x i16> %a, <4 x i16> %v) { 222 ; CHECK-LABEL: test_vmul_lane_s16: 223 ; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] 224 ; CHECK-NEXT: ret 225 entry: 226 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 227 %mul = mul <4 x i16> %shuffle, %a 228 ret <4 x i16> %mul 229 } 230 231 define <8 x i16> @test_vmulq_lane_s16(<8 x i16> %a, <4 x i16> %v) { 232 ; CHECK-LABEL: test_vmulq_lane_s16: 233 ; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] 234 ; CHECK-NEXT: ret 235 entry: 236 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> 237 %mul = mul <8 x i16> %shuffle, %a 238 ret <8 x i16> %mul 239 } 240 241 define <2 x i32> @test_vmul_lane_s32(<2 x i32> %a, <2 x i32> %v) { 242 ; CHECK-LABEL: test_vmul_lane_s32: 243 ; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] 244 ; CHECK-NEXT: ret 245 entry: 246 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 247 %mul = mul <2 x i32> %shuffle, %a 248 ret <2 x i32> %mul 249 } 250 251 define <4 x i32> @test_vmulq_lane_s32(<4 x i32> %a, <2 x i32> %v) { 252 ; CHECK-LABEL: test_vmulq_lane_s32: 253 ; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, 
{{v[0-9]+}}.s[1] 254 ; CHECK-NEXT: ret 255 entry: 256 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 257 %mul = mul <4 x i32> %shuffle, %a 258 ret <4 x i32> %mul 259 } 260 261 define <4 x i16> @test_vmul_lane_u16(<4 x i16> %a, <4 x i16> %v) { 262 ; CHECK-LABEL: test_vmul_lane_u16: 263 ; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] 264 ; CHECK-NEXT: ret 265 entry: 266 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 267 %mul = mul <4 x i16> %shuffle, %a 268 ret <4 x i16> %mul 269 } 270 271 define <8 x i16> @test_vmulq_lane_u16(<8 x i16> %a, <4 x i16> %v) { 272 ; CHECK-LABEL: test_vmulq_lane_u16: 273 ; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] 274 ; CHECK-NEXT: ret 275 entry: 276 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> 277 %mul = mul <8 x i16> %shuffle, %a 278 ret <8 x i16> %mul 279 } 280 281 define <2 x i32> @test_vmul_lane_u32(<2 x i32> %a, <2 x i32> %v) { 282 ; CHECK-LABEL: test_vmul_lane_u32: 283 ; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] 284 ; CHECK-NEXT: ret 285 entry: 286 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 287 %mul = mul <2 x i32> %shuffle, %a 288 ret <2 x i32> %mul 289 } 290 291 define <4 x i32> @test_vmulq_lane_u32(<4 x i32> %a, <2 x i32> %v) { 292 ; CHECK-LABEL: test_vmulq_lane_u32: 293 ; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] 294 ; CHECK-NEXT: ret 295 entry: 296 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 297 %mul = mul <4 x i32> %shuffle, %a 298 ret <4 x i32> %mul 299 } 300 301 define <4 x i16> @test_vmul_laneq_s16(<4 x i16> %a, <8 x i16> %v) { 302 ; CHECK-LABEL: test_vmul_laneq_s16: 303 ; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] 304 ; CHECK-NEXT: ret 305 entry: 306 %shuffle = shufflevector <8 x 
i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 307 %mul = mul <4 x i16> %shuffle, %a 308 ret <4 x i16> %mul 309 } 310 311 define <8 x i16> @test_vmulq_laneq_s16(<8 x i16> %a, <8 x i16> %v) { 312 ; CHECK-LABEL: test_vmulq_laneq_s16: 313 ; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] 314 ; CHECK-NEXT: ret 315 entry: 316 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> 317 %mul = mul <8 x i16> %shuffle, %a 318 ret <8 x i16> %mul 319 } 320 321 define <2 x i32> @test_vmul_laneq_s32(<2 x i32> %a, <4 x i32> %v) { 322 ; CHECK-LABEL: test_vmul_laneq_s32: 323 ; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] 324 ; CHECK-NEXT: ret 325 entry: 326 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> 327 %mul = mul <2 x i32> %shuffle, %a 328 ret <2 x i32> %mul 329 } 330 331 define <4 x i32> @test_vmulq_laneq_s32(<4 x i32> %a, <4 x i32> %v) { 332 ; CHECK-LABEL: test_vmulq_laneq_s32: 333 ; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] 334 ; CHECK-NEXT: ret 335 entry: 336 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 337 %mul = mul <4 x i32> %shuffle, %a 338 ret <4 x i32> %mul 339 } 340 341 define <4 x i16> @test_vmul_laneq_u16(<4 x i16> %a, <8 x i16> %v) { 342 ; CHECK-LABEL: test_vmul_laneq_u16: 343 ; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] 344 ; CHECK-NEXT: ret 345 entry: 346 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 347 %mul = mul <4 x i16> %shuffle, %a 348 ret <4 x i16> %mul 349 } 350 351 define <8 x i16> @test_vmulq_laneq_u16(<8 x i16> %a, <8 x i16> %v) { 352 ; CHECK-LABEL: test_vmulq_laneq_u16: 353 ; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] 354 ; CHECK-NEXT: ret 355 entry: 356 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, 
i32 7, i32 7, i32 7> 357 %mul = mul <8 x i16> %shuffle, %a 358 ret <8 x i16> %mul 359 } 360 361 define <2 x i32> @test_vmul_laneq_u32(<2 x i32> %a, <4 x i32> %v) { 362 ; CHECK-LABEL: test_vmul_laneq_u32: 363 ; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] 364 ; CHECK-NEXT: ret 365 entry: 366 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> 367 %mul = mul <2 x i32> %shuffle, %a 368 ret <2 x i32> %mul 369 } 370 371 define <4 x i32> @test_vmulq_laneq_u32(<4 x i32> %a, <4 x i32> %v) { 372 ; CHECK-LABEL: test_vmulq_laneq_u32: 373 ; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] 374 ; CHECK-NEXT: ret 375 entry: 376 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 377 %mul = mul <4 x i32> %shuffle, %a 378 ret <4 x i32> %mul 379 } 380 381 define <2 x float> @test_vfma_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) { 382 ; CHECK-LABEL: test_vfma_lane_f32: 383 ; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] 384 ; CHECK-NEXT: ret 385 entry: 386 %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1> 387 %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) 388 ret <2 x float> %0 389 } 390 391 declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) 392 393 define <4 x float> @test_vfmaq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) { 394 ; CHECK-LABEL: test_vfmaq_lane_f32: 395 ; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] 396 ; CHECK-NEXT: ret 397 entry: 398 %lane = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 399 %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a) 400 ret <4 x float> %0 401 } 402 403 declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) 404 405 define <2 x float> @test_vfma_laneq_f32(<2 x float> %a, <2 x float> %b, 
<4 x float> %v) { 406 ; CHECK-LABEL: test_vfma_laneq_f32: 407 ; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] 408 ; CHECK-NEXT: ret 409 entry: 410 %lane = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3> 411 %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) 412 ret <2 x float> %0 413 } 414 415 define <4 x float> @test_vfmaq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) { 416 ; CHECK-LABEL: test_vfmaq_laneq_f32: 417 ; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] 418 ; CHECK-NEXT: ret 419 entry: 420 %lane = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 421 %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a) 422 ret <4 x float> %0 423 } 424 425 define <2 x float> @test_vfms_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) { 426 ; CHECK-LABEL: test_vfms_lane_f32: 427 ; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] 428 ; CHECK-NEXT: ret 429 entry: 430 %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v 431 %lane = shufflevector <2 x float> %sub, <2 x float> undef, <2 x i32> <i32 1, i32 1> 432 %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) 433 ret <2 x float> %0 434 } 435 436 define <4 x float> @test_vfmsq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) { 437 ; CHECK-LABEL: test_vfmsq_lane_f32: 438 ; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] 439 ; CHECK-NEXT: ret 440 entry: 441 %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v 442 %lane = shufflevector <2 x float> %sub, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 443 %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a) 444 ret <4 x float> %0 445 } 446 447 define <2 x float> @test_vfms_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> 
%v) { 448 ; CHECK-LABEL: test_vfms_laneq_f32: 449 ; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] 450 ; CHECK-NEXT: ret 451 entry: 452 %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v 453 %lane = shufflevector <4 x float> %sub, <4 x float> undef, <2 x i32> <i32 3, i32 3> 454 %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) 455 ret <2 x float> %0 456 } 457 458 define <4 x float> @test_vfmsq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) { 459 ; CHECK-LABEL: test_vfmsq_laneq_f32: 460 ; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] 461 ; CHECK-NEXT: ret 462 entry: 463 %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v 464 %lane = shufflevector <4 x float> %sub, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 465 %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a) 466 ret <4 x float> %0 467 } 468 469 define <2 x double> @test_vfmaq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x double> %v) { 470 ; CHECK-LABEL: test_vfmaq_lane_f64: 471 ; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] 472 ; CHECK-NEXT: ret 473 entry: 474 %lane = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer 475 %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a) 476 ret <2 x double> %0 477 } 478 479 declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) 480 481 define <2 x double> @test_vfmaq_laneq_f64(<2 x double> %a, <2 x double> %b, <2 x double> %v) { 482 ; CHECK-LABEL: test_vfmaq_laneq_f64: 483 ; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] 484 ; CHECK-NEXT: ret 485 entry: 486 %lane = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1> 487 %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x 
double> %lane, <2 x double> %b, <2 x double> %a) 488 ret <2 x double> %0 489 } 490 491 define <2 x double> @test_vfmsq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x double> %v) { 492 ; CHECK-LABEL: test_vfmsq_lane_f64: 493 ; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] 494 ; CHECK-NEXT: ret 495 entry: 496 %sub = fsub <1 x double> <double -0.000000e+00>, %v 497 %lane = shufflevector <1 x double> %sub, <1 x double> undef, <2 x i32> zeroinitializer 498 %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a) 499 ret <2 x double> %0 500 } 501 502 define <2 x double> @test_vfmsq_laneq_f64(<2 x double> %a, <2 x double> %b, <2 x double> %v) { 503 ; CHECK-LABEL: test_vfmsq_laneq_f64: 504 ; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] 505 ; CHECK-NEXT: ret 506 entry: 507 %sub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v 508 %lane = shufflevector <2 x double> %sub, <2 x double> undef, <2 x i32> <i32 1, i32 1> 509 %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a) 510 ret <2 x double> %0 511 } 512 513 define float @test_vfmas_laneq_f32(float %a, float %b, <4 x float> %v) { 514 ; CHECK-LABEL: test_vfmas_laneq_f32 515 ; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] 516 ; CHECK-NEXT: ret 517 entry: 518 %extract = extractelement <4 x float> %v, i32 3 519 %0 = tail call float @llvm.fma.f32(float %b, float %extract, float %a) 520 ret float %0 521 } 522 523 declare float @llvm.fma.f32(float, float, float) 524 525 define double @test_vfmsd_lane_f64(double %a, double %b, <1 x double> %v) { 526 ; CHECK-LABEL: test_vfmsd_lane_f64 527 ; CHECK: fmsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} 528 ; CHECK-NEXT: ret 529 entry: 530 %extract.rhs = extractelement <1 x double> %v, i32 0 531 %extract = fsub double -0.000000e+00, %extract.rhs 532 %0 = tail call double @llvm.fma.f64(double %b, double %extract, double %a) 533 ret double 
%0 534 } 535 536 declare double @llvm.fma.f64(double, double, double) 537 538 define float @test_vfmss_lane_f32(float %a, float %b, <2 x float> %v) { 539 ; CHECK-LABEL: test_vfmss_lane_f32 540 ; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1] 541 ; CHECK-NEXT: ret 542 entry: 543 %extract.rhs = extractelement <2 x float> %v, i32 1 544 %extract = fsub float -0.000000e+00, %extract.rhs 545 %0 = tail call float @llvm.fma.f32(float %b, float %extract, float %a) 546 ret float %0 547 } 548 549 define float @test_vfmss_laneq_f32(float %a, float %b, <4 x float> %v) { 550 ; CHECK-LABEL: test_vfmss_laneq_f32 551 ; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] 552 ; CHECK-NEXT: ret 553 entry: 554 %extract.rhs = extractelement <4 x float> %v, i32 3 555 %extract = fsub float -0.000000e+00, %extract.rhs 556 %0 = tail call float @llvm.fma.f32(float %b, float %extract, float %a) 557 ret float %0 558 } 559 560 define double @test_vfmsd_laneq_f64(double %a, double %b, <2 x double> %v) { 561 ; CHECK-LABEL: test_vfmsd_laneq_f64 562 ; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] 563 ; CHECK-NEXT: ret 564 entry: 565 %extract.rhs = extractelement <2 x double> %v, i32 1 566 %extract = fsub double -0.000000e+00, %extract.rhs 567 %0 = tail call double @llvm.fma.f64(double %b, double %extract, double %a) 568 ret double %0 569 } 570 571 define double @test_vfmsd_lane_f64_0(double %a, double %b, <1 x double> %v) { 572 ; CHCK-LABEL: test_vfmsd_lane_f64_0 573 ; CHCK: fmsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} 574 ; CHCK-NEXT: ret 575 entry: 576 %tmp0 = fsub <1 x double> <double -0.000000e+00>, %v 577 %tmp1 = extractelement <1 x double> %tmp0, i32 0 578 %0 = tail call double @llvm.fma.f64(double %b, double %tmp1, double %a) 579 ret double %0 580 } 581 582 define float @test_vfmss_lane_f32_0(float %a, float %b, <2 x float> %v) { 583 ; CHECK-LABEL: test_vfmss_lane_f32_0 584 ; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1] 585 ; CHECK-NEXT: ret 586 
entry: 587 %tmp0 = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v 588 %tmp1 = extractelement <2 x float> %tmp0, i32 1 589 %0 = tail call float @llvm.fma.f32(float %b, float %tmp1, float %a) 590 ret float %0 591 } 592 593 define float @test_vfmss_laneq_f32_0(float %a, float %b, <4 x float> %v) { 594 ; CHECK-LABEL: test_vfmss_laneq_f32_0 595 ; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] 596 ; CHECK-NEXT: ret 597 entry: 598 %tmp0 = fsub <4 x float><float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v 599 %tmp1 = extractelement <4 x float> %tmp0, i32 3 600 %0 = tail call float @llvm.fma.f32(float %b, float %tmp1, float %a) 601 ret float %0 602 } 603 604 define double @test_vfmsd_laneq_f64_0(double %a, double %b, <2 x double> %v) { 605 ; CHECK-LABEL: test_vfmsd_laneq_f64_0 606 ; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] 607 ; CHECK-NEXT: ret 608 entry: 609 %tmp0 = fsub <2 x double><double -0.000000e+00, double -0.000000e+00>, %v 610 %tmp1 = extractelement <2 x double> %tmp0, i32 1 611 %0 = tail call double @llvm.fma.f64(double %b, double %tmp1, double %a) 612 ret double %0 613 } 614 615 define <4 x i32> @test_vmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { 616 ; CHECK-LABEL: test_vmlal_lane_s16: 617 ; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] 618 ; CHECK-NEXT: ret 619 entry: 620 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 621 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) 622 %add = add <4 x i32> %vmull2.i, %a 623 ret <4 x i32> %add 624 } 625 626 define <2 x i64> @test_vmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { 627 ; CHECK-LABEL: test_vmlal_lane_s32: 628 ; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] 629 ; CHECK-NEXT: ret 630 entry: 631 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 632 %vmull2.i = 
tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) 633 %add = add <2 x i64> %vmull2.i, %a 634 ret <2 x i64> %add 635 } 636 637 define <4 x i32> @test_vmlal_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { 638 ; CHECK-LABEL: test_vmlal_laneq_s16: 639 ; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] 640 ; CHECK-NEXT: ret 641 entry: 642 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 643 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) 644 %add = add <4 x i32> %vmull2.i, %a 645 ret <4 x i32> %add 646 } 647 648 define <2 x i64> @test_vmlal_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { 649 ; CHECK-LABEL: test_vmlal_laneq_s32: 650 ; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] 651 ; CHECK-NEXT: ret 652 entry: 653 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> 654 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) 655 %add = add <2 x i64> %vmull2.i, %a 656 ret <2 x i64> %add 657 } 658 659 define <4 x i32> @test_vmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { 660 ; CHECK-LABEL: test_vmlal_high_lane_s16: 661 ; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] 662 ; CHECK-NEXT: ret 663 entry: 664 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 665 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 666 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 667 %add = add <4 x i32> %vmull2.i, %a 668 ret <4 x i32> %add 669 } 670 671 define <2 x i64> @test_vmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { 672 ; CHECK-LABEL: test_vmlal_high_lane_s32: 673 ; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] 674 ; CHECK-NEXT: ret 675 entry: 
676 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 677 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 678 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 679 %add = add <2 x i64> %vmull2.i, %a 680 ret <2 x i64> %add 681 } 682 683 define <4 x i32> @test_vmlal_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { 684 ; CHECK-LABEL: test_vmlal_high_laneq_s16: 685 ; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] 686 ; CHECK-NEXT: ret 687 entry: 688 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 689 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 690 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 691 %add = add <4 x i32> %vmull2.i, %a 692 ret <4 x i32> %add 693 } 694 695 define <2 x i64> @test_vmlal_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { 696 ; CHECK-LABEL: test_vmlal_high_laneq_s32: 697 ; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] 698 ; CHECK-NEXT: ret 699 entry: 700 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 701 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> 702 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 703 %add = add <2 x i64> %vmull2.i, %a 704 ret <2 x i64> %add 705 } 706 707 define <4 x i32> @test_vmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { 708 ; CHECK-LABEL: test_vmlsl_lane_s16: 709 ; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] 710 ; CHECK-NEXT: ret 711 entry: 712 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 713 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) 714 %sub 
= sub <4 x i32> %a, %vmull2.i 715 ret <4 x i32> %sub 716 } 717 718 define <2 x i64> @test_vmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { 719 ; CHECK-LABEL: test_vmlsl_lane_s32: 720 ; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] 721 ; CHECK-NEXT: ret 722 entry: 723 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 724 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) 725 %sub = sub <2 x i64> %a, %vmull2.i 726 ret <2 x i64> %sub 727 } 728 729 define <4 x i32> @test_vmlsl_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { 730 ; CHECK-LABEL: test_vmlsl_laneq_s16: 731 ; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] 732 ; CHECK-NEXT: ret 733 entry: 734 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 735 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) 736 %sub = sub <4 x i32> %a, %vmull2.i 737 ret <4 x i32> %sub 738 } 739 740 define <2 x i64> @test_vmlsl_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { 741 ; CHECK-LABEL: test_vmlsl_laneq_s32: 742 ; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] 743 ; CHECK-NEXT: ret 744 entry: 745 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> 746 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) 747 %sub = sub <2 x i64> %a, %vmull2.i 748 ret <2 x i64> %sub 749 } 750 751 define <4 x i32> @test_vmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { 752 ; CHECK-LABEL: test_vmlsl_high_lane_s16: 753 ; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] 754 ; CHECK-NEXT: ret 755 entry: 756 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 757 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 758 %vmull2.i = tail call <4 
x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 759 %sub = sub <4 x i32> %a, %vmull2.i 760 ret <4 x i32> %sub 761 } 762 763 define <2 x i64> @test_vmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { 764 ; CHECK-LABEL: test_vmlsl_high_lane_s32: 765 ; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] 766 ; CHECK-NEXT: ret 767 entry: 768 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 769 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 770 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 771 %sub = sub <2 x i64> %a, %vmull2.i 772 ret <2 x i64> %sub 773 } 774 775 define <4 x i32> @test_vmlsl_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { 776 ; CHECK-LABEL: test_vmlsl_high_laneq_s16: 777 ; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] 778 ; CHECK-NEXT: ret 779 entry: 780 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 781 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 782 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 783 %sub = sub <4 x i32> %a, %vmull2.i 784 ret <4 x i32> %sub 785 } 786 787 define <2 x i64> @test_vmlsl_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { 788 ; CHECK-LABEL: test_vmlsl_high_laneq_s32: 789 ; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] 790 ; CHECK-NEXT: ret 791 entry: 792 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 793 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> 794 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 795 %sub = sub <2 x i64> %a, %vmull2.i 796 ret <2 x i64> %sub 797 } 798 799 define <4 x i32> @test_vmlal_lane_u16(<4 
x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlal_lane_u16:
; CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  ; Splat lane 3 of %v, widen-multiply (unsigned) with %b, accumulate into %a.
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}

; The patterns in this group spell the unsigned mnemonic (umlal) in full so a
; signed or saturating multiply-accumulate cannot satisfy them.

; umull of %b by lane 1 of %v, added to %a.
define <2 x i64> @test_vmlal_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlal_lane_u32:
; CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}

; umull of %b by lane 7 of the 128-bit %v, added to %a.
define <4 x i32> @test_vmlal_laneq_u16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlal_laneq_u16:
; CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}

; umull of %b by lane 3 of the 128-bit %v, added to %a.
define <2 x i64> @test_vmlal_laneq_u32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlal_laneq_u32:
; CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}

define <4 x i32> @test_vmlal_high_lane_u16(<4 x i32> %a, <8 x i16> %b,
<4 x i16> %v) {
; CHECK-LABEL: test_vmlal_high_lane_u16:
; CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  ; High half of %b, unsigned widening multiply by lane 3 of %v, added to %a.
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}

; The patterns in this group spell the unsigned mnemonic (umlal2) in full so a
; signed or saturating selection cannot satisfy them.

; umull of the high half of %b by lane 1 of %v, added to %a.
define <2 x i64> @test_vmlal_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlal_high_lane_u32:
; CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}

; umull of the high half of %b by lane 7 of the 128-bit %v, added to %a.
define <4 x i32> @test_vmlal_high_laneq_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlal_high_laneq_u16:
; CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}

; umull of the high half of %b by lane 3 of the 128-bit %v, added to %a.
define <2 x i64> @test_vmlal_high_laneq_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlal_high_laneq_u32:
; CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}

; The patterns in this group spell the unsigned mnemonic (umlsl) in full so a
; signed or saturating multiply-subtract cannot satisfy them.

; umull of %b by lane 3 of %v, subtracted from %a.
define <4 x i32> @test_vmlsl_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsl_lane_u16:
; CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

; umull of %b by lane 1 of %v, subtracted from %a.
define <2 x i64> @test_vmlsl_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlsl_lane_u32:
; CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}

; umull of %b by lane 7 of the 128-bit %v, subtracted from %a.
define <4 x i32> @test_vmlsl_laneq_u16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsl_laneq_u16:
; CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

; umull of %b by a lane of the 128-bit %v, subtracted from %a.
define <2 x i64> @test_vmlsl_laneq_u32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsl_laneq_u32:
; CHECK: umlsl {{v[0-9]+}}.2d,
{{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}

; The patterns in this group spell the unsigned mnemonic (umlsl2) in full so a
; signed or saturating selection cannot satisfy them.

; umull of the high half of %b by lane 3 of %v, subtracted from %a.
define <4 x i32> @test_vmlsl_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsl_high_lane_u16:
; CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

; umull of the high half of %b by lane 1 of %v, subtracted from %a.
define <2 x i64> @test_vmlsl_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlsl_high_lane_u32:
; CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}

; umull of the high half of %b by lane 7 of the 128-bit %v, subtracted from %a.
define <4 x i32> @test_vmlsl_high_laneq_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsl_high_laneq_u16:
; CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vmull2.i = tail call <4
x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

; The patterns in this group carry the s/u prefix of the intrinsic under test;
; a bare "mull" would also be satisfied by the other signedness or by sqdmull.

; umull of the high half of %b by lane 3 of the 128-bit %v, subtracted from %a.
define <2 x i64> @test_vmlsl_high_laneq_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsl_high_laneq_u32:
; CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}

; Signed widening multiply of %a by lane 3 of %v.
define <4 x i32> @test_vmull_lane_s16(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmull_lane_s16:
; CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}

; Signed widening multiply of %a by lane 1 of %v.
define <2 x i64> @test_vmull_lane_s32(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmull_lane_s32:
; CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}

; Unsigned widening multiply of %a by lane 3 of %v.
define <4 x i32> @test_vmull_lane_u16(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmull_lane_u16:
; CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vmull2.i = tail call <4 x i32>
@llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}

; The patterns in this group carry the s/u prefix of the intrinsic under test;
; a bare "mull"/"mull2" would also be satisfied by the other signedness.

; Unsigned widening multiply of %a by lane 1 of %v.
define <2 x i64> @test_vmull_lane_u32(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmull_lane_u32:
; CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}

; Signed widening multiply of the high half of %a by lane 3 of %v.
define <4 x i32> @test_vmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmull_high_lane_s16:
; CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}

; Signed widening multiply of the high half of %a by lane 1 of %v.
define <2 x i64> @test_vmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmull_high_lane_s32:
; CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}

; Unsigned widening multiply of the high half of %a by lane 3 of %v.
define <4 x i32> @test_vmull_high_lane_u16(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmull_high_lane_u16:
; CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32
5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}

; The patterns in this group carry the s/u prefix of the intrinsic under test;
; a bare "mull"/"mull2" would also be satisfied by the other signedness.

; Unsigned widening multiply of the high half of %a by lane 1 of %v.
define <2 x i64> @test_vmull_high_lane_u32(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmull_high_lane_u32:
; CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}

; Signed widening multiply of %a by lane 7 of the 128-bit %v.
define <4 x i32> @test_vmull_laneq_s16(<4 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmull_laneq_s16:
; CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}

; Signed widening multiply of %a by lane 3 of the 128-bit %v.
define <2 x i64> @test_vmull_laneq_s32(<2 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmull_laneq_s32:
; CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}

; Unsigned widening multiply of %a by lane 7 of the 128-bit %v.
define <4 x i32> @test_vmull_laneq_u16(<4 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmull_laneq_u16:
; CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x
i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}

; The patterns in this group carry the s/u prefix of the intrinsic under test;
; a bare "mull"/"mull2" would also be satisfied by the other signedness.

; Unsigned widening multiply of %a by lane 3 of the 128-bit %v.
define <2 x i64> @test_vmull_laneq_u32(<2 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmull_laneq_u32:
; CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}

; Signed widening multiply of the high half of %a by lane 7 of the 128-bit %v.
define <4 x i32> @test_vmull_high_laneq_s16(<8 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmull_high_laneq_s16:
; CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}

; Signed widening multiply of the high half of %a by lane 3 of the 128-bit %v.
define <2 x i64> @test_vmull_high_laneq_s32(<4 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmull_high_laneq_s32:
; CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}

; Unsigned widening multiply of the high half of %a by lane 7 of the 128-bit %v.
define <4 x i32> @test_vmull_high_laneq_u16(<8 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmull_high_laneq_u16:
; CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}

; The patterns in this group spell the full mnemonic (umull2, sqdmlal) rather
; than a suffix shared with other instructions.

; Unsigned widening multiply of the high half of %a by lane 3 of the 128-bit %v.
define <2 x i64> @test_vmull_high_laneq_u32(<4 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmull_high_laneq_u32:
; CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}

; sqdmull of %b by lane 3 of %v, then sqadd with %a.
define <4 x i32> @test_vqdmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmlal_lane_s16:
; CHECK: sqdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %vqdmlal4.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i)
  ret <4 x i32> %vqdmlal4.i
}

; sqdmull of %b by lane 1 of %v, then sqadd with %a.
define <2 x i64> @test_vqdmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmlal_lane_s32:
; CHECK: sqdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %vqdmlal4.i = tail call <2 x i64>
@llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i)
  ret <2 x i64> %vqdmlal4.i
}

; The patterns in this group spell the full saturating mnemonic (sqdmlal2,
; sqdmlsl) rather than a suffix.

; sqdmull of the high half of %b by lane 3 of %v, then sqadd with %a.
define <4 x i32> @test_vqdmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmlal_high_lane_s16:
; CHECK: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %vqdmlal4.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i)
  ret <4 x i32> %vqdmlal4.i
}

; sqdmull of the high half of %b by lane 1 of %v, then sqadd with %a.
define <2 x i64> @test_vqdmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmlal_high_lane_s32:
; CHECK: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %vqdmlal4.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i)
  ret <2 x i64> %vqdmlal4.i
}

; sqdmull of %b by lane 3 of %v, then sqsub from %a.
define <4 x i32> @test_vqdmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmlsl_lane_s16:
; CHECK: sqdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %vqdmlsl4.i = tail call <4 x i32>
@llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i)
  ret <4 x i32> %vqdmlsl4.i
}

; The patterns in this group spell the full saturating mnemonic (sqdmlsl,
; sqdmlsl2) rather than a suffix.

; sqdmull of %b by lane 1 of %v, then sqsub from %a.
define <2 x i64> @test_vqdmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmlsl_lane_s32:
; CHECK: sqdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %vqdmlsl4.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i)
  ret <2 x i64> %vqdmlsl4.i
}

; sqdmull of the high half of %b by lane 3 of %v, then sqsub from %a.
define <4 x i32> @test_vqdmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmlsl_high_lane_s16:
; CHECK: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %vqdmlsl4.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i)
  ret <4 x i32> %vqdmlsl4.i
}

; sqdmull of the high half of %b by lane 1 of %v, then sqsub from %a.
define <2 x i64> @test_vqdmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmlsl_high_lane_s32:
; CHECK: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %vqdmlsl4.i = tail call <2 x i64>
@llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i)
  ret <2 x i64> %vqdmlsl4.i
}

; The patterns in this group spell the full saturating mnemonic (sqdmull)
; rather than a suffix.

; sqdmull of %a by lane 3 of %v.
define <4 x i32> @test_vqdmull_lane_s16(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmull_lane_s16:
; CHECK: sqdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i32> %vqdmull2.i
}

; sqdmull of %a by lane 1 of %v.
define <2 x i64> @test_vqdmull_lane_s32(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmull_lane_s32:
; CHECK: sqdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i64> %vqdmull2.i
}

; sqdmull of %a by lane 3 of the 128-bit %v (this test selects lane 3, not 7).
define <4 x i32> @test_vqdmull_laneq_s16(<4 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vqdmull_laneq_s16:
; CHECK: sqdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i32> %vqdmull2.i
}

; sqdmull of %a by lane 3 of the 128-bit %v.
define <2 x i64> @test_vqdmull_laneq_s32(<2 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vqdmull_laneq_s32:
; CHECK: sqdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i64> %vqdmull2.i
}

define <4 x i32>
@test_vqdmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmull_high_lane_s16:
; CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  ; High half of %a, saturating doubling widening multiply by lane 3 of %v.
  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  ret <4 x i32> %vqdmull2.i
}

; The patterns in this group spell the full saturating mnemonic (sqdmull2)
; rather than a suffix.

; sqdmull of the high half of %a by lane 1 of %v.
define <2 x i64> @test_vqdmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmull_high_lane_s32:
; CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  ret <2 x i64> %vqdmull2.i
}

; sqdmull of the high half of %a by lane 7 of the 128-bit %v.
define <4 x i32> @test_vqdmull_high_laneq_s16(<8 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vqdmull_high_laneq_s16:
; CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  ret <4 x i32> %vqdmull2.i
}

; sqdmull of the high half of %a by lane 3 of the 128-bit %v.
define <2 x i64> @test_vqdmull_high_laneq_s32(<4 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vqdmull_high_laneq_s32:
; CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector
<4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  ret <2 x i64> %vqdmull2.i
}

; The patterns in this group spell the full saturating mnemonic (sqdmulh)
; rather than a suffix.

; sqdmulh of %a by lane 3 of %v.
define <4 x i16> @test_vqdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmulh_lane_s16:
; CHECK: sqdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vqdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i16> %vqdmulh2.i
}

; sqdmulh of the 128-bit %a by lane 3 of %v.
define <8 x i16> @test_vqdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmulhq_lane_s16:
; CHECK: sqdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %vqdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle)
  ret <8 x i16> %vqdmulh2.i
}

; sqdmulh of %a by lane 1 of %v.
define <2 x i32> @test_vqdmulh_lane_s32(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmulh_lane_s32:
; CHECK: sqdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vqdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i32> %vqdmulh2.i
}

; sqdmulh of the 128-bit %a by lane 1 of %v.
define <4 x i32> @test_vqdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmulhq_lane_s32:
; CHECK: sqdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v,
<2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %vqdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle)
  ret <4 x i32> %vqdmulh2.i
}

; The patterns in this group spell the full saturating rounding mnemonic
; (sqrdmulh) rather than a suffix.

; sqrdmulh of %a by lane 3 of %v.
define <4 x i16> @test_vqrdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqrdmulh_lane_s16:
; CHECK: sqrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vqrdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i16> %vqrdmulh2.i
}

; sqrdmulh of the 128-bit %a by lane 3 of %v.
define <8 x i16> @test_vqrdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqrdmulhq_lane_s16:
; CHECK: sqrdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %vqrdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle)
  ret <8 x i16> %vqrdmulh2.i
}

; sqrdmulh of %a by lane 1 of %v.
define <2 x i32> @test_vqrdmulh_lane_s32(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqrdmulh_lane_s32:
; CHECK: sqrdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vqrdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i32> %vqrdmulh2.i
}

; sqrdmulh of the 128-bit %a by lane 1 of %v.
define <4 x i32> @test_vqrdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqrdmulhq_lane_s32:
; CHECK: sqrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
%vqrdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle)
  ret <4 x i32> %vqrdmulh2.i
}

; Floating-point multiply of %a by a splat of lane 1 of %v.
define <2 x float> @test_vmul_lane_f32(<2 x float> %a, <2 x float> %v) {
; CHECK-LABEL: test_vmul_lane_f32:
; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1>
  %prod = fmul <2 x float> %splat, %a
  ret <2 x float> %prod
}

; Single-element case: %a is reinterpreted as a scalar double through an
; <8 x i8> bitcast and multiplied by element 0 of %v.
define <1 x double> @test_vmul_lane_f64(<1 x double> %a, <1 x double> %v) {
; CHECK-LABEL: test_vmul_lane_f64:
; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
; CHECK-NEXT: ret
entry:
  %a.bytes = bitcast <1 x double> %a to <8 x i8>
  %a.scalar = bitcast <8 x i8> %a.bytes to double
  %v.elt = extractelement <1 x double> %v, i32 0
  %prod = fmul double %a.scalar, %v.elt
  %res = insertelement <1 x double> undef, double %prod, i32 0
  ret <1 x double> %res
}

; Floating-point multiply of the 128-bit %a by a splat of lane 1 of %v.
define <4 x float> @test_vmulq_lane_f32(<4 x float> %a, <2 x float> %v) {
; CHECK-LABEL: test_vmulq_lane_f32:
; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %prod = fmul <4 x float> %splat, %a
  ret <4 x float> %prod
}

; Floating-point multiply of %a by a splat of the only element of %v.
define <2 x double> @test_vmulq_lane_f64(<2 x double> %a, <1 x double> %v) {
; CHECK-LABEL: test_vmulq_lane_f64:
; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
  %prod = fmul <2 x double> %splat, %a
  ret <2 x double> %prod
}

; Floating-point multiply of %a by a lane of the 128-bit %v.
define <2 x float> @test_vmul_laneq_f32(<2 x float> %a, <4 x float> %v) {
; CHECK-LABEL: test_vmul_laneq_f32:
; CHECK: fmul {{v[0-9]+}}.2s,
{{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3>
  %mul = fmul <2 x float> %shuffle, %a
  ret <2 x float> %mul
}

; Single-element case: %a is reinterpreted as a scalar double through an
; <8 x i8> bitcast and multiplied by element 1 of the 128-bit %v.
define <1 x double> @test_vmul_laneq_f64(<1 x double> %a, <2 x double> %v) {
; CHECK-LABEL: test_vmul_laneq_f64:
; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
; CHECK-NEXT: ret
entry:
  %0 = bitcast <1 x double> %a to <8 x i8>
  %1 = bitcast <8 x i8> %0 to double
  %extract = extractelement <2 x double> %v, i32 1
  %2 = fmul double %1, %extract
  %3 = insertelement <1 x double> undef, double %2, i32 0
  ret <1 x double> %3
}

; Floating-point multiply of %a by a splat of lane 3 of %v.
define <4 x float> @test_vmulq_laneq_f32(<4 x float> %a, <4 x float> %v) {
; CHECK-LABEL: test_vmulq_laneq_f32:
; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %mul = fmul <4 x float> %shuffle, %a
  ret <4 x float> %mul
}

; Floating-point multiply of %a by a splat of lane 1 of %v.
define <2 x double> @test_vmulq_laneq_f64(<2 x double> %a, <2 x double> %v) {
; CHECK-LABEL: test_vmulq_laneq_f64:
; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  %mul = fmul <2 x double> %shuffle, %a
  ret <2 x double> %mul
}

; fmulx intrinsic of %a with a splat of lane 1 of %v; the mnemonic is spelled
; in full, matching the way the fmul tests above spell theirs.
define <2 x float> @test_vmulx_lane_f32(<2 x float> %a, <2 x float> %v) {
; CHECK-LABEL: test_vmulx_lane_f32:
; CHECK: fmulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1>
  %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle)
  ret <2 x float> %vmulx2.i
} 1502 1503 define <4 x float> @test_vmulxq_lane_f32(<4 x float> %a, <2 x float> %v) { 1504 ; CHECK-LABEL: test_vmulxq_lane_f32: 1505 ; CHECK: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] 1506 ; CHECK-NEXT: ret 1507 entry: 1508 %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 1509 %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle) 1510 ret <4 x float> %vmulx2.i 1511 } 1512 1513 define <2 x double> @test_vmulxq_lane_f64(<2 x double> %a, <1 x double> %v) { 1514 ; CHECK-LABEL: test_vmulxq_lane_f64: 1515 ; CHECK: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] 1516 ; CHECK-NEXT: ret 1517 entry: 1518 %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer 1519 %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) 1520 ret <2 x double> %vmulx2.i 1521 } 1522 1523 define <2 x float> @test_vmulx_laneq_f32(<2 x float> %a, <4 x float> %v) { 1524 ; CHECK-LABEL: test_vmulx_laneq_f32: 1525 ; CHECK: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] 1526 ; CHECK-NEXT: ret 1527 entry: 1528 %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3> 1529 %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle) 1530 ret <2 x float> %vmulx2.i 1531 } 1532 1533 define <4 x float> @test_vmulxq_laneq_f32(<4 x float> %a, <4 x float> %v) { 1534 ; CHECK-LABEL: test_vmulxq_laneq_f32: 1535 ; CHECK: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] 1536 ; CHECK-NEXT: ret 1537 entry: 1538 %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1539 %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle) 1540 ret <4 x float> %vmulx2.i 1541 } 1542 1543 define <2 x double> @test_vmulxq_laneq_f64(<2 x double> %a, <2 x double> %v) { 1544 
; CHECK-LABEL: test_vmulxq_laneq_f64: 1545 ; CHECK: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] 1546 ; CHECK-NEXT: ret 1547 entry: 1548 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1> 1549 %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) 1550 ret <2 x double> %vmulx2.i 1551 } 1552 1553 define <4 x i16> @test_vmla_lane_s16_0(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) { 1554 ; CHECK-LABEL: test_vmla_lane_s16_0: 1555 ; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] 1556 ; CHECK-NEXT: ret 1557 entry: 1558 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer 1559 %mul = mul <4 x i16> %shuffle, %b 1560 %add = add <4 x i16> %mul, %a 1561 ret <4 x i16> %add 1562 } 1563 1564 define <8 x i16> @test_vmlaq_lane_s16_0(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) { 1565 ; CHECK-LABEL: test_vmlaq_lane_s16_0: 1566 ; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] 1567 ; CHECK-NEXT: ret 1568 entry: 1569 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer 1570 %mul = mul <8 x i16> %shuffle, %b 1571 %add = add <8 x i16> %mul, %a 1572 ret <8 x i16> %add 1573 } 1574 1575 define <2 x i32> @test_vmla_lane_s32_0(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) { 1576 ; CHECK-LABEL: test_vmla_lane_s32_0: 1577 ; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 1578 ; CHECK-NEXT: ret 1579 entry: 1580 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer 1581 %mul = mul <2 x i32> %shuffle, %b 1582 %add = add <2 x i32> %mul, %a 1583 ret <2 x i32> %add 1584 } 1585 1586 define <4 x i32> @test_vmlaq_lane_s32_0(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) { 1587 ; CHECK-LABEL: test_vmlaq_lane_s32_0: 1588 ; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 1589 ; CHECK-NEXT: ret 1590 entry: 1591 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer 
1592 %mul = mul <4 x i32> %shuffle, %b 1593 %add = add <4 x i32> %mul, %a 1594 ret <4 x i32> %add 1595 } 1596 1597 define <4 x i16> @test_vmla_laneq_s16_0(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) { 1598 ; CHECK-LABEL: test_vmla_laneq_s16_0: 1599 ; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] 1600 ; CHECK-NEXT: ret 1601 entry: 1602 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer 1603 %mul = mul <4 x i16> %shuffle, %b 1604 %add = add <4 x i16> %mul, %a 1605 ret <4 x i16> %add 1606 } 1607 1608 define <8 x i16> @test_vmlaq_laneq_s16_0(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) { 1609 ; CHECK-LABEL: test_vmlaq_laneq_s16_0: 1610 ; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] 1611 ; CHECK-NEXT: ret 1612 entry: 1613 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer 1614 %mul = mul <8 x i16> %shuffle, %b 1615 %add = add <8 x i16> %mul, %a 1616 ret <8 x i16> %add 1617 } 1618 1619 define <2 x i32> @test_vmla_laneq_s32_0(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) { 1620 ; CHECK-LABEL: test_vmla_laneq_s32_0: 1621 ; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 1622 ; CHECK-NEXT: ret 1623 entry: 1624 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer 1625 %mul = mul <2 x i32> %shuffle, %b 1626 %add = add <2 x i32> %mul, %a 1627 ret <2 x i32> %add 1628 } 1629 1630 define <4 x i32> @test_vmlaq_laneq_s32_0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) { 1631 ; CHECK-LABEL: test_vmlaq_laneq_s32_0: 1632 ; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 1633 ; CHECK-NEXT: ret 1634 entry: 1635 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer 1636 %mul = mul <4 x i32> %shuffle, %b 1637 %add = add <4 x i32> %mul, %a 1638 ret <4 x i32> %add 1639 } 1640 1641 define <4 x i16> @test_vmls_lane_s16_0(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) { 1642 ; CHECK-LABEL: test_vmls_lane_s16_0: 1643 ; CHECK: mls 
{{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] 1644 ; CHECK-NEXT: ret 1645 entry: 1646 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer 1647 %mul = mul <4 x i16> %shuffle, %b 1648 %sub = sub <4 x i16> %a, %mul 1649 ret <4 x i16> %sub 1650 } 1651 1652 define <8 x i16> @test_vmlsq_lane_s16_0(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) { 1653 ; CHECK-LABEL: test_vmlsq_lane_s16_0: 1654 ; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] 1655 ; CHECK-NEXT: ret 1656 entry: 1657 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer 1658 %mul = mul <8 x i16> %shuffle, %b 1659 %sub = sub <8 x i16> %a, %mul 1660 ret <8 x i16> %sub 1661 } 1662 1663 define <2 x i32> @test_vmls_lane_s32_0(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) { 1664 ; CHECK-LABEL: test_vmls_lane_s32_0: 1665 ; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 1666 ; CHECK-NEXT: ret 1667 entry: 1668 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer 1669 %mul = mul <2 x i32> %shuffle, %b 1670 %sub = sub <2 x i32> %a, %mul 1671 ret <2 x i32> %sub 1672 } 1673 1674 define <4 x i32> @test_vmlsq_lane_s32_0(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) { 1675 ; CHECK-LABEL: test_vmlsq_lane_s32_0: 1676 ; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 1677 ; CHECK-NEXT: ret 1678 entry: 1679 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer 1680 %mul = mul <4 x i32> %shuffle, %b 1681 %sub = sub <4 x i32> %a, %mul 1682 ret <4 x i32> %sub 1683 } 1684 1685 define <4 x i16> @test_vmls_laneq_s16_0(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) { 1686 ; CHECK-LABEL: test_vmls_laneq_s16_0: 1687 ; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] 1688 ; CHECK-NEXT: ret 1689 entry: 1690 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer 1691 %mul = mul <4 x i16> %shuffle, %b 1692 %sub = sub <4 x i16> %a, %mul 1693 ret <4 x i16> %sub 1694 } 

; mul by a lane-0 splat of %v (zeroinitializer mask), subtracted from %a:
; expected to be emitted as a single by-element mls.

define <8 x i16> @test_vmlsq_laneq_s16_0(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsq_laneq_s16_0:
; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer
  %mul = mul <8 x i16> %shuffle, %b
  %sub = sub <8 x i16> %a, %mul
  ret <8 x i16> %sub
}

define <2 x i32> @test_vmls_laneq_s32_0(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmls_laneq_s32_0:
; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %mul = mul <2 x i32> %shuffle, %b
  %sub = sub <2 x i32> %a, %mul
  ret <2 x i32> %sub
}

define <4 x i32> @test_vmlsq_laneq_s32_0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsq_laneq_s32_0:
; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
  %mul = mul <4 x i32> %shuffle, %b
  %sub = sub <4 x i32> %a, %mul
  ret <4 x i32> %sub
}

; Plain integer mul by a lane-0 splat of %v: expected by-element mul.
; The _s and _u variants contain identical IR (integer mul has no signedness).

define <4 x i16> @test_vmul_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmul_lane_s16_0:
; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %mul = mul <4 x i16> %shuffle, %a
  ret <4 x i16> %mul
}

define <8 x i16> @test_vmulq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmulq_lane_s16_0:
; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
  %mul = mul <8 x i16> %shuffle, %a
  ret <8 x i16> %mul
}

define <2 x i32> @test_vmul_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmul_lane_s32_0:
; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %mul = mul <2 x i32> %shuffle, %a
  ret <2 x i32> %mul
}

define <4 x i32> @test_vmulq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmulq_lane_s32_0:
; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
  %mul = mul <4 x i32> %shuffle, %a
  ret <4 x i32> %mul
}

define <4 x i16> @test_vmul_lane_u16_0(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmul_lane_u16_0:
; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %mul = mul <4 x i16> %shuffle, %a
  ret <4 x i16> %mul
}

define <8 x i16> @test_vmulq_lane_u16_0(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmulq_lane_u16_0:
; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
  %mul = mul <8 x i16> %shuffle, %a
  ret <8 x i16> %mul
}

define <2 x i32> @test_vmul_lane_u32_0(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmul_lane_u32_0:
; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %mul = mul <2 x i32> %shuffle, %a
  ret <2 x i32> %mul
}

define <4 x i32> @test_vmulq_lane_u32_0(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmulq_lane_u32_0:
; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
  %mul = mul <4 x i32> %shuffle, %a
  ret <4 x i32> %mul
}

define <4 x i16> @test_vmul_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmul_laneq_s16_0:
; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %mul = mul <4 x i16> %shuffle, %a
  ret <4 x i16> %mul
}

define <8 x i16> @test_vmulq_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmulq_laneq_s16_0:
; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer
  %mul = mul <8 x i16> %shuffle, %a
  ret <8 x i16> %mul
}

define <2 x i32> @test_vmul_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmul_laneq_s32_0:
; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %mul = mul <2 x i32> %shuffle, %a
  ret <2 x i32> %mul
}

define <4 x i32> @test_vmulq_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmulq_laneq_s32_0:
; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
  %mul = mul <4 x i32> %shuffle, %a
  ret <4 x i32> %mul
}

define <4 x i16> @test_vmul_laneq_u16_0(<4 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmul_laneq_u16_0:
; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %mul = mul <4 x i16> %shuffle, %a
  ret <4 x i16> %mul
}

define <8 x i16> @test_vmulq_laneq_u16_0(<8 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmulq_laneq_u16_0:
; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer
  %mul = mul <8 x i16> %shuffle, %a
  ret <8 x i16> %mul
}

define <2 x i32> @test_vmul_laneq_u32_0(<2 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmul_laneq_u32_0:
; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %mul = mul <2 x i32> %shuffle, %a
  ret <2 x i32> %mul
}

define <4 x i32> @test_vmulq_laneq_u32_0(<4 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmulq_laneq_u32_0:
; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
  %mul = mul <4 x i32> %shuffle, %a
  ret <4 x i32> %mul
}

; llvm.fma with a lane-0 splat of %v as the first multiplicand: expected
; by-element fmla.

define <2 x float> @test_vfma_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
; CHECK-LABEL: test_vfma_lane_f32_0:
; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer
  %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
  ret <2 x float> %0
}

define <4 x float> @test_vfmaq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
; CHECK-LABEL: test_vfmaq_lane_f32_0:
; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %lane = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer
  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
  ret <4 x float> %0
}

define <2 x float> @test_vfma_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
; CHECK-LABEL: test_vfma_laneq_f32_0:
; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %lane = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer
  %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
  ret <2 x float> %0
}

define <4 x float> @test_vfmaq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) {
; CHECK-LABEL: test_vfmaq_laneq_f32_0:
; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %lane = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
  ret <4 x float> %0
}

; Same as above, but %v is negated (fsub from -0.0) before the lane is
; splatted: expected by-element fmls with no separate negate instruction.

define <2 x float> @test_vfms_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
; CHECK-LABEL: test_vfms_lane_f32_0:
; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
  %lane = shufflevector <2 x float> %sub, <2 x float> undef, <2 x i32> zeroinitializer
  %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
  ret <2 x float> %0
}

define <4 x float> @test_vfmsq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
; CHECK-LABEL: test_vfmsq_lane_f32_0:
; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
  %lane = shufflevector <2 x float> %sub, <2 x float> undef, <4 x i32> zeroinitializer
  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
  ret <4 x float> %0
}

define <2 x float> @test_vfms_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
; CHECK-LABEL: test_vfms_laneq_f32_0:
; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
  %lane = shufflevector <4 x float> %sub, <4 x float> undef, <2 x i32> zeroinitializer
  %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
  ret <2 x float> %0
}

define <4 x float> @test_vfmsq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) {
; CHECK-LABEL: test_vfmsq_laneq_f32_0:
; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
  %lane = shufflevector <4 x float> %sub, <4 x float> undef, <4 x i32> zeroinitializer
  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
  ret <4 x float> %0
}

define <2 x double> @test_vfmaq_laneq_f64_0(<2 x double> %a, <2 x double> %b, <2 x double> %v) {
; CHECK-LABEL: test_vfmaq_laneq_f64_0:
; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
; CHECK-NEXT: ret
entry:
  %lane = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer
  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
  ret <2 x double> %0
}

define <2 x double> @test_vfmsq_laneq_f64_0(<2 x double> %a, <2 x double> %b, <2 x double> %v) {
; CHECK-LABEL: test_vfmsq_laneq_f64_0:
; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
; CHECK-NEXT: ret
entry:
  %sub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v
  %lane = shufflevector <2 x double> %sub, <2 x double> undef, <2 x i32> zeroinitializer
  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
  ret <2 x double> %0
}

; Signed widening multiply (llvm.aarch64.neon.smull) by a lane-0 splat, then
; add to %a. The signed mnemonic is matched in full so that an unsigned
; instruction cannot satisfy the pattern.

define <4 x i32> @test_vmlal_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlal_lane_s16_0:
; CHECK: smlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}

define <2 x i64> @test_vmlal_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlal_lane_s32_0:
; CHECK: smlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}

define <4 x i32> @test_vmlal_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlal_laneq_s16_0:
; CHECK: smlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}

define <2 x i64> @test_vmlal_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlal_laneq_s32_0:
; CHECK: smlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}

; "high" variants: %shuffle.i selects the upper half of %b, which is expected
; to fold into the "2" form of the instruction operating on the full register.

define <4 x i32> @test_vmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlal_high_lane_s16_0:
; CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}

define <2 x i64> @test_vmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlal_high_lane_s32_0:
; CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}

define <4 x i32> @test_vmlal_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlal_high_laneq_s16_0:
; CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}

define <2 x i64> @test_vmlal_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlal_high_laneq_s32_0:
; CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}

; Signed widening multiply by a lane-0 splat, subtracted from %a.

define <4 x i32> @test_vmlsl_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsl_lane_s16_0:
; CHECK: smlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

define <2 x i64> @test_vmlsl_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlsl_lane_s32_0:
; CHECK: smlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}

define <4 x i32> @test_vmlsl_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsl_laneq_s16_0:
; CHECK: smlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

define <2 x i64> @test_vmlsl_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsl_laneq_s32_0:
; CHECK: smlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}

define <4 x i32> @test_vmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsl_high_lane_s16_0:
; CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

define <2 x i64> @test_vmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlsl_high_lane_s32_0:
; CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}

define <4 x i32> @test_vmlsl_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsl_high_laneq_s16_0:
; CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

define <2 x i64> @test_vmlsl_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsl_high_laneq_s32_0:
; CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}

; Unsigned counterparts: identical shape, but the widening multiply is
; llvm.aarch64.neon.umull, so the unsigned mnemonics are required.

define <4 x i32> @test_vmlal_lane_u16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlal_lane_u16_0:
; CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}

define <2 x i64> @test_vmlal_lane_u32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlal_lane_u32_0:
; CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}

define <4 x i32> @test_vmlal_laneq_u16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlal_laneq_u16_0:
; CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}

define <2 x i64> @test_vmlal_laneq_u32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlal_laneq_u32_0:
; CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}

define <4 x i32> @test_vmlal_high_lane_u16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlal_high_lane_u16_0:
; CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}

define <2 x i64> @test_vmlal_high_lane_u32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlal_high_lane_u32_0:
; CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}

define <4 x i32> @test_vmlal_high_laneq_u16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlal_high_laneq_u16_0:
; CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}

define <2 x i64> @test_vmlal_high_laneq_u32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlal_high_laneq_u32_0:
; CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}

define <4 x i32> @test_vmlsl_lane_u16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsl_lane_u16_0:
; CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

define <2 x i64> @test_vmlsl_lane_u32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlsl_lane_u32_0:
; CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}

define <4 x i32> @test_vmlsl_laneq_u16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsl_laneq_u16_0:
; CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

define <2 x i64> @test_vmlsl_laneq_u32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsl_laneq_u32_0:
; CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}

define <4 x i32> @test_vmlsl_high_lane_u16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsl_high_lane_u16_0:
; CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

define <2 x i64> @test_vmlsl_high_lane_u32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlsl_high_lane_u32_0:
; CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}

define <4 x i32> @test_vmlsl_high_laneq_u16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsl_high_laneq_u16_0:
; CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

define <2 x i64> @test_vmlsl_high_laneq_u32_0(<2 x i64>
%a, <4 x i32> %b, <4 x i32> %v) { 2351 ; CHECK-LABEL: test_vmlsl_high_laneq_u32_0: 2352 ; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 2353 ; CHECK-NEXT: ret 2354 entry: 2355 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 2356 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer 2357 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 2358 %sub = sub <2 x i64> %a, %vmull2.i 2359 ret <2 x i64> %sub 2360 } 2361 2362 define <4 x i32> @test_vmull_lane_s16_0(<4 x i16> %a, <4 x i16> %v) { 2363 ; CHECK-LABEL: test_vmull_lane_s16_0: 2364 ; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] 2365 ; CHECK-NEXT: ret 2366 entry: 2367 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer 2368 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) 2369 ret <4 x i32> %vmull2.i 2370 } 2371 2372 define <2 x i64> @test_vmull_lane_s32_0(<2 x i32> %a, <2 x i32> %v) { 2373 ; CHECK-LABEL: test_vmull_lane_s32_0: 2374 ; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 2375 ; CHECK-NEXT: ret 2376 entry: 2377 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer 2378 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) 2379 ret <2 x i64> %vmull2.i 2380 } 2381 2382 define <4 x i32> @test_vmull_lane_u16_0(<4 x i16> %a, <4 x i16> %v) { 2383 ; CHECK-LABEL: test_vmull_lane_u16_0: 2384 ; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] 2385 ; CHECK-NEXT: ret 2386 entry: 2387 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer 2388 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) 2389 ret <4 x i32> %vmull2.i 2390 } 2391 2392 define <2 x i64> @test_vmull_lane_u32_0(<2 x i32> %a, <2 x i32> %v) { 2393 ; CHECK-LABEL: 
test_vmull_lane_u32_0: 2394 ; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 2395 ; CHECK-NEXT: ret 2396 entry: 2397 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer 2398 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) 2399 ret <2 x i64> %vmull2.i 2400 } 2401 2402 define <4 x i32> @test_vmull_high_lane_s16_0(<8 x i16> %a, <4 x i16> %v) { 2403 ; CHECK-LABEL: test_vmull_high_lane_s16_0: 2404 ; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] 2405 ; CHECK-NEXT: ret 2406 entry: 2407 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2408 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer 2409 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 2410 ret <4 x i32> %vmull2.i 2411 } 2412 2413 define <2 x i64> @test_vmull_high_lane_s32_0(<4 x i32> %a, <2 x i32> %v) { 2414 ; CHECK-LABEL: test_vmull_high_lane_s32_0: 2415 ; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 2416 ; CHECK-NEXT: ret 2417 entry: 2418 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 2419 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer 2420 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 2421 ret <2 x i64> %vmull2.i 2422 } 2423 2424 define <4 x i32> @test_vmull_high_lane_u16_0(<8 x i16> %a, <4 x i16> %v) { 2425 ; CHECK-LABEL: test_vmull_high_lane_u16_0: 2426 ; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] 2427 ; CHECK-NEXT: ret 2428 entry: 2429 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2430 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer 2431 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 
x i16> %shuffle) 2432 ret <4 x i32> %vmull2.i 2433 } 2434 2435 define <2 x i64> @test_vmull_high_lane_u32_0(<4 x i32> %a, <2 x i32> %v) { 2436 ; CHECK-LABEL: test_vmull_high_lane_u32_0: 2437 ; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 2438 ; CHECK-NEXT: ret 2439 entry: 2440 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 2441 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer 2442 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 2443 ret <2 x i64> %vmull2.i 2444 } 2445 2446 define <4 x i32> @test_vmull_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) { 2447 ; CHECK-LABEL: test_vmull_laneq_s16_0: 2448 ; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] 2449 ; CHECK-NEXT: ret 2450 entry: 2451 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer 2452 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) 2453 ret <4 x i32> %vmull2.i 2454 } 2455 2456 define <2 x i64> @test_vmull_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) { 2457 ; CHECK-LABEL: test_vmull_laneq_s32_0: 2458 ; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 2459 ; CHECK-NEXT: ret 2460 entry: 2461 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer 2462 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) 2463 ret <2 x i64> %vmull2.i 2464 } 2465 2466 define <4 x i32> @test_vmull_laneq_u16_0(<4 x i16> %a, <8 x i16> %v) { 2467 ; CHECK-LABEL: test_vmull_laneq_u16_0: 2468 ; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] 2469 ; CHECK-NEXT: ret 2470 entry: 2471 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer 2472 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) 2473 ret <4 x i32> %vmull2.i 2474 } 2475 2476 define <2 x i64> 
@test_vmull_laneq_u32_0(<2 x i32> %a, <4 x i32> %v) { 2477 ; CHECK-LABEL: test_vmull_laneq_u32_0: 2478 ; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 2479 ; CHECK-NEXT: ret 2480 entry: 2481 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer 2482 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) 2483 ret <2 x i64> %vmull2.i 2484 } 2485 2486 define <4 x i32> @test_vmull_high_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) { 2487 ; CHECK-LABEL: test_vmull_high_laneq_s16_0: 2488 ; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] 2489 ; CHECK-NEXT: ret 2490 entry: 2491 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2492 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer 2493 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 2494 ret <4 x i32> %vmull2.i 2495 } 2496 2497 define <2 x i64> @test_vmull_high_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) { 2498 ; CHECK-LABEL: test_vmull_high_laneq_s32_0: 2499 ; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 2500 ; CHECK-NEXT: ret 2501 entry: 2502 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 2503 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer 2504 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 2505 ret <2 x i64> %vmull2.i 2506 } 2507 2508 define <4 x i32> @test_vmull_high_laneq_u16_0(<8 x i16> %a, <8 x i16> %v) { 2509 ; CHECK-LABEL: test_vmull_high_laneq_u16_0: 2510 ; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] 2511 ; CHECK-NEXT: ret 2512 entry: 2513 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2514 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer 2515 
%vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 2516 ret <4 x i32> %vmull2.i 2517 } 2518 2519 define <2 x i64> @test_vmull_high_laneq_u32_0(<4 x i32> %a, <4 x i32> %v) { 2520 ; CHECK-LABEL: test_vmull_high_laneq_u32_0: 2521 ; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 2522 ; CHECK-NEXT: ret 2523 entry: 2524 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 2525 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer 2526 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 2527 ret <2 x i64> %vmull2.i 2528 } 2529 2530 define <4 x i32> @test_vqdmlal_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { 2531 ; CHECK-LABEL: test_vqdmlal_lane_s16_0: 2532 ; CHECK: qdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] 2533 ; CHECK-NEXT: ret 2534 entry: 2535 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer 2536 %vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) 2537 %vqdmlal4.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i) 2538 ret <4 x i32> %vqdmlal4.i 2539 } 2540 2541 define <2 x i64> @test_vqdmlal_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { 2542 ; CHECK-LABEL: test_vqdmlal_lane_s32_0: 2543 ; CHECK: qdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 2544 ; CHECK-NEXT: ret 2545 entry: 2546 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer 2547 %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) 2548 %vqdmlal4.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i) 2549 ret <2 x i64> %vqdmlal4.i 2550 } 2551 2552 define <4 x i32> @test_vqdmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { 2553 ; CHECK-LABEL: 
test_vqdmlal_high_lane_s16_0: 2554 ; CHECK: qdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] 2555 ; CHECK-NEXT: ret 2556 entry: 2557 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2558 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer 2559 %vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 2560 %vqdmlal4.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i) 2561 ret <4 x i32> %vqdmlal4.i 2562 } 2563 2564 define <2 x i64> @test_vqdmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { 2565 ; CHECK-LABEL: test_vqdmlal_high_lane_s32_0: 2566 ; CHECK: qdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 2567 ; CHECK-NEXT: ret 2568 entry: 2569 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 2570 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer 2571 %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 2572 %vqdmlal4.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i) 2573 ret <2 x i64> %vqdmlal4.i 2574 } 2575 2576 define <4 x i32> @test_vqdmlsl_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { 2577 ; CHECK-LABEL: test_vqdmlsl_lane_s16_0: 2578 ; CHECK: qdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] 2579 ; CHECK-NEXT: ret 2580 entry: 2581 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer 2582 %vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) 2583 %vqdmlsl4.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i) 2584 ret <4 x i32> %vqdmlsl4.i 2585 } 2586 2587 define <2 x i64> @test_vqdmlsl_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { 2588 ; CHECK-LABEL: test_vqdmlsl_lane_s32_0: 
2589 ; CHECK: qdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 2590 ; CHECK-NEXT: ret 2591 entry: 2592 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer 2593 %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) 2594 %vqdmlsl4.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i) 2595 ret <2 x i64> %vqdmlsl4.i 2596 } 2597 2598 define <4 x i32> @test_vqdmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { 2599 ; CHECK-LABEL: test_vqdmlsl_high_lane_s16_0: 2600 ; CHECK: qdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] 2601 ; CHECK-NEXT: ret 2602 entry: 2603 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2604 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer 2605 %vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 2606 %vqdmlsl4.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i) 2607 ret <4 x i32> %vqdmlsl4.i 2608 } 2609 2610 define <2 x i64> @test_vqdmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { 2611 ; CHECK-LABEL: test_vqdmlsl_high_lane_s32_0: 2612 ; CHECK: qdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 2613 ; CHECK-NEXT: ret 2614 entry: 2615 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 2616 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer 2617 %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 2618 %vqdmlsl4.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i) 2619 ret <2 x i64> %vqdmlsl4.i 2620 } 2621 2622 define <4 x i32> @test_vqdmull_lane_s16_0(<4 x i16> %a, <4 x i16> %v) { 2623 ; CHECK-LABEL: test_vqdmull_lane_s16_0: 2624 ; CHECK: qdmull 
{{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] 2625 ; CHECK-NEXT: ret 2626 entry: 2627 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer 2628 %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) 2629 ret <4 x i32> %vqdmull2.i 2630 } 2631 2632 define <2 x i64> @test_vqdmull_lane_s32_0(<2 x i32> %a, <2 x i32> %v) { 2633 ; CHECK-LABEL: test_vqdmull_lane_s32_0: 2634 ; CHECK: qdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 2635 ; CHECK-NEXT: ret 2636 entry: 2637 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer 2638 %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) 2639 ret <2 x i64> %vqdmull2.i 2640 } 2641 2642 define <4 x i32> @test_vqdmull_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) { 2643 ; CHECK-LABEL: test_vqdmull_laneq_s16_0: 2644 ; CHECK: qdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] 2645 ; CHECK-NEXT: ret 2646 entry: 2647 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer 2648 %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) 2649 ret <4 x i32> %vqdmull2.i 2650 } 2651 2652 define <2 x i64> @test_vqdmull_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) { 2653 ; CHECK-LABEL: test_vqdmull_laneq_s32_0: 2654 ; CHECK: qdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 2655 ; CHECK-NEXT: ret 2656 entry: 2657 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer 2658 %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) 2659 ret <2 x i64> %vqdmull2.i 2660 } 2661 2662 define <4 x i32> @test_vqdmull_high_lane_s16_0(<8 x i16> %a, <4 x i16> %v) { 2663 ; CHECK-LABEL: test_vqdmull_high_lane_s16_0: 2664 ; CHECK: qdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] 2665 ; CHECK-NEXT: ret 2666 entry: 2667 %shuffle.i = shufflevector <8 x i16> %a, <8 
x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2668 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer 2669 %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 2670 ret <4 x i32> %vqdmull2.i 2671 } 2672 2673 define <2 x i64> @test_vqdmull_high_lane_s32_0(<4 x i32> %a, <2 x i32> %v) { 2674 ; CHECK-LABEL: test_vqdmull_high_lane_s32_0: 2675 ; CHECK: qdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 2676 ; CHECK-NEXT: ret 2677 entry: 2678 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 2679 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer 2680 %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 2681 ret <2 x i64> %vqdmull2.i 2682 } 2683 2684 define <4 x i32> @test_vqdmull_high_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) { 2685 ; CHECK-LABEL: test_vqdmull_high_laneq_s16_0: 2686 ; CHECK: qdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] 2687 ; CHECK-NEXT: ret 2688 entry: 2689 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2690 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer 2691 %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 2692 ret <4 x i32> %vqdmull2.i 2693 } 2694 2695 define <2 x i64> @test_vqdmull_high_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) { 2696 ; CHECK-LABEL: test_vqdmull_high_laneq_s32_0: 2697 ; CHECK: qdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 2698 ; CHECK-NEXT: ret 2699 entry: 2700 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 2701 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer 2702 %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 2703 ret <2 x 
i64> %vqdmull2.i 2704 } 2705 2706 define <4 x i16> @test_vqdmulh_lane_s16_0(<4 x i16> %a, <4 x i16> %v) { 2707 ; CHECK-LABEL: test_vqdmulh_lane_s16_0: 2708 ; CHECK: qdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] 2709 ; CHECK-NEXT: ret 2710 entry: 2711 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer 2712 %vqdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle) 2713 ret <4 x i16> %vqdmulh2.i 2714 } 2715 2716 define <8 x i16> @test_vqdmulhq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) { 2717 ; CHECK-LABEL: test_vqdmulhq_lane_s16_0: 2718 ; CHECK: qdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] 2719 ; CHECK-NEXT: ret 2720 entry: 2721 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer 2722 %vqdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle) 2723 ret <8 x i16> %vqdmulh2.i 2724 } 2725 2726 define <2 x i32> @test_vqdmulh_lane_s32_0(<2 x i32> %a, <2 x i32> %v) { 2727 ; CHECK-LABEL: test_vqdmulh_lane_s32_0: 2728 ; CHECK: qdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 2729 ; CHECK-NEXT: ret 2730 entry: 2731 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer 2732 %vqdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle) 2733 ret <2 x i32> %vqdmulh2.i 2734 } 2735 2736 define <4 x i32> @test_vqdmulhq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) { 2737 ; CHECK-LABEL: test_vqdmulhq_lane_s32_0: 2738 ; CHECK: qdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 2739 ; CHECK-NEXT: ret 2740 entry: 2741 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer 2742 %vqdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle) 2743 ret <4 x i32> %vqdmulh2.i 2744 } 2745 2746 define <4 x i16> @test_vqrdmulh_lane_s16_0(<4 x i16> %a, <4 x i16> %v) { 2747 ; CHECK-LABEL: 
test_vqrdmulh_lane_s16_0: 2748 ; CHECK: qrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] 2749 ; CHECK-NEXT: ret 2750 entry: 2751 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer 2752 %vqrdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle) 2753 ret <4 x i16> %vqrdmulh2.i 2754 } 2755 2756 define <8 x i16> @test_vqrdmulhq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) { 2757 ; CHECK-LABEL: test_vqrdmulhq_lane_s16_0: 2758 ; CHECK: qrdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] 2759 ; CHECK-NEXT: ret 2760 entry: 2761 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer 2762 %vqrdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle) 2763 ret <8 x i16> %vqrdmulh2.i 2764 } 2765 2766 define <2 x i32> @test_vqrdmulh_lane_s32_0(<2 x i32> %a, <2 x i32> %v) { 2767 ; CHECK-LABEL: test_vqrdmulh_lane_s32_0: 2768 ; CHECK: qrdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 2769 ; CHECK-NEXT: ret 2770 entry: 2771 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer 2772 %vqrdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle) 2773 ret <2 x i32> %vqrdmulh2.i 2774 } 2775 2776 define <4 x i32> @test_vqrdmulhq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) { 2777 ; CHECK-LABEL: test_vqrdmulhq_lane_s32_0: 2778 ; CHECK: qrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 2779 ; CHECK-NEXT: ret 2780 entry: 2781 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer 2782 %vqrdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle) 2783 ret <4 x i32> %vqrdmulh2.i 2784 } 2785 2786 define <2 x float> @test_vmul_lane_f32_0(<2 x float> %a, <2 x float> %v) { 2787 ; CHECK-LABEL: test_vmul_lane_f32_0: 2788 ; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 2789 ; CHECK-NEXT: ret 2790 
entry: 2791 %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer 2792 %mul = fmul <2 x float> %shuffle, %a 2793 ret <2 x float> %mul 2794 } 2795 2796 define <4 x float> @test_vmulq_lane_f32_0(<4 x float> %a, <2 x float> %v) { 2797 ; CHECK-LABEL: test_vmulq_lane_f32_0: 2798 ; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 2799 ; CHECK-NEXT: ret 2800 entry: 2801 %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer 2802 %mul = fmul <4 x float> %shuffle, %a 2803 ret <4 x float> %mul 2804 } 2805 2806 define <2 x float> @test_vmul_laneq_f32_0(<2 x float> %a, <4 x float> %v) { 2807 ; CHECK-LABEL: test_vmul_laneq_f32_0: 2808 ; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 2809 ; CHECK-NEXT: ret 2810 entry: 2811 %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer 2812 %mul = fmul <2 x float> %shuffle, %a 2813 ret <2 x float> %mul 2814 } 2815 2816 define <1 x double> @test_vmul_laneq_f64_0(<1 x double> %a, <2 x double> %v) { 2817 ; CHECK-LABEL: test_vmul_laneq_f64_0: 2818 ; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0] 2819 ; CHECK-NEXT: ret 2820 entry: 2821 %0 = bitcast <1 x double> %a to <8 x i8> 2822 %1 = bitcast <8 x i8> %0 to double 2823 %extract = extractelement <2 x double> %v, i32 0 2824 %2 = fmul double %1, %extract 2825 %3 = insertelement <1 x double> undef, double %2, i32 0 2826 ret <1 x double> %3 2827 } 2828 2829 define <4 x float> @test_vmulq_laneq_f32_0(<4 x float> %a, <4 x float> %v) { 2830 ; CHECK-LABEL: test_vmulq_laneq_f32_0: 2831 ; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 2832 ; CHECK-NEXT: ret 2833 entry: 2834 %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer 2835 %mul = fmul <4 x float> %shuffle, %a 2836 ret <4 x float> %mul 2837 } 2838 2839 define <2 x double> @test_vmulq_laneq_f64_0(<2 x double> %a, <2 x double> %v) { 2840 ; CHECK-LABEL: test_vmulq_laneq_f64_0: 
2841 ; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] 2842 ; CHECK-NEXT: ret 2843 entry: 2844 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer 2845 %mul = fmul <2 x double> %shuffle, %a 2846 ret <2 x double> %mul 2847 } 2848 2849 define <2 x float> @test_vmulx_lane_f32_0(<2 x float> %a, <2 x float> %v) { 2850 ; CHECK-LABEL: test_vmulx_lane_f32_0: 2851 ; CHECK: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 2852 ; CHECK-NEXT: ret 2853 entry: 2854 %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer 2855 %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle) 2856 ret <2 x float> %vmulx2.i 2857 } 2858 2859 define <4 x float> @test_vmulxq_lane_f32_0(<4 x float> %a, <2 x float> %v) { 2860 ; CHECK-LABEL: test_vmulxq_lane_f32_0: 2861 ; CHECK: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 2862 ; CHECK-NEXT: ret 2863 entry: 2864 %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer 2865 %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle) 2866 ret <4 x float> %vmulx2.i 2867 } 2868 2869 define <2 x double> @test_vmulxq_lane_f64_0(<2 x double> %a, <1 x double> %v) { 2870 ; CHECK-LABEL: test_vmulxq_lane_f64_0: 2871 ; CHECK: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] 2872 ; CHECK-NEXT: ret 2873 entry: 2874 %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer 2875 %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) 2876 ret <2 x double> %vmulx2.i 2877 } 2878 2879 define <2 x float> @test_vmulx_laneq_f32_0(<2 x float> %a, <4 x float> %v) { 2880 ; CHECK-LABEL: test_vmulx_laneq_f32_0: 2881 ; CHECK: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 2882 ; CHECK-NEXT: ret 2883 entry: 2884 %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x 
i32> zeroinitializer 2885 %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle) 2886 ret <2 x float> %vmulx2.i 2887 } 2888 2889 define <4 x float> @test_vmulxq_laneq_f32_0(<4 x float> %a, <4 x float> %v) { 2890 ; CHECK-LABEL: test_vmulxq_laneq_f32_0: 2891 ; CHECK: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 2892 ; CHECK-NEXT: ret 2893 entry: 2894 %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer 2895 %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle) 2896 ret <4 x float> %vmulx2.i 2897 } 2898 2899 define <2 x double> @test_vmulxq_laneq_f64_0(<2 x double> %a, <2 x double> %v) { 2900 ; CHECK-LABEL: test_vmulxq_laneq_f64_0: 2901 ; CHECK: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] 2902 ; CHECK-NEXT: ret 2903 entry: 2904 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer 2905 %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) 2906 ret <2 x double> %vmulx2.i 2907 } 2908 2909