; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s

; Codegen tests for NEON "by element" (lane-indexed) arithmetic on arm64.
;
; Each function splats (shufflevector) or extracts (extractelement) one lane
; of %v and feeds it to an integer multiply, multiply-accumulate/subtract,
; llvm.fma call, or widening smull/umull intrinsic. The directives in each
; function expect an instruction that uses the indexed operand form
; (vN.<T>[lane]) directly followed by ret; test_vfmsd_lane_f64 is the one
; exception and expects the scalar fmsub form.
;
; Naming used by the tests below, as visible from their signatures:
;   _lane   : %v is a 64-bit vector      _laneq : %v is a 128-bit vector
;   q       : 128-bit result/operands    _high  : upper half of %b is used

; Intrinsic declarations. Not all of them are referenced by the tests in this
; part of the file.
declare <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double>, <2 x double>)

declare <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float>, <4 x float>)

declare <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float>, <2 x float>)

declare <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32>, <4 x i32>)

declare <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32>, <2 x i32>)

declare <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16>, <8 x i16>)

declare <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16>, <4 x i16>)

declare <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32>, <4 x i32>)

declare <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32>, <2 x i32>)

declare <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16>, <8 x i16>)

declare <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16>, <4 x i16>)

declare <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>)

declare <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>)

declare <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>)

declare <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>)

declare <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>)

declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>)

declare <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32>, <2 x i32>)

declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>)

declare <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32>, <2 x i32>)

declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>)

; ---------------------------------------------------------------------------
; Integer multiply-accumulate by element: a + (splat(v[lane]) * b) -> mla
; ---------------------------------------------------------------------------

define <4 x i16> @test_vmla_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmla_lane_s16:
; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %mul = mul <4 x i16> %shuffle, %b
  %add = add <4 x i16> %mul, %a
  ret <4 x i16> %add
}

define <8 x i16> @test_vmlaq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlaq_lane_s16:
; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %mul = mul <8 x i16> %shuffle, %b
  %add = add <8 x i16> %mul, %a
  ret <8 x i16> %add
}

define <2 x i32> @test_vmla_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmla_lane_s32:
; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %mul = mul <2 x i32> %shuffle, %b
  %add = add <2 x i32> %mul, %a
  ret <2 x i32> %add
}

define <4 x i32> @test_vmlaq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlaq_lane_s32:
; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %mul = mul <4 x i32> %shuffle, %b
  %add = add <4 x i32> %mul, %a
  ret <4 x i32> %add
}

define <4 x i16> @test_vmla_laneq_s16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmla_laneq_s16:
; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %mul = mul <4 x i16> %shuffle, %b
  %add = add <4 x i16> %mul, %a
  ret <4 x i16> %add
}

define <8 x i16> @test_vmlaq_laneq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlaq_laneq_s16:
; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
  %mul = mul <8 x i16> %shuffle, %b
  %add = add <8 x i16> %mul, %a
  ret <8 x i16> %add
}

define <2 x i32> @test_vmla_laneq_s32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmla_laneq_s32:
; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %mul = mul <2 x i32> %shuffle, %b
  %add = add <2 x i32> %mul, %a
  ret <2 x i32> %add
}

define <4 x i32> @test_vmlaq_laneq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlaq_laneq_s32:
; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %mul = mul <4 x i32> %shuffle, %b
  %add = add <4 x i32> %mul, %a
  ret <4 x i32> %add
}

; ---------------------------------------------------------------------------
; Integer multiply-subtract by element: a - (splat(v[lane]) * b) -> mls
; ---------------------------------------------------------------------------

define <4 x i16> @test_vmls_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmls_lane_s16:
; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %mul = mul <4 x i16> %shuffle, %b
  %sub = sub <4 x i16> %a, %mul
  ret <4 x i16> %sub
}

define <8 x i16> @test_vmlsq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsq_lane_s16:
; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %mul = mul <8 x i16> %shuffle, %b
  %sub = sub <8 x i16> %a, %mul
  ret <8 x i16> %sub
}

define <2 x i32> @test_vmls_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmls_lane_s32:
; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %mul = mul <2 x i32> %shuffle, %b
  %sub = sub <2 x i32> %a, %mul
  ret <2 x i32> %sub
}

define <4 x i32> @test_vmlsq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlsq_lane_s32:
; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %mul = mul <4 x i32> %shuffle, %b
  %sub = sub <4 x i32> %a, %mul
  ret <4 x i32> %sub
}

define <4 x i16> @test_vmls_laneq_s16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmls_laneq_s16:
; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %mul = mul <4 x i16> %shuffle, %b
  %sub = sub <4 x i16> %a, %mul
  ret <4 x i16> %sub
}

define <8 x i16> @test_vmlsq_laneq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsq_laneq_s16:
; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
  %mul = mul <8 x i16> %shuffle, %b
  %sub = sub <8 x i16> %a, %mul
  ret <8 x i16> %sub
}

define <2 x i32> @test_vmls_laneq_s32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmls_laneq_s32:
; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %mul = mul <2 x i32> %shuffle, %b
  %sub = sub <2 x i32> %a, %mul
  ret <2 x i32> %sub
}

define <4 x i32> @test_vmlsq_laneq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsq_laneq_s32:
; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %mul = mul <4 x i32> %shuffle, %b
  %sub = sub <4 x i32> %a, %mul
  ret <4 x i32> %sub
}

; ---------------------------------------------------------------------------
; Integer multiply by element: splat(v[lane]) * a -> mul
; The _s and _u variants contain identical IR (plain `mul`).
; ---------------------------------------------------------------------------

define <4 x i16> @test_vmul_lane_s16(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmul_lane_s16:
; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %mul = mul <4 x i16> %shuffle, %a
  ret <4 x i16> %mul
}

define <8 x i16> @test_vmulq_lane_s16(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmulq_lane_s16:
; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %mul = mul <8 x i16> %shuffle, %a
  ret <8 x i16> %mul
}

define <2 x i32> @test_vmul_lane_s32(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmul_lane_s32:
; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %mul = mul <2 x i32> %shuffle, %a
  ret <2 x i32> %mul
}

define <4 x i32> @test_vmulq_lane_s32(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmulq_lane_s32:
; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %mul = mul <4 x i32> %shuffle, %a
  ret <4 x i32> %mul
}

define <4 x i16> @test_vmul_lane_u16(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmul_lane_u16:
; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %mul = mul <4 x i16> %shuffle, %a
  ret <4 x i16> %mul
}

define <8 x i16> @test_vmulq_lane_u16(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmulq_lane_u16:
; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %mul = mul <8 x i16> %shuffle, %a
  ret <8 x i16> %mul
}

define <2 x i32> @test_vmul_lane_u32(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmul_lane_u32:
; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %mul = mul <2 x i32> %shuffle, %a
  ret <2 x i32> %mul
}

define <4 x i32> @test_vmulq_lane_u32(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmulq_lane_u32:
; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %mul = mul <4 x i32> %shuffle, %a
  ret <4 x i32> %mul
}

define <4 x i16> @test_vmul_laneq_s16(<4 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmul_laneq_s16:
; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %mul = mul <4 x i16> %shuffle, %a
  ret <4 x i16> %mul
}

define <8 x i16> @test_vmulq_laneq_s16(<8 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmulq_laneq_s16:
; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
  %mul = mul <8 x i16> %shuffle, %a
  ret <8 x i16> %mul
}

define <2 x i32> @test_vmul_laneq_s32(<2 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmul_laneq_s32:
; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %mul = mul <2 x i32> %shuffle, %a
  ret <2 x i32> %mul
}

define <4 x i32> @test_vmulq_laneq_s32(<4 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmulq_laneq_s32:
; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %mul = mul <4 x i32> %shuffle, %a
  ret <4 x i32> %mul
}

define <4 x i16> @test_vmul_laneq_u16(<4 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmul_laneq_u16:
; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %mul = mul <4 x i16> %shuffle, %a
  ret <4 x i16> %mul
}

define <8 x i16> @test_vmulq_laneq_u16(<8 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmulq_laneq_u16:
; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
  %mul = mul <8 x i16> %shuffle, %a
  ret <8 x i16> %mul
}

define <2 x i32> @test_vmul_laneq_u32(<2 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmul_laneq_u32:
; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %mul = mul <2 x i32> %shuffle, %a
  ret <2 x i32> %mul
}

define <4 x i32> @test_vmulq_laneq_u32(<4 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmulq_laneq_u32:
; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %mul = mul <4 x i32> %shuffle, %a
  ret <4 x i32> %mul
}

; ---------------------------------------------------------------------------
; Vector fused multiply-add by element: llvm.fma(splat(v[lane]), b, a) -> fmla
; ---------------------------------------------------------------------------

define <2 x float> @test_vfma_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
; CHECK-LABEL: test_vfma_lane_f32:
; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1>
  %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
  ret <2 x float> %0
}

declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)

define <4 x float> @test_vfmaq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
; CHECK-LABEL: test_vfmaq_lane_f32:
; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %lane = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
  ret <4 x float> %0
}

declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)

define <2 x float> @test_vfma_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
; CHECK-LABEL: test_vfma_laneq_f32:
; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %lane = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3>
  %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
  ret <2 x float> %0
}

define <4 x float> @test_vfmaq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) {
; CHECK-LABEL: test_vfmaq_laneq_f32:
; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %lane = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
  ret <4 x float> %0
}

; ---------------------------------------------------------------------------
; Vector fused multiply-subtract by element: %v is negated (fsub -0.0, %v)
; before the lane splat, and the result is expected to fold into fmls.
; ---------------------------------------------------------------------------

define <2 x float> @test_vfms_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
; CHECK-LABEL: test_vfms_lane_f32:
; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
  %lane = shufflevector <2 x float> %sub, <2 x float> undef, <2 x i32> <i32 1, i32 1>
  %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
  ret <2 x float> %0
}

define <4 x float> @test_vfmsq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
; CHECK-LABEL: test_vfmsq_lane_f32:
; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
  %lane = shufflevector <2 x float> %sub, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
  ret <4 x float> %0
}

define <2 x float> @test_vfms_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
; CHECK-LABEL: test_vfms_laneq_f32:
; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
  %lane = shufflevector <4 x float> %sub, <4 x float> undef, <2 x i32> <i32 3, i32 3>
  %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
  ret <2 x float> %0
}

define <4 x float> @test_vfmsq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) {
; CHECK-LABEL: test_vfmsq_laneq_f32:
; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
  %lane = shufflevector <4 x float> %sub, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
  ret <4 x float> %0
}

; ---------------------------------------------------------------------------
; Double-precision vector fmla/fmls by element.
; ---------------------------------------------------------------------------

define <2 x double> @test_vfmaq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x double> %v) {
; CHECK-LABEL: test_vfmaq_lane_f64:
; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
; CHECK-NEXT: ret
entry:
  %lane = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
  ret <2 x double> %0
}

declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)

define <2 x double> @test_vfmaq_laneq_f64(<2 x double> %a, <2 x double> %b, <2 x double> %v) {
; CHECK-LABEL: test_vfmaq_laneq_f64:
; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1]
; CHECK-NEXT: ret
entry:
  %lane = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
  ret <2 x double> %0
}

define <2 x double> @test_vfmsq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x double> %v) {
; CHECK-LABEL: test_vfmsq_lane_f64:
; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
; CHECK-NEXT: ret
entry:
  %sub = fsub <1 x double> <double -0.000000e+00>, %v
  %lane = shufflevector <1 x double> %sub, <1 x double> undef, <2 x i32> zeroinitializer
  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
  ret <2 x double> %0
}

define <2 x double> @test_vfmsq_laneq_f64(<2 x double> %a, <2 x double> %b, <2 x double> %v) {
; CHECK-LABEL: test_vfmsq_laneq_f64:
; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1]
; CHECK-NEXT: ret
entry:
  %sub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v
  %lane = shufflevector <2 x double> %sub, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
  ret <2 x double> %0
}

; ---------------------------------------------------------------------------
; Scalar fused multiply-add/subtract with one operand taken from a vector lane
; via extractelement. The label directives in this group carry a trailing
; colon so they anchor on the emitted symbol label, like every other test here.
; ---------------------------------------------------------------------------

define float @test_vfmas_laneq_f32(float %a, float %b, <4 x float> %v) {
; CHECK-LABEL: test_vfmas_laneq_f32:
; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %extract = extractelement <4 x float> %v, i32 3
  %0 = tail call float @llvm.fma.f32(float %b, float %extract, float %a)
  ret float %0
}

declare float @llvm.fma.f32(float, float, float)

; The lane comes from a <1 x double>, and the expected instruction here is the
; four-operand scalar fmsub rather than an indexed fmls.
define double @test_vfmsd_lane_f64(double %a, double %b, <1 x double> %v) {
; CHECK-LABEL: test_vfmsd_lane_f64:
; CHECK: fmsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
; CHECK-NEXT: ret
entry:
  %extract.rhs = extractelement <1 x double> %v, i32 0
  %extract = fsub double -0.000000e+00, %extract.rhs
  %0 = tail call double @llvm.fma.f64(double %b, double %extract, double %a)
  ret double %0
}

declare double @llvm.fma.f64(double, double, double)

define float @test_vfmss_laneq_f32(float %a, float %b, <4 x float> %v) {
; CHECK-LABEL: test_vfmss_laneq_f32:
; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %extract.rhs = extractelement <4 x float> %v, i32 3
  %extract = fsub float -0.000000e+00, %extract.rhs
  %0 = tail call float @llvm.fma.f32(float %b, float %extract, float %a)
  ret float %0
}

define double @test_vfmsd_laneq_f64(double %a, double %b, <2 x double> %v) {
; CHECK-LABEL: test_vfmsd_laneq_f64:
; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
; CHECK-NEXT: ret
entry:
  %extract.rhs = extractelement <2 x double> %v, i32 1
  %extract = fsub double -0.000000e+00, %extract.rhs
  %0 = tail call double @llvm.fma.f64(double %b, double %extract, double %a)
  ret double %0
}

; ---------------------------------------------------------------------------
; Signed widening multiply-accumulate by element: a + smull(b, splat(v[lane])).
; The patterns below match on "mlal"/"mlal2" without a signedness prefix.
; ---------------------------------------------------------------------------

define <4 x i32> @test_vmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlal_lane_s16:
; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}

define <2 x i64> @test_vmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlal_lane_s32:
; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}

define <4 x i32> @test_vmlal_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlal_laneq_s16:
; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}

define <2 x i64> @test_vmlal_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlal_laneq_s32:
; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}

; _high variants: %shuffle.i selects the upper half of %b before the multiply.

define <4 x i32> @test_vmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlal_high_lane_s16:
; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}

define <2 x i64> @test_vmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlal_high_lane_s32:
; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}

define <4 x i32> @test_vmlal_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlal_high_laneq_s16:
; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}

define <2 x i64> @test_vmlal_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlal_high_laneq_s32:
; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}

; ---------------------------------------------------------------------------
; Signed widening multiply-subtract by element: a - smull(b, splat(v[lane])).
; ---------------------------------------------------------------------------

define <4 x i32> @test_vmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsl_lane_s16:
; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

define <2 x i64> @test_vmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlsl_lane_s32:
; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}

define <4 x i32> @test_vmlsl_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsl_laneq_s16:
; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

define <2 x i64> @test_vmlsl_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsl_laneq_s32:
; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}

define <4 x i32> @test_vmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsl_high_lane_s16:
; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

define <2 x i64> @test_vmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlsl_high_lane_s32:
; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}

define <4 x i32> @test_vmlsl_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsl_high_laneq_s16:
; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

define <2 x i64> @test_vmlsl_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsl_high_laneq_s32:
; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}

; ---------------------------------------------------------------------------
; Unsigned widening multiply-accumulate by element: a + umull(b, splat(v[lane])).
; ---------------------------------------------------------------------------

define <4 x i32> @test_vmlal_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlal_lane_u16:
; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}

define <2 x i64> @test_vmlal_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlal_lane_u32:
; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}

define <4 x i32> @test_vmlal_laneq_u16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlal_laneq_u16:
; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}

define <2 x i64> @test_vmlal_laneq_u32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlal_laneq_u32:
; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}

define <4 x i32> @test_vmlal_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlal_high_lane_u16:
; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}

define <2 x i64> @test_vmlal_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlal_high_lane_u32:
; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}

define <4 x i32> @test_vmlal_high_laneq_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlal_high_laneq_u16:
; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}

define <2 x i64> @test_vmlal_high_laneq_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlal_high_laneq_u32:
; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2
x i32> %shuffle.i, <2 x i32> %shuffle) 832 %add = add <2 x i64> %vmull2.i, %a 833 ret <2 x i64> %add 834 } 835 836 define <4 x i32> @test_vmlsl_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { 837 ; CHECK-LABEL: test_vmlsl_lane_u16: 838 ; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] 839 ; CHECK-NEXT: ret 840 entry: 841 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 842 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) 843 %sub = sub <4 x i32> %a, %vmull2.i 844 ret <4 x i32> %sub 845 } 846 847 define <2 x i64> @test_vmlsl_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { 848 ; CHECK-LABEL: test_vmlsl_lane_u32: 849 ; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] 850 ; CHECK-NEXT: ret 851 entry: 852 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 853 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) 854 %sub = sub <2 x i64> %a, %vmull2.i 855 ret <2 x i64> %sub 856 } 857 858 define <4 x i32> @test_vmlsl_laneq_u16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { 859 ; CHECK-LABEL: test_vmlsl_laneq_u16: 860 ; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] 861 ; CHECK-NEXT: ret 862 entry: 863 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 864 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) 865 %sub = sub <4 x i32> %a, %vmull2.i 866 ret <4 x i32> %sub 867 } 868 869 define <2 x i64> @test_vmlsl_laneq_u32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { 870 ; CHECK-LABEL: test_vmlsl_laneq_u32: 871 ; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] 872 ; CHECK-NEXT: ret 873 entry: 874 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> 875 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> 
%shuffle) 876 %sub = sub <2 x i64> %a, %vmull2.i 877 ret <2 x i64> %sub 878 } 879 880 define <4 x i32> @test_vmlsl_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { 881 ; CHECK-LABEL: test_vmlsl_high_lane_u16: 882 ; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] 883 ; CHECK-NEXT: ret 884 entry: 885 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 886 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 887 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 888 %sub = sub <4 x i32> %a, %vmull2.i 889 ret <4 x i32> %sub 890 } 891 892 define <2 x i64> @test_vmlsl_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { 893 ; CHECK-LABEL: test_vmlsl_high_lane_u32: 894 ; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] 895 ; CHECK-NEXT: ret 896 entry: 897 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 898 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 899 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 900 %sub = sub <2 x i64> %a, %vmull2.i 901 ret <2 x i64> %sub 902 } 903 904 define <4 x i32> @test_vmlsl_high_laneq_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { 905 ; CHECK-LABEL: test_vmlsl_high_laneq_u16: 906 ; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] 907 ; CHECK-NEXT: ret 908 entry: 909 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 910 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 911 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 912 %sub = sub <4 x i32> %a, %vmull2.i 913 ret <4 x i32> %sub 914 } 915 916 define <2 x i64> @test_vmlsl_high_laneq_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) 
{ 917 ; CHECK-LABEL: test_vmlsl_high_laneq_u32: 918 ; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] 919 ; CHECK-NEXT: ret 920 entry: 921 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 922 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> 923 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 924 %sub = sub <2 x i64> %a, %vmull2.i 925 ret <2 x i64> %sub 926 } 927 928 define <4 x i32> @test_vmull_lane_s16(<4 x i16> %a, <4 x i16> %v) { 929 ; CHECK-LABEL: test_vmull_lane_s16: 930 ; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] 931 ; CHECK-NEXT: ret 932 entry: 933 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 934 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) 935 ret <4 x i32> %vmull2.i 936 } 937 938 define <2 x i64> @test_vmull_lane_s32(<2 x i32> %a, <2 x i32> %v) { 939 ; CHECK-LABEL: test_vmull_lane_s32: 940 ; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] 941 ; CHECK-NEXT: ret 942 entry: 943 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 944 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) 945 ret <2 x i64> %vmull2.i 946 } 947 948 define <4 x i32> @test_vmull_lane_u16(<4 x i16> %a, <4 x i16> %v) { 949 ; CHECK-LABEL: test_vmull_lane_u16: 950 ; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] 951 ; CHECK-NEXT: ret 952 entry: 953 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 954 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) 955 ret <4 x i32> %vmull2.i 956 } 957 958 define <2 x i64> @test_vmull_lane_u32(<2 x i32> %a, <2 x i32> %v) { 959 ; CHECK-LABEL: test_vmull_lane_u32: 960 ; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, 
{{v[0-9]+}}.s[1] 961 ; CHECK-NEXT: ret 962 entry: 963 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 964 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) 965 ret <2 x i64> %vmull2.i 966 } 967 968 define <4 x i32> @test_vmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) { 969 ; CHECK-LABEL: test_vmull_high_lane_s16: 970 ; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] 971 ; CHECK-NEXT: ret 972 entry: 973 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 974 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 975 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 976 ret <4 x i32> %vmull2.i 977 } 978 979 define <2 x i64> @test_vmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) { 980 ; CHECK-LABEL: test_vmull_high_lane_s32: 981 ; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] 982 ; CHECK-NEXT: ret 983 entry: 984 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 985 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 986 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 987 ret <2 x i64> %vmull2.i 988 } 989 990 define <4 x i32> @test_vmull_high_lane_u16(<8 x i16> %a, <4 x i16> %v) { 991 ; CHECK-LABEL: test_vmull_high_lane_u16: 992 ; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] 993 ; CHECK-NEXT: ret 994 entry: 995 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 996 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 997 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 998 ret <4 x i32> %vmull2.i 999 } 1000 1001 define <2 x i64> 
@test_vmull_high_lane_u32(<4 x i32> %a, <2 x i32> %v) { 1002 ; CHECK-LABEL: test_vmull_high_lane_u32: 1003 ; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] 1004 ; CHECK-NEXT: ret 1005 entry: 1006 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1007 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 1008 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 1009 ret <2 x i64> %vmull2.i 1010 } 1011 1012 define <4 x i32> @test_vmull_laneq_s16(<4 x i16> %a, <8 x i16> %v) { 1013 ; CHECK-LABEL: test_vmull_laneq_s16: 1014 ; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] 1015 ; CHECK-NEXT: ret 1016 entry: 1017 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 1018 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) 1019 ret <4 x i32> %vmull2.i 1020 } 1021 1022 define <2 x i64> @test_vmull_laneq_s32(<2 x i32> %a, <4 x i32> %v) { 1023 ; CHECK-LABEL: test_vmull_laneq_s32: 1024 ; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] 1025 ; CHECK-NEXT: ret 1026 entry: 1027 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> 1028 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) 1029 ret <2 x i64> %vmull2.i 1030 } 1031 1032 define <4 x i32> @test_vmull_laneq_u16(<4 x i16> %a, <8 x i16> %v) { 1033 ; CHECK-LABEL: test_vmull_laneq_u16: 1034 ; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] 1035 ; CHECK-NEXT: ret 1036 entry: 1037 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 1038 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) 1039 ret <4 x i32> %vmull2.i 1040 } 1041 1042 define <2 x i64> @test_vmull_laneq_u32(<2 x i32> %a, <4 x i32> %v) { 1043 ; CHECK-LABEL: 
test_vmull_laneq_u32: 1044 ; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] 1045 ; CHECK-NEXT: ret 1046 entry: 1047 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> 1048 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) 1049 ret <2 x i64> %vmull2.i 1050 } 1051 1052 define <4 x i32> @test_vmull_high_laneq_s16(<8 x i16> %a, <8 x i16> %v) { 1053 ; CHECK-LABEL: test_vmull_high_laneq_s16: 1054 ; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] 1055 ; CHECK-NEXT: ret 1056 entry: 1057 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1058 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 1059 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 1060 ret <4 x i32> %vmull2.i 1061 } 1062 1063 define <2 x i64> @test_vmull_high_laneq_s32(<4 x i32> %a, <4 x i32> %v) { 1064 ; CHECK-LABEL: test_vmull_high_laneq_s32: 1065 ; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] 1066 ; CHECK-NEXT: ret 1067 entry: 1068 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1069 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> 1070 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 1071 ret <2 x i64> %vmull2.i 1072 } 1073 1074 define <4 x i32> @test_vmull_high_laneq_u16(<8 x i16> %a, <8 x i16> %v) { 1075 ; CHECK-LABEL: test_vmull_high_laneq_u16: 1076 ; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] 1077 ; CHECK-NEXT: ret 1078 entry: 1079 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1080 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 1081 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x 
i16> %shuffle.i, <4 x i16> %shuffle) 1082 ret <4 x i32> %vmull2.i 1083 } 1084 1085 define <2 x i64> @test_vmull_high_laneq_u32(<4 x i32> %a, <4 x i32> %v) { 1086 ; CHECK-LABEL: test_vmull_high_laneq_u32: 1087 ; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] 1088 ; CHECK-NEXT: ret 1089 entry: 1090 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1091 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> 1092 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 1093 ret <2 x i64> %vmull2.i 1094 } 1095 1096 define <4 x i32> @test_vqdmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { 1097 ; CHECK-LABEL: test_vqdmlal_lane_s16: 1098 ; CHECK: qdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] 1099 ; CHECK-NEXT: ret 1100 entry: 1101 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1102 %vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) 1103 %vqdmlal4.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i) 1104 ret <4 x i32> %vqdmlal4.i 1105 } 1106 1107 define <2 x i64> @test_vqdmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { 1108 ; CHECK-LABEL: test_vqdmlal_lane_s32: 1109 ; CHECK: qdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] 1110 ; CHECK-NEXT: ret 1111 entry: 1112 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 1113 %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) 1114 %vqdmlal4.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i) 1115 ret <2 x i64> %vqdmlal4.i 1116 } 1117 1118 define <4 x i32> @test_vqdmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { 1119 ; CHECK-LABEL: test_vqdmlal_high_lane_s16: 1120 ; CHECK: qdmlal2 {{v[0-9]+}}.4s, 
{{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] 1121 ; CHECK-NEXT: ret 1122 entry: 1123 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1124 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1125 %vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 1126 %vqdmlal4.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i) 1127 ret <4 x i32> %vqdmlal4.i 1128 } 1129 1130 define <2 x i64> @test_vqdmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { 1131 ; CHECK-LABEL: test_vqdmlal_high_lane_s32: 1132 ; CHECK: qdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] 1133 ; CHECK-NEXT: ret 1134 entry: 1135 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1136 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 1137 %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 1138 %vqdmlal4.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i) 1139 ret <2 x i64> %vqdmlal4.i 1140 } 1141 1142 define <4 x i32> @test_vqdmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { 1143 ; CHECK-LABEL: test_vqdmlsl_lane_s16: 1144 ; CHECK: qdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] 1145 ; CHECK-NEXT: ret 1146 entry: 1147 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1148 %vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) 1149 %vqdmlsl4.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i) 1150 ret <4 x i32> %vqdmlsl4.i 1151 } 1152 1153 define <2 x i64> @test_vqdmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { 1154 ; CHECK-LABEL: test_vqdmlsl_lane_s32: 1155 ; CHECK: qdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, 
{{v[0-9]+}}.s[1] 1156 ; CHECK-NEXT: ret 1157 entry: 1158 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 1159 %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) 1160 %vqdmlsl4.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i) 1161 ret <2 x i64> %vqdmlsl4.i 1162 } 1163 1164 define <4 x i32> @test_vqdmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { 1165 ; CHECK-LABEL: test_vqdmlsl_high_lane_s16: 1166 ; CHECK: qdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] 1167 ; CHECK-NEXT: ret 1168 entry: 1169 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1170 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1171 %vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 1172 %vqdmlsl4.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i) 1173 ret <4 x i32> %vqdmlsl4.i 1174 } 1175 1176 define <2 x i64> @test_vqdmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { 1177 ; CHECK-LABEL: test_vqdmlsl_high_lane_s32: 1178 ; CHECK: qdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] 1179 ; CHECK-NEXT: ret 1180 entry: 1181 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1182 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 1183 %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 1184 %vqdmlsl4.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i) 1185 ret <2 x i64> %vqdmlsl4.i 1186 } 1187 1188 define <4 x i32> @test_vqdmull_lane_s16(<4 x i16> %a, <4 x i16> %v) { 1189 ; CHECK-LABEL: test_vqdmull_lane_s16: 1190 ; CHECK: qdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] 1191 ; 
CHECK-NEXT: ret 1192 entry: 1193 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1194 %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) 1195 ret <4 x i32> %vqdmull2.i 1196 } 1197 1198 define <2 x i64> @test_vqdmull_lane_s32(<2 x i32> %a, <2 x i32> %v) { 1199 ; CHECK-LABEL: test_vqdmull_lane_s32: 1200 ; CHECK: qdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] 1201 ; CHECK-NEXT: ret 1202 entry: 1203 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 1204 %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) 1205 ret <2 x i64> %vqdmull2.i 1206 } 1207 1208 define <4 x i32> @test_vqdmull_laneq_s16(<4 x i16> %a, <8 x i16> %v) { 1209 ; CHECK-LABEL: test_vqdmull_laneq_s16: 1210 ; CHECK: qdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] 1211 ; CHECK-NEXT: ret 1212 entry: 1213 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1214 %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) 1215 ret <4 x i32> %vqdmull2.i 1216 } 1217 1218 define <2 x i64> @test_vqdmull_laneq_s32(<2 x i32> %a, <4 x i32> %v) { 1219 ; CHECK-LABEL: test_vqdmull_laneq_s32: 1220 ; CHECK: qdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] 1221 ; CHECK-NEXT: ret 1222 entry: 1223 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> 1224 %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) 1225 ret <2 x i64> %vqdmull2.i 1226 } 1227 1228 define <4 x i32> @test_vqdmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) { 1229 ; CHECK-LABEL: test_vqdmull_high_lane_s16: 1230 ; CHECK: qdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] 1231 ; CHECK-NEXT: ret 1232 entry: 1233 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, 
i32 7> 1234 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1235 %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 1236 ret <4 x i32> %vqdmull2.i 1237 } 1238 1239 define <2 x i64> @test_vqdmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) { 1240 ; CHECK-LABEL: test_vqdmull_high_lane_s32: 1241 ; CHECK: qdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] 1242 ; CHECK-NEXT: ret 1243 entry: 1244 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1245 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 1246 %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 1247 ret <2 x i64> %vqdmull2.i 1248 } 1249 1250 define <4 x i32> @test_vqdmull_high_laneq_s16(<8 x i16> %a, <8 x i16> %v) { 1251 ; CHECK-LABEL: test_vqdmull_high_laneq_s16: 1252 ; CHECK: qdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] 1253 ; CHECK-NEXT: ret 1254 entry: 1255 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1256 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 1257 %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 1258 ret <4 x i32> %vqdmull2.i 1259 } 1260 1261 define <2 x i64> @test_vqdmull_high_laneq_s32(<4 x i32> %a, <4 x i32> %v) { 1262 ; CHECK-LABEL: test_vqdmull_high_laneq_s32: 1263 ; CHECK: qdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] 1264 ; CHECK-NEXT: ret 1265 entry: 1266 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1267 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> 1268 %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 1269 ret <2 x i64> %vqdmull2.i 1270 } 1271 1272 
; Saturating doubling multiply returning the high half, by lane of a 64-bit
; vector. The sqdmulh intrinsic applied to a splatted lane is expected to
; select SQDMULH (by element); the full mnemonic is spelled out in the
; pattern.

define <4 x i16> @test_vqdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmulh_lane_s16:
; CHECK: sqdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vqdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i16> %vqdmulh2.i
}

define <8 x i16> @test_vqdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmulhq_lane_s16:
; CHECK: sqdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %vqdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle)
  ret <8 x i16> %vqdmulh2.i
}

define <2 x i32> @test_vqdmulh_lane_s32(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmulh_lane_s32:
; CHECK: sqdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vqdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i32> %vqdmulh2.i
}

define <4 x i32> @test_vqdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmulhq_lane_s32:
; CHECK: sqdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %vqdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle)
  ret <4 x i32> %vqdmulh2.i
}

; Rounding variant of the above: the sqrdmulh intrinsic applied to a splatted
; lane is expected to select SQRDMULH (by element).

define <4 x i16> @test_vqrdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqrdmulh_lane_s16:
; CHECK: sqrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vqrdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i16> %vqrdmulh2.i
}

define <8 x i16> @test_vqrdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqrdmulhq_lane_s16:
; CHECK: sqrdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %vqrdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle)
  ret <8 x i16> %vqrdmulh2.i
}

define <2 x i32> @test_vqrdmulh_lane_s32(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqrdmulh_lane_s32:
; CHECK: sqrdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vqrdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i32> %vqrdmulh2.i
}

define <4 x i32> @test_vqrdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqrdmulhq_lane_s32:
; CHECK: sqrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %vqrdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle)
  ret <4 x i32> %vqrdmulh2.i
}

; Floating-point multiply by a splatted lane, expected to select FMUL
; (by element) for the vector cases.

define <2 x float> @test_vmul_lane_f32(<2 x float> %a, <2 x float> %v) {
; CHECK-LABEL: test_vmul_lane_f32:
; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1>
  %mul = fmul <2 x float> %shuffle, %a
  ret <2 x float> %mul
}

; <1 x double> operands: the multiply is performed on scalar doubles obtained
; through a bitcast and an extractelement, and a scalar d-register FMUL is
; expected.
define <1 x double> @test_vmul_lane_f64(<1 x double> %a, <1 x double> %v) {
; CHECK-LABEL: test_vmul_lane_f64:
; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
; CHECK-NEXT: ret
entry:
  %0 = bitcast <1 x double> %a to <8 x i8>
  %1 = bitcast <8 x i8> %0 to double
  %extract = extractelement <1 x double> %v, i32 0
  %2 = fmul double %1, %extract
  %3 = insertelement <1 x double> undef, double %2, i32 0
  ret <1 x double> %3
}

define <4 x float> @test_vmulq_lane_f32(<4 x float> %a, <2 x float> %v) {
; CHECK-LABEL: test_vmulq_lane_f32:
; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %mul = fmul <4 x float> %shuffle, %a
  ret <4 x float> %mul
}

define <2 x double> @test_vmulq_lane_f64(<2 x double> %a, <1 x double> %v) {
; CHECK-LABEL: test_vmulq_lane_f64:
; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
  %mul = fmul <2 x double> %shuffle, %a
  ret <2 x double> %mul
}

define <2 x float> @test_vmul_laneq_f32(<2 x float> %a, <4 x float> %v) {
; CHECK-LABEL: test_vmul_laneq_f32:
; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3>
  %mul = fmul <2 x float> %shuffle, %a
  ret <2 x float> %mul
}

; Scalar double multiplied by element 1 of a <2 x double>: the element is
; expected to be consumed directly as the by-element operand (.d[1]).
define <1 x double> @test_vmul_laneq_f64(<1 x double> %a, <2 x double> %v) {
; CHECK-LABEL: test_vmul_laneq_f64:
; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
; CHECK-NEXT: ret
entry:
  %0 = bitcast <1 x double> %a to <8 x i8>
  %1 = bitcast <8 x i8> %0 to double
  %extract = extractelement <2 x double> %v, i32 1
  %2 = fmul double %1, %extract
  %3 = insertelement <1 x double> undef, double %2, i32 0
  ret <1 x double> %3
}

define <4 x float> @test_vmulq_laneq_f32(<4 x float> %a, <4 x float> %v) {
; CHECK-LABEL: test_vmulq_laneq_f32:
; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %mul = fmul <4 x float> %shuffle, %a
  ret <4 x float> %mul
}

define <2 x double> @test_vmulq_laneq_f64(<2 x double> %a, <2 x double> %v) {
; CHECK-LABEL: test_vmulq_laneq_f64:
; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  %mul = fmul <2 x double> %shuffle, %a
  ret <2 x double> %mul
}

; Floating-point multiply-extended by a splatted lane: the fmulx intrinsic is
; expected to select FMULX (by element); the full mnemonic is spelled out in
; the pattern.

define <2 x float> @test_vmulx_lane_f32(<2 x float> %a, <2 x float> %v) {
; CHECK-LABEL: test_vmulx_lane_f32:
; CHECK: fmulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1>
  %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle)
  ret <2 x float> %vmulx2.i
}

define <4 x float> @test_vmulxq_lane_f32(<4 x float> %a, <2 x float> %v) {
; CHECK-LABEL: test_vmulxq_lane_f32:
; CHECK: fmulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle)
  ret <4 x float> %vmulx2.i
}

define <2 x double> @test_vmulxq_lane_f64(<2 x double> %a, <1 x double> %v) {
; CHECK-LABEL: test_vmulxq_lane_f64:
; CHECK: fmulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
  %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle)
  ret <2 x double> %vmulx2.i
}

define <2 x float> @test_vmulx_laneq_f32(<2 x float> %a, <4 x float> %v) {
; CHECK-LABEL: test_vmulx_laneq_f32:
; CHECK: fmulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3>
  %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle)
  ret <2 x float> %vmulx2.i
}

define <4 x float> @test_vmulxq_laneq_f32(<4 x float> %a, <4 x float> %v) {
; CHECK-LABEL: test_vmulxq_laneq_f32:
; CHECK: fmulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle)
  ret <4 x float> %vmulx2.i
}

define <2 x double> @test_vmulxq_laneq_f64(<2 x double> %a, <2 x double> %v) {
; CHECK-LABEL: test_vmulxq_laneq_f64:
; CHECK: fmulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle)
  ret <2 x double> %vmulx2.i
}

; Lane-0 variants of integer multiply-accumulate: the splat is written with a
; zeroinitializer shuffle mask, and mul + add is expected to select MLA
; (by element) using element 0.

define <4 x i16> @test_vmla_lane_s16_0(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmla_lane_s16_0:
; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %mul = mul <4 x i16> %shuffle, %b
  %add = add <4 x i16> %mul, %a
  ret <4 x i16> %add
}

define <8 x i16> @test_vmlaq_lane_s16_0(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlaq_lane_s16_0:
; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
  %mul = mul <8 x i16> %shuffle, %b
  %add = add <8 x i16> %mul, %a
  ret <8 x i16> %add
}

define <2 x i32> @test_vmla_lane_s32_0(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmla_lane_s32_0:
; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %mul = mul <2 x i32> %shuffle, %b
  %add = add <2 x i32> %mul, %a
  ret <2 x i32> %add
}

define <4 x i32> @test_vmlaq_lane_s32_0(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlaq_lane_s32_0:
; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
  %mul = mul <4 x i32> %shuffle, %b
  %add = add <4 x i32> %mul, %a
  ret <4 x i32> %add
}

define <4 x i16> @test_vmla_laneq_s16_0(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) { 1543 ; CHECK-LABEL: test_vmla_laneq_s16_0: 1544 ; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] 
1545 ; CHECK-NEXT: ret 1546 entry: 1547 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer 1548 %mul = mul <4 x i16> %shuffle, %b 1549 %add = add <4 x i16> %mul, %a 1550 ret <4 x i16> %add 1551 } 1552 1553 define <8 x i16> @test_vmlaq_laneq_s16_0(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) { 1554 ; CHECK-LABEL: test_vmlaq_laneq_s16_0: 1555 ; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] 1556 ; CHECK-NEXT: ret 1557 entry: 1558 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer 1559 %mul = mul <8 x i16> %shuffle, %b 1560 %add = add <8 x i16> %mul, %a 1561 ret <8 x i16> %add 1562 } 1563 1564 define <2 x i32> @test_vmla_laneq_s32_0(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) { 1565 ; CHECK-LABEL: test_vmla_laneq_s32_0: 1566 ; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 1567 ; CHECK-NEXT: ret 1568 entry: 1569 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer 1570 %mul = mul <2 x i32> %shuffle, %b 1571 %add = add <2 x i32> %mul, %a 1572 ret <2 x i32> %add 1573 } 1574 1575 define <4 x i32> @test_vmlaq_laneq_s32_0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) { 1576 ; CHECK-LABEL: test_vmlaq_laneq_s32_0: 1577 ; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 1578 ; CHECK-NEXT: ret 1579 entry: 1580 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer 1581 %mul = mul <4 x i32> %shuffle, %b 1582 %add = add <4 x i32> %mul, %a 1583 ret <4 x i32> %add 1584 } 1585 1586 define <4 x i16> @test_vmls_lane_s16_0(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) { 1587 ; CHECK-LABEL: test_vmls_lane_s16_0: 1588 ; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] 1589 ; CHECK-NEXT: ret 1590 entry: 1591 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer 1592 %mul = mul <4 x i16> %shuffle, %b 1593 %sub = sub <4 x i16> %a, %mul 1594 ret <4 x i16> %sub 1595 } 1596 1597 define <8 x i16> 
@test_vmlsq_lane_s16_0(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) { 1598 ; CHECK-LABEL: test_vmlsq_lane_s16_0: 1599 ; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] 1600 ; CHECK-NEXT: ret 1601 entry: 1602 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer 1603 %mul = mul <8 x i16> %shuffle, %b 1604 %sub = sub <8 x i16> %a, %mul 1605 ret <8 x i16> %sub 1606 } 1607 1608 define <2 x i32> @test_vmls_lane_s32_0(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) { 1609 ; CHECK-LABEL: test_vmls_lane_s32_0: 1610 ; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 1611 ; CHECK-NEXT: ret 1612 entry: 1613 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer 1614 %mul = mul <2 x i32> %shuffle, %b 1615 %sub = sub <2 x i32> %a, %mul 1616 ret <2 x i32> %sub 1617 } 1618 1619 define <4 x i32> @test_vmlsq_lane_s32_0(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) { 1620 ; CHECK-LABEL: test_vmlsq_lane_s32_0: 1621 ; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 1622 ; CHECK-NEXT: ret 1623 entry: 1624 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer 1625 %mul = mul <4 x i32> %shuffle, %b 1626 %sub = sub <4 x i32> %a, %mul 1627 ret <4 x i32> %sub 1628 } 1629 1630 define <4 x i16> @test_vmls_laneq_s16_0(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) { 1631 ; CHECK-LABEL: test_vmls_laneq_s16_0: 1632 ; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] 1633 ; CHECK-NEXT: ret 1634 entry: 1635 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer 1636 %mul = mul <4 x i16> %shuffle, %b 1637 %sub = sub <4 x i16> %a, %mul 1638 ret <4 x i16> %sub 1639 } 1640 1641 define <8 x i16> @test_vmlsq_laneq_s16_0(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) { 1642 ; CHECK-LABEL: test_vmlsq_laneq_s16_0: 1643 ; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] 1644 ; CHECK-NEXT: ret 1645 entry: 1646 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 
x i32> zeroinitializer 1647 %mul = mul <8 x i16> %shuffle, %b 1648 %sub = sub <8 x i16> %a, %mul 1649 ret <8 x i16> %sub 1650 } 1651 1652 define <2 x i32> @test_vmls_laneq_s32_0(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) { 1653 ; CHECK-LABEL: test_vmls_laneq_s32_0: 1654 ; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 1655 ; CHECK-NEXT: ret 1656 entry: 1657 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer 1658 %mul = mul <2 x i32> %shuffle, %b 1659 %sub = sub <2 x i32> %a, %mul 1660 ret <2 x i32> %sub 1661 } 1662 1663 define <4 x i32> @test_vmlsq_laneq_s32_0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) { 1664 ; CHECK-LABEL: test_vmlsq_laneq_s32_0: 1665 ; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 1666 ; CHECK-NEXT: ret 1667 entry: 1668 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer 1669 %mul = mul <4 x i32> %shuffle, %b 1670 %sub = sub <4 x i32> %a, %mul 1671 ret <4 x i32> %sub 1672 } 1673 1674 define <4 x i16> @test_vmul_lane_s16_0(<4 x i16> %a, <4 x i16> %v) { 1675 ; CHECK-LABEL: test_vmul_lane_s16_0: 1676 ; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] 1677 ; CHECK-NEXT: ret 1678 entry: 1679 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer 1680 %mul = mul <4 x i16> %shuffle, %a 1681 ret <4 x i16> %mul 1682 } 1683 1684 define <8 x i16> @test_vmulq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) { 1685 ; CHECK-LABEL: test_vmulq_lane_s16_0: 1686 ; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] 1687 ; CHECK-NEXT: ret 1688 entry: 1689 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer 1690 %mul = mul <8 x i16> %shuffle, %a 1691 ret <8 x i16> %mul 1692 } 1693 1694 define <2 x i32> @test_vmul_lane_s32_0(<2 x i32> %a, <2 x i32> %v) { 1695 ; CHECK-LABEL: test_vmul_lane_s32_0: 1696 ; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 1697 ; CHECK-NEXT: ret 1698 entry: 1699 %shuffle = 
shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer 1700 %mul = mul <2 x i32> %shuffle, %a 1701 ret <2 x i32> %mul 1702 } 1703 1704 define <4 x i32> @test_vmulq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) { 1705 ; CHECK-LABEL: test_vmulq_lane_s32_0: 1706 ; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 1707 ; CHECK-NEXT: ret 1708 entry: 1709 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer 1710 %mul = mul <4 x i32> %shuffle, %a 1711 ret <4 x i32> %mul 1712 } 1713 1714 define <4 x i16> @test_vmul_lane_u16_0(<4 x i16> %a, <4 x i16> %v) { 1715 ; CHECK-LABEL: test_vmul_lane_u16_0: 1716 ; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] 1717 ; CHECK-NEXT: ret 1718 entry: 1719 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer 1720 %mul = mul <4 x i16> %shuffle, %a 1721 ret <4 x i16> %mul 1722 } 1723 1724 define <8 x i16> @test_vmulq_lane_u16_0(<8 x i16> %a, <4 x i16> %v) { 1725 ; CHECK-LABEL: test_vmulq_lane_u16_0: 1726 ; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] 1727 ; CHECK-NEXT: ret 1728 entry: 1729 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer 1730 %mul = mul <8 x i16> %shuffle, %a 1731 ret <8 x i16> %mul 1732 } 1733 1734 define <2 x i32> @test_vmul_lane_u32_0(<2 x i32> %a, <2 x i32> %v) { 1735 ; CHECK-LABEL: test_vmul_lane_u32_0: 1736 ; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 1737 ; CHECK-NEXT: ret 1738 entry: 1739 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer 1740 %mul = mul <2 x i32> %shuffle, %a 1741 ret <2 x i32> %mul 1742 } 1743 1744 define <4 x i32> @test_vmulq_lane_u32_0(<4 x i32> %a, <2 x i32> %v) { 1745 ; CHECK-LABEL: test_vmulq_lane_u32_0: 1746 ; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 1747 ; CHECK-NEXT: ret 1748 entry: 1749 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer 1750 %mul = mul <4 x 
i32> %shuffle, %a 1751 ret <4 x i32> %mul 1752 } 1753 1754 define <4 x i16> @test_vmul_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) { 1755 ; CHECK-LABEL: test_vmul_laneq_s16_0: 1756 ; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] 1757 ; CHECK-NEXT: ret 1758 entry: 1759 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer 1760 %mul = mul <4 x i16> %shuffle, %a 1761 ret <4 x i16> %mul 1762 } 1763 1764 define <8 x i16> @test_vmulq_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) { 1765 ; CHECK-LABEL: test_vmulq_laneq_s16_0: 1766 ; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] 1767 ; CHECK-NEXT: ret 1768 entry: 1769 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer 1770 %mul = mul <8 x i16> %shuffle, %a 1771 ret <8 x i16> %mul 1772 } 1773 1774 define <2 x i32> @test_vmul_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) { 1775 ; CHECK-LABEL: test_vmul_laneq_s32_0: 1776 ; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 1777 ; CHECK-NEXT: ret 1778 entry: 1779 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer 1780 %mul = mul <2 x i32> %shuffle, %a 1781 ret <2 x i32> %mul 1782 } 1783 1784 define <4 x i32> @test_vmulq_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) { 1785 ; CHECK-LABEL: test_vmulq_laneq_s32_0: 1786 ; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 1787 ; CHECK-NEXT: ret 1788 entry: 1789 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer 1790 %mul = mul <4 x i32> %shuffle, %a 1791 ret <4 x i32> %mul 1792 } 1793 1794 define <4 x i16> @test_vmul_laneq_u16_0(<4 x i16> %a, <8 x i16> %v) { 1795 ; CHECK-LABEL: test_vmul_laneq_u16_0: 1796 ; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] 1797 ; CHECK-NEXT: ret 1798 entry: 1799 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer 1800 %mul = mul <4 x i16> %shuffle, %a 1801 ret <4 x i16> %mul 1802 } 1803 1804 define <8 x i16> 
@test_vmulq_laneq_u16_0(<8 x i16> %a, <8 x i16> %v) { 1805 ; CHECK-LABEL: test_vmulq_laneq_u16_0: 1806 ; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] 1807 ; CHECK-NEXT: ret 1808 entry: 1809 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer 1810 %mul = mul <8 x i16> %shuffle, %a 1811 ret <8 x i16> %mul 1812 } 1813 1814 define <2 x i32> @test_vmul_laneq_u32_0(<2 x i32> %a, <4 x i32> %v) { 1815 ; CHECK-LABEL: test_vmul_laneq_u32_0: 1816 ; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 1817 ; CHECK-NEXT: ret 1818 entry: 1819 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer 1820 %mul = mul <2 x i32> %shuffle, %a 1821 ret <2 x i32> %mul 1822 } 1823 1824 define <4 x i32> @test_vmulq_laneq_u32_0(<4 x i32> %a, <4 x i32> %v) { 1825 ; CHECK-LABEL: test_vmulq_laneq_u32_0: 1826 ; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 1827 ; CHECK-NEXT: ret 1828 entry: 1829 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer 1830 %mul = mul <4 x i32> %shuffle, %a 1831 ret <4 x i32> %mul 1832 } 1833 1834 define <2 x float> @test_vfma_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) { 1835 ; CHECK-LABEL: test_vfma_lane_f32_0: 1836 ; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 1837 ; CHECK-NEXT: ret 1838 entry: 1839 %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer 1840 %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) 1841 ret <2 x float> %0 1842 } 1843 1844 define <4 x float> @test_vfmaq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) { 1845 ; CHECK-LABEL: test_vfmaq_lane_f32_0: 1846 ; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 1847 ; CHECK-NEXT: ret 1848 entry: 1849 %lane = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer 1850 %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x 
float> %b, <4 x float> %a) 1851 ret <4 x float> %0 1852 } 1853 1854 define <2 x float> @test_vfma_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) { 1855 ; CHECK-LABEL: test_vfma_laneq_f32_0: 1856 ; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 1857 ; CHECK-NEXT: ret 1858 entry: 1859 %lane = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer 1860 %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) 1861 ret <2 x float> %0 1862 } 1863 1864 define <4 x float> @test_vfmaq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) { 1865 ; CHECK-LABEL: test_vfmaq_laneq_f32_0: 1866 ; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 1867 ; CHECK-NEXT: ret 1868 entry: 1869 %lane = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer 1870 %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a) 1871 ret <4 x float> %0 1872 } 1873 1874 define <2 x float> @test_vfms_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) { 1875 ; CHECK-LABEL: test_vfms_lane_f32_0: 1876 ; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 1877 ; CHECK-NEXT: ret 1878 entry: 1879 %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v 1880 %lane = shufflevector <2 x float> %sub, <2 x float> undef, <2 x i32> zeroinitializer 1881 %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) 1882 ret <2 x float> %0 1883 } 1884 1885 define <4 x float> @test_vfmsq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) { 1886 ; CHECK-LABEL: test_vfmsq_lane_f32_0: 1887 ; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 1888 ; CHECK-NEXT: ret 1889 entry: 1890 %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v 1891 %lane = shufflevector <2 x float> %sub, <2 x float> undef, <4 x i32> zeroinitializer 1892 %0 = tail call <4 x float> @llvm.fma.v4f32(<4 
x float> %lane, <4 x float> %b, <4 x float> %a) 1893 ret <4 x float> %0 1894 } 1895 1896 define <2 x float> @test_vfms_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) { 1897 ; CHECK-LABEL: test_vfms_laneq_f32_0: 1898 ; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 1899 ; CHECK-NEXT: ret 1900 entry: 1901 %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v 1902 %lane = shufflevector <4 x float> %sub, <4 x float> undef, <2 x i32> zeroinitializer 1903 %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) 1904 ret <2 x float> %0 1905 } 1906 1907 define <4 x float> @test_vfmsq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) { 1908 ; CHECK-LABEL: test_vfmsq_laneq_f32_0: 1909 ; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 1910 ; CHECK-NEXT: ret 1911 entry: 1912 %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v 1913 %lane = shufflevector <4 x float> %sub, <4 x float> undef, <4 x i32> zeroinitializer 1914 %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a) 1915 ret <4 x float> %0 1916 } 1917 1918 define <2 x double> @test_vfmaq_laneq_f64_0(<2 x double> %a, <2 x double> %b, <2 x double> %v) { 1919 ; CHECK-LABEL: test_vfmaq_laneq_f64_0: 1920 ; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] 1921 ; CHECK-NEXT: ret 1922 entry: 1923 %lane = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer 1924 %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a) 1925 ret <2 x double> %0 1926 } 1927 1928 define <2 x double> @test_vfmsq_laneq_f64_0(<2 x double> %a, <2 x double> %b, <2 x double> %v) { 1929 ; CHECK-LABEL: test_vfmsq_laneq_f64_0: 1930 ; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] 1931 ; CHECK-NEXT: ret 1932 entry: 1933 %sub = 
fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v 1934 %lane = shufflevector <2 x double> %sub, <2 x double> undef, <2 x i32> zeroinitializer 1935 %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a) 1936 ret <2 x double> %0 1937 } 1938 1939 define <4 x i32> @test_vmlal_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { 1940 ; CHECK-LABEL: test_vmlal_lane_s16_0: 1941 ; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] 1942 ; CHECK-NEXT: ret 1943 entry: 1944 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer 1945 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) 1946 %add = add <4 x i32> %vmull2.i, %a 1947 ret <4 x i32> %add 1948 } 1949 1950 define <2 x i64> @test_vmlal_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { 1951 ; CHECK-LABEL: test_vmlal_lane_s32_0: 1952 ; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 1953 ; CHECK-NEXT: ret 1954 entry: 1955 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer 1956 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) 1957 %add = add <2 x i64> %vmull2.i, %a 1958 ret <2 x i64> %add 1959 } 1960 1961 define <4 x i32> @test_vmlal_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { 1962 ; CHECK-LABEL: test_vmlal_laneq_s16_0: 1963 ; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] 1964 ; CHECK-NEXT: ret 1965 entry: 1966 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer 1967 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) 1968 %add = add <4 x i32> %vmull2.i, %a 1969 ret <4 x i32> %add 1970 } 1971 1972 define <2 x i64> @test_vmlal_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { 1973 ; CHECK-LABEL: test_vmlal_laneq_s32_0: 1974 ; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, 
{{v[0-9]+}}.s[0] 1975 ; CHECK-NEXT: ret 1976 entry: 1977 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer 1978 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) 1979 %add = add <2 x i64> %vmull2.i, %a 1980 ret <2 x i64> %add 1981 } 1982 1983 define <4 x i32> @test_vmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { 1984 ; CHECK-LABEL: test_vmlal_high_lane_s16_0: 1985 ; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] 1986 ; CHECK-NEXT: ret 1987 entry: 1988 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1989 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer 1990 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 1991 %add = add <4 x i32> %vmull2.i, %a 1992 ret <4 x i32> %add 1993 } 1994 1995 define <2 x i64> @test_vmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { 1996 ; CHECK-LABEL: test_vmlal_high_lane_s32_0: 1997 ; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 1998 ; CHECK-NEXT: ret 1999 entry: 2000 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 2001 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer 2002 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 2003 %add = add <2 x i64> %vmull2.i, %a 2004 ret <2 x i64> %add 2005 } 2006 2007 define <4 x i32> @test_vmlal_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { 2008 ; CHECK-LABEL: test_vmlal_high_laneq_s16_0: 2009 ; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] 2010 ; CHECK-NEXT: ret 2011 entry: 2012 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2013 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer 2014 %vmull2.i = 
tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 2015 %add = add <4 x i32> %vmull2.i, %a 2016 ret <4 x i32> %add 2017 } 2018 2019 define <2 x i64> @test_vmlal_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { 2020 ; CHECK-LABEL: test_vmlal_high_laneq_s32_0: 2021 ; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 2022 ; CHECK-NEXT: ret 2023 entry: 2024 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 2025 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer 2026 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 2027 %add = add <2 x i64> %vmull2.i, %a 2028 ret <2 x i64> %add 2029 } 2030 2031 define <4 x i32> @test_vmlsl_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { 2032 ; CHECK-LABEL: test_vmlsl_lane_s16_0: 2033 ; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] 2034 ; CHECK-NEXT: ret 2035 entry: 2036 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer 2037 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) 2038 %sub = sub <4 x i32> %a, %vmull2.i 2039 ret <4 x i32> %sub 2040 } 2041 2042 define <2 x i64> @test_vmlsl_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { 2043 ; CHECK-LABEL: test_vmlsl_lane_s32_0: 2044 ; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 2045 ; CHECK-NEXT: ret 2046 entry: 2047 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer 2048 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) 2049 %sub = sub <2 x i64> %a, %vmull2.i 2050 ret <2 x i64> %sub 2051 } 2052 2053 define <4 x i32> @test_vmlsl_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { 2054 ; CHECK-LABEL: test_vmlsl_laneq_s16_0: 2055 ; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] 2056 ; CHECK-NEXT: 
ret 2057 entry: 2058 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer 2059 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) 2060 %sub = sub <4 x i32> %a, %vmull2.i 2061 ret <4 x i32> %sub 2062 } 2063 2064 define <2 x i64> @test_vmlsl_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { 2065 ; CHECK-LABEL: test_vmlsl_laneq_s32_0: 2066 ; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 2067 ; CHECK-NEXT: ret 2068 entry: 2069 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer 2070 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) 2071 %sub = sub <2 x i64> %a, %vmull2.i 2072 ret <2 x i64> %sub 2073 } 2074 2075 define <4 x i32> @test_vmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { 2076 ; CHECK-LABEL: test_vmlsl_high_lane_s16_0: 2077 ; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] 2078 ; CHECK-NEXT: ret 2079 entry: 2080 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2081 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer 2082 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 2083 %sub = sub <4 x i32> %a, %vmull2.i 2084 ret <4 x i32> %sub 2085 } 2086 2087 define <2 x i64> @test_vmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { 2088 ; CHECK-LABEL: test_vmlsl_high_lane_s32_0: 2089 ; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 2090 ; CHECK-NEXT: ret 2091 entry: 2092 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 2093 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer 2094 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 2095 %sub = sub <2 x i64> %a, %vmull2.i 2096 ret <2 x i64> %sub 
2097 } 2098 2099 define <4 x i32> @test_vmlsl_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { 2100 ; CHECK-LABEL: test_vmlsl_high_laneq_s16_0: 2101 ; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] 2102 ; CHECK-NEXT: ret 2103 entry: 2104 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2105 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer 2106 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 2107 %sub = sub <4 x i32> %a, %vmull2.i 2108 ret <4 x i32> %sub 2109 } 2110 2111 define <2 x i64> @test_vmlsl_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { 2112 ; CHECK-LABEL: test_vmlsl_high_laneq_s32_0: 2113 ; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 2114 ; CHECK-NEXT: ret 2115 entry: 2116 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 2117 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer 2118 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 2119 %sub = sub <2 x i64> %a, %vmull2.i 2120 ret <2 x i64> %sub 2121 } 2122 2123 define <4 x i32> @test_vmlal_lane_u16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { 2124 ; CHECK-LABEL: test_vmlal_lane_u16_0: 2125 ; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] 2126 ; CHECK-NEXT: ret 2127 entry: 2128 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer 2129 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) 2130 %add = add <4 x i32> %vmull2.i, %a 2131 ret <4 x i32> %add 2132 } 2133 2134 define <2 x i64> @test_vmlal_lane_u32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { 2135 ; CHECK-LABEL: test_vmlal_lane_u32_0: 2136 ; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 2137 ; CHECK-NEXT: ret 2138 entry: 2139 %shuffle = 
shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer 2140 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) 2141 %add = add <2 x i64> %vmull2.i, %a 2142 ret <2 x i64> %add 2143 } 2144 2145 define <4 x i32> @test_vmlal_laneq_u16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { 2146 ; CHECK-LABEL: test_vmlal_laneq_u16_0: 2147 ; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] 2148 ; CHECK-NEXT: ret 2149 entry: 2150 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer 2151 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) 2152 %add = add <4 x i32> %vmull2.i, %a 2153 ret <4 x i32> %add 2154 } 2155 2156 define <2 x i64> @test_vmlal_laneq_u32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { 2157 ; CHECK-LABEL: test_vmlal_laneq_u32_0: 2158 ; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 2159 ; CHECK-NEXT: ret 2160 entry: 2161 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer 2162 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) 2163 %add = add <2 x i64> %vmull2.i, %a 2164 ret <2 x i64> %add 2165 } 2166 2167 define <4 x i32> @test_vmlal_high_lane_u16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { 2168 ; CHECK-LABEL: test_vmlal_high_lane_u16_0: 2169 ; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] 2170 ; CHECK-NEXT: ret 2171 entry: 2172 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2173 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer 2174 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 2175 %add = add <4 x i32> %vmull2.i, %a 2176 ret <4 x i32> %add 2177 } 2178 2179 define <2 x i64> @test_vmlal_high_lane_u32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { 2180 ; CHECK-LABEL: 
test_vmlal_high_lane_u32_0:
; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}

; Lane-0 variants. Unless noted otherwise, each test below splats element 0 of
; %v with a zeroinitializer shuffle mask and expects the multiply to be selected
; in its by-element form (.h[0] / .s[0] / .d[0]), immediately followed by `ret`.
; The instruction patterns are written without the leading s/u/f/sq of the
; mnemonic; FileCheck matches them as substrings of the emitted line.

; Widening multiply-accumulate, high half: umull of the upper elements of %b
; with the lane-0 splat, added to %a.
define <4 x i32> @test_vmlal_high_laneq_u16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlal_high_laneq_u16_0:
; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}

define <2 x i64> @test_vmlal_high_laneq_u32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlal_high_laneq_u32_0:
; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}

; Widening multiply-subtract: umull of %b with the lane-0 splat, subtracted
; from %a (note the operand order of `sub`: %a - product).
define <4 x i32> @test_vmlsl_lane_u16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsl_lane_u16_0:
; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

define <2 x i64> @test_vmlsl_lane_u32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlsl_lane_u32_0:
; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}

define <4 x i32> @test_vmlsl_laneq_u16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsl_laneq_u16_0:
; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

define <2 x i64> @test_vmlsl_laneq_u32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsl_laneq_u32_0:
; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}

; Widening multiply-subtract, high half: %shuffle.i extracts the upper
; elements of %b before the umull.
define <4 x i32> @test_vmlsl_high_lane_u16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsl_high_lane_u16_0:
; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

define <2 x i64> @test_vmlsl_high_lane_u32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlsl_high_lane_u32_0:
; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}

define <4 x i32> @test_vmlsl_high_laneq_u16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsl_high_laneq_u16_0:
; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

define <2 x i64> @test_vmlsl_high_laneq_u32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsl_high_laneq_u32_0:
; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}

; Widening multiply: smull/umull of %a with the lane-0 splat of a 64-bit %v.
define <4 x i32> @test_vmull_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmull_lane_s16_0:
; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}

define <2 x i64> @test_vmull_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmull_lane_s32_0:
; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}

define <4 x i32> @test_vmull_lane_u16_0(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmull_lane_u16_0:
; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}

define <2 x i64> @test_vmull_lane_u32_0(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmull_lane_u32_0:
; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}

; Widening multiply, high half of %a, 64-bit %v.
define <4 x i32> @test_vmull_high_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmull_high_lane_s16_0:
; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}

define <2 x i64> @test_vmull_high_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmull_high_lane_s32_0:
; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}

define <4 x i32> @test_vmull_high_lane_u16_0(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmull_high_lane_u16_0:
; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}

define <2 x i64> @test_vmull_high_lane_u32_0(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmull_high_lane_u32_0:
; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}

; Widening multiply, 128-bit %v (the splat shuffle also narrows %v).
define <4 x i32> @test_vmull_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmull_laneq_s16_0:
; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}

define <2 x i64> @test_vmull_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmull_laneq_s32_0:
; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}

define <4 x i32> @test_vmull_laneq_u16_0(<4 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmull_laneq_u16_0:
; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}

define <2 x i64> @test_vmull_laneq_u32_0(<2 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmull_laneq_u32_0:
; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}

; Widening multiply, high half of %a, 128-bit %v.
define <4 x i32> @test_vmull_high_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmull_high_laneq_s16_0:
; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}

define <2 x i64> @test_vmull_high_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmull_high_laneq_s32_0:
; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}

define <4 x i32> @test_vmull_high_laneq_u16_0(<8 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmull_high_laneq_u16_0:
; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}

define <2 x i64> @test_vmull_high_laneq_u32_0(<4 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmull_high_laneq_u32_0:
; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}

; Saturating doubling multiply-accumulate: sqdmull with the lane-0 splat, then
; sqadd into %a; the pair is expected to be selected as one by-element
; instruction.
define <4 x i32> @test_vqdmlal_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmlal_lane_s16_0:
; CHECK: qdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %vqdmlal4.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i)
  ret <4 x i32> %vqdmlal4.i
}

define <2 x i64> @test_vqdmlal_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmlal_lane_s32_0:
; CHECK: qdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %vqdmlal4.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i)
  ret <2 x i64> %vqdmlal4.i
}

define <4 x i32> @test_vqdmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmlal_high_lane_s16_0:
; CHECK: qdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %vqdmlal4.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i)
  ret <4 x i32> %vqdmlal4.i
}

define <2 x i64> @test_vqdmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmlal_high_lane_s32_0:
; CHECK: qdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %vqdmlal4.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i)
  ret <2 x i64> %vqdmlal4.i
}

; Saturating doubling multiply-subtract: same shape as above with sqsub
; (%a - product) in place of sqadd.
define <4 x i32> @test_vqdmlsl_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmlsl_lane_s16_0:
; CHECK: qdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %vqdmlsl4.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i)
  ret <4 x i32> %vqdmlsl4.i
}

define <2 x i64> @test_vqdmlsl_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmlsl_lane_s32_0:
; CHECK: qdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %vqdmlsl4.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i)
  ret <2 x i64> %vqdmlsl4.i
}

define <4 x i32> @test_vqdmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmlsl_high_lane_s16_0:
; CHECK: qdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %vqdmlsl4.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i)
  ret <4 x i32> %vqdmlsl4.i
}

define <2 x i64> @test_vqdmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmlsl_high_lane_s32_0:
; CHECK: qdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %vqdmlsl4.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i)
  ret <2 x i64> %vqdmlsl4.i
}

; Saturating doubling widening multiply: sqdmull with the lane-0 splat, in
; low/high-half and 64-bit/128-bit %v combinations.
define <4 x i32> @test_vqdmull_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmull_lane_s16_0:
; CHECK: qdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i32> %vqdmull2.i
}

define <2 x i64> @test_vqdmull_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmull_lane_s32_0:
; CHECK: qdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i64> %vqdmull2.i
}

define <4 x i32> @test_vqdmull_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vqdmull_laneq_s16_0:
; CHECK: qdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i32> %vqdmull2.i
}

define <2 x i64> @test_vqdmull_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vqdmull_laneq_s32_0:
; CHECK: qdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i64> %vqdmull2.i
}

define <4 x i32> @test_vqdmull_high_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmull_high_lane_s16_0:
; CHECK: qdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  ret <4 x i32> %vqdmull2.i
}

define <2 x i64> @test_vqdmull_high_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmull_high_lane_s32_0:
; CHECK: qdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  ret <2 x i64> %vqdmull2.i
}

define <4 x i32> @test_vqdmull_high_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vqdmull_high_laneq_s16_0:
; CHECK: qdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  ret <4 x i32> %vqdmull2.i
}

define <2 x i64> @test_vqdmull_high_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vqdmull_high_laneq_s32_0:
; CHECK: qdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  ret <2 x i64> %vqdmull2.i
}

; Saturating doubling multiply returning high half (sqdmulh). In the "q"
; variants the splat shuffle also widens the 64-bit %v to a 128-bit vector.
define <4 x i16> @test_vqdmulh_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmulh_lane_s16_0:
; CHECK: qdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vqdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i16> %vqdmulh2.i
}

define <8 x i16> @test_vqdmulhq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmulhq_lane_s16_0:
; CHECK: qdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
  %vqdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle)
  ret <8 x i16> %vqdmulh2.i
}

define <2 x i32> @test_vqdmulh_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmulh_lane_s32_0:
; CHECK: qdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vqdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i32> %vqdmulh2.i
}

define <4 x i32> @test_vqdmulhq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmulhq_lane_s32_0:
; CHECK: qdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
  %vqdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle)
  ret <4 x i32> %vqdmulh2.i
}

; Rounding variant of the above (sqrdmulh).
define <4 x i16> @test_vqrdmulh_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqrdmulh_lane_s16_0:
; CHECK: qrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vqrdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i16> %vqrdmulh2.i
}

define <8 x i16> @test_vqrdmulhq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqrdmulhq_lane_s16_0:
; CHECK: qrdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
  %vqrdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle)
  ret <8 x i16> %vqrdmulh2.i
}

define <2 x i32> @test_vqrdmulh_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqrdmulh_lane_s32_0:
; CHECK: qrdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vqrdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i32> %vqrdmulh2.i
}

define <4 x i32> @test_vqrdmulhq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqrdmulhq_lane_s32_0:
; CHECK: qrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
  %vqrdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle)
  ret <4 x i32> %vqrdmulh2.i
}

; Floating-point multiply by lane 0: plain IR `fmul` of the splat with %a.
define <2 x float> @test_vmul_lane_f32_0(<2 x float> %a, <2 x float> %v) {
; CHECK-LABEL: test_vmul_lane_f32_0:
; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer
  %mul = fmul <2 x float> %shuffle, %a
  ret <2 x float> %mul
}

define <4 x float> @test_vmulq_lane_f32_0(<4 x float> %a, <2 x float> %v) {
; CHECK-LABEL: test_vmulq_lane_f32_0:
; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer
  %mul = fmul <4 x float> %shuffle, %a
  ret <4 x float> %mul
}

define <2 x float> @test_vmul_laneq_f32_0(<2 x float> %a, <4 x float> %v) {
; CHECK-LABEL: test_vmul_laneq_f32_0:
; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer
  %mul = fmul <2 x float> %shuffle, %a
  ret <2 x float> %mul
}

; Scalar case: no splat here. %a is bitcast to a scalar double, multiplied by
; element 0 extracted from %v, and re-wrapped as <1 x double>; the expected
; instruction uses d-register operands with a .d[0] element operand.
define <1 x double> @test_vmul_laneq_f64_0(<1 x double> %a, <2 x double> %v) {
; CHECK-LABEL: test_vmul_laneq_f64_0:
; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
; CHECK-NEXT: ret
entry:
  %0 = bitcast <1 x double> %a to <8 x i8>
  %1 = bitcast <8 x i8> %0 to double
  %extract = extractelement <2 x double> %v, i32 0
  %2 = fmul double %1, %extract
  %3 = insertelement <1 x double> undef, double %2, i32 0
  ret <1 x double> %3
}

define <4 x float> @test_vmulq_laneq_f32_0(<4 x float> %a, <4 x float> %v) {
; CHECK-LABEL: test_vmulq_laneq_f32_0:
; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
  %mul = fmul <4 x float> %shuffle, %a
  ret <4 x float> %mul
}

define <2 x double> @test_vmulq_laneq_f64_0(<2 x double> %a, <2 x double> %v) {
; CHECK-LABEL: test_vmulq_laneq_f64_0:
; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer
  %mul = fmul <2 x double> %shuffle, %a
  ret <2 x double> %mul
}

; Floating-point multiply-extended by lane 0, via the fmulx intrinsics.
define <2 x float> @test_vmulx_lane_f32_0(<2 x float> %a, <2 x float> %v) {
; CHECK-LABEL: test_vmulx_lane_f32_0:
; CHECK: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer
  %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle)
  ret <2 x float> %vmulx2.i
}

define <4 x float> @test_vmulxq_lane_f32_0(<4 x float> %a, <2 x float> %v) {
; CHECK-LABEL: test_vmulxq_lane_f32_0:
; CHECK: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer
  %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle)
  ret <4 x float> %vmulx2.i
}

define <2 x double> @test_vmulxq_lane_f64_0(<2 x double> %a, <1 x double> %v) {
; CHECK-LABEL: test_vmulxq_lane_f64_0:
; CHECK: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
  %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle)
  ret <2 x double> %vmulx2.i
}

define <2 x float> @test_vmulx_laneq_f32_0(<2 x float> %a, <4 x float> %v) {
; CHECK-LABEL: test_vmulx_laneq_f32_0:
; CHECK: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer
  %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle)
  ret <2 x float> %vmulx2.i
}

define <4 x float> @test_vmulxq_laneq_f32_0(<4 x float> %a, <4 x float> %v) {
; CHECK-LABEL: test_vmulxq_laneq_f32_0:
; CHECK: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
  %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle)
  ret <4 x float> %vmulx2.i
}

define <2 x double> @test_vmulxq_laneq_f64_0(<2 x double> %a, <2 x double> %v) {
; CHECK-LABEL: test_vmulxq_laneq_f64_0:
; CHECK: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer
  %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle)
  ret <2 x double> %vmulx2.i
}
