; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s

; Lane-insert tests. Every function below performs a single insertelement
; (optionally fed by an extractelement) and the patterns placed above it pin
; the instruction that is expected in the generated assembly.

; ---------------------------------------------------------------------------
; Scalar argument inserted into a fixed lane of a vector argument.
; ---------------------------------------------------------------------------

; CHECK-LABEL: ins16bw:
; CHECK: mov {{v[0-9]+}}.b[15], {{w[0-9]+}}
define <16 x i8> @ins16bw(<16 x i8> %tmp1, i8 %tmp2) {
  %res = insertelement <16 x i8> %tmp1, i8 %tmp2, i32 15
  ret <16 x i8> %res
}

; CHECK-LABEL: ins8hw:
; CHECK: mov {{v[0-9]+}}.h[6], {{w[0-9]+}}
define <8 x i16> @ins8hw(<8 x i16> %tmp1, i16 %tmp2) {
  %res = insertelement <8 x i16> %tmp1, i16 %tmp2, i32 6
  ret <8 x i16> %res
}

; CHECK-LABEL: ins4sw:
; CHECK: mov {{v[0-9]+}}.s[2], {{w[0-9]+}}
define <4 x i32> @ins4sw(<4 x i32> %tmp1, i32 %tmp2) {
  %res = insertelement <4 x i32> %tmp1, i32 %tmp2, i32 2
  ret <4 x i32> %res
}

; The 64-bit element comes from an x register instead of a w register.
; CHECK-LABEL: ins2dw:
; CHECK: mov {{v[0-9]+}}.d[1], {{x[0-9]+}}
define <2 x i64> @ins2dw(<2 x i64> %tmp1, i64 %tmp2) {
  %res = insertelement <2 x i64> %tmp1, i64 %tmp2, i32 1
  ret <2 x i64> %res
}

; CHECK-LABEL: ins8bw:
; CHECK: mov {{v[0-9]+}}.b[5], {{w[0-9]+}}
define <8 x i8> @ins8bw(<8 x i8> %tmp1, i8 %tmp2) {
  %res = insertelement <8 x i8> %tmp1, i8 %tmp2, i32 5
  ret <8 x i8> %res
}

; CHECK-LABEL: ins4hw:
; CHECK: mov {{v[0-9]+}}.h[3], {{w[0-9]+}}
define <4 x i16> @ins4hw(<4 x i16> %tmp1, i16 %tmp2) {
  %res = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 3
  ret <4 x i16> %res
}

; CHECK-LABEL: ins2sw:
; CHECK: mov {{v[0-9]+}}.s[1], {{w[0-9]+}}
define <2 x i32> @ins2sw(<2 x i32> %tmp1, i32 %tmp2) {
  %res = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
  ret <2 x i32> %res
}

; ---------------------------------------------------------------------------
; Lane of %tmp1 copied into a lane of %tmp2; both vectors have the same type.
; ---------------------------------------------------------------------------

; CHECK-LABEL: ins16b16:
; CHECK: mov {{v[0-9]+}}.b[15], {{v[0-9]+}}.b[2]
define <16 x i8> @ins16b16(<16 x i8> %tmp1, <16 x i8> %tmp2) {
  %elt = extractelement <16 x i8> %tmp1, i32 2
  %res = insertelement <16 x i8> %tmp2, i8 %elt, i32 15
  ret <16 x i8> %res
}

; CHECK-LABEL: ins8h8:
; CHECK: mov {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[2]
define <8 x i16> @ins8h8(<8 x i16> %tmp1, <8 x i16> %tmp2) {
  %elt = extractelement <8 x i16> %tmp1, i32 2
  %res = insertelement <8 x i16> %tmp2, i16 %elt, i32 7
  ret <8 x i16> %res
}

; CHECK-LABEL: ins4s4:
; CHECK: mov {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
define <4 x i32> @ins4s4(<4 x i32> %tmp1, <4 x i32> %tmp2) {
  %elt = extractelement <4 x i32> %tmp1, i32 2
  %res = insertelement <4 x i32> %tmp2, i32 %elt, i32 1
  ret <4 x i32> %res
}

; CHECK-LABEL: ins2d2:
; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
define <2 x i64> @ins2d2(<2 x i64> %tmp1, <2 x i64> %tmp2) {
  %elt = extractelement <2 x i64> %tmp1, i32 0
  %res = insertelement <2 x i64> %tmp2, i64 %elt, i32 1
  ret <2 x i64> %res
}

; CHECK-LABEL: ins4f4:
; CHECK: mov {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
define <4 x float> @ins4f4(<4 x float> %tmp1, <4 x float> %tmp2) {
  %elt = extractelement <4 x float> %tmp1, i32 2
  %res = insertelement <4 x float> %tmp2, float %elt, i32 1
  ret <4 x float> %res
}

; CHECK-LABEL: ins2df2:
; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
define <2 x double> @ins2df2(<2 x double> %tmp1, <2 x double> %tmp2) {
  %elt = extractelement <2 x double> %tmp1, i32 0
  %res = insertelement <2 x double> %tmp2, double %elt, i32 1
  ret <2 x double> %res
}

; ---------------------------------------------------------------------------
; Lane of a 64-bit vector (%tmp1) copied into a 128-bit vector (%tmp2).
; ---------------------------------------------------------------------------

; CHECK-LABEL: ins8b16:
; CHECK: mov {{v[0-9]+}}.b[15], {{v[0-9]+}}.b[2]
define <16 x i8> @ins8b16(<8 x i8> %tmp1, <16 x i8> %tmp2) {
  %elt = extractelement <8 x i8> %tmp1, i32 2
  %res = insertelement <16 x i8> %tmp2, i8 %elt, i32 15
  ret <16 x i8> %res
}

; CHECK-LABEL: ins4h8:
; CHECK: mov {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[2]
define <8 x i16> @ins4h8(<4 x i16> %tmp1, <8 x i16> %tmp2) {
  %elt = extractelement <4 x i16> %tmp1, i32 2
  %res = insertelement <8 x i16> %tmp2, i16 %elt, i32 7
  ret <8 x i16> %res
}

; CHECK-LABEL: ins2s4:
; CHECK: mov {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[1]
define <4 x i32> @ins2s4(<2 x i32> %tmp1, <4 x i32> %tmp2) {
  %elt = extractelement <2 x i32> %tmp1, i32 1
  %res = insertelement <4 x i32> %tmp2, i32 %elt, i32 1
  ret <4 x i32> %res
}

; CHECK-LABEL: ins1d2:
; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
define <2 x i64> @ins1d2(<1 x i64> %tmp1, <2 x i64> %tmp2) {
  %elt = extractelement <1 x i64> %tmp1, i32 0
  %res = insertelement <2 x i64> %tmp2, i64 %elt, i32 1
  ret <2 x i64> %res
}

; CHECK-LABEL: ins2f4:
; CHECK: mov {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[1]
define <4 x float> @ins2f4(<2 x float> %tmp1, <4 x float> %tmp2) {
  %elt = extractelement <2 x float> %tmp1, i32 1
  %res = insertelement <4 x float> %tmp2, float %elt, i32 1
  ret <4 x float> %res
}

; Unlike its siblings, this case is expected to come out as a zip1 rather
; than a lane-to-lane mov.
; CHECK-LABEL: ins1f2:
; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
define <2 x double> @ins1f2(<1 x double> %tmp1, <2 x double> %tmp2) {
  %elt = extractelement <1 x double> %tmp1, i32 0
  %res = insertelement <2 x double> %tmp2, double %elt, i32 1
  ret <2 x double> %res
}

; ---------------------------------------------------------------------------
; Lane of a 128-bit vector (%tmp1) copied into a 64-bit vector (%tmp2).
; ---------------------------------------------------------------------------

; CHECK-LABEL: ins16b8:
; CHECK: mov {{v[0-9]+}}.b[7], {{v[0-9]+}}.b[2]
define <8 x i8> @ins16b8(<16 x i8> %tmp1, <8 x i8> %tmp2) {
  %elt = extractelement <16 x i8> %tmp1, i32 2
  %res = insertelement <8 x i8> %tmp2, i8 %elt, i32 7
  ret <8 x i8> %res
}

; CHECK-LABEL: ins8h4:
; CHECK: mov {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[2]
define <4 x i16> @ins8h4(<8 x i16> %tmp1, <4 x i16> %tmp2) {
  %elt = extractelement <8 x i16> %tmp1, i32 2
  %res = insertelement <4 x i16> %tmp2, i16 %elt, i32 3
  ret <4 x i16> %res
}

; CHECK-LABEL: ins4s2:
; CHECK: mov {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
define <2 x i32> @ins4s2(<4 x i32> %tmp1, <2 x i32> %tmp2) {
  %elt = extractelement <4 x i32> %tmp1, i32 2
  %res = insertelement <2 x i32> %tmp2, i32 %elt, i32 1
  ret <2 x i32> %res
}

; Lane of a 128-bit vector (%tmp1) copied into a 64-bit vector (%tmp2),
; continued.
define <1 x i64> @ins2d1(<2 x i64> %tmp1, <1 x i64> %tmp2) {
; CHECK-LABEL: ins2d1:
; CHECK: mov {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0]
  %tmp3 = extractelement <2 x i64> %tmp1, i32 0
  %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0
  ret <1 x i64> %tmp4
}

define <2 x float> @ins4f2(<4 x float> %tmp1, <2 x float> %tmp2) {
; CHECK-LABEL: ins4f2:
; CHECK: mov {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
  %tmp3 = extractelement <4 x float> %tmp1, i32 2
  %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1
  ret <2 x float> %tmp4
}

; The only lane of the <1 x double> result is overwritten, so a dup of the
; source lane is expected instead of a lane-to-lane mov.
define <1 x double> @ins2f1(<2 x double> %tmp1, <1 x double> %tmp2) {
; CHECK-LABEL: ins2f1:
; CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1]
  %tmp3 = extractelement <2 x double> %tmp1, i32 1
  %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0
  ret <1 x double> %tmp4
}

; Lane of %tmp1 copied into a lane of %tmp2; both are 64-bit vectors of the
; same type.
define <8 x i8> @ins8b8(<8 x i8> %tmp1, <8 x i8> %tmp2) {
; CHECK-LABEL: ins8b8:
; CHECK: mov {{v[0-9]+}}.b[4], {{v[0-9]+}}.b[2]
  %tmp3 = extractelement <8 x i8> %tmp1, i32 2
  %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 4
  ret <8 x i8> %tmp4
}

define <4 x i16> @ins4h4(<4 x i16> %tmp1, <4 x i16> %tmp2) {
; CHECK-LABEL: ins4h4:
; CHECK: mov {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
  %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3
  ret <4 x i16> %tmp4
}

define <2 x i32> @ins2s2(<2 x i32> %tmp1, <2 x i32> %tmp2) {
; CHECK-LABEL: ins2s2:
; CHECK: mov {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
  %tmp3 = extractelement <2 x i32> %tmp1, i32 0
  %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1
  ret <2 x i32> %tmp4
}

define <1 x i64> @ins1d1(<1 x i64> %tmp1, <1 x i64> %tmp2) {
; CHECK-LABEL: ins1d1:
; CHECK: mov {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0]
  %tmp3 = extractelement <1 x i64> %tmp1, i32 0
  %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0
  ret <1 x i64> %tmp4
}

define <2 x float> @ins2f2(<2 x float> %tmp1, <2 x float> %tmp2) {
; CHECK-LABEL: ins2f2:
; CHECK: mov {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
  %tmp3 = extractelement <2 x float> %tmp1, i32 0
  %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1
  ret <2 x float> %tmp4
}

; Negative test: replacing the single lane of a <1 x double> must not produce
; a vector-register mov.
define <1 x double> @ins1df1(<1 x double> %tmp1, <1 x double> %tmp2) {
; CHECK-LABEL: ins1df1:
; CHECK-NOT: mov {{v[0-9]+}}
  %tmp3 = extractelement <1 x double> %tmp1, i32 0
  %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0
  ret <1 x double> %tmp4
}

; Lane extracts to a general-purpose register. Extracts of 8-bit and 16-bit
; lanes are followed by a zext and are expected as "umov"; extracts of 32-bit
; and 64-bit lanes need no extension and are expected as plain "mov".
define i32 @umovw16b(<16 x i8> %tmp1) {
; CHECK-LABEL: umovw16b:
; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[8]
  %tmp3 = extractelement <16 x i8> %tmp1, i32 8
  %tmp4 = zext i8 %tmp3 to i32
  ret i32 %tmp4
}

define i32 @umovw8h(<8 x i16> %tmp1) {
; CHECK-LABEL: umovw8h:
; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
  %tmp4 = zext i16 %tmp3 to i32
  ret i32 %tmp4
}

define i32 @umovw4s(<4 x i32> %tmp1) {
; CHECK-LABEL: umovw4s:
; CHECK: mov {{w[0-9]+}}, {{v[0-9]+}}.s[2]
  %tmp3 = extractelement <4 x i32> %tmp1, i32 2
  ret i32 %tmp3
}

define i64 @umovx2d(<2 x i64> %tmp1) {
; CHECK-LABEL: umovx2d:
; CHECK: mov {{x[0-9]+}}, {{v[0-9]+}}.d[1]
  %tmp3 = extractelement <2 x i64> %tmp1, i32 1
  ret i64 %tmp3
}

; The pattern spells out "umov" like the 128-bit variant umovw16b above. A
; bare "mov" here would only succeed because it is a substring of "umov".
define i32 @umovw8b(<8 x i8> %tmp1) {
; CHECK-LABEL: umovw8b:
; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[7]
  %tmp3 = extractelement <8 x i8> %tmp1, i32 7
  %tmp4 = zext i8 %tmp3 to i32
  ret i32 %tmp4
}

; Same reasoning as umovw8b: match the full "umov" mnemonic, consistent with
; umovw8h.
define i32 @umovw4h(<4 x i16> %tmp1) {
; CHECK-LABEL: umovw4h:
; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
  %tmp4 = zext i16 %tmp3 to i32
  ret i32 %tmp4
}

define i32 @umovw2s(<2 x i32> %tmp1) {
; CHECK-LABEL: umovw2s:
; CHECK: mov {{w[0-9]+}}, {{v[0-9]+}}.s[1]
  %tmp3 = extractelement <2 x i32> %tmp1, i32 1
  ret i32 %tmp3
}

; Extracting the only lane of a <1 x i64> is expected as an fmov from the
; d register.
define i64 @umovx1d(<1 x i64> %tmp1) {
; CHECK-LABEL: umovx1d:
; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
  %tmp3 = extractelement <1 x i64> %tmp1, i32 0
  ret i64 %tmp3
}

; Sign-extending lane extracts, expected as "smov". The i32 variants that end
; in an add use the extended value twice.
define i32 @smovw16b(<16 x i8> %tmp1) {
; CHECK-LABEL: smovw16b:
; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[8]
  %tmp3 = extractelement <16 x i8> %tmp1, i32 8
  %tmp4 = sext i8 %tmp3 to i32
  %tmp5 = add i32 %tmp4, %tmp4
  ret i32 %tmp5
}

define i32 @smovw8h(<8 x i16> %tmp1) {
; CHECK-LABEL: smovw8h:
; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
  %tmp4 = sext i16 %tmp3 to i32
  %tmp5 = add i32 %tmp4, %tmp4
  ret i32 %tmp5
}

define i64 @smovx16b(<16 x i8> %tmp1) {
; CHECK-LABEL: smovx16b:
; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.b[8]
  %tmp3 = extractelement <16 x i8> %tmp1, i32 8
  %tmp4 = sext i8 %tmp3 to i64
  ret i64 %tmp4
}

define i64 @smovx8h(<8 x i16> %tmp1) {
; CHECK-LABEL: smovx8h:
; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
  %tmp4 = sext i16 %tmp3 to i64
  ret i64 %tmp4
}

define i64 @smovx4s(<4 x i32> %tmp1) {
; CHECK-LABEL: smovx4s:
; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[2]
  %tmp3 = extractelement <4 x i32> %tmp1, i32 2
  %tmp4 = sext i32 %tmp3 to i64
  ret i64 %tmp4
}

define i32 @smovw8b(<8 x i8> %tmp1) {
; CHECK-LABEL: smovw8b:
; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[4]
  %tmp3 = extractelement <8 x i8> %tmp1, i32 4
  %tmp4 = sext i8 %tmp3 to i32
  %tmp5 = add i32 %tmp4, %tmp4
  ret i32 %tmp5
}

define i32 @smovw4h(<4 x i16> %tmp1) {
; CHECK-LABEL: smovw4h:
; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
  %tmp4 = sext i16 %tmp3 to i32
  %tmp5 = add i32 %tmp4, %tmp4
  ret i32 %tmp5
}

; Despite the "x" in the name, the next two functions return i32; the
; patterns accept either a w or an x destination register.
define i32 @smovx8b(<8 x i8> %tmp1) {
; CHECK-LABEL: smovx8b:
; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.b[6]
  %tmp3 = extractelement <8 x i8> %tmp1, i32 6
  %tmp4 = sext i8 %tmp3 to i32
  ret i32 %tmp4
}

define i32 @smovx4h(<4 x i16> %tmp1) {
; CHECK-LABEL: smovx4h:
; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
  %tmp4 = sext i16 %tmp3 to i32
  ret i32 %tmp4
}

define i64 @smovx2s(<2 x i32> %tmp1) {
; CHECK-LABEL: smovx2s:
; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[1]
  %tmp3 = extractelement <2 x i32> %tmp1, i32 1
  %tmp4 = sext i32 %tmp3 to i64
  ret i64 %tmp4
}

; Lane copies written as shufflevector. In the first two, the mask is the
; identity on %v1 except for one lane taken from %v2.
define <8 x i8> @test_vcopy_lane_s8(<8 x i8> %v1, <8 x i8> %v2) {
; CHECK-LABEL: test_vcopy_lane_s8:
; CHECK: mov {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3]
  %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 11, i32 6, i32 7>
  ret <8 x i8> %vset_lane
}

define <16 x i8> @test_vcopyq_laneq_s8(<16 x i8> %v1, <16 x i8> %v2) {
; CHECK-LABEL: test_vcopyq_laneq_s8:
; CHECK: mov {{v[0-9]+}}.b[14], {{v[0-9]+}}.b[6]
  %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 22, i32 15>
  ret <16 x i8> %vset_lane
}

; "swap" variants: the mask is the identity on %v2 except for one lane taken
; from %v1.
define <8 x i8> @test_vcopy_lane_swap_s8(<8 x i8> %v1, <8 x i8> %v2) {
; CHECK-LABEL: test_vcopy_lane_swap_s8:
; CHECK: mov {{v[0-9]+}}.b[7], {{v[0-9]+}}.b[0]
  %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 0>
  ret <8 x i8> %vset_lane
}

define <16 x i8> @test_vcopyq_laneq_swap_s8(<16 x i8> %v1, <16 x i8> %v2) {
; CHECK-LABEL: test_vcopyq_laneq_swap_s8:
; CHECK: mov {{v[0-9]+}}.b[0], {{v[0-9]+}}.b[15]
  %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 15, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i8> %vset_lane
}

; Splat of a scalar argument built as a chain of insertelement instructions
; covering every lane; a single dup from the GPR is expected.
define <8 x i8> @test_vdup_n_u8(i8 %v1) #0 {
; CHECK-LABEL: test_vdup_n_u8:
; CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}}
  %vecinit.i = insertelement <8 x i8> undef, i8 %v1, i32 0
  %vecinit1.i = insertelement <8 x i8> %vecinit.i, i8 %v1, i32 1
  %vecinit2.i = insertelement <8 x i8> %vecinit1.i, i8 %v1, i32 2
  %vecinit3.i = insertelement <8 x i8> %vecinit2.i, i8 %v1, i32 3
  %vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 %v1, i32 4
  %vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 %v1, i32 5
  %vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 %v1, i32 6
  %vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 %v1, i32 7
  ret <8 x i8> %vecinit7.i
}

define <4 x i16> @test_vdup_n_u16(i16 %v1) #0 {
; CHECK-LABEL: test_vdup_n_u16:
; CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}}
  %vecinit.i = insertelement <4 x i16> undef, i16 %v1, i32 0
  %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %v1, i32 1
  %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %v1, i32 2
  %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %v1, i32 3
  ret <4 x i16> %vecinit3.i
}

define <2 x i32> @test_vdup_n_u32(i32 %v1) #0 {
; CHECK-LABEL: test_vdup_n_u32:
; CHECK: dup {{v[0-9]+}}.2s, {{w[0-9]+}}
  %vecinit.i = insertelement <2 x i32> undef, i32 %v1, i32 0
  %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %v1, i32 1
  ret <2 x i32> %vecinit1.i
}

; A one-element vector needs no dup: a plain fmov into the d register is
; expected.
define <1 x i64> @test_vdup_n_u64(i64 %v1) #0 {
; CHECK-LABEL: test_vdup_n_u64:
; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
  %vecinit.i = insertelement <1 x i64> undef, i64 %v1, i32 0
  ret <1 x i64> %vecinit.i
}

define <16 x i8> @test_vdupq_n_u8(i8 %v1) #0 {
; CHECK-LABEL: test_vdupq_n_u8:
; CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}}
  %vecinit.i = insertelement <16 x i8> undef, i8 %v1, i32 0
  %vecinit1.i = insertelement <16 x i8> %vecinit.i, i8 %v1, i32 1
  %vecinit2.i = insertelement <16 x i8> %vecinit1.i, i8 %v1, i32 2
  %vecinit3.i = insertelement <16 x i8> %vecinit2.i, i8 %v1, i32 3
  %vecinit4.i = insertelement <16 x i8> %vecinit3.i, i8 %v1, i32 4
  %vecinit5.i = insertelement <16 x i8> %vecinit4.i, i8 %v1, i32 5
  %vecinit6.i = insertelement <16 x i8> %vecinit5.i, i8 %v1, i32 6
  %vecinit7.i = insertelement <16 x i8> %vecinit6.i, i8 %v1, i32 7
  %vecinit8.i = insertelement <16 x i8> %vecinit7.i, i8 %v1, i32 8
  %vecinit9.i = insertelement <16 x i8> %vecinit8.i, i8 %v1, i32 9
  %vecinit10.i = insertelement <16 x i8> %vecinit9.i, i8 %v1, i32 10
  %vecinit11.i = insertelement <16 x i8> %vecinit10.i, i8 %v1, i32 11
  %vecinit12.i = insertelement <16 x i8> %vecinit11.i, i8 %v1, i32 12
  %vecinit13.i = insertelement <16 x i8> %vecinit12.i, i8 %v1, i32 13
  %vecinit14.i = insertelement <16 x i8> %vecinit13.i, i8 %v1, i32 14
  %vecinit15.i = insertelement <16 x i8> %vecinit14.i, i8 %v1, i32 15
  ret <16 x i8> %vecinit15.i
}

define <8 x i16> @test_vdupq_n_u16(i16 %v1) #0 {
; CHECK-LABEL: test_vdupq_n_u16:
; CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
  %vecinit.i = insertelement <8 x i16> undef, i16 %v1, i32 0
  %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %v1, i32 1
  %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %v1, i32 2
  %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %v1, i32 3
  %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %v1, i32 4
  %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %v1, i32 5
  %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %v1, i32 6
  %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %v1, i32 7
  ret <8 x i16> %vecinit7.i
}

; Splat of a scalar argument built as a chain of insertelement instructions
; covering every lane; a single dup from the GPR is expected.
define <4 x i32> @test_vdupq_n_u32(i32 %v1) #0 {
; CHECK-LABEL: test_vdupq_n_u32:
; CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}}
  %vecinit.i = insertelement <4 x i32> undef, i32 %v1, i32 0
  %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %v1, i32 1
  %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %v1, i32 2
  %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %v1, i32 3
  ret <4 x i32> %vecinit3.i
}

define <2 x i64> @test_vdupq_n_u64(i64 %v1) #0 {
; CHECK-LABEL: test_vdupq_n_u64:
; CHECK: dup {{v[0-9]+}}.2d, {{x[0-9]+}}
  %vecinit.i = insertelement <2 x i64> undef, i64 %v1, i32 0
  %vecinit1.i = insertelement <2 x i64> %vecinit.i, i64 %v1, i32 1
  ret <2 x i64> %vecinit1.i
}

; Splat of one vector lane written as a shufflevector with a constant mask;
; a dup from that lane is expected. Variants cover 64-bit and 128-bit sources
; and results.
define <8 x i8> @test_vdup_lane_s8(<8 x i8> %v1) #0 {
; CHECK-LABEL: test_vdup_lane_s8:
; CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5]
  %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x i8> %shuffle
}

define <4 x i16> @test_vdup_lane_s16(<4 x i16> %v1) #0 {
; CHECK-LABEL: test_vdup_lane_s16:
; CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
  %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i16> %shuffle
}

define <2 x i32> @test_vdup_lane_s32(<2 x i32> %v1) #0 {
; CHECK-LABEL: test_vdup_lane_s32:
; CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
  %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x i32> %shuffle
}

; The four vdupq_lane patterns name the "dup" mnemonic explicitly, like the
; other dup tests in this file, so that an unrelated instruction with the same
; operands cannot satisfy them.
define <16 x i8> @test_vdupq_lane_s8(<8 x i8> %v1) #0 {
; CHECK-LABEL: test_vdupq_lane_s8:
; CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5]
  %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <16 x i8> %shuffle
}

define <8 x i16> @test_vdupq_lane_s16(<4 x i16> %v1) #0 {
; CHECK-LABEL: test_vdupq_lane_s16:
; CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
  %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i16> %shuffle
}

define <4 x i32> @test_vdupq_lane_s32(<2 x i32> %v1) #0 {
; CHECK-LABEL: test_vdupq_lane_s32:
; CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
  %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %shuffle
}

define <2 x i64> @test_vdupq_lane_s64(<1 x i64> %v1) #0 {
; CHECK-LABEL: test_vdupq_lane_s64:
; CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
  %shuffle = shufflevector <1 x i64> %v1, <1 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %shuffle
}

define <8 x i8> @test_vdup_laneq_s8(<16 x i8> %v1) #0 {
; CHECK-LABEL: test_vdup_laneq_s8:
; CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5]
  %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x i8> %shuffle
}

define <4 x i16> @test_vdup_laneq_s16(<8 x i16> %v1) #0 {
; CHECK-LABEL: test_vdup_laneq_s16:
; CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
  %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i16> %shuffle
}

define <2 x i32> @test_vdup_laneq_s32(<4 x i32> %v1) #0 {
; CHECK-LABEL: test_vdup_laneq_s32:
; CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
  %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x i32> %shuffle
}

define <16 x i8> @test_vdupq_laneq_s8(<16 x i8> %v1) #0 {
; CHECK-LABEL: test_vdupq_laneq_s8:
; CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5]
  %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <16 x i8> %shuffle
}

; As with the vdupq_lane tests, the mnemonic is spelled out to agree with the
; neighbouring laneq tests.
define <8 x i16> @test_vdupq_laneq_s16(<8 x i16> %v1) #0 {
; CHECK-LABEL: test_vdupq_laneq_s16:
; CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
  %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i16> %shuffle
}

define <4 x i32> @test_vdupq_laneq_s32(<4 x i32> %v1) #0 {
; CHECK-LABEL: test_vdupq_laneq_s32:
; CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
  %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %shuffle
}

define <2 x i64> @test_vdupq_laneq_s64(<2 x i64> %v1) #0 {
; CHECK-LABEL: test_vdupq_laneq_s64:
; CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
  %shuffle = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %shuffle
}

; Bitcasts from a 64-bit vector argument to an i64 result; an fmov from the
; d register to an x register is expected.
define i64 @test_bitcastv8i8toi64(<8 x i8> %in) {
; CHECK-LABEL: test_bitcastv8i8toi64:
  %res = bitcast <8 x i8> %in to i64
; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
  ret i64 %res
}

define i64 @test_bitcastv4i16toi64(<4 x i16> %in) {
; CHECK-LABEL: test_bitcastv4i16toi64:
  %res = bitcast <4 x i16> %in to i64
; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
  ret i64 %res
}

define i64 @test_bitcastv2i32toi64(<2 x i32> %in) {
; CHECK-LABEL: test_bitcastv2i32toi64:
  %res = bitcast <2 x i32> %in to i64
; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
  ret i64 %res
}

define i64 @test_bitcastv2f32toi64(<2 x float> %in) {
; CHECK-LABEL: test_bitcastv2f32toi64:
  %res = bitcast <2 x float> %in to i64
; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
  ret i64 %res
}

define i64 @test_bitcastv1i64toi64(<1 x i64> %in) {
; CHECK-LABEL: test_bitcastv1i64toi64:
  %res = bitcast <1 x i64> %in to i64
; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
  ret i64 %res
}

define i64 @test_bitcastv1f64toi64(<1 x double> %in) {
; CHECK-LABEL: test_bitcastv1f64toi64:
  %res = bitcast <1 x double> %in to i64
; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
  ret i64 %res
}

; Bitcasts from an i64 argument to a 64-bit vector result; an fmov from the
; x register to a d register is expected.
define <8 x i8> @test_bitcasti64tov8i8(i64 %in) {
; CHECK-LABEL: test_bitcasti64tov8i8:
  %res = bitcast i64 %in to <8 x i8>
; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
  ret <8 x i8> %res
}

define <4 x i16> @test_bitcasti64tov4i16(i64 %in) {
; CHECK-LABEL: test_bitcasti64tov4i16:
  %res = bitcast i64 %in to <4 x i16>
; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
  ret <4 x i16> %res
}

define <2 x i32> @test_bitcasti64tov2i32(i64 %in) {
; CHECK-LABEL: test_bitcasti64tov2i32:
  %res = bitcast i64 %in to <2 x i32>
; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
  ret <2 x i32> %res
}

define <2 x float> @test_bitcasti64tov2f32(i64 %in) {
; CHECK-LABEL: test_bitcasti64tov2f32:
  %res = bitcast i64 %in to <2 x float>
; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
  ret <2 x float> %res
}

define <1 x i64> @test_bitcasti64tov1i64(i64 %in) {
; CHECK-LABEL: test_bitcasti64tov1i64:
  %res = bitcast i64 %in to <1 x i64>
; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
  ret <1 x i64> %res
}

define <1 x double> @test_bitcasti64tov1f64(i64 %in) {
; CHECK-LABEL: test_bitcasti64tov1f64:
  %res = bitcast i64 %in to <1 x double>
; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
  ret <1 x double> %res
}

; Bitcast of a 64-bit vector to <1 x double> sandwiched between a negate and
; an fptosi. The negate must be immediately followed by the fcvtzs, i.e. the
; bitcast itself must not produce an instruction between them.
define <1 x i64> @test_bitcastv8i8tov1f64(<8 x i8> %a) #0 {
; CHECK-LABEL: test_bitcastv8i8tov1f64:
; CHECK: neg {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
; CHECK-NEXT: fcvtzs {{[xd][0-9]+}}, {{d[0-9]+}}
  %sub.i = sub <8 x i8> zeroinitializer, %a
  %1 = bitcast <8 x i8> %sub.i to <1 x double>
  %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
  ret <1 x i64> %vcvt.i
}

define <1 x i64> @test_bitcastv4i16tov1f64(<4 x i16> %a) #0 {
; CHECK-LABEL: test_bitcastv4i16tov1f64:
; CHECK: neg {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
; CHECK-NEXT: fcvtzs {{[dx][0-9]+}}, {{d[0-9]+}}
  %sub.i = sub <4 x i16> zeroinitializer, %a
  %1 = bitcast <4 x i16> %sub.i to <1 x double>
  %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
  ret <1 x i64> %vcvt.i
}

define <1 x i64> @test_bitcastv2i32tov1f64(<2 x i32> %a) #0 {
; CHECK-LABEL: test_bitcastv2i32tov1f64:
; CHECK: neg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
; CHECK-NEXT: fcvtzs {{[xd][0-9]+}}, {{d[0-9]+}}
  %sub.i = sub <2 x i32> zeroinitializer, %a
  %1 = bitcast <2 x i32> %sub.i to <1 x double>
  %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
  ret <1 x i64> %vcvt.i
}

define <1 x i64> @test_bitcastv1i64tov1f64(<1 x i64> %a) #0 {
; CHECK-LABEL: test_bitcastv1i64tov1f64:
; CHECK: neg {{d[0-9]+}}, {{d[0-9]+}}
; CHECK-NEXT: fcvtzs {{[dx][0-9]+}}, {{d[0-9]+}}
  %sub.i = sub <1 x i64> zeroinitializer, %a
  %1 = bitcast <1 x i64> %sub.i to <1 x double>
  %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
  ret <1 x i64> %vcvt.i
}

define <1 x i64> @test_bitcastv2f32tov1f64(<2 x float> %a) #0 {
; CHECK-LABEL: test_bitcastv2f32tov1f64:
; CHECK: fneg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
; CHECK-NEXT: fcvtzs {{[xd][0-9]+}}, {{d[0-9]+}}
  %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %a
  %1 = bitcast <2 x float> %sub.i to <1 x double>
  %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
  ret <1 x i64> %vcvt.i
}

; The opposite direction: a sitofp result of type <1 x double> is bitcast to
; another 64-bit vector type and then negated. The scvtf must be immediately
; followed by the negate.
define <8 x i8> @test_bitcastv1f64tov8i8(<1 x i64> %a) #0 {
; CHECK-LABEL: test_bitcastv1f64tov8i8:
; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
; CHECK-NEXT: neg {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
  %vcvt.i = sitofp <1 x i64> %a to <1 x double>
  %1 = bitcast <1 x double> %vcvt.i to <8 x i8>
  %sub.i = sub <8 x i8> zeroinitializer, %1
  ret <8 x i8> %sub.i
}

define <4 x i16> @test_bitcastv1f64tov4i16(<1 x i64> %a) #0 {
; CHECK-LABEL: test_bitcastv1f64tov4i16:
; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
; CHECK-NEXT: neg {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
  %vcvt.i = sitofp <1 x i64> %a to <1 x double>
  %1 = bitcast <1 x double> %vcvt.i to <4 x i16>
  %sub.i = sub <4 x i16> zeroinitializer, %1
  ret <4 x i16> %sub.i
}

define <2 x i32> @test_bitcastv1f64tov2i32(<1 x i64> %a) #0 {
; CHECK-LABEL: test_bitcastv1f64tov2i32:
; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
; CHECK-NEXT: neg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  %vcvt.i = sitofp <1 x i64> %a to <1 x double>
  %1 = bitcast <1 x double> %vcvt.i to <2 x i32>
  %sub.i = sub <2 x i32> zeroinitializer, %1
  ret <2 x i32> %sub.i
}

define <1 x i64> @test_bitcastv1f64tov1i64(<1 x i64> %a) #0 {
; CHECK-LABEL: test_bitcastv1f64tov1i64:
; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
; CHECK-NEXT: neg {{d[0-9]+}}, {{d[0-9]+}}
  %vcvt.i = sitofp <1 x i64> %a to <1 x double>
  %1 = bitcast <1 x double> %vcvt.i to <1 x i64>
  %sub.i = sub <1 x i64> zeroinitializer, %1
  ret <1 x i64> %sub.i
}

define <2 x float> @test_bitcastv1f64tov2f32(<1 x i64> %a) #0 {
; CHECK-LABEL: test_bitcastv1f64tov2f32:
; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
; CHECK-NEXT: fneg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  %vcvt.i = sitofp <1 x i64> %a to <1 x double>
  %1 = bitcast <1 x double> %vcvt.i to <2 x float>
  %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %1
  ret <2 x float> %sub.i
}

; Test inserting a scalar into lane 0 of an undef vector. Only lane 0 is
; defined, so a single fmov from the GPR is expected.
define <8 x i8> @scalar_to_vector.v8i8(i8 %a) {
; CHECK-LABEL: scalar_to_vector.v8i8:
; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
  %b = insertelement <8 x i8> undef, i8 %a, i32 0
  ret <8 x i8> %b
}

define <16 x i8> @scalar_to_vector.v16i8(i8 %a) {
; CHECK-LABEL: scalar_to_vector.v16i8:
; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
  %b = insertelement <16 x i8> undef, i8 %a, i32 0
  ret <16 x i8> %b
}

define <4 x i16> @scalar_to_vector.v4i16(i16 %a) {
; CHECK-LABEL: scalar_to_vector.v4i16:
; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
  %b = insertelement <4 x i16> undef, i16 %a, i32 0
  ret <4 x i16> %b
}

define <8 x i16> @scalar_to_vector.v8i16(i16 %a) {
; CHECK-LABEL: scalar_to_vector.v8i16:
; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
  %b = insertelement <8 x i16> undef, i16 %a, i32 0
  ret <8 x i16> %b
}

define <2 x i32> @scalar_to_vector.v2i32(i32 %a) {
; CHECK-LABEL: scalar_to_vector.v2i32:
; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
  %b = insertelement <2 x i32> undef, i32 %a, i32 0
  ret <2 x i32> %b
}

define <4 x i32> @scalar_to_vector.v4i32(i32 %a) {
; CHECK-LABEL: scalar_to_vector.v4i32:
; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
  %b = insertelement <4 x i32> undef, i32 %a, i32 0
  ret <4 x i32> %b
}

define <2 x i64> @scalar_to_vector.v2i64(i64 %a) {
; CHECK-LABEL: scalar_to_vector.v2i64:
; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
  %b = insertelement <2 x i64> undef, i64 %a, i32 0
  ret <2 x i64> %b
}

; Splat of the single element of a one-element vector argument; a dup from
; lane 0 of v0 is expected.
define <8 x i8> @testDUP.v1i8(<1 x i8> %a) {
; CHECK-LABEL: testDUP.v1i8:
; CHECK: dup v0.8b, v0.b[0]
  %b = extractelement <1 x i8> %a, i32 0
  %c = insertelement <8 x i8> undef, i8 %b, i32 0
  %d = insertelement <8 x i8> %c, i8 %b, i32 1
  %e = insertelement <8 x i8> %d, i8 %b, i32 2
  %f = insertelement <8 x i8> %e, i8 %b, i32 3
  %g = insertelement <8 x i8> %f, i8 %b, i32 4
  %h = insertelement <8 x i8> %g, i8 %b, i32 5
  %i = insertelement <8 x i8> %h, i8 %b, i32 6
  %j = insertelement <8 x i8> %i, i8 %b, i32 7
  ret <8 x i8> %j
}

define <8 x i16> @testDUP.v1i16(<1 x i16> %a) {
; CHECK-LABEL: testDUP.v1i16:
; CHECK: dup v0.8h, v0.h[0]
  %b = extractelement <1 x i16> %a, i32 0
  %c = insertelement <8 x i16> undef, i16 %b, i32 0
  %d = insertelement <8 x i16> %c, i16 %b, i32 1
  %e = insertelement <8 x i16> %d, i16 %b, i32 2
  %f = insertelement <8 x i16> %e, i16 %b, i32 3
  %g = insertelement <8 x i16> %f, i16 %b, i32 4
  %h = insertelement <8 x i16> %g, i16 %b, i32 5
  %i = insertelement <8 x i16> %h, i16 %b, i32 6
  %j = insertelement <8 x i16> %i, i16 %b, i32 7
  ret <8 x i16> %j
}

define <4 x i32> @testDUP.v1i32(<1 x i32> %a) {
; CHECK-LABEL: testDUP.v1i32:
; CHECK: dup v0.4s, v0.s[0]
  %b = extractelement <1 x i32> %a, i32 0
  %c = insertelement <4 x i32> undef, i32 %b, i32 0
  %d = insertelement <4 x i32> %c, i32 %b, i32 1
  %e = insertelement <4 x i32> %d, i32 %b, i32 2
  %f = insertelement <4 x i32> %e, i32 %b, i32 3
  ret <4 x i32> %f
}

; Rebuilds the low eight lanes of %x one element at a time. The pattern only
; requires that the function reaches its ret.
define <8 x i8> @getl(<16 x i8> %x) #0 {
; CHECK-LABEL: getl:
; CHECK: ret
  %vecext = extractelement <16 x i8> %x, i32 0
  %vecinit = insertelement <8 x i8> undef, i8 %vecext, i32 0
  %vecext1 = extractelement <16 x i8> %x, i32 1
  %vecinit2 = insertelement <8 x i8> %vecinit, i8 %vecext1, i32 1
  %vecext3 = extractelement <16 x i8> %x, i32 2
  %vecinit4 = insertelement <8 x i8> %vecinit2, i8 %vecext3, i32 2
  %vecext5 = extractelement <16 x i8> %x, i32 3
  %vecinit6 = insertelement <8 x i8> %vecinit4, i8 %vecext5, i32 3
  %vecext7 = extractelement <16 x i8> %x, i32 4
  %vecinit8 = insertelement <8 x i8> %vecinit6, i8 %vecext7, i32 4
  %vecext9 = extractelement <16 x i8> %x, i32 5
  %vecinit10 = insertelement <8 x i8> %vecinit8, i8 %vecext9, i32 5
  %vecext11 = extractelement <16 x i8> %x, i32 6
  %vecinit12 = insertelement <8 x i8> %vecinit10, i8 %vecext11, i32 6
  %vecext13 = extractelement <16 x i8> %x, i32 7
  %vecinit14 = insertelement <8 x i8> %vecinit12, i8 %vecext13, i32 7
  ret <8 x i8> %vecinit14
}

; Extract with a variable index (%idx) feeding lane 0, followed by
; constant-index copies of lanes 1-3. The variable extract is expected to go
; through a stack slot: the index is masked, merged into the slot address
; with bfi, and the element is loaded back from that address.
; CHECK-LABEL: test_extracts_inserts_varidx_extract:
; CHECK: str q0
; CHECK-DAG: and [[MASKED_IDX:x[0-9]+]], x0, #0x7
; CHECK: bfi [[PTR:x[0-9]+]], [[MASKED_IDX]], #1, #3
; CHECK-DAG: ldr h[[R:[0-9]+]], {{\[}}[[PTR]]{{\]}}
; CHECK-DAG: mov v[[R]].h[1], v0.h[1]
; CHECK-DAG: mov v[[R]].h[2], v0.h[2]
; CHECK-DAG: mov v[[R]].h[3], v0.h[3]
define <4 x i16> @test_extracts_inserts_varidx_extract(<8 x i16> %x, i32 %idx) {
  %tmp = extractelement <8 x i16> %x, i32 %idx
  %tmp2 = insertelement <4 x i16> undef, i16 %tmp, i32 0
  %tmp3 = extractelement <8 x i16> %x, i32 1
  %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 1
  %tmp5 = extractelement <8 x i16> %x, i32 2
  %tmp6 = insertelement <4 x i16> %tmp4, i16 %tmp5, i32 2
  %tmp7 = extractelement <8 x i16> %x, i32 3
  %tmp8 = insertelement <4 x i16> %tmp6, i16 %tmp7, i32 3
  ret <4 x i16> %tmp8
}

; Insert at a variable index (%idx), followed by constant-index copies of
; lanes 1-3. The address register is captured as PTR instead of naming a
; specific scratch register, so the test does not depend on register
; allocation; this mirrors the extract variant above.
; CHECK-LABEL: test_extracts_inserts_varidx_insert:
; CHECK: and [[MASKED_IDX:x[0-9]+]], x0, #0x3
; CHECK: bfi [[PTR:x[0-9]+]], [[MASKED_IDX]], #1, #2
; CHECK: str h0, {{\[}}[[PTR]]{{\]}}
; CHECK-DAG: ldr d[[R:[0-9]+]]
; CHECK-DAG: mov v[[R]].h[1], v0.h[1]
; CHECK-DAG: mov v[[R]].h[2], v0.h[2]
; CHECK-DAG: mov v[[R]].h[3], v0.h[3]
define <4 x i16> @test_extracts_inserts_varidx_insert(<8 x i16> %x, i32 %idx) {
  %tmp = extractelement <8 x i16> %x, i32 0
  %tmp2 = insertelement <4 x i16> undef, i16 %tmp, i32 %idx
  %tmp3 = extractelement <8 x i16> %x, i32 1
  %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 1
  %tmp5 = extractelement <8 x i16> %x, i32 2
  %tmp6 = insertelement <4 x i16> %tmp4, i16 %tmp5, i32 2
  %tmp7 = extractelement <8 x i16> %x, i32 3
  %tmp8 = insertelement <4 x i16> %tmp6, i16 %tmp7, i32 3
  ret <4 x i16> %tmp8
}

; Splat of a truncated lane: an element is extracted, truncated to a narrower
; integer type and broadcast. A single dup from the narrow lane that holds the
; low bits of the wide element is expected (e.g. i32 lane 1 -> i16 lane 2).
define <4 x i16> @test_dup_v2i32_v4i16(<2 x i32> %a) {
; CHECK-LABEL: test_dup_v2i32_v4i16:
; CHECK: dup v0.4h, v0.h[2]
entry:
  %x = extractelement <2 x i32> %a, i32 1
  %vget_lane = trunc i32 %x to i16
  %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
  %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
  %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
  %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
  ret <4 x i16> %vecinit3.i
}

define <8 x i16> @test_dup_v4i32_v8i16(<4 x i32> %a) {
; CHECK-LABEL: test_dup_v4i32_v8i16:
; CHECK: dup v0.8h, v0.h[6]
entry:
  %x = extractelement <4 x i32> %a, i32 3
  %vget_lane = trunc i32 %x to i16
  %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0
  %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1
  %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2
  %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3
  %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4
  %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5
  %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6
  %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7
  ret <8 x i16> %vecinit7.i
}

define <4 x i16> @test_dup_v1i64_v4i16(<1 x i64> %a) {
; CHECK-LABEL: test_dup_v1i64_v4i16:
; CHECK: dup v0.4h, v0.h[0]
entry:
  %x = extractelement <1 x i64> %a, i32 0
  %vget_lane = trunc i64 %x to i16
  %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
  %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
  %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
  %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
  ret <4 x i16> %vecinit3.i
}

define <2 x i32> @test_dup_v1i64_v2i32(<1 x i64> %a) {
; CHECK-LABEL: test_dup_v1i64_v2i32:
; CHECK: dup v0.2s, v0.s[0]
entry:
  %x = extractelement <1 x i64> %a, i32 0
  %vget_lane = trunc i64 %x to i32
  %vecinit.i = insertelement <2 x i32> undef, i32 %vget_lane, i32 0
  %vecinit1.i = insertelement <2 x
i32> %vecinit.i, i32 %vget_lane, i32 1 996 ret <2 x i32> %vecinit1.i 997 } 998 999 define <8 x i16> @test_dup_v2i64_v8i16(<2 x i64> %a) { 1000 ; CHECK-LABEL: test_dup_v2i64_v8i16: 1001 ; CHECK: dup v0.8h, v0.h[4] 1002 entry: 1003 %x = extractelement <2 x i64> %a, i32 1 1004 %vget_lane = trunc i64 %x to i16 1005 %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0 1006 %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1 1007 %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2 1008 %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3 1009 %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4 1010 %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5 1011 %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6 1012 %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7 1013 ret <8 x i16> %vecinit7.i 1014 } 1015 1016 define <4 x i32> @test_dup_v2i64_v4i32(<2 x i64> %a) { 1017 ; CHECK-LABEL: test_dup_v2i64_v4i32: 1018 ; CHECK: dup v0.4s, v0.s[2] 1019 entry: 1020 %x = extractelement <2 x i64> %a, i32 1 1021 %vget_lane = trunc i64 %x to i32 1022 %vecinit.i = insertelement <4 x i32> undef, i32 %vget_lane, i32 0 1023 %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %vget_lane, i32 1 1024 %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %vget_lane, i32 2 1025 %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %vget_lane, i32 3 1026 ret <4 x i32> %vecinit3.i 1027 } 1028 1029 define <4 x i16> @test_dup_v4i32_v4i16(<4 x i32> %a) { 1030 ; CHECK-LABEL: test_dup_v4i32_v4i16: 1031 ; CHECK: dup v0.4h, v0.h[2] 1032 entry: 1033 %x = extractelement <4 x i32> %a, i32 1 1034 %vget_lane = trunc i32 %x to i16 1035 %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0 1036 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1 1037 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2 
1038 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3 1039 ret <4 x i16> %vecinit3.i 1040 } 1041 1042 define <4 x i16> @test_dup_v2i64_v4i16(<2 x i64> %a) { 1043 ; CHECK-LABEL: test_dup_v2i64_v4i16: 1044 ; CHECK: dup v0.4h, v0.h[0] 1045 entry: 1046 %x = extractelement <2 x i64> %a, i32 0 1047 %vget_lane = trunc i64 %x to i16 1048 %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0 1049 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1 1050 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2 1051 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3 1052 ret <4 x i16> %vecinit3.i 1053 } 1054 1055 define <2 x i32> @test_dup_v2i64_v2i32(<2 x i64> %a) { 1056 ; CHECK-LABEL: test_dup_v2i64_v2i32: 1057 ; CHECK: dup v0.2s, v0.s[0] 1058 entry: 1059 %x = extractelement <2 x i64> %a, i32 0 1060 %vget_lane = trunc i64 %x to i32 1061 %vecinit.i = insertelement <2 x i32> undef, i32 %vget_lane, i32 0 1062 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %vget_lane, i32 1 1063 ret <2 x i32> %vecinit1.i 1064 } 1065 1066 1067 define <2 x float> @test_scalar_to_vector_f32_to_v2f32(<2 x float> %a) { 1068 ; CHECK-LABEL: test_scalar_to_vector_f32_to_v2f32: 1069 ; CHECK: fmaxp s{{[0-9]+}}, v{{[0-9]+}}.2s 1070 ; CHECK-NEXT: ret 1071 entry: 1072 %0 = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a) 1073 %1 = insertelement <1 x float> undef, float %0, i32 0 1074 %2 = extractelement <1 x float> %1, i32 0 1075 %vecinit1.i = insertelement <2 x float> undef, float %2, i32 0 1076 ret <2 x float> %vecinit1.i 1077 } 1078 1079 define <4 x float> @test_scalar_to_vector_f32_to_v4f32(<2 x float> %a) { 1080 ; CHECK-LABEL: test_scalar_to_vector_f32_to_v4f32: 1081 ; CHECK: fmaxp s{{[0-9]+}}, v{{[0-9]+}}.2s 1082 ; CHECK-NEXT: ret 1083 entry: 1084 %0 = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a) 1085 %1 = insertelement <1 x float> undef, float %0, i32 0 1086 %2 = 
extractelement <1 x float> %1, i32 0 1087 %vecinit1.i = insertelement <4 x float> undef, float %2, i32 0 1088 ret <4 x float> %vecinit1.i 1089 } 1090 1091 declare float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float>) 1092 1093 define <2 x i32> @test_concat_undef_v1i32(<2 x i32> %a) { 1094 ; CHECK-LABEL: test_concat_undef_v1i32: 1095 ; CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 1096 entry: 1097 %0 = extractelement <2 x i32> %a, i32 0 1098 %vecinit1.i = insertelement <2 x i32> undef, i32 %0, i32 1 1099 ret <2 x i32> %vecinit1.i 1100 } 1101 1102 declare i32 @llvm.aarch64.neon.sqabs.i32(i32) #4 1103 1104 define <2 x i32> @test_concat_v1i32_undef(i32 %a) { 1105 ; CHECK-LABEL: test_concat_v1i32_undef: 1106 ; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}} 1107 ; CHECK-NEXT: ret 1108 entry: 1109 %b = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a) 1110 %vecinit.i432 = insertelement <2 x i32> undef, i32 %b, i32 0 1111 ret <2 x i32> %vecinit.i432 1112 } 1113 1114 define <2 x i32> @test_concat_same_v1i32_v1i32(<2 x i32> %a) { 1115 ; CHECK-LABEL: test_concat_same_v1i32_v1i32: 1116 ; CHECK: dup v{{[0-9]+}}.2s, v{{[0-9]+}}.s[0] 1117 entry: 1118 %0 = extractelement <2 x i32> %a, i32 0 1119 %vecinit.i = insertelement <2 x i32> undef, i32 %0, i32 0 1120 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %0, i32 1 1121 ret <2 x i32> %vecinit1.i 1122 } 1123 1124 define <2 x i32> @test_concat_diff_v1i32_v1i32(i32 %a, i32 %b) { 1125 ; CHECK-LABEL: test_concat_diff_v1i32_v1i32: 1126 ; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}} 1127 ; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}} 1128 ; CHECK: mov {{v[0-9]+}}.s[1], w{{[0-9]+}} 1129 entry: 1130 %c = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a) 1131 %d = insertelement <2 x i32> undef, i32 %c, i32 0 1132 %e = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %b) 1133 %f = insertelement <2 x i32> undef, i32 %e, i32 0 1134 %h = shufflevector <2 x i32> %d, <2 x i32> %f, <2 x i32> <i32 0, i32 2> 1135 ret <2 x i32> %h 1136 } 1137 1138 define <16 x i8> 
@test_concat_v16i8_v16i8_v16i8(<16 x i8> %x, <16 x i8> %y) #0 { 1139 ; CHECK-LABEL: test_concat_v16i8_v16i8_v16i8: 1140 ; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1141 entry: 1142 %vecinit30 = shufflevector <16 x i8> %x, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> 1143 ret <16 x i8> %vecinit30 1144 } 1145 1146 define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 { 1147 ; CHECK-LABEL: test_concat_v16i8_v8i8_v16i8: 1148 ; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1149 entry: 1150 %vecext = extractelement <8 x i8> %x, i32 0 1151 %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0 1152 %vecext1 = extractelement <8 x i8> %x, i32 1 1153 %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1 1154 %vecext3 = extractelement <8 x i8> %x, i32 2 1155 %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2 1156 %vecext5 = extractelement <8 x i8> %x, i32 3 1157 %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3 1158 %vecext7 = extractelement <8 x i8> %x, i32 4 1159 %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4 1160 %vecext9 = extractelement <8 x i8> %x, i32 5 1161 %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5 1162 %vecext11 = extractelement <8 x i8> %x, i32 6 1163 %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6 1164 %vecext13 = extractelement <8 x i8> %x, i32 7 1165 %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7 1166 %vecinit30 = shufflevector <16 x i8> %vecinit14, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> 1167 ret <16 x i8> %vecinit30 1168 } 1169 1170 define <16 x i8> @test_concat_v16i8_v16i8_v8i8(<16 x i8> %x, <8 x i8> %y) #0 { 1171 ; CHECK-LABEL: test_concat_v16i8_v16i8_v8i8: 1172 ; CHECK: mov {{v[0-9]+}}.d[1], 
{{v[0-9]+}}.d[0] 1173 entry: 1174 %vecext = extractelement <16 x i8> %x, i32 0 1175 %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0 1176 %vecext1 = extractelement <16 x i8> %x, i32 1 1177 %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1 1178 %vecext3 = extractelement <16 x i8> %x, i32 2 1179 %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2 1180 %vecext5 = extractelement <16 x i8> %x, i32 3 1181 %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3 1182 %vecext7 = extractelement <16 x i8> %x, i32 4 1183 %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4 1184 %vecext9 = extractelement <16 x i8> %x, i32 5 1185 %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5 1186 %vecext11 = extractelement <16 x i8> %x, i32 6 1187 %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6 1188 %vecext13 = extractelement <16 x i8> %x, i32 7 1189 %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7 1190 %vecext15 = extractelement <8 x i8> %y, i32 0 1191 %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8 1192 %vecext17 = extractelement <8 x i8> %y, i32 1 1193 %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9 1194 %vecext19 = extractelement <8 x i8> %y, i32 2 1195 %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10 1196 %vecext21 = extractelement <8 x i8> %y, i32 3 1197 %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11 1198 %vecext23 = extractelement <8 x i8> %y, i32 4 1199 %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12 1200 %vecext25 = extractelement <8 x i8> %y, i32 5 1201 %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13 1202 %vecext27 = extractelement <8 x i8> %y, i32 6 1203 %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14 1204 %vecext29 = extractelement <8 x i8> %y, i32 7 1205 %vecinit30 = insertelement <16 x i8> %vecinit28, i8 
%vecext29, i32 15 1206 ret <16 x i8> %vecinit30 1207 } 1208 1209 define <16 x i8> @test_concat_v16i8_v8i8_v8i8(<8 x i8> %x, <8 x i8> %y) #0 { 1210 ; CHECK-LABEL: test_concat_v16i8_v8i8_v8i8: 1211 ; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1212 entry: 1213 %vecext = extractelement <8 x i8> %x, i32 0 1214 %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0 1215 %vecext1 = extractelement <8 x i8> %x, i32 1 1216 %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1 1217 %vecext3 = extractelement <8 x i8> %x, i32 2 1218 %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2 1219 %vecext5 = extractelement <8 x i8> %x, i32 3 1220 %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3 1221 %vecext7 = extractelement <8 x i8> %x, i32 4 1222 %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4 1223 %vecext9 = extractelement <8 x i8> %x, i32 5 1224 %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5 1225 %vecext11 = extractelement <8 x i8> %x, i32 6 1226 %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6 1227 %vecext13 = extractelement <8 x i8> %x, i32 7 1228 %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7 1229 %vecext15 = extractelement <8 x i8> %y, i32 0 1230 %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8 1231 %vecext17 = extractelement <8 x i8> %y, i32 1 1232 %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9 1233 %vecext19 = extractelement <8 x i8> %y, i32 2 1234 %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10 1235 %vecext21 = extractelement <8 x i8> %y, i32 3 1236 %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11 1237 %vecext23 = extractelement <8 x i8> %y, i32 4 1238 %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12 1239 %vecext25 = extractelement <8 x i8> %y, i32 5 1240 %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13 1241 %vecext27 = 
extractelement <8 x i8> %y, i32 6 1242 %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14 1243 %vecext29 = extractelement <8 x i8> %y, i32 7 1244 %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15 1245 ret <16 x i8> %vecinit30 1246 } 1247 1248 define <8 x i16> @test_concat_v8i16_v8i16_v8i16(<8 x i16> %x, <8 x i16> %y) #0 { 1249 ; CHECK-LABEL: test_concat_v8i16_v8i16_v8i16: 1250 ; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1251 entry: 1252 %vecinit14 = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> 1253 ret <8 x i16> %vecinit14 1254 } 1255 1256 define <8 x i16> @test_concat_v8i16_v4i16_v8i16(<4 x i16> %x, <8 x i16> %y) #0 { 1257 ; CHECK-LABEL: test_concat_v8i16_v4i16_v8i16: 1258 ; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1259 entry: 1260 %vecext = extractelement <4 x i16> %x, i32 0 1261 %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0 1262 %vecext1 = extractelement <4 x i16> %x, i32 1 1263 %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1 1264 %vecext3 = extractelement <4 x i16> %x, i32 2 1265 %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2 1266 %vecext5 = extractelement <4 x i16> %x, i32 3 1267 %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3 1268 %vecinit14 = shufflevector <8 x i16> %vecinit6, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> 1269 ret <8 x i16> %vecinit14 1270 } 1271 1272 define <8 x i16> @test_concat_v8i16_v8i16_v4i16(<8 x i16> %x, <4 x i16> %y) #0 { 1273 ; CHECK-LABEL: test_concat_v8i16_v8i16_v4i16: 1274 ; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1275 entry: 1276 %vecext = extractelement <8 x i16> %x, i32 0 1277 %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0 1278 %vecext1 = extractelement <8 x i16> %x, i32 1 1279 %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1 1280 %vecext3 = extractelement <8 x i16> %x, 
i32 2 1281 %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2 1282 %vecext5 = extractelement <8 x i16> %x, i32 3 1283 %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3 1284 %vecext7 = extractelement <4 x i16> %y, i32 0 1285 %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4 1286 %vecext9 = extractelement <4 x i16> %y, i32 1 1287 %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5 1288 %vecext11 = extractelement <4 x i16> %y, i32 2 1289 %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6 1290 %vecext13 = extractelement <4 x i16> %y, i32 3 1291 %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7 1292 ret <8 x i16> %vecinit14 1293 } 1294 1295 define <8 x i16> @test_concat_v8i16_v4i16_v4i16(<4 x i16> %x, <4 x i16> %y) #0 { 1296 ; CHECK-LABEL: test_concat_v8i16_v4i16_v4i16: 1297 ; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1298 entry: 1299 %vecext = extractelement <4 x i16> %x, i32 0 1300 %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0 1301 %vecext1 = extractelement <4 x i16> %x, i32 1 1302 %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1 1303 %vecext3 = extractelement <4 x i16> %x, i32 2 1304 %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2 1305 %vecext5 = extractelement <4 x i16> %x, i32 3 1306 %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3 1307 %vecext7 = extractelement <4 x i16> %y, i32 0 1308 %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4 1309 %vecext9 = extractelement <4 x i16> %y, i32 1 1310 %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5 1311 %vecext11 = extractelement <4 x i16> %y, i32 2 1312 %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6 1313 %vecext13 = extractelement <4 x i16> %y, i32 3 1314 %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7 1315 ret <8 x i16> %vecinit14 1316 } 1317 1318 define <4 x i32> 
@test_concat_v4i32_v4i32_v4i32(<4 x i32> %x, <4 x i32> %y) #0 { 1319 ; CHECK-LABEL: test_concat_v4i32_v4i32_v4i32: 1320 ; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1321 entry: 1322 %vecinit6 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5> 1323 ret <4 x i32> %vecinit6 1324 } 1325 1326 define <4 x i32> @test_concat_v4i32_v2i32_v4i32(<2 x i32> %x, <4 x i32> %y) #0 { 1327 ; CHECK-LABEL: test_concat_v4i32_v2i32_v4i32: 1328 ; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1329 entry: 1330 %vecext = extractelement <2 x i32> %x, i32 0 1331 %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0 1332 %vecext1 = extractelement <2 x i32> %x, i32 1 1333 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1 1334 %vecinit6 = shufflevector <4 x i32> %vecinit2, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5> 1335 ret <4 x i32> %vecinit6 1336 } 1337 1338 define <4 x i32> @test_concat_v4i32_v4i32_v2i32(<4 x i32> %x, <2 x i32> %y) #0 { 1339 ; CHECK-LABEL: test_concat_v4i32_v4i32_v2i32: 1340 ; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1341 entry: 1342 %vecext = extractelement <4 x i32> %x, i32 0 1343 %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0 1344 %vecext1 = extractelement <4 x i32> %x, i32 1 1345 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1 1346 %vecext3 = extractelement <2 x i32> %y, i32 0 1347 %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %vecext3, i32 2 1348 %vecext5 = extractelement <2 x i32> %y, i32 1 1349 %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %vecext5, i32 3 1350 ret <4 x i32> %vecinit6 1351 } 1352 1353 define <4 x i32> @test_concat_v4i32_v2i32_v2i32(<2 x i32> %x, <2 x i32> %y) #0 { 1354 ; CHECK-LABEL: test_concat_v4i32_v2i32_v2i32: 1355 ; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1356 entry: 1357 %vecinit6 = shufflevector <2 x i32> %x, <2 x i32> %y, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1358 ret <4 x i32> %vecinit6 1359 } 1360 1361 define <2 x i64> 
@test_concat_v2i64_v2i64_v2i64(<2 x i64> %x, <2 x i64> %y) #0 { 1362 ; CHECK-LABEL: test_concat_v2i64_v2i64_v2i64: 1363 ; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d 1364 entry: 1365 %vecinit2 = shufflevector <2 x i64> %x, <2 x i64> %y, <2 x i32> <i32 0, i32 2> 1366 ret <2 x i64> %vecinit2 1367 } 1368 1369 define <2 x i64> @test_concat_v2i64_v1i64_v2i64(<1 x i64> %x, <2 x i64> %y) #0 { 1370 ; CHECK-LABEL: test_concat_v2i64_v1i64_v2i64: 1371 ; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d 1372 entry: 1373 %vecext = extractelement <1 x i64> %x, i32 0 1374 %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0 1375 %vecinit2 = shufflevector <2 x i64> %vecinit, <2 x i64> %y, <2 x i32> <i32 0, i32 2> 1376 ret <2 x i64> %vecinit2 1377 } 1378 1379 define <2 x i64> @test_concat_v2i64_v2i64_v1i64(<2 x i64> %x, <1 x i64> %y) #0 { 1380 ; CHECK-LABEL: test_concat_v2i64_v2i64_v1i64: 1381 ; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d 1382 entry: 1383 %vecext = extractelement <2 x i64> %x, i32 0 1384 %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0 1385 %vecext1 = extractelement <1 x i64> %y, i32 0 1386 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1 1387 ret <2 x i64> %vecinit2 1388 } 1389 1390 define <2 x i64> @test_concat_v2i64_v1i64_v1i64(<1 x i64> %x, <1 x i64> %y) #0 { 1391 ; CHECK-LABEL: test_concat_v2i64_v1i64_v1i64: 1392 ; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1393 entry: 1394 %vecext = extractelement <1 x i64> %x, i32 0 1395 %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0 1396 %vecext1 = extractelement <1 x i64> %y, i32 0 1397 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1 1398 ret <2 x i64> %vecinit2 1399 } 1400 1401 1402 define <4 x i16> @concat_vector_v4i16_const() { 1403 ; CHECK-LABEL: concat_vector_v4i16_const: 1404 ; CHECK: movi {{d[0-9]+}}, #0 1405 %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <4 x i32> zeroinitializer 1406 ret 
<4 x i16> %r 1407 } 1408 1409 define <4 x i16> @concat_vector_v4i16_const_one() { 1410 ; CHECK-LABEL: concat_vector_v4i16_const_one: 1411 ; CHECK: movi {{v[0-9]+}}.4h, #1 1412 %r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <4 x i32> zeroinitializer 1413 ret <4 x i16> %r 1414 } 1415 1416 define <4 x i32> @concat_vector_v4i32_const() { 1417 ; CHECK-LABEL: concat_vector_v4i32_const: 1418 ; CHECK: movi {{v[0-9]+}}.2d, #0 1419 %r = shufflevector <1 x i32> zeroinitializer, <1 x i32> undef, <4 x i32> zeroinitializer 1420 ret <4 x i32> %r 1421 } 1422 1423 define <8 x i8> @concat_vector_v8i8_const() { 1424 ; CHECK-LABEL: concat_vector_v8i8_const: 1425 ; CHECK: movi {{d[0-9]+}}, #0 1426 %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <8 x i32> zeroinitializer 1427 ret <8 x i8> %r 1428 } 1429 1430 define <8 x i16> @concat_vector_v8i16_const() { 1431 ; CHECK-LABEL: concat_vector_v8i16_const: 1432 ; CHECK: movi {{v[0-9]+}}.2d, #0 1433 %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <8 x i32> zeroinitializer 1434 ret <8 x i16> %r 1435 } 1436 1437 define <8 x i16> @concat_vector_v8i16_const_one() { 1438 ; CHECK-LABEL: concat_vector_v8i16_const_one: 1439 ; CHECK: movi {{v[0-9]+}}.8h, #1 1440 %r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <8 x i32> zeroinitializer 1441 ret <8 x i16> %r 1442 } 1443 1444 define <16 x i8> @concat_vector_v16i8_const() { 1445 ; CHECK-LABEL: concat_vector_v16i8_const: 1446 ; CHECK: movi {{v[0-9]+}}.2d, #0 1447 %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <16 x i32> zeroinitializer 1448 ret <16 x i8> %r 1449 } 1450 1451 define <4 x i16> @concat_vector_v4i16(<1 x i16> %a) { 1452 ; CHECK-LABEL: concat_vector_v4i16: 1453 ; CHECK: dup v0.4h, v0.h[0] 1454 %r = shufflevector <1 x i16> %a, <1 x i16> undef, <4 x i32> zeroinitializer 1455 ret <4 x i16> %r 1456 } 1457 1458 define <4 x i32> @concat_vector_v4i32(<1 x i32> %a) { 1459 ; CHECK-LABEL: concat_vector_v4i32: 1460 ; CHECK: dup v0.4s, v0.s[0] 1461 %r 
= shufflevector <1 x i32> %a, <1 x i32> undef, <4 x i32> zeroinitializer 1462 ret <4 x i32> %r 1463 } 1464 1465 define <8 x i8> @concat_vector_v8i8(<1 x i8> %a) { 1466 ; CHECK-LABEL: concat_vector_v8i8: 1467 ; CHECK: dup v0.8b, v0.b[0] 1468 %r = shufflevector <1 x i8> %a, <1 x i8> undef, <8 x i32> zeroinitializer 1469 ret <8 x i8> %r 1470 } 1471 1472 define <8 x i16> @concat_vector_v8i16(<1 x i16> %a) { 1473 ; CHECK-LABEL: concat_vector_v8i16: 1474 ; CHECK: dup v0.8h, v0.h[0] 1475 %r = shufflevector <1 x i16> %a, <1 x i16> undef, <8 x i32> zeroinitializer 1476 ret <8 x i16> %r 1477 } 1478 1479 define <16 x i8> @concat_vector_v16i8(<1 x i8> %a) { 1480 ; CHECK-LABEL: concat_vector_v16i8: 1481 ; CHECK: dup v0.16b, v0.b[0] 1482 %r = shufflevector <1 x i8> %a, <1 x i8> undef, <16 x i32> zeroinitializer 1483 ret <16 x i8> %r 1484 } 1485