; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s

; Codegen test for AArch64 NEON lane copies: insertelement/extractelement,
; single-lane shuffles, splats and scalar<->vector bitcasts. Each function
; pins the instruction the backend is expected to select for that pattern.

; --- Insert a scalar argument (general-purpose register) into one lane. ---

define <16 x i8> @ins16bw(<16 x i8> %tmp1, i8 %tmp2) {
; CHECK-LABEL: ins16bw:
; CHECK: ins {{v[0-9]+}}.b[15], {{w[0-9]+}}
  %tmp3 = insertelement <16 x i8> %tmp1, i8 %tmp2, i32 15
  ret <16 x i8> %tmp3
}

define <8 x i16> @ins8hw(<8 x i16> %tmp1, i16 %tmp2) {
; CHECK-LABEL: ins8hw:
; CHECK: ins {{v[0-9]+}}.h[6], {{w[0-9]+}}
  %tmp3 = insertelement <8 x i16> %tmp1, i16 %tmp2, i32 6
  ret <8 x i16> %tmp3
}

define <4 x i32> @ins4sw(<4 x i32> %tmp1, i32 %tmp2) {
; CHECK-LABEL: ins4sw:
; CHECK: ins {{v[0-9]+}}.s[2], {{w[0-9]+}}
  %tmp3 = insertelement <4 x i32> %tmp1, i32 %tmp2, i32 2
  ret <4 x i32> %tmp3
}

define <2 x i64> @ins2dw(<2 x i64> %tmp1, i64 %tmp2) {
; CHECK-LABEL: ins2dw:
; CHECK: ins {{v[0-9]+}}.d[1], {{x[0-9]+}}
  %tmp3 = insertelement <2 x i64> %tmp1, i64 %tmp2, i32 1
  ret <2 x i64> %tmp3
}

define <8 x i8> @ins8bw(<8 x i8> %tmp1, i8 %tmp2) {
; CHECK-LABEL: ins8bw:
; CHECK: ins {{v[0-9]+}}.b[5], {{w[0-9]+}}
  %tmp3 = insertelement <8 x i8> %tmp1, i8 %tmp2, i32 5
  ret <8 x i8> %tmp3
}

define <4 x i16> @ins4hw(<4 x i16> %tmp1, i16 %tmp2) {
; CHECK-LABEL: ins4hw:
; CHECK: ins {{v[0-9]+}}.h[3], {{w[0-9]+}}
  %tmp3 = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 3
  ret <4 x i16> %tmp3
}

define <2 x i32> @ins2sw(<2 x i32> %tmp1, i32 %tmp2) {
; CHECK-LABEL: ins2sw:
; CHECK: ins {{v[0-9]+}}.s[1], {{w[0-9]+}}
  %tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
  ret <2 x i32> %tmp3
}

; --- Lane-to-lane copy, 128-bit source into 128-bit destination. ---

define <16 x i8> @ins16b16(<16 x i8> %tmp1, <16 x i8> %tmp2) {
; CHECK-LABEL: ins16b16:
; CHECK: ins {{v[0-9]+}}.b[15], {{v[0-9]+}}.b[2]
  %tmp3 = extractelement <16 x i8> %tmp1, i32 2
  %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15
  ret <16 x i8> %tmp4
}

define <8 x i16> @ins8h8(<8 x i16> %tmp1, <8 x i16> %tmp2) {
; CHECK-LABEL: ins8h8:
; CHECK: ins {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
  %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7
  ret <8 x i16> %tmp4
}

define <4 x i32> @ins4s4(<4 x i32> %tmp1, <4 x i32> %tmp2) {
; CHECK-LABEL: ins4s4:
; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
  %tmp3 = extractelement <4 x i32> %tmp1, i32 2
  %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1
  ret <4 x i32> %tmp4
}

define <2 x i64> @ins2d2(<2 x i64> %tmp1, <2 x i64> %tmp2) {
; CHECK-LABEL: ins2d2:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
  %tmp3 = extractelement <2 x i64> %tmp1, i32 0
  %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1
  ret <2 x i64> %tmp4
}

define <4 x float> @ins4f4(<4 x float> %tmp1, <4 x float> %tmp2) {
; CHECK-LABEL: ins4f4:
; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
  %tmp3 = extractelement <4 x float> %tmp1, i32 2
  %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1
  ret <4 x float> %tmp4
}

define <2 x double> @ins2df2(<2 x double> %tmp1, <2 x double> %tmp2) {
; CHECK-LABEL: ins2df2:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
  %tmp3 = extractelement <2 x double> %tmp1, i32 0
  %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1
  ret <2 x double> %tmp4
}

; --- Lane-to-lane copy, 64-bit source into 128-bit destination. ---

define <16 x i8> @ins8b16(<8 x i8> %tmp1, <16 x i8> %tmp2) {
; CHECK-LABEL: ins8b16:
; CHECK: ins {{v[0-9]+}}.b[15], {{v[0-9]+}}.b[2]
  %tmp3 = extractelement <8 x i8> %tmp1, i32 2
  %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15
  ret <16 x i8> %tmp4
}

define <8 x i16> @ins4h8(<4 x i16> %tmp1, <8 x i16> %tmp2) {
; CHECK-LABEL: ins4h8:
; CHECK: ins {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
  %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7
  ret <8 x i16> %tmp4
}

define <4 x i32> @ins2s4(<2 x i32> %tmp1, <4 x i32> %tmp2) {
; CHECK-LABEL: ins2s4:
; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[1]
  %tmp3 = extractelement <2 x i32> %tmp1, i32 1
  %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1
  ret <4 x i32> %tmp4
}

define <2 x i64> @ins1d2(<1 x i64> %tmp1, <2 x i64> %tmp2) {
; CHECK-LABEL: ins1d2:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
  %tmp3 = extractelement <1 x i64> %tmp1, i32 0
  %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1
  ret <2 x i64> %tmp4
}

define <4 x float> @ins2f4(<2 x float> %tmp1, <4 x float> %tmp2) {
; CHECK-LABEL: ins2f4:
; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[1]
  %tmp3 = extractelement <2 x float> %tmp1, i32 1
  %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1
  ret <4 x float> %tmp4
}

define <2 x double> @ins1f2(<1 x double> %tmp1, <2 x double> %tmp2) {
; CHECK-LABEL: ins1f2:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
  %tmp3 = extractelement <1 x double> %tmp1, i32 0
  %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1
  ret <2 x double> %tmp4
}

; --- Lane-to-lane copy, 128-bit source into 64-bit destination. ---

define <8 x i8> @ins16b8(<16 x i8> %tmp1, <8 x i8> %tmp2) {
; CHECK-LABEL: ins16b8:
; CHECK: ins {{v[0-9]+}}.b[7], {{v[0-9]+}}.b[2]
  %tmp3 = extractelement <16 x i8> %tmp1, i32 2
  %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 7
  ret <8 x i8> %tmp4
}

define <4 x i16> @ins8h4(<8 x i16> %tmp1, <4 x i16> %tmp2) {
; CHECK-LABEL: ins8h4:
; CHECK: ins {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
  %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3
  ret <4 x i16> %tmp4
}

define <2 x i32> @ins4s2(<4 x i32> %tmp1, <2 x i32> %tmp2) {
; CHECK-LABEL: ins4s2:
; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
  %tmp3 = extractelement <4 x i32> %tmp1, i32 2
  %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1
  ret <2 x i32> %tmp4
}

define <1 x i64> @ins2d1(<2 x i64> %tmp1, <1 x i64> %tmp2) {
; CHECK-LABEL: ins2d1:
; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0]
  %tmp3 = extractelement <2 x i64> %tmp1, i32 0
  %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0
  ret <1 x i64> %tmp4
}

define <2 x float> @ins4f2(<4 x float> %tmp1, <2 x float> %tmp2) {
; CHECK-LABEL: ins4f2:
; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
  %tmp3 = extractelement <4 x float> %tmp1, i32 2
  %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1
  ret <2 x float> %tmp4
}

; Single-element destination: the whole result is the extracted lane, so a
; scalar move from the lane is expected rather than an ins.
define <1 x double> @ins2f1(<2 x double> %tmp1, <1 x double> %tmp2) {
; CHECK-LABEL: ins2f1:
; CHECK: mov {{d[0-9]+}}, {{v[0-9]+}}.d[1]
  %tmp3 = extractelement <2 x double> %tmp1, i32 1
  %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0
  ret <1 x double> %tmp4
}

; --- Lane-to-lane copy, 64-bit source into 64-bit destination. ---

define <8 x i8> @ins8b8(<8 x i8> %tmp1, <8 x i8> %tmp2) {
; CHECK-LABEL: ins8b8:
; CHECK: ins {{v[0-9]+}}.b[4], {{v[0-9]+}}.b[2]
  %tmp3 = extractelement <8 x i8> %tmp1, i32 2
  %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 4
  ret <8 x i8> %tmp4
}

define <4 x i16> @ins4h4(<4 x i16> %tmp1, <4 x i16> %tmp2) {
; CHECK-LABEL: ins4h4:
; CHECK: ins {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
  %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3
  ret <4 x i16> %tmp4
}

define <2 x i32> @ins2s2(<2 x i32> %tmp1, <2 x i32> %tmp2) {
; CHECK-LABEL: ins2s2:
; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
  %tmp3 = extractelement <2 x i32> %tmp1, i32 0
  %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1
  ret <2 x i32> %tmp4
}

define <1 x i64> @ins1d1(<1 x i64> %tmp1, <1 x i64> %tmp2) {
; CHECK-LABEL: ins1d1:
; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0]
  %tmp3 = extractelement <1 x i64> %tmp1, i32 0
  %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0
  ret <1 x i64> %tmp4
}

define <2 x float> @ins2f2(<2 x float> %tmp1, <2 x float> %tmp2) {
; CHECK-LABEL: ins2f2:
; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
  %tmp3 = extractelement <2 x float> %tmp1, i32 0
  %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1
  ret <2 x float> %tmp4
}

; Negative test: a <1 x double> to <1 x double> copy must not emit an ins.
define <1 x double> @ins1df1(<1 x double> %tmp1, <1 x double> %tmp2) {
; CHECK-LABEL: ins1df1:
; CHECK-NOT: ins {{v[0-9]+}}
  %tmp3 = extractelement <1 x double> %tmp1, i32 0
  %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0
  ret <1 x double> %tmp4
}

; --- Extract a lane to a general-purpose register (zero-extended). ---
; Byte and halfword lanes are matched with the full "umov" mnemonic; the bare
; "mov" spelling is only matched for word and doubleword lanes.

define i32 @umovw16b(<16 x i8> %tmp1) {
; CHECK-LABEL: umovw16b:
; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[8]
  %tmp3 = extractelement <16 x i8> %tmp1, i32 8
  %tmp4 = zext i8 %tmp3 to i32
  ret i32 %tmp4
}

define i32 @umovw8h(<8 x i16> %tmp1) {
; CHECK-LABEL: umovw8h:
; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
  %tmp4 = zext i16 %tmp3 to i32
  ret i32 %tmp4
}

define i32 @umovw4s(<4 x i32> %tmp1) {
; CHECK-LABEL: umovw4s:
; CHECK: mov {{w[0-9]+}}, {{v[0-9]+}}.s[2]
  %tmp3 = extractelement <4 x i32> %tmp1, i32 2
  ret i32 %tmp3
}

define i64 @umovx2d(<2 x i64> %tmp1) {
; CHECK-LABEL: umovx2d:
; CHECK: mov {{x[0-9]+}}, {{v[0-9]+}}.d[1]
  %tmp3 = extractelement <2 x i64> %tmp1, i32 1
  ret i64 %tmp3
}

define i32 @umovw8b(<8 x i8> %tmp1) {
; CHECK-LABEL: umovw8b:
; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[7]
  %tmp3 = extractelement <8 x i8> %tmp1, i32 7
  %tmp4 = zext i8 %tmp3 to i32
  ret i32 %tmp4
}

define i32 @umovw4h(<4 x i16> %tmp1) {
; CHECK-LABEL: umovw4h:
; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
  %tmp4 = zext i16 %tmp3 to i32
  ret i32 %tmp4
}

define i32 @umovw2s(<2 x i32> %tmp1) {
; CHECK-LABEL: umovw2s:
; CHECK: mov {{w[0-9]+}}, {{v[0-9]+}}.s[1]
  %tmp3 = extractelement <2 x i32> %tmp1, i32 1
  ret i32 %tmp3
}

define i64 @umovx1d(<1 x i64> %tmp1) {
; CHECK-LABEL: umovx1d:
; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
  %tmp3 = extractelement <1 x i64> %tmp1, i32 0
  ret i64 %tmp3
}

; --- Extract a lane to a general-purpose register (sign-extended). ---

define i32 @smovw16b(<16 x i8> %tmp1) {
; CHECK-LABEL: smovw16b:
; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[8]
  %tmp3 = extractelement <16 x i8> %tmp1, i32 8
  %tmp4 = sext i8 %tmp3 to i32
  %tmp5 = add i32 %tmp4, %tmp4
  ret i32 %tmp5
}

define i32 @smovw8h(<8 x i16> %tmp1) {
; CHECK-LABEL: smovw8h:
; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
  %tmp4 = sext i16 %tmp3 to i32
  %tmp5 = add i32 %tmp4, %tmp4
  ret i32 %tmp5
}

define i64 @smovx16b(<16 x i8> %tmp1) {
; CHECK-LABEL: smovx16b:
; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.b[8]
  %tmp3 = extractelement <16 x i8> %tmp1, i32 8
  %tmp4 = sext i8 %tmp3 to i64
  ret i64 %tmp4
}

define i64 @smovx8h(<8 x i16> %tmp1) {
; CHECK-LABEL: smovx8h:
; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
  %tmp4 = sext i16 %tmp3 to i64
  ret i64 %tmp4
}

define i64 @smovx4s(<4 x i32> %tmp1) {
; CHECK-LABEL: smovx4s:
; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[2]
  %tmp3 = extractelement <4 x i32> %tmp1, i32 2
  %tmp4 = sext i32 %tmp3 to i64
  ret i64 %tmp4
}

define i32 @smovw8b(<8 x i8> %tmp1) {
; CHECK-LABEL: smovw8b:
; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[4]
  %tmp3 = extractelement <8 x i8> %tmp1, i32 4
  %tmp4 = sext i8 %tmp3 to i32
  %tmp5 = add i32 %tmp4, %tmp4
  ret i32 %tmp5
}

define i32 @smovw4h(<4 x i16> %tmp1) {
; CHECK-LABEL: smovw4h:
; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
  %tmp4 = sext i16 %tmp3 to i32
  %tmp5 = add i32 %tmp4, %tmp4
  ret i32 %tmp5
}

; The next two return i32, so either a w or an x destination is accepted.
define i32 @smovx8b(<8 x i8> %tmp1) {
; CHECK-LABEL: smovx8b:
; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.b[6]
  %tmp3 = extractelement <8 x i8> %tmp1, i32 6
  %tmp4 = sext i8 %tmp3 to i32
  ret i32 %tmp4
}

define i32 @smovx4h(<4 x i16> %tmp1) {
; CHECK-LABEL: smovx4h:
; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
  %tmp4 = sext i16 %tmp3 to i32
  ret i32 %tmp4
}

define i64 @smovx2s(<2 x i32> %tmp1) {
; CHECK-LABEL: smovx2s:
; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[1]
  %tmp3 = extractelement <2 x i32> %tmp1, i32 1
  %tmp4 = sext i32 %tmp3 to i64
  ret i64 %tmp4
}

; --- Shuffles that replace exactly one lane should become a single ins. ---

define <8 x i8> @test_vcopy_lane_s8(<8 x i8> %v1, <8 x i8> %v2) {
; CHECK-LABEL: test_vcopy_lane_s8:
; CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3]
  %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 11, i32 6, i32 7>
  ret <8 x i8> %vset_lane
}

define <16 x i8> @test_vcopyq_laneq_s8(<16 x i8> %v1, <16 x i8> %v2) {
; CHECK-LABEL: test_vcopyq_laneq_s8:
; CHECK: ins {{v[0-9]+}}.b[14], {{v[0-9]+}}.b[6]
  %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 22, i32 15>
  ret <16 x i8> %vset_lane
}

; "swap" variants: every lane but one comes from the second operand.
define <8 x i8> @test_vcopy_lane_swap_s8(<8 x i8> %v1, <8 x i8> %v2) {
; CHECK-LABEL: test_vcopy_lane_swap_s8:
; CHECK: ins {{v[0-9]+}}.b[7], {{v[0-9]+}}.b[0]
  %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 0>
  ret <8 x i8> %vset_lane
}

define <16 x i8> @test_vcopyq_laneq_swap_s8(<16 x i8> %v1, <16 x i8> %v2) {
; CHECK-LABEL: test_vcopyq_laneq_swap_s8:
; CHECK: ins {{v[0-9]+}}.b[0], {{v[0-9]+}}.b[15]
  %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 15, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i8> %vset_lane
}

; --- Splat of a scalar argument, built as a chain of insertelement. ---

define <8 x i8> @test_vdup_n_u8(i8 %v1) #0 {
; CHECK-LABEL: test_vdup_n_u8:
; CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}}
  %vecinit.i = insertelement <8 x i8> undef, i8 %v1, i32 0
  %vecinit1.i = insertelement <8 x i8> %vecinit.i, i8 %v1, i32 1
  %vecinit2.i = insertelement <8 x i8> %vecinit1.i, i8 %v1, i32 2
  %vecinit3.i = insertelement <8 x i8> %vecinit2.i, i8 %v1, i32 3
  %vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 %v1, i32 4
  %vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 %v1, i32 5
  %vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 %v1, i32 6
  %vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 %v1, i32 7
  ret <8 x i8> %vecinit7.i
}

define <4 x i16> @test_vdup_n_u16(i16 %v1) #0 {
; CHECK-LABEL: test_vdup_n_u16:
; CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}}
  %vecinit.i = insertelement <4 x i16> undef, i16 %v1, i32 0
  %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %v1, i32 1
  %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %v1, i32 2
  %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %v1, i32 3
  ret <4 x i16> %vecinit3.i
}

define <2 x i32> @test_vdup_n_u32(i32 %v1) #0 {
; CHECK-LABEL: test_vdup_n_u32:
; CHECK: dup {{v[0-9]+}}.2s, {{w[0-9]+}}
  %vecinit.i = insertelement <2 x i32> undef, i32 %v1, i32 0
  %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %v1, i32 1
  ret <2 x i32> %vecinit1.i
}

define <1 x i64> @test_vdup_n_u64(i64 %v1) #0 {
; CHECK-LABEL: test_vdup_n_u64:
; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
  %vecinit.i = insertelement <1 x i64> undef, i64 %v1, i32 0
  ret <1 x i64> %vecinit.i
}

define <16 x i8> @test_vdupq_n_u8(i8 %v1) #0 {
; CHECK-LABEL: test_vdupq_n_u8:
; CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}}
  %vecinit.i = insertelement <16 x i8> undef, i8 %v1, i32 0
  %vecinit1.i = insertelement <16 x i8> %vecinit.i, i8 %v1, i32 1
  %vecinit2.i = insertelement <16 x i8> %vecinit1.i, i8 %v1, i32 2
  %vecinit3.i = insertelement <16 x i8> %vecinit2.i, i8 %v1, i32 3
  %vecinit4.i = insertelement <16 x i8> %vecinit3.i, i8 %v1, i32 4
  %vecinit5.i = insertelement <16 x i8> %vecinit4.i, i8 %v1, i32 5
  %vecinit6.i = insertelement <16 x i8> %vecinit5.i, i8 %v1, i32 6
  %vecinit7.i = insertelement <16 x i8> %vecinit6.i, i8 %v1, i32 7
  %vecinit8.i = insertelement <16 x i8> %vecinit7.i, i8 %v1, i32 8
  %vecinit9.i = insertelement <16 x i8> %vecinit8.i, i8 %v1, i32 9
  %vecinit10.i = insertelement <16 x i8> %vecinit9.i, i8 %v1, i32 10
  %vecinit11.i = insertelement <16 x i8> %vecinit10.i, i8 %v1, i32 11
  %vecinit12.i = insertelement <16 x i8> %vecinit11.i, i8 %v1, i32 12
  %vecinit13.i = insertelement <16 x i8> %vecinit12.i, i8 %v1, i32 13
  %vecinit14.i = insertelement <16 x i8> %vecinit13.i, i8 %v1, i32 14
  %vecinit15.i = insertelement <16 x i8> %vecinit14.i, i8 %v1, i32 15
  ret <16 x i8> %vecinit15.i
}

define <8 x i16> @test_vdupq_n_u16(i16 %v1) #0 {
; CHECK-LABEL: test_vdupq_n_u16:
; CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
  %vecinit.i = insertelement <8 x i16> undef, i16 %v1, i32 0
  %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %v1, i32 1
  %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %v1, i32 2
  %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %v1, i32 3
  %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %v1, i32 4
  %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %v1, i32 5
  %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %v1, i32 6
  %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %v1, i32 7
  ret <8 x i16> %vecinit7.i
}

define <4 x i32> @test_vdupq_n_u32(i32 %v1) #0 {
; CHECK-LABEL: test_vdupq_n_u32:
; CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}}
  %vecinit.i = insertelement <4 x i32> undef, i32 %v1, i32 0
  %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %v1, i32 1
  %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %v1, i32 2
  %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %v1, i32 3
  ret <4 x i32> %vecinit3.i
}

define <2 x i64> @test_vdupq_n_u64(i64 %v1) #0 {
; CHECK-LABEL: test_vdupq_n_u64:
; CHECK: dup {{v[0-9]+}}.2d, {{x[0-9]+}}
  %vecinit.i = insertelement <2 x i64> undef, i64 %v1, i32 0
  %vecinit1.i = insertelement <2 x i64> %vecinit.i, i64 %v1, i32 1
  ret <2 x i64> %vecinit1.i
}

; --- Splat of one vector lane, expressed as a shufflevector. ---
; Every pattern in this group names the dup mnemonic explicitly so that an
; unrelated instruction with the same operand text cannot satisfy the test.

define <8 x i8> @test_vdup_lane_s8(<8 x i8> %v1) #0 {
; CHECK-LABEL: test_vdup_lane_s8:
; CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5]
  %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x i8> %shuffle
}

define <4 x i16> @test_vdup_lane_s16(<4 x i16> %v1) #0 {
; CHECK-LABEL: test_vdup_lane_s16:
; CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
  %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i16> %shuffle
}

define <2 x i32> @test_vdup_lane_s32(<2 x i32> %v1) #0 {
; CHECK-LABEL: test_vdup_lane_s32:
; CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
  %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x i32> %shuffle
}

define <16 x i8> @test_vdupq_lane_s8(<8 x i8> %v1) #0 {
; CHECK-LABEL: test_vdupq_lane_s8:
; CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5]
  %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <16 x i8> %shuffle
}

define <8 x i16> @test_vdupq_lane_s16(<4 x i16> %v1) #0 {
; CHECK-LABEL: test_vdupq_lane_s16:
; CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
  %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i16> %shuffle
}

define <4 x i32> @test_vdupq_lane_s32(<2 x i32> %v1) #0 {
; CHECK-LABEL: test_vdupq_lane_s32:
; CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
  %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %shuffle
}

define <2 x i64> @test_vdupq_lane_s64(<1 x i64> %v1) #0 {
; CHECK-LABEL: test_vdupq_lane_s64:
; CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
  %shuffle = shufflevector <1 x i64> %v1, <1 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %shuffle
}

define <8 x i8> @test_vdup_laneq_s8(<16 x i8> %v1) #0 {
; CHECK-LABEL: test_vdup_laneq_s8:
; CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5]
  %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x i8> %shuffle
}

define <4 x i16> @test_vdup_laneq_s16(<8 x i16> %v1) #0 {
; CHECK-LABEL: test_vdup_laneq_s16:
; CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
  %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i16> %shuffle
}

define <2 x i32> @test_vdup_laneq_s32(<4 x i32> %v1) #0 {
; CHECK-LABEL: test_vdup_laneq_s32:
; CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
  %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x i32> %shuffle
}

define <16 x i8> @test_vdupq_laneq_s8(<16 x i8> %v1) #0 {
; CHECK-LABEL: test_vdupq_laneq_s8:
; CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5]
  %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <16 x i8> %shuffle
}

define <8 x i16> @test_vdupq_laneq_s16(<8 x i16> %v1) #0 {
; CHECK-LABEL: test_vdupq_laneq_s16:
; CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
  %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i16> %shuffle
}

define <4 x i32> @test_vdupq_laneq_s32(<4 x i32> %v1) #0 {
; CHECK-LABEL: test_vdupq_laneq_s32:
; CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
  %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %shuffle
}

define <2 x i64> @test_vdupq_laneq_s64(<2 x i64> %v1) #0 {
; CHECK-LABEL: test_vdupq_laneq_s64:
; CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
  %shuffle = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %shuffle
}

; --- Bitcast of a 64-bit vector to i64: one fmov into a GPR. ---

define i64 @test_bitcastv8i8toi64(<8 x i8> %in) {
; CHECK-LABEL: test_bitcastv8i8toi64:
  %res = bitcast <8 x i8> %in to i64
; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
  ret i64 %res
}

define i64 @test_bitcastv4i16toi64(<4 x i16> %in) {
; CHECK-LABEL: test_bitcastv4i16toi64:
  %res = bitcast <4 x i16> %in to i64
; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
  ret i64 %res
}

define i64 @test_bitcastv2i32toi64(<2 x i32> %in) {
; CHECK-LABEL: test_bitcastv2i32toi64:
  %res = bitcast <2 x i32> %in to i64
; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
  ret i64 %res
}

define i64 @test_bitcastv2f32toi64(<2 x float> %in) {
; CHECK-LABEL: test_bitcastv2f32toi64:
  %res = bitcast <2 x float> %in to i64
; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
  ret i64 %res
}

define i64 @test_bitcastv1i64toi64(<1 x i64> %in) {
; CHECK-LABEL: test_bitcastv1i64toi64:
  %res = bitcast <1 x i64> %in to i64
; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
  ret i64 %res
}

define i64 @test_bitcastv1f64toi64(<1 x double> %in) {
; CHECK-LABEL: test_bitcastv1f64toi64:
  %res = bitcast <1 x double> %in to i64
; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
  ret i64 %res
}

; --- Bitcast of i64 to a 64-bit vector: one fmov into a d register. ---

define <8 x i8> @test_bitcasti64tov8i8(i64 %in) {
; CHECK-LABEL: test_bitcasti64tov8i8:
  %res = bitcast i64 %in to <8 x i8>
; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
  ret <8 x i8> %res
}

define <4 x i16> @test_bitcasti64tov4i16(i64 %in) {
; CHECK-LABEL: test_bitcasti64tov4i16:
  %res = bitcast i64 %in to <4 x i16>
; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
  ret <4 x i16> %res
}

define <2 x i32> @test_bitcasti64tov2i32(i64 %in) {
; CHECK-LABEL: test_bitcasti64tov2i32:
  %res = bitcast i64 %in to <2 x i32>
; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
  ret <2 x i32> %res
}

define <2 x float> @test_bitcasti64tov2f32(i64 %in) {
; CHECK-LABEL: test_bitcasti64tov2f32:
  %res = bitcast i64 %in to <2 x float>
; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
  ret <2 x float> %res
}

define <1 x i64> @test_bitcasti64tov1i64(i64 %in) {
; CHECK-LABEL: test_bitcasti64tov1i64:
  %res = bitcast i64 %in to <1 x i64>
; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
  ret <1 x i64> %res
}

define <1 x double> @test_bitcasti64tov1f64(i64 %in) {
; CHECK-LABEL: test_bitcasti64tov1f64:
  %res = bitcast i64 %in to <1 x double>
; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
  ret <1 x double> %res
}

; --- Vector -> <1 x double> bitcast feeding fptosi. ---
; The negate must be followed immediately by the conversion, i.e. the bitcast
; itself is expected to cost no instruction.

define <1 x i64> @test_bitcastv8i8tov1f64(<8 x i8> %a) #0 {
; CHECK-LABEL: test_bitcastv8i8tov1f64:
; CHECK: neg {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
; CHECK-NEXT: fcvtzs {{[xd][0-9]+}}, {{d[0-9]+}}
  %sub.i = sub <8 x i8> zeroinitializer, %a
  %1 = bitcast <8 x i8> %sub.i to <1 x double>
  %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
  ret <1 x i64> %vcvt.i
}

define <1 x i64> @test_bitcastv4i16tov1f64(<4 x i16> %a) #0 {
; CHECK-LABEL: test_bitcastv4i16tov1f64:
; CHECK: neg {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
; CHECK-NEXT: fcvtzs {{[dx][0-9]+}}, {{d[0-9]+}}
  %sub.i = sub <4 x i16> zeroinitializer, %a
  %1 = bitcast <4 x i16> %sub.i to <1 x double>
  %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
  ret <1 x i64> %vcvt.i
}

define <1 x i64> @test_bitcastv2i32tov1f64(<2 x i32> %a) #0 {
; CHECK-LABEL: test_bitcastv2i32tov1f64:
; CHECK: neg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
; CHECK-NEXT: fcvtzs {{[xd][0-9]+}}, {{d[0-9]+}}
  %sub.i = sub <2 x i32> zeroinitializer, %a
  %1 = bitcast <2 x i32> %sub.i to <1 x double>
  %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
  ret <1 x i64> %vcvt.i
}

define <1 x i64> @test_bitcastv1i64tov1f64(<1 x i64> %a) #0 {
; CHECK-LABEL: test_bitcastv1i64tov1f64:
; CHECK: neg {{d[0-9]+}}, {{d[0-9]+}}
; CHECK-NEXT: fcvtzs {{[dx][0-9]+}}, {{d[0-9]+}}
  %sub.i = sub <1 x i64> zeroinitializer, %a
  %1 = bitcast <1 x i64> %sub.i to <1 x double>
  %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
  ret <1 x i64> %vcvt.i
}

define <1 x i64> @test_bitcastv2f32tov1f64(<2 x float> %a) #0 {
; CHECK-LABEL: test_bitcastv2f32tov1f64:
; CHECK: fneg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
; CHECK-NEXT: fcvtzs {{[xd][0-9]+}}, {{d[0-9]+}}
  %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %a
  %1 = bitcast <2 x float> %sub.i to <1 x double>
  %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
  ret <1 x i64> %vcvt.i
}

; --- sitofp producing <1 x double>, bitcast to another vector type. ---
; Mirror image of the group above: conversion first, negate directly after.

define <8 x i8> @test_bitcastv1f64tov8i8(<1 x i64> %a) #0 {
; CHECK-LABEL: test_bitcastv1f64tov8i8:
; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
; CHECK-NEXT: neg {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
  %vcvt.i = sitofp <1 x i64> %a to <1 x double>
  %1 = bitcast <1 x double> %vcvt.i to <8 x i8>
  %sub.i = sub <8 x i8> zeroinitializer, %1
  ret <8 x i8> %sub.i
}

define <4 x i16> @test_bitcastv1f64tov4i16(<1 x i64> %a) #0 {
; CHECK-LABEL: test_bitcastv1f64tov4i16:
; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
; CHECK-NEXT: neg {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
  %vcvt.i = sitofp <1 x i64> %a to <1 x double>
  %1 = bitcast <1 x double> %vcvt.i to <4 x i16>
  %sub.i = sub <4 x i16> zeroinitializer, %1
  ret <4 x i16> %sub.i
}

define <2 x i32> @test_bitcastv1f64tov2i32(<1 x i64> %a) #0 {
; CHECK-LABEL: test_bitcastv1f64tov2i32:
; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
; CHECK-NEXT: neg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  %vcvt.i = sitofp <1 x i64> %a to <1 x double>
  %1 = bitcast <1 x double> %vcvt.i to <2 x i32>
  %sub.i = sub <2 x i32> zeroinitializer, %1
  ret <2 x i32> %sub.i
}

define <1 x i64> @test_bitcastv1f64tov1i64(<1 x i64> %a) #0 {
; CHECK-LABEL: test_bitcastv1f64tov1i64:
; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
; CHECK-NEXT: neg {{d[0-9]+}}, {{d[0-9]+}}
  %vcvt.i = sitofp <1 x i64> %a to <1 x double>
  %1 = bitcast <1 x double> %vcvt.i to <1 x i64>
  %sub.i = sub <1 x i64> zeroinitializer, %1
  ret <1 x i64> %sub.i
}

define <2 x float> @test_bitcastv1f64tov2f32(<1 x i64> %a) #0 {
; CHECK-LABEL: test_bitcastv1f64tov2f32:
; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
; CHECK-NEXT: fneg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  %vcvt.i = sitofp <1 x i64> %a to <1 x double>
  %1 = bitcast <1 x double> %vcvt.i to <2 x float>
  %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %1
  ret <2 x float> %sub.i
}

; Test insert element into an undef vector
define <8 x i8> @scalar_to_vector.v8i8(i8 %a) {
; CHECK-LABEL: scalar_to_vector.v8i8:
; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
  %b = insertelement <8 x i8> undef, i8 %a, i32 0
  ret <8 x i8> %b
}

define <16 x i8> @scalar_to_vector.v16i8(i8 %a) {
; CHECK-LABEL: scalar_to_vector.v16i8:
; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
  %b = insertelement <16 x i8> undef, i8 %a, i32 0
  ret <16 x i8> %b
}

define <4 x i16> @scalar_to_vector.v4i16(i16 %a) {
; CHECK-LABEL: scalar_to_vector.v4i16:
; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
  %b = insertelement <4 x i16> undef, i16 %a, i32 0
  ret <4 x i16> %b
}

define <8 x i16> @scalar_to_vector.v8i16(i16 %a) {
; CHECK-LABEL: scalar_to_vector.v8i16:
; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
  %b = insertelement <8 x i16> undef, i16 %a, i32 0
  ret <8 x i16> %b
}

define <2 x i32> @scalar_to_vector.v2i32(i32 %a) {
; CHECK-LABEL: scalar_to_vector.v2i32:
; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
  %b = insertelement <2 x i32> undef, i32 %a, i32 0
  ret <2 x i32> %b
}

define <4 x i32> @scalar_to_vector.v4i32(i32 %a) {
; CHECK-LABEL: scalar_to_vector.v4i32:
; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
  %b = insertelement <4 x i32> undef, i32 %a, i32 0
  ret <4 x i32> %b
}

define <2 x i64> @scalar_to_vector.v2i64(i64 %a) {
; CHECK-LABEL: scalar_to_vector.v2i64:
; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
  %b = insertelement <2 x i64> undef, i64 %a, i32 0
  ret <2 x i64> %b
}

; --- Splat built from the only element of a one-element vector. ---

define <8 x i8> @testDUP.v1i8(<1 x i8> %a) {
; CHECK-LABEL: testDUP.v1i8:
; CHECK: dup v0.8b, v0.b[0]
  %b = extractelement <1 x i8> %a, i32 0
  %c = insertelement <8 x i8> undef, i8 %b, i32 0
  %d = insertelement <8 x i8> %c, i8 %b, i32 1
  %e = insertelement <8 x i8> %d, i8 %b, i32 2
  %f = insertelement <8 x i8> %e, i8 %b, i32 3
  %g = insertelement <8 x i8> %f, i8 %b, i32 4
  %h = insertelement <8 x i8> %g, i8 %b, i32 5
  %i = insertelement <8 x i8> %h, i8 %b, i32 6
  %j = insertelement <8 x i8> %i, i8 %b, i32 7
  ret <8 x i8> %j
}

define <8 x i16> @testDUP.v1i16(<1 x i16> %a) {
; CHECK-LABEL: testDUP.v1i16:
; CHECK: dup v0.8h, v0.h[0]
  %b = extractelement <1 x i16> %a, i32 0
  %c = insertelement <8 x i16> undef, i16 %b, i32 0
  %d = insertelement <8 x i16> %c, i16 %b, i32 1
  %e = insertelement <8 x i16> %d, i16 %b, i32 2
  %f = insertelement <8 x i16> %e, i16 %b, i32 3
  %g = insertelement <8 x i16> %f, i16 %b, i32 4
  %h = insertelement <8 x i16> %g, i16 %b, i32 5
  %i = insertelement <8 x i16> %h, i16 %b, i32 6
  %j = insertelement <8 x i16> %i, i16 %b, i32 7
  ret <8 x i16> %j
}

define <4 x i32> @testDUP.v1i32(<1 x i32> %a) {
; CHECK-LABEL: testDUP.v1i32:
; CHECK: dup v0.4s, v0.s[0]
  %b = extractelement <1 x i32> %a, i32 0
  %c = insertelement <4 x i32> undef, i32 %b, i32 0
  %d = insertelement <4 x i32> %c, i32 %b, i32 1
  %e = insertelement <4 x i32> %d, i32 %b, i32 2
  %f = insertelement <4 x i32> %e, i32 %b, i32 3
  ret <4 x i32> %f
}

; Rebuilds the low eight bytes of %x lane by lane; only the return is matched.
define <8 x i8> @getl(<16 x i8> %x) #0 {
; CHECK-LABEL: getl:
; CHECK: ret
  %vecext = extractelement <16 x i8> %x, i32 0
  %vecinit = insertelement <8 x i8> undef, i8 %vecext, i32 0
  %vecext1 = extractelement <16 x i8> %x, i32 1
  %vecinit2 = insertelement <8 x i8> %vecinit, i8 %vecext1, i32 1
  %vecext3 = extractelement <16 x i8> %x, i32 2
  %vecinit4 = insertelement <8 x i8> %vecinit2, i8 %vecext3, i32 2
  %vecext5 = extractelement <16 x i8> %x, i32 3
  %vecinit6 = insertelement <8 x i8> %vecinit4, i8 %vecext5, i32 3
  %vecext7 = extractelement <16 x i8> %x, i32 4
  %vecinit8 = insertelement <8 x i8> %vecinit6, i8 %vecext7, i32 4
  %vecext9 = extractelement <16 x i8> %x, i32 5
  %vecinit10 = insertelement <8 x i8> %vecinit8, i8 %vecext9, i32 5
  %vecext11 = extractelement <16 x i8> %x, i32 6
  %vecinit12 = insertelement <8 x i8> %vecinit10, i8 %vecext11, i32 6
  %vecext13 = extractelement <16 x i8> %x, i32 7
  %vecinit14 = insertelement <8 x i8> %vecinit12, i8 %vecext13, i32 7
  ret <8 x i8> %vecinit14
}

; --- Splat of a truncated lane: the dup reads the narrower sub-lane. ---

define <4 x i16> @test_dup_v2i32_v4i16(<2 x i32> %a) {
; CHECK-LABEL: test_dup_v2i32_v4i16:
; CHECK: dup v0.4h, v0.h[2]
entry:
  %x = extractelement <2 x i32> %a, i32 1
  %vget_lane = trunc i32 %x to i16
  %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
  %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
  %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
  %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
  ret <4 x i16> %vecinit3.i
}

define <8 x i16> @test_dup_v4i32_v8i16(<4 x i32> %a) {
; CHECK-LABEL: test_dup_v4i32_v8i16:
; CHECK: dup v0.8h, v0.h[6]
entry:
  %x = extractelement <4 x i32> %a, i32 3
  %vget_lane = trunc i32 %x to i16
  %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0
  %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1
  %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2
  %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3
  %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4
  %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5
  %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6
  %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7
  ret <8 x i16> %vecinit7.i
}

define <4 x i16> @test_dup_v1i64_v4i16(<1 x i64> %a) {
; CHECK-LABEL: test_dup_v1i64_v4i16:
; CHECK: dup v0.4h, v0.h[0]
entry:
  %x = extractelement <1 x i64> %a, i32 0
  %vget_lane = trunc i64 %x to i16
  %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
  %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
  %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
  %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
  ret <4 x i16> %vecinit3.i
}

define <2 x i32> @test_dup_v1i64_v2i32(<1 x i64> %a) {
; CHECK-LABEL: test_dup_v1i64_v2i32:
; CHECK: dup v0.2s, v0.s[0]
entry:
952 %x = extractelement <1 x i64> %a, i32 0 953 %vget_lane = trunc i64 %x to i32 954 %vecinit.i = insertelement <2 x i32> undef, i32 %vget_lane, i32 0 955 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %vget_lane, i32 1 956 ret <2 x i32> %vecinit1.i 957 } 958 959 define <8 x i16> @test_dup_v2i64_v8i16(<2 x i64> %a) { 960 ; CHECK-LABEL: test_dup_v2i64_v8i16: 961 ; CHECK: dup v0.8h, v0.h[4] 962 entry: 963 %x = extractelement <2 x i64> %a, i32 1 964 %vget_lane = trunc i64 %x to i16 965 %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0 966 %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1 967 %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2 968 %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3 969 %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4 970 %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5 971 %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6 972 %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7 973 ret <8 x i16> %vecinit7.i 974 } 975 976 define <4 x i32> @test_dup_v2i64_v4i32(<2 x i64> %a) { 977 ; CHECK-LABEL: test_dup_v2i64_v4i32: 978 ; CHECK: dup v0.4s, v0.s[2] 979 entry: 980 %x = extractelement <2 x i64> %a, i32 1 981 %vget_lane = trunc i64 %x to i32 982 %vecinit.i = insertelement <4 x i32> undef, i32 %vget_lane, i32 0 983 %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %vget_lane, i32 1 984 %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %vget_lane, i32 2 985 %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %vget_lane, i32 3 986 ret <4 x i32> %vecinit3.i 987 } 988 989 define <4 x i16> @test_dup_v4i32_v4i16(<4 x i32> %a) { 990 ; CHECK-LABEL: test_dup_v4i32_v4i16: 991 ; CHECK: dup v0.4h, v0.h[2] 992 entry: 993 %x = extractelement <4 x i32> %a, i32 1 994 %vget_lane = trunc i32 %x to i16 995 %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0 
996 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1 997 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2 998 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3 999 ret <4 x i16> %vecinit3.i 1000 } 1001 1002 define <4 x i16> @test_dup_v2i64_v4i16(<2 x i64> %a) { 1003 ; CHECK-LABEL: test_dup_v2i64_v4i16: 1004 ; CHECK: dup v0.4h, v0.h[0] 1005 entry: 1006 %x = extractelement <2 x i64> %a, i32 0 1007 %vget_lane = trunc i64 %x to i16 1008 %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0 1009 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1 1010 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2 1011 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3 1012 ret <4 x i16> %vecinit3.i 1013 } 1014 1015 define <2 x i32> @test_dup_v2i64_v2i32(<2 x i64> %a) { 1016 ; CHECK-LABEL: test_dup_v2i64_v2i32: 1017 ; CHECK: dup v0.2s, v0.s[0] 1018 entry: 1019 %x = extractelement <2 x i64> %a, i32 0 1020 %vget_lane = trunc i64 %x to i32 1021 %vecinit.i = insertelement <2 x i32> undef, i32 %vget_lane, i32 0 1022 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %vget_lane, i32 1 1023 ret <2 x i32> %vecinit1.i 1024 } 1025 1026 1027 define <2 x float> @test_scalar_to_vector_f32_to_v2f32(<2 x float> %a) { 1028 ; CHECK-LABEL: test_scalar_to_vector_f32_to_v2f32: 1029 ; CHECK: fmaxp s{{[0-9]+}}, v{{[0-9]+}}.2s 1030 ; CHECK-NEXT: ret 1031 entry: 1032 %0 = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a) 1033 %1 = insertelement <1 x float> undef, float %0, i32 0 1034 %2 = extractelement <1 x float> %1, i32 0 1035 %vecinit1.i = insertelement <2 x float> undef, float %2, i32 0 1036 ret <2 x float> %vecinit1.i 1037 } 1038 1039 define <4 x float> @test_scalar_to_vector_f32_to_v4f32(<2 x float> %a) { 1040 ; CHECK-LABEL: test_scalar_to_vector_f32_to_v4f32: 1041 ; CHECK: fmaxp s{{[0-9]+}}, v{{[0-9]+}}.2s 1042 ; CHECK-NEXT: ret 1043 entry: 
1044 %0 = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a) 1045 %1 = insertelement <1 x float> undef, float %0, i32 0 1046 %2 = extractelement <1 x float> %1, i32 0 1047 %vecinit1.i = insertelement <4 x float> undef, float %2, i32 0 1048 ret <4 x float> %vecinit1.i 1049 } 1050 1051 declare float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float>) 1052 1053 define <2 x i32> @test_concat_undef_v1i32(<2 x i32> %a) { 1054 ; CHECK-LABEL: test_concat_undef_v1i32: 1055 ; CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 1056 entry: 1057 %0 = extractelement <2 x i32> %a, i32 0 1058 %vecinit1.i = insertelement <2 x i32> undef, i32 %0, i32 1 1059 ret <2 x i32> %vecinit1.i 1060 } 1061 1062 declare i32 @llvm.aarch64.neon.sqabs.i32(i32) #4 1063 1064 define <2 x i32> @test_concat_v1i32_undef(i32 %a) { 1065 ; CHECK-LABEL: test_concat_v1i32_undef: 1066 ; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}} 1067 ; CHECK-NEXT: ret 1068 entry: 1069 %b = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a) 1070 %vecinit.i432 = insertelement <2 x i32> undef, i32 %b, i32 0 1071 ret <2 x i32> %vecinit.i432 1072 } 1073 1074 define <2 x i32> @test_concat_same_v1i32_v1i32(<2 x i32> %a) { 1075 ; CHECK-LABEL: test_concat_same_v1i32_v1i32: 1076 ; CHECK: dup v{{[0-9]+}}.2s, v{{[0-9]+}}.s[0] 1077 entry: 1078 %0 = extractelement <2 x i32> %a, i32 0 1079 %vecinit.i = insertelement <2 x i32> undef, i32 %0, i32 0 1080 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %0, i32 1 1081 ret <2 x i32> %vecinit1.i 1082 } 1083 1084 define <2 x i32> @test_concat_diff_v1i32_v1i32(i32 %a, i32 %b) { 1085 ; CHECK-LABEL: test_concat_diff_v1i32_v1i32: 1086 ; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}} 1087 ; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}} 1088 ; CHECK: ins {{v[0-9]+}}.s[1], w{{[0-9]+}} 1089 entry: 1090 %c = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a) 1091 %d = insertelement <2 x i32> undef, i32 %c, i32 0 1092 %e = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %b) 1093 %f = insertelement <2 x i32> undef, i32 %e, i32 
0 1094 %h = shufflevector <2 x i32> %d, <2 x i32> %f, <2 x i32> <i32 0, i32 2> 1095 ret <2 x i32> %h 1096 } 1097 1098 define <16 x i8> @test_concat_v16i8_v16i8_v16i8(<16 x i8> %x, <16 x i8> %y) #0 { 1099 ; CHECK-LABEL: test_concat_v16i8_v16i8_v16i8: 1100 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1101 entry: 1102 %vecinit30 = shufflevector <16 x i8> %x, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> 1103 ret <16 x i8> %vecinit30 1104 } 1105 1106 define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 { 1107 ; CHECK-LABEL: test_concat_v16i8_v8i8_v16i8: 1108 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1109 entry: 1110 %vecext = extractelement <8 x i8> %x, i32 0 1111 %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0 1112 %vecext1 = extractelement <8 x i8> %x, i32 1 1113 %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1 1114 %vecext3 = extractelement <8 x i8> %x, i32 2 1115 %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2 1116 %vecext5 = extractelement <8 x i8> %x, i32 3 1117 %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3 1118 %vecext7 = extractelement <8 x i8> %x, i32 4 1119 %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4 1120 %vecext9 = extractelement <8 x i8> %x, i32 5 1121 %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5 1122 %vecext11 = extractelement <8 x i8> %x, i32 6 1123 %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6 1124 %vecext13 = extractelement <8 x i8> %x, i32 7 1125 %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7 1126 %vecinit30 = shufflevector <16 x i8> %vecinit14, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> 1127 ret <16 x i8> %vecinit30 1128 } 1129 1130 define <16 x i8> 
@test_concat_v16i8_v16i8_v8i8(<16 x i8> %x, <8 x i8> %y) #0 { 1131 ; CHECK-LABEL: test_concat_v16i8_v16i8_v8i8: 1132 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1133 entry: 1134 %vecext = extractelement <16 x i8> %x, i32 0 1135 %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0 1136 %vecext1 = extractelement <16 x i8> %x, i32 1 1137 %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1 1138 %vecext3 = extractelement <16 x i8> %x, i32 2 1139 %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2 1140 %vecext5 = extractelement <16 x i8> %x, i32 3 1141 %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3 1142 %vecext7 = extractelement <16 x i8> %x, i32 4 1143 %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4 1144 %vecext9 = extractelement <16 x i8> %x, i32 5 1145 %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5 1146 %vecext11 = extractelement <16 x i8> %x, i32 6 1147 %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6 1148 %vecext13 = extractelement <16 x i8> %x, i32 7 1149 %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7 1150 %vecext15 = extractelement <8 x i8> %y, i32 0 1151 %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8 1152 %vecext17 = extractelement <8 x i8> %y, i32 1 1153 %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9 1154 %vecext19 = extractelement <8 x i8> %y, i32 2 1155 %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10 1156 %vecext21 = extractelement <8 x i8> %y, i32 3 1157 %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11 1158 %vecext23 = extractelement <8 x i8> %y, i32 4 1159 %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12 1160 %vecext25 = extractelement <8 x i8> %y, i32 5 1161 %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13 1162 %vecext27 = extractelement <8 x i8> %y, i32 6 1163 %vecinit28 = insertelement <16 x 
i8> %vecinit26, i8 %vecext27, i32 14 1164 %vecext29 = extractelement <8 x i8> %y, i32 7 1165 %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15 1166 ret <16 x i8> %vecinit30 1167 } 1168 1169 define <16 x i8> @test_concat_v16i8_v8i8_v8i8(<8 x i8> %x, <8 x i8> %y) #0 { 1170 ; CHECK-LABEL: test_concat_v16i8_v8i8_v8i8: 1171 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1172 entry: 1173 %vecext = extractelement <8 x i8> %x, i32 0 1174 %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0 1175 %vecext1 = extractelement <8 x i8> %x, i32 1 1176 %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1 1177 %vecext3 = extractelement <8 x i8> %x, i32 2 1178 %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2 1179 %vecext5 = extractelement <8 x i8> %x, i32 3 1180 %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3 1181 %vecext7 = extractelement <8 x i8> %x, i32 4 1182 %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4 1183 %vecext9 = extractelement <8 x i8> %x, i32 5 1184 %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5 1185 %vecext11 = extractelement <8 x i8> %x, i32 6 1186 %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6 1187 %vecext13 = extractelement <8 x i8> %x, i32 7 1188 %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7 1189 %vecext15 = extractelement <8 x i8> %y, i32 0 1190 %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8 1191 %vecext17 = extractelement <8 x i8> %y, i32 1 1192 %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9 1193 %vecext19 = extractelement <8 x i8> %y, i32 2 1194 %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10 1195 %vecext21 = extractelement <8 x i8> %y, i32 3 1196 %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11 1197 %vecext23 = extractelement <8 x i8> %y, i32 4 1198 %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12 
1199 %vecext25 = extractelement <8 x i8> %y, i32 5 1200 %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13 1201 %vecext27 = extractelement <8 x i8> %y, i32 6 1202 %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14 1203 %vecext29 = extractelement <8 x i8> %y, i32 7 1204 %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15 1205 ret <16 x i8> %vecinit30 1206 } 1207 1208 define <8 x i16> @test_concat_v8i16_v8i16_v8i16(<8 x i16> %x, <8 x i16> %y) #0 { 1209 ; CHECK-LABEL: test_concat_v8i16_v8i16_v8i16: 1210 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1211 entry: 1212 %vecinit14 = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> 1213 ret <8 x i16> %vecinit14 1214 } 1215 1216 define <8 x i16> @test_concat_v8i16_v4i16_v8i16(<4 x i16> %x, <8 x i16> %y) #0 { 1217 ; CHECK-LABEL: test_concat_v8i16_v4i16_v8i16: 1218 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1219 entry: 1220 %vecext = extractelement <4 x i16> %x, i32 0 1221 %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0 1222 %vecext1 = extractelement <4 x i16> %x, i32 1 1223 %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1 1224 %vecext3 = extractelement <4 x i16> %x, i32 2 1225 %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2 1226 %vecext5 = extractelement <4 x i16> %x, i32 3 1227 %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3 1228 %vecinit14 = shufflevector <8 x i16> %vecinit6, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> 1229 ret <8 x i16> %vecinit14 1230 } 1231 1232 define <8 x i16> @test_concat_v8i16_v8i16_v4i16(<8 x i16> %x, <4 x i16> %y) #0 { 1233 ; CHECK-LABEL: test_concat_v8i16_v8i16_v4i16: 1234 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1235 entry: 1236 %vecext = extractelement <8 x i16> %x, i32 0 1237 %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0 1238 %vecext1 = 
extractelement <8 x i16> %x, i32 1 1239 %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1 1240 %vecext3 = extractelement <8 x i16> %x, i32 2 1241 %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2 1242 %vecext5 = extractelement <8 x i16> %x, i32 3 1243 %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3 1244 %vecext7 = extractelement <4 x i16> %y, i32 0 1245 %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4 1246 %vecext9 = extractelement <4 x i16> %y, i32 1 1247 %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5 1248 %vecext11 = extractelement <4 x i16> %y, i32 2 1249 %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6 1250 %vecext13 = extractelement <4 x i16> %y, i32 3 1251 %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7 1252 ret <8 x i16> %vecinit14 1253 } 1254 1255 define <8 x i16> @test_concat_v8i16_v4i16_v4i16(<4 x i16> %x, <4 x i16> %y) #0 { 1256 ; CHECK-LABEL: test_concat_v8i16_v4i16_v4i16: 1257 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1258 entry: 1259 %vecext = extractelement <4 x i16> %x, i32 0 1260 %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0 1261 %vecext1 = extractelement <4 x i16> %x, i32 1 1262 %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1 1263 %vecext3 = extractelement <4 x i16> %x, i32 2 1264 %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2 1265 %vecext5 = extractelement <4 x i16> %x, i32 3 1266 %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3 1267 %vecext7 = extractelement <4 x i16> %y, i32 0 1268 %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4 1269 %vecext9 = extractelement <4 x i16> %y, i32 1 1270 %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5 1271 %vecext11 = extractelement <4 x i16> %y, i32 2 1272 %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6 1273 %vecext13 = extractelement <4 x i16> 
%y, i32 3 1274 %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7 1275 ret <8 x i16> %vecinit14 1276 } 1277 1278 define <4 x i32> @test_concat_v4i32_v4i32_v4i32(<4 x i32> %x, <4 x i32> %y) #0 { 1279 ; CHECK-LABEL: test_concat_v4i32_v4i32_v4i32: 1280 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1281 entry: 1282 %vecinit6 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5> 1283 ret <4 x i32> %vecinit6 1284 } 1285 1286 define <4 x i32> @test_concat_v4i32_v2i32_v4i32(<2 x i32> %x, <4 x i32> %y) #0 { 1287 ; CHECK-LABEL: test_concat_v4i32_v2i32_v4i32: 1288 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1289 entry: 1290 %vecext = extractelement <2 x i32> %x, i32 0 1291 %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0 1292 %vecext1 = extractelement <2 x i32> %x, i32 1 1293 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1 1294 %vecinit6 = shufflevector <4 x i32> %vecinit2, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5> 1295 ret <4 x i32> %vecinit6 1296 } 1297 1298 define <4 x i32> @test_concat_v4i32_v4i32_v2i32(<4 x i32> %x, <2 x i32> %y) #0 { 1299 ; CHECK-LABEL: test_concat_v4i32_v4i32_v2i32: 1300 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1301 entry: 1302 %vecext = extractelement <4 x i32> %x, i32 0 1303 %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0 1304 %vecext1 = extractelement <4 x i32> %x, i32 1 1305 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1 1306 %vecext3 = extractelement <2 x i32> %y, i32 0 1307 %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %vecext3, i32 2 1308 %vecext5 = extractelement <2 x i32> %y, i32 1 1309 %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %vecext5, i32 3 1310 ret <4 x i32> %vecinit6 1311 } 1312 1313 define <4 x i32> @test_concat_v4i32_v2i32_v2i32(<2 x i32> %x, <2 x i32> %y) #0 { 1314 ; CHECK-LABEL: test_concat_v4i32_v2i32_v2i32: 1315 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1316 entry: 1317 %vecinit6 = 
shufflevector <2 x i32> %x, <2 x i32> %y, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1318 ret <4 x i32> %vecinit6 1319 } 1320 1321 define <2 x i64> @test_concat_v2i64_v2i64_v2i64(<2 x i64> %x, <2 x i64> %y) #0 { 1322 ; CHECK-LABEL: test_concat_v2i64_v2i64_v2i64: 1323 ; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d 1324 entry: 1325 %vecinit2 = shufflevector <2 x i64> %x, <2 x i64> %y, <2 x i32> <i32 0, i32 2> 1326 ret <2 x i64> %vecinit2 1327 } 1328 1329 define <2 x i64> @test_concat_v2i64_v1i64_v2i64(<1 x i64> %x, <2 x i64> %y) #0 { 1330 ; CHECK-LABEL: test_concat_v2i64_v1i64_v2i64: 1331 ; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d 1332 entry: 1333 %vecext = extractelement <1 x i64> %x, i32 0 1334 %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0 1335 %vecinit2 = shufflevector <2 x i64> %vecinit, <2 x i64> %y, <2 x i32> <i32 0, i32 2> 1336 ret <2 x i64> %vecinit2 1337 } 1338 1339 define <2 x i64> @test_concat_v2i64_v2i64_v1i64(<2 x i64> %x, <1 x i64> %y) #0 { 1340 ; CHECK-LABEL: test_concat_v2i64_v2i64_v1i64: 1341 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1342 entry: 1343 %vecext = extractelement <2 x i64> %x, i32 0 1344 %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0 1345 %vecext1 = extractelement <1 x i64> %y, i32 0 1346 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1 1347 ret <2 x i64> %vecinit2 1348 } 1349 1350 define <2 x i64> @test_concat_v2i64_v1i64_v1i64(<1 x i64> %x, <1 x i64> %y) #0 { 1351 ; CHECK-LABEL: test_concat_v2i64_v1i64_v1i64: 1352 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1353 entry: 1354 %vecext = extractelement <1 x i64> %x, i32 0 1355 %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0 1356 %vecext1 = extractelement <1 x i64> %y, i32 0 1357 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1 1358 ret <2 x i64> %vecinit2 1359 } 1360 1361 1362 define <4 x i16> @concat_vector_v4i16_const() { 1363 ; CHECK-LABEL: concat_vector_v4i16_const: 1364 ; 
CHECK: movi {{d[0-9]+}}, #0 1365 %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <4 x i32> zeroinitializer 1366 ret <4 x i16> %r 1367 } 1368 1369 define <4 x i16> @concat_vector_v4i16_const_one() { 1370 ; CHECK-LABEL: concat_vector_v4i16_const_one: 1371 ; CHECK: movi {{v[0-9]+}}.4h, #0x1 1372 %r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <4 x i32> zeroinitializer 1373 ret <4 x i16> %r 1374 } 1375 1376 define <4 x i32> @concat_vector_v4i32_const() { 1377 ; CHECK-LABEL: concat_vector_v4i32_const: 1378 ; CHECK: movi {{v[0-9]+}}.2d, #0 1379 %r = shufflevector <1 x i32> zeroinitializer, <1 x i32> undef, <4 x i32> zeroinitializer 1380 ret <4 x i32> %r 1381 } 1382 1383 define <8 x i8> @concat_vector_v8i8_const() { 1384 ; CHECK-LABEL: concat_vector_v8i8_const: 1385 ; CHECK: movi {{d[0-9]+}}, #0 1386 %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <8 x i32> zeroinitializer 1387 ret <8 x i8> %r 1388 } 1389 1390 define <8 x i16> @concat_vector_v8i16_const() { 1391 ; CHECK-LABEL: concat_vector_v8i16_const: 1392 ; CHECK: movi {{v[0-9]+}}.2d, #0 1393 %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <8 x i32> zeroinitializer 1394 ret <8 x i16> %r 1395 } 1396 1397 define <8 x i16> @concat_vector_v8i16_const_one() { 1398 ; CHECK-LABEL: concat_vector_v8i16_const_one: 1399 ; CHECK: movi {{v[0-9]+}}.8h, #0x1 1400 %r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <8 x i32> zeroinitializer 1401 ret <8 x i16> %r 1402 } 1403 1404 define <16 x i8> @concat_vector_v16i8_const() { 1405 ; CHECK-LABEL: concat_vector_v16i8_const: 1406 ; CHECK: movi {{v[0-9]+}}.2d, #0 1407 %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <16 x i32> zeroinitializer 1408 ret <16 x i8> %r 1409 } 1410 1411 define <4 x i16> @concat_vector_v4i16(<1 x i16> %a) { 1412 ; CHECK-LABEL: concat_vector_v4i16: 1413 ; CHECK: dup v0.4h, v0.h[0] 1414 %r = shufflevector <1 x i16> %a, <1 x i16> undef, <4 x i32> zeroinitializer 1415 ret <4 x i16> %r 1416 } 1417 1418 
; Broadcast the lone i32 element of %a to every lane of a <4 x i32>; the
; all-zero mask selects source element 0 for each result lane.
define <4 x i32> @concat_vector_v4i32(<1 x i32> %a) {
; CHECK-LABEL: concat_vector_v4i32:
; CHECK: dup v0.4s, v0.s[0]
  %bcast = shufflevector <1 x i32> %a, <1 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %bcast
}

; Broadcast the lone i8 element of %a to every lane of a <8 x i8>.
define <8 x i8> @concat_vector_v8i8(<1 x i8> %a) {
; CHECK-LABEL: concat_vector_v8i8:
; CHECK: dup v0.8b, v0.b[0]
  %bcast = shufflevector <1 x i8> %a, <1 x i8> undef, <8 x i32> zeroinitializer
  ret <8 x i8> %bcast
}

; Broadcast the lone i16 element of %a to every lane of a <8 x i16>.
define <8 x i16> @concat_vector_v8i16(<1 x i16> %a) {
; CHECK-LABEL: concat_vector_v8i16:
; CHECK: dup v0.8h, v0.h[0]
  %bcast = shufflevector <1 x i16> %a, <1 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %bcast
}

; Broadcast the lone i8 element of %a to every lane of a <16 x i8>.
define <16 x i8> @concat_vector_v16i8(<1 x i8> %a) {
; CHECK-LABEL: concat_vector_v16i8:
; CHECK: dup v0.16b, v0.b[0]
  %bcast = shufflevector <1 x i8> %a, <1 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %bcast
}