; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -asm-verbose=false | FileCheck %s

      3 define <8 x i8> @v_dup8(i8 %A) nounwind {
      4 ;CHECK-LABEL: v_dup8:
      5 ;CHECK: dup.8b
      6 	%tmp1 = insertelement <8 x i8> zeroinitializer, i8 %A, i32 0
      7 	%tmp2 = insertelement <8 x i8> %tmp1, i8 %A, i32 1
      8 	%tmp3 = insertelement <8 x i8> %tmp2, i8 %A, i32 2
      9 	%tmp4 = insertelement <8 x i8> %tmp3, i8 %A, i32 3
     10 	%tmp5 = insertelement <8 x i8> %tmp4, i8 %A, i32 4
     11 	%tmp6 = insertelement <8 x i8> %tmp5, i8 %A, i32 5
     12 	%tmp7 = insertelement <8 x i8> %tmp6, i8 %A, i32 6
     13 	%tmp8 = insertelement <8 x i8> %tmp7, i8 %A, i32 7
     14 	ret <8 x i8> %tmp8
     15 }
     16 
     17 define <4 x i16> @v_dup16(i16 %A) nounwind {
     18 ;CHECK-LABEL: v_dup16:
     19 ;CHECK: dup.4h
     20 	%tmp1 = insertelement <4 x i16> zeroinitializer, i16 %A, i32 0
     21 	%tmp2 = insertelement <4 x i16> %tmp1, i16 %A, i32 1
     22 	%tmp3 = insertelement <4 x i16> %tmp2, i16 %A, i32 2
     23 	%tmp4 = insertelement <4 x i16> %tmp3, i16 %A, i32 3
     24 	ret <4 x i16> %tmp4
     25 }
     26 
     27 define <2 x i32> @v_dup32(i32 %A) nounwind {
     28 ;CHECK-LABEL: v_dup32:
     29 ;CHECK: dup.2s
     30 	%tmp1 = insertelement <2 x i32> zeroinitializer, i32 %A, i32 0
     31 	%tmp2 = insertelement <2 x i32> %tmp1, i32 %A, i32 1
     32 	ret <2 x i32> %tmp2
     33 }
     34 
     35 define <2 x float> @v_dupfloat(float %A) nounwind {
     36 ;CHECK-LABEL: v_dupfloat:
     37 ;CHECK: dup.2s
     38 	%tmp1 = insertelement <2 x float> zeroinitializer, float %A, i32 0
     39 	%tmp2 = insertelement <2 x float> %tmp1, float %A, i32 1
     40 	ret <2 x float> %tmp2
     41 }
     42 
     43 define <16 x i8> @v_dupQ8(i8 %A) nounwind {
     44 ;CHECK-LABEL: v_dupQ8:
     45 ;CHECK: dup.16b
     46 	%tmp1 = insertelement <16 x i8> zeroinitializer, i8 %A, i32 0
     47 	%tmp2 = insertelement <16 x i8> %tmp1, i8 %A, i32 1
     48 	%tmp3 = insertelement <16 x i8> %tmp2, i8 %A, i32 2
     49 	%tmp4 = insertelement <16 x i8> %tmp3, i8 %A, i32 3
     50 	%tmp5 = insertelement <16 x i8> %tmp4, i8 %A, i32 4
     51 	%tmp6 = insertelement <16 x i8> %tmp5, i8 %A, i32 5
     52 	%tmp7 = insertelement <16 x i8> %tmp6, i8 %A, i32 6
     53 	%tmp8 = insertelement <16 x i8> %tmp7, i8 %A, i32 7
     54 	%tmp9 = insertelement <16 x i8> %tmp8, i8 %A, i32 8
     55 	%tmp10 = insertelement <16 x i8> %tmp9, i8 %A, i32 9
     56 	%tmp11 = insertelement <16 x i8> %tmp10, i8 %A, i32 10
     57 	%tmp12 = insertelement <16 x i8> %tmp11, i8 %A, i32 11
     58 	%tmp13 = insertelement <16 x i8> %tmp12, i8 %A, i32 12
     59 	%tmp14 = insertelement <16 x i8> %tmp13, i8 %A, i32 13
     60 	%tmp15 = insertelement <16 x i8> %tmp14, i8 %A, i32 14
     61 	%tmp16 = insertelement <16 x i8> %tmp15, i8 %A, i32 15
     62 	ret <16 x i8> %tmp16
     63 }
     64 
     65 define <8 x i16> @v_dupQ16(i16 %A) nounwind {
     66 ;CHECK-LABEL: v_dupQ16:
     67 ;CHECK: dup.8h
     68 	%tmp1 = insertelement <8 x i16> zeroinitializer, i16 %A, i32 0
     69 	%tmp2 = insertelement <8 x i16> %tmp1, i16 %A, i32 1
     70 	%tmp3 = insertelement <8 x i16> %tmp2, i16 %A, i32 2
     71 	%tmp4 = insertelement <8 x i16> %tmp3, i16 %A, i32 3
     72 	%tmp5 = insertelement <8 x i16> %tmp4, i16 %A, i32 4
     73 	%tmp6 = insertelement <8 x i16> %tmp5, i16 %A, i32 5
     74 	%tmp7 = insertelement <8 x i16> %tmp6, i16 %A, i32 6
     75 	%tmp8 = insertelement <8 x i16> %tmp7, i16 %A, i32 7
     76 	ret <8 x i16> %tmp8
     77 }
     78 
     79 define <4 x i32> @v_dupQ32(i32 %A) nounwind {
     80 ;CHECK-LABEL: v_dupQ32:
     81 ;CHECK: dup.4s
     82 	%tmp1 = insertelement <4 x i32> zeroinitializer, i32 %A, i32 0
     83 	%tmp2 = insertelement <4 x i32> %tmp1, i32 %A, i32 1
     84 	%tmp3 = insertelement <4 x i32> %tmp2, i32 %A, i32 2
     85 	%tmp4 = insertelement <4 x i32> %tmp3, i32 %A, i32 3
     86 	ret <4 x i32> %tmp4
     87 }
     88 
     89 define <4 x float> @v_dupQfloat(float %A) nounwind {
     90 ;CHECK-LABEL: v_dupQfloat:
     91 ;CHECK: dup.4s
     92 	%tmp1 = insertelement <4 x float> zeroinitializer, float %A, i32 0
     93 	%tmp2 = insertelement <4 x float> %tmp1, float %A, i32 1
     94 	%tmp3 = insertelement <4 x float> %tmp2, float %A, i32 2
     95 	%tmp4 = insertelement <4 x float> %tmp3, float %A, i32 3
     96 	ret <4 x float> %tmp4
     97 }
     98 
; Check that dup is also selected when the splat is expressed as a
; shufflevector rather than a chain of insertelement instructions.

    101 define <8 x i8> @v_shuffledup8(i8 %A) nounwind {
    102 ;CHECK-LABEL: v_shuffledup8:
    103 ;CHECK: dup.8b
    104 	%tmp1 = insertelement <8 x i8> undef, i8 %A, i32 0
    105 	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> zeroinitializer
    106 	ret <8 x i8> %tmp2
    107 }
    108 
    109 define <4 x i16> @v_shuffledup16(i16 %A) nounwind {
    110 ;CHECK-LABEL: v_shuffledup16:
    111 ;CHECK: dup.4h
    112 	%tmp1 = insertelement <4 x i16> undef, i16 %A, i32 0
    113 	%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
    114 	ret <4 x i16> %tmp2
    115 }
    116 
    117 define <2 x i32> @v_shuffledup32(i32 %A) nounwind {
    118 ;CHECK-LABEL: v_shuffledup32:
    119 ;CHECK: dup.2s
    120 	%tmp1 = insertelement <2 x i32> undef, i32 %A, i32 0
    121 	%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer
    122 	ret <2 x i32> %tmp2
    123 }
    124 
    125 define <2 x float> @v_shuffledupfloat(float %A) nounwind {
    126 ;CHECK-LABEL: v_shuffledupfloat:
    127 ;CHECK: dup.2s
    128 	%tmp1 = insertelement <2 x float> undef, float %A, i32 0
    129 	%tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer
    130 	ret <2 x float> %tmp2
    131 }
    132 
    133 define <16 x i8> @v_shuffledupQ8(i8 %A) nounwind {
    134 ;CHECK-LABEL: v_shuffledupQ8:
    135 ;CHECK: dup.16b
    136 	%tmp1 = insertelement <16 x i8> undef, i8 %A, i32 0
    137 	%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> zeroinitializer
    138 	ret <16 x i8> %tmp2
    139 }
    140 
    141 define <8 x i16> @v_shuffledupQ16(i16 %A) nounwind {
    142 ;CHECK-LABEL: v_shuffledupQ16:
    143 ;CHECK: dup.8h
    144 	%tmp1 = insertelement <8 x i16> undef, i16 %A, i32 0
    145 	%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> zeroinitializer
    146 	ret <8 x i16> %tmp2
    147 }
    148 
    149 define <4 x i32> @v_shuffledupQ32(i32 %A) nounwind {
    150 ;CHECK-LABEL: v_shuffledupQ32:
    151 ;CHECK: dup.4s
    152 	%tmp1 = insertelement <4 x i32> undef, i32 %A, i32 0
    153 	%tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> zeroinitializer
    154 	ret <4 x i32> %tmp2
    155 }
    156 
    157 define <4 x float> @v_shuffledupQfloat(float %A) nounwind {
    158 ;CHECK-LABEL: v_shuffledupQfloat:
    159 ;CHECK: dup.4s
    160 	%tmp1 = insertelement <4 x float> undef, float %A, i32 0
    161 	%tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer
    162 	ret <4 x float> %tmp2
    163 }
    164 
    165 define <8 x i8> @vduplane8(<8 x i8>* %A) nounwind {
    166 ;CHECK-LABEL: vduplane8:
    167 ;CHECK: dup.8b
    168 	%tmp1 = load <8 x i8>, <8 x i8>* %A
    169 	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
    170 	ret <8 x i8> %tmp2
    171 }
    172 
    173 define <4 x i16> @vduplane16(<4 x i16>* %A) nounwind {
    174 ;CHECK-LABEL: vduplane16:
    175 ;CHECK: dup.4h
    176 	%tmp1 = load <4 x i16>, <4 x i16>* %A
    177 	%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
    178 	ret <4 x i16> %tmp2
    179 }
    180 
    181 define <2 x i32> @vduplane32(<2 x i32>* %A) nounwind {
    182 ;CHECK-LABEL: vduplane32:
    183 ;CHECK: dup.2s
    184 	%tmp1 = load <2 x i32>, <2 x i32>* %A
    185 	%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> < i32 1, i32 1 >
    186 	ret <2 x i32> %tmp2
    187 }
    188 
    189 define <2 x float> @vduplanefloat(<2 x float>* %A) nounwind {
    190 ;CHECK-LABEL: vduplanefloat:
    191 ;CHECK: dup.2s
    192 	%tmp1 = load <2 x float>, <2 x float>* %A
    193 	%tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> < i32 1, i32 1 >
    194 	ret <2 x float> %tmp2
    195 }
    196 
    197 define <16 x i8> @vduplaneQ8(<8 x i8>* %A) nounwind {
    198 ;CHECK-LABEL: vduplaneQ8:
    199 ;CHECK: dup.16b
    200 	%tmp1 = load <8 x i8>, <8 x i8>* %A
    201 	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <16 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
    202 	ret <16 x i8> %tmp2
    203 }
    204 
    205 define <8 x i16> @vduplaneQ16(<4 x i16>* %A) nounwind {
    206 ;CHECK-LABEL: vduplaneQ16:
    207 ;CHECK: dup.8h
    208 	%tmp1 = load <4 x i16>, <4 x i16>* %A
    209 	%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
    210 	ret <8 x i16> %tmp2
    211 }
    212 
    213 define <4 x i32> @vduplaneQ32(<2 x i32>* %A) nounwind {
    214 ;CHECK-LABEL: vduplaneQ32:
    215 ;CHECK: dup.4s
    216 	%tmp1 = load <2 x i32>, <2 x i32>* %A
    217 	%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
    218 	ret <4 x i32> %tmp2
    219 }
    220 
    221 define <4 x float> @vduplaneQfloat(<2 x float>* %A) nounwind {
    222 ;CHECK-LABEL: vduplaneQfloat:
    223 ;CHECK: dup.4s
    224 	%tmp1 = load <2 x float>, <2 x float>* %A
    225 	%tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
    226 	ret <4 x float> %tmp2
    227 }
    228 
    229 define <2 x i64> @foo(<2 x i64> %arg0_int64x1_t) nounwind readnone {
    230 ;CHECK-LABEL: foo:
    231 ;CHECK: dup.2d
    232 entry:
    233   %0 = shufflevector <2 x i64> %arg0_int64x1_t, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
    234   ret <2 x i64> %0
    235 }
    236 
    237 define <2 x i64> @bar(<2 x i64> %arg0_int64x1_t) nounwind readnone {
    238 ;CHECK-LABEL: bar:
    239 ;CHECK: dup.2d
    240 entry:
    241   %0 = shufflevector <2 x i64> %arg0_int64x1_t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
    242   ret <2 x i64> %0
    243 }
    244 
    245 define <2 x double> @baz(<2 x double> %arg0_int64x1_t) nounwind readnone {
    246 ;CHECK-LABEL: baz:
    247 ;CHECK: dup.2d
    248 entry:
    249   %0 = shufflevector <2 x double> %arg0_int64x1_t, <2 x double> undef, <2 x i32> <i32 1, i32 1>
    250   ret <2 x double> %0
    251 }
    252 
    253 define <2 x double> @qux(<2 x double> %arg0_int64x1_t) nounwind readnone {
    254 ;CHECK-LABEL: qux:
    255 ;CHECK: dup.2d
    256 entry:
    257   %0 = shufflevector <2 x double> %arg0_int64x1_t, <2 x double> undef, <2 x i32> <i32 0, i32 0>
    258   ret <2 x double> %0
    259 }
    260 
    261 define <2 x i32> @f(i32 %a, i32 %b) nounwind readnone  {
    262 ; CHECK-LABEL: f:
    263 ; CHECK-NEXT: fmov s0, w0
    264 ; CHECK-NEXT: ins.s v0[1], w1
    265 ; CHECK-NEXT: ret
    266   %vecinit = insertelement <2 x i32> undef, i32 %a, i32 0
    267   %vecinit1 = insertelement <2 x i32> %vecinit, i32 %b, i32 1
    268   ret <2 x i32> %vecinit1
    269 }
    270 
    271 define <4 x i32> @g(i32 %a, i32 %b) nounwind readnone  {
    272 ; CHECK-LABEL: g:
    273 ; CHECK-NEXT: fmov s0, w0
    274 ; CHECK-NEXT: ins.s v0[1], w1
    275 ; CHECK-NEXT: ins.s v0[2], w1
    276 ; CHECK-NEXT: ins.s v0[3], w0
    277 ; CHECK-NEXT: ret
    278   %vecinit = insertelement <4 x i32> undef, i32 %a, i32 0
    279   %vecinit1 = insertelement <4 x i32> %vecinit, i32 %b, i32 1
    280   %vecinit2 = insertelement <4 x i32> %vecinit1, i32 %b, i32 2
    281   %vecinit3 = insertelement <4 x i32> %vecinit2, i32 %a, i32 3
    282   ret <4 x i32> %vecinit3
    283 }
    284 
    285 define <2 x i64> @h(i64 %a, i64 %b) nounwind readnone  {
    286 ; CHECK-LABEL: h:
    287 ; CHECK-NEXT: fmov d0, x0
    288 ; CHECK-NEXT: ins.d v0[1], x1
    289 ; CHECK-NEXT: ret
    290   %vecinit = insertelement <2 x i64> undef, i64 %a, i32 0
    291   %vecinit1 = insertelement <2 x i64> %vecinit, i64 %b, i32 1
    292   ret <2 x i64> %vecinit1
    293 }
    294 
    295 ; We used to spot this as a BUILD_VECTOR implementable by dup, but assume that
    296 ; the single value needed was of the same type as the vector. This is false if
    297 ; the scalar corresponding to the vector type is illegal (e.g. a <4 x i16>
    298 ; BUILD_VECTOR will have an i32 as its source). In that case, the operation is
    299 ; not a simple "dup vD.4h, vN.h[idx]" after all, and we crashed.
    300 ;
    301 ; *However*, it is a dup vD.4h, vN.h[2*idx].
    302 define <4 x i16> @test_build_illegal(<4 x i32> %in) {
    303 ; CHECK-LABEL: test_build_illegal:
    304 ; CHECK: dup.4h v0, v0[6]
    305   %val = extractelement <4 x i32> %in, i32 3
    306   %smallval = trunc i32 %val to i16
    307   %vec = insertelement <4x i16> undef, i16 %smallval, i32 3
    308 
    309   ret <4 x i16> %vec
    310 }
    311 
    312 ; We used to inherit an already extract_subvectored v4i16 from
    313 ; SelectionDAGBuilder here. We then added a DUPLANE on top of that, preventing
    314 ; the formation of an indexed-by-7 MLS.
    315 define <4 x i16> @test_high_splat(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) #0 {
    316 ; CHECK-LABEL: test_high_splat:
    317 ; CHECK: mls.4h v0, v1, v2[7]
    318 entry:
    319   %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
    320   %mul = mul <4 x i16> %shuffle, %b
    321   %sub = sub <4 x i16> %a, %mul
    322   ret <4 x i16> %sub
    323 }
    324 
    325 ; Also test the DUP path in the PerfectShuffle generator.
    326 
    327 ; CHECK-LABEL: test_perfectshuffle_dupext_v4i16:
    328 ; CHECK-NEXT: dup.4h v0, v0[0]
    329 ; CHECK-NEXT: ext.8b v0, v0, v1, #4
    330 define <4 x i16> @test_perfectshuffle_dupext_v4i16(<4 x i16> %a, <4 x i16> %b) nounwind {
    331   %r = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 0, i32 4, i32 5>
    332   ret <4 x i16> %r
    333 }
    334 
    335 ; CHECK-LABEL: test_perfectshuffle_dupext_v4f16:
    336 ; CHECK-NEXT: dup.4h v0, v0[0]
    337 ; CHECK-NEXT: ext.8b v0, v0, v1, #4
    338 ; CHECK-NEXT: ret
    339 define <4 x half> @test_perfectshuffle_dupext_v4f16(<4 x half> %a, <4 x half> %b) nounwind {
    340   %r = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 0, i32 4, i32 5>
    341   ret <4 x half> %r
    342 }
    343 
    344 ; CHECK-LABEL: test_perfectshuffle_dupext_v4i32:
    345 ; CHECK-NEXT: dup.4s v0, v0[0]
    346 ; CHECK-NEXT: ext.16b v0, v0, v1, #8
    347 ; CHECK-NEXT: ret
    348 define <4 x i32> @test_perfectshuffle_dupext_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
    349   %r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 0, i32 4, i32 5>
    350   ret <4 x i32> %r
    351 }
    352 
    353 ; CHECK-LABEL: test_perfectshuffle_dupext_v4f32:
    354 ; CHECK-NEXT: dup.4s v0, v0[0]
    355 ; CHECK-NEXT: ext.16b v0, v0, v1, #8
    356 ; CHECK-NEXT: ret
    357 define <4 x float> @test_perfectshuffle_dupext_v4f32(<4 x float> %a, <4 x float> %b) nounwind {
    358   %r = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 4, i32 5>
    359   ret <4 x float> %r
    360 }
    361