Home | History | Annotate | Download | only in ARM
      1 ; RUN: llc -mtriple=arm-eabi -float-abi=soft -mattr=+neon -verify-machineinstrs %s -o - \
      2 ; RUN:	| FileCheck %s
      3 
      4 define <8 x i8> @v_dup8(i8 %A) nounwind {
      5 ;CHECK-LABEL: v_dup8:
      6 ;CHECK: vdup.8
      7 	%tmp1 = insertelement <8 x i8> zeroinitializer, i8 %A, i32 0
      8 	%tmp2 = insertelement <8 x i8> %tmp1, i8 %A, i32 1
      9 	%tmp3 = insertelement <8 x i8> %tmp2, i8 %A, i32 2
     10 	%tmp4 = insertelement <8 x i8> %tmp3, i8 %A, i32 3
     11 	%tmp5 = insertelement <8 x i8> %tmp4, i8 %A, i32 4
     12 	%tmp6 = insertelement <8 x i8> %tmp5, i8 %A, i32 5
     13 	%tmp7 = insertelement <8 x i8> %tmp6, i8 %A, i32 6
     14 	%tmp8 = insertelement <8 x i8> %tmp7, i8 %A, i32 7
     15 	ret <8 x i8> %tmp8
     16 }
     17 
     18 define <4 x i16> @v_dup16(i16 %A) nounwind {
     19 ;CHECK-LABEL: v_dup16:
     20 ;CHECK: vdup.16
     21 	%tmp1 = insertelement <4 x i16> zeroinitializer, i16 %A, i32 0
     22 	%tmp2 = insertelement <4 x i16> %tmp1, i16 %A, i32 1
     23 	%tmp3 = insertelement <4 x i16> %tmp2, i16 %A, i32 2
     24 	%tmp4 = insertelement <4 x i16> %tmp3, i16 %A, i32 3
     25 	ret <4 x i16> %tmp4
     26 }
     27 
     28 define <2 x i32> @v_dup32(i32 %A) nounwind {
     29 ;CHECK-LABEL: v_dup32:
     30 ;CHECK: vdup.32
     31 	%tmp1 = insertelement <2 x i32> zeroinitializer, i32 %A, i32 0
     32 	%tmp2 = insertelement <2 x i32> %tmp1, i32 %A, i32 1
     33 	ret <2 x i32> %tmp2
     34 }
     35 
     36 define <2 x float> @v_dupfloat(float %A) nounwind {
     37 ;CHECK-LABEL: v_dupfloat:
     38 ;CHECK: vdup.32
     39 	%tmp1 = insertelement <2 x float> zeroinitializer, float %A, i32 0
     40 	%tmp2 = insertelement <2 x float> %tmp1, float %A, i32 1
     41 	ret <2 x float> %tmp2
     42 }
     43 
     44 define <16 x i8> @v_dupQ8(i8 %A) nounwind {
     45 ;CHECK-LABEL: v_dupQ8:
     46 ;CHECK: vdup.8
     47 	%tmp1 = insertelement <16 x i8> zeroinitializer, i8 %A, i32 0
     48 	%tmp2 = insertelement <16 x i8> %tmp1, i8 %A, i32 1
     49 	%tmp3 = insertelement <16 x i8> %tmp2, i8 %A, i32 2
     50 	%tmp4 = insertelement <16 x i8> %tmp3, i8 %A, i32 3
     51 	%tmp5 = insertelement <16 x i8> %tmp4, i8 %A, i32 4
     52 	%tmp6 = insertelement <16 x i8> %tmp5, i8 %A, i32 5
     53 	%tmp7 = insertelement <16 x i8> %tmp6, i8 %A, i32 6
     54 	%tmp8 = insertelement <16 x i8> %tmp7, i8 %A, i32 7
     55 	%tmp9 = insertelement <16 x i8> %tmp8, i8 %A, i32 8
     56 	%tmp10 = insertelement <16 x i8> %tmp9, i8 %A, i32 9
     57 	%tmp11 = insertelement <16 x i8> %tmp10, i8 %A, i32 10
     58 	%tmp12 = insertelement <16 x i8> %tmp11, i8 %A, i32 11
     59 	%tmp13 = insertelement <16 x i8> %tmp12, i8 %A, i32 12
     60 	%tmp14 = insertelement <16 x i8> %tmp13, i8 %A, i32 13
     61 	%tmp15 = insertelement <16 x i8> %tmp14, i8 %A, i32 14
     62 	%tmp16 = insertelement <16 x i8> %tmp15, i8 %A, i32 15
     63 	ret <16 x i8> %tmp16
     64 }
     65 
     66 define <8 x i16> @v_dupQ16(i16 %A) nounwind {
     67 ;CHECK-LABEL: v_dupQ16:
     68 ;CHECK: vdup.16
     69 	%tmp1 = insertelement <8 x i16> zeroinitializer, i16 %A, i32 0
     70 	%tmp2 = insertelement <8 x i16> %tmp1, i16 %A, i32 1
     71 	%tmp3 = insertelement <8 x i16> %tmp2, i16 %A, i32 2
     72 	%tmp4 = insertelement <8 x i16> %tmp3, i16 %A, i32 3
     73 	%tmp5 = insertelement <8 x i16> %tmp4, i16 %A, i32 4
     74 	%tmp6 = insertelement <8 x i16> %tmp5, i16 %A, i32 5
     75 	%tmp7 = insertelement <8 x i16> %tmp6, i16 %A, i32 6
     76 	%tmp8 = insertelement <8 x i16> %tmp7, i16 %A, i32 7
     77 	ret <8 x i16> %tmp8
     78 }
     79 
     80 define <4 x i32> @v_dupQ32(i32 %A) nounwind {
     81 ;CHECK-LABEL: v_dupQ32:
     82 ;CHECK: vdup.32
     83 	%tmp1 = insertelement <4 x i32> zeroinitializer, i32 %A, i32 0
     84 	%tmp2 = insertelement <4 x i32> %tmp1, i32 %A, i32 1
     85 	%tmp3 = insertelement <4 x i32> %tmp2, i32 %A, i32 2
     86 	%tmp4 = insertelement <4 x i32> %tmp3, i32 %A, i32 3
     87 	ret <4 x i32> %tmp4
     88 }
     89 
     90 define <4 x float> @v_dupQfloat(float %A) nounwind {
     91 ;CHECK-LABEL: v_dupQfloat:
     92 ;CHECK: vdup.32
     93 	%tmp1 = insertelement <4 x float> zeroinitializer, float %A, i32 0
     94 	%tmp2 = insertelement <4 x float> %tmp1, float %A, i32 1
     95 	%tmp3 = insertelement <4 x float> %tmp2, float %A, i32 2
     96 	%tmp4 = insertelement <4 x float> %tmp3, float %A, i32 3
     97 	ret <4 x float> %tmp4
     98 }
     99 
    100 ; Check to make sure it works with shuffles, too.
    101 
    102 define <8 x i8> @v_shuffledup8(i8 %A) nounwind {
    103 ;CHECK-LABEL: v_shuffledup8:
    104 ;CHECK: vdup.8
    105 	%tmp1 = insertelement <8 x i8> undef, i8 %A, i32 0
    106 	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> zeroinitializer
    107 	ret <8 x i8> %tmp2
    108 }
    109 
    110 define <4 x i16> @v_shuffledup16(i16 %A) nounwind {
    111 ;CHECK-LABEL: v_shuffledup16:
    112 ;CHECK: vdup.16
    113 	%tmp1 = insertelement <4 x i16> undef, i16 %A, i32 0
    114 	%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
    115 	ret <4 x i16> %tmp2
    116 }
    117 
    118 define <2 x i32> @v_shuffledup32(i32 %A) nounwind {
    119 ;CHECK-LABEL: v_shuffledup32:
    120 ;CHECK: vdup.32
    121 	%tmp1 = insertelement <2 x i32> undef, i32 %A, i32 0
    122 	%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer
    123 	ret <2 x i32> %tmp2
    124 }
    125 
    126 define <2 x float> @v_shuffledupfloat(float %A) nounwind {
    127 ;CHECK-LABEL: v_shuffledupfloat:
    128 ;CHECK: vdup.32
    129 	%tmp1 = insertelement <2 x float> undef, float %A, i32 0
    130 	%tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer
    131 	ret <2 x float> %tmp2
    132 }
    133 
    134 define <16 x i8> @v_shuffledupQ8(i8 %A) nounwind {
    135 ;CHECK-LABEL: v_shuffledupQ8:
    136 ;CHECK: vdup.8
    137 	%tmp1 = insertelement <16 x i8> undef, i8 %A, i32 0
    138 	%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> zeroinitializer
    139 	ret <16 x i8> %tmp2
    140 }
    141 
    142 define <8 x i16> @v_shuffledupQ16(i16 %A) nounwind {
    143 ;CHECK-LABEL: v_shuffledupQ16:
    144 ;CHECK: vdup.16
    145 	%tmp1 = insertelement <8 x i16> undef, i16 %A, i32 0
    146 	%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> zeroinitializer
    147 	ret <8 x i16> %tmp2
    148 }
    149 
    150 define <4 x i32> @v_shuffledupQ32(i32 %A) nounwind {
    151 ;CHECK-LABEL: v_shuffledupQ32:
    152 ;CHECK: vdup.32
    153 	%tmp1 = insertelement <4 x i32> undef, i32 %A, i32 0
    154 	%tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> zeroinitializer
    155 	ret <4 x i32> %tmp2
    156 }
    157 
    158 define <4 x float> @v_shuffledupQfloat(float %A) nounwind {
    159 ;CHECK-LABEL: v_shuffledupQfloat:
    160 ;CHECK: vdup.32
    161 	%tmp1 = insertelement <4 x float> undef, float %A, i32 0
    162 	%tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer
    163 	ret <4 x float> %tmp2
    164 }
    165 
    166 define <8 x i8> @vduplane8(<8 x i8>* %A) nounwind {
    167 ;CHECK-LABEL: vduplane8:
    168 ;CHECK: vdup.8
    169 	%tmp1 = load <8 x i8>, <8 x i8>* %A
    170 	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
    171 	ret <8 x i8> %tmp2
    172 }
    173 
    174 define <4 x i16> @vduplane16(<4 x i16>* %A) nounwind {
    175 ;CHECK-LABEL: vduplane16:
    176 ;CHECK: vdup.16
    177 	%tmp1 = load <4 x i16>, <4 x i16>* %A
    178 	%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
    179 	ret <4 x i16> %tmp2
    180 }
    181 
    182 define <2 x i32> @vduplane32(<2 x i32>* %A) nounwind {
    183 ;CHECK-LABEL: vduplane32:
    184 ;CHECK: vdup.32
    185 	%tmp1 = load <2 x i32>, <2 x i32>* %A
    186 	%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> < i32 1, i32 1 >
    187 	ret <2 x i32> %tmp2
    188 }
    189 
    190 define <2 x float> @vduplanefloat(<2 x float>* %A) nounwind {
    191 ;CHECK-LABEL: vduplanefloat:
    192 ;CHECK: vdup.32
    193 	%tmp1 = load <2 x float>, <2 x float>* %A
    194 	%tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> < i32 1, i32 1 >
    195 	ret <2 x float> %tmp2
    196 }
    197 
    198 define <16 x i8> @vduplaneQ8(<8 x i8>* %A) nounwind {
    199 ;CHECK-LABEL: vduplaneQ8:
    200 ;CHECK: vdup.8
    201 	%tmp1 = load <8 x i8>, <8 x i8>* %A
    202 	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <16 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
    203 	ret <16 x i8> %tmp2
    204 }
    205 
    206 define <8 x i16> @vduplaneQ16(<4 x i16>* %A) nounwind {
    207 ;CHECK-LABEL: vduplaneQ16:
    208 ;CHECK: vdup.16
    209 	%tmp1 = load <4 x i16>, <4 x i16>* %A
    210 	%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
    211 	ret <8 x i16> %tmp2
    212 }
    213 
    214 define <4 x i32> @vduplaneQ32(<2 x i32>* %A) nounwind {
    215 ;CHECK-LABEL: vduplaneQ32:
    216 ;CHECK: vdup.32
    217 	%tmp1 = load <2 x i32>, <2 x i32>* %A
    218 	%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
    219 	ret <4 x i32> %tmp2
    220 }
    221 
    222 define <4 x float> @vduplaneQfloat(<2 x float>* %A) nounwind {
    223 ;CHECK-LABEL: vduplaneQfloat:
    224 ;CHECK: vdup.32
    225 	%tmp1 = load <2 x float>, <2 x float>* %A
    226 	%tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
    227 	ret <4 x float> %tmp2
    228 }
    229 
    230 define <2 x i64> @foo(<2 x i64> %arg0_int64x1_t) nounwind readnone {
    231 entry:
    232   %0 = shufflevector <2 x i64> %arg0_int64x1_t, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
    233   ret <2 x i64> %0
    234 }
    235 
    236 define <2 x i64> @bar(<2 x i64> %arg0_int64x1_t) nounwind readnone {
    237 entry:
    238   %0 = shufflevector <2 x i64> %arg0_int64x1_t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
    239   ret <2 x i64> %0
    240 }
    241 
    242 define <2 x double> @baz(<2 x double> %arg0_int64x1_t) nounwind readnone {
    243 entry:
    244   %0 = shufflevector <2 x double> %arg0_int64x1_t, <2 x double> undef, <2 x i32> <i32 1, i32 1>
    245   ret <2 x double> %0
    246 }
    247 
    248 define <2 x double> @qux(<2 x double> %arg0_int64x1_t) nounwind readnone {
    249 entry:
    250   %0 = shufflevector <2 x double> %arg0_int64x1_t, <2 x double> undef, <2 x i32> <i32 0, i32 0>
    251   ret <2 x double> %0
    252 }
    253 
    254 ; Radar 7373643
    255 ;CHECK-LABEL: redundantVdup:
    256 ;CHECK: vmov.i8
    257 ;CHECK-NOT: vdup.8
    258 ;CHECK: vstr
    259 define void @redundantVdup(<8 x i8>* %ptr) nounwind {
    260   %1 = insertelement <8 x i8> undef, i8 -128, i32 0
    261   %2 = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer
    262   store <8 x i8> %2, <8 x i8>* %ptr, align 8
    263   ret void
    264 }
    265 
    266 define <4 x i32> @tdupi(i32 %x, i32 %y) {
    267 ;CHECK-LABEL: tdupi:
    268 ;CHECK: vdup.32
    269   %1 = insertelement <4 x i32> undef, i32 %x, i32 0
    270   %2 = insertelement <4 x i32> %1, i32 %x, i32 1
    271   %3 = insertelement <4 x i32> %2, i32 %x, i32 2
    272   %4 = insertelement <4 x i32> %3, i32 %y, i32 3
    273   ret <4 x i32> %4
    274 }
    275 
    276 define <4 x float> @tdupf(float %x, float %y) {
    277 ;CHECK-LABEL: tdupf:
    278 ;CHECK: vdup.32
    279   %1 = insertelement <4 x float> undef, float %x, i32 0
    280   %2 = insertelement <4 x float> %1, float %x, i32 1
    281   %3 = insertelement <4 x float> %2, float %x, i32 2
    282   %4 = insertelement <4 x float> %3, float %y, i32 3
    283   ret <4 x float> %4
    284 }
    285 
    286 ; This test checks that when splatting an element from a vector into another,
    287 ; the value isn't moved out to GPRs first.
    288 define <4 x i32> @tduplane(<4 x i32> %invec) {
    289 ;CHECK-LABEL: tduplane:
    290 ;CHECK-NOT: vmov {{.*}}, d16[1]
    291 ;CHECK: vdup.32 {{.*}}, d16[1]
    292   %in = extractelement <4 x i32> %invec, i32 1
    293   %1 = insertelement <4 x i32> undef, i32 %in, i32 0
    294   %2 = insertelement <4 x i32> %1, i32 %in, i32 1
    295   %3 = insertelement <4 x i32> %2, i32 %in, i32 2
    296   %4 = insertelement <4 x i32> %3, i32 255, i32 3
    297   ret <4 x i32> %4
    298 }
    299 
    300 define <2 x float> @check_f32(<4 x float> %v) nounwind {
    301 ;CHECK-LABEL: check_f32:
    302 ;CHECK: vdup.32 {{.*}}, d{{..}}[1]
    303   %x = extractelement <4 x float> %v, i32 3
    304   %1 = insertelement  <2 x float> undef, float %x, i32 0
    305   %2 = insertelement  <2 x float> %1, float %x, i32 1
    306   ret <2 x float> %2
    307 }
    308 
    309 define <2 x i32> @check_i32(<4 x i32> %v) nounwind {
    310 ;CHECK-LABEL: check_i32:
    311 ;CHECK: vdup.32 {{.*}}, d{{..}}[1]
    312   %x = extractelement <4 x i32> %v, i32 3
    313   %1 = insertelement  <2 x i32> undef, i32 %x, i32 0
    314   %2 = insertelement  <2 x i32> %1, i32 %x, i32 1
    315   ret <2 x i32> %2
    316 }
    317 
    318 define <4 x i16> @check_i16(<8 x i16> %v) nounwind {
    319 ;CHECK-LABEL: check_i16:
    320 ;CHECK: vdup.16 {{.*}}, d{{..}}[3]
    321   %x = extractelement <8 x i16> %v, i32 3
    322   %1 = insertelement  <4 x i16> undef, i16 %x, i32 0
    323   %2 = insertelement  <4 x i16> %1, i16 %x, i32 1
    324   ret <4 x i16> %2
    325 }
    326 
    327 define <8 x i8> @check_i8(<16 x i8> %v) nounwind {
    328 ;CHECK-LABEL: check_i8:
    329 ;CHECK: vdup.8 {{.*}}, d{{..}}[3]
    330   %x = extractelement <16 x i8> %v, i32 3
    331   %1 = insertelement  <8  x i8> undef, i8 %x, i32 0
    332   %2 = insertelement  <8  x i8> %1, i8 %x, i32 1
    333   ret <8 x i8> %2
    334 }
    335 
    336 ; Check that an SPR splat produces a vdup.
    337 
    338 define <2 x float> @check_spr_splat2(<2 x float> %p, i16 %q) {
    339 ;CHECK-LABEL: check_spr_splat2:
    340 ;CHECK: vdup.32 d
    341   %conv = sitofp i16 %q to float
    342   %splat.splatinsert = insertelement <2 x float> undef, float %conv, i32 0
    343   %splat.splat = shufflevector <2 x float> %splat.splatinsert, <2 x float> undef, <2 x i32> zeroinitializer
    344   %sub = fsub <2 x float> %splat.splat, %p
    345   ret <2 x float> %sub
    346 }
    347 
    348 define <4 x float> @check_spr_splat4(<4 x float> %p, i16 %q) {
    349 ;CHECK-LABEL: check_spr_splat4:
    350 ;CHECK: vld1.16
    351   %conv = sitofp i16 %q to float
    352   %splat.splatinsert = insertelement <4 x float> undef, float %conv, i32 0
    353   %splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer
    354   %sub = fsub <4 x float> %splat.splat, %p
    355   ret <4 x float> %sub
    356 }
    357 ; Same codegen as above test; scalar is splatted using vld1, so shuffle index is irrelevant.
    358 define <4 x float> @check_spr_splat4_lane1(<4 x float> %p, i16 %q) {
    359 ;CHECK-LABEL: check_spr_splat4_lane1:
    360 ;CHECK: vld1.16
    361   %conv = sitofp i16 %q to float
    362   %splat.splatinsert = insertelement <4 x float> undef, float %conv, i32 1
    363   %splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
    364   %sub = fsub <4 x float> %splat.splat, %p
    365   ret <4 x float> %sub
    366 }
    367 
    368 ; Also make sure we don't barf on variable-index extractelts, where we almost
    369 ; could have generated a vdup.
    370 
    371 define <8 x i8> @check_i8_varidx(<16 x i8> %v, i32 %idx) {
    372 ; CHECK-LABEL: check_i8_varidx:
    373 ; CHECK: mov r[[FP:[0-9]+]], sp
    374 ; CHECK: ldr r[[IDX:[0-9]+]], [r[[FP]], #4]
    375 ; CHECK: mov r[[SPCOPY:[0-9]+]], sp
    376 ; CHECK: vst1.64 {d{{.*}}, d{{.*}}}, [r[[SPCOPY]]:128], r[[IDX]]
    377 ; CHECK: vld1.8 {d{{.*}}[]}, [r[[SPCOPY]]]
    378   %x = extractelement <16 x i8> %v, i32 %idx
    379   %1 = insertelement  <8 x i8> undef, i8 %x, i32 0
    380   %2 = insertelement  <8 x i8> %1, i8 %x, i32 1
    381   ret <8 x i8> %2
    382 }
    383