Home | History | Annotate | Download | only in ARM
      1 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
      2 
      3 define <8 x i8> @v_dup8(i8 %A) nounwind {
      4 ;CHECK: v_dup8:
      5 ;CHECK: vdup.8
      6 	%tmp1 = insertelement <8 x i8> zeroinitializer, i8 %A, i32 0
      7 	%tmp2 = insertelement <8 x i8> %tmp1, i8 %A, i32 1
      8 	%tmp3 = insertelement <8 x i8> %tmp2, i8 %A, i32 2
      9 	%tmp4 = insertelement <8 x i8> %tmp3, i8 %A, i32 3
     10 	%tmp5 = insertelement <8 x i8> %tmp4, i8 %A, i32 4
     11 	%tmp6 = insertelement <8 x i8> %tmp5, i8 %A, i32 5
     12 	%tmp7 = insertelement <8 x i8> %tmp6, i8 %A, i32 6
     13 	%tmp8 = insertelement <8 x i8> %tmp7, i8 %A, i32 7
     14 	ret <8 x i8> %tmp8
     15 }
     16 
     17 define <4 x i16> @v_dup16(i16 %A) nounwind {
     18 ;CHECK: v_dup16:
     19 ;CHECK: vdup.16
     20 	%tmp1 = insertelement <4 x i16> zeroinitializer, i16 %A, i32 0
     21 	%tmp2 = insertelement <4 x i16> %tmp1, i16 %A, i32 1
     22 	%tmp3 = insertelement <4 x i16> %tmp2, i16 %A, i32 2
     23 	%tmp4 = insertelement <4 x i16> %tmp3, i16 %A, i32 3
     24 	ret <4 x i16> %tmp4
     25 }
     26 
     27 define <2 x i32> @v_dup32(i32 %A) nounwind {
     28 ;CHECK: v_dup32:
     29 ;CHECK: vdup.32
     30 	%tmp1 = insertelement <2 x i32> zeroinitializer, i32 %A, i32 0
     31 	%tmp2 = insertelement <2 x i32> %tmp1, i32 %A, i32 1
     32 	ret <2 x i32> %tmp2
     33 }
     34 
     35 define <2 x float> @v_dupfloat(float %A) nounwind {
     36 ;CHECK: v_dupfloat:
     37 ;CHECK: vdup.32
     38 	%tmp1 = insertelement <2 x float> zeroinitializer, float %A, i32 0
     39 	%tmp2 = insertelement <2 x float> %tmp1, float %A, i32 1
     40 	ret <2 x float> %tmp2
     41 }
     42 
     43 define <16 x i8> @v_dupQ8(i8 %A) nounwind {
     44 ;CHECK: v_dupQ8:
     45 ;CHECK: vdup.8
     46 	%tmp1 = insertelement <16 x i8> zeroinitializer, i8 %A, i32 0
     47 	%tmp2 = insertelement <16 x i8> %tmp1, i8 %A, i32 1
     48 	%tmp3 = insertelement <16 x i8> %tmp2, i8 %A, i32 2
     49 	%tmp4 = insertelement <16 x i8> %tmp3, i8 %A, i32 3
     50 	%tmp5 = insertelement <16 x i8> %tmp4, i8 %A, i32 4
     51 	%tmp6 = insertelement <16 x i8> %tmp5, i8 %A, i32 5
     52 	%tmp7 = insertelement <16 x i8> %tmp6, i8 %A, i32 6
     53 	%tmp8 = insertelement <16 x i8> %tmp7, i8 %A, i32 7
     54 	%tmp9 = insertelement <16 x i8> %tmp8, i8 %A, i32 8
     55 	%tmp10 = insertelement <16 x i8> %tmp9, i8 %A, i32 9
     56 	%tmp11 = insertelement <16 x i8> %tmp10, i8 %A, i32 10
     57 	%tmp12 = insertelement <16 x i8> %tmp11, i8 %A, i32 11
     58 	%tmp13 = insertelement <16 x i8> %tmp12, i8 %A, i32 12
     59 	%tmp14 = insertelement <16 x i8> %tmp13, i8 %A, i32 13
     60 	%tmp15 = insertelement <16 x i8> %tmp14, i8 %A, i32 14
     61 	%tmp16 = insertelement <16 x i8> %tmp15, i8 %A, i32 15
     62 	ret <16 x i8> %tmp16
     63 }
     64 
     65 define <8 x i16> @v_dupQ16(i16 %A) nounwind {
     66 ;CHECK: v_dupQ16:
     67 ;CHECK: vdup.16
     68 	%tmp1 = insertelement <8 x i16> zeroinitializer, i16 %A, i32 0
     69 	%tmp2 = insertelement <8 x i16> %tmp1, i16 %A, i32 1
     70 	%tmp3 = insertelement <8 x i16> %tmp2, i16 %A, i32 2
     71 	%tmp4 = insertelement <8 x i16> %tmp3, i16 %A, i32 3
     72 	%tmp5 = insertelement <8 x i16> %tmp4, i16 %A, i32 4
     73 	%tmp6 = insertelement <8 x i16> %tmp5, i16 %A, i32 5
     74 	%tmp7 = insertelement <8 x i16> %tmp6, i16 %A, i32 6
     75 	%tmp8 = insertelement <8 x i16> %tmp7, i16 %A, i32 7
     76 	ret <8 x i16> %tmp8
     77 }
     78 
     79 define <4 x i32> @v_dupQ32(i32 %A) nounwind {
     80 ;CHECK: v_dupQ32:
     81 ;CHECK: vdup.32
     82 	%tmp1 = insertelement <4 x i32> zeroinitializer, i32 %A, i32 0
     83 	%tmp2 = insertelement <4 x i32> %tmp1, i32 %A, i32 1
     84 	%tmp3 = insertelement <4 x i32> %tmp2, i32 %A, i32 2
     85 	%tmp4 = insertelement <4 x i32> %tmp3, i32 %A, i32 3
     86 	ret <4 x i32> %tmp4
     87 }
     88 
     89 define <4 x float> @v_dupQfloat(float %A) nounwind {
     90 ;CHECK: v_dupQfloat:
     91 ;CHECK: vdup.32
     92 	%tmp1 = insertelement <4 x float> zeroinitializer, float %A, i32 0
     93 	%tmp2 = insertelement <4 x float> %tmp1, float %A, i32 1
     94 	%tmp3 = insertelement <4 x float> %tmp2, float %A, i32 2
     95 	%tmp4 = insertelement <4 x float> %tmp3, float %A, i32 3
     96 	ret <4 x float> %tmp4
     97 }
     98 
     99 ; Check to make sure it works with shuffles, too.
    100 
    101 define <8 x i8> @v_shuffledup8(i8 %A) nounwind {
    102 ;CHECK: v_shuffledup8:
    103 ;CHECK: vdup.8
    104 	%tmp1 = insertelement <8 x i8> undef, i8 %A, i32 0
    105 	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> zeroinitializer
    106 	ret <8 x i8> %tmp2
    107 }
    108 
    109 define <4 x i16> @v_shuffledup16(i16 %A) nounwind {
    110 ;CHECK: v_shuffledup16:
    111 ;CHECK: vdup.16
    112 	%tmp1 = insertelement <4 x i16> undef, i16 %A, i32 0
    113 	%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
    114 	ret <4 x i16> %tmp2
    115 }
    116 
    117 define <2 x i32> @v_shuffledup32(i32 %A) nounwind {
    118 ;CHECK: v_shuffledup32:
    119 ;CHECK: vdup.32
    120 	%tmp1 = insertelement <2 x i32> undef, i32 %A, i32 0
    121 	%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer
    122 	ret <2 x i32> %tmp2
    123 }
    124 
    125 define <2 x float> @v_shuffledupfloat(float %A) nounwind {
    126 ;CHECK: v_shuffledupfloat:
    127 ;CHECK: vdup.32
    128 	%tmp1 = insertelement <2 x float> undef, float %A, i32 0
    129 	%tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer
    130 	ret <2 x float> %tmp2
    131 }
    132 
    133 define <16 x i8> @v_shuffledupQ8(i8 %A) nounwind {
    134 ;CHECK: v_shuffledupQ8:
    135 ;CHECK: vdup.8
    136 	%tmp1 = insertelement <16 x i8> undef, i8 %A, i32 0
    137 	%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> zeroinitializer
    138 	ret <16 x i8> %tmp2
    139 }
    140 
    141 define <8 x i16> @v_shuffledupQ16(i16 %A) nounwind {
    142 ;CHECK: v_shuffledupQ16:
    143 ;CHECK: vdup.16
    144 	%tmp1 = insertelement <8 x i16> undef, i16 %A, i32 0
    145 	%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> zeroinitializer
    146 	ret <8 x i16> %tmp2
    147 }
    148 
    149 define <4 x i32> @v_shuffledupQ32(i32 %A) nounwind {
    150 ;CHECK: v_shuffledupQ32:
    151 ;CHECK: vdup.32
    152 	%tmp1 = insertelement <4 x i32> undef, i32 %A, i32 0
    153 	%tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> zeroinitializer
    154 	ret <4 x i32> %tmp2
    155 }
    156 
    157 define <4 x float> @v_shuffledupQfloat(float %A) nounwind {
    158 ;CHECK: v_shuffledupQfloat:
    159 ;CHECK: vdup.32
    160 	%tmp1 = insertelement <4 x float> undef, float %A, i32 0
    161 	%tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer
    162 	ret <4 x float> %tmp2
    163 }
    164 
    165 define <8 x i8> @vduplane8(<8 x i8>* %A) nounwind {
    166 ;CHECK: vduplane8:
    167 ;CHECK: vdup.8
    168 	%tmp1 = load <8 x i8>* %A
    169 	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
    170 	ret <8 x i8> %tmp2
    171 }
    172 
    173 define <4 x i16> @vduplane16(<4 x i16>* %A) nounwind {
    174 ;CHECK: vduplane16:
    175 ;CHECK: vdup.16
    176 	%tmp1 = load <4 x i16>* %A
    177 	%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
    178 	ret <4 x i16> %tmp2
    179 }
    180 
    181 define <2 x i32> @vduplane32(<2 x i32>* %A) nounwind {
    182 ;CHECK: vduplane32:
    183 ;CHECK: vdup.32
    184 	%tmp1 = load <2 x i32>* %A
    185 	%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> < i32 1, i32 1 >
    186 	ret <2 x i32> %tmp2
    187 }
    188 
    189 define <2 x float> @vduplanefloat(<2 x float>* %A) nounwind {
    190 ;CHECK: vduplanefloat:
    191 ;CHECK: vdup.32
    192 	%tmp1 = load <2 x float>* %A
    193 	%tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> < i32 1, i32 1 >
    194 	ret <2 x float> %tmp2
    195 }
    196 
    197 define <16 x i8> @vduplaneQ8(<8 x i8>* %A) nounwind {
    198 ;CHECK: vduplaneQ8:
    199 ;CHECK: vdup.8
    200 	%tmp1 = load <8 x i8>* %A
    201 	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <16 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
    202 	ret <16 x i8> %tmp2
    203 }
    204 
    205 define <8 x i16> @vduplaneQ16(<4 x i16>* %A) nounwind {
    206 ;CHECK: vduplaneQ16:
    207 ;CHECK: vdup.16
    208 	%tmp1 = load <4 x i16>* %A
    209 	%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
    210 	ret <8 x i16> %tmp2
    211 }
    212 
    213 define <4 x i32> @vduplaneQ32(<2 x i32>* %A) nounwind {
    214 ;CHECK: vduplaneQ32:
    215 ;CHECK: vdup.32
    216 	%tmp1 = load <2 x i32>* %A
    217 	%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
    218 	ret <4 x i32> %tmp2
    219 }
    220 
    221 define <4 x float> @vduplaneQfloat(<2 x float>* %A) nounwind {
    222 ;CHECK: vduplaneQfloat:
    223 ;CHECK: vdup.32
    224 	%tmp1 = load <2 x float>* %A
    225 	%tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
    226 	ret <4 x float> %tmp2
    227 }
    228 
    229 define <2 x i64> @foo(<2 x i64> %arg0_int64x1_t) nounwind readnone {
    230 entry:
    231   %0 = shufflevector <2 x i64> %arg0_int64x1_t, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
    232   ret <2 x i64> %0
    233 }
    234 
    235 define <2 x i64> @bar(<2 x i64> %arg0_int64x1_t) nounwind readnone {
    236 entry:
    237   %0 = shufflevector <2 x i64> %arg0_int64x1_t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
    238   ret <2 x i64> %0
    239 }
    240 
    241 define <2 x double> @baz(<2 x double> %arg0_int64x1_t) nounwind readnone {
    242 entry:
    243   %0 = shufflevector <2 x double> %arg0_int64x1_t, <2 x double> undef, <2 x i32> <i32 1, i32 1>
    244   ret <2 x double> %0
    245 }
    246 
    247 define <2 x double> @qux(<2 x double> %arg0_int64x1_t) nounwind readnone {
    248 entry:
    249   %0 = shufflevector <2 x double> %arg0_int64x1_t, <2 x double> undef, <2 x i32> <i32 0, i32 0>
    250   ret <2 x double> %0
    251 }
    252 
    253 ; Radar 7373643
    254 ;CHECK: redundantVdup:
    255 ;CHECK: vmov.i8
    256 ;CHECK-NOT: vdup.8
    257 ;CHECK: vstr
    258 define void @redundantVdup(<8 x i8>* %ptr) nounwind {
    259   %1 = insertelement <8 x i8> undef, i8 -128, i32 0
    260   %2 = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer
    261   store <8 x i8> %2, <8 x i8>* %ptr, align 8
    262   ret void
    263 }
    264 
    265 define <4 x i32> @tdupi(i32 %x, i32 %y) {
    266 ;CHECK: tdupi
    267 ;CHECK: vdup.32
    268   %1 = insertelement <4 x i32> undef, i32 %x, i32 0
    269   %2 = insertelement <4 x i32> %1, i32 %x, i32 1
    270   %3 = insertelement <4 x i32> %2, i32 %x, i32 2
    271   %4 = insertelement <4 x i32> %3, i32 %y, i32 3
    272   ret <4 x i32> %4
    273 }
    274 
    275 define <4 x float> @tdupf(float %x, float %y) {
    276 ;CHECK: tdupf
    277 ;CHECK: vdup.32
    278   %1 = insertelement <4 x float> undef, float %x, i32 0
    279   %2 = insertelement <4 x float> %1, float %x, i32 1
    280   %3 = insertelement <4 x float> %2, float %x, i32 2
    281   %4 = insertelement <4 x float> %3, float %y, i32 3
    282   ret <4 x float> %4
    283 }
    284 
    285 ; This test checks that when splatting an element from a vector into another,
    286 ; the value isn't moved out to GPRs first.
    287 define <4 x i32> @tduplane(<4 x i32> %invec) {
    288 ;CHECK: tduplane
    289 ;CHECK-NOT: vmov {{.*}}, d16[1]
    290 ;CHECK: vdup.32 {{.*}}, d16[1]
    291   %in = extractelement <4 x i32> %invec, i32 1
    292   %1 = insertelement <4 x i32> undef, i32 %in, i32 0
    293   %2 = insertelement <4 x i32> %1, i32 %in, i32 1
    294   %3 = insertelement <4 x i32> %2, i32 %in, i32 2
    295   %4 = insertelement <4 x i32> %3, i32 255, i32 3
    296   ret <4 x i32> %4
    297 }
    298