Home | History | Annotate | Download | only in X86
      1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
      2 
      3 ; CHECK: vpbroadcastb (%
      4 define <16 x i8> @BB16(i8* %ptr) nounwind uwtable readonly ssp { ; readonly rather than readnone: the body loads through %ptr
      5 entry:
      6   %q = load i8* %ptr, align 4 ; the single byte that is copied into every lane below
      7   %q0 = insertelement <16 x i8> undef, i8 %q, i32 0
      8   %q1 = insertelement <16 x i8> %q0, i8 %q, i32 1
      9   %q2 = insertelement <16 x i8> %q1, i8 %q, i32 2
     10   %q3 = insertelement <16 x i8> %q2, i8 %q, i32 3
     11   %q4 = insertelement <16 x i8> %q3, i8 %q, i32 4
     12   %q5 = insertelement <16 x i8> %q4, i8 %q, i32 5
     13   %q6 = insertelement <16 x i8> %q5, i8 %q, i32 6
     14   %q7 = insertelement <16 x i8> %q6, i8 %q, i32 7
     15   %q8 = insertelement <16 x i8> %q7, i8 %q, i32 8
     16   %q9 = insertelement <16 x i8> %q8, i8 %q, i32 9
     17   %qa = insertelement <16 x i8> %q9, i8 %q, i32 10
     18   %qb = insertelement <16 x i8> %qa, i8 %q, i32 11
     19   %qc = insertelement <16 x i8> %qb, i8 %q, i32 12
     20   %qd = insertelement <16 x i8> %qc, i8 %q, i32 13
     21   %qe = insertelement <16 x i8> %qd, i8 %q, i32 14
     22   %qf = insertelement <16 x i8> %qe, i8 %q, i32 15
     23   ret <16 x i8> %qf ; lanes 0-15 all hold %q
     24 }
     25 ; CHECK: vpbroadcastb (%
     26 define <32 x i8> @BB32(i8* %ptr) nounwind uwtable readonly ssp { ; readonly rather than readnone: the body loads through %ptr
     27 entry:
     28   %q = load i8* %ptr, align 4 ; the single byte that is copied into every lane below
     29   %q0 = insertelement <32 x i8> undef, i8 %q, i32 0
     30   %q1 = insertelement <32 x i8> %q0, i8 %q, i32 1
     31   %q2 = insertelement <32 x i8> %q1, i8 %q, i32 2
     32   %q3 = insertelement <32 x i8> %q2, i8 %q, i32 3
     33   %q4 = insertelement <32 x i8> %q3, i8 %q, i32 4
     34   %q5 = insertelement <32 x i8> %q4, i8 %q, i32 5
     35   %q6 = insertelement <32 x i8> %q5, i8 %q, i32 6
     36   %q7 = insertelement <32 x i8> %q6, i8 %q, i32 7
     37   %q8 = insertelement <32 x i8> %q7, i8 %q, i32 8
     38   %q9 = insertelement <32 x i8> %q8, i8 %q, i32 9
     39   %qa = insertelement <32 x i8> %q9, i8 %q, i32 10
     40   %qb = insertelement <32 x i8> %qa, i8 %q, i32 11
     41   %qc = insertelement <32 x i8> %qb, i8 %q, i32 12
     42   %qd = insertelement <32 x i8> %qc, i8 %q, i32 13
     43   %qe = insertelement <32 x i8> %qd, i8 %q, i32 14
     44   %qf = insertelement <32 x i8> %qe, i8 %q, i32 15
     45   ; lanes 16-31 follow (the second 16 bytes of the 32-byte vector)
     46   %q20 = insertelement <32 x i8> %qf, i8 %q,  i32 16
     47   %q21 = insertelement <32 x i8> %q20, i8 %q, i32 17
     48   %q22 = insertelement <32 x i8> %q21, i8 %q, i32 18
     49   %q23 = insertelement <32 x i8> %q22, i8 %q, i32 19
     50   %q24 = insertelement <32 x i8> %q23, i8 %q, i32 20
     51   %q25 = insertelement <32 x i8> %q24, i8 %q, i32 21
     52   %q26 = insertelement <32 x i8> %q25, i8 %q, i32 22
     53   %q27 = insertelement <32 x i8> %q26, i8 %q, i32 23
     54   %q28 = insertelement <32 x i8> %q27, i8 %q, i32 24
     55   %q29 = insertelement <32 x i8> %q28, i8 %q, i32 25
     56   %q2a = insertelement <32 x i8> %q29, i8 %q, i32 26
     57   %q2b = insertelement <32 x i8> %q2a, i8 %q, i32 27
     58   %q2c = insertelement <32 x i8> %q2b, i8 %q, i32 28
     59   %q2d = insertelement <32 x i8> %q2c, i8 %q, i32 29
     60   %q2e = insertelement <32 x i8> %q2d, i8 %q, i32 30
     61   %q2f = insertelement <32 x i8> %q2e, i8 %q, i32 31
     62   ret <32 x i8> %q2f ; lanes 0-31 all hold %q
     63 }
     64 ; CHECK: vpbroadcastw (%
     65 
     66 define <8 x i16> @W16(i16* %ptr) nounwind uwtable readonly ssp { ; readonly rather than readnone: the body loads through %ptr
     67 entry:
     68   %q = load i16* %ptr, align 4 ; the single i16 that is copied into every lane below
     69   %q0 = insertelement <8 x i16> undef, i16 %q, i32 0
     70   %q1 = insertelement <8 x i16> %q0, i16 %q, i32 1
     71   %q2 = insertelement <8 x i16> %q1, i16 %q, i32 2
     72   %q3 = insertelement <8 x i16> %q2, i16 %q, i32 3
     73   %q4 = insertelement <8 x i16> %q3, i16 %q, i32 4
     74   %q5 = insertelement <8 x i16> %q4, i16 %q, i32 5
     75   %q6 = insertelement <8 x i16> %q5, i16 %q, i32 6
     76   %q7 = insertelement <8 x i16> %q6, i16 %q, i32 7
     77   ret <8 x i16> %q7 ; lanes 0-7 all hold %q
     78 }
     79 ; CHECK: vpbroadcastw (%
     80 define <16 x i16> @WW16(i16* %ptr) nounwind uwtable readonly ssp { ; readonly rather than readnone: the body loads through %ptr
     81 entry:
     82   %q = load i16* %ptr, align 4 ; the single i16 that is copied into every lane below
     83   %q0 = insertelement <16 x i16> undef, i16 %q, i32 0
     84   %q1 = insertelement <16 x i16> %q0, i16 %q, i32 1
     85   %q2 = insertelement <16 x i16> %q1, i16 %q, i32 2
     86   %q3 = insertelement <16 x i16> %q2, i16 %q, i32 3
     87   %q4 = insertelement <16 x i16> %q3, i16 %q, i32 4
     88   %q5 = insertelement <16 x i16> %q4, i16 %q, i32 5
     89   %q6 = insertelement <16 x i16> %q5, i16 %q, i32 6
     90   %q7 = insertelement <16 x i16> %q6, i16 %q, i32 7
     91   %q8 = insertelement <16 x i16> %q7, i16 %q, i32 8
     92   %q9 = insertelement <16 x i16> %q8, i16 %q, i32 9
     93   %qa = insertelement <16 x i16> %q9, i16 %q, i32 10
     94   %qb = insertelement <16 x i16> %qa, i16 %q, i32 11
     95   %qc = insertelement <16 x i16> %qb, i16 %q, i32 12
     96   %qd = insertelement <16 x i16> %qc, i16 %q, i32 13
     97   %qe = insertelement <16 x i16> %qd, i16 %q, i32 14
     98   %qf = insertelement <16 x i16> %qe, i16 %q, i32 15
     99   ret <16 x i16> %qf ; lanes 0-15 all hold %q
    100 }
    101 ; CHECK: vpbroadcastd (%
    102 define <4 x i32> @D32(i32* %ptr) nounwind uwtable readonly ssp { ; readonly rather than readnone: the body loads through %ptr
    103 entry:
    104   %q = load i32* %ptr, align 4 ; the single i32 that is copied into every lane below
    105   %q0 = insertelement <4 x i32> undef, i32 %q, i32 0
    106   %q1 = insertelement <4 x i32> %q0, i32 %q, i32 1
    107   %q2 = insertelement <4 x i32> %q1, i32 %q, i32 2
    108   %q3 = insertelement <4 x i32> %q2, i32 %q, i32 3
    109   ret <4 x i32> %q3 ; lanes 0-3 all hold %q
    110 }
    111 ; CHECK: vpbroadcastd (%
    112 define <8 x i32> @DD32(i32* %ptr) nounwind uwtable readonly ssp { ; readonly rather than readnone: the body loads through %ptr
    113 entry:
    114   %q = load i32* %ptr, align 4 ; the single i32 that is copied into every lane below
    115   %q0 = insertelement <8 x i32> undef, i32 %q, i32 0
    116   %q1 = insertelement <8 x i32> %q0, i32 %q, i32 1
    117   %q2 = insertelement <8 x i32> %q1, i32 %q, i32 2
    118   %q3 = insertelement <8 x i32> %q2, i32 %q, i32 3
    119   %q4 = insertelement <8 x i32> %q3, i32 %q, i32 4
    120   %q5 = insertelement <8 x i32> %q4, i32 %q, i32 5
    121   %q6 = insertelement <8 x i32> %q5, i32 %q, i32 6
    122   %q7 = insertelement <8 x i32> %q6, i32 %q, i32 7
    123   ret <8 x i32> %q7 ; lanes 0-7 all hold %q
    124 }
    125 ; CHECK: vpbroadcastq (%
    126 define <2 x i64> @Q64(i64* %ptr) nounwind uwtable readonly ssp { ; readonly rather than readnone: the body loads through %ptr
    127 entry:
    128   %q = load i64* %ptr, align 4 ; the single i64 that is copied into both lanes below
    129   %q0 = insertelement <2 x i64> undef, i64 %q, i32 0
    130   %q1 = insertelement <2 x i64> %q0, i64 %q, i32 1
    131   ret <2 x i64> %q1 ; lanes 0-1 both hold %q
    132 }
    133 ; CHECK: vpbroadcastq (%
    134 define <4 x i64> @QQ64(i64* %ptr) nounwind uwtable readonly ssp { ; readonly rather than readnone: the body loads through %ptr
    135 entry:
    136   %q = load i64* %ptr, align 4 ; the single i64 that is copied into every lane below
    137   %q0 = insertelement <4 x i64> undef, i64 %q, i32 0
    138   %q1 = insertelement <4 x i64> %q0, i64 %q, i32 1
    139   %q2 = insertelement <4 x i64> %q1, i64 %q, i32 2
    140   %q3 = insertelement <4 x i64> %q2, i64 %q, i32 3
    141   ret <4 x i64> %q3 ; lanes 0-3 all hold %q
    142 }
    143 
    144 ; Make sure that we still don't support broadcasting a double into a 128-bit vector.
    145 ; This used to crash; no instruction pattern is checked for this function.
    146 define <2 x double> @I(double* %ptr) nounwind uwtable readonly ssp { ; readonly rather than readnone: the body loads through %ptr
    147 entry:
    148   %q = load double* %ptr, align 4 ; the single double that is copied into both lanes below
    149   %vecinit.i = insertelement <2 x double> undef, double %q, i32 0
    150   %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
    151   ret <2 x double> %vecinit2.i ; lanes 0-1 both hold %q
    152 }
    153 
    154 ; CHECK-LABEL: V111:
    155 ; CHECK: vpbroadcastd
    156 ; CHECK: ret
    157 define <8 x i32> @V111(<8 x i32> %in) nounwind uwtable readnone ssp { ; adds a constant vector whose 8 lanes are all i32 1
    158 entry:
    159   %g = add <8 x i32> %in, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
    160   ret <8 x i32> %g
    161 }
    162 
    163 ; CHECK-LABEL: V113:
    164 ; CHECK: vbroadcastss
    165 ; CHECK: ret
    166 define <8 x float> @V113(<8 x float> %in) nounwind uwtable readnone ssp { ; adds a constant vector whose 8 lanes hold the same float
    167 entry:
    168   %g = fadd <8 x float> %in, <float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000>
    169   ret <8 x float> %g
    170 }
    171 
    172 ; CHECK-LABEL: _e2:
    173 ; CHECK: vbroadcastss
    174 ; CHECK: ret
    175 define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp { ; %ptr is never used; the result is a constant splat
    176   %vecinit.i = insertelement <4 x float> undef, float        0xbf80000000000000, i32 0
    177   %vecinit2.i = insertelement <4 x float> %vecinit.i, float  0xbf80000000000000, i32 1
    178   %vecinit4.i = insertelement <4 x float> %vecinit2.i, float 0xbf80000000000000, i32 2
    179   %vecinit6.i = insertelement <4 x float> %vecinit4.i, float 0xbf80000000000000, i32 3
    180   ret <4 x float> %vecinit6.i ; lanes 0-3 all hold the same constant
    181 }
    182 
    183 ; CHECK-LABEL: _e4:
    184 ; CHECK-NOT: broadcast
    185 ; CHECK: ret
    186 define <8 x i8> @_e4(i8* %ptr) nounwind uwtable readnone ssp { ; %ptr is never used; negative test: no broadcast may appear before ret
    187   %vecinit0.i = insertelement <8 x i8> undef, i8       52, i32 0
    188   %vecinit1.i = insertelement <8 x i8> %vecinit0.i, i8 52, i32 1
    189   %vecinit2.i = insertelement <8 x i8> %vecinit1.i, i8 52, i32 2
    190   %vecinit3.i = insertelement <8 x i8> %vecinit2.i, i8 52, i32 3
    191   %vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 52, i32 4
    192   %vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 52, i32 5
    193   %vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 52, i32 6
    194   %vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 52, i32 7
    195   ret <8 x i8> %vecinit7.i ; lanes 0-7 all hold the constant 52
    196 }
    197 
    198 
    199 define void @crash() nounwind alwaysinline { ; no FileCheck directives are attached; presumably this only has to compile without crashing (going by the name) - confirm
    200 WGLoopsEntry:
    201   br i1 undef, label %ret, label %footer329VF
    202 
    203 footer329VF: ; loop header: reached from the entry block and from the back-edge in footer349VF
    204   %A.0.inVF = fmul float undef, 6.553600e+04
    205   %B.0.in407VF = fmul <8 x float> undef, <float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04>
    206   %A.0VF = fptosi float %A.0.inVF to i32
    207   %B.0408VF = fptosi <8 x float> %B.0.in407VF to <8 x i32>
    208   %0 = and <8 x i32> %B.0408VF, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
    209   %1 = and i32 %A.0VF, 65535
    210   %temp1098VF = insertelement <8 x i32> undef, i32 %1, i32 0
    211   %vector1099VF = shufflevector <8 x i32> %temp1098VF, <8 x i32> undef, <8 x i32> zeroinitializer ; all-zero mask: every lane takes element 0, i.e. a splat of %1
    212   br i1 undef, label %preload1201VF, label %footer349VF
    213 
    214 preload1201VF: ; empty block that only falls through to footer349VF
    215   br label %footer349VF
    216 
    217 footer349VF:
    218   %2 = mul nsw <8 x i32> undef, %0 ; result has no visible use
    219   %3 = mul nsw <8 x i32> undef, %vector1099VF ; consumes the splat; result has no visible use
    220   br label %footer329VF
    221 
    222 ret:
    223   ret void
    224 }
    225 
    226 ; CHECK-LABEL: _inreg0:
    227 ; CHECK: broadcastss
    228 ; CHECK: ret
    229 define <8 x i32> @_inreg0(i32 %scalar) nounwind uwtable readnone ssp { ; splats a scalar argument (no memory operand) across 8 x i32
    230   %in = insertelement <8 x i32> undef, i32 %scalar, i32 0 ; NOTE(review): integer splat is matched against the float-domain mnemonic above - confirm that is the intended lowering
    231   %wide = shufflevector <8 x i32> %in, <8 x i32> undef, <8 x i32> zeroinitializer ; all-zero mask: every lane takes element 0
    232   ret <8 x i32> %wide
    233 }
    234 
    235 ; CHECK-LABEL: _inreg1:
    236 ; CHECK: broadcastss
    237 ; CHECK: ret
    238 define <8 x float> @_inreg1(float %scalar) nounwind uwtable readnone ssp { ; splats a scalar argument (no memory operand) across 8 x float
    239   %in = insertelement <8 x float> undef, float %scalar, i32 0
    240   %wide = shufflevector <8 x float> %in, <8 x float> undef, <8 x i32> zeroinitializer ; all-zero mask: every lane takes element 0
    241   ret <8 x float> %wide
    242 }
    243 
    244 ; CHECK-LABEL: _inreg2:
    245 ; CHECK: broadcastss
    246 ; CHECK: ret
    247 define <4 x float> @_inreg2(float %scalar) nounwind uwtable readnone ssp { ; splats a scalar argument (no memory operand) across 4 x float
    248   %in = insertelement <4 x float> undef, float %scalar, i32 0
    249   %wide = shufflevector <4 x float> %in, <4 x float> undef, <4 x i32> zeroinitializer ; all-zero mask: every lane takes element 0
    250   ret <4 x float> %wide
    251 }
    252 
    253 ; CHECK-LABEL: _inreg3:
    254 ; CHECK: broadcastsd
    255 ; CHECK: ret
    256 define <4 x double> @_inreg3(double %scalar) nounwind uwtable readnone ssp { ; splats a scalar argument (no memory operand) across 4 x double
    257   %in = insertelement <4 x double> undef, double %scalar, i32 0
    258   %wide = shufflevector <4 x double> %in, <4 x double> undef, <4 x i32> zeroinitializer ; all-zero mask: every lane takes element 0
    259   ret <4 x double> %wide
    260 }
    261 
    262 ;CHECK-LABEL: _inreg8xfloat:
    263 ;CHECK: vbroadcastss
    264 ;CHECK: ret
    265 define <8 x float> @_inreg8xfloat(<8 x float> %a) { ; splat of lane 0 of a vector argument
    266   %splat = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> zeroinitializer ; all-zero mask: every lane takes element 0 of %a
    267   ret <8 x float> %splat
    268 }
    269 
    270 ;CHECK-LABEL: _inreg4xfloat:
    271 ;CHECK: vbroadcastss
    272 ;CHECK: ret
    273 define <4 x float> @_inreg4xfloat(<4 x float> %a) { ; splat of lane 0 of a vector argument
    274   %splat = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> zeroinitializer ; all-zero mask: every lane takes element 0 of %a
    275   ret <4 x float> %splat
    276 }
    277 
    278 ;CHECK-LABEL: _inreg16xi16:
    279 ;CHECK: vpbroadcastw
    280 ;CHECK: ret
    281 define <16 x i16> @_inreg16xi16(<16 x i16> %a) { ; splat of lane 0 of a vector argument
    282   %splat = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> zeroinitializer ; all-zero mask: every lane takes element 0 of %a
    283   ret <16 x i16> %splat
    284 }
    285 
    286 ;CHECK-LABEL: _inreg8xi16:
    287 ;CHECK: vpbroadcastw
    288 ;CHECK: ret
    289 define <8 x i16> @_inreg8xi16(<8 x i16> %a) { ; splat of lane 0 of a vector argument
    290   %splat = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> zeroinitializer ; all-zero mask: every lane takes element 0 of %a
    291   ret <8 x i16> %splat
    292 }
    293 
    294 
    295 ;CHECK-LABEL: _inreg4xi64:
    296 ;CHECK: vpbroadcastq
    297 ;CHECK: ret
    298 define <4 x i64> @_inreg4xi64(<4 x i64> %a) { ; splat of lane 0 of a vector argument
    299   %splat = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> zeroinitializer ; all-zero mask: every lane takes element 0 of %a
    300   ret <4 x i64> %splat
    301 }
    302 
    303 ;CHECK-LABEL: _inreg2xi64:
    304 ;CHECK: vpbroadcastq
    305 ;CHECK: ret
    306 define <2 x i64> @_inreg2xi64(<2 x i64> %a) { ; splat of lane 0 of a vector argument
    307   %splat = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> zeroinitializer ; all-zero mask: both lanes take element 0 of %a
    308   ret <2 x i64> %splat
    309 }
    310 
    311 ;CHECK-LABEL: _inreg4xdouble:
    312 ;CHECK: vbroadcastsd
    313 ;CHECK: ret
    314 define <4 x double> @_inreg4xdouble(<4 x double> %a) { ; splat of lane 0 of a vector argument
    315   %splat = shufflevector <4 x double> %a, <4 x double> undef, <4 x i32> zeroinitializer ; all-zero mask: every lane takes element 0 of %a
    316   ret <4 x double> %splat
    317 }
    318 
    319 ;CHECK-LABEL: _inreg2xdouble:
    320 ;CHECK: vpbroadcastq
    321 ;CHECK: ret
    322 define <2 x double> @_inreg2xdouble(<2 x double> %a) { ; splat of lane 0 of a vector argument
    323   %splat = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> zeroinitializer ; all-zero mask: both lanes take element 0 of %a
    324   ret <2 x double> %splat
    325 }
    326 
    327 ;CHECK-LABEL: _inreg8xi32:
    328 ;CHECK: vpbroadcastd
    329 ;CHECK: ret
    330 define <8 x i32> @_inreg8xi32(<8 x i32> %a) { ; splat of lane 0 of a vector argument
    331   %splat = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> zeroinitializer ; all-zero mask: every lane takes element 0 of %a
    332   ret <8 x i32> %splat
    333 }
    334 
    335 ;CHECK-LABEL: _inreg4xi32:
    336 ;CHECK: vpbroadcastd
    337 ;CHECK: ret
    338 define <4 x i32> @_inreg4xi32(<4 x i32> %a) { ; splat of lane 0 of a vector argument
    339   %splat = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> zeroinitializer ; all-zero mask: every lane takes element 0 of %a
    340   ret <4 x i32> %splat
    341 }
    342 
    343 ;CHECK-LABEL: _inreg32xi8:
    344 ;CHECK: vpbroadcastb
    345 ;CHECK: ret
    346 define <32 x i8> @_inreg32xi8(<32 x i8> %a) { ; splat of lane 0 of a vector argument
    347   %splat = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> zeroinitializer ; all-zero mask: every lane takes element 0 of %a
    348   ret <32 x i8> %splat
    349 }
    350 
    351 ;CHECK-LABEL: _inreg16xi8:
    352 ;CHECK: vpbroadcastb
    353 ;CHECK: ret
    354 define <16 x i8> @_inreg16xi8(<16 x i8> %a) { ; splat of lane 0 of a vector argument
    355   %splat = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> zeroinitializer ; all-zero mask: every lane takes element 0 of %a
    356   ret <16 x i8> %splat
    357 }
    358