Home | History | Annotate | Download | only in X86
      1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
      2 
      3 define <16 x i32> @_inreg16xi32(i32 %a) {
        ; Splat the scalar i32 argument across all 16 lanes: insert it into lane 0
        ; of an undef vector, then shuffle with an all-zero mask. The checks below
        ; expect a single vpbroadcastd taken directly from %edi.
      4 ; CHECK-LABEL: _inreg16xi32:
      5 ; CHECK:       ## BB#0:
      6 ; CHECK-NEXT:    vpbroadcastd %edi, %zmm0
      7 ; CHECK-NEXT:    retq
      8   %lane0 = insertelement <16 x i32> undef, i32 %a, i32 0
      9   %splat = shufflevector <16 x i32> %lane0, <16 x i32> undef, <16 x i32> zeroinitializer
     10   ret <16 x i32> %splat
     11 }
     12 
     13 define <8 x i64> @_inreg8xi64(i64 %a) {
        ; Splat the scalar i64 argument across all 8 lanes (lane-0 insert followed
        ; by an all-zero shuffle mask). The checks below expect one vpbroadcastq
        ; sourced from %rdi.
     14 ; CHECK-LABEL: _inreg8xi64:
     15 ; CHECK:       ## BB#0:
     16 ; CHECK-NEXT:    vpbroadcastq %rdi, %zmm0
     17 ; CHECK-NEXT:    retq
     18   %lane0 = insertelement <8 x i64> undef, i64 %a, i32 0
     19   %splat = shufflevector <8 x i64> %lane0, <8 x i64> undef, <8 x i32> zeroinitializer
     20   ret <8 x i64> %splat
     21 }
     22 
     23 ;CHECK-LABEL: _ss16xfloat_v4:
     24 ;CHECK: vbroadcastss %xmm0, %zmm0
     25 ;CHECK: ret
        ; Widen a <4 x float> to <16 x float> by replicating element 0 into every
        ; lane (all-zero shuffle mask). The checks above expect a vbroadcastss from
        ; %xmm0. The CHECK-LABEL carries the trailing ':' like every other label in
        ; this file so that it anchors on the symbol definition.
     26 define <16 x float> @_ss16xfloat_v4(<4 x float> %a) {
     27   %splat = shufflevector <4 x float> %a, <4 x float> undef, <16 x i32> zeroinitializer
     28   ret <16 x float> %splat
     29 }
     30 
     31 define <16 x float> @_inreg16xfloat(float %a) {
        ; Splat the scalar float argument across all 16 lanes (lane-0 insert plus
        ; all-zero shuffle mask). The checks below expect a register-to-register
        ; vbroadcastss from %xmm0.
     32 ; CHECK-LABEL: _inreg16xfloat:
     33 ; CHECK:       ## BB#0:
     34 ; CHECK-NEXT:    vbroadcastss %xmm0, %zmm0
     35 ; CHECK-NEXT:    retq
     36   %lane0 = insertelement <16 x float> undef, float %a, i32 0
     37   %splat = shufflevector <16 x float> %lane0, <16 x float> undef, <16 x i32> zeroinitializer
     38   ret <16 x float> %splat
     39 }
     40 
     41 ;CHECK-LABEL: _ss16xfloat_mask:
     42 ;CHECK: vbroadcastss %xmm0, %zmm1 {%k1}
     43 ;CHECK: ret
        ; Masked splat of a scalar float: lanes whose %mask1 element is non-zero
        ; take the broadcast value, the remaining lanes keep the pass-through
        ; vector %i. The checks above expect a vbroadcastss under write-mask %k1.
     44 define <16 x float> @_ss16xfloat_mask(float %a, <16 x float> %i, <16 x i32> %mask1) {
     45   %nonzero = icmp ne <16 x i32> %mask1, zeroinitializer
     46   %lane0 = insertelement <16 x float> undef, float %a, i32 0
     47   %splat = shufflevector <16 x float> %lane0, <16 x float> undef, <16 x i32> zeroinitializer
     48   %blend = select <16 x i1> %nonzero, <16 x float> %splat, <16 x float> %i
     49   ret <16 x float> %blend
     50 }
     51 
     52 ;CHECK-LABEL: _ss16xfloat_maskz:
     53 ;CHECK: vbroadcastss %xmm0, %zmm0 {%k1} {z}
     54 ;CHECK: ret
        ; Zero-masked splat of a scalar float: lanes whose %mask1 element is
        ; non-zero take the broadcast value, all other lanes are zero. The checks
        ; above expect a vbroadcastss with the {z} zeroing modifier.
     55 define <16 x float> @_ss16xfloat_maskz(float %a, <16 x i32> %mask1) {
     56   %nonzero = icmp ne <16 x i32> %mask1, zeroinitializer
     57   %lane0 = insertelement <16 x float> undef, float %a, i32 0
     58   %splat = shufflevector <16 x float> %lane0, <16 x float> undef, <16 x i32> zeroinitializer
     59   %blend = select <16 x i1> %nonzero, <16 x float> %splat, <16 x float> zeroinitializer
     60   ret <16 x float> %blend
     61 }
     62 
     63 ;CHECK-LABEL: _ss16xfloat_load:
     64 ;CHECK: vbroadcastss (%rdi), %zmm
     65 ;CHECK: ret
        ; Splat a float loaded from memory across all 16 lanes. The checks above
        ; expect the load to be folded into vbroadcastss as a memory operand. The
        ; address register is spelled out as (%rdi), matching the pattern used by
        ; the _sd8xdouble_load check, instead of a greedy wildcard with an
        ; unbalanced parenthesis.
     66 define <16 x float> @_ss16xfloat_load(float* %a.ptr) {
     67   %scalar = load float, float* %a.ptr
     68   %lane0 = insertelement <16 x float> undef, float %scalar, i32 0
     69   %splat = shufflevector <16 x float> %lane0, <16 x float> undef, <16 x i32> zeroinitializer
     70   ret <16 x float> %splat
     71 }
     72 
     73 ;CHECK-LABEL: _ss16xfloat_mask_load:
     74 ;CHECK: vbroadcastss (%rdi), %zmm0 {%k1}
     75 ;CHECK: ret
        ; Masked splat of a float loaded from memory: lanes whose %mask1 element is
        ; non-zero take the loaded value, the rest keep %i. The checks above expect
        ; the load folded into a vbroadcastss under write-mask %k1.
     76 define <16 x float> @_ss16xfloat_mask_load(float* %a.ptr, <16 x float> %i, <16 x i32> %mask1) {
     77   %scalar = load float, float* %a.ptr
     78   %nonzero = icmp ne <16 x i32> %mask1, zeroinitializer
     79   %lane0 = insertelement <16 x float> undef, float %scalar, i32 0
     80   %splat = shufflevector <16 x float> %lane0, <16 x float> undef, <16 x i32> zeroinitializer
     81   %blend = select <16 x i1> %nonzero, <16 x float> %splat, <16 x float> %i
     82   ret <16 x float> %blend
     83 }
     84 
     85 ;CHECK-LABEL: _ss16xfloat_maskz_load:
     86 ;CHECK: vbroadcastss (%rdi), %zmm0 {%k1} {z}
     87 ;CHECK: ret
        ; Zero-masked splat of a float loaded from memory: lanes whose %mask1
        ; element is non-zero take the loaded value, the rest are zero. The checks
        ; above expect the load folded into a vbroadcastss with the {z} modifier.
     88 define <16 x float> @_ss16xfloat_maskz_load(float* %a.ptr, <16 x i32> %mask1) {
     89   %scalar = load float, float* %a.ptr
     90   %nonzero = icmp ne <16 x i32> %mask1, zeroinitializer
     91   %lane0 = insertelement <16 x float> undef, float %scalar, i32 0
     92   %splat = shufflevector <16 x float> %lane0, <16 x float> undef, <16 x i32> zeroinitializer
     93   %blend = select <16 x i1> %nonzero, <16 x float> %splat, <16 x float> zeroinitializer
     94   ret <16 x float> %blend
     95 }
     96 
     97 define <8 x double> @_inreg8xdouble(double %a) {
        ; Splat the scalar double argument across all 8 lanes (lane-0 insert plus
        ; all-zero shuffle mask). The checks below expect a register-to-register
        ; vbroadcastsd from %xmm0.
     98 ; CHECK-LABEL: _inreg8xdouble:
     99 ; CHECK:       ## BB#0:
    100 ; CHECK-NEXT:    vbroadcastsd %xmm0, %zmm0
    101 ; CHECK-NEXT:    retq
    102   %lane0 = insertelement <8 x double> undef, double %a, i32 0
    103   %splat = shufflevector <8 x double> %lane0, <8 x double> undef, <8 x i32> zeroinitializer
    104   ret <8 x double> %splat
    105 }
    106 
    107 ;CHECK-LABEL: _sd8xdouble_mask:
    108 ;CHECK: vbroadcastsd %xmm0, %zmm1 {%k1}
    109 ;CHECK: ret
        ; Masked splat of a scalar double: lanes whose %mask1 element is non-zero
        ; take the broadcast value, the remaining lanes keep the pass-through
        ; vector %i. The checks above expect a vbroadcastsd under write-mask %k1.
    110 define <8 x double> @_sd8xdouble_mask(double %a, <8 x double> %i, <8 x i32> %mask1) {
    111   %nonzero = icmp ne <8 x i32> %mask1, zeroinitializer
    112   %lane0 = insertelement <8 x double> undef, double %a, i32 0
    113   %splat = shufflevector <8 x double> %lane0, <8 x double> undef, <8 x i32> zeroinitializer
    114   %blend = select <8 x i1> %nonzero, <8 x double> %splat, <8 x double> %i
    115   ret <8 x double> %blend
    116 }
    117 
    118 ;CHECK-LABEL: _sd8xdouble_maskz:
    119 ;CHECK: vbroadcastsd %xmm0, %zmm0 {%k1} {z}
    120 ;CHECK: ret
        ; Zero-masked splat of a scalar double: lanes whose %mask1 element is
        ; non-zero take the broadcast value, all other lanes are zero. The checks
        ; above expect a vbroadcastsd with the {z} zeroing modifier.
    121 define <8 x double> @_sd8xdouble_maskz(double %a, <8 x i32> %mask1) {
    122   %nonzero = icmp ne <8 x i32> %mask1, zeroinitializer
    123   %lane0 = insertelement <8 x double> undef, double %a, i32 0
    124   %splat = shufflevector <8 x double> %lane0, <8 x double> undef, <8 x i32> zeroinitializer
    125   %blend = select <8 x i1> %nonzero, <8 x double> %splat, <8 x double> zeroinitializer
    126   ret <8 x double> %blend
    127 }
    128 
    129 ;CHECK-LABEL: _sd8xdouble_load:
    130 ;CHECK: vbroadcastsd (%rdi), %zmm
    131 ;CHECK: ret
        ; Splat a double loaded from memory across all 8 lanes. The checks above
        ; expect the load to be folded into vbroadcastsd as a (%rdi) memory operand.
    132 define <8 x double> @_sd8xdouble_load(double* %a.ptr) {
    133   %scalar = load double, double* %a.ptr
    134   %lane0 = insertelement <8 x double> undef, double %scalar, i32 0
    135   %splat = shufflevector <8 x double> %lane0, <8 x double> undef, <8 x i32> zeroinitializer
    136   ret <8 x double> %splat
    137 }
    138 
    139 ;CHECK-LABEL: _sd8xdouble_mask_load:
    140 ;CHECK: vbroadcastsd (%rdi), %zmm0 {%k1}
    141 ;CHECK: ret
        ; Masked splat of a double loaded from memory: lanes whose %mask1 element
        ; is non-zero take the loaded value, the rest keep %i. The checks above
        ; expect the load folded into a vbroadcastsd under write-mask %k1.
    142 define <8 x double> @_sd8xdouble_mask_load(double* %a.ptr, <8 x double> %i, <8 x i32> %mask1) {
    143   %scalar = load double, double* %a.ptr
    144   %nonzero = icmp ne <8 x i32> %mask1, zeroinitializer
    145   %lane0 = insertelement <8 x double> undef, double %scalar, i32 0
    146   %splat = shufflevector <8 x double> %lane0, <8 x double> undef, <8 x i32> zeroinitializer
    147   %blend = select <8 x i1> %nonzero, <8 x double> %splat, <8 x double> %i
    148   ret <8 x double> %blend
    149 }
    150 
    151 define <8 x double> @_sd8xdouble_maskz_load(double* %a.ptr, <8 x i32> %mask1) {
        ; Zero-masked splat of a double loaded from memory: lanes whose %mask1
        ; element is non-zero take the loaded value, the rest are zero. The checks
        ; below expect the load folded into a vbroadcastsd with the {z} modifier.
    152 ; CHECK-LABEL: _sd8xdouble_maskz_load:
    153 ; CHECK:    vbroadcastsd (%rdi), %zmm0 {%k1} {z}
    154 ; CHECK:    ret
    155   %scalar = load double, double* %a.ptr
    156   %nonzero = icmp ne <8 x i32> %mask1, zeroinitializer
    157   %lane0 = insertelement <8 x double> undef, double %scalar, i32 0
    158   %splat = shufflevector <8 x double> %lane0, <8 x double> undef, <8 x i32> zeroinitializer
    159   %blend = select <8 x i1> %nonzero, <8 x double> %splat, <8 x double> zeroinitializer
    160   ret <8 x double> %blend
    161 }
    162 
    163 define <16 x i32> @_xmm16xi32(<16 x i32> %a) {
        ; Replicate element 0 of a full 512-bit integer vector into every lane.
        ; The checks below expect vpbroadcastd reading only the low %xmm0 part of
        ; the input.
    164 ; CHECK-LABEL: _xmm16xi32:
    165 ; CHECK:       ## BB#0:
    166 ; CHECK-NEXT:    vpbroadcastd %xmm0, %zmm0
    167 ; CHECK-NEXT:    retq
    168   %splat = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> zeroinitializer
    169   ret <16 x i32> %splat
    170 }
    171 
    172 define <16 x float> @_xmm16xfloat(<16 x float> %a) {
        ; Replicate element 0 of a full 512-bit float vector into every lane.
        ; The checks below expect vbroadcastss reading only the low %xmm0 part of
        ; the input.
    173 ; CHECK-LABEL: _xmm16xfloat:
    174 ; CHECK:       ## BB#0:
    175 ; CHECK-NEXT:    vbroadcastss %xmm0, %zmm0
    176 ; CHECK-NEXT:    retq
    177   %splat = shufflevector <16 x float> %a, <16 x float> undef, <16 x i32> zeroinitializer
    178   ret <16 x float> %splat
    179 }
    180 
    181 define <16 x i32> @test_vbroadcast() {
        ; Takes no arguments; every input is a constant or undef.
        ;   %0 - sign-extension of an all-false <16 x i1> constant
        ;   %1 - unordered (uno) float compare of undef against zero
        ;   %2 - sign-extension of that compare result
        ;   %3 - select on %1: lanes where %1 is true take %0, the others take %2
        ; The CHECK-NEXT lines pin the exact instruction sequence llc emits for
        ; this, including the zero-masked vpbroadcastd from a RIP-relative
        ; constant and the knotw/vmovdqu32 pair that follows it.
        ; NOTE(review): presumably a regression test for a specific lowering
        ; issue - the motivating bug is not recorded here; confirm from history.
    182 ; CHECK-LABEL: test_vbroadcast:
    183 ; CHECK:       ## BB#0: ## %entry
    184 ; CHECK-NEXT:    vpxord %zmm0, %zmm0, %zmm0
    185 ; CHECK-NEXT:    vcmpunordps %zmm0, %zmm0, %k1
    186 ; CHECK-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
    187 ; CHECK-NEXT:    knotw %k1, %k1
    188 ; CHECK-NEXT:    vmovdqu32 %zmm0, %zmm0 {%k1} {z}
    189 ; CHECK-NEXT:    retq
    190 entry:
    191   %0 = sext <16 x i1> zeroinitializer to <16 x i32>
    192   %1 = fcmp uno <16 x float> undef, zeroinitializer
    193   %2 = sext <16 x i1> %1 to <16 x i32>
    194   %3 = select <16 x i1> %1, <16 x i32> %0, <16 x i32> %2
    195   ret <16 x i32> %3
    196 }
    197 
    198 ; The set1 intrinsics are implemented with vector initializers, i.e. one
    199 ; insertelement per lane. Verify that this IR still ends up as a broadcast.
    200 define <8 x double> @test_set1_pd(double %d) #2 {
        ; Builds an <8 x double> by inserting %d into each of the 8 lanes in turn.
        ; The checks below expect the whole chain to collapse into one vbroadcastsd.
    201 ; CHECK-LABEL: test_set1_pd:
    202 ; CHECK:       ## BB#0: ## %entry
    203 ; CHECK-NEXT:    vbroadcastsd %xmm0, %zmm0
    204 ; CHECK-NEXT:    retq
    205 entry:
    206   %lane0 = insertelement <8 x double> undef, double %d, i32 0
    207   %lane1 = insertelement <8 x double> %lane0, double %d, i32 1
    208   %lane2 = insertelement <8 x double> %lane1, double %d, i32 2
    209   %lane3 = insertelement <8 x double> %lane2, double %d, i32 3
    210   %lane4 = insertelement <8 x double> %lane3, double %d, i32 4
    211   %lane5 = insertelement <8 x double> %lane4, double %d, i32 5
    212   %lane6 = insertelement <8 x double> %lane5, double %d, i32 6
    213   %lane7 = insertelement <8 x double> %lane6, double %d, i32 7
    214   ret <8 x double> %lane7
    215 }
    216 
    217 define <8 x i64> @test_set1_epi64(i64 %d) #2 {
        ; Builds an <8 x i64> by inserting %d into each of the 8 lanes in turn.
        ; The checks below expect the chain to collapse into one vpbroadcastq
        ; sourced from %rdi.
    218 ; CHECK-LABEL: test_set1_epi64:
    219 ; CHECK:       ## BB#0: ## %entry
    220 ; CHECK-NEXT:    vpbroadcastq %rdi, %zmm0
    221 ; CHECK-NEXT:    retq
    222 entry:
    223   %lane0 = insertelement <8 x i64> undef, i64 %d, i32 0
    224   %lane1 = insertelement <8 x i64> %lane0, i64 %d, i32 1
    225   %lane2 = insertelement <8 x i64> %lane1, i64 %d, i32 2
    226   %lane3 = insertelement <8 x i64> %lane2, i64 %d, i32 3
    227   %lane4 = insertelement <8 x i64> %lane3, i64 %d, i32 4
    228   %lane5 = insertelement <8 x i64> %lane4, i64 %d, i32 5
    229   %lane6 = insertelement <8 x i64> %lane5, i64 %d, i32 6
    230   %lane7 = insertelement <8 x i64> %lane6, i64 %d, i32 7
    231   ret <8 x i64> %lane7
    232 }
    233 
    234 define <16 x float> @test_set1_ps(float %f) #2 {
        ; Builds a <16 x float> by inserting %f into each of the 16 lanes in turn.
        ; The checks below expect the chain to collapse into one vbroadcastss.
    235 ; CHECK-LABEL: test_set1_ps:
    236 ; CHECK:       ## BB#0: ## %entry
    237 ; CHECK-NEXT:    vbroadcastss %xmm0, %zmm0
    238 ; CHECK-NEXT:    retq
    239 entry:
    240   %lane0 = insertelement <16 x float> undef, float %f, i32 0
    241   %lane1 = insertelement <16 x float> %lane0, float %f, i32 1
    242   %lane2 = insertelement <16 x float> %lane1, float %f, i32 2
    243   %lane3 = insertelement <16 x float> %lane2, float %f, i32 3
    244   %lane4 = insertelement <16 x float> %lane3, float %f, i32 4
    245   %lane5 = insertelement <16 x float> %lane4, float %f, i32 5
    246   %lane6 = insertelement <16 x float> %lane5, float %f, i32 6
    247   %lane7 = insertelement <16 x float> %lane6, float %f, i32 7
    248   %lane8 = insertelement <16 x float> %lane7, float %f, i32 8
    249   %lane9 = insertelement <16 x float> %lane8, float %f, i32 9
    250   %lane10 = insertelement <16 x float> %lane9, float %f, i32 10
    251   %lane11 = insertelement <16 x float> %lane10, float %f, i32 11
    252   %lane12 = insertelement <16 x float> %lane11, float %f, i32 12
    253   %lane13 = insertelement <16 x float> %lane12, float %f, i32 13
    254   %lane14 = insertelement <16 x float> %lane13, float %f, i32 14
    255   %lane15 = insertelement <16 x float> %lane14, float %f, i32 15
    256   ret <16 x float> %lane15
    257 }
    258 
    259 define <16 x i32> @test_set1_epi32(i32 %f) #2 {
        ; Builds a <16 x i32> by inserting %f into each of the 16 lanes in turn.
        ; The checks below expect the chain to collapse into one vpbroadcastd
        ; sourced from %edi.
    260 ; CHECK-LABEL: test_set1_epi32:
    261 ; CHECK:       ## BB#0: ## %entry
    262 ; CHECK-NEXT:    vpbroadcastd %edi, %zmm0
    263 ; CHECK-NEXT:    retq
    264 entry:
    265   %lane0 = insertelement <16 x i32> undef, i32 %f, i32 0
    266   %lane1 = insertelement <16 x i32> %lane0, i32 %f, i32 1
    267   %lane2 = insertelement <16 x i32> %lane1, i32 %f, i32 2
    268   %lane3 = insertelement <16 x i32> %lane2, i32 %f, i32 3
    269   %lane4 = insertelement <16 x i32> %lane3, i32 %f, i32 4
    270   %lane5 = insertelement <16 x i32> %lane4, i32 %f, i32 5
    271   %lane6 = insertelement <16 x i32> %lane5, i32 %f, i32 6
    272   %lane7 = insertelement <16 x i32> %lane6, i32 %f, i32 7
    273   %lane8 = insertelement <16 x i32> %lane7, i32 %f, i32 8
    274   %lane9 = insertelement <16 x i32> %lane8, i32 %f, i32 9
    275   %lane10 = insertelement <16 x i32> %lane9, i32 %f, i32 10
    276   %lane11 = insertelement <16 x i32> %lane10, i32 %f, i32 11
    277   %lane12 = insertelement <16 x i32> %lane11, i32 %f, i32 12
    278   %lane13 = insertelement <16 x i32> %lane12, i32 %f, i32 13
    279   %lane14 = insertelement <16 x i32> %lane13, i32 %f, i32 14
    280   %lane15 = insertelement <16 x i32> %lane14, i32 %f, i32 15
    281   ret <16 x i32> %lane15
    282 }
    283 
    284 ; The scalar broadcast intrinsics are implemented with vector initializers.
    285 ; Verify that the resulting IR still ends up as a single broadcast.
    286 define <8 x double> @test_mm512_broadcastsd_pd(<2 x double> %a) {
        ; Extracts element 0 of the <2 x double> argument and inserts it into each
        ; of the 8 result lanes. The checks below expect one vbroadcastsd from
        ; %xmm0 with no separate extract.
    287 ; CHECK-LABEL: test_mm512_broadcastsd_pd:
    288 ; CHECK:       ## BB#0: ## %entry
    289 ; CHECK-NEXT:    vbroadcastsd %xmm0, %zmm0
    290 ; CHECK-NEXT:    retq
    291 entry:
    292   %low = extractelement <2 x double> %a, i32 0
    293   %lane0 = insertelement <8 x double> undef, double %low, i32 0
    294   %lane1 = insertelement <8 x double> %lane0, double %low, i32 1
    295   %lane2 = insertelement <8 x double> %lane1, double %low, i32 2
    296   %lane3 = insertelement <8 x double> %lane2, double %low, i32 3
    297   %lane4 = insertelement <8 x double> %lane3, double %low, i32 4
    298   %lane5 = insertelement <8 x double> %lane4, double %low, i32 5
    299   %lane6 = insertelement <8 x double> %lane5, double %low, i32 6
    300   %lane7 = insertelement <8 x double> %lane6, double %low, i32 7
    301   ret <8 x double> %lane7
    302 }
    303