Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
      3 
      ; Splat a scalar i64 loaded from %ptr into all four lanes of a <4 x i64>
      ; using an insertelement chain. The assertions below expect this to fold
      ; into a single vbroadcastsd from memory into %ymm0.
      4 define <4 x i64> @A(i64* %ptr) nounwind uwtable readnone ssp {
      5 ; CHECK-LABEL: A:
      6 ; CHECK:       ## BB#0: ## %entry
      7 ; CHECK-NEXT:    vbroadcastsd (%rdi), %ymm0
      8 ; CHECK-NEXT:    retq
      9 entry:
     10   %q = load i64, i64* %ptr, align 8
     11   %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
     12   %vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1
     13   %vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2
     14   %vecinit6.i = insertelement <4 x i64> %vecinit4.i, i64 %q, i32 3
     15   ret <4 x i64> %vecinit6.i
     16 }
     17 
     ; Insert a scalar i32 loaded from %ptr into lanes 0-3 of a <8 x i32>.
     ; Lanes 4-7 are never written and stay undef. The assertions below expect
     ; a single vbroadcastss from memory into %ymm0.
     18 define <8 x i32> @B(i32* %ptr) nounwind uwtable readnone ssp {
     19 ; CHECK-LABEL: B:
     20 ; CHECK:       ## BB#0: ## %entry
     21 ; CHECK-NEXT:    vbroadcastss (%rdi), %ymm0
     22 ; CHECK-NEXT:    retq
     23 entry:
     24   %q = load i32, i32* %ptr, align 4
     25   %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
     26   %vecinit2.i = insertelement <8 x i32> %vecinit.i, i32 %q, i32 1
     27   %vecinit4.i = insertelement <8 x i32> %vecinit2.i, i32 %q, i32 2
     28   %vecinit6.i = insertelement <8 x i32> %vecinit4.i, i32 %q, i32 3
     29   ret <8 x i32> %vecinit6.i
     30 }
     31 
     ; Splat a scalar double loaded from %ptr into all four lanes of a
     ; <4 x double>. The assertions below expect a single vbroadcastsd from
     ; memory into %ymm0.
     32 define <4 x double> @C(double* %ptr) nounwind uwtable readnone ssp {
     33 ; CHECK-LABEL: C:
     34 ; CHECK:       ## BB#0: ## %entry
     35 ; CHECK-NEXT:    vbroadcastsd (%rdi), %ymm0
     36 ; CHECK-NEXT:    retq
     37 entry:
     38   %q = load double, double* %ptr, align 8
     39   %vecinit.i = insertelement <4 x double> undef, double %q, i32 0
     40   %vecinit2.i = insertelement <4 x double> %vecinit.i, double %q, i32 1
     41   %vecinit4.i = insertelement <4 x double> %vecinit2.i, double %q, i32 2
     42   %vecinit6.i = insertelement <4 x double> %vecinit4.i, double %q, i32 3
     43   ret <4 x double> %vecinit6.i
     44 }
     45 
     ; Insert a scalar float loaded from %ptr into lanes 0-3 of a <8 x float>.
     ; Lanes 4-7 are never written and stay undef. The assertions below expect
     ; a single vbroadcastss from memory into %ymm0.
     46 define <8 x float> @D(float* %ptr) nounwind uwtable readnone ssp {
     47 ; CHECK-LABEL: D:
     48 ; CHECK:       ## BB#0: ## %entry
     49 ; CHECK-NEXT:    vbroadcastss (%rdi), %ymm0
     50 ; CHECK-NEXT:    retq
     51 entry:
     52   %q = load float, float* %ptr, align 4
     53   %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
     54   %vecinit2.i = insertelement <8 x float> %vecinit.i, float %q, i32 1
     55   %vecinit4.i = insertelement <8 x float> %vecinit2.i, float %q, i32 2
     56   %vecinit6.i = insertelement <8 x float> %vecinit4.i, float %q, i32 3
     57   ret <8 x float> %vecinit6.i
     58 }
     59 
     60 ;;;; 128-bit versions
     61 
     ; 128-bit case: splat a scalar float loaded from %ptr into all four lanes
     ; of a <4 x float>. The assertions below expect a single vbroadcastss from
     ; memory into %xmm0.
     62 define <4 x float> @e(float* %ptr) nounwind uwtable readnone ssp {
     63 ; CHECK-LABEL: e:
     64 ; CHECK:       ## BB#0: ## %entry
     65 ; CHECK-NEXT:    vbroadcastss (%rdi), %xmm0
     66 ; CHECK-NEXT:    retq
     67 entry:
     68   %q = load float, float* %ptr, align 4
     69   %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
     70   %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
     71   %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
     72   %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
     73   ret <4 x float> %vecinit6.i
     74 }
     75 
     76 ; Don't broadcast constants on pre-AVX2 hardware.
     ; Splat of a floating-point constant (0xbf80000000000000, printed as
     ; -7.812500e-03 in the expected output) into a <4 x float>. %ptr is unused.
     ; The assertions below expect a full-vector constant load with vmovaps
     ; rather than a broadcast instruction.
     77 define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp {
     78 ; CHECK-LABEL: _e2:
     79 ; CHECK:       ## BB#0: ## %entry
     80 ; CHECK-NEXT:    vmovaps {{.*#+}} xmm0 = [-7.812500e-03,-7.812500e-03,-7.812500e-03,-7.812500e-03]
     81 ; CHECK-NEXT:    retq
     82 entry:
     83    %vecinit.i = insertelement <4 x float> undef, float       0xbf80000000000000, i32 0
     84   %vecinit2.i = insertelement <4 x float> %vecinit.i, float  0xbf80000000000000, i32 1
     85   %vecinit4.i = insertelement <4 x float> %vecinit2.i, float 0xbf80000000000000, i32 2
     86   %vecinit6.i = insertelement <4 x float> %vecinit4.i, float 0xbf80000000000000, i32 3
     87   ret <4 x float> %vecinit6.i
     88 }
     89 
     90 
     ; 128-bit integer case: splat a scalar i32 loaded from %ptr into all four
     ; lanes of a <4 x i32>. The assertions below expect a single vbroadcastss
     ; from memory into %xmm0.
     91 define <4 x i32> @F(i32* %ptr) nounwind uwtable readnone ssp {
     92 ; CHECK-LABEL: F:
     93 ; CHECK:       ## BB#0: ## %entry
     94 ; CHECK-NEXT:    vbroadcastss (%rdi), %xmm0
     95 ; CHECK-NEXT:    retq
     96 entry:
     97   %q = load i32, i32* %ptr, align 4
     98   %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
     99   %vecinit2.i = insertelement <4 x i32> %vecinit.i, i32 %q, i32 1
    100   %vecinit4.i = insertelement <4 x i32> %vecinit2.i, i32 %q, i32 2
    101   %vecinit6.i = insertelement <4 x i32> %vecinit4.i, i32 %q, i32 3
    102   ret <4 x i32> %vecinit6.i
    103 }
    104 
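    105 ; FIXME: Pointer-adjusted broadcasts (splats of a non-zero lane of a loaded vector) - some cases below are not yet lowered to a single vbroadcast.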
    106 
    ; Load a whole <4 x i32> and splat its lane 1 into a <4 x i32>.
    ; The assertions below expect a vpshufd with a memory operand and mask
    ; [1,1,1,1], i.e. no broadcast instruction is produced here.
    107 define <4 x i32> @load_splat_4i32_4i32_1111(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
    108 ; CHECK-LABEL: load_splat_4i32_4i32_1111:
    109 ; CHECK:       ## BB#0: ## %entry
    110 ; CHECK-NEXT:    vpshufd {{.*#+}} xmm0 = mem[1,1,1,1]
    111 ; CHECK-NEXT:    retq
    112 entry:
    113   %ld = load <4 x i32>, <4 x i32>* %ptr
    114   %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
    115   ret <4 x i32> %ret
    116 }
    117 
    ; Load a <4 x i32> and splat its lane 3 across all eight lanes of a
    ; <8 x i32>. The assertions below expect a vpermilps from memory with mask
    ; [3,3,3,3] followed by a vinsertf128 that duplicates the low half into
    ; the high half.
    118 define <8 x i32> @load_splat_8i32_4i32_33333333(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
    119 ; CHECK-LABEL: load_splat_8i32_4i32_33333333:
    120 ; CHECK:       ## BB#0: ## %entry
    121 ; CHECK-NEXT:    vpermilps {{.*#+}} xmm0 = mem[3,3,3,3]
    122 ; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
    123 ; CHECK-NEXT:    retq
    124 entry:
    125   %ld = load <4 x i32>, <4 x i32>* %ptr
    126   %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
    127   ret <8 x i32> %ret
    128 }
    129 
    ; Load a <8 x i32> and splat its lane 5 across all eight lanes.
    ; The assertions below expect a full 256-bit load, extraction of the upper
    ; 128 bits (lane 5 is lane 1 of the upper half), a vpermilps with mask
    ; [1,1,1,1], and a vinsertf128 to fill the high half of the result.
    130 define <8 x i32> @load_splat_8i32_8i32_55555555(<8 x i32>* %ptr) nounwind uwtable readnone ssp {
    131 ; CHECK-LABEL: load_splat_8i32_8i32_55555555:
    132 ; CHECK:       ## BB#0: ## %entry
    133 ; CHECK-NEXT:    vmovaps (%rdi), %ymm0
    134 ; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
    135 ; CHECK-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
    136 ; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
    137 ; CHECK-NEXT:    retq
    138 entry:
    139   %ld = load <8 x i32>, <8 x i32>* %ptr
    140   %ret = shufflevector <8 x i32> %ld, <8 x i32> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
    141   ret <8 x i32> %ret
    142 }
    143 
    ; Load a <4 x float> and splat its lane 1 into a <4 x float>.
    ; The assertions below expect the splat to fold into a single vbroadcastss
    ; whose address is adjusted by 4 bytes (lane 1 of 4-byte floats).
    144 define <4 x float> @load_splat_4f32_4f32_1111(<4 x float>* %ptr) nounwind uwtable readnone ssp {
    145 ; CHECK-LABEL: load_splat_4f32_4f32_1111:
    146 ; CHECK:       ## BB#0: ## %entry
    147 ; CHECK-NEXT:    vbroadcastss 4(%rdi), %xmm0
    148 ; CHECK-NEXT:    retq
    149 entry:
    150   %ld = load <4 x float>, <4 x float>* %ptr
    151   %ret = shufflevector <4 x float> %ld, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
    152   ret <4 x float> %ret
    153 }
    154 
    ; Load a <4 x float> and splat its lane 3 across all eight lanes of a
    ; <8 x float>. The assertions below expect a single vbroadcastss into %ymm0
    ; whose address is adjusted by 12 bytes (lane 3 of 4-byte floats).
    155 define <8 x float> @load_splat_8f32_4f32_33333333(<4 x float>* %ptr) nounwind uwtable readnone ssp {
    156 ; CHECK-LABEL: load_splat_8f32_4f32_33333333:
    157 ; CHECK:       ## BB#0: ## %entry
    158 ; CHECK-NEXT:    vbroadcastss 12(%rdi), %ymm0
    159 ; CHECK-NEXT:    retq
    160 entry:
    161   %ld = load <4 x float>, <4 x float>* %ptr
    162   %ret = shufflevector <4 x float> %ld, <4 x float> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
    163   ret <8 x float> %ret
    164 }
    165 
    ; Load a <8 x float> and splat its lane 5 across all eight lanes.
    ; The assertions below expect a single vbroadcastss into %ymm0 whose
    ; address is adjusted by 20 bytes (lane 5 of 4-byte floats).
    166 define <8 x float> @load_splat_8f32_8f32_55555555(<8 x float>* %ptr) nounwind uwtable readnone ssp {
    167 ; CHECK-LABEL: load_splat_8f32_8f32_55555555:
    168 ; CHECK:       ## BB#0: ## %entry
    169 ; CHECK-NEXT:    vbroadcastss 20(%rdi), %ymm0
    170 ; CHECK-NEXT:    retq
    171 entry:
    172   %ld = load <8 x float>, <8 x float>* %ptr
    173   %ret = shufflevector <8 x float> %ld, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
    174   ret <8 x float> %ret
    175 }
    176 
    ; Load a <2 x i64> and splat its lane 1 into a <2 x i64>.
    ; The assertions below expect a vpshufd from memory with dword mask
    ; [2,3,2,3] (the two dwords of the upper i64 repeated), not a broadcast.
    177 define <2 x i64> @load_splat_2i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {
    178 ; CHECK-LABEL: load_splat_2i64_2i64_1111:
    179 ; CHECK:       ## BB#0: ## %entry
    180 ; CHECK-NEXT:    vpshufd {{.*#+}} xmm0 = mem[2,3,2,3]
    181 ; CHECK-NEXT:    retq
    182 entry:
    183   %ld = load <2 x i64>, <2 x i64>* %ptr
    184   %ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
    185   ret <2 x i64> %ret
    186 }
    187 
    ; Load a <2 x i64> and splat its lane 1 across all four lanes of a
    ; <4 x i64>. The assertions below expect a 128-bit load, a vmovhlps that
    ; duplicates the upper element, and a vinsertf128 to fill the high half;
    ; no broadcast instruction is produced here.
    188 define <4 x i64> @load_splat_4i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {
    189 ; CHECK-LABEL: load_splat_4i64_2i64_1111:
    190 ; CHECK:       ## BB#0: ## %entry
    191 ; CHECK-NEXT:    vmovaps (%rdi), %xmm0
    192 ; CHECK-NEXT:    vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
    193 ; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
    194 ; CHECK-NEXT:    retq
    195 entry:
    196   %ld = load <2 x i64>, <2 x i64>* %ptr
    197   %ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
    198   ret <4 x i64> %ret
    199 }
    200 
    ; Load a <4 x i64> and splat its lane 2 across all four lanes.
    ; The assertions below expect a full 256-bit load, extraction of the upper
    ; 128 bits (lane 2 is lane 0 of the upper half), a vmovddup of that
    ; element, and a vinsertf128 to fill the high half of the result.
    201 define <4 x i64> @load_splat_4i64_4i64_2222(<4 x i64>* %ptr) nounwind uwtable readnone ssp {
    202 ; CHECK-LABEL: load_splat_4i64_4i64_2222:
    203 ; CHECK:       ## BB#0: ## %entry
    204 ; CHECK-NEXT:    vmovapd (%rdi), %ymm0
    205 ; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
    206 ; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
    207 ; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
    208 ; CHECK-NEXT:    retq
    209 entry:
    210   %ld = load <4 x i64>, <4 x i64>* %ptr
    211   %ret = shufflevector <4 x i64> %ld, <4 x i64> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
    212   ret <4 x i64> %ret
    213 }
    214 
    ; Load a <2 x double> and splat its lane 1 into a <2 x double>.
    ; The assertions below expect a 128-bit load followed by a vmovhlps that
    ; duplicates the upper element; no broadcast instruction is produced here.
    215 define <2 x double> @load_splat_2f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp {
    216 ; CHECK-LABEL: load_splat_2f64_2f64_1111:
    217 ; CHECK:       ## BB#0: ## %entry
    218 ; CHECK-NEXT:    vmovaps (%rdi), %xmm0
    219 ; CHECK-NEXT:    vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
    220 ; CHECK-NEXT:    retq
    221 entry:
    222   %ld = load <2 x double>, <2 x double>* %ptr
    223   %ret = shufflevector <2 x double> %ld, <2 x double> undef, <2 x i32> <i32 1, i32 1>
    224   ret <2 x double> %ret
    225 }
    226 
    ; Load a <2 x double> and splat its lane 1 across all four lanes of a
    ; <4 x double>. The assertions below expect a single vbroadcastsd into
    ; %ymm0 whose address is adjusted by 8 bytes (lane 1 of 8-byte doubles).
    227 define <4 x double> @load_splat_4f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp {
    228 ; CHECK-LABEL: load_splat_4f64_2f64_1111:
    229 ; CHECK:       ## BB#0: ## %entry
    230 ; CHECK-NEXT:    vbroadcastsd 8(%rdi), %ymm0
    231 ; CHECK-NEXT:    retq
    232 entry:
    233   %ld = load <2 x double>, <2 x double>* %ptr
    234   %ret = shufflevector <2 x double> %ld, <2 x double> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
    235   ret <4 x double> %ret
    236 }
    237 
    ; Load a <4 x double> and splat its lane 2 across all four lanes.
    ; The assertions below expect a single vbroadcastsd into %ymm0 whose
    ; address is adjusted by 16 bytes (lane 2 of 8-byte doubles).
    238 define <4 x double> @load_splat_4f64_4f64_2222(<4 x double>* %ptr) nounwind uwtable readnone ssp {
    239 ; CHECK-LABEL: load_splat_4f64_4f64_2222:
    240 ; CHECK:       ## BB#0: ## %entry
    241 ; CHECK-NEXT:    vbroadcastsd 16(%rdi), %ymm0
    242 ; CHECK-NEXT:    retq
    243 entry:
    244   %ld = load <4 x double>, <4 x double>* %ptr
    245   %ret = shufflevector <4 x double> %ld, <4 x double> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
    246   ret <4 x double> %ret
    247 }
    248 
    249 ; Unsupported vbroadcasts
    250 
    ; Splat a scalar i64 loaded from %ptr into both lanes of a <2 x i64>.
    ; The assertions below expect no broadcast instruction: the value is
    ; loaded with vmovq and then duplicated with a vpshufd using dword mask
    ; [0,1,0,1].
    251 define <2 x i64> @G(i64* %ptr) nounwind uwtable readnone ssp {
    252 ; CHECK-LABEL: G:
    253 ; CHECK:       ## BB#0: ## %entry
    254 ; CHECK-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
    255 ; CHECK-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
    256 ; CHECK-NEXT:    retq
    257 entry:
    258   %q = load i64, i64* %ptr, align 8
    259   %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
    260   %vecinit2.i = insertelement <2 x i64> %vecinit.i, i64 %q, i32 1
    261   ret <2 x i64> %vecinit2.i
    262 }
    263 
    ; Shuffle a register argument with mask <1, undef, undef, undef>: only
    ; lane 0 of the result is defined (taken from lane 1 of %a), so this is not
    ; a full splat. The assertions below expect a plain vpshufd on %xmm0 with
    ; mask [1,1,2,3] rather than a broadcast.
    264 define <4 x i32> @H(<4 x i32> %a) {
    265 ; CHECK-LABEL: H:
    266 ; CHECK:       ## BB#0: ## %entry
    267 ; CHECK-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
    268 ; CHECK-NEXT:    retq
    269 entry:
    270   %x = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
    271   ret <4 x i32> %x
    272 }
    273 
    ; Splat a scalar double loaded from %ptr into both lanes of a <2 x double>.
    ; Note the load is declared with align 4, which is below the 8-byte size of
    ; a double. The assertions below expect a vmovddup with a memory operand
    ; instead of a broadcast instruction.
    274 define <2 x double> @I(double* %ptr) nounwind uwtable readnone ssp {
    275 ; CHECK-LABEL: I:
    276 ; CHECK:       ## BB#0: ## %entry
    277 ; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
    278 ; CHECK-NEXT:    retq
    279 entry:
    280   %q = load double, double* %ptr, align 4
    281   %vecinit.i = insertelement <2 x double> undef, double %q, i32 0
    282   %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
    283   ret <2 x double> %vecinit2.i
    284 }
    285 
    ; Splat a scalar float loaded from %ptr into a <4 x float>, with an extra
    ; i32 load from %k and a store to an undef pointer placed after the splat
    ; ("force a chain" per the inline comment). The assertions below expect the
    ; splat to still become a vbroadcastss from memory, followed by the movl
    ; load and store.
    ; NOTE(review): the function is marked readnone although its body loads
    ; and stores memory - confirm this is intentional for the test.
    286 define <4 x float> @_RR(float* %ptr, i32* %k) nounwind uwtable readnone ssp {
    287 ; CHECK-LABEL: _RR:
    288 ; CHECK:       ## BB#0: ## %entry
    289 ; CHECK-NEXT:    vbroadcastss (%rdi), %xmm0
    290 ; CHECK-NEXT:    movl (%rsi), %eax
    291 ; CHECK-NEXT:    movl %eax, (%rax)
    292 ; CHECK-NEXT:    retq
    293 entry:
    294   %q = load float, float* %ptr, align 4
    295   %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
    296   %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
    297   %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
    298   %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
    299   ; force a chain
    300   %j = load i32, i32* %k, align 4
    301   store i32 %j, i32* undef
    302   ret <4 x float> %vecinit6.i
    303 }
    304 
    ; Splat a scalar float loaded from %ptr by inserting it into lane 0 and
    ; then shuffling with an all-zero mask (every result lane takes lane 0).
    ; %k is unused. The assertions below expect a single vbroadcastss from
    ; memory into %xmm0.
    305 define <4 x float> @_RR2(float* %ptr, i32* %k) nounwind uwtable readnone ssp {
    306 ; CHECK-LABEL: _RR2:
    307 ; CHECK:       ## BB#0: ## %entry
    308 ; CHECK-NEXT:    vbroadcastss (%rdi), %xmm0
    309 ; CHECK-NEXT:    retq
    310 entry:
    311   %q = load float, float* %ptr, align 4
    312   %v = insertelement <4 x float> undef, float %q, i32 0
    313   %t = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
    314   ret <4 x float> %t
    315 }
    316 
    317 ; These tests check that a vbroadcast instruction is used when we have a splat
    318 ; formed from a concat_vectors (via the shufflevector) of two BUILD_VECTORs
    319 ; (via the insertelements).
    320 
    ; Build a 4-lane float splat from a loaded scalar, then widen it to eight
    ; lanes with a shuffle whose mask repeats lanes 0-3 twice (the same vector
    ; concatenated with itself). The assertions below expect a single
    ; vbroadcastss from memory into %ymm0.
    321 define <8 x float> @splat_concat1(float* %p) {
    322 ; CHECK-LABEL: splat_concat1:
    323 ; CHECK:       ## BB#0:
    324 ; CHECK-NEXT:    vbroadcastss (%rdi), %ymm0
    325 ; CHECK-NEXT:    retq
    326   %1 = load float, float* %p, align 4
    327   %2 = insertelement <4 x float> undef, float %1, i32 0
    328   %3 = insertelement <4 x float> %2, float %1, i32 1
    329   %4 = insertelement <4 x float> %3, float %1, i32 2
    330   %5 = insertelement <4 x float> %4, float %1, i32 3
    331   %6 = shufflevector <4 x float> %5, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
    332   ret <8 x float> %6
    333 }
    334 
    ; Build two separate 4-lane float splats of the same loaded scalar (%5 and
    ; %9, each from its own insertelement chain) and concatenate them with an
    ; identity 0-7 shuffle mask. The assertions below expect a single
    ; vbroadcastss from memory into %ymm0.
    335 define <8 x float> @splat_concat2(float* %p) {
    336 ; CHECK-LABEL: splat_concat2:
    337 ; CHECK:       ## BB#0:
    338 ; CHECK-NEXT:    vbroadcastss (%rdi), %ymm0
    339 ; CHECK-NEXT:    retq
    340   %1 = load float, float* %p, align 4
    341   %2 = insertelement <4 x float> undef, float %1, i32 0
    342   %3 = insertelement <4 x float> %2, float %1, i32 1
    343   %4 = insertelement <4 x float> %3, float %1, i32 2
    344   %5 = insertelement <4 x float> %4, float %1, i32 3
    345   %6 = insertelement <4 x float> undef, float %1, i32 0
    346   %7 = insertelement <4 x float> %6, float %1, i32 1
    347   %8 = insertelement <4 x float> %7, float %1, i32 2
    348   %9 = insertelement <4 x float> %8, float %1, i32 3
    349   %10 = shufflevector <4 x float> %5, <4 x float> %9, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
    350   ret <8 x float> %10
    351 }
    352 
    ; Build a 2-lane double splat from a loaded scalar, then widen it to four
    ; lanes with shuffle mask <0,1,0,1> (the same vector concatenated with
    ; itself). The assertions below expect a single vbroadcastsd from memory
    ; into %ymm0.
    353 define <4 x double> @splat_concat3(double* %p) {
    354 ; CHECK-LABEL: splat_concat3:
    355 ; CHECK:       ## BB#0:
    356 ; CHECK-NEXT:    vbroadcastsd (%rdi), %ymm0
    357 ; CHECK-NEXT:    retq
    358   %1 = load double, double* %p, align 8
    359   %2 = insertelement <2 x double> undef, double %1, i32 0
    360   %3 = insertelement <2 x double> %2, double %1, i32 1
    361   %4 = shufflevector <2 x double> %3, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
    362   ret <4 x double> %4
    363 }
    364 
    ; Build two separate 2-lane double splats of the same loaded scalar (%3 and
    ; %5, each from its own insertelement chain) and concatenate them with an
    ; identity 0-3 shuffle mask. The assertions below expect a single
    ; vbroadcastsd from memory into %ymm0.
    365 define <4 x double> @splat_concat4(double* %p) {
    366 ; CHECK-LABEL: splat_concat4:
    367 ; CHECK:       ## BB#0:
    368 ; CHECK-NEXT:    vbroadcastsd (%rdi), %ymm0
    369 ; CHECK-NEXT:    retq
    370   %1 = load double, double* %p, align 8
    371   %2 = insertelement <2 x double> undef, double %1, i32 0
    372   %3 = insertelement <2 x double> %2, double %1, i32 1
    373   %4 = insertelement <2 x double> undef, double %1, i32 0
    ; The second vector must continue from %4, not %2; otherwise %4 is dead and
    ; both shuffle operands share one insertelement chain.
    374   %5 = insertelement <2 x double> %4, double %1, i32 1
    375   %6 = shufflevector <2 x double> %3, <2 x double> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    376   ret <4 x double> %6
    377 }
    378