Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=X32
      3 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=X64
      4 
      ; Splat test: an i64 loaded from %ptr is inserted into all four lanes of a
      ; <4 x i64>. The X64 assertions expect a single vbroadcastsd from memory.
      ; The X32 assertions expect two 32-bit loads, a vmovd/vpinsrd build of the
      ; low 128 bits, and a vinsertf128 that copies them into the upper half.
      5 define <4 x i64> @A(i64* %ptr) nounwind uwtable readnone ssp {
      6 ; X32-LABEL: A:
      7 ; X32:       ## %bb.0: ## %entry
      8 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
      9 ; X32-NEXT:    movl (%eax), %ecx
     10 ; X32-NEXT:    movl 4(%eax), %eax
     11 ; X32-NEXT:    vmovd %ecx, %xmm0
     12 ; X32-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
     13 ; X32-NEXT:    vpinsrd $2, %ecx, %xmm0, %xmm0
     14 ; X32-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
     15 ; X32-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
     16 ; X32-NEXT:    retl
     17 ;
     18 ; X64-LABEL: A:
     19 ; X64:       ## %bb.0: ## %entry
     20 ; X64-NEXT:    vbroadcastsd (%rdi), %ymm0
     21 ; X64-NEXT:    retq
     22 entry:
     23   %q = load i64, i64* %ptr, align 8
     24   %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
     25   %vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1
     26   %vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2
     27   %vecinit6.i = insertelement <4 x i64> %vecinit4.i, i64 %q, i32 3
     28   ret <4 x i64> %vecinit6.i
     29 }
     30 
     ; Same four-lane i64 splat as @A, but the loaded value is additionally
     ; stored to %ptr2 so the load has a second user (see the inline comment on
     ; the store). The assertions therefore expect no vbroadcast: the scalar is
     ; loaded into a GPR, stored, moved into xmm0, duplicated within 128 bits,
     ; and widened with vinsertf128.
     31 define <4 x i64> @A2(i64* %ptr, i64* %ptr2) nounwind uwtable readnone ssp {
     32 ; X32-LABEL: A2:
     33 ; X32:       ## %bb.0: ## %entry
     34 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
     35 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
     36 ; X32-NEXT:    movl (%ecx), %edx
     37 ; X32-NEXT:    movl 4(%ecx), %ecx
     38 ; X32-NEXT:    movl %ecx, 4(%eax)
     39 ; X32-NEXT:    movl %edx, (%eax)
     40 ; X32-NEXT:    vmovd %edx, %xmm0
     41 ; X32-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
     42 ; X32-NEXT:    vpinsrd $2, %edx, %xmm0, %xmm0
     43 ; X32-NEXT:    vpinsrd $3, %ecx, %xmm0, %xmm0
     44 ; X32-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
     45 ; X32-NEXT:    retl
     46 ;
     47 ; X64-LABEL: A2:
     48 ; X64:       ## %bb.0: ## %entry
     49 ; X64-NEXT:    movq (%rdi), %rax
     50 ; X64-NEXT:    vmovq %rax, %xmm0
     51 ; X64-NEXT:    movq %rax, (%rsi)
     52 ; X64-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
     53 ; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
     54 ; X64-NEXT:    retq
     55 entry:
     56   %q = load i64, i64* %ptr, align 8
     57   store i64 %q, i64* %ptr2, align 8 ; to create a chain to prevent broadcast
     58   %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
     59   %vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1
     60   %vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2
     61   %vecinit6.i = insertelement <4 x i64> %vecinit4.i, i64 %q, i32 3
     62   ret <4 x i64> %vecinit6.i
     63 }
     64 
     ; Partial splat: an i32 loaded from %ptr is inserted into lanes 0-3 of an
     ; <8 x i32>; lanes 4-7 are left undef. Both runs expect a single 256-bit
     ; vbroadcastss straight from memory.
     65 define <8 x i32> @B(i32* %ptr) nounwind uwtable readnone ssp {
     66 ; X32-LABEL: B:
     67 ; X32:       ## %bb.0: ## %entry
     68 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
     69 ; X32-NEXT:    vbroadcastss (%eax), %ymm0
     70 ; X32-NEXT:    retl
     71 ;
     72 ; X64-LABEL: B:
     73 ; X64:       ## %bb.0: ## %entry
     74 ; X64-NEXT:    vbroadcastss (%rdi), %ymm0
     75 ; X64-NEXT:    retq
     76 entry:
     77   %q = load i32, i32* %ptr, align 4
     78   %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
     79   %vecinit2.i = insertelement <8 x i32> %vecinit.i, i32 %q, i32 1
     80   %vecinit4.i = insertelement <8 x i32> %vecinit2.i, i32 %q, i32 2
     81   %vecinit6.i = insertelement <8 x i32> %vecinit4.i, i32 %q, i32 3
     82   ret <8 x i32> %vecinit6.i
     83 }
     84 
     ; Full splat: an i32 loaded from %ptr is inserted into all eight lanes of an
     ; <8 x i32>. Both runs expect a single 256-bit vbroadcastss from memory.
     85 define <8 x i32> @B2(i32* %ptr) nounwind uwtable readnone ssp {
     86 ; X32-LABEL: B2:
     87 ; X32:       ## %bb.0: ## %entry
     88 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
     89 ; X32-NEXT:    vbroadcastss (%eax), %ymm0
     90 ; X32-NEXT:    retl
     91 ;
     92 ; X64-LABEL: B2:
     93 ; X64:       ## %bb.0: ## %entry
     94 ; X64-NEXT:    vbroadcastss (%rdi), %ymm0
     95 ; X64-NEXT:    retq
     96 entry:
     97   %q = load i32, i32* %ptr, align 4
     98   %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
     99   %vecinit2.i = insertelement <8 x i32> %vecinit.i, i32 %q, i32 1
    100   %vecinit4.i = insertelement <8 x i32> %vecinit2.i, i32 %q, i32 2
    101   %vecinit6.i = insertelement <8 x i32> %vecinit4.i, i32 %q, i32 3
    102   %vecinit8.i = insertelement <8 x i32> %vecinit6.i, i32 %q, i32 4
    103   %vecinit10.i = insertelement <8 x i32> %vecinit8.i, i32 %q, i32 5
    104   %vecinit12.i = insertelement <8 x i32> %vecinit10.i, i32 %q, i32 6
    105   %vecinit14.i = insertelement <8 x i32> %vecinit12.i, i32 %q, i32 7
    106   ret <8 x i32> %vecinit14.i
    107 }
    108 
    ; Eight-lane i32 splat like @B2, but the loaded value is also stored to
    ; %ptr2 (see the inline comment on the store). The assertions expect no
    ; vbroadcast: the scalar goes through a GPR, is stored, moved into xmm0,
    ; splatted with vpshufd [0,0,0,0], and widened with vinsertf128.
    109 define <8 x i32> @B3(i32* %ptr, i32* %ptr2) nounwind uwtable readnone ssp {
    110 ; X32-LABEL: B3:
    111 ; X32:       ## %bb.0: ## %entry
    112 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    113 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
    114 ; X32-NEXT:    movl (%ecx), %ecx
    115 ; X32-NEXT:    vmovd %ecx, %xmm0
    116 ; X32-NEXT:    movl %ecx, (%eax)
    117 ; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
    118 ; X32-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
    119 ; X32-NEXT:    retl
    120 ;
    121 ; X64-LABEL: B3:
    122 ; X64:       ## %bb.0: ## %entry
    123 ; X64-NEXT:    movl (%rdi), %eax
    124 ; X64-NEXT:    vmovd %eax, %xmm0
    125 ; X64-NEXT:    movl %eax, (%rsi)
    126 ; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
    127 ; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
    128 ; X64-NEXT:    retq
    129 entry:
    130   %q = load i32, i32* %ptr, align 4
    131   store i32 %q, i32* %ptr2, align 4 ; to create a chain to prevent broadcast
    132   %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
    133   %vecinit2.i = insertelement <8 x i32> %vecinit.i, i32 %q, i32 1
    134   %vecinit4.i = insertelement <8 x i32> %vecinit2.i, i32 %q, i32 2
    135   %vecinit6.i = insertelement <8 x i32> %vecinit4.i, i32 %q, i32 3
    136   %vecinit8.i = insertelement <8 x i32> %vecinit6.i, i32 %q, i32 4
    137   %vecinit10.i = insertelement <8 x i32> %vecinit8.i, i32 %q, i32 5
    138   %vecinit12.i = insertelement <8 x i32> %vecinit10.i, i32 %q, i32 6
    139   %vecinit14.i = insertelement <8 x i32> %vecinit12.i, i32 %q, i32 7
    140   ret <8 x i32> %vecinit14.i
    141 }
    142 
    ; Splat test: a double loaded from %ptr is inserted into all four lanes of
    ; a <4 x double>. Both runs expect a single vbroadcastsd from memory.
    143 define <4 x double> @C(double* %ptr) nounwind uwtable readnone ssp {
    144 ; X32-LABEL: C:
    145 ; X32:       ## %bb.0: ## %entry
    146 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    147 ; X32-NEXT:    vbroadcastsd (%eax), %ymm0
    148 ; X32-NEXT:    retl
    149 ;
    150 ; X64-LABEL: C:
    151 ; X64:       ## %bb.0: ## %entry
    152 ; X64-NEXT:    vbroadcastsd (%rdi), %ymm0
    153 ; X64-NEXT:    retq
    154 entry:
    155   %q = load double, double* %ptr, align 8
    156   %vecinit.i = insertelement <4 x double> undef, double %q, i32 0
    157   %vecinit2.i = insertelement <4 x double> %vecinit.i, double %q, i32 1
    158   %vecinit4.i = insertelement <4 x double> %vecinit2.i, double %q, i32 2
    159   %vecinit6.i = insertelement <4 x double> %vecinit4.i, double %q, i32 3
    160   ret <4 x double> %vecinit6.i
    161 }
    162 
    ; Four-lane double splat like @C, but the loaded value is also stored to
    ; %ptr2 (see the inline comment on the store). The assertions expect no
    ; vbroadcast: a vmovsd load, a vmovsd store, a vmovddup within 128 bits,
    ; and a vinsertf128 to fill the upper half.
    163 define <4 x double> @C2(double* %ptr, double* %ptr2) nounwind uwtable readnone ssp {
    164 ; X32-LABEL: C2:
    165 ; X32:       ## %bb.0: ## %entry
    166 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    167 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
    168 ; X32-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    169 ; X32-NEXT:    vmovsd %xmm0, (%eax)
    170 ; X32-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
    171 ; X32-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
    172 ; X32-NEXT:    retl
    173 ;
    174 ; X64-LABEL: C2:
    175 ; X64:       ## %bb.0: ## %entry
    176 ; X64-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    177 ; X64-NEXT:    vmovsd %xmm0, (%rsi)
    178 ; X64-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
    179 ; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
    180 ; X64-NEXT:    retq
    181 entry:
    182   %q = load double, double* %ptr, align 8
    183   store double %q, double* %ptr2, align 8 ; to create a chain to prevent broadcast
    184   %vecinit.i = insertelement <4 x double> undef, double %q, i32 0
    185   %vecinit2.i = insertelement <4 x double> %vecinit.i, double %q, i32 1
    186   %vecinit4.i = insertelement <4 x double> %vecinit2.i, double %q, i32 2
    187   %vecinit6.i = insertelement <4 x double> %vecinit4.i, double %q, i32 3
    188   ret <4 x double> %vecinit6.i
    189 }
    190 
    ; Partial splat: a float loaded from %ptr is inserted into lanes 0-3 of an
    ; <8 x float>; lanes 4-7 are left undef. Both runs expect a single 256-bit
    ; vbroadcastss from memory.
    191 define <8 x float> @D(float* %ptr) nounwind uwtable readnone ssp {
    192 ; X32-LABEL: D:
    193 ; X32:       ## %bb.0: ## %entry
    194 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    195 ; X32-NEXT:    vbroadcastss (%eax), %ymm0
    196 ; X32-NEXT:    retl
    197 ;
    198 ; X64-LABEL: D:
    199 ; X64:       ## %bb.0: ## %entry
    200 ; X64-NEXT:    vbroadcastss (%rdi), %ymm0
    201 ; X64-NEXT:    retq
    202 entry:
    203   %q = load float, float* %ptr, align 4
    204   %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
    205   %vecinit2.i = insertelement <8 x float> %vecinit.i, float %q, i32 1
    206   %vecinit4.i = insertelement <8 x float> %vecinit2.i, float %q, i32 2
    207   %vecinit6.i = insertelement <8 x float> %vecinit4.i, float %q, i32 3
    208   ret <8 x float> %vecinit6.i
    209 }
    210 
    ; Full splat: a float loaded from %ptr is inserted into all eight lanes of
    ; an <8 x float>. Both runs expect a single 256-bit vbroadcastss from memory.
    211 define <8 x float> @D2(float* %ptr) nounwind uwtable readnone ssp {
    212 ; X32-LABEL: D2:
    213 ; X32:       ## %bb.0: ## %entry
    214 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    215 ; X32-NEXT:    vbroadcastss (%eax), %ymm0
    216 ; X32-NEXT:    retl
    217 ;
    218 ; X64-LABEL: D2:
    219 ; X64:       ## %bb.0: ## %entry
    220 ; X64-NEXT:    vbroadcastss (%rdi), %ymm0
    221 ; X64-NEXT:    retq
    222 entry:
    223   %q = load float, float* %ptr, align 4
    224   %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
    225   %vecinit2.i = insertelement <8 x float> %vecinit.i, float %q, i32 1
    226   %vecinit4.i = insertelement <8 x float> %vecinit2.i, float %q, i32 2
    227   %vecinit6.i = insertelement <8 x float> %vecinit4.i, float %q, i32 3
    228   %vecinit8.i = insertelement <8 x float> %vecinit6.i, float %q, i32 4
    229   %vecinit10.i = insertelement <8 x float> %vecinit8.i, float %q, i32 5
    230   %vecinit12.i = insertelement <8 x float> %vecinit10.i, float %q, i32 6
    231   %vecinit14.i = insertelement <8 x float> %vecinit12.i, float %q, i32 7
    232   ret <8 x float> %vecinit14.i
    233 }
    234 
    ; Eight-lane float splat like @D2, but the loaded value is also stored to
    ; %ptr2 (see the inline comment on the store). The assertions expect no
    ; vbroadcast: a vmovss load, a vmovss store, a vpermilps [0,0,0,0] splat
    ; within 128 bits, and a vinsertf128 to fill the upper half.
    235 define <8 x float> @D3(float* %ptr, float* %ptr2) nounwind uwtable readnone ssp {
    236 ; X32-LABEL: D3:
    237 ; X32:       ## %bb.0: ## %entry
    238 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    239 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
    240 ; X32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
    241 ; X32-NEXT:    vmovss %xmm0, (%eax)
    242 ; X32-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
    243 ; X32-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
    244 ; X32-NEXT:    retl
    245 ;
    246 ; X64-LABEL: D3:
    247 ; X64:       ## %bb.0: ## %entry
    248 ; X64-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
    249 ; X64-NEXT:    vmovss %xmm0, (%rsi)
    250 ; X64-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
    251 ; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
    252 ; X64-NEXT:    retq
    253 entry:
    254   %q = load float, float* %ptr, align 4
    255   store float %q, float* %ptr2, align 4 ; to create a chain to prevent broadcast
    256   %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
    257   %vecinit2.i = insertelement <8 x float> %vecinit.i, float %q, i32 1
    258   %vecinit4.i = insertelement <8 x float> %vecinit2.i, float %q, i32 2
    259   %vecinit6.i = insertelement <8 x float> %vecinit4.i, float %q, i32 3
    260   %vecinit8.i = insertelement <8 x float> %vecinit6.i, float %q, i32 4
    261   %vecinit10.i = insertelement <8 x float> %vecinit8.i, float %q, i32 5
    262   %vecinit12.i = insertelement <8 x float> %vecinit10.i, float %q, i32 6
    263   %vecinit14.i = insertelement <8 x float> %vecinit12.i, float %q, i32 7
    264   ret <8 x float> %vecinit14.i
    265 }
    266 
    267 ;;;; 128-bit versions
    268 
    ; 128-bit splat: a float loaded from %ptr is inserted into all four lanes
    ; of a <4 x float>. Both runs expect a single vbroadcastss into xmm0.
    269 define <4 x float> @e(float* %ptr) nounwind uwtable readnone ssp {
    270 ; X32-LABEL: e:
    271 ; X32:       ## %bb.0: ## %entry
    272 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    273 ; X32-NEXT:    vbroadcastss (%eax), %xmm0
    274 ; X32-NEXT:    retl
    275 ;
    276 ; X64-LABEL: e:
    277 ; X64:       ## %bb.0: ## %entry
    278 ; X64-NEXT:    vbroadcastss (%rdi), %xmm0
    279 ; X64-NEXT:    retq
    280 entry:
    281   %q = load float, float* %ptr, align 4
    282   %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
    283   %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
    284   %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
    285   %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
    286   ret <4 x float> %vecinit6.i
    287 }
    288 
    ; Four-lane float splat like @e, but the loaded value is also stored to
    ; %ptr2 (see the inline comment on the store). The assertions expect no
    ; vbroadcast: a vmovss load, a vmovss store, then vpermilps [0,0,0,0].
    289 define <4 x float> @e2(float* %ptr, float* %ptr2) nounwind uwtable readnone ssp {
    290 ; X32-LABEL: e2:
    291 ; X32:       ## %bb.0: ## %entry
    292 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    293 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
    294 ; X32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
    295 ; X32-NEXT:    vmovss %xmm0, (%eax)
    296 ; X32-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
    297 ; X32-NEXT:    retl
    298 ;
    299 ; X64-LABEL: e2:
    300 ; X64:       ## %bb.0: ## %entry
    301 ; X64-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
    302 ; X64-NEXT:    vmovss %xmm0, (%rsi)
    303 ; X64-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
    304 ; X64-NEXT:    retq
    305 entry:
    306   %q = load float, float* %ptr, align 4
    307   store float %q, float* %ptr2, align 4 ; to create a chain to prevent broadcast
    308   %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
    309   %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
    310   %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
    311   %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
    312   ret <4 x float> %vecinit6.i
    313 }
    314 
    315 ; Don't broadcast constants on pre-AVX2 hardware.
    ; The same float constant (0xbf80000000000000, printed as -7.812500e-03 in
    ; the assertions) is inserted into all four lanes of a <4 x float>. Both
    ; runs expect a plain vmovaps of a full four-element constant vector rather
    ; than a vbroadcastss. The %ptr parameter is not referenced in the body.
    316 define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp {
    317 ; X32-LABEL: _e2:
    318 ; X32:       ## %bb.0: ## %entry
    319 ; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [-7.812500e-03,-7.812500e-03,-7.812500e-03,-7.812500e-03]
    320 ; X32-NEXT:    retl
    321 ;
    322 ; X64-LABEL: _e2:
    323 ; X64:       ## %bb.0: ## %entry
    324 ; X64-NEXT:    vmovaps {{.*#+}} xmm0 = [-7.812500e-03,-7.812500e-03,-7.812500e-03,-7.812500e-03]
    325 ; X64-NEXT:    retq
    326 entry:
    327    %vecinit.i = insertelement <4 x float> undef, float       0xbf80000000000000, i32 0
    328   %vecinit2.i = insertelement <4 x float> %vecinit.i, float  0xbf80000000000000, i32 1
    329   %vecinit4.i = insertelement <4 x float> %vecinit2.i, float 0xbf80000000000000, i32 2
    330   %vecinit6.i = insertelement <4 x float> %vecinit4.i, float 0xbf80000000000000, i32 3
    331   ret <4 x float> %vecinit6.i
    332 }
    333 
    334 
    ; 128-bit integer splat: an i32 loaded from %ptr is inserted into all four
    ; lanes of a <4 x i32>. Both runs expect a single vbroadcastss into xmm0
    ; (the floating-point broadcast is used for the integer data).
    335 define <4 x i32> @F(i32* %ptr) nounwind uwtable readnone ssp {
    336 ; X32-LABEL: F:
    337 ; X32:       ## %bb.0: ## %entry
    338 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    339 ; X32-NEXT:    vbroadcastss (%eax), %xmm0
    340 ; X32-NEXT:    retl
    341 ;
    342 ; X64-LABEL: F:
    343 ; X64:       ## %bb.0: ## %entry
    344 ; X64-NEXT:    vbroadcastss (%rdi), %xmm0
    345 ; X64-NEXT:    retq
    346 entry:
    347   %q = load i32, i32* %ptr, align 4
    348   %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
    349   %vecinit2.i = insertelement <4 x i32> %vecinit.i, i32 %q, i32 1
    350   %vecinit4.i = insertelement <4 x i32> %vecinit2.i, i32 %q, i32 2
    351   %vecinit6.i = insertelement <4 x i32> %vecinit4.i, i32 %q, i32 3
    352   ret <4 x i32> %vecinit6.i
    353 }
    354 
    ; Four-lane i32 splat like @F, but the loaded value is also stored to %ptr2
    ; (see the inline comment on the store). The assertions expect no
    ; vbroadcast: a GPR load, a GPR store, vmovd into xmm0, then vpshufd
    ; [0,0,0,0].
    355 define <4 x i32> @F2(i32* %ptr, i32* %ptr2) nounwind uwtable readnone ssp {
    356 ; X32-LABEL: F2:
    357 ; X32:       ## %bb.0: ## %entry
    358 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    359 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
    360 ; X32-NEXT:    movl (%ecx), %ecx
    361 ; X32-NEXT:    movl %ecx, (%eax)
    362 ; X32-NEXT:    vmovd %ecx, %xmm0
    363 ; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
    364 ; X32-NEXT:    retl
    365 ;
    366 ; X64-LABEL: F2:
    367 ; X64:       ## %bb.0: ## %entry
    368 ; X64-NEXT:    movl (%rdi), %eax
    369 ; X64-NEXT:    movl %eax, (%rsi)
    370 ; X64-NEXT:    vmovd %eax, %xmm0
    371 ; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
    372 ; X64-NEXT:    retq
    373 entry:
    374   %q = load i32, i32* %ptr, align 4
    375   store i32 %q, i32* %ptr2, align 4 ; to create a chain to prevent broadcast
    376   %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
    377   %vecinit2.i = insertelement <4 x i32> %vecinit.i, i32 %q, i32 1
    378   %vecinit4.i = insertelement <4 x i32> %vecinit2.i, i32 %q, i32 2
    379   %vecinit6.i = insertelement <4 x i32> %vecinit4.i, i32 %q, i32 3
    380   ret <4 x i32> %vecinit6.i
    381 }
    382 
    383 ; FIXME: Pointer adjusted broadcasts
    384 
    ; Loads a whole <4 x i32> from %ptr and splats element 1 across four lanes
    ; with a shufflevector. The assertions expect the shuffle to be folded with
    ; the load as vpermilps mem[1,1,1,1]; no vbroadcast is expected here.
    385 define <4 x i32> @load_splat_4i32_4i32_1111(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
    386 ; X32-LABEL: load_splat_4i32_4i32_1111:
    387 ; X32:       ## %bb.0: ## %entry
    388 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    389 ; X32-NEXT:    vpermilps {{.*#+}} xmm0 = mem[1,1,1,1]
    390 ; X32-NEXT:    retl
    391 ;
    392 ; X64-LABEL: load_splat_4i32_4i32_1111:
    393 ; X64:       ## %bb.0: ## %entry
    394 ; X64-NEXT:    vpermilps {{.*#+}} xmm0 = mem[1,1,1,1]
    395 ; X64-NEXT:    retq
    396 entry:
    397   %ld = load <4 x i32>, <4 x i32>* %ptr
    398   %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
    399   ret <4 x i32> %ret
    400 }
    401 
    ; Loads a <4 x i32> from %ptr and splats element 3 across eight lanes. The
    ; assertions expect a 256-bit vbroadcastss whose address is the pointer
    ; plus 12 bytes (element index 3 times the 4-byte element size).
    402 define <8 x i32> @load_splat_8i32_4i32_33333333(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
    403 ; X32-LABEL: load_splat_8i32_4i32_33333333:
    404 ; X32:       ## %bb.0: ## %entry
    405 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    406 ; X32-NEXT:    vbroadcastss 12(%eax), %ymm0
    407 ; X32-NEXT:    retl
    408 ;
    409 ; X64-LABEL: load_splat_8i32_4i32_33333333:
    410 ; X64:       ## %bb.0: ## %entry
    411 ; X64-NEXT:    vbroadcastss 12(%rdi), %ymm0
    412 ; X64-NEXT:    retq
    413 entry:
    414   %ld = load <4 x i32>, <4 x i32>* %ptr
    415   %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
    416   ret <8 x i32> %ret
    417 }
    418 
    ; Loads an <8 x i32> from %ptr and splats element 5 across eight lanes. The
    ; assertions expect a 256-bit vbroadcastss whose address is the pointer
    ; plus 20 bytes (element index 5 times the 4-byte element size).
    419 define <8 x i32> @load_splat_8i32_8i32_55555555(<8 x i32>* %ptr) nounwind uwtable readnone ssp {
    420 ; X32-LABEL: load_splat_8i32_8i32_55555555:
    421 ; X32:       ## %bb.0: ## %entry
    422 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    423 ; X32-NEXT:    vbroadcastss 20(%eax), %ymm0
    424 ; X32-NEXT:    retl
    425 ;
    426 ; X64-LABEL: load_splat_8i32_8i32_55555555:
    427 ; X64:       ## %bb.0: ## %entry
    428 ; X64-NEXT:    vbroadcastss 20(%rdi), %ymm0
    429 ; X64-NEXT:    retq
    430 entry:
    431   %ld = load <8 x i32>, <8 x i32>* %ptr
    432   %ret = shufflevector <8 x i32> %ld, <8 x i32> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
    433   ret <8 x i32> %ret
    434 }
    435 
    ; Loads a <4 x float> from %ptr and splats element 1 across four lanes. The
    ; assertions expect a 128-bit vbroadcastss whose address is the pointer
    ; plus 4 bytes (element index 1 times the 4-byte element size).
    436 define <4 x float> @load_splat_4f32_4f32_1111(<4 x float>* %ptr) nounwind uwtable readnone ssp {
    437 ; X32-LABEL: load_splat_4f32_4f32_1111:
    438 ; X32:       ## %bb.0: ## %entry
    439 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    440 ; X32-NEXT:    vbroadcastss 4(%eax), %xmm0
    441 ; X32-NEXT:    retl
    442 ;
    443 ; X64-LABEL: load_splat_4f32_4f32_1111:
    444 ; X64:       ## %bb.0: ## %entry
    445 ; X64-NEXT:    vbroadcastss 4(%rdi), %xmm0
    446 ; X64-NEXT:    retq
    447 entry:
    448   %ld = load <4 x float>, <4 x float>* %ptr
    449   %ret = shufflevector <4 x float> %ld, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
    450   ret <4 x float> %ret
    451 }
    452 
    ; Loads a <4 x float> from %ptr and splats element 3 across eight lanes.
    ; The assertions expect a 256-bit vbroadcastss whose address is the pointer
    ; plus 12 bytes (element index 3 times the 4-byte element size).
    453 define <8 x float> @load_splat_8f32_4f32_33333333(<4 x float>* %ptr) nounwind uwtable readnone ssp {
    454 ; X32-LABEL: load_splat_8f32_4f32_33333333:
    455 ; X32:       ## %bb.0: ## %entry
    456 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    457 ; X32-NEXT:    vbroadcastss 12(%eax), %ymm0
    458 ; X32-NEXT:    retl
    459 ;
    460 ; X64-LABEL: load_splat_8f32_4f32_33333333:
    461 ; X64:       ## %bb.0: ## %entry
    462 ; X64-NEXT:    vbroadcastss 12(%rdi), %ymm0
    463 ; X64-NEXT:    retq
    464 entry:
    465   %ld = load <4 x float>, <4 x float>* %ptr
    466   %ret = shufflevector <4 x float> %ld, <4 x float> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
    467   ret <8 x float> %ret
    468 }
    469 
    ; Loads an <8 x float> from %ptr and splats element 5 across eight lanes.
    ; The assertions expect a 256-bit vbroadcastss whose address is the pointer
    ; plus 20 bytes (element index 5 times the 4-byte element size).
    470 define <8 x float> @load_splat_8f32_8f32_55555555(<8 x float>* %ptr) nounwind uwtable readnone ssp {
    471 ; X32-LABEL: load_splat_8f32_8f32_55555555:
    472 ; X32:       ## %bb.0: ## %entry
    473 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    474 ; X32-NEXT:    vbroadcastss 20(%eax), %ymm0
    475 ; X32-NEXT:    retl
    476 ;
    477 ; X64-LABEL: load_splat_8f32_8f32_55555555:
    478 ; X64:       ## %bb.0: ## %entry
    479 ; X64-NEXT:    vbroadcastss 20(%rdi), %ymm0
    480 ; X64-NEXT:    retq
    481 entry:
    482   %ld = load <8 x float>, <8 x float>* %ptr
    483   %ret = shufflevector <8 x float> %ld, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
    484   ret <8 x float> %ret
    485 }
    486 
    ; Loads a <2 x i64> from %ptr and splats element 1 across both lanes. The
    ; assertions expect the shuffle folded with the load as vpermilps
    ; mem[2,3,2,3], i.e. 32-bit lanes 2 and 3 (the upper i64) repeated twice;
    ; no vbroadcast is expected here.
    487 define <2 x i64> @load_splat_2i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {
    488 ; X32-LABEL: load_splat_2i64_2i64_1111:
    489 ; X32:       ## %bb.0: ## %entry
    490 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    491 ; X32-NEXT:    vpermilps {{.*#+}} xmm0 = mem[2,3,2,3]
    492 ; X32-NEXT:    retl
    493 ;
    494 ; X64-LABEL: load_splat_2i64_2i64_1111:
    495 ; X64:       ## %bb.0: ## %entry
    496 ; X64-NEXT:    vpermilps {{.*#+}} xmm0 = mem[2,3,2,3]
    497 ; X64-NEXT:    retq
    498 entry:
    499   %ld = load <2 x i64>, <2 x i64>* %ptr
    500   %ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
    501   ret <2 x i64> %ret
    502 }
    503 
    ; Loads a <2 x i64> from %ptr and splats element 1 across four lanes. The
    ; assertions expect a vbroadcastsd whose address is the pointer plus
    ; 8 bytes (element index 1 times the 8-byte element size).
    504 define <4 x i64> @load_splat_4i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {
    505 ; X32-LABEL: load_splat_4i64_2i64_1111:
    506 ; X32:       ## %bb.0: ## %entry
    507 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    508 ; X32-NEXT:    vbroadcastsd 8(%eax), %ymm0
    509 ; X32-NEXT:    retl
    510 ;
    511 ; X64-LABEL: load_splat_4i64_2i64_1111:
    512 ; X64:       ## %bb.0: ## %entry
    513 ; X64-NEXT:    vbroadcastsd 8(%rdi), %ymm0
    514 ; X64-NEXT:    retq
    515 entry:
    516   %ld = load <2 x i64>, <2 x i64>* %ptr
    517   %ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
    518   ret <4 x i64> %ret
    519 }
    520 
    ; Loads a <4 x i64> from %ptr and splats element 2 across four lanes. The
    ; assertions expect a vbroadcastsd whose address is the pointer plus
    ; 16 bytes (element index 2 times the 8-byte element size).
    521 define <4 x i64> @load_splat_4i64_4i64_2222(<4 x i64>* %ptr) nounwind uwtable readnone ssp {
    522 ; X32-LABEL: load_splat_4i64_4i64_2222:
    523 ; X32:       ## %bb.0: ## %entry
    524 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    525 ; X32-NEXT:    vbroadcastsd 16(%eax), %ymm0
    526 ; X32-NEXT:    retl
    527 ;
    528 ; X64-LABEL: load_splat_4i64_4i64_2222:
    529 ; X64:       ## %bb.0: ## %entry
    530 ; X64-NEXT:    vbroadcastsd 16(%rdi), %ymm0
    531 ; X64-NEXT:    retq
    532 entry:
    533   %ld = load <4 x i64>, <4 x i64>* %ptr
    534   %ret = shufflevector <4 x i64> %ld, <4 x i64> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
    535   ret <4 x i64> %ret
    536 }
    537 
    ; Loads a <2 x double> from %ptr and splats element 1 across both lanes.
    ; The assertions expect a vmovddup from memory. The instruction's operand
    ; text is matched by the {{.*#+}} pattern, so the exact memory address
    ; (including any byte offset) is not pinned by these assertions.
    538 define <2 x double> @load_splat_2f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp {
    539 ; X32-LABEL: load_splat_2f64_2f64_1111:
    540 ; X32:       ## %bb.0: ## %entry
    541 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    542 ; X32-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
    543 ; X32-NEXT:    retl
    544 ;
    545 ; X64-LABEL: load_splat_2f64_2f64_1111:
    546 ; X64:       ## %bb.0: ## %entry
    547 ; X64-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
    548 ; X64-NEXT:    retq
    549 entry:
    550   %ld = load <2 x double>, <2 x double>* %ptr
    551   %ret = shufflevector <2 x double> %ld, <2 x double> undef, <2 x i32> <i32 1, i32 1>
    552   ret <2 x double> %ret
    553 }
    554 
    ; Loads a <2 x double> from %ptr and splats element 1 across four lanes.
    ; The assertions expect a vbroadcastsd whose address is the pointer plus
    ; 8 bytes (element index 1 times the 8-byte element size).
    555 define <4 x double> @load_splat_4f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp {
    556 ; X32-LABEL: load_splat_4f64_2f64_1111:
    557 ; X32:       ## %bb.0: ## %entry
    558 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    559 ; X32-NEXT:    vbroadcastsd 8(%eax), %ymm0
    560 ; X32-NEXT:    retl
    561 ;
    562 ; X64-LABEL: load_splat_4f64_2f64_1111:
    563 ; X64:       ## %bb.0: ## %entry
    564 ; X64-NEXT:    vbroadcastsd 8(%rdi), %ymm0
    565 ; X64-NEXT:    retq
    566 entry:
    567   %ld = load <2 x double>, <2 x double>* %ptr
    568   %ret = shufflevector <2 x double> %ld, <2 x double> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
    569   ret <4 x double> %ret
    570 }
    571 
    ; Loads a <4 x double> from %ptr and splats element 2 across four lanes.
    ; The assertions expect a vbroadcastsd whose address is the pointer plus
    ; 16 bytes (element index 2 times the 8-byte element size).
    572 define <4 x double> @load_splat_4f64_4f64_2222(<4 x double>* %ptr) nounwind uwtable readnone ssp {
    573 ; X32-LABEL: load_splat_4f64_4f64_2222:
    574 ; X32:       ## %bb.0: ## %entry
    575 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    576 ; X32-NEXT:    vbroadcastsd 16(%eax), %ymm0
    577 ; X32-NEXT:    retl
    578 ;
    579 ; X64-LABEL: load_splat_4f64_4f64_2222:
    580 ; X64:       ## %bb.0: ## %entry
    581 ; X64-NEXT:    vbroadcastsd 16(%rdi), %ymm0
    582 ; X64-NEXT:    retq
    583 entry:
    584   %ld = load <4 x double>, <4 x double>* %ptr
    585   %ret = shufflevector <4 x double> %ld, <4 x double> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
    586   ret <4 x double> %ret
    587 }
    588 
    589 ; Unsupported vbroadcasts
    590 
    ; 128-bit i64 splat: an i64 loaded from %ptr is inserted into both lanes of
    ; a <2 x i64>. Neither run expects a vbroadcast. The X32 assertions expect
    ; two 32-bit loads and a vmovd/vpinsrd build; the X64 assertions expect a
    ; vmovsd load followed by vpermilps [0,1,0,1].
    591 define <2 x i64> @G(i64* %ptr) nounwind uwtable readnone ssp {
    592 ; X32-LABEL: G:
    593 ; X32:       ## %bb.0: ## %entry
    594 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    595 ; X32-NEXT:    movl (%eax), %ecx
    596 ; X32-NEXT:    movl 4(%eax), %eax
    597 ; X32-NEXT:    vmovd %ecx, %xmm0
    598 ; X32-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
    599 ; X32-NEXT:    vpinsrd $2, %ecx, %xmm0, %xmm0
    600 ; X32-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
    601 ; X32-NEXT:    retl
    602 ;
    603 ; X64-LABEL: G:
    604 ; X64:       ## %bb.0: ## %entry
    605 ; X64-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    606 ; X64-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
    607 ; X64-NEXT:    retq
    608 entry:
    609   %q = load i64, i64* %ptr, align 8
    610   %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
    611   %vecinit2.i = insertelement <2 x i64> %vecinit.i, i64 %q, i32 1
    612   ret <2 x i64> %vecinit2.i
    613 }
    614 
    ; Two-lane i64 splat like @G, but the loaded value is also stored to %ptr2
    ; (see the inline comment on the store). The X64 assertions expect a GPR
    ; load, a GPR store, vmovq into xmm0, then vpshufd [0,1,0,1]; the X32
    ; assertions expect the 32-bit halves to be loaded, stored, and assembled
    ; with vmovd/vpinsrd.
    615 define <2 x i64> @G2(i64* %ptr, i64* %ptr2) nounwind uwtable readnone ssp {
    616 ; X32-LABEL: G2:
    617 ; X32:       ## %bb.0: ## %entry
    618 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    619 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
    620 ; X32-NEXT:    movl (%ecx), %edx
    621 ; X32-NEXT:    movl 4(%ecx), %ecx
    622 ; X32-NEXT:    movl %ecx, 4(%eax)
    623 ; X32-NEXT:    movl %edx, (%eax)
    624 ; X32-NEXT:    vmovd %edx, %xmm0
    625 ; X32-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
    626 ; X32-NEXT:    vpinsrd $2, %edx, %xmm0, %xmm0
    627 ; X32-NEXT:    vpinsrd $3, %ecx, %xmm0, %xmm0
    628 ; X32-NEXT:    retl
    629 ;
    630 ; X64-LABEL: G2:
    631 ; X64:       ## %bb.0: ## %entry
    632 ; X64-NEXT:    movq (%rdi), %rax
    633 ; X64-NEXT:    movq %rax, (%rsi)
    634 ; X64-NEXT:    vmovq %rax, %xmm0
    635 ; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
    636 ; X64-NEXT:    retq
    637 entry:
    638   %q = load i64, i64* %ptr, align 8
    639   store i64 %q, i64* %ptr2, align 8 ; to create a chain to prevent broadcast
    640   %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
    641   %vecinit2.i = insertelement <2 x i64> %vecinit.i, i64 %q, i32 1
    642   ret <2 x i64> %vecinit2.i
    643 }
    644 
    ; Shuffle of a register argument with mask <1, undef, undef, undef>: only
    ; lane 0 of the result is defined, and it takes element 1 of %a. Both runs
    ; expect a single register-to-register vpermilps with mask [1,1,2,3].
    645 define <4 x i32> @H(<4 x i32> %a) {
    646 ; X32-LABEL: H:
    647 ; X32:       ## %bb.0: ## %entry
    648 ; X32-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,2,3]
    649 ; X32-NEXT:    retl
    650 ;
    651 ; X64-LABEL: H:
    652 ; X64:       ## %bb.0: ## %entry
    653 ; X64-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,2,3]
    654 ; X64-NEXT:    retq
    655 entry:
    656   %x = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
    657   ret <4 x i32> %x
    658 }
    659 
    ; 128-bit double splat: a double loaded from %ptr (the load is declared
    ; with align 4) is inserted into both lanes of a <2 x double>. Both runs
    ; expect a single vmovddup from memory.
    660 define <2 x double> @I(double* %ptr) nounwind uwtable readnone ssp {
    661 ; X32-LABEL: I:
    662 ; X32:       ## %bb.0: ## %entry
    663 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    664 ; X32-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
    665 ; X32-NEXT:    retl
    666 ;
    667 ; X64-LABEL: I:
    668 ; X64:       ## %bb.0: ## %entry
    669 ; X64-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
    670 ; X64-NEXT:    retq
    671 entry:
    672   %q = load double, double* %ptr, align 4
    673   %vecinit.i = insertelement <2 x double> undef, double %q, i32 0
    674   %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
    675   ret <2 x double> %vecinit2.i
    676 }
    677 
    ; Two-lane double splat like @I, but the loaded value is also stored to
    ; %ptr2 (see the inline comment on the store). The assertions expect the
    ; load to stay a separate vmovsd, followed by the vmovsd store and a
    ; register-to-register vmovddup.
    678 define <2 x double> @I2(double* %ptr, double* %ptr2) nounwind uwtable readnone ssp {
    679 ; X32-LABEL: I2:
    680 ; X32:       ## %bb.0: ## %entry
    681 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    682 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
    683 ; X32-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    684 ; X32-NEXT:    vmovsd %xmm0, (%eax)
    685 ; X32-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
    686 ; X32-NEXT:    retl
    687 ;
    688 ; X64-LABEL: I2:
    689 ; X64:       ## %bb.0: ## %entry
    690 ; X64-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    691 ; X64-NEXT:    vmovsd %xmm0, (%rsi)
    692 ; X64-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
    693 ; X64-NEXT:    retq
    694 entry:
    695   %q = load double, double* %ptr, align 4
    696   store double %q, double* %ptr2, align 4 ; to create a chain to prevent broadcast
    697   %vecinit.i = insertelement <2 x double> undef, double %q, i32 0
    698   %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
    699   ret <2 x double> %vecinit2.i
    700 }
    701 
    ; Four-lane float splat from %ptr, followed by an unrelated i32 load from
    ; %k whose value is stored through an undef pointer (the "force a chain"
    ; comment below). The assertions expect vbroadcastss to still be selected
    ; for the splat, with the integer load and store emitted after it.
    ; NOTE(review): the store address is undef, so the address register in the
    ; expected store (the same register that holds the loaded value)
    ; presumably carries no meaning - confirm before relying on it.
    702 define <4 x float> @_RR(float* %ptr, i32* %k) nounwind uwtable readnone ssp {
    703 ; X32-LABEL: _RR:
    704 ; X32:       ## %bb.0: ## %entry
    705 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    706 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
    707 ; X32-NEXT:    vbroadcastss (%ecx), %xmm0
    708 ; X32-NEXT:    movl (%eax), %eax
    709 ; X32-NEXT:    movl %eax, (%eax)
    710 ; X32-NEXT:    retl
    711 ;
    712 ; X64-LABEL: _RR:
    713 ; X64:       ## %bb.0: ## %entry
    714 ; X64-NEXT:    vbroadcastss (%rdi), %xmm0
    715 ; X64-NEXT:    movl (%rsi), %eax
    716 ; X64-NEXT:    movl %eax, (%rax)
    717 ; X64-NEXT:    retq
    718 entry:
    719   %q = load float, float* %ptr, align 4
    720   %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
    721   %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
    722   %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
    723   %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
    724   ; force a chain
    725   %j = load i32, i32* %k, align 4
    726   store i32 %j, i32* undef
    727   ret <4 x float> %vecinit6.i
    728 }
    729 
    ; _RR2: splat one loaded float across a <4 x float> by inserting it into
    ; lane 0 and shuffling with an all-zero mask. Both RUN configurations expect
    ; a single vbroadcastss from memory into %xmm0.
    ; The %k parameter is not referenced in the body.
    730 define <4 x float> @_RR2(float* %ptr, i32* %k) nounwind uwtable readnone ssp {
    731 ; X32-LABEL: _RR2:
    732 ; X32:       ## %bb.0: ## %entry
    733 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    734 ; X32-NEXT:    vbroadcastss (%eax), %xmm0
    735 ; X32-NEXT:    retl
    736 ;
    737 ; X64-LABEL: _RR2:
    738 ; X64:       ## %bb.0: ## %entry
    739 ; X64-NEXT:    vbroadcastss (%rdi), %xmm0
    740 ; X64-NEXT:    retq
    741 entry:
    742   %q = load float, float* %ptr, align 4
    743   %v = insertelement <4 x float> undef, float %q, i32 0
    744   %t = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer ; every result lane reads lane 0 of %v
    745   ret <4 x float> %t
    746 }
    747 
    748 ; These tests check that a vbroadcast instruction is used when we have a splat
    749 ; formed from a concat_vectors (via the shufflevector) of two BUILD_VECTORs
    750 ; (via the insertelements).
    751 
    ; splat_concat1: build a 4-lane float splat with four insertelements, then
    ; widen it to <8 x float> with a shufflevector that repeats lanes 0-3 of that
    ; one vector twice. Both RUN configurations expect a single 256-bit
    ; vbroadcastss from memory.
    752 define <8 x float> @splat_concat1(float* %p) {
    753 ; X32-LABEL: splat_concat1:
    754 ; X32:       ## %bb.0:
    755 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    756 ; X32-NEXT:    vbroadcastss (%eax), %ymm0
    757 ; X32-NEXT:    retl
    758 ;
    759 ; X64-LABEL: splat_concat1:
    760 ; X64:       ## %bb.0:
    761 ; X64-NEXT:    vbroadcastss (%rdi), %ymm0
    762 ; X64-NEXT:    retq
    763   %1 = load float, float* %p, align 4
    764   %2 = insertelement <4 x float> undef, float %1, i32 0
    765   %3 = insertelement <4 x float> %2, float %1, i32 1
    766   %4 = insertelement <4 x float> %3, float %1, i32 2
    767   %5 = insertelement <4 x float> %4, float %1, i32 3
    768   %6 = shufflevector <4 x float> %5, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> ; both halves come from %5
    769   ret <8 x float> %6
    770 }
    771 
    ; splat_concat2: build two separate 4-lane float splats of the same loaded
    ; scalar (%5 and %9), then concatenate them into <8 x float> with an identity
    ; shufflevector mask over both operands. Both RUN configurations expect a
    ; single 256-bit vbroadcastss from memory.
    772 define <8 x float> @splat_concat2(float* %p) {
    773 ; X32-LABEL: splat_concat2:
    774 ; X32:       ## %bb.0:
    775 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    776 ; X32-NEXT:    vbroadcastss (%eax), %ymm0
    777 ; X32-NEXT:    retl
    778 ;
    779 ; X64-LABEL: splat_concat2:
    780 ; X64:       ## %bb.0:
    781 ; X64-NEXT:    vbroadcastss (%rdi), %ymm0
    782 ; X64-NEXT:    retq
    783   %1 = load float, float* %p, align 4
          ; First 4-lane splat.
    784   %2 = insertelement <4 x float> undef, float %1, i32 0
    785   %3 = insertelement <4 x float> %2, float %1, i32 1
    786   %4 = insertelement <4 x float> %3, float %1, i32 2
    787   %5 = insertelement <4 x float> %4, float %1, i32 3
          ; Second 4-lane splat, built through its own insertelement chain.
    788   %6 = insertelement <4 x float> undef, float %1, i32 0
    789   %7 = insertelement <4 x float> %6, float %1, i32 1
    790   %8 = insertelement <4 x float> %7, float %1, i32 2
    791   %9 = insertelement <4 x float> %8, float %1, i32 3
    792   %10 = shufflevector <4 x float> %5, <4 x float> %9, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; low half = %5, high half = %9
    793   ret <8 x float> %10
    794 }
    795 
    ; splat_concat3: build a 2-lane double splat with two insertelements, then
    ; widen it to <4 x double> with a shufflevector that repeats lanes 0-1 of that
    ; one vector twice. Both RUN configurations expect a single 256-bit
    ; vbroadcastsd from memory.
    796 define <4 x double> @splat_concat3(double* %p) {
    797 ; X32-LABEL: splat_concat3:
    798 ; X32:       ## %bb.0:
    799 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    800 ; X32-NEXT:    vbroadcastsd (%eax), %ymm0
    801 ; X32-NEXT:    retl
    802 ;
    803 ; X64-LABEL: splat_concat3:
    804 ; X64:       ## %bb.0:
    805 ; X64-NEXT:    vbroadcastsd (%rdi), %ymm0
    806 ; X64-NEXT:    retq
    807   %1 = load double, double* %p, align 8
    808   %2 = insertelement <2 x double> undef, double %1, i32 0
    809   %3 = insertelement <2 x double> %2, double %1, i32 1
    810   %4 = shufflevector <2 x double> %3, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> ; both halves come from %3
    811   ret <4 x double> %4
    812 }
    813 
    ; splat_concat4: build two separate 2-lane double splats of the same loaded
    ; scalar (%3 and %5), then concatenate them into <4 x double> with an identity
    ; shufflevector mask over both operands. The second vector is built through
    ; its own insertelement chain (%4 -> %5), the same shape splat_concat2 uses
    ; for floats. Both RUN configurations expect a single 256-bit vbroadcastsd
    ; from memory.
    814 define <4 x double> @splat_concat4(double* %p) {
    815 ; X32-LABEL: splat_concat4:
    816 ; X32:       ## %bb.0:
    817 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    818 ; X32-NEXT:    vbroadcastsd (%eax), %ymm0
    819 ; X32-NEXT:    retl
    820 ;
    821 ; X64-LABEL: splat_concat4:
    822 ; X64:       ## %bb.0:
    823 ; X64-NEXT:    vbroadcastsd (%rdi), %ymm0
    824 ; X64-NEXT:    retq
    825   %1 = load double, double* %p, align 8
          ; First 2-lane splat.
    826   %2 = insertelement <2 x double> undef, double %1, i32 0
    827   %3 = insertelement <2 x double> %2, double %1, i32 1
          ; Second 2-lane splat: %5 extends %4 (not %2) so %4 is actually used.
    828   %4 = insertelement <2 x double> undef, double %1, i32 0
    829   %5 = insertelement <2 x double> %4, double %1, i32 1
    830   %6 = shufflevector <2 x double> %3, <2 x double> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; low half = %3, high half = %5
    831   ret <4 x double> %6
    832 }
    833 
    834 ; PR34041
    ; broadcast_shuffle_1000: the loaded double is inserted only into lane 0 of a
    ; <2 x double>; lane 1 stays undef. The shuffle mask <1,0,0,0> therefore puts
    ; the undef lane in result lane 0 and the loaded value in result lanes 1-3.
    ; Both RUN configurations expect a single 256-bit vbroadcastsd from memory.
    835 define <4 x double> @broadcast_shuffle_1000(double* %p) {
    836 ; X32-LABEL: broadcast_shuffle_1000:
    837 ; X32:       ## %bb.0:
    838 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    839 ; X32-NEXT:    vbroadcastsd (%eax), %ymm0
    840 ; X32-NEXT:    retl
    841 ;
    842 ; X64-LABEL: broadcast_shuffle_1000:
    843 ; X64:       ## %bb.0:
    844 ; X64-NEXT:    vbroadcastsd (%rdi), %ymm0
    845 ; X64-NEXT:    retq
    846   %1 = load double, double* %p
    847   %2 = insertelement <2 x double> undef, double %1, i32 0
    848   %3 = shufflevector <2 x double> %2, <2 x double> undef, <4 x i32> <i32 1, i32 0, i32 0, i32 0> ; index 1 selects the undef lane of %2
    849   ret <4 x double> %3
    850 }
    851 
    ; broadcast_shuffle1032: the loaded double is placed in lane 1 of %2 and in
    ; lane 0 of %3; the other lane of each vector is undef. With mask <1,0,3,2>
    ; result lanes 0 and 3 receive the loaded value and lanes 1 and 2 receive
    ; undef lanes. Both RUN configurations expect a single 256-bit vbroadcastsd
    ; from memory.
    852 define <4 x double> @broadcast_shuffle1032(double* %p) {
    853 ; X32-LABEL: broadcast_shuffle1032:
    854 ; X32:       ## %bb.0:
    855 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    856 ; X32-NEXT:    vbroadcastsd (%eax), %ymm0
    857 ; X32-NEXT:    retl
    858 ;
    859 ; X64-LABEL: broadcast_shuffle1032:
    860 ; X64:       ## %bb.0:
    861 ; X64-NEXT:    vbroadcastsd (%rdi), %ymm0
    862 ; X64-NEXT:    retq
    863   %1 = load double, double* %p
    864   %2 = insertelement <2 x double> undef, double %1, i32 1 ; %2 = <undef, %1>
    865   %3 = insertelement <2 x double> undef, double %1, i32 0 ; %3 = <%1, undef>
    866   %4 = shufflevector <2 x double> %2, <2 x double> %3, <4 x i32> <i32 1, i32 0, i32 3, i32 2> ; indices 0-1 select from %2, 2-3 from %3
    867   ret <4 x double> %4
    868 }
    869 
    870 ;
    871 ; When VBROADCAST replaces an existing load, ensure it still respects lifetime dependencies.
    872 ;
    ; broadcast_lifetime: two <4 x float> stack slots are each filled by a call to
    ; @gfunc inside their own lifetime.start/lifetime.end region. The whole vector
    ; is loaded from each slot, element 1 of each is extracted, and the first is
    ; subtracted from the second. The CHECK lines expect a scalar vmovss load
    ; after each call, a spill of the first value across the second call, and a
    ; vsubss that folds the reload.
    873 define float @broadcast_lifetime() nounwind {
    874 ; X32-LABEL: broadcast_lifetime:
    875 ; X32:       ## %bb.0:
    876 ; X32-NEXT:    pushl %esi
    877 ; X32-NEXT:    subl $40, %esp
    878 ; X32-NEXT:    leal {{[0-9]+}}(%esp), %esi
    879 ; X32-NEXT:    movl %esi, (%esp)
    880 ; X32-NEXT:    calll _gfunc
    881 ; X32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
    882 ; X32-NEXT:    vmovss %xmm0, {{[0-9]+}}(%esp) ## 4-byte Spill
    883 ; X32-NEXT:    movl %esi, (%esp)
    884 ; X32-NEXT:    calll _gfunc
    885 ; X32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
    886 ; X32-NEXT:    vsubss {{[0-9]+}}(%esp), %xmm0, %xmm0 ## 4-byte Folded Reload
    887 ; X32-NEXT:    vmovss %xmm0, {{[0-9]+}}(%esp)
    888 ; X32-NEXT:    flds {{[0-9]+}}(%esp)
    889 ; X32-NEXT:    addl $40, %esp
    890 ; X32-NEXT:    popl %esi
    891 ; X32-NEXT:    retl
    892 ;
    893 ; X64-LABEL: broadcast_lifetime:
    894 ; X64:       ## %bb.0:
    895 ; X64-NEXT:    subq $40, %rsp
    896 ; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdi
    897 ; X64-NEXT:    callq _gfunc
    898 ; X64-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
    899 ; X64-NEXT:    vmovss %xmm0, {{[0-9]+}}(%rsp) ## 4-byte Spill
    900 ; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdi
    901 ; X64-NEXT:    callq _gfunc
    902 ; X64-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
    903 ; X64-NEXT:    vsubss {{[0-9]+}}(%rsp), %xmm0, %xmm0 ## 4-byte Folded Reload
    904 ; X64-NEXT:    addq $40, %rsp
    905 ; X64-NEXT:    retq
    906   %1 = alloca <4 x float>, align 16
    907   %2 = alloca <4 x float>, align 16
    908   %3 = bitcast <4 x float>* %1 to i8*
    909   %4 = bitcast <4 x float>* %2 to i8*
    910 
          ; First slot: live only between this start/end pair.
    911   call void @llvm.lifetime.start.p0i8(i64 16, i8* %3)
    912   call void @gfunc(<4 x float>* %1)
    913   %5 = load <4 x float>, <4 x float>* %1, align 16
    914   call void @llvm.lifetime.end.p0i8(i64 16, i8* %3)
    915 
          ; Second slot: its lifetime starts after the first slot's has ended.
    916   call void @llvm.lifetime.start.p0i8(i64 16, i8* %4)
    917   call void @gfunc(<4 x float>* %2)
    918   %6 = load <4 x float>, <4 x float>* %2, align 16
    919   call void @llvm.lifetime.end.p0i8(i64 16, i8* %4)
    920 
    921   %7 = extractelement <4 x float> %5, i32 1
    922   %8 = extractelement <4 x float> %6, i32 1
    923   %9 = fsub float %8, %7 ; second slot's element minus first slot's element
    924   ret float %9
    925 }
    926 
    ; External declarations referenced by @broadcast_lifetime: the callee that
    ; receives each stack slot, and the lifetime marker intrinsics.
    927 declare void @gfunc(<4 x float>*)
    928 declare void @llvm.lifetime.start.p0i8(i64, i8*)
    929 declare void @llvm.lifetime.end.p0i8(i64, i8*)
    930