Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=X32
      3 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=X64
      4 
      5 define <4 x i64> @A(i64* %ptr) nounwind uwtable readnone ssp {
        ; Splat of a scalar i64 load into all four lanes of a <4 x i64>.
        ; The x86_64 run expects a single vbroadcastsd from memory. The i686 run
        ; expects the value to be rebuilt from its two 32-bit halves
        ; (vmovd + vpinsrd) and then copied into the upper half with vinsertf128.
      6 ; X32-LABEL: A:
      7 ; X32:       ## BB#0: ## %entry
      8 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
      9 ; X32-NEXT:    movl (%eax), %ecx
     10 ; X32-NEXT:    movl 4(%eax), %eax
     11 ; X32-NEXT:    vmovd %ecx, %xmm0
     12 ; X32-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
     13 ; X32-NEXT:    vpinsrd $2, %ecx, %xmm0, %xmm0
     14 ; X32-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
     15 ; X32-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
     16 ; X32-NEXT:    retl
     17 ;
     18 ; X64-LABEL: A:
     19 ; X64:       ## BB#0: ## %entry
     20 ; X64-NEXT:    vbroadcastsd (%rdi), %ymm0
     21 ; X64-NEXT:    retq
     22 entry:
     23   %q = load i64, i64* %ptr, align 8
     24   %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
     25   %vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1
     26   %vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2
     27   %vecinit6.i = insertelement <4 x i64> %vecinit4.i, i64 %q, i32 3
     28   ret <4 x i64> %vecinit6.i
     29 }
     30 
     31 define <8 x i32> @B(i32* %ptr) nounwind uwtable readnone ssp {
        ; Splat of a scalar i32 load into a <8 x i32>. Both runs expect one
        ; vbroadcastss from memory into ymm0.
        ; NOTE(review): only lanes 0-3 are inserted below; lanes 4-7 remain undef.
     32 ; X32-LABEL: B:
     33 ; X32:       ## BB#0: ## %entry
     34 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
     35 ; X32-NEXT:    vbroadcastss (%eax), %ymm0
     36 ; X32-NEXT:    retl
     37 ;
     38 ; X64-LABEL: B:
     39 ; X64:       ## BB#0: ## %entry
     40 ; X64-NEXT:    vbroadcastss (%rdi), %ymm0
     41 ; X64-NEXT:    retq
     42 entry:
     43   %q = load i32, i32* %ptr, align 4
     44   %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
     45   %vecinit2.i = insertelement <8 x i32> %vecinit.i, i32 %q, i32 1
     46   %vecinit4.i = insertelement <8 x i32> %vecinit2.i, i32 %q, i32 2
     47   %vecinit6.i = insertelement <8 x i32> %vecinit4.i, i32 %q, i32 3
     48   ret <8 x i32> %vecinit6.i
     49 }
     50 
     51 define <4 x double> @C(double* %ptr) nounwind uwtable readnone ssp {
        ; Splat of a scalar double load into all four lanes of a <4 x double>.
        ; Both runs expect one vbroadcastsd from memory into ymm0.
     52 ; X32-LABEL: C:
     53 ; X32:       ## BB#0: ## %entry
     54 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
     55 ; X32-NEXT:    vbroadcastsd (%eax), %ymm0
     56 ; X32-NEXT:    retl
     57 ;
     58 ; X64-LABEL: C:
     59 ; X64:       ## BB#0: ## %entry
     60 ; X64-NEXT:    vbroadcastsd (%rdi), %ymm0
     61 ; X64-NEXT:    retq
     62 entry:
     63   %q = load double, double* %ptr, align 8
     64   %vecinit.i = insertelement <4 x double> undef, double %q, i32 0
     65   %vecinit2.i = insertelement <4 x double> %vecinit.i, double %q, i32 1
     66   %vecinit4.i = insertelement <4 x double> %vecinit2.i, double %q, i32 2
     67   %vecinit6.i = insertelement <4 x double> %vecinit4.i, double %q, i32 3
     68   ret <4 x double> %vecinit6.i
     69 }
     70 
     71 define <8 x float> @D(float* %ptr) nounwind uwtable readnone ssp {
        ; Splat of a scalar float load into a <8 x float>. Both runs expect one
        ; vbroadcastss from memory into ymm0.
        ; NOTE(review): only lanes 0-3 are inserted below; lanes 4-7 remain undef.
     72 ; X32-LABEL: D:
     73 ; X32:       ## BB#0: ## %entry
     74 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
     75 ; X32-NEXT:    vbroadcastss (%eax), %ymm0
     76 ; X32-NEXT:    retl
     77 ;
     78 ; X64-LABEL: D:
     79 ; X64:       ## BB#0: ## %entry
     80 ; X64-NEXT:    vbroadcastss (%rdi), %ymm0
     81 ; X64-NEXT:    retq
     82 entry:
     83   %q = load float, float* %ptr, align 4
     84   %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
     85   %vecinit2.i = insertelement <8 x float> %vecinit.i, float %q, i32 1
     86   %vecinit4.i = insertelement <8 x float> %vecinit2.i, float %q, i32 2
     87   %vecinit6.i = insertelement <8 x float> %vecinit4.i, float %q, i32 3
     88   ret <8 x float> %vecinit6.i
     89 }
     90 
     91 ;;;; 128-bit versions
     92 
     93 define <4 x float> @e(float* %ptr) nounwind uwtable readnone ssp {
        ; 128-bit case: splat of a scalar float load into all four lanes of a
        ; <4 x float>. Both runs expect one vbroadcastss from memory into xmm0.
     94 ; X32-LABEL: e:
     95 ; X32:       ## BB#0: ## %entry
     96 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
     97 ; X32-NEXT:    vbroadcastss (%eax), %xmm0
     98 ; X32-NEXT:    retl
     99 ;
    100 ; X64-LABEL: e:
    101 ; X64:       ## BB#0: ## %entry
    102 ; X64-NEXT:    vbroadcastss (%rdi), %xmm0
    103 ; X64-NEXT:    retq
    104 entry:
    105   %q = load float, float* %ptr, align 4
    106   %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
    107   %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
    108   %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
    109   %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
    110   ret <4 x float> %vecinit6.i
    111 }
    112 
    113 ; Don't broadcast constants on pre-AVX2 hardware.
    114 define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp {
        ; Splat of a floating-point constant (0xbf80000000000000, printed as
        ; -7.812500e-03 in the expected output) into a <4 x float>. %ptr is
        ; unused. Both runs expect a plain vmovaps of a full constant vector,
        ; i.e. no vbroadcastss is emitted for the constant.
    115 ; X32-LABEL: _e2:
    116 ; X32:       ## BB#0: ## %entry
    117 ; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [-7.812500e-03,-7.812500e-03,-7.812500e-03,-7.812500e-03]
    118 ; X32-NEXT:    retl
    119 ;
    120 ; X64-LABEL: _e2:
    121 ; X64:       ## BB#0: ## %entry
    122 ; X64-NEXT:    vmovaps {{.*#+}} xmm0 = [-7.812500e-03,-7.812500e-03,-7.812500e-03,-7.812500e-03]
    123 ; X64-NEXT:    retq
    124 entry:
    125    %vecinit.i = insertelement <4 x float> undef, float       0xbf80000000000000, i32 0
    126   %vecinit2.i = insertelement <4 x float> %vecinit.i, float  0xbf80000000000000, i32 1
    127   %vecinit4.i = insertelement <4 x float> %vecinit2.i, float 0xbf80000000000000, i32 2
    128   %vecinit6.i = insertelement <4 x float> %vecinit4.i, float 0xbf80000000000000, i32 3
    129   ret <4 x float> %vecinit6.i
    130 }
    131 
    132 
    133 define <4 x i32> @F(i32* %ptr) nounwind uwtable readnone ssp {
        ; Splat of a scalar i32 load into all four lanes of a <4 x i32>.
        ; Both runs expect vbroadcastss from memory into xmm0 even though the
        ; element type is an integer.
    134 ; X32-LABEL: F:
    135 ; X32:       ## BB#0: ## %entry
    136 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    137 ; X32-NEXT:    vbroadcastss (%eax), %xmm0
    138 ; X32-NEXT:    retl
    139 ;
    140 ; X64-LABEL: F:
    141 ; X64:       ## BB#0: ## %entry
    142 ; X64-NEXT:    vbroadcastss (%rdi), %xmm0
    143 ; X64-NEXT:    retq
    144 entry:
    145   %q = load i32, i32* %ptr, align 4
    146   %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
    147   %vecinit2.i = insertelement <4 x i32> %vecinit.i, i32 %q, i32 1
    148   %vecinit4.i = insertelement <4 x i32> %vecinit2.i, i32 %q, i32 2
    149   %vecinit6.i = insertelement <4 x i32> %vecinit4.i, i32 %q, i32 3
    150   ret <4 x i32> %vecinit6.i
    151 }
    152 
    153 ; FIXME: Pointer adjusted broadcasts
    154 
    155 define <4 x i32> @load_splat_4i32_4i32_1111(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
        ; Loads a whole <4 x i32> and splats lane 1 with a shufflevector.
        ; Both runs currently expect a vpshufd with a memory operand
        ; (mem[1,1,1,1]) rather than a broadcast from an offset address.
    156 ; X32-LABEL: load_splat_4i32_4i32_1111:
    157 ; X32:       ## BB#0: ## %entry
    158 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    159 ; X32-NEXT:    vpshufd {{.*#+}} xmm0 = mem[1,1,1,1]
    160 ; X32-NEXT:    retl
    161 ;
    162 ; X64-LABEL: load_splat_4i32_4i32_1111:
    163 ; X64:       ## BB#0: ## %entry
    164 ; X64-NEXT:    vpshufd {{.*#+}} xmm0 = mem[1,1,1,1]
    165 ; X64-NEXT:    retq
    166 entry:
    167   %ld = load <4 x i32>, <4 x i32>* %ptr
    168   %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
    169   ret <4 x i32> %ret
    170 }
    171 
    172 define <8 x i32> @load_splat_8i32_4i32_33333333(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
        ; Loads a <4 x i32> and splats lane 3 across an 8-lane result.
        ; Both runs expect vbroadcastss from byte offset 12 (lane 3 of 4-byte
        ; elements) into ymm0.
    173 ; X32-LABEL: load_splat_8i32_4i32_33333333:
    174 ; X32:       ## BB#0: ## %entry
    175 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    176 ; X32-NEXT:    vbroadcastss 12(%eax), %ymm0
    177 ; X32-NEXT:    retl
    178 ;
    179 ; X64-LABEL: load_splat_8i32_4i32_33333333:
    180 ; X64:       ## BB#0: ## %entry
    181 ; X64-NEXT:    vbroadcastss 12(%rdi), %ymm0
    182 ; X64-NEXT:    retq
    183 entry:
    184   %ld = load <4 x i32>, <4 x i32>* %ptr
    185   %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
    186   ret <8 x i32> %ret
    187 }
    188 
    189 define <8 x i32> @load_splat_8i32_8i32_55555555(<8 x i32>* %ptr) nounwind uwtable readnone ssp {
        ; Loads a <8 x i32> and splats lane 5 across all eight lanes.
        ; Both runs expect vbroadcastss from byte offset 20 (lane 5 of 4-byte
        ; elements) into ymm0.
    190 ; X32-LABEL: load_splat_8i32_8i32_55555555:
    191 ; X32:       ## BB#0: ## %entry
    192 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    193 ; X32-NEXT:    vbroadcastss 20(%eax), %ymm0
    194 ; X32-NEXT:    retl
    195 ;
    196 ; X64-LABEL: load_splat_8i32_8i32_55555555:
    197 ; X64:       ## BB#0: ## %entry
    198 ; X64-NEXT:    vbroadcastss 20(%rdi), %ymm0
    199 ; X64-NEXT:    retq
    200 entry:
    201   %ld = load <8 x i32>, <8 x i32>* %ptr
    202   %ret = shufflevector <8 x i32> %ld, <8 x i32> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
    203   ret <8 x i32> %ret
    204 }
    205 
    206 define <4 x float> @load_splat_4f32_4f32_1111(<4 x float>* %ptr) nounwind uwtable readnone ssp {
        ; Loads a <4 x float> and splats lane 1.
        ; Both runs expect vbroadcastss from byte offset 4 (lane 1 of 4-byte
        ; elements) into xmm0.
    207 ; X32-LABEL: load_splat_4f32_4f32_1111:
    208 ; X32:       ## BB#0: ## %entry
    209 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    210 ; X32-NEXT:    vbroadcastss 4(%eax), %xmm0
    211 ; X32-NEXT:    retl
    212 ;
    213 ; X64-LABEL: load_splat_4f32_4f32_1111:
    214 ; X64:       ## BB#0: ## %entry
    215 ; X64-NEXT:    vbroadcastss 4(%rdi), %xmm0
    216 ; X64-NEXT:    retq
    217 entry:
    218   %ld = load <4 x float>, <4 x float>* %ptr
    219   %ret = shufflevector <4 x float> %ld, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
    220   ret <4 x float> %ret
    221 }
    222 
    223 define <8 x float> @load_splat_8f32_4f32_33333333(<4 x float>* %ptr) nounwind uwtable readnone ssp {
        ; Loads a <4 x float> and splats lane 3 across an 8-lane result.
        ; Both runs expect vbroadcastss from byte offset 12 (lane 3 of 4-byte
        ; elements) into ymm0.
    224 ; X32-LABEL: load_splat_8f32_4f32_33333333:
    225 ; X32:       ## BB#0: ## %entry
    226 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    227 ; X32-NEXT:    vbroadcastss 12(%eax), %ymm0
    228 ; X32-NEXT:    retl
    229 ;
    230 ; X64-LABEL: load_splat_8f32_4f32_33333333:
    231 ; X64:       ## BB#0: ## %entry
    232 ; X64-NEXT:    vbroadcastss 12(%rdi), %ymm0
    233 ; X64-NEXT:    retq
    234 entry:
    235   %ld = load <4 x float>, <4 x float>* %ptr
    236   %ret = shufflevector <4 x float> %ld, <4 x float> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
    237   ret <8 x float> %ret
    238 }
    239 
    240 define <8 x float> @load_splat_8f32_8f32_55555555(<8 x float>* %ptr) nounwind uwtable readnone ssp {
        ; Loads a <8 x float> and splats lane 5 across all eight lanes.
        ; Both runs expect vbroadcastss from byte offset 20 (lane 5 of 4-byte
        ; elements) into ymm0.
    241 ; X32-LABEL: load_splat_8f32_8f32_55555555:
    242 ; X32:       ## BB#0: ## %entry
    243 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    244 ; X32-NEXT:    vbroadcastss 20(%eax), %ymm0
    245 ; X32-NEXT:    retl
    246 ;
    247 ; X64-LABEL: load_splat_8f32_8f32_55555555:
    248 ; X64:       ## BB#0: ## %entry
    249 ; X64-NEXT:    vbroadcastss 20(%rdi), %ymm0
    250 ; X64-NEXT:    retq
    251 entry:
    252   %ld = load <8 x float>, <8 x float>* %ptr
    253   %ret = shufflevector <8 x float> %ld, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
    254   ret <8 x float> %ret
    255 }
    256 
    257 define <2 x i64> @load_splat_2i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {
        ; Loads a <2 x i64> and splats lane 1.
        ; Both runs currently expect a vpshufd with a memory operand selecting
        ; 32-bit lanes [2,3,2,3] (the upper 64-bit element twice), not a
        ; broadcast instruction.
    258 ; X32-LABEL: load_splat_2i64_2i64_1111:
    259 ; X32:       ## BB#0: ## %entry
    260 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    261 ; X32-NEXT:    vpshufd {{.*#+}} xmm0 = mem[2,3,2,3]
    262 ; X32-NEXT:    retl
    263 ;
    264 ; X64-LABEL: load_splat_2i64_2i64_1111:
    265 ; X64:       ## BB#0: ## %entry
    266 ; X64-NEXT:    vpshufd {{.*#+}} xmm0 = mem[2,3,2,3]
    267 ; X64-NEXT:    retq
    268 entry:
    269   %ld = load <2 x i64>, <2 x i64>* %ptr
    270   %ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
    271   ret <2 x i64> %ret
    272 }
    273 
    274 define <4 x i64> @load_splat_4i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {
        ; Loads a <2 x i64> and splats lane 1 across a 4-lane result.
        ; Both runs expect vbroadcastsd from byte offset 8 (lane 1 of 8-byte
        ; elements) into ymm0.
    275 ; X32-LABEL: load_splat_4i64_2i64_1111:
    276 ; X32:       ## BB#0: ## %entry
    277 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    278 ; X32-NEXT:    vbroadcastsd 8(%eax), %ymm0
    279 ; X32-NEXT:    retl
    280 ;
    281 ; X64-LABEL: load_splat_4i64_2i64_1111:
    282 ; X64:       ## BB#0: ## %entry
    283 ; X64-NEXT:    vbroadcastsd 8(%rdi), %ymm0
    284 ; X64-NEXT:    retq
    285 entry:
    286   %ld = load <2 x i64>, <2 x i64>* %ptr
    287   %ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
    288   ret <4 x i64> %ret
    289 }
    290 
    291 define <4 x i64> @load_splat_4i64_4i64_2222(<4 x i64>* %ptr) nounwind uwtable readnone ssp {
        ; Loads a <4 x i64> and splats lane 2 across all four lanes.
        ; Both runs expect vbroadcastsd from byte offset 16 (lane 2 of 8-byte
        ; elements) into ymm0.
    292 ; X32-LABEL: load_splat_4i64_4i64_2222:
    293 ; X32:       ## BB#0: ## %entry
    294 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    295 ; X32-NEXT:    vbroadcastsd 16(%eax), %ymm0
    296 ; X32-NEXT:    retl
    297 ;
    298 ; X64-LABEL: load_splat_4i64_4i64_2222:
    299 ; X64:       ## BB#0: ## %entry
    300 ; X64-NEXT:    vbroadcastsd 16(%rdi), %ymm0
    301 ; X64-NEXT:    retq
    302 entry:
    303   %ld = load <4 x i64>, <4 x i64>* %ptr
    304   %ret = shufflevector <4 x i64> %ld, <4 x i64> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
    305   ret <4 x i64> %ret
    306 }
    307 
    308 define <2 x double> @load_splat_2f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp {
        ; Loads a <2 x double> and splats lane 1.
        ; Both runs expect a vmovddup with a memory operand (mem[0,0]).
        ; NOTE(review): the address operand is swallowed by the {{.*#+}} pattern,
        ; so these checks do not pin the byte offset the instruction loads from.
    309 ; X32-LABEL: load_splat_2f64_2f64_1111:
    310 ; X32:       ## BB#0: ## %entry
    311 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    312 ; X32-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
    313 ; X32-NEXT:    retl
    314 ;
    315 ; X64-LABEL: load_splat_2f64_2f64_1111:
    316 ; X64:       ## BB#0: ## %entry
    317 ; X64-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
    318 ; X64-NEXT:    retq
    319 entry:
    320   %ld = load <2 x double>, <2 x double>* %ptr
    321   %ret = shufflevector <2 x double> %ld, <2 x double> undef, <2 x i32> <i32 1, i32 1>
    322   ret <2 x double> %ret
    323 }
    324 
    325 define <4 x double> @load_splat_4f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp {
        ; Loads a <2 x double> and splats lane 1 across a 4-lane result.
        ; Both runs expect vbroadcastsd from byte offset 8 (lane 1 of 8-byte
        ; elements) into ymm0.
    326 ; X32-LABEL: load_splat_4f64_2f64_1111:
    327 ; X32:       ## BB#0: ## %entry
    328 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    329 ; X32-NEXT:    vbroadcastsd 8(%eax), %ymm0
    330 ; X32-NEXT:    retl
    331 ;
    332 ; X64-LABEL: load_splat_4f64_2f64_1111:
    333 ; X64:       ## BB#0: ## %entry
    334 ; X64-NEXT:    vbroadcastsd 8(%rdi), %ymm0
    335 ; X64-NEXT:    retq
    336 entry:
    337   %ld = load <2 x double>, <2 x double>* %ptr
    338   %ret = shufflevector <2 x double> %ld, <2 x double> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
    339   ret <4 x double> %ret
    340 }
    341 
    342 define <4 x double> @load_splat_4f64_4f64_2222(<4 x double>* %ptr) nounwind uwtable readnone ssp {
        ; Loads a <4 x double> and splats lane 2 across all four lanes.
        ; Both runs expect vbroadcastsd from byte offset 16 (lane 2 of 8-byte
        ; elements) into ymm0.
    343 ; X32-LABEL: load_splat_4f64_4f64_2222:
    344 ; X32:       ## BB#0: ## %entry
    345 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    346 ; X32-NEXT:    vbroadcastsd 16(%eax), %ymm0
    347 ; X32-NEXT:    retl
    348 ;
    349 ; X64-LABEL: load_splat_4f64_4f64_2222:
    350 ; X64:       ## BB#0: ## %entry
    351 ; X64-NEXT:    vbroadcastsd 16(%rdi), %ymm0
    352 ; X64-NEXT:    retq
    353 entry:
    354   %ld = load <4 x double>, <4 x double>* %ptr
    355   %ret = shufflevector <4 x double> %ld, <4 x double> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
    356   ret <4 x double> %ret
    357 }
    358 
    359 ; Unsupported vbroadcasts
    360 
    361 define <2 x i64> @G(i64* %ptr) nounwind uwtable readnone ssp {
        ; Splat of a scalar i64 load into both lanes of a <2 x i64>.
        ; Neither run expects a broadcast instruction: the x86_64 run expects
        ; vmovq followed by vpshufd [0,1,0,1], and the i686 run expects the
        ; value to be assembled from its 32-bit halves with vmovd + vpinsrd.
    362 ; X32-LABEL: G:
    363 ; X32:       ## BB#0: ## %entry
    364 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    365 ; X32-NEXT:    movl (%eax), %ecx
    366 ; X32-NEXT:    movl 4(%eax), %eax
    367 ; X32-NEXT:    vmovd %ecx, %xmm0
    368 ; X32-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
    369 ; X32-NEXT:    vpinsrd $2, %ecx, %xmm0, %xmm0
    370 ; X32-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
    371 ; X32-NEXT:    retl
    372 ;
    373 ; X64-LABEL: G:
    374 ; X64:       ## BB#0: ## %entry
    375 ; X64-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
    376 ; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
    377 ; X64-NEXT:    retq
    378 entry:
    379   %q = load i64, i64* %ptr, align 8
    380   %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
    381   %vecinit2.i = insertelement <2 x i64> %vecinit.i, i64 %q, i32 1
    382   ret <2 x i64> %vecinit2.i
    383 }
    384 
    385 define <4 x i32> @H(<4 x i32> %a) {
        ; Register-only shuffle: lane 1 of %a is moved to lane 0 and the other
        ; three result lanes are undef. Both runs expect a single vpshufd
        ; (xmm0[1,1,2,3]); no broadcast instruction is expected.
    386 ; X32-LABEL: H:
    387 ; X32:       ## BB#0: ## %entry
    388 ; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
    389 ; X32-NEXT:    retl
    390 ;
    391 ; X64-LABEL: H:
    392 ; X64:       ## BB#0: ## %entry
    393 ; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
    394 ; X64-NEXT:    retq
    395 entry:
    396   %x = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
    397   ret <4 x i32> %x
    398 }
    399 
    400 define <2 x double> @I(double* %ptr) nounwind uwtable readnone ssp {
        ; Splat of a scalar double load into both lanes of a <2 x double>.
        ; Both runs expect a vmovddup with a memory operand (mem[0,0]).
        ; Note that the load below is only 4-byte aligned (align 4).
    401 ; X32-LABEL: I:
    402 ; X32:       ## BB#0: ## %entry
    403 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    404 ; X32-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
    405 ; X32-NEXT:    retl
    406 ;
    407 ; X64-LABEL: I:
    408 ; X64:       ## BB#0: ## %entry
    409 ; X64-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
    410 ; X64-NEXT:    retq
    411 entry:
    412   %q = load double, double* %ptr, align 4
    413   %vecinit.i = insertelement <2 x double> undef, double %q, i32 0
    414   %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
    415   ret <2 x double> %vecinit2.i
    416 }
    417 
    418 define <4 x float> @_RR(float* %ptr, i32* %k) nounwind uwtable readnone ssp {
        ; Float splat into a <4 x float> with an additional, unrelated load from
        ; %k and a store placed after it. Both runs still expect the float load
        ; to be folded into vbroadcastss, followed by the integer load and store.
        ; NOTE(review): the store targets an undef pointer and the function is
        ; marked readnone although it loads and stores -- presumably deliberate
        ; for this codegen test; confirm before reusing the pattern.
    419 ; X32-LABEL: _RR:
    420 ; X32:       ## BB#0: ## %entry
    421 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    422 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
    423 ; X32-NEXT:    vbroadcastss (%ecx), %xmm0
    424 ; X32-NEXT:    movl (%eax), %eax
    425 ; X32-NEXT:    movl %eax, (%eax)
    426 ; X32-NEXT:    retl
    427 ;
    428 ; X64-LABEL: _RR:
    429 ; X64:       ## BB#0: ## %entry
    430 ; X64-NEXT:    vbroadcastss (%rdi), %xmm0
    431 ; X64-NEXT:    movl (%rsi), %eax
    432 ; X64-NEXT:    movl %eax, (%rax)
    433 ; X64-NEXT:    retq
    434 entry:
    435   %q = load float, float* %ptr, align 4
    436   %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
    437   %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
    438   %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
    439   %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
    440   ; force a chain
    441   %j = load i32, i32* %k, align 4
    442   store i32 %j, i32* undef
    443   ret <4 x float> %vecinit6.i
    444 }
    445 
    446 define <4 x float> @_RR2(float* %ptr, i32* %k) nounwind uwtable readnone ssp {
        ; Float splat written as one insertelement into lane 0 followed by a
        ; shufflevector with an all-zero mask (instead of four insertelements).
        ; %k is unused. Both runs expect vbroadcastss from memory into xmm0.
    447 ; X32-LABEL: _RR2:
    448 ; X32:       ## BB#0: ## %entry
    449 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    450 ; X32-NEXT:    vbroadcastss (%eax), %xmm0
    451 ; X32-NEXT:    retl
    452 ;
    453 ; X64-LABEL: _RR2:
    454 ; X64:       ## BB#0: ## %entry
    455 ; X64-NEXT:    vbroadcastss (%rdi), %xmm0
    456 ; X64-NEXT:    retq
    457 entry:
    458   %q = load float, float* %ptr, align 4
    459   %v = insertelement <4 x float> undef, float %q, i32 0
    460   %t = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
    461   ret <4 x float> %t
    462 }
    463 
    464 ; These tests check that a vbroadcast instruction is used when we have a splat
    465 ; formed from a concat_vectors (via the shufflevector) of two BUILD_VECTORs
    466 ; (via the insertelements).
    467 
    468 define <8 x float> @splat_concat1(float* %p) {
        ; Builds one <4 x float> splat of the loaded float, then widens it to
        ; eight lanes with a shuffle that repeats lanes 0-3 twice.
        ; Both runs expect a single vbroadcastss from memory into ymm0.
    469 ; X32-LABEL: splat_concat1:
    470 ; X32:       ## BB#0:
    471 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    472 ; X32-NEXT:    vbroadcastss (%eax), %ymm0
    473 ; X32-NEXT:    retl
    474 ;
    475 ; X64-LABEL: splat_concat1:
    476 ; X64:       ## BB#0:
    477 ; X64-NEXT:    vbroadcastss (%rdi), %ymm0
    478 ; X64-NEXT:    retq
    479   %1 = load float, float* %p, align 4
    480   %2 = insertelement <4 x float> undef, float %1, i32 0
    481   %3 = insertelement <4 x float> %2, float %1, i32 1
    482   %4 = insertelement <4 x float> %3, float %1, i32 2
    483   %5 = insertelement <4 x float> %4, float %1, i32 3
    484   %6 = shufflevector <4 x float> %5, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
    485   ret <8 x float> %6
    486 }
    487 
    487 define <8 x float> @splat_concat2(float* %p) {
        ; Builds two separate <4 x float> splats of the same loaded float
        ; (%2-%5 and %6-%9) and concatenates them with a shuffle whose mask is
        ; lanes 0-7 in order.
        ; Both runs expect a single vbroadcastss from memory into ymm0.
    488 ; X32-LABEL: splat_concat2:
    489 ; X32:       ## BB#0:
    490 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    491 ; X32-NEXT:    vbroadcastss (%eax), %ymm0
    492 ; X32-NEXT:    retl
    493 ;
    494 ; X64-LABEL: splat_concat2:
    495 ; X64:       ## BB#0:
    496 ; X64-NEXT:    vbroadcastss (%rdi), %ymm0
    497 ; X64-NEXT:    retq
    498   %1 = load float, float* %p, align 4
    499   %2 = insertelement <4 x float> undef, float %1, i32 0
    500   %3 = insertelement <4 x float> %2, float %1, i32 1
    501   %4 = insertelement <4 x float> %3, float %1, i32 2
    502   %5 = insertelement <4 x float> %4, float %1, i32 3
    503   %6 = insertelement <4 x float> undef, float %1, i32 0
    504   %7 = insertelement <4 x float> %6, float %1, i32 1
    505   %8 = insertelement <4 x float> %7, float %1, i32 2
    506   %9 = insertelement <4 x float> %8, float %1, i32 3
    507   %10 = shufflevector <4 x float> %5, <4 x float> %9, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
    508   ret <8 x float> %10
    509 }
    511 
    511 define <4 x double> @splat_concat3(double* %p) {
        ; Builds one <2 x double> splat of the loaded double, then widens it to
        ; four lanes with a shuffle that repeats lanes 0-1 twice.
        ; Both runs expect a single vbroadcastsd from memory into ymm0.
    512 ; X32-LABEL: splat_concat3:
    513 ; X32:       ## BB#0:
    514 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    515 ; X32-NEXT:    vbroadcastsd (%eax), %ymm0
    516 ; X32-NEXT:    retl
    517 ;
    518 ; X64-LABEL: splat_concat3:
    519 ; X64:       ## BB#0:
    520 ; X64-NEXT:    vbroadcastsd (%rdi), %ymm0
    521 ; X64-NEXT:    retq
    522   %1 = load double, double* %p, align 8
    523   %2 = insertelement <2 x double> undef, double %1, i32 0
    524   %3 = insertelement <2 x double> %2, double %1, i32 1
    525   %4 = shufflevector <2 x double> %3, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
    526   ret <4 x double> %4
    527 }
    529 
    529 define <4 x double> @splat_concat4(double* %p) {
        ; Builds two separate <2 x double> splats of the same loaded double
        ; (%2-%3 and %4-%5) and concatenates them with a shuffle whose mask is
        ; lanes 0-3 in order.
        ; Both runs expect a single vbroadcastsd from memory into ymm0.
    530 ; X32-LABEL: splat_concat4:
    531 ; X32:       ## BB#0:
    532 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    533 ; X32-NEXT:    vbroadcastsd (%eax), %ymm0
    534 ; X32-NEXT:    retl
    535 ;
    536 ; X64-LABEL: splat_concat4:
    537 ; X64:       ## BB#0:
    538 ; X64-NEXT:    vbroadcastsd (%rdi), %ymm0
    539 ; X64-NEXT:    retq
    540   %1 = load double, double* %p, align 8
    541   %2 = insertelement <2 x double> undef, double %1, i32 0
    542   %3 = insertelement <2 x double> %2, double %1, i32 1
    543   %4 = insertelement <2 x double> undef, double %1, i32 0
        ; The second vector is built on %4 (not %2) so that it is an independent
        ; insertelement chain, mirroring %6-%9 in splat_concat2.
    544   %5 = insertelement <2 x double> %4, double %1, i32 1
    545   %6 = shufflevector <2 x double> %3, <2 x double> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    546   ret <4 x double> %6
    547 }
    549