Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=X32
      3 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=X64
      4 
      5 define <16 x i8> @BB16(i8* %ptr) nounwind uwtable readnone ssp {
        ; Splat of a scalar i8 load: the byte read from %ptr is inserted into each
        ; of the 16 lanes of a <16 x i8>. Both check prefixes expect the whole
        ; insertelement chain to fold into one vpbroadcastb from memory into %xmm0.
        ; NOTE(review): the function is declared readnone although it loads through
        ; %ptr; readonly looks like the accurate attribute -- confirm before changing.
      6 ; X32-LABEL: BB16:
      7 ; X32:       ## BB#0: ## %entry
      8 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
      9 ; X32-NEXT:    vpbroadcastb (%eax), %xmm0
     10 ; X32-NEXT:    retl
     11 ;
     12 ; X64-LABEL: BB16:
     13 ; X64:       ## BB#0: ## %entry
     14 ; X64-NEXT:    vpbroadcastb (%rdi), %xmm0
     15 ; X64-NEXT:    retq
     16 entry:
     17   %q = load i8, i8* %ptr, align 4
     18   %q0 = insertelement <16 x i8> undef, i8 %q, i32 0
     19   %q1 = insertelement <16 x i8> %q0, i8 %q, i32 1
     20   %q2 = insertelement <16 x i8> %q1, i8 %q, i32 2
     21   %q3 = insertelement <16 x i8> %q2, i8 %q, i32 3
     22   %q4 = insertelement <16 x i8> %q3, i8 %q, i32 4
     23   %q5 = insertelement <16 x i8> %q4, i8 %q, i32 5
     24   %q6 = insertelement <16 x i8> %q5, i8 %q, i32 6
     25   %q7 = insertelement <16 x i8> %q6, i8 %q, i32 7
     26   %q8 = insertelement <16 x i8> %q7, i8 %q, i32 8
     27   %q9 = insertelement <16 x i8> %q8, i8 %q, i32 9
     28   %qa = insertelement <16 x i8> %q9, i8 %q, i32 10
     29   %qb = insertelement <16 x i8> %qa, i8 %q, i32 11
     30   %qc = insertelement <16 x i8> %qb, i8 %q, i32 12
     31   %qd = insertelement <16 x i8> %qc, i8 %q, i32 13
     32   %qe = insertelement <16 x i8> %qd, i8 %q, i32 14
     33   %qf = insertelement <16 x i8> %qe, i8 %q, i32 15
     34   ret <16 x i8> %qf
     35 }
     36 
     37 define <32 x i8> @BB32(i8* %ptr) nounwind uwtable readnone ssp {
        ; 256-bit byte splat: one i8 loaded from %ptr is inserted into lanes 0-31 of
        ; a <32 x i8> (lanes 0-15 via %q0..%qf, lanes 16-31 via %q20..%q2f).
        ; Both check prefixes expect a single vpbroadcastb from memory into %ymm0.
     38 ; X32-LABEL: BB32:
     39 ; X32:       ## BB#0: ## %entry
     40 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
     41 ; X32-NEXT:    vpbroadcastb (%eax), %ymm0
     42 ; X32-NEXT:    retl
     43 ;
     44 ; X64-LABEL: BB32:
     45 ; X64:       ## BB#0: ## %entry
     46 ; X64-NEXT:    vpbroadcastb (%rdi), %ymm0
     47 ; X64-NEXT:    retq
     48 entry:
     49   %q = load i8, i8* %ptr, align 4
     50   %q0 = insertelement <32 x i8> undef, i8 %q, i32 0
     51   %q1 = insertelement <32 x i8> %q0, i8 %q, i32 1
     52   %q2 = insertelement <32 x i8> %q1, i8 %q, i32 2
     53   %q3 = insertelement <32 x i8> %q2, i8 %q, i32 3
     54   %q4 = insertelement <32 x i8> %q3, i8 %q, i32 4
     55   %q5 = insertelement <32 x i8> %q4, i8 %q, i32 5
     56   %q6 = insertelement <32 x i8> %q5, i8 %q, i32 6
     57   %q7 = insertelement <32 x i8> %q6, i8 %q, i32 7
     58   %q8 = insertelement <32 x i8> %q7, i8 %q, i32 8
     59   %q9 = insertelement <32 x i8> %q8, i8 %q, i32 9
     60   %qa = insertelement <32 x i8> %q9, i8 %q, i32 10
     61   %qb = insertelement <32 x i8> %qa, i8 %q, i32 11
     62   %qc = insertelement <32 x i8> %qb, i8 %q, i32 12
     63   %qd = insertelement <32 x i8> %qc, i8 %q, i32 13
     64   %qe = insertelement <32 x i8> %qd, i8 %q, i32 14
     65   %qf = insertelement <32 x i8> %qe, i8 %q, i32 15
     66 
     67   %q20 = insertelement <32 x i8> %qf, i8 %q,  i32 16
     68   %q21 = insertelement <32 x i8> %q20, i8 %q, i32 17
     69   %q22 = insertelement <32 x i8> %q21, i8 %q, i32 18
     70   %q23 = insertelement <32 x i8> %q22, i8 %q, i32 19
     71   %q24 = insertelement <32 x i8> %q23, i8 %q, i32 20
     72   %q25 = insertelement <32 x i8> %q24, i8 %q, i32 21
     73   %q26 = insertelement <32 x i8> %q25, i8 %q, i32 22
     74   %q27 = insertelement <32 x i8> %q26, i8 %q, i32 23
     75   %q28 = insertelement <32 x i8> %q27, i8 %q, i32 24
     76   %q29 = insertelement <32 x i8> %q28, i8 %q, i32 25
     77   %q2a = insertelement <32 x i8> %q29, i8 %q, i32 26
     78   %q2b = insertelement <32 x i8> %q2a, i8 %q, i32 27
     79   %q2c = insertelement <32 x i8> %q2b, i8 %q, i32 28
     80   %q2d = insertelement <32 x i8> %q2c, i8 %q, i32 29
     81   %q2e = insertelement <32 x i8> %q2d, i8 %q, i32 30
     82   %q2f = insertelement <32 x i8> %q2e, i8 %q, i32 31
     83   ret <32 x i8> %q2f
     84 }
     85 
     86 define <8 x i16> @W16(i16* %ptr) nounwind uwtable readnone ssp {
        ; 128-bit word splat: one i16 loaded from %ptr is inserted into all 8 lanes
        ; of a <8 x i16>. Both check prefixes expect a single vpbroadcastw from
        ; memory into %xmm0.
     87 ; X32-LABEL: W16:
     88 ; X32:       ## BB#0: ## %entry
     89 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
     90 ; X32-NEXT:    vpbroadcastw (%eax), %xmm0
     91 ; X32-NEXT:    retl
     92 ;
     93 ; X64-LABEL: W16:
     94 ; X64:       ## BB#0: ## %entry
     95 ; X64-NEXT:    vpbroadcastw (%rdi), %xmm0
     96 ; X64-NEXT:    retq
     97 entry:
     98   %q = load i16, i16* %ptr, align 4
     99   %q0 = insertelement <8 x i16> undef, i16 %q, i32 0
    100   %q1 = insertelement <8 x i16> %q0, i16 %q, i32 1
    101   %q2 = insertelement <8 x i16> %q1, i16 %q, i32 2
    102   %q3 = insertelement <8 x i16> %q2, i16 %q, i32 3
    103   %q4 = insertelement <8 x i16> %q3, i16 %q, i32 4
    104   %q5 = insertelement <8 x i16> %q4, i16 %q, i32 5
    105   %q6 = insertelement <8 x i16> %q5, i16 %q, i32 6
    106   %q7 = insertelement <8 x i16> %q6, i16 %q, i32 7
    107   ret <8 x i16> %q7
    108 }
    109 
    110 define <16 x i16> @WW16(i16* %ptr) nounwind uwtable readnone ssp {
        ; 256-bit word splat: one i16 loaded from %ptr is inserted into all 16 lanes
        ; of a <16 x i16>. Both check prefixes expect a single vpbroadcastw from
        ; memory into %ymm0.
    111 ; X32-LABEL: WW16:
    112 ; X32:       ## BB#0: ## %entry
    113 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    114 ; X32-NEXT:    vpbroadcastw (%eax), %ymm0
    115 ; X32-NEXT:    retl
    116 ;
    117 ; X64-LABEL: WW16:
    118 ; X64:       ## BB#0: ## %entry
    119 ; X64-NEXT:    vpbroadcastw (%rdi), %ymm0
    120 ; X64-NEXT:    retq
    121 entry:
    122   %q = load i16, i16* %ptr, align 4
    123   %q0 = insertelement <16 x i16> undef, i16 %q, i32 0
    124   %q1 = insertelement <16 x i16> %q0, i16 %q, i32 1
    125   %q2 = insertelement <16 x i16> %q1, i16 %q, i32 2
    126   %q3 = insertelement <16 x i16> %q2, i16 %q, i32 3
    127   %q4 = insertelement <16 x i16> %q3, i16 %q, i32 4
    128   %q5 = insertelement <16 x i16> %q4, i16 %q, i32 5
    129   %q6 = insertelement <16 x i16> %q5, i16 %q, i32 6
    130   %q7 = insertelement <16 x i16> %q6, i16 %q, i32 7
    131   %q8 = insertelement <16 x i16> %q7, i16 %q, i32 8
    132   %q9 = insertelement <16 x i16> %q8, i16 %q, i32 9
    133   %qa = insertelement <16 x i16> %q9, i16 %q, i32 10
    134   %qb = insertelement <16 x i16> %qa, i16 %q, i32 11
    135   %qc = insertelement <16 x i16> %qb, i16 %q, i32 12
    136   %qd = insertelement <16 x i16> %qc, i16 %q, i32 13
    137   %qe = insertelement <16 x i16> %qd, i16 %q, i32 14
    138   %qf = insertelement <16 x i16> %qe, i16 %q, i32 15
    139   ret <16 x i16> %qf
    140 }
    141 
    142 define <4 x i32> @D32(i32* %ptr) nounwind uwtable readnone ssp {
        ; 128-bit dword splat: one i32 loaded from %ptr is inserted into all 4 lanes
        ; of a <4 x i32>. Although the element type is integer, both check prefixes
        ; expect the floating-point form vbroadcastss from memory into %xmm0.
    143 ; X32-LABEL: D32:
    144 ; X32:       ## BB#0: ## %entry
    145 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    146 ; X32-NEXT:    vbroadcastss (%eax), %xmm0
    147 ; X32-NEXT:    retl
    148 ;
    149 ; X64-LABEL: D32:
    150 ; X64:       ## BB#0: ## %entry
    151 ; X64-NEXT:    vbroadcastss (%rdi), %xmm0
    152 ; X64-NEXT:    retq
    153 entry:
    154   %q = load i32, i32* %ptr, align 4
    155   %q0 = insertelement <4 x i32> undef, i32 %q, i32 0
    156   %q1 = insertelement <4 x i32> %q0, i32 %q, i32 1
    157   %q2 = insertelement <4 x i32> %q1, i32 %q, i32 2
    158   %q3 = insertelement <4 x i32> %q2, i32 %q, i32 3
    159   ret <4 x i32> %q3
    160 }
    161 
    162 define <8 x i32> @DD32(i32* %ptr) nounwind uwtable readnone ssp {
        ; 256-bit dword splat: one i32 loaded from %ptr is inserted into all 8 lanes
        ; of a <8 x i32>. Both check prefixes expect a single vbroadcastss from
        ; memory into %ymm0.
    163 ; X32-LABEL: DD32:
    164 ; X32:       ## BB#0: ## %entry
    165 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    166 ; X32-NEXT:    vbroadcastss (%eax), %ymm0
    167 ; X32-NEXT:    retl
    168 ;
    169 ; X64-LABEL: DD32:
    170 ; X64:       ## BB#0: ## %entry
    171 ; X64-NEXT:    vbroadcastss (%rdi), %ymm0
    172 ; X64-NEXT:    retq
    173 entry:
    174   %q = load i32, i32* %ptr, align 4
    175   %q0 = insertelement <8 x i32> undef, i32 %q, i32 0
    176   %q1 = insertelement <8 x i32> %q0, i32 %q, i32 1
    177   %q2 = insertelement <8 x i32> %q1, i32 %q, i32 2
    178   %q3 = insertelement <8 x i32> %q2, i32 %q, i32 3
    179   %q4 = insertelement <8 x i32> %q3, i32 %q, i32 4
    180   %q5 = insertelement <8 x i32> %q4, i32 %q, i32 5
    181   %q6 = insertelement <8 x i32> %q5, i32 %q, i32 6
    182   %q7 = insertelement <8 x i32> %q6, i32 %q, i32 7
    183   ret <8 x i32> %q7
    184 }
    185 
    186 define <2 x i64> @Q64(i64* %ptr) nounwind uwtable readnone ssp {
        ; 128-bit qword splat: one i64 loaded from %ptr is inserted into both lanes
        ; of a <2 x i64>. The two targets are expected to differ: the i686 checks
        ; load the value as two 32-bit halves and rebuild the vector with
        ; vmovd + three vpinsrd, while the x86_64 checks expect a single
        ; vpbroadcastq from memory into %xmm0.
    187 ; X32-LABEL: Q64:
    188 ; X32:       ## BB#0: ## %entry
    189 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    190 ; X32-NEXT:    movl (%eax), %ecx
    191 ; X32-NEXT:    movl 4(%eax), %eax
    192 ; X32-NEXT:    vmovd %ecx, %xmm0
    193 ; X32-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
    194 ; X32-NEXT:    vpinsrd $2, %ecx, %xmm0, %xmm0
    195 ; X32-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
    196 ; X32-NEXT:    retl
    197 ;
    198 ; X64-LABEL: Q64:
    199 ; X64:       ## BB#0: ## %entry
    200 ; X64-NEXT:    vpbroadcastq (%rdi), %xmm0
    201 ; X64-NEXT:    retq
    202 entry:
    203   %q = load i64, i64* %ptr, align 4
    204   %q0 = insertelement <2 x i64> undef, i64 %q, i32 0
    205   %q1 = insertelement <2 x i64> %q0, i64 %q, i32 1
    206   ret <2 x i64> %q1
    207 }
    208 
    209 define <4 x i64> @QQ64(i64* %ptr) nounwind uwtable readnone ssp {
        ; 256-bit qword splat: one i64 loaded from %ptr is inserted into all 4 lanes
        ; of a <4 x i64>. The i686 checks expect the 128-bit half to be assembled
        ; from two 32-bit loads (vmovd + three vpinsrd) and then duplicated into the
        ; upper half with vinserti128; the x86_64 checks expect a single
        ; vbroadcastsd from memory into %ymm0.
    210 ; X32-LABEL: QQ64:
    211 ; X32:       ## BB#0: ## %entry
    212 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    213 ; X32-NEXT:    movl (%eax), %ecx
    214 ; X32-NEXT:    movl 4(%eax), %eax
    215 ; X32-NEXT:    vmovd %ecx, %xmm0
    216 ; X32-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
    217 ; X32-NEXT:    vpinsrd $2, %ecx, %xmm0, %xmm0
    218 ; X32-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
    219 ; X32-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
    220 ; X32-NEXT:    retl
    221 ;
    222 ; X64-LABEL: QQ64:
    223 ; X64:       ## BB#0: ## %entry
    224 ; X64-NEXT:    vbroadcastsd (%rdi), %ymm0
    225 ; X64-NEXT:    retq
    226 entry:
    227   %q = load i64, i64* %ptr, align 4
    228   %q0 = insertelement <4 x i64> undef, i64 %q, i32 0
    229   %q1 = insertelement <4 x i64> %q0, i64 %q, i32 1
    230   %q2 = insertelement <4 x i64> %q1, i64 %q, i32 2
    231   %q3 = insertelement <4 x i64> %q2, i64 %q, i32 3
    232   ret <4 x i64> %q3
    233 }
    234 
    235 ; FIXME: Pointer adjusted broadcasts
    236 
    237 define <16 x i8> @load_splat_16i8_16i8_1111111111111111(<16 x i8>* %ptr) nounwind uwtable readnone ssp {
        ; Loads a whole <16 x i8> from %ptr and splats element 1 across 16 lanes
        ; with a shufflevector. Both check prefixes expect the vector load to be
        ; replaced by a vpbroadcastb of the single byte at offset 1 into %xmm0.
    238 ; X32-LABEL: load_splat_16i8_16i8_1111111111111111:
    239 ; X32:       ## BB#0: ## %entry
    240 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    241 ; X32-NEXT:    vpbroadcastb 1(%eax), %xmm0
    242 ; X32-NEXT:    retl
    243 ;
    244 ; X64-LABEL: load_splat_16i8_16i8_1111111111111111:
    245 ; X64:       ## BB#0: ## %entry
    246 ; X64-NEXT:    vpbroadcastb 1(%rdi), %xmm0
    247 ; X64-NEXT:    retq
    248 entry:
    249   %ld = load <16 x i8>, <16 x i8>* %ptr
    250   %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
    251   ret <16 x i8> %ret
    252 }
    253 
    254 define <32 x i8> @load_splat_32i8_16i8_11111111111111111111111111111111(<16 x i8>* %ptr) nounwind uwtable readnone ssp {
        ; Loads a <16 x i8> from %ptr and splats element 1 into a wider <32 x i8>
        ; result (the shuffle mask has 32 entries, all 1). Both check prefixes
        ; expect a vpbroadcastb of the byte at offset 1 into %ymm0.
    255 ; X32-LABEL: load_splat_32i8_16i8_11111111111111111111111111111111:
    256 ; X32:       ## BB#0: ## %entry
    257 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    258 ; X32-NEXT:    vpbroadcastb 1(%eax), %ymm0
    259 ; X32-NEXT:    retl
    260 ;
    261 ; X64-LABEL: load_splat_32i8_16i8_11111111111111111111111111111111:
    262 ; X64:       ## BB#0: ## %entry
    263 ; X64-NEXT:    vpbroadcastb 1(%rdi), %ymm0
    264 ; X64-NEXT:    retq
    265 entry:
    266   %ld = load <16 x i8>, <16 x i8>* %ptr
    267   %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
    268   ret <32 x i8> %ret
    269 }
    270 
    271 define <32 x i8> @load_splat_32i8_32i8_11111111111111111111111111111111(<32 x i8>* %ptr) nounwind uwtable readnone ssp {
        ; Loads a whole <32 x i8> from %ptr and splats element 1 across 32 lanes.
        ; Both check prefixes expect a vpbroadcastb of the byte at offset 1 into
        ; %ymm0 instead of a full vector load plus shuffle.
    272 ; X32-LABEL: load_splat_32i8_32i8_11111111111111111111111111111111:
    273 ; X32:       ## BB#0: ## %entry
    274 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    275 ; X32-NEXT:    vpbroadcastb 1(%eax), %ymm0
    276 ; X32-NEXT:    retl
    277 ;
    278 ; X64-LABEL: load_splat_32i8_32i8_11111111111111111111111111111111:
    279 ; X64:       ## BB#0: ## %entry
    280 ; X64-NEXT:    vpbroadcastb 1(%rdi), %ymm0
    281 ; X64-NEXT:    retq
    282 entry:
    283   %ld = load <32 x i8>, <32 x i8>* %ptr
    284   %ret = shufflevector <32 x i8> %ld, <32 x i8> undef, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
    285   ret <32 x i8> %ret
    286 }
    287 
    288 define <8 x i16> @load_splat_8i16_8i16_11111111(<8 x i16>* %ptr) nounwind uwtable readnone ssp {
        ; Loads a whole <8 x i16> from %ptr and splats element 1 across 8 lanes.
        ; Both check prefixes expect a vpbroadcastw of the word at byte offset 2
        ; into %xmm0.
    289 ; X32-LABEL: load_splat_8i16_8i16_11111111:
    290 ; X32:       ## BB#0: ## %entry
    291 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    292 ; X32-NEXT:    vpbroadcastw 2(%eax), %xmm0
    293 ; X32-NEXT:    retl
    294 ;
    295 ; X64-LABEL: load_splat_8i16_8i16_11111111:
    296 ; X64:       ## BB#0: ## %entry
    297 ; X64-NEXT:    vpbroadcastw 2(%rdi), %xmm0
    298 ; X64-NEXT:    retq
    299 entry:
    300   %ld = load <8 x i16>, <8 x i16>* %ptr
    301   %ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
    302   ret <8 x i16> %ret
    303 }
    304 
    305 define <16 x i16> @load_splat_16i16_8i16_1111111111111111(<8 x i16>* %ptr) nounwind uwtable readnone ssp {
        ; Loads a <8 x i16> from %ptr and splats element 1 into a wider <16 x i16>
        ; result (16-entry mask, all 1). Both check prefixes expect a vpbroadcastw
        ; of the word at byte offset 2 into %ymm0.
    306 ; X32-LABEL: load_splat_16i16_8i16_1111111111111111:
    307 ; X32:       ## BB#0: ## %entry
    308 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    309 ; X32-NEXT:    vpbroadcastw 2(%eax), %ymm0
    310 ; X32-NEXT:    retl
    311 ;
    312 ; X64-LABEL: load_splat_16i16_8i16_1111111111111111:
    313 ; X64:       ## BB#0: ## %entry
    314 ; X64-NEXT:    vpbroadcastw 2(%rdi), %ymm0
    315 ; X64-NEXT:    retq
    316 entry:
    317   %ld = load <8 x i16>, <8 x i16>* %ptr
    318   %ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
    319   ret <16 x i16> %ret
    320 }
    321 
    322 define <16 x i16> @load_splat_16i16_16i16_1111111111111111(<16 x i16>* %ptr) nounwind uwtable readnone ssp {
        ; Loads a whole <16 x i16> from %ptr and splats element 1 across 16 lanes.
        ; Both check prefixes expect a vpbroadcastw of the word at byte offset 2
        ; into %ymm0.
    323 ; X32-LABEL: load_splat_16i16_16i16_1111111111111111:
    324 ; X32:       ## BB#0: ## %entry
    325 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    326 ; X32-NEXT:    vpbroadcastw 2(%eax), %ymm0
    327 ; X32-NEXT:    retl
    328 ;
    329 ; X64-LABEL: load_splat_16i16_16i16_1111111111111111:
    330 ; X64:       ## BB#0: ## %entry
    331 ; X64-NEXT:    vpbroadcastw 2(%rdi), %ymm0
    332 ; X64-NEXT:    retq
    333 entry:
    334   %ld = load <16 x i16>, <16 x i16>* %ptr
    335   %ret = shufflevector <16 x i16> %ld, <16 x i16> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
    336   ret <16 x i16> %ret
    337 }
    338 
    339 define <4 x i32> @load_splat_4i32_4i32_1111(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
        ; Loads a whole <4 x i32> from %ptr and splats element 1 across 4 lanes.
        ; Both check prefixes expect a vbroadcastss of the dword at byte offset 4
        ; into %xmm0.
    340 ; X32-LABEL: load_splat_4i32_4i32_1111:
    341 ; X32:       ## BB#0: ## %entry
    342 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    343 ; X32-NEXT:    vbroadcastss 4(%eax), %xmm0
    344 ; X32-NEXT:    retl
    345 ;
    346 ; X64-LABEL: load_splat_4i32_4i32_1111:
    347 ; X64:       ## BB#0: ## %entry
    348 ; X64-NEXT:    vbroadcastss 4(%rdi), %xmm0
    349 ; X64-NEXT:    retq
    350 entry:
    351   %ld = load <4 x i32>, <4 x i32>* %ptr
    352   %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
    353   ret <4 x i32> %ret
    354 }
    355 
    356 define <8 x i32> @load_splat_8i32_4i32_33333333(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
        ; Loads a <4 x i32> from %ptr and splats element 3 into a wider <8 x i32>
        ; result (8-entry mask, all 3). Both check prefixes expect a vbroadcastss
        ; of the dword at byte offset 12 into %ymm0.
    357 ; X32-LABEL: load_splat_8i32_4i32_33333333:
    358 ; X32:       ## BB#0: ## %entry
    359 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    360 ; X32-NEXT:    vbroadcastss 12(%eax), %ymm0
    361 ; X32-NEXT:    retl
    362 ;
    363 ; X64-LABEL: load_splat_8i32_4i32_33333333:
    364 ; X64:       ## BB#0: ## %entry
    365 ; X64-NEXT:    vbroadcastss 12(%rdi), %ymm0
    366 ; X64-NEXT:    retq
    367 entry:
    368   %ld = load <4 x i32>, <4 x i32>* %ptr
    369   %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
    370   ret <8 x i32> %ret
    371 }
    372 
    373 define <8 x i32> @load_splat_8i32_8i32_55555555(<8 x i32>* %ptr) nounwind uwtable readnone ssp {
        ; Loads a whole <8 x i32> from %ptr and splats element 5 (an element of the
        ; upper 128-bit half) across 8 lanes. Both check prefixes expect a
        ; vbroadcastss of the dword at byte offset 20 into %ymm0.
    374 ; X32-LABEL: load_splat_8i32_8i32_55555555:
    375 ; X32:       ## BB#0: ## %entry
    376 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    377 ; X32-NEXT:    vbroadcastss 20(%eax), %ymm0
    378 ; X32-NEXT:    retl
    379 ;
    380 ; X64-LABEL: load_splat_8i32_8i32_55555555:
    381 ; X64:       ## BB#0: ## %entry
    382 ; X64-NEXT:    vbroadcastss 20(%rdi), %ymm0
    383 ; X64-NEXT:    retq
    384 entry:
    385   %ld = load <8 x i32>, <8 x i32>* %ptr
    386   %ret = shufflevector <8 x i32> %ld, <8 x i32> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
    387   ret <8 x i32> %ret
    388 }
    389 
    390 define <4 x float> @load_splat_4f32_4f32_1111(<4 x float>* %ptr) nounwind uwtable readnone ssp {
        ; Loads a whole <4 x float> from %ptr and splats element 1 across 4 lanes.
        ; Both check prefixes expect a vbroadcastss of the float at byte offset 4
        ; into %xmm0.
    391 ; X32-LABEL: load_splat_4f32_4f32_1111:
    392 ; X32:       ## BB#0: ## %entry
    393 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    394 ; X32-NEXT:    vbroadcastss 4(%eax), %xmm0
    395 ; X32-NEXT:    retl
    396 ;
    397 ; X64-LABEL: load_splat_4f32_4f32_1111:
    398 ; X64:       ## BB#0: ## %entry
    399 ; X64-NEXT:    vbroadcastss 4(%rdi), %xmm0
    400 ; X64-NEXT:    retq
    401 entry:
    402   %ld = load <4 x float>, <4 x float>* %ptr
    403   %ret = shufflevector <4 x float> %ld, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
    404   ret <4 x float> %ret
    405 }
    406 
    407 define <8 x float> @load_splat_8f32_4f32_33333333(<4 x float>* %ptr) nounwind uwtable readnone ssp {
        ; Loads a <4 x float> from %ptr and splats element 3 into a wider
        ; <8 x float> result (8-entry mask, all 3). Both check prefixes expect a
        ; vbroadcastss of the float at byte offset 12 into %ymm0.
    408 ; X32-LABEL: load_splat_8f32_4f32_33333333:
    409 ; X32:       ## BB#0: ## %entry
    410 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    411 ; X32-NEXT:    vbroadcastss 12(%eax), %ymm0
    412 ; X32-NEXT:    retl
    413 ;
    414 ; X64-LABEL: load_splat_8f32_4f32_33333333:
    415 ; X64:       ## BB#0: ## %entry
    416 ; X64-NEXT:    vbroadcastss 12(%rdi), %ymm0
    417 ; X64-NEXT:    retq
    418 entry:
    419   %ld = load <4 x float>, <4 x float>* %ptr
    420   %ret = shufflevector <4 x float> %ld, <4 x float> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
    421   ret <8 x float> %ret
    422 }
    423 
    424 define <8 x float> @load_splat_8f32_8f32_55555555(<8 x float>* %ptr) nounwind uwtable readnone ssp {
        ; Loads a whole <8 x float> from %ptr and splats element 5 (an element of
        ; the upper 128-bit half) across 8 lanes. Both check prefixes expect a
        ; vbroadcastss of the float at byte offset 20 into %ymm0.
    425 ; X32-LABEL: load_splat_8f32_8f32_55555555:
    426 ; X32:       ## BB#0: ## %entry
    427 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    428 ; X32-NEXT:    vbroadcastss 20(%eax), %ymm0
    429 ; X32-NEXT:    retl
    430 ;
    431 ; X64-LABEL: load_splat_8f32_8f32_55555555:
    432 ; X64:       ## BB#0: ## %entry
    433 ; X64-NEXT:    vbroadcastss 20(%rdi), %ymm0
    434 ; X64-NEXT:    retq
    435 entry:
    436   %ld = load <8 x float>, <8 x float>* %ptr
    437   %ret = shufflevector <8 x float> %ld, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
    438   ret <8 x float> %ret
    439 }
    440 
    441 define <2 x i64> @load_splat_2i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {
        ; Loads a whole <2 x i64> from %ptr and splats element 1 across both lanes.
        ; The targets are expected to differ: the i686 checks expect vmovddup from
        ; memory (the regex in that check hides the address operand, so the offset
        ; is not pinned), while the x86_64 checks expect vpbroadcastq of the qword
        ; at byte offset 8 into %xmm0.
        ; NOTE(review): the name suffix has four digits but the shuffle mask has
        ; only two elements -- the suffix does not mirror the mask here.
    442 ; X32-LABEL: load_splat_2i64_2i64_1111:
    443 ; X32:       ## BB#0: ## %entry
    444 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    445 ; X32-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
    446 ; X32-NEXT:    retl
    447 ;
    448 ; X64-LABEL: load_splat_2i64_2i64_1111:
    449 ; X64:       ## BB#0: ## %entry
    450 ; X64-NEXT:    vpbroadcastq 8(%rdi), %xmm0
    451 ; X64-NEXT:    retq
    452 entry:
    453   %ld = load <2 x i64>, <2 x i64>* %ptr
    454   %ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
    455   ret <2 x i64> %ret
    456 }
    457 
    458 define <4 x i64> @load_splat_4i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {
        ; Loads a <2 x i64> from %ptr and splats element 1 into a wider <4 x i64>
        ; result (4-entry mask, all 1). Both check prefixes expect a vbroadcastsd
        ; of the qword at byte offset 8 into %ymm0.
    459 ; X32-LABEL: load_splat_4i64_2i64_1111:
    460 ; X32:       ## BB#0: ## %entry
    461 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    462 ; X32-NEXT:    vbroadcastsd 8(%eax), %ymm0
    463 ; X32-NEXT:    retl
    464 ;
    465 ; X64-LABEL: load_splat_4i64_2i64_1111:
    466 ; X64:       ## BB#0: ## %entry
    467 ; X64-NEXT:    vbroadcastsd 8(%rdi), %ymm0
    468 ; X64-NEXT:    retq
    469 entry:
    470   %ld = load <2 x i64>, <2 x i64>* %ptr
    471   %ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
    472   ret <4 x i64> %ret
    473 }
    474 
    475 define <4 x i64> @load_splat_4i64_4i64_2222(<4 x i64>* %ptr) nounwind uwtable readnone ssp {
        ; Loads a whole <4 x i64> from %ptr and splats element 2 (first element of
        ; the upper 128-bit half) across 4 lanes. Both check prefixes expect a
        ; vbroadcastsd of the qword at byte offset 16 into %ymm0.
    476 ; X32-LABEL: load_splat_4i64_4i64_2222:
    477 ; X32:       ## BB#0: ## %entry
    478 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    479 ; X32-NEXT:    vbroadcastsd 16(%eax), %ymm0
    480 ; X32-NEXT:    retl
    481 ;
    482 ; X64-LABEL: load_splat_4i64_4i64_2222:
    483 ; X64:       ## BB#0: ## %entry
    484 ; X64-NEXT:    vbroadcastsd 16(%rdi), %ymm0
    485 ; X64-NEXT:    retq
    486 entry:
    487   %ld = load <4 x i64>, <4 x i64>* %ptr
    488   %ret = shufflevector <4 x i64> %ld, <4 x i64> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
    489   ret <4 x i64> %ret
    490 }
    491 
    492 define <2 x double> @load_splat_2f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp {
        ; Loads a whole <2 x double> from %ptr and splats element 1 across both
        ; lanes. Both check prefixes expect vmovddup from memory rather than a
        ; vbroadcast instruction; the regex in those checks hides the address
        ; operand, so the byte offset is not pinned by this test.
        ; NOTE(review): the name suffix has four digits but the shuffle mask has
        ; only two elements -- the suffix does not mirror the mask here.
    493 ; X32-LABEL: load_splat_2f64_2f64_1111:
    494 ; X32:       ## BB#0: ## %entry
    495 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    496 ; X32-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
    497 ; X32-NEXT:    retl
    498 ;
    499 ; X64-LABEL: load_splat_2f64_2f64_1111:
    500 ; X64:       ## BB#0: ## %entry
    501 ; X64-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
    502 ; X64-NEXT:    retq
    503 entry:
    504   %ld = load <2 x double>, <2 x double>* %ptr
    505   %ret = shufflevector <2 x double> %ld, <2 x double> undef, <2 x i32> <i32 1, i32 1>
    506   ret <2 x double> %ret
    507 }
    508 
    509 define <4 x double> @load_splat_4f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp {
        ; Loads a <2 x double> from %ptr and splats element 1 into a wider
        ; <4 x double> result (4-entry mask, all 1). Both check prefixes expect a
        ; vbroadcastsd of the double at byte offset 8 into %ymm0.
    510 ; X32-LABEL: load_splat_4f64_2f64_1111:
    511 ; X32:       ## BB#0: ## %entry
    512 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    513 ; X32-NEXT:    vbroadcastsd 8(%eax), %ymm0
    514 ; X32-NEXT:    retl
    515 ;
    516 ; X64-LABEL: load_splat_4f64_2f64_1111:
    517 ; X64:       ## BB#0: ## %entry
    518 ; X64-NEXT:    vbroadcastsd 8(%rdi), %ymm0
    519 ; X64-NEXT:    retq
    520 entry:
    521   %ld = load <2 x double>, <2 x double>* %ptr
    522   %ret = shufflevector <2 x double> %ld, <2 x double> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
    523   ret <4 x double> %ret
    524 }
    525 
    526 define <4 x double> @load_splat_4f64_4f64_2222(<4 x double>* %ptr) nounwind uwtable readnone ssp {
        ; Loads a whole <4 x double> from %ptr and splats element 2 (first element
        ; of the upper 128-bit half) across 4 lanes. Both check prefixes expect a
        ; vbroadcastsd of the double at byte offset 16 into %ymm0.
    527 ; X32-LABEL: load_splat_4f64_4f64_2222:
    528 ; X32:       ## BB#0: ## %entry
    529 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    530 ; X32-NEXT:    vbroadcastsd 16(%eax), %ymm0
    531 ; X32-NEXT:    retl
    532 ;
    533 ; X64-LABEL: load_splat_4f64_4f64_2222:
    534 ; X64:       ## BB#0: ## %entry
    535 ; X64-NEXT:    vbroadcastsd 16(%rdi), %ymm0
    536 ; X64-NEXT:    retq
    537 entry:
    538   %ld = load <4 x double>, <4 x double>* %ptr
    539   %ret = shufflevector <4 x double> %ld, <4 x double> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
    540   ret <4 x double> %ret
    541 }
    542 
    543 ; Make sure that we still don't support broadcasting a double into a 128-bit vector.
    544 ; This used to crash.
    545 define <2 x double> @I(double* %ptr) nounwind uwtable readnone ssp {
        ; Loads one double from %ptr and inserts it into both lanes of a
        ; <2 x double>. Both check prefixes expect vmovddup from memory
        ; (xmm0 = mem[0,0]) and no vbroadcastsd instruction.
    546 ; X32-LABEL: I:
    547 ; X32:       ## BB#0: ## %entry
    548 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    549 ; X32-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
    550 ; X32-NEXT:    retl
    551 ;
    552 ; X64-LABEL: I:
    553 ; X64:       ## BB#0: ## %entry
    554 ; X64-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
    555 ; X64-NEXT:    retq
    556 entry:
    557   %q = load double, double* %ptr, align 4
    558   %vecinit.i = insertelement <2 x double> undef, double %q, i32 0
    559   %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
    560   ret <2 x double> %vecinit2.i
    561 }
    562 
    563 define <8 x i32> @V111(<8 x i32> %in) nounwind uwtable readnone ssp {
        ; Adds the splat constant <1 x 8> to %in. Both check prefixes expect the
        ; constant to be materialised with vpbroadcastd from a constant-pool entry
        ; into %ymm1, followed by vpaddd.
        ; NOTE(review): the i686 check names the literal label LCPI27_0, whereas the
        ; x86_64 check uses a regex. The 27 appears to be this function's index in
        ; the file (it is the 28th definition), so inserting or removing an earlier
        ; function would require regenerating the checks -- confirm.
    564 ; X32-LABEL: V111:
    565 ; X32:       ## BB#0: ## %entry
    566 ; X32-NEXT:    vpbroadcastd LCPI27_0, %ymm1
    567 ; X32-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
    568 ; X32-NEXT:    retl
    569 ;
    570 ; X64-LABEL: V111:
    571 ; X64:       ## BB#0: ## %entry
    572 ; X64-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm1
    573 ; X64-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
    574 ; X64-NEXT:    retq
    575 entry:
    576   %g = add <8 x i32> %in, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
    577   ret <8 x i32> %g
    578 }
    579 
    580 define <8 x float> @V113(<8 x float> %in) nounwind uwtable readnone ssp {
        ; Floating-point add of %in with an 8-lane splat of the constant
        ; 0xbf80000000000000. Both check prefixes expect the constant to be
        ; materialised with vbroadcastss from a constant-pool entry into %ymm1,
        ; followed by vaddps.
        ; NOTE(review): the i686 check names the literal label LCPI28_0, which
        ; appears to depend on this function's position in the file -- confirm.
    581 ; X32-LABEL: V113:
    582 ; X32:       ## BB#0: ## %entry
    583 ; X32-NEXT:    vbroadcastss LCPI28_0, %ymm1
    584 ; X32-NEXT:    vaddps %ymm1, %ymm0, %ymm0
    585 ; X32-NEXT:    retl
    586 ;
    587 ; X64-LABEL: V113:
    588 ; X64:       ## BB#0: ## %entry
    589 ; X64-NEXT:    vbroadcastss {{.*}}(%rip), %ymm1
    590 ; X64-NEXT:    vaddps %ymm1, %ymm0, %ymm0
    591 ; X64-NEXT:    retq
    592 entry:
    593   %g = fadd <8 x float> %in, <float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000>
    594   ret <8 x float> %g
    595 }
    596 
    597 define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp {
        ; Builds a <4 x float> whose four lanes are the same constant
        ; 0xbf80000000000000 via an insertelement chain; %ptr is not used by the
        ; body. Both check prefixes expect a single vbroadcastss from a
        ; constant-pool entry into %xmm0.
        ; NOTE(review): the i686 check names the literal label LCPI29_0, which
        ; appears to depend on this function's position in the file -- confirm.
    598 ; X32-LABEL: _e2:
    599 ; X32:       ## BB#0:
    600 ; X32-NEXT:    vbroadcastss LCPI29_0, %xmm0
    601 ; X32-NEXT:    retl
    602 ;
    603 ; X64-LABEL: _e2:
    604 ; X64:       ## BB#0:
    605 ; X64-NEXT:    vbroadcastss {{.*}}(%rip), %xmm0
    606 ; X64-NEXT:    retq
    607   %vecinit.i = insertelement <4 x float> undef, float        0xbf80000000000000, i32 0
    608   %vecinit2.i = insertelement <4 x float> %vecinit.i, float  0xbf80000000000000, i32 1
    609   %vecinit4.i = insertelement <4 x float> %vecinit2.i, float 0xbf80000000000000, i32 2
    610   %vecinit6.i = insertelement <4 x float> %vecinit4.i, float 0xbf80000000000000, i32 3
    611   ret <4 x float> %vecinit6.i
    612 }
    613 
    614 define <8 x i8> @_e4(i8* %ptr) nounwind uwtable readnone ssp {
        ; Builds a <8 x i8> whose eight lanes are the constant 52 via an
        ; insertelement chain; %ptr is not used by the body. Both check prefixes
        ; expect a plain vmovaps of a full constant vector into %xmm0, i.e. no
        ; broadcast instruction is expected for this 64-bit vector type.
    615 ; X32-LABEL: _e4:
    616 ; X32:       ## BB#0:
    617 ; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [52,52,52,52,52,52,52,52]
    618 ; X32-NEXT:    retl
    619 ;
    620 ; X64-LABEL: _e4:
    621 ; X64:       ## BB#0:
    622 ; X64-NEXT:    vmovaps {{.*#+}} xmm0 = [52,52,52,52,52,52,52,52]
    623 ; X64-NEXT:    retq
    624   %vecinit0.i = insertelement <8 x i8> undef, i8       52, i32 0
    625   %vecinit1.i = insertelement <8 x i8> %vecinit0.i, i8 52, i32 1
    626   %vecinit2.i = insertelement <8 x i8> %vecinit1.i, i8 52, i32 2
    627   %vecinit3.i = insertelement <8 x i8> %vecinit2.i, i8 52, i32 3
    628   %vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 52, i32 4
    629   %vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 52, i32 5
    630   %vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 52, i32 6
    631   %vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 52, i32 7
    632   ret <8 x i8> %vecinit7.i
    633 }
    634 
    635 define void @crash() nounwind alwaysinline {
        ; Control-flow skeleton with undef branch conditions and undef operands:
        ; footer329VF builds a scalar-to-vector splat (%vector1099VF, via
        ; insertelement + zero-mask shufflevector) and footer349VF multiplies with
        ; it, but none of the computed values are stored or returned.
        ; The checks expect no vector instructions at all -- only a test/branch in
        ; the entry block and a self-looping block labelled footer349VF.
        ; Presumably a reduced reproducer for a former compiler crash, going by the
        ; function name; the file does not say what crashed.
        ; NOTE(review): the checks name the literal label LBB31_1, which appears to
        ; depend on this function's position in the file -- confirm.
    636 ; X32-LABEL: crash:
    637 ; X32:       ## BB#0: ## %WGLoopsEntry
    638 ; X32-NEXT:    xorl %eax, %eax
    639 ; X32-NEXT:    testb %al, %al
    640 ; X32-NEXT:    je LBB31_1
    641 ; X32-NEXT:  ## BB#2: ## %ret
    642 ; X32-NEXT:    retl
    643 ; X32-NEXT:    .p2align 4, 0x90
    644 ; X32-NEXT:  LBB31_1: ## %footer349VF
    645 ; X32-NEXT:    ## =>This Inner Loop Header: Depth=1
    646 ; X32-NEXT:    jmp LBB31_1
    647 ;
    648 ; X64-LABEL: crash:
    649 ; X64:       ## BB#0: ## %WGLoopsEntry
    650 ; X64-NEXT:    xorl %eax, %eax
    651 ; X64-NEXT:    testb %al, %al
    652 ; X64-NEXT:    je LBB31_1
    653 ; X64-NEXT:  ## BB#2: ## %ret
    654 ; X64-NEXT:    retq
    655 ; X64-NEXT:    .p2align 4, 0x90
    656 ; X64-NEXT:  LBB31_1: ## %footer349VF
    657 ; X64-NEXT:    ## =>This Inner Loop Header: Depth=1
    658 ; X64-NEXT:    jmp LBB31_1
    659 WGLoopsEntry:
    660   br i1 undef, label %ret, label %footer329VF
    661 
    662 footer329VF:
    663   %A.0.inVF = fmul float undef, 6.553600e+04
    664   %B.0.in407VF = fmul <8 x float> undef, <float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04>
    665   %A.0VF = fptosi float %A.0.inVF to i32
    666   %B.0408VF = fptosi <8 x float> %B.0.in407VF to <8 x i32>
    667   %0 = and <8 x i32> %B.0408VF, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
    668   %1 = and i32 %A.0VF, 65535
    669   %temp1098VF = insertelement <8 x i32> undef, i32 %1, i32 0
    670   %vector1099VF = shufflevector <8 x i32> %temp1098VF, <8 x i32> undef, <8 x i32> zeroinitializer
    671   br i1 undef, label %preload1201VF, label %footer349VF
    672 
    673 preload1201VF:
    674   br label %footer349VF
    675 
    676 footer349VF:
    677   %2 = mul nsw <8 x i32> undef, %0
    678   %3 = mul nsw <8 x i32> undef, %vector1099VF
    679   br label %footer329VF
    680 
    681 ret:
    682   ret void
    683 }
    684 
    685 define <8 x i32> @_inreg0(i32 %scalar) nounwind uwtable readnone ssp {
        ; Splats an i32 function argument into a <8 x i32> (insertelement into lane
        ; 0, then a zero-mask shufflevector). The i686 checks expect vbroadcastss
        ; straight from the argument's stack slot; the x86_64 checks expect the
        ; value to be moved from %edi into %xmm0 with vmovd and then broadcast
        ; register-to-register into %ymm0.
    686 ; X32-LABEL: _inreg0:
    687 ; X32:       ## BB#0:
    688 ; X32-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %ymm0
    689 ; X32-NEXT:    retl
    690 ;
    691 ; X64-LABEL: _inreg0:
    692 ; X64:       ## BB#0:
    693 ; X64-NEXT:    vmovd %edi, %xmm0
    694 ; X64-NEXT:    vbroadcastss %xmm0, %ymm0
    695 ; X64-NEXT:    retq
    696   %in = insertelement <8 x i32> undef, i32 %scalar, i32 0
    697   %wide = shufflevector <8 x i32> %in, <8 x i32> undef, <8 x i32> zeroinitializer
    698   ret <8 x i32> %wide
    699 }
    700 
    701 define <8 x float> @_inreg1(float %scalar) nounwind uwtable readnone ssp {
        ; Splats a float function argument into a <8 x float> (insertelement into
        ; lane 0, then a zero-mask shufflevector). The i686 checks expect
        ; vbroadcastss from the argument's stack slot; the x86_64 checks expect a
        ; register-to-register vbroadcastss from %xmm0 into %ymm0.
    702 ; X32-LABEL: _inreg1:
    703 ; X32:       ## BB#0:
    704 ; X32-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %ymm0
    705 ; X32-NEXT:    retl
    706 ;
    707 ; X64-LABEL: _inreg1:
    708 ; X64:       ## BB#0:
    709 ; X64-NEXT:    vbroadcastss %xmm0, %ymm0
    710 ; X64-NEXT:    retq
    711   %in = insertelement <8 x float> undef, float %scalar, i32 0
    712   %wide = shufflevector <8 x float> %in, <8 x float> undef, <8 x i32> zeroinitializer
    713   ret <8 x float> %wide
    714 }
    715 
    716 define <4 x float> @_inreg2(float %scalar) nounwind uwtable readnone ssp {
        ; 128-bit variant of the scalar float splat: the result is <4 x float>, so
        ; both targets expect a vbroadcastss whose destination is %xmm0.
    717 ; X32-LABEL: _inreg2:
    718 ; X32:       ## BB#0:
    719 ; X32-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %xmm0
    720 ; X32-NEXT:    retl
    721 ;
    722 ; X64-LABEL: _inreg2:
    723 ; X64:       ## BB#0:
    724 ; X64-NEXT:    vbroadcastss %xmm0, %xmm0
    725 ; X64-NEXT:    retq
    726   %in = insertelement <4 x float> undef, float %scalar, i32 0
    727   %wide = shufflevector <4 x float> %in, <4 x float> undef, <4 x i32> zeroinitializer
    728   ret <4 x float> %wide
    729 }
    730 
    731 define <4 x double> @_inreg3(double %scalar) nounwind uwtable readnone ssp {
        ; Splats the scalar double argument to <4 x double> (insert into lane 0 +
        ; all-zero shuffle mask). X32 expects vbroadcastsd from the stack slot;
        ; X64 expects vbroadcastsd from %xmm0 into %ymm0.
    732 ; X32-LABEL: _inreg3:
    733 ; X32:       ## BB#0:
    734 ; X32-NEXT:    vbroadcastsd {{[0-9]+}}(%esp), %ymm0
    735 ; X32-NEXT:    retl
    736 ;
    737 ; X64-LABEL: _inreg3:
    738 ; X64:       ## BB#0:
    739 ; X64-NEXT:    vbroadcastsd %xmm0, %ymm0
    740 ; X64-NEXT:    retq
    741   %in = insertelement <4 x double> undef, double %scalar, i32 0
    742   %wide = shufflevector <4 x double> %in, <4 x double> undef, <4 x i32> zeroinitializer
    743   ret <4 x double> %wide
    744 }
    745 
    746 define   <8 x float> @_inreg8xfloat(<8 x float> %a) {
        ; Broadcasts lane 0 of the <8 x float> vector argument to every lane using
        ; an all-zero shuffle mask. Both targets expect a single register-form
        ; vbroadcastss %xmm0, %ymm0.
    747 ; X32-LABEL: _inreg8xfloat:
    748 ; X32:       ## BB#0:
    749 ; X32-NEXT:    vbroadcastss %xmm0, %ymm0
    750 ; X32-NEXT:    retl
    751 ;
    752 ; X64-LABEL: _inreg8xfloat:
    753 ; X64:       ## BB#0:
    754 ; X64-NEXT:    vbroadcastss %xmm0, %ymm0
    755 ; X64-NEXT:    retq
    756   %b = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> zeroinitializer
    757   ret <8 x float> %b
    758 }
    759 
    760 define   <4 x float> @_inreg4xfloat(<4 x float> %a) {
        ; Broadcasts lane 0 of the <4 x float> vector argument to every lane using
        ; an all-zero shuffle mask. Both targets expect vbroadcastss %xmm0, %xmm0.
    761 ; X32-LABEL: _inreg4xfloat:
    762 ; X32:       ## BB#0:
    763 ; X32-NEXT:    vbroadcastss %xmm0, %xmm0
    764 ; X32-NEXT:    retl
    765 ;
    766 ; X64-LABEL: _inreg4xfloat:
    767 ; X64:       ## BB#0:
    768 ; X64-NEXT:    vbroadcastss %xmm0, %xmm0
    769 ; X64-NEXT:    retq
    770   %b = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> zeroinitializer
    771   ret <4 x float> %b
    772 }
    773 
    774 define   <16 x i16> @_inreg16xi16(<16 x i16> %a) {
        ; Broadcasts lane 0 of the <16 x i16> vector argument to every lane using
        ; an all-zero shuffle mask. Both targets expect vpbroadcastw %xmm0, %ymm0.
    775 ; X32-LABEL: _inreg16xi16:
    776 ; X32:       ## BB#0:
    777 ; X32-NEXT:    vpbroadcastw %xmm0, %ymm0
    778 ; X32-NEXT:    retl
    779 ;
    780 ; X64-LABEL: _inreg16xi16:
    781 ; X64:       ## BB#0:
    782 ; X64-NEXT:    vpbroadcastw %xmm0, %ymm0
    783 ; X64-NEXT:    retq
    784   %b = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> zeroinitializer
    785   ret <16 x i16> %b
    786 }
    787 
    788 define   <8 x i16> @_inreg8xi16(<8 x i16> %a) {
        ; Broadcasts lane 0 of the <8 x i16> vector argument to every lane using
        ; an all-zero shuffle mask. Both targets expect vpbroadcastw %xmm0, %xmm0.
    789 ; X32-LABEL: _inreg8xi16:
    790 ; X32:       ## BB#0:
    791 ; X32-NEXT:    vpbroadcastw %xmm0, %xmm0
    792 ; X32-NEXT:    retl
    793 ;
    794 ; X64-LABEL: _inreg8xi16:
    795 ; X64:       ## BB#0:
    796 ; X64-NEXT:    vpbroadcastw %xmm0, %xmm0
    797 ; X64-NEXT:    retq
    798   %b = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> zeroinitializer
    799   ret <8 x i16> %b
    800 }
    801 
    802 define   <4 x i64> @_inreg4xi64(<4 x i64> %a) {
        ; Broadcasts lane 0 of the <4 x i64> vector argument to every lane using
        ; an all-zero shuffle mask. Note the expected instruction on both targets
        ; is vbroadcastsd %xmm0, %ymm0 even though the element type is integer.
    803 ; X32-LABEL: _inreg4xi64:
    804 ; X32:       ## BB#0:
    805 ; X32-NEXT:    vbroadcastsd %xmm0, %ymm0
    806 ; X32-NEXT:    retl
    807 ;
    808 ; X64-LABEL: _inreg4xi64:
    809 ; X64:       ## BB#0:
    810 ; X64-NEXT:    vbroadcastsd %xmm0, %ymm0
    811 ; X64-NEXT:    retq
    812   %b = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> zeroinitializer
    813   ret <4 x i64> %b
    814 }
    815 
    816 define   <2 x i64> @_inreg2xi64(<2 x i64> %a) {
        ; Broadcasts lane 0 of the <2 x i64> vector argument to both lanes using
        ; an all-zero shuffle mask. Both targets expect vpbroadcastq %xmm0, %xmm0.
    817 ; X32-LABEL: _inreg2xi64:
    818 ; X32:       ## BB#0:
    819 ; X32-NEXT:    vpbroadcastq %xmm0, %xmm0
    820 ; X32-NEXT:    retl
    821 ;
    822 ; X64-LABEL: _inreg2xi64:
    823 ; X64:       ## BB#0:
    824 ; X64-NEXT:    vpbroadcastq %xmm0, %xmm0
    825 ; X64-NEXT:    retq
    826   %b = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> zeroinitializer
    827   ret <2 x i64> %b
    828 }
    829 
    830 define   <4 x double> @_inreg4xdouble(<4 x double> %a) {
        ; Broadcasts lane 0 of the <4 x double> vector argument to every lane using
        ; an all-zero shuffle mask. Both targets expect vbroadcastsd %xmm0, %ymm0.
    831 ; X32-LABEL: _inreg4xdouble:
    832 ; X32:       ## BB#0:
    833 ; X32-NEXT:    vbroadcastsd %xmm0, %ymm0
    834 ; X32-NEXT:    retl
    835 ;
    836 ; X64-LABEL: _inreg4xdouble:
    837 ; X64:       ## BB#0:
    838 ; X64-NEXT:    vbroadcastsd %xmm0, %ymm0
    839 ; X64-NEXT:    retq
    840   %b = shufflevector <4 x double> %a, <4 x double> undef, <4 x i32> zeroinitializer
    841   ret <4 x double> %b
    842 }
    843 
    844 define   <2 x double> @_inreg2xdouble(<2 x double> %a) {
        ; Broadcasts lane 0 of the <2 x double> vector argument to both lanes using
        ; an all-zero shuffle mask. Unlike the other in-register cases, the
        ; expected instruction here is vmovddup (xmm0 = xmm0[0,0]), not a
        ; vbroadcast* instruction.
    845 ; X32-LABEL: _inreg2xdouble:
    846 ; X32:       ## BB#0:
    847 ; X32-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
    848 ; X32-NEXT:    retl
    849 ;
    850 ; X64-LABEL: _inreg2xdouble:
    851 ; X64:       ## BB#0:
    852 ; X64-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
    853 ; X64-NEXT:    retq
    854   %b = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> zeroinitializer
    855   ret <2 x double> %b
    856 }
    857 
    858 define   <8 x i32> @_inreg8xi32(<8 x i32> %a) {
        ; Broadcasts lane 0 of the <8 x i32> vector argument to every lane using
        ; an all-zero shuffle mask. The expected instruction on both targets is
        ; vbroadcastss %xmm0, %ymm0 even though the element type is integer.
    859 ; X32-LABEL: _inreg8xi32:
    860 ; X32:       ## BB#0:
    861 ; X32-NEXT:    vbroadcastss %xmm0, %ymm0
    862 ; X32-NEXT:    retl
    863 ;
    864 ; X64-LABEL: _inreg8xi32:
    865 ; X64:       ## BB#0:
    866 ; X64-NEXT:    vbroadcastss %xmm0, %ymm0
    867 ; X64-NEXT:    retq
    868   %b = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> zeroinitializer
    869   ret <8 x i32> %b
    870 }
    871 
    872 define   <4 x i32> @_inreg4xi32(<4 x i32> %a) {
        ; Broadcasts lane 0 of the <4 x i32> vector argument to every lane using
        ; an all-zero shuffle mask. The expected instruction on both targets is
        ; vbroadcastss %xmm0, %xmm0 even though the element type is integer.
    873 ; X32-LABEL: _inreg4xi32:
    874 ; X32:       ## BB#0:
    875 ; X32-NEXT:    vbroadcastss %xmm0, %xmm0
    876 ; X32-NEXT:    retl
    877 ;
    878 ; X64-LABEL: _inreg4xi32:
    879 ; X64:       ## BB#0:
    880 ; X64-NEXT:    vbroadcastss %xmm0, %xmm0
    881 ; X64-NEXT:    retq
    882   %b = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> zeroinitializer
    883   ret <4 x i32> %b
    884 }
    885 
    886 define   <32 x i8> @_inreg32xi8(<32 x i8> %a) {
        ; Broadcasts lane 0 of the <32 x i8> vector argument to every lane using
        ; an all-zero shuffle mask. Both targets expect vpbroadcastb %xmm0, %ymm0.
    887 ; X32-LABEL: _inreg32xi8:
    888 ; X32:       ## BB#0:
    889 ; X32-NEXT:    vpbroadcastb %xmm0, %ymm0
    890 ; X32-NEXT:    retl
    891 ;
    892 ; X64-LABEL: _inreg32xi8:
    893 ; X64:       ## BB#0:
    894 ; X64-NEXT:    vpbroadcastb %xmm0, %ymm0
    895 ; X64-NEXT:    retq
    896   %b = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> zeroinitializer
    897   ret <32 x i8> %b
    898 }
    899 
    900 define   <16 x i8> @_inreg16xi8(<16 x i8> %a) {
        ; Broadcasts lane 0 of the <16 x i8> vector argument to every lane using
        ; an all-zero shuffle mask. Both targets expect vpbroadcastb %xmm0, %xmm0.
    901 ; X32-LABEL: _inreg16xi8:
    902 ; X32:       ## BB#0:
    903 ; X32-NEXT:    vpbroadcastb %xmm0, %xmm0
    904 ; X32-NEXT:    retl
    905 ;
    906 ; X64-LABEL: _inreg16xi8:
    907 ; X64:       ## BB#0:
    908 ; X64-NEXT:    vpbroadcastb %xmm0, %xmm0
    909 ; X64-NEXT:    retq
    910   %b = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> zeroinitializer
    911   ret <16 x i8> %b
    912 }
    913 
    914 ; These tests check that a vbroadcast instruction is used when we have a splat
    915 ; formed from a concat_vectors (via the shufflevector) of two BUILD_VECTORs
    916 ; (via the insertelements).
    917 
    918 define <8 x float> @splat_concat1(float %f) {
        ; Builds a <4 x float> with %f in all four lanes, then widens it to
        ; <8 x float> with a shuffle mask that repeats lanes 0-3 twice. Every
        ; result lane is therefore %f, and the checks expect one vbroadcastss.
    919 ; X32-LABEL: splat_concat1:
    920 ; X32:       ## BB#0:
    921 ; X32-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %ymm0
    922 ; X32-NEXT:    retl
    923 ;
    924 ; X64-LABEL: splat_concat1:
    925 ; X64:       ## BB#0:
    926 ; X64-NEXT:    vbroadcastss %xmm0, %ymm0
    927 ; X64-NEXT:    retq
    928   %1 = insertelement <4 x float> undef, float %f, i32 0
    929   %2 = insertelement <4 x float> %1, float %f, i32 1
    930   %3 = insertelement <4 x float> %2, float %f, i32 2
    931   %4 = insertelement <4 x float> %3, float %f, i32 3
    932   %5 = shufflevector <4 x float> %4, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
    933   ret <8 x float> %5
    934 }
    935 
    936 define <8 x float> @splat_concat2(float %f) {
        ; Builds two separate <4 x float> vectors (%4 and %8), each with %f in all
        ; four lanes, and concatenates them with the identity mask 0..7. Every
        ; result lane is %f, and the checks expect one vbroadcastss.
    937 ; X32-LABEL: splat_concat2:
    938 ; X32:       ## BB#0:
    939 ; X32-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %ymm0
    940 ; X32-NEXT:    retl
    941 ;
    942 ; X64-LABEL: splat_concat2:
    943 ; X64:       ## BB#0:
    944 ; X64-NEXT:    vbroadcastss %xmm0, %ymm0
    945 ; X64-NEXT:    retq
    946   %1 = insertelement <4 x float> undef, float %f, i32 0
    947   %2 = insertelement <4 x float> %1, float %f, i32 1
    948   %3 = insertelement <4 x float> %2, float %f, i32 2
    949   %4 = insertelement <4 x float> %3, float %f, i32 3
    950   %5 = insertelement <4 x float> undef, float %f, i32 0
    951   %6 = insertelement <4 x float> %5, float %f, i32 1
    952   %7 = insertelement <4 x float> %6, float %f, i32 2
    953   %8 = insertelement <4 x float> %7, float %f, i32 3
    954   %9 = shufflevector <4 x float> %4, <4 x float> %8, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
    955   ret <8 x float> %9
    956 }
    957 
    958 define <4 x double> @splat_concat3(double %d) {
        ; Builds a <2 x double> with %d in both lanes, then widens it to
        ; <4 x double> with a shuffle mask that repeats lanes 0-1 twice. Every
        ; result lane is %d, and the checks expect one vbroadcastsd.
    959 ; X32-LABEL: splat_concat3:
    960 ; X32:       ## BB#0:
    961 ; X32-NEXT:    vbroadcastsd {{[0-9]+}}(%esp), %ymm0
    962 ; X32-NEXT:    retl
    963 ;
    964 ; X64-LABEL: splat_concat3:
    965 ; X64:       ## BB#0:
    966 ; X64-NEXT:    vbroadcastsd %xmm0, %ymm0
    967 ; X64-NEXT:    retq
    968   %1 = insertelement <2 x double> undef, double %d, i32 0
    969   %2 = insertelement <2 x double> %1, double %d, i32 1
    970   %3 = shufflevector <2 x double> %2, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
    971   ret <4 x double> %3
    972 }
    973 
    974 define <4 x double> @splat_concat4(double %d) {
        ; Builds two separate <2 x double> vectors (%2 and %4), each with %d in
        ; both lanes, and concatenates them with the identity mask 0..3. Every
        ; result lane is %d, and the checks expect one vbroadcastsd.
    975 ; X32-LABEL: splat_concat4:
    976 ; X32:       ## BB#0:
    977 ; X32-NEXT:    vbroadcastsd {{[0-9]+}}(%esp), %ymm0
    978 ; X32-NEXT:    retl
    979 ;
    980 ; X64-LABEL: splat_concat4:
    981 ; X64:       ## BB#0:
    982 ; X64-NEXT:    vbroadcastsd %xmm0, %ymm0
    983 ; X64-NEXT:    retq
    984   %1 = insertelement <2 x double> undef, double %d, i32 0
    985   %2 = insertelement <2 x double> %1, double %d, i32 1
    986   %3 = insertelement <2 x double> undef, double %d, i32 0
    987   %4 = insertelement <2 x double> %3, double %d, i32 1
    988   %5 = shufflevector <2 x double> %2, <2 x double> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    989   ret <4 x double> %5
    990 }
    991 
    992 ; Test cases for <rdar://problem/16074331>.
    993 ; Instruction selection for the broadcast instruction fails if
    994 ; the load cannot be folded into the broadcast.
    995 ; This happens if the load initially has one use but other uses are
    996 ; created later, or if the selection DAG cannot prove that folding the
    997 ; load will not create a cycle in the DAG.
    998 ; These test cases exercise the latter.
    999 
   1000 ; CHECK-LABEL: isel_crash_16b
   1001 ; CHECK: vpbroadcastb {{[^,]+}}, %xmm{{[0-9]+}}
   1002 ; CHECK: ret
   1003 define void @isel_crash_16b(i8* %cV_R.addr) {
        ; Stores a zero <2 x i64> to %vCr and reloads it, loads an i8 from
        ; %cV_R.addr, splats it to <16 x i8>, bitcasts the splat to <2 x i64>, and
        ; stores both vectors to stack slots. The CHECK lines above this function
        ; only require that a vpbroadcastb into an xmm register is emitted before
        ; the return.
   1004 entry:
   1005   %__a.addr.i = alloca <2 x i64>, align 16
   1006   %__b.addr.i = alloca <2 x i64>, align 16
   1007   %vCr = alloca <2 x i64>, align 16
   1008   store <2 x i64> zeroinitializer, <2 x i64>* %vCr, align 16
   1009   %tmp = load <2 x i64>, <2 x i64>* %vCr, align 16
   1010   %tmp2 = load i8, i8* %cV_R.addr, align 4
   1011   %splat.splatinsert = insertelement <16 x i8> undef, i8 %tmp2, i32 0
   1012   %splat.splat = shufflevector <16 x i8> %splat.splatinsert, <16 x i8> undef, <16 x i32> zeroinitializer
   1013   %tmp3 = bitcast <16 x i8> %splat.splat to <2 x i64>
   1014   store <2 x i64> %tmp, <2 x i64>* %__a.addr.i, align 16
   1015   store <2 x i64> %tmp3, <2 x i64>* %__b.addr.i, align 16
   1016   ret void
   1017 }
   1018 
   1019 ; CHECK-LABEL: isel_crash_32b
   1020 ; CHECK: vpbroadcastb {{[^,]+}}, %ymm{{[0-9]+}}
   1021 ; CHECK: ret
   1022 define void @isel_crash_32b(i8* %cV_R.addr) {
        ; Stores a zero <4 x i64> to %vCr and reloads it, loads an i8 from
        ; %cV_R.addr, splats it to <32 x i8>, bitcasts the splat to <4 x i64>, and
        ; stores both vectors to stack slots. The CHECK lines above this function
        ; only require that a vpbroadcastb into a ymm register is emitted before
        ; the return.
   1023 entry:
   1024   %__a.addr.i = alloca <4 x i64>, align 16
   1025   %__b.addr.i = alloca <4 x i64>, align 16
   1026   %vCr = alloca <4 x i64>, align 16
   1027   store <4 x i64> zeroinitializer, <4 x i64>* %vCr, align 16
   1028   %tmp = load <4 x i64>, <4 x i64>* %vCr, align 16
   1029   %tmp2 = load i8, i8* %cV_R.addr, align 4
   1030   %splat.splatinsert = insertelement <32 x i8> undef, i8 %tmp2, i32 0
   1031   %splat.splat = shufflevector <32 x i8> %splat.splatinsert, <32 x i8> undef, <32 x i32> zeroinitializer
   1032   %tmp3 = bitcast <32 x i8> %splat.splat to <4 x i64>
   1033   store <4 x i64> %tmp, <4 x i64>* %__a.addr.i, align 16
   1034   store <4 x i64> %tmp3, <4 x i64>* %__b.addr.i, align 16
   1035   ret void
   1036 }
   1037 
   1038 ; CHECK-LABEL: isel_crash_8w
   1039 ; CHECK: vpbroadcastw {{[^,]+}}, %xmm{{[0-9]+}}
   1040 ; CHECK: ret
   1041 define void @isel_crash_8w(i16* %cV_R.addr) {
        ; Stores a zero <2 x i64> to %vCr and reloads it, loads an i16 from
        ; %cV_R.addr, splats it to <8 x i16>, bitcasts the splat to <2 x i64>, and
        ; stores both vectors to stack slots. The CHECK lines above this function
        ; only require that a vpbroadcastw into an xmm register is emitted before
        ; the return.
   1042 entry:
   1043   %__a.addr.i = alloca <2 x i64>, align 16
   1044   %__b.addr.i = alloca <2 x i64>, align 16
   1045   %vCr = alloca <2 x i64>, align 16
   1046   store <2 x i64> zeroinitializer, <2 x i64>* %vCr, align 16
   1047   %tmp = load <2 x i64>, <2 x i64>* %vCr, align 16
   1048   %tmp2 = load i16, i16* %cV_R.addr, align 4
   1049   %splat.splatinsert = insertelement <8 x i16> undef, i16 %tmp2, i32 0
   1050   %splat.splat = shufflevector <8 x i16> %splat.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
   1051   %tmp3 = bitcast <8 x i16> %splat.splat to <2 x i64>
   1052   store <2 x i64> %tmp, <2 x i64>* %__a.addr.i, align 16
   1053   store <2 x i64> %tmp3, <2 x i64>* %__b.addr.i, align 16
   1054   ret void
   1055 }
   1056 
   1057 ; CHECK-LABEL: isel_crash_16w
   1058 ; CHECK: vpbroadcastw {{[^,]+}}, %ymm{{[0-9]+}}
   1059 ; CHECK: ret
   1060 define void @isel_crash_16w(i16* %cV_R.addr) {
        ; Stores a zero <4 x i64> to %vCr and reloads it, loads an i16 from
        ; %cV_R.addr, splats it to <16 x i16>, bitcasts the splat to <4 x i64>, and
        ; stores both vectors to stack slots. The CHECK lines above this function
        ; only require that a vpbroadcastw into a ymm register is emitted before
        ; the return.
   1061 entry:
   1062   %__a.addr.i = alloca <4 x i64>, align 16
   1063   %__b.addr.i = alloca <4 x i64>, align 16
   1064   %vCr = alloca <4 x i64>, align 16
   1065   store <4 x i64> zeroinitializer, <4 x i64>* %vCr, align 16
   1066   %tmp = load <4 x i64>, <4 x i64>* %vCr, align 16
   1067   %tmp2 = load i16, i16* %cV_R.addr, align 4
   1068   %splat.splatinsert = insertelement <16 x i16> undef, i16 %tmp2, i32 0
   1069   %splat.splat = shufflevector <16 x i16> %splat.splatinsert, <16 x i16> undef, <16 x i32> zeroinitializer
   1070   %tmp3 = bitcast <16 x i16> %splat.splat to <4 x i64>
   1071   store <4 x i64> %tmp, <4 x i64>* %__a.addr.i, align 16
   1072   store <4 x i64> %tmp3, <4 x i64>* %__b.addr.i, align 16
   1073   ret void
   1074 }
   1075 
   1076 ; CHECK-LABEL: isel_crash_4d
   1077 ; CHECK: vbroadcastss {{[^,]+}}, %xmm{{[0-9]+}}
   1078 ; CHECK: ret
   1079 define void @isel_crash_4d(i32* %cV_R.addr) {
        ; Stores a zero <2 x i64> to %vCr and reloads it, loads an i32 from
        ; %cV_R.addr, splats it to <4 x i32>, bitcasts the splat to <2 x i64>, and
        ; stores both vectors to stack slots. The CHECK lines above this function
        ; only require that a vbroadcastss into an xmm register is emitted before
        ; the return.
   1080 entry:
   1081   %__a.addr.i = alloca <2 x i64>, align 16
   1082   %__b.addr.i = alloca <2 x i64>, align 16
   1083   %vCr = alloca <2 x i64>, align 16
   1084   store <2 x i64> zeroinitializer, <2 x i64>* %vCr, align 16
   1085   %tmp = load <2 x i64>, <2 x i64>* %vCr, align 16
   1086   %tmp2 = load i32, i32* %cV_R.addr, align 4
   1087   %splat.splatinsert = insertelement <4 x i32> undef, i32 %tmp2, i32 0
   1088   %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
   1089   %tmp3 = bitcast <4 x i32> %splat.splat to <2 x i64>
   1090   store <2 x i64> %tmp, <2 x i64>* %__a.addr.i, align 16
   1091   store <2 x i64> %tmp3, <2 x i64>* %__b.addr.i, align 16
   1092   ret void
   1093 }
   1094 
   1095 ; CHECK-LABEL: isel_crash_8d
   1096 ; CHECK: vbroadcastss {{[^,]+}}, %ymm{{[0-9]+}}
   1097 ; CHECK: ret
   1098 define void @isel_crash_8d(i32* %cV_R.addr) {
        ; Stores a zero <4 x i64> to %vCr and reloads it, loads an i32 from
        ; %cV_R.addr, splats it to <8 x i32>, bitcasts the splat to <4 x i64>, and
        ; stores both vectors to stack slots. The CHECK lines above this function
        ; only require that a vbroadcastss into a ymm register is emitted before
        ; the return.
   1099 entry:
   1100   %__a.addr.i = alloca <4 x i64>, align 16
   1101   %__b.addr.i = alloca <4 x i64>, align 16
   1102   %vCr = alloca <4 x i64>, align 16
   1103   store <4 x i64> zeroinitializer, <4 x i64>* %vCr, align 16
   1104   %tmp = load <4 x i64>, <4 x i64>* %vCr, align 16
   1105   %tmp2 = load i32, i32* %cV_R.addr, align 4
   1106   %splat.splatinsert = insertelement <8 x i32> undef, i32 %tmp2, i32 0
   1107   %splat.splat = shufflevector <8 x i32> %splat.splatinsert, <8 x i32> undef, <8 x i32> zeroinitializer
   1108   %tmp3 = bitcast <8 x i32> %splat.splat to <4 x i64>
   1109   store <4 x i64> %tmp, <4 x i64>* %__a.addr.i, align 16
   1110   store <4 x i64> %tmp3, <4 x i64>* %__b.addr.i, align 16
   1111   ret void
   1112 }
   1113 
   1114 ; X64-LABEL: isel_crash_2q
   1115 ; X64: vpbroadcastq {{[^,]+}}, %xmm{{[0-9]+}}
   1116 ; X64: ret
   1117 define void @isel_crash_2q(i64* %cV_R.addr) {
        ; Stores a zero <2 x i64> to %vCr and reloads it, loads an i64 from
        ; %cV_R.addr, splats it to <2 x i64> (no bitcast needed), and stores both
        ; vectors to stack slots. The check lines above this function use only the
        ; X64 prefix and require a vpbroadcastq into an xmm register before the
        ; return.
   1118 entry:
   1119   %__a.addr.i = alloca <2 x i64>, align 16
   1120   %__b.addr.i = alloca <2 x i64>, align 16
   1121   %vCr = alloca <2 x i64>, align 16
   1122   store <2 x i64> zeroinitializer, <2 x i64>* %vCr, align 16
   1123   %tmp = load <2 x i64>, <2 x i64>* %vCr, align 16
   1124   %tmp2 = load i64, i64* %cV_R.addr, align 4
   1125   %splat.splatinsert = insertelement <2 x i64> undef, i64 %tmp2, i32 0
   1126   %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
   1127   store <2 x i64> %tmp, <2 x i64>* %__a.addr.i, align 16
   1128   store <2 x i64> %splat.splat, <2 x i64>* %__b.addr.i, align 16
   1129   ret void
   1130 }
   1131 
   1132 ; X64-LABEL: isel_crash_4q
   1133 ; X64: vbroadcastsd {{[^,]+}}, %ymm{{[0-9]+}}
   1134 ; X64: ret
   1135 define void @isel_crash_4q(i64* %cV_R.addr) {
        ; Stores a zero <4 x i64> to %vCr and reloads it, loads an i64 from
        ; %cV_R.addr, splats it to <4 x i64> (no bitcast needed), and stores both
        ; vectors to stack slots. The check lines above this function use only the
        ; X64 prefix and require a vbroadcastsd into a ymm register before the
        ; return.
   1136 entry:
   1137   %__a.addr.i = alloca <4 x i64>, align 16
   1138   %__b.addr.i = alloca <4 x i64>, align 16
   1139   %vCr = alloca <4 x i64>, align 16
   1140   store <4 x i64> zeroinitializer, <4 x i64>* %vCr, align 16
   1141   %tmp = load <4 x i64>, <4 x i64>* %vCr, align 16
   1142   %tmp2 = load i64, i64* %cV_R.addr, align 4
   1143   %splat.splatinsert = insertelement <4 x i64> undef, i64 %tmp2, i32 0
   1144   %splat.splat = shufflevector <4 x i64> %splat.splatinsert, <4 x i64> undef, <4 x i32> zeroinitializer
   1145   store <4 x i64> %tmp, <4 x i64>* %__a.addr.i, align 16
   1146   store <4 x i64> %splat.splat, <4 x i64>* %__b.addr.i, align 16
   1147   ret void
   1148 }
   1149