; X86 vector-broadcast codegen test (code-viewer navigation header removed).
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=X32 --check-prefix=X32-AVX2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX2
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx512vl,+avx512dq | FileCheck %s --check-prefix=X32 --check-prefix=X32-AVX512VL
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512vl,+avx512dq | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX512VL
      6 
; Splat of a loaded i8 into <16 x i8>, built as a chain of 16 insertelements of
; the same scalar; codegen should fold the whole chain into one vpbroadcastb load.
define <16 x i8> @BB16(i8* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: BB16:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpbroadcastb (%eax), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: BB16:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vpbroadcastb (%rdi), %xmm0
; X64-NEXT:    retq
entry:
  %q = load i8, i8* %ptr, align 4
  %q0 = insertelement <16 x i8> undef, i8 %q, i32 0
  %q1 = insertelement <16 x i8> %q0, i8 %q, i32 1
  %q2 = insertelement <16 x i8> %q1, i8 %q, i32 2
  %q3 = insertelement <16 x i8> %q2, i8 %q, i32 3
  %q4 = insertelement <16 x i8> %q3, i8 %q, i32 4
  %q5 = insertelement <16 x i8> %q4, i8 %q, i32 5
  %q6 = insertelement <16 x i8> %q5, i8 %q, i32 6
  %q7 = insertelement <16 x i8> %q6, i8 %q, i32 7
  %q8 = insertelement <16 x i8> %q7, i8 %q, i32 8
  %q9 = insertelement <16 x i8> %q8, i8 %q, i32 9
  %qa = insertelement <16 x i8> %q9, i8 %q, i32 10
  %qb = insertelement <16 x i8> %qa, i8 %q, i32 11
  %qc = insertelement <16 x i8> %qb, i8 %q, i32 12
  %qd = insertelement <16 x i8> %qc, i8 %q, i32 13
  %qe = insertelement <16 x i8> %qd, i8 %q, i32 14
  %qf = insertelement <16 x i8> %qe, i8 %q, i32 15
  ret <16 x i8> %qf
}
     38 
; 256-bit variant of BB16: 32 insertelements of one loaded i8 should collapse
; into a single ymm vpbroadcastb from memory.
define <32 x i8> @BB32(i8* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: BB32:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpbroadcastb (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: BB32:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vpbroadcastb (%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %q = load i8, i8* %ptr, align 4
  %q0 = insertelement <32 x i8> undef, i8 %q, i32 0
  %q1 = insertelement <32 x i8> %q0, i8 %q, i32 1
  %q2 = insertelement <32 x i8> %q1, i8 %q, i32 2
  %q3 = insertelement <32 x i8> %q2, i8 %q, i32 3
  %q4 = insertelement <32 x i8> %q3, i8 %q, i32 4
  %q5 = insertelement <32 x i8> %q4, i8 %q, i32 5
  %q6 = insertelement <32 x i8> %q5, i8 %q, i32 6
  %q7 = insertelement <32 x i8> %q6, i8 %q, i32 7
  %q8 = insertelement <32 x i8> %q7, i8 %q, i32 8
  %q9 = insertelement <32 x i8> %q8, i8 %q, i32 9
  %qa = insertelement <32 x i8> %q9, i8 %q, i32 10
  %qb = insertelement <32 x i8> %qa, i8 %q, i32 11
  %qc = insertelement <32 x i8> %qb, i8 %q, i32 12
  %qd = insertelement <32 x i8> %qc, i8 %q, i32 13
  %qe = insertelement <32 x i8> %qd, i8 %q, i32 14
  %qf = insertelement <32 x i8> %qe, i8 %q, i32 15

  %q20 = insertelement <32 x i8> %qf, i8 %q,  i32 16
  %q21 = insertelement <32 x i8> %q20, i8 %q, i32 17
  %q22 = insertelement <32 x i8> %q21, i8 %q, i32 18
  %q23 = insertelement <32 x i8> %q22, i8 %q, i32 19
  %q24 = insertelement <32 x i8> %q23, i8 %q, i32 20
  %q25 = insertelement <32 x i8> %q24, i8 %q, i32 21
  %q26 = insertelement <32 x i8> %q25, i8 %q, i32 22
  %q27 = insertelement <32 x i8> %q26, i8 %q, i32 23
  %q28 = insertelement <32 x i8> %q27, i8 %q, i32 24
  %q29 = insertelement <32 x i8> %q28, i8 %q, i32 25
  %q2a = insertelement <32 x i8> %q29, i8 %q, i32 26
  %q2b = insertelement <32 x i8> %q2a, i8 %q, i32 27
  %q2c = insertelement <32 x i8> %q2b, i8 %q, i32 28
  %q2d = insertelement <32 x i8> %q2c, i8 %q, i32 29
  %q2e = insertelement <32 x i8> %q2d, i8 %q, i32 30
  %q2f = insertelement <32 x i8> %q2e, i8 %q, i32 31
  ret <32 x i8> %q2f
}
     87 
; Splat of a loaded i16 into <8 x i16>; expected to fold into one vpbroadcastw
; from memory.
define <8 x i16> @W16(i16* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: W16:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpbroadcastw (%eax), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: W16:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vpbroadcastw (%rdi), %xmm0
; X64-NEXT:    retq
entry:
  %q = load i16, i16* %ptr, align 4
  %q0 = insertelement <8 x i16> undef, i16 %q, i32 0
  %q1 = insertelement <8 x i16> %q0, i16 %q, i32 1
  %q2 = insertelement <8 x i16> %q1, i16 %q, i32 2
  %q3 = insertelement <8 x i16> %q2, i16 %q, i32 3
  %q4 = insertelement <8 x i16> %q3, i16 %q, i32 4
  %q5 = insertelement <8 x i16> %q4, i16 %q, i32 5
  %q6 = insertelement <8 x i16> %q5, i16 %q, i32 6
  %q7 = insertelement <8 x i16> %q6, i16 %q, i32 7
  ret <8 x i16> %q7
}
    111 
; 256-bit variant of W16: i16 splat into <16 x i16> should become a ymm
; vpbroadcastw from memory.
define <16 x i16> @WW16(i16* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: WW16:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpbroadcastw (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: WW16:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vpbroadcastw (%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %q = load i16, i16* %ptr, align 4
  %q0 = insertelement <16 x i16> undef, i16 %q, i32 0
  %q1 = insertelement <16 x i16> %q0, i16 %q, i32 1
  %q2 = insertelement <16 x i16> %q1, i16 %q, i32 2
  %q3 = insertelement <16 x i16> %q2, i16 %q, i32 3
  %q4 = insertelement <16 x i16> %q3, i16 %q, i32 4
  %q5 = insertelement <16 x i16> %q4, i16 %q, i32 5
  %q6 = insertelement <16 x i16> %q5, i16 %q, i32 6
  %q7 = insertelement <16 x i16> %q6, i16 %q, i32 7
  %q8 = insertelement <16 x i16> %q7, i16 %q, i32 8
  %q9 = insertelement <16 x i16> %q8, i16 %q, i32 9
  %qa = insertelement <16 x i16> %q9, i16 %q, i32 10
  %qb = insertelement <16 x i16> %qa, i16 %q, i32 11
  %qc = insertelement <16 x i16> %qb, i16 %q, i32 12
  %qd = insertelement <16 x i16> %qc, i16 %q, i32 13
  %qe = insertelement <16 x i16> %qd, i16 %q, i32 14
  %qf = insertelement <16 x i16> %qe, i16 %q, i32 15
  ret <16 x i16> %qf
}
    143 
; Splat of a loaded i32 into <4 x i32>; the checks expect the FP-domain
; vbroadcastss form rather than vpbroadcastd.
define <4 x i32> @D32(i32* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: D32:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss (%eax), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: D32:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vbroadcastss (%rdi), %xmm0
; X64-NEXT:    retq
entry:
  %q = load i32, i32* %ptr, align 4
  %q0 = insertelement <4 x i32> undef, i32 %q, i32 0
  %q1 = insertelement <4 x i32> %q0, i32 %q, i32 1
  %q2 = insertelement <4 x i32> %q1, i32 %q, i32 2
  %q3 = insertelement <4 x i32> %q2, i32 %q, i32 3
  ret <4 x i32> %q3
}
    163 
; 256-bit variant of D32: i32 splat into <8 x i32> should become a ymm
; vbroadcastss from memory.
define <8 x i32> @DD32(i32* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: DD32:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: DD32:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vbroadcastss (%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %q = load i32, i32* %ptr, align 4
  %q0 = insertelement <8 x i32> undef, i32 %q, i32 0
  %q1 = insertelement <8 x i32> %q0, i32 %q, i32 1
  %q2 = insertelement <8 x i32> %q1, i32 %q, i32 2
  %q3 = insertelement <8 x i32> %q2, i32 %q, i32 3
  %q4 = insertelement <8 x i32> %q3, i32 %q, i32 4
  %q5 = insertelement <8 x i32> %q4, i32 %q, i32 5
  %q6 = insertelement <8 x i32> %q5, i32 %q, i32 6
  %q7 = insertelement <8 x i32> %q6, i32 %q, i32 7
  ret <8 x i32> %q7
}
    187 
; Splat of a loaded i64 into <2 x i64>; both targets fold it into a single
; vpbroadcastq from memory.
define <2 x i64> @Q64(i64* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: Q64:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpbroadcastq (%eax), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: Q64:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vpbroadcastq (%rdi), %xmm0
; X64-NEXT:    retq
entry:
  %q = load i64, i64* %ptr, align 4
  %q0 = insertelement <2 x i64> undef, i64 %q, i32 0
  %q1 = insertelement <2 x i64> %q0, i64 %q, i32 1
  ret <2 x i64> %q1
}
    205 
; i64 splat into <4 x i64>. On X64 this is a single vbroadcastsd load; on X32,
; where i64 is not a legal scalar, the checks show a vmovsd load followed by a
; register vbroadcastsd instead.
define <4 x i64> @QQ64(i64* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: QQ64:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32-NEXT:    vbroadcastsd %xmm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: QQ64:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vbroadcastsd (%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %q = load i64, i64* %ptr, align 4
  %q0 = insertelement <4 x i64> undef, i64 %q, i32 0
  %q1 = insertelement <4 x i64> %q0, i64 %q, i32 1
  %q2 = insertelement <4 x i64> %q1, i64 %q, i32 2
  %q3 = insertelement <4 x i64> %q2, i64 %q, i32 3
  ret <4 x i64> %q3
}
    226 
; Broadcast a whole loaded <4 x i16> (64 bits) into both halves of an xmm:
; X64 uses vpbroadcastq of the 8-byte load, X32 uses a vmovddup load-splat.
define <8 x i16> @broadcast_mem_v4i16_v8i16(<4 x i16>* %ptr) {
; X32-LABEL: broadcast_mem_v4i16_v8i16:
; X32:       ## %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; X32-NEXT:    retl
;
; X64-LABEL: broadcast_mem_v4i16_v8i16:
; X64:       ## %bb.0:
; X64-NEXT:    vpbroadcastq (%rdi), %xmm0
; X64-NEXT:    retq
  %load = load <4 x i16>, <4 x i16>* %ptr
  %shuf = shufflevector <4 x i16> %load, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i16> %shuf
}
    242 
; Broadcast a loaded <4 x i16> into all four 64-bit lanes of a ymm: X64 folds
; to vbroadcastsd from memory; X32 loads with vmovsd then broadcasts in-register.
define <16 x i16> @broadcast_mem_v4i16_v16i16(<4 x i16>* %ptr) {
; X32-LABEL: broadcast_mem_v4i16_v16i16:
; X32:       ## %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32-NEXT:    vbroadcastsd %xmm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: broadcast_mem_v4i16_v16i16:
; X64:       ## %bb.0:
; X64-NEXT:    vbroadcastsd (%rdi), %ymm0
; X64-NEXT:    retq
  %load = load <4 x i16>, <4 x i16>* %ptr
  %shuf = shufflevector <4 x i16> %load, <4 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <16 x i16> %shuf
}
    259 
    260 ; FIXME: Pointer adjusted broadcasts
    261 
; Splat lane 1 of a loaded <16 x i8>: the full vector load plus splat-shuffle
; should narrow to a pointer-adjusted vpbroadcastb from offset 1.
define <16 x i8> @load_splat_16i8_16i8_1111111111111111(<16 x i8>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_16i8_16i8_1111111111111111:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpbroadcastb 1(%eax), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_16i8_16i8_1111111111111111:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vpbroadcastb 1(%rdi), %xmm0
; X64-NEXT:    retq
entry:
  %ld = load <16 x i8>, <16 x i8>* %ptr
  %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <16 x i8> %ret
}
    278 
; Splat lane 1 of a loaded <16 x i8> into a wider <32 x i8>; expect a ymm
; vpbroadcastb from offset 1.
define <32 x i8> @load_splat_32i8_16i8_11111111111111111111111111111111(<16 x i8>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_32i8_16i8_11111111111111111111111111111111:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpbroadcastb 1(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_32i8_16i8_11111111111111111111111111111111:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vpbroadcastb 1(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <16 x i8>, <16 x i8>* %ptr
  %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <32 x i8> %ret
}
    295 
; Splat lane 1 of a loaded <32 x i8>; same expected codegen as the 16i8 source
; case: a single ymm vpbroadcastb from offset 1.
define <32 x i8> @load_splat_32i8_32i8_11111111111111111111111111111111(<32 x i8>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_32i8_32i8_11111111111111111111111111111111:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpbroadcastb 1(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_32i8_32i8_11111111111111111111111111111111:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vpbroadcastb 1(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <32 x i8>, <32 x i8>* %ptr
  %ret = shufflevector <32 x i8> %ld, <32 x i8> undef, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <32 x i8> %ret
}
    312 
; Splat lane 1 of a loaded <8 x i16>; expect vpbroadcastw from offset 2
; (one i16 element in).
define <8 x i16> @load_splat_8i16_8i16_11111111(<8 x i16>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_8i16_8i16_11111111:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpbroadcastw 2(%eax), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_8i16_8i16_11111111:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vpbroadcastw 2(%rdi), %xmm0
; X64-NEXT:    retq
entry:
  %ld = load <8 x i16>, <8 x i16>* %ptr
  %ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i16> %ret
}
    329 
; Splat lane 1 of a loaded <8 x i16> into <16 x i16>; expect a ymm
; vpbroadcastw from offset 2.
define <16 x i16> @load_splat_16i16_8i16_1111111111111111(<8 x i16>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_16i16_8i16_1111111111111111:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpbroadcastw 2(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_16i16_8i16_1111111111111111:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vpbroadcastw 2(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <8 x i16>, <8 x i16>* %ptr
  %ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <16 x i16> %ret
}
    346 
; Splat lane 1 of a loaded <16 x i16>; expect a ymm vpbroadcastw from offset 2.
define <16 x i16> @load_splat_16i16_16i16_1111111111111111(<16 x i16>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_16i16_16i16_1111111111111111:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpbroadcastw 2(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_16i16_16i16_1111111111111111:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vpbroadcastw 2(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <16 x i16>, <16 x i16>* %ptr
  %ret = shufflevector <16 x i16> %ld, <16 x i16> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <16 x i16> %ret
}
    363 
; Splat lane 1 of a loaded <4 x i32>; expect vbroadcastss from offset 4.
define <4 x i32> @load_splat_4i32_4i32_1111(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_4i32_4i32_1111:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss 4(%eax), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_4i32_4i32_1111:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vbroadcastss 4(%rdi), %xmm0
; X64-NEXT:    retq
entry:
  %ld = load <4 x i32>, <4 x i32>* %ptr
  %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %ret
}
    380 
; Splat lane 3 of a loaded <4 x i32> into <8 x i32>; expect a ymm vbroadcastss
; from offset 12.
define <8 x i32> @load_splat_8i32_4i32_33333333(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_8i32_4i32_33333333:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss 12(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_8i32_4i32_33333333:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vbroadcastss 12(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <4 x i32>, <4 x i32>* %ptr
  %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <8 x i32> %ret
}
    397 
; Splat lane 5 of a loaded <8 x i32>; expect a ymm vbroadcastss from offset 20.
define <8 x i32> @load_splat_8i32_8i32_55555555(<8 x i32>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_8i32_8i32_55555555:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss 20(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_8i32_8i32_55555555:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vbroadcastss 20(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <8 x i32>, <8 x i32>* %ptr
  %ret = shufflevector <8 x i32> %ld, <8 x i32> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x i32> %ret
}
    414 
; Float version of the lane-1 <4 x i32> splat; expect vbroadcastss from offset 4.
define <4 x float> @load_splat_4f32_4f32_1111(<4 x float>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_4f32_4f32_1111:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss 4(%eax), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_4f32_4f32_1111:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vbroadcastss 4(%rdi), %xmm0
; X64-NEXT:    retq
entry:
  %ld = load <4 x float>, <4 x float>* %ptr
  %ret = shufflevector <4 x float> %ld, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x float> %ret
}
    431 
; Splat lane 3 of a loaded <4 x float> into <8 x float>; expect a ymm
; vbroadcastss from offset 12.
define <8 x float> @load_splat_8f32_4f32_33333333(<4 x float>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_8f32_4f32_33333333:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss 12(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_8f32_4f32_33333333:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vbroadcastss 12(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <4 x float>, <4 x float>* %ptr
  %ret = shufflevector <4 x float> %ld, <4 x float> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <8 x float> %ret
}
    448 
; Splat lane 5 of a loaded <8 x float>; expect a ymm vbroadcastss from offset 20.
define <8 x float> @load_splat_8f32_8f32_55555555(<8 x float>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_8f32_8f32_55555555:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss 20(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_8f32_8f32_55555555:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vbroadcastss 20(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <8 x float>, <8 x float>* %ptr
  %ret = shufflevector <8 x float> %ld, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x float> %ret
}
    465 
; Splat lane 1 of a loaded <2 x i64>: X64 uses vpbroadcastq from offset 8,
; while X32 is matched as a vmovddup load-splat instead.
define <2 x i64> @load_splat_2i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_2i64_2i64_1111:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_2i64_2i64_1111:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vpbroadcastq 8(%rdi), %xmm0
; X64-NEXT:    retq
entry:
  %ld = load <2 x i64>, <2 x i64>* %ptr
  %ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x i64> %ret
}
    482 
; Splat lane 1 of a loaded <2 x i64> into <4 x i64>; expect a ymm vbroadcastsd
; from offset 8.
define <4 x i64> @load_splat_4i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_4i64_2i64_1111:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastsd 8(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_4i64_2i64_1111:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vbroadcastsd 8(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <2 x i64>, <2 x i64>* %ptr
  %ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i64> %ret
}
    499 
; Splat lane 2 of a loaded <4 x i64>; expect a ymm vbroadcastsd from offset 16.
define <4 x i64> @load_splat_4i64_4i64_2222(<4 x i64>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_4i64_4i64_2222:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastsd 16(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_4i64_4i64_2222:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vbroadcastsd 16(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <4 x i64>, <4 x i64>* %ptr
  %ret = shufflevector <4 x i64> %ld, <4 x i64> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i64> %ret
}
    516 
; Splat lane 1 of a loaded <2 x double>; both targets match a vmovddup
; load-splat (no 128-bit double broadcast instruction is used).
define <2 x double> @load_splat_2f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_2f64_2f64_1111:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_2f64_2f64_1111:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; X64-NEXT:    retq
entry:
  %ld = load <2 x double>, <2 x double>* %ptr
  %ret = shufflevector <2 x double> %ld, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x double> %ret
}
    533 
; Splat lane 1 of a loaded <2 x double> into <4 x double>; expect a ymm
; vbroadcastsd from offset 8.
define <4 x double> @load_splat_4f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_4f64_2f64_1111:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastsd 8(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_4f64_2f64_1111:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vbroadcastsd 8(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <2 x double>, <2 x double>* %ptr
  %ret = shufflevector <2 x double> %ld, <2 x double> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x double> %ret
}
    550 
; Splat lane 2 of a loaded <4 x double>; expect a ymm vbroadcastsd from
; offset 16.
define <4 x double> @load_splat_4f64_4f64_2222(<4 x double>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_4f64_4f64_2222:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastsd 16(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_4f64_4f64_2222:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vbroadcastsd 16(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <4 x double>, <4 x double>* %ptr
  %ret = shufflevector <4 x double> %ld, <4 x double> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  ret <4 x double> %ret
}
    567 
; Make sure we still don't emit a broadcast of a double into a 128-bit vector
; (this pattern used to crash the compiler); a vmovddup load-splat is expected
; on both targets instead.
define <2 x double> @I(double* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: I:
; X32:       ## %bb.0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; X32-NEXT:    retl
;
; X64-LABEL: I:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; X64-NEXT:    retq
entry:
  %q = load double, double* %ptr, align 4
  %vecinit.i = insertelement <2 x double> undef, double %q, i32 0
  %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
  ret <2 x double> %vecinit2.i
}
    587 
; Constant integer splat operand: AVX2 materializes the <8 x i32> splat of 2
; with vpbroadcastd; AVX512VL instead folds it as a {1to8} embedded-broadcast
; memory operand on the vpaddd.
define <8 x i32> @V111(<8 x i32> %in) nounwind uwtable readnone ssp {
; X32-AVX2-LABEL: V111:
; X32-AVX2:       ## %bb.0: ## %entry
; X32-AVX2-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2]
; X32-AVX2-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; X32-AVX2-NEXT:    retl
;
; X64-AVX2-LABEL: V111:
; X64-AVX2:       ## %bb.0: ## %entry
; X64-AVX2-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2]
; X64-AVX2-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT:    retq
;
; X32-AVX512VL-LABEL: V111:
; X32-AVX512VL:       ## %bb.0: ## %entry
; X32-AVX512VL-NEXT:    vpaddd LCPI29_0{1to8}, %ymm0, %ymm0
; X32-AVX512VL-NEXT:    retl
;
; X64-AVX512VL-LABEL: V111:
; X64-AVX512VL:       ## %bb.0: ## %entry
; X64-AVX512VL-NEXT:    vpaddd {{.*}}(%rip){1to8}, %ymm0, %ymm0
; X64-AVX512VL-NEXT:    retq
entry:
  %g = add <8 x i32> %in, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %g
}
    614 
; Constant float splat operand: AVX2 uses vbroadcastss to build the splat of
; -0.0078125; AVX512VL folds it as a {1to8} embedded-broadcast operand on the
; vaddps.
define <8 x float> @V113(<8 x float> %in) nounwind uwtable readnone ssp {
; X32-AVX2-LABEL: V113:
; X32-AVX2:       ## %bb.0: ## %entry
; X32-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm1 = [-0.0078125,-0.0078125,-0.0078125,-0.0078125,-0.0078125,-0.0078125,-0.0078125,-0.0078125]
; X32-AVX2-NEXT:    vaddps %ymm1, %ymm0, %ymm0
; X32-AVX2-NEXT:    retl
;
; X64-AVX2-LABEL: V113:
; X64-AVX2:       ## %bb.0: ## %entry
; X64-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm1 = [-0.0078125,-0.0078125,-0.0078125,-0.0078125,-0.0078125,-0.0078125,-0.0078125,-0.0078125]
; X64-AVX2-NEXT:    vaddps %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT:    retq
;
; X32-AVX512VL-LABEL: V113:
; X32-AVX512VL:       ## %bb.0: ## %entry
; X32-AVX512VL-NEXT:    vaddps LCPI30_0{1to8}, %ymm0, %ymm0
; X32-AVX512VL-NEXT:    retl
;
; X64-AVX512VL-LABEL: V113:
; X64-AVX512VL:       ## %bb.0: ## %entry
; X64-AVX512VL-NEXT:    vaddps {{.*}}(%rip){1to8}, %ymm0, %ymm0
; X64-AVX512VL-NEXT:    retq
entry:
  %g = fadd <8 x float> %in, <float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000>
  ret <8 x float> %g
}
    641 
; All-constant <4 x float> splat (ptr argument unused): expected to be
; materialized with a single vbroadcastss of the constant.
define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: _e2:
; X32:       ## %bb.0:
; X32-NEXT:    vbroadcastss {{.*#+}} xmm0 = [-0.0078125,-0.0078125,-0.0078125,-0.0078125]
; X32-NEXT:    retl
;
; X64-LABEL: _e2:
; X64:       ## %bb.0:
; X64-NEXT:    vbroadcastss {{.*#+}} xmm0 = [-0.0078125,-0.0078125,-0.0078125,-0.0078125]
; X64-NEXT:    retq
  %vecinit.i = insertelement <4 x float> undef, float        0xbf80000000000000, i32 0
  %vecinit2.i = insertelement <4 x float> %vecinit.i, float  0xbf80000000000000, i32 1
  %vecinit4.i = insertelement <4 x float> %vecinit2.i, float 0xbf80000000000000, i32 2
  %vecinit6.i = insertelement <4 x float> %vecinit4.i, float 0xbf80000000000000, i32 3
  ret <4 x float> %vecinit6.i
}
    658 
; All-constant splat of the illegal type <8 x i8> (ptr argument unused): the
; checks show it is loaded from the constant pool with vmovaps rather than
; being broadcast.
define <8 x i8> @_e4(i8* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: _e4:
; X32:       ## %bb.0:
; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [52,52,52,52,52,52,52,52]
; X32-NEXT:    retl
;
; X64-LABEL: _e4:
; X64:       ## %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} xmm0 = [52,52,52,52,52,52,52,52]
; X64-NEXT:    retq
  %vecinit0.i = insertelement <8 x i8> undef, i8       52, i32 0
  %vecinit1.i = insertelement <8 x i8> %vecinit0.i, i8 52, i32 1
  %vecinit2.i = insertelement <8 x i8> %vecinit1.i, i8 52, i32 2
  %vecinit3.i = insertelement <8 x i8> %vecinit2.i, i8 52, i32 3
  %vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 52, i32 4
  %vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 52, i32 5
  %vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 52, i32 6
  %vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 52, i32 7
  ret <8 x i8> %vecinit7.i
}
    679 
; Crash regression test: a mix of scalar/vector fmul+fptosi feeding a splat
; shuffle inside a loop with undef branch conditions. The vector work is all
; dead, so the checks only verify the function compiles down to a trivial
; branch plus an (unreachable-in-practice) infinite loop without crashing llc.
define void @crash() nounwind alwaysinline {
; X32-LABEL: crash:
; X32:       ## %bb.0: ## %WGLoopsEntry
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    testb %al, %al
; X32-NEXT:    je LBB33_1
; X32-NEXT:  ## %bb.2: ## %ret
; X32-NEXT:    retl
; X32-NEXT:    .p2align 4, 0x90
; X32-NEXT:  LBB33_1: ## %footer329VF
; X32-NEXT:    ## =>This Inner Loop Header: Depth=1
; X32-NEXT:    jmp LBB33_1
;
; X64-LABEL: crash:
; X64:       ## %bb.0: ## %WGLoopsEntry
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    testb %al, %al
; X64-NEXT:    je LBB33_1
; X64-NEXT:  ## %bb.2: ## %ret
; X64-NEXT:    retq
; X64-NEXT:    .p2align 4, 0x90
; X64-NEXT:  LBB33_1: ## %footer329VF
; X64-NEXT:    ## =>This Inner Loop Header: Depth=1
; X64-NEXT:    jmp LBB33_1
WGLoopsEntry:
  br i1 undef, label %ret, label %footer329VF

footer329VF:
  %A.0.inVF = fmul float undef, 6.553600e+04
  %B.0.in407VF = fmul <8 x float> undef, <float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04>
  %A.0VF = fptosi float %A.0.inVF to i32
  %B.0408VF = fptosi <8 x float> %B.0.in407VF to <8 x i32>
  %0 = and <8 x i32> %B.0408VF, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
  %1 = and i32 %A.0VF, 65535
  %temp1098VF = insertelement <8 x i32> undef, i32 %1, i32 0
  %vector1099VF = shufflevector <8 x i32> %temp1098VF, <8 x i32> undef, <8 x i32> zeroinitializer
  br i1 undef, label %preload1201VF, label %footer349VF

preload1201VF:
  br label %footer349VF

footer349VF:
  %2 = mul nsw <8 x i32> undef, %0
  %3 = mul nsw <8 x i32> undef, %vector1099VF
  br label %footer329VF

ret:
  ret void
}
    729 
    730 define <8 x i32> @_inreg0(i32 %scalar) nounwind uwtable readnone ssp {
    731 ; X32-LABEL: _inreg0:
    732 ; X32:       ## %bb.0:
    733 ; X32-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %ymm0
    734 ; X32-NEXT:    retl
    735 ;
    736 ; X64-AVX2-LABEL: _inreg0:
    737 ; X64-AVX2:       ## %bb.0:
    738 ; X64-AVX2-NEXT:    vmovd %edi, %xmm0
    739 ; X64-AVX2-NEXT:    vpbroadcastd %xmm0, %ymm0
    740 ; X64-AVX2-NEXT:    retq
    741 ;
    742 ; X64-AVX512VL-LABEL: _inreg0:
    743 ; X64-AVX512VL:       ## %bb.0:
    744 ; X64-AVX512VL-NEXT:    vpbroadcastd %edi, %ymm0
    745 ; X64-AVX512VL-NEXT:    retq
    746   %in = insertelement <8 x i32> undef, i32 %scalar, i32 0
    747   %wide = shufflevector <8 x i32> %in, <8 x i32> undef, <8 x i32> zeroinitializer
    748   ret <8 x i32> %wide
    749 }
    750 
    751 define <8 x float> @_inreg1(float %scalar) nounwind uwtable readnone ssp {
    752 ; X32-LABEL: _inreg1:
    753 ; X32:       ## %bb.0:
    754 ; X32-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %ymm0
    755 ; X32-NEXT:    retl
    756 ;
    757 ; X64-LABEL: _inreg1:
    758 ; X64:       ## %bb.0:
    759 ; X64-NEXT:    vbroadcastss %xmm0, %ymm0
    760 ; X64-NEXT:    retq
    761   %in = insertelement <8 x float> undef, float %scalar, i32 0
    762   %wide = shufflevector <8 x float> %in, <8 x float> undef, <8 x i32> zeroinitializer
    763   ret <8 x float> %wide
    764 }
    765 
    766 define <4 x float> @_inreg2(float %scalar) nounwind uwtable readnone ssp {
    767 ; X32-LABEL: _inreg2:
    768 ; X32:       ## %bb.0:
    769 ; X32-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %xmm0
    770 ; X32-NEXT:    retl
    771 ;
    772 ; X64-LABEL: _inreg2:
    773 ; X64:       ## %bb.0:
    774 ; X64-NEXT:    vbroadcastss %xmm0, %xmm0
    775 ; X64-NEXT:    retq
    776   %in = insertelement <4 x float> undef, float %scalar, i32 0
    777   %wide = shufflevector <4 x float> %in, <4 x float> undef, <4 x i32> zeroinitializer
    778   ret <4 x float> %wide
    779 }
    780 
    781 define <4 x double> @_inreg3(double %scalar) nounwind uwtable readnone ssp {
    782 ; X32-LABEL: _inreg3:
    783 ; X32:       ## %bb.0:
    784 ; X32-NEXT:    vbroadcastsd {{[0-9]+}}(%esp), %ymm0
    785 ; X32-NEXT:    retl
    786 ;
    787 ; X64-LABEL: _inreg3:
    788 ; X64:       ## %bb.0:
    789 ; X64-NEXT:    vbroadcastsd %xmm0, %ymm0
    790 ; X64-NEXT:    retq
    791   %in = insertelement <4 x double> undef, double %scalar, i32 0
    792   %wide = shufflevector <4 x double> %in, <4 x double> undef, <4 x i32> zeroinitializer
    793   ret <4 x double> %wide
    794 }
    795 
    796 define   <8 x float> @_inreg8xfloat(<8 x float> %a) {
    797 ; X32-LABEL: _inreg8xfloat:
    798 ; X32:       ## %bb.0:
    799 ; X32-NEXT:    vbroadcastss %xmm0, %ymm0
    800 ; X32-NEXT:    retl
    801 ;
    802 ; X64-LABEL: _inreg8xfloat:
    803 ; X64:       ## %bb.0:
    804 ; X64-NEXT:    vbroadcastss %xmm0, %ymm0
    805 ; X64-NEXT:    retq
    806   %b = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> zeroinitializer
    807   ret <8 x float> %b
    808 }
    809 
    810 define   <4 x float> @_inreg4xfloat(<4 x float> %a) {
    811 ; X32-LABEL: _inreg4xfloat:
    812 ; X32:       ## %bb.0:
    813 ; X32-NEXT:    vbroadcastss %xmm0, %xmm0
    814 ; X32-NEXT:    retl
    815 ;
    816 ; X64-LABEL: _inreg4xfloat:
    817 ; X64:       ## %bb.0:
    818 ; X64-NEXT:    vbroadcastss %xmm0, %xmm0
    819 ; X64-NEXT:    retq
    820   %b = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> zeroinitializer
    821   ret <4 x float> %b
    822 }
    823 
    824 define   <16 x i16> @_inreg16xi16(<16 x i16> %a) {
    825 ; X32-LABEL: _inreg16xi16:
    826 ; X32:       ## %bb.0:
    827 ; X32-NEXT:    vpbroadcastw %xmm0, %ymm0
    828 ; X32-NEXT:    retl
    829 ;
    830 ; X64-LABEL: _inreg16xi16:
    831 ; X64:       ## %bb.0:
    832 ; X64-NEXT:    vpbroadcastw %xmm0, %ymm0
    833 ; X64-NEXT:    retq
    834   %b = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> zeroinitializer
    835   ret <16 x i16> %b
    836 }
    837 
    838 define   <8 x i16> @_inreg8xi16(<8 x i16> %a) {
    839 ; X32-LABEL: _inreg8xi16:
    840 ; X32:       ## %bb.0:
    841 ; X32-NEXT:    vpbroadcastw %xmm0, %xmm0
    842 ; X32-NEXT:    retl
    843 ;
    844 ; X64-LABEL: _inreg8xi16:
    845 ; X64:       ## %bb.0:
    846 ; X64-NEXT:    vpbroadcastw %xmm0, %xmm0
    847 ; X64-NEXT:    retq
    848   %b = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> zeroinitializer
    849   ret <8 x i16> %b
    850 }
    851 
    852 define   <4 x i64> @_inreg4xi64(<4 x i64> %a) {
    853 ; X32-LABEL: _inreg4xi64:
    854 ; X32:       ## %bb.0:
    855 ; X32-NEXT:    vbroadcastsd %xmm0, %ymm0
    856 ; X32-NEXT:    retl
    857 ;
    858 ; X64-LABEL: _inreg4xi64:
    859 ; X64:       ## %bb.0:
    860 ; X64-NEXT:    vbroadcastsd %xmm0, %ymm0
    861 ; X64-NEXT:    retq
    862   %b = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> zeroinitializer
    863   ret <4 x i64> %b
    864 }
    865 
    866 define   <2 x i64> @_inreg2xi64(<2 x i64> %a) {
    867 ; X32-LABEL: _inreg2xi64:
    868 ; X32:       ## %bb.0:
    869 ; X32-NEXT:    vpbroadcastq %xmm0, %xmm0
    870 ; X32-NEXT:    retl
    871 ;
    872 ; X64-LABEL: _inreg2xi64:
    873 ; X64:       ## %bb.0:
    874 ; X64-NEXT:    vpbroadcastq %xmm0, %xmm0
    875 ; X64-NEXT:    retq
    876   %b = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> zeroinitializer
    877   ret <2 x i64> %b
    878 }
    879 
    880 define   <4 x double> @_inreg4xdouble(<4 x double> %a) {
    881 ; X32-LABEL: _inreg4xdouble:
    882 ; X32:       ## %bb.0:
    883 ; X32-NEXT:    vbroadcastsd %xmm0, %ymm0
    884 ; X32-NEXT:    retl
    885 ;
    886 ; X64-LABEL: _inreg4xdouble:
    887 ; X64:       ## %bb.0:
    888 ; X64-NEXT:    vbroadcastsd %xmm0, %ymm0
    889 ; X64-NEXT:    retq
    890   %b = shufflevector <4 x double> %a, <4 x double> undef, <4 x i32> zeroinitializer
    891   ret <4 x double> %b
    892 }
    893 
    894 define   <2 x double> @_inreg2xdouble(<2 x double> %a) {
    895 ; X32-LABEL: _inreg2xdouble:
    896 ; X32:       ## %bb.0:
    897 ; X32-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
    898 ; X32-NEXT:    retl
    899 ;
    900 ; X64-LABEL: _inreg2xdouble:
    901 ; X64:       ## %bb.0:
    902 ; X64-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
    903 ; X64-NEXT:    retq
    904   %b = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> zeroinitializer
    905   ret <2 x double> %b
    906 }
    907 
    908 define   <8 x i32> @_inreg8xi32(<8 x i32> %a) {
    909 ; X32-LABEL: _inreg8xi32:
    910 ; X32:       ## %bb.0:
    911 ; X32-NEXT:    vbroadcastss %xmm0, %ymm0
    912 ; X32-NEXT:    retl
    913 ;
    914 ; X64-LABEL: _inreg8xi32:
    915 ; X64:       ## %bb.0:
    916 ; X64-NEXT:    vbroadcastss %xmm0, %ymm0
    917 ; X64-NEXT:    retq
    918   %b = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> zeroinitializer
    919   ret <8 x i32> %b
    920 }
    921 
    922 define   <4 x i32> @_inreg4xi32(<4 x i32> %a) {
    923 ; X32-LABEL: _inreg4xi32:
    924 ; X32:       ## %bb.0:
    925 ; X32-NEXT:    vbroadcastss %xmm0, %xmm0
    926 ; X32-NEXT:    retl
    927 ;
    928 ; X64-LABEL: _inreg4xi32:
    929 ; X64:       ## %bb.0:
    930 ; X64-NEXT:    vbroadcastss %xmm0, %xmm0
    931 ; X64-NEXT:    retq
    932   %b = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> zeroinitializer
    933   ret <4 x i32> %b
    934 }
    935 
    936 define   <32 x i8> @_inreg32xi8(<32 x i8> %a) {
    937 ; X32-LABEL: _inreg32xi8:
    938 ; X32:       ## %bb.0:
    939 ; X32-NEXT:    vpbroadcastb %xmm0, %ymm0
    940 ; X32-NEXT:    retl
    941 ;
    942 ; X64-LABEL: _inreg32xi8:
    943 ; X64:       ## %bb.0:
    944 ; X64-NEXT:    vpbroadcastb %xmm0, %ymm0
    945 ; X64-NEXT:    retq
    946   %b = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> zeroinitializer
    947   ret <32 x i8> %b
    948 }
    949 
    950 define   <16 x i8> @_inreg16xi8(<16 x i8> %a) {
    951 ; X32-LABEL: _inreg16xi8:
    952 ; X32:       ## %bb.0:
    953 ; X32-NEXT:    vpbroadcastb %xmm0, %xmm0
    954 ; X32-NEXT:    retl
    955 ;
    956 ; X64-LABEL: _inreg16xi8:
    957 ; X64:       ## %bb.0:
    958 ; X64-NEXT:    vpbroadcastb %xmm0, %xmm0
    959 ; X64-NEXT:    retq
    960   %b = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> zeroinitializer
    961   ret <16 x i8> %b
    962 }
    963 
    964 ; These tests check that a vbroadcast instruction is used when we have a splat
    965 ; formed from a concat_vectors (via the shufflevector) of two BUILD_VECTORs
    966 ; (via the insertelements).
    967 
    968 define <8 x float> @splat_concat1(float %f) {
    969 ; X32-LABEL: splat_concat1:
    970 ; X32:       ## %bb.0:
    971 ; X32-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %ymm0
    972 ; X32-NEXT:    retl
    973 ;
    974 ; X64-LABEL: splat_concat1:
    975 ; X64:       ## %bb.0:
    976 ; X64-NEXT:    vbroadcastss %xmm0, %ymm0
    977 ; X64-NEXT:    retq
    978   %1 = insertelement <4 x float> undef, float %f, i32 0
    979   %2 = insertelement <4 x float> %1, float %f, i32 1
    980   %3 = insertelement <4 x float> %2, float %f, i32 2
    981   %4 = insertelement <4 x float> %3, float %f, i32 3
    982   %5 = shufflevector <4 x float> %4, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
    983   ret <8 x float> %5
    984 }
    985 
    986 define <8 x float> @splat_concat2(float %f) {
    987 ; X32-LABEL: splat_concat2:
    988 ; X32:       ## %bb.0:
    989 ; X32-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %ymm0
    990 ; X32-NEXT:    retl
    991 ;
    992 ; X64-LABEL: splat_concat2:
    993 ; X64:       ## %bb.0:
    994 ; X64-NEXT:    vbroadcastss %xmm0, %ymm0
    995 ; X64-NEXT:    retq
    996   %1 = insertelement <4 x float> undef, float %f, i32 0
    997   %2 = insertelement <4 x float> %1, float %f, i32 1
    998   %3 = insertelement <4 x float> %2, float %f, i32 2
    999   %4 = insertelement <4 x float> %3, float %f, i32 3
   1000   %5 = insertelement <4 x float> undef, float %f, i32 0
   1001   %6 = insertelement <4 x float> %5, float %f, i32 1
   1002   %7 = insertelement <4 x float> %6, float %f, i32 2
   1003   %8 = insertelement <4 x float> %7, float %f, i32 3
   1004   %9 = shufflevector <4 x float> %4, <4 x float> %8, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   1005   ret <8 x float> %9
   1006 }
   1007 
   1008 define <4 x double> @splat_concat3(double %d) {
   1009 ; X32-LABEL: splat_concat3:
   1010 ; X32:       ## %bb.0:
   1011 ; X32-NEXT:    vbroadcastsd {{[0-9]+}}(%esp), %ymm0
   1012 ; X32-NEXT:    retl
   1013 ;
   1014 ; X64-LABEL: splat_concat3:
   1015 ; X64:       ## %bb.0:
   1016 ; X64-NEXT:    vbroadcastsd %xmm0, %ymm0
   1017 ; X64-NEXT:    retq
   1018   %1 = insertelement <2 x double> undef, double %d, i32 0
   1019   %2 = insertelement <2 x double> %1, double %d, i32 1
   1020   %3 = shufflevector <2 x double> %2, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
   1021   ret <4 x double> %3
   1022 }
   1023 
   1024 define <4 x double> @splat_concat4(double %d) {
   1025 ; X32-LABEL: splat_concat4:
   1026 ; X32:       ## %bb.0:
   1027 ; X32-NEXT:    vbroadcastsd {{[0-9]+}}(%esp), %ymm0
   1028 ; X32-NEXT:    retl
   1029 ;
   1030 ; X64-LABEL: splat_concat4:
   1031 ; X64:       ## %bb.0:
   1032 ; X64-NEXT:    vbroadcastsd %xmm0, %ymm0
   1033 ; X64-NEXT:    retq
   1034   %1 = insertelement <2 x double> undef, double %d, i32 0
   1035   %2 = insertelement <2 x double> %1, double %d, i32 1
   1036   %3 = insertelement <2 x double> undef, double %d, i32 0
   1037   %4 = insertelement <2 x double> %3, double %d, i32 1
   1038   %5 = shufflevector <2 x double> %2, <2 x double> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   1039   ret <4 x double> %5
   1040 }
   1041 
   1042 ; Test cases for <rdar://problem/16074331>.
   1043 ; Instruction selection for broacast instruction fails if
   1044 ; the load cannot be folded into the broadcast.
   1045 ; This happens if the load has initial one use but other uses are
   1046 ; created later, or if selection DAG cannot prove that folding the
   1047 ; load will not create a cycle in the DAG.
   1048 ; Those test cases exerce the latter.
   1049 
   1050 define void @isel_crash_16b(i8* %cV_R.addr) {
   1051 ; X32-LABEL: isel_crash_16b:
   1052 ; X32:       ## %bb.0: ## %eintry
   1053 ; X32-NEXT:    subl $60, %esp
   1054 ; X32-NEXT:    .cfi_def_cfa_offset 64
   1055 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
   1056 ; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
   1057 ; X32-NEXT:    vmovaps %xmm0, (%esp)
   1058 ; X32-NEXT:    vpbroadcastb (%eax), %xmm1
   1059 ; X32-NEXT:    vmovaps %xmm0, {{[0-9]+}}(%esp)
   1060 ; X32-NEXT:    vmovdqa %xmm1, {{[0-9]+}}(%esp)
   1061 ; X32-NEXT:    addl $60, %esp
   1062 ; X32-NEXT:    retl
   1063 ;
   1064 ; X64-LABEL: isel_crash_16b:
   1065 ; X64:       ## %bb.0: ## %eintry
   1066 ; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
   1067 ; X64-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
   1068 ; X64-NEXT:    vpbroadcastb (%rdi), %xmm1
   1069 ; X64-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
   1070 ; X64-NEXT:    vmovdqa %xmm1, -{{[0-9]+}}(%rsp)
   1071 ; X64-NEXT:    retq
   1072 eintry:
   1073   %__a.addr.i = alloca <2 x i64>, align 16
   1074   %__b.addr.i = alloca <2 x i64>, align 16
   1075   %vCr = alloca <2 x i64>, align 16
   1076   store <2 x i64> zeroinitializer, <2 x i64>* %vCr, align 16
   1077   %tmp = load <2 x i64>, <2 x i64>* %vCr, align 16
   1078   %tmp2 = load i8, i8* %cV_R.addr, align 4
   1079   %splat.splatinsert = insertelement <16 x i8> undef, i8 %tmp2, i32 0
   1080   %splat.splat = shufflevector <16 x i8> %splat.splatinsert, <16 x i8> undef, <16 x i32> zeroinitializer
   1081   %tmp3 = bitcast <16 x i8> %splat.splat to <2 x i64>
   1082   store <2 x i64> %tmp, <2 x i64>* %__a.addr.i, align 16
   1083   store <2 x i64> %tmp3, <2 x i64>* %__b.addr.i, align 16
   1084   ret void
   1085 }
   1086 
   1087 define void @isel_crash_32b(i8* %cV_R.addr) {
   1088 ; X32-LABEL: isel_crash_32b:
   1089 ; X32:       ## %bb.0: ## %eintry
   1090 ; X32-NEXT:    pushl %ebp
   1091 ; X32-NEXT:    .cfi_def_cfa_offset 8
   1092 ; X32-NEXT:    .cfi_offset %ebp, -8
   1093 ; X32-NEXT:    movl %esp, %ebp
   1094 ; X32-NEXT:    .cfi_def_cfa_register %ebp
   1095 ; X32-NEXT:    andl $-32, %esp
   1096 ; X32-NEXT:    subl $128, %esp
   1097 ; X32-NEXT:    movl 8(%ebp), %eax
   1098 ; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
   1099 ; X32-NEXT:    vmovaps %ymm0, (%esp)
   1100 ; X32-NEXT:    vpbroadcastb (%eax), %ymm1
   1101 ; X32-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%esp)
   1102 ; X32-NEXT:    vmovdqa %ymm1, {{[0-9]+}}(%esp)
   1103 ; X32-NEXT:    movl %ebp, %esp
   1104 ; X32-NEXT:    popl %ebp
   1105 ; X32-NEXT:    vzeroupper
   1106 ; X32-NEXT:    retl
   1107 ;
   1108 ; X64-LABEL: isel_crash_32b:
   1109 ; X64:       ## %bb.0: ## %eintry
   1110 ; X64-NEXT:    pushq %rbp
   1111 ; X64-NEXT:    .cfi_def_cfa_offset 16
   1112 ; X64-NEXT:    .cfi_offset %rbp, -16
   1113 ; X64-NEXT:    movq %rsp, %rbp
   1114 ; X64-NEXT:    .cfi_def_cfa_register %rbp
   1115 ; X64-NEXT:    andq $-32, %rsp
   1116 ; X64-NEXT:    subq $128, %rsp
   1117 ; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
   1118 ; X64-NEXT:    vmovaps %ymm0, (%rsp)
   1119 ; X64-NEXT:    vpbroadcastb (%rdi), %ymm1
   1120 ; X64-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
   1121 ; X64-NEXT:    vmovdqa %ymm1, {{[0-9]+}}(%rsp)
   1122 ; X64-NEXT:    movq %rbp, %rsp
   1123 ; X64-NEXT:    popq %rbp
   1124 ; X64-NEXT:    vzeroupper
   1125 ; X64-NEXT:    retq
   1126 eintry:
   1127   %__a.addr.i = alloca <4 x i64>, align 16
   1128   %__b.addr.i = alloca <4 x i64>, align 16
   1129   %vCr = alloca <4 x i64>, align 16
   1130   store <4 x i64> zeroinitializer, <4 x i64>* %vCr, align 16
   1131   %tmp = load <4 x i64>, <4 x i64>* %vCr, align 16
   1132   %tmp2 = load i8, i8* %cV_R.addr, align 4
   1133   %splat.splatinsert = insertelement <32 x i8> undef, i8 %tmp2, i32 0
   1134   %splat.splat = shufflevector <32 x i8> %splat.splatinsert, <32 x i8> undef, <32 x i32> zeroinitializer
   1135   %tmp3 = bitcast <32 x i8> %splat.splat to <4 x i64>
   1136   store <4 x i64> %tmp, <4 x i64>* %__a.addr.i, align 16
   1137   store <4 x i64> %tmp3, <4 x i64>* %__b.addr.i, align 16
   1138   ret void
   1139 }
   1140 
   1141 define void @isel_crash_8w(i16* %cV_R.addr) {
   1142 ; X32-LABEL: isel_crash_8w:
   1143 ; X32:       ## %bb.0: ## %entry
   1144 ; X32-NEXT:    subl $60, %esp
   1145 ; X32-NEXT:    .cfi_def_cfa_offset 64
   1146 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
   1147 ; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
   1148 ; X32-NEXT:    vmovaps %xmm0, (%esp)
   1149 ; X32-NEXT:    vpbroadcastw (%eax), %xmm1
   1150 ; X32-NEXT:    vmovaps %xmm0, {{[0-9]+}}(%esp)
   1151 ; X32-NEXT:    vmovdqa %xmm1, {{[0-9]+}}(%esp)
   1152 ; X32-NEXT:    addl $60, %esp
   1153 ; X32-NEXT:    retl
   1154 ;
   1155 ; X64-LABEL: isel_crash_8w:
   1156 ; X64:       ## %bb.0: ## %entry
   1157 ; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
   1158 ; X64-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
   1159 ; X64-NEXT:    vpbroadcastw (%rdi), %xmm1
   1160 ; X64-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
   1161 ; X64-NEXT:    vmovdqa %xmm1, -{{[0-9]+}}(%rsp)
   1162 ; X64-NEXT:    retq
   1163 entry:
   1164   %__a.addr.i = alloca <2 x i64>, align 16
   1165   %__b.addr.i = alloca <2 x i64>, align 16
   1166   %vCr = alloca <2 x i64>, align 16
   1167   store <2 x i64> zeroinitializer, <2 x i64>* %vCr, align 16
   1168   %tmp = load <2 x i64>, <2 x i64>* %vCr, align 16
   1169   %tmp2 = load i16, i16* %cV_R.addr, align 4
   1170   %splat.splatinsert = insertelement <8 x i16> undef, i16 %tmp2, i32 0
   1171   %splat.splat = shufflevector <8 x i16> %splat.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
   1172   %tmp3 = bitcast <8 x i16> %splat.splat to <2 x i64>
   1173   store <2 x i64> %tmp, <2 x i64>* %__a.addr.i, align 16
   1174   store <2 x i64> %tmp3, <2 x i64>* %__b.addr.i, align 16
   1175   ret void
   1176 }
   1177 
   1178 define void @isel_crash_16w(i16* %cV_R.addr) {
   1179 ; X32-LABEL: isel_crash_16w:
   1180 ; X32:       ## %bb.0: ## %eintry
   1181 ; X32-NEXT:    pushl %ebp
   1182 ; X32-NEXT:    .cfi_def_cfa_offset 8
   1183 ; X32-NEXT:    .cfi_offset %ebp, -8
   1184 ; X32-NEXT:    movl %esp, %ebp
   1185 ; X32-NEXT:    .cfi_def_cfa_register %ebp
   1186 ; X32-NEXT:    andl $-32, %esp
   1187 ; X32-NEXT:    subl $128, %esp
   1188 ; X32-NEXT:    movl 8(%ebp), %eax
   1189 ; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
   1190 ; X32-NEXT:    vmovaps %ymm0, (%esp)
   1191 ; X32-NEXT:    vpbroadcastw (%eax), %ymm1
   1192 ; X32-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%esp)
   1193 ; X32-NEXT:    vmovdqa %ymm1, {{[0-9]+}}(%esp)
   1194 ; X32-NEXT:    movl %ebp, %esp
   1195 ; X32-NEXT:    popl %ebp
   1196 ; X32-NEXT:    vzeroupper
   1197 ; X32-NEXT:    retl
   1198 ;
   1199 ; X64-LABEL: isel_crash_16w:
   1200 ; X64:       ## %bb.0: ## %eintry
   1201 ; X64-NEXT:    pushq %rbp
   1202 ; X64-NEXT:    .cfi_def_cfa_offset 16
   1203 ; X64-NEXT:    .cfi_offset %rbp, -16
   1204 ; X64-NEXT:    movq %rsp, %rbp
   1205 ; X64-NEXT:    .cfi_def_cfa_register %rbp
   1206 ; X64-NEXT:    andq $-32, %rsp
   1207 ; X64-NEXT:    subq $128, %rsp
   1208 ; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
   1209 ; X64-NEXT:    vmovaps %ymm0, (%rsp)
   1210 ; X64-NEXT:    vpbroadcastw (%rdi), %ymm1
   1211 ; X64-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
   1212 ; X64-NEXT:    vmovdqa %ymm1, {{[0-9]+}}(%rsp)
   1213 ; X64-NEXT:    movq %rbp, %rsp
   1214 ; X64-NEXT:    popq %rbp
   1215 ; X64-NEXT:    vzeroupper
   1216 ; X64-NEXT:    retq
   1217 eintry:
   1218   %__a.addr.i = alloca <4 x i64>, align 16
   1219   %__b.addr.i = alloca <4 x i64>, align 16
   1220   %vCr = alloca <4 x i64>, align 16
   1221   store <4 x i64> zeroinitializer, <4 x i64>* %vCr, align 16
   1222   %tmp = load <4 x i64>, <4 x i64>* %vCr, align 16
   1223   %tmp2 = load i16, i16* %cV_R.addr, align 4
   1224   %splat.splatinsert = insertelement <16 x i16> undef, i16 %tmp2, i32 0
   1225   %splat.splat = shufflevector <16 x i16> %splat.splatinsert, <16 x i16> undef, <16 x i32> zeroinitializer
   1226   %tmp3 = bitcast <16 x i16> %splat.splat to <4 x i64>
   1227   store <4 x i64> %tmp, <4 x i64>* %__a.addr.i, align 16
   1228   store <4 x i64> %tmp3, <4 x i64>* %__b.addr.i, align 16
   1229   ret void
   1230 }
   1231 
   1232 define void @isel_crash_4d(i32* %cV_R.addr) {
   1233 ; X32-LABEL: isel_crash_4d:
   1234 ; X32:       ## %bb.0: ## %entry
   1235 ; X32-NEXT:    subl $60, %esp
   1236 ; X32-NEXT:    .cfi_def_cfa_offset 64
   1237 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
   1238 ; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
   1239 ; X32-NEXT:    vmovaps %xmm0, (%esp)
   1240 ; X32-NEXT:    vbroadcastss (%eax), %xmm1
   1241 ; X32-NEXT:    vmovaps %xmm0, {{[0-9]+}}(%esp)
   1242 ; X32-NEXT:    vmovaps %xmm1, {{[0-9]+}}(%esp)
   1243 ; X32-NEXT:    addl $60, %esp
   1244 ; X32-NEXT:    retl
   1245 ;
   1246 ; X64-LABEL: isel_crash_4d:
   1247 ; X64:       ## %bb.0: ## %entry
   1248 ; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
   1249 ; X64-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
   1250 ; X64-NEXT:    vbroadcastss (%rdi), %xmm1
   1251 ; X64-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
   1252 ; X64-NEXT:    vmovaps %xmm1, -{{[0-9]+}}(%rsp)
   1253 ; X64-NEXT:    retq
   1254 entry:
   1255   %__a.addr.i = alloca <2 x i64>, align 16
   1256   %__b.addr.i = alloca <2 x i64>, align 16
   1257   %vCr = alloca <2 x i64>, align 16
   1258   store <2 x i64> zeroinitializer, <2 x i64>* %vCr, align 16
   1259   %tmp = load <2 x i64>, <2 x i64>* %vCr, align 16
   1260   %tmp2 = load i32, i32* %cV_R.addr, align 4
   1261   %splat.splatinsert = insertelement <4 x i32> undef, i32 %tmp2, i32 0
   1262   %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
   1263   %tmp3 = bitcast <4 x i32> %splat.splat to <2 x i64>
   1264   store <2 x i64> %tmp, <2 x i64>* %__a.addr.i, align 16
   1265   store <2 x i64> %tmp3, <2 x i64>* %__b.addr.i, align 16
   1266   ret void
   1267 }
   1268 
   1269 define void @isel_crash_8d(i32* %cV_R.addr) {
   1270 ; X32-LABEL: isel_crash_8d:
   1271 ; X32:       ## %bb.0: ## %eintry
   1272 ; X32-NEXT:    pushl %ebp
   1273 ; X32-NEXT:    .cfi_def_cfa_offset 8
   1274 ; X32-NEXT:    .cfi_offset %ebp, -8
   1275 ; X32-NEXT:    movl %esp, %ebp
   1276 ; X32-NEXT:    .cfi_def_cfa_register %ebp
   1277 ; X32-NEXT:    andl $-32, %esp
   1278 ; X32-NEXT:    subl $128, %esp
   1279 ; X32-NEXT:    movl 8(%ebp), %eax
   1280 ; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
   1281 ; X32-NEXT:    vmovaps %ymm0, (%esp)
   1282 ; X32-NEXT:    vbroadcastss (%eax), %ymm1
   1283 ; X32-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%esp)
   1284 ; X32-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%esp)
   1285 ; X32-NEXT:    movl %ebp, %esp
   1286 ; X32-NEXT:    popl %ebp
   1287 ; X32-NEXT:    vzeroupper
   1288 ; X32-NEXT:    retl
   1289 ;
   1290 ; X64-LABEL: isel_crash_8d:
   1291 ; X64:       ## %bb.0: ## %eintry
   1292 ; X64-NEXT:    pushq %rbp
   1293 ; X64-NEXT:    .cfi_def_cfa_offset 16
   1294 ; X64-NEXT:    .cfi_offset %rbp, -16
   1295 ; X64-NEXT:    movq %rsp, %rbp
   1296 ; X64-NEXT:    .cfi_def_cfa_register %rbp
   1297 ; X64-NEXT:    andq $-32, %rsp
   1298 ; X64-NEXT:    subq $128, %rsp
   1299 ; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
   1300 ; X64-NEXT:    vmovaps %ymm0, (%rsp)
   1301 ; X64-NEXT:    vbroadcastss (%rdi), %ymm1
   1302 ; X64-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
   1303 ; X64-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
   1304 ; X64-NEXT:    movq %rbp, %rsp
   1305 ; X64-NEXT:    popq %rbp
   1306 ; X64-NEXT:    vzeroupper
   1307 ; X64-NEXT:    retq
   1308 eintry:
   1309   %__a.addr.i = alloca <4 x i64>, align 16
   1310   %__b.addr.i = alloca <4 x i64>, align 16
   1311   %vCr = alloca <4 x i64>, align 16
   1312   store <4 x i64> zeroinitializer, <4 x i64>* %vCr, align 16
   1313   %tmp = load <4 x i64>, <4 x i64>* %vCr, align 16
   1314   %tmp2 = load i32, i32* %cV_R.addr, align 4
   1315   %splat.splatinsert = insertelement <8 x i32> undef, i32 %tmp2, i32 0
   1316   %splat.splat = shufflevector <8 x i32> %splat.splatinsert, <8 x i32> undef, <8 x i32> zeroinitializer
   1317   %tmp3 = bitcast <8 x i32> %splat.splat to <4 x i64>
   1318   store <4 x i64> %tmp, <4 x i64>* %__a.addr.i, align 16
   1319   store <4 x i64> %tmp3, <4 x i64>* %__b.addr.i, align 16
   1320   ret void
   1321 }
   1322 
   1323 define void @isel_crash_2q(i64* %cV_R.addr) {
   1324 ; X32-LABEL: isel_crash_2q:
   1325 ; X32:       ## %bb.0: ## %entry
   1326 ; X32-NEXT:    subl $60, %esp
   1327 ; X32-NEXT:    .cfi_def_cfa_offset 64
   1328 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
   1329 ; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
   1330 ; X32-NEXT:    vmovaps %xmm0, (%esp)
   1331 ; X32-NEXT:    vpbroadcastq (%eax), %xmm1
   1332 ; X32-NEXT:    vmovaps %xmm0, {{[0-9]+}}(%esp)
   1333 ; X32-NEXT:    vmovdqa %xmm1, {{[0-9]+}}(%esp)
   1334 ; X32-NEXT:    addl $60, %esp
   1335 ; X32-NEXT:    retl
   1336 ;
   1337 ; X64-LABEL: isel_crash_2q:
   1338 ; X64:       ## %bb.0: ## %entry
   1339 ; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
   1340 ; X64-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
   1341 ; X64-NEXT:    vpbroadcastq (%rdi), %xmm1
   1342 ; X64-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
   1343 ; X64-NEXT:    vmovdqa %xmm1, -{{[0-9]+}}(%rsp)
   1344 ; X64-NEXT:    retq
   1345 entry:
   1346   %__a.addr.i = alloca <2 x i64>, align 16
   1347   %__b.addr.i = alloca <2 x i64>, align 16
   1348   %vCr = alloca <2 x i64>, align 16
   1349   store <2 x i64> zeroinitializer, <2 x i64>* %vCr, align 16
   1350   %tmp = load <2 x i64>, <2 x i64>* %vCr, align 16
   1351   %tmp2 = load i64, i64* %cV_R.addr, align 4
   1352   %splat.splatinsert = insertelement <2 x i64> undef, i64 %tmp2, i32 0
   1353   %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
   1354   store <2 x i64> %tmp, <2 x i64>* %__a.addr.i, align 16
   1355   store <2 x i64> %splat.splat, <2 x i64>* %__b.addr.i, align 16
   1356   ret void
   1357 }
   1358 
   1359 define void @isel_crash_4q(i64* %cV_R.addr) {
   1360 ; X32-LABEL: isel_crash_4q:
   1361 ; X32:       ## %bb.0: ## %eintry
   1362 ; X32-NEXT:    pushl %ebp
   1363 ; X32-NEXT:    .cfi_def_cfa_offset 8
   1364 ; X32-NEXT:    .cfi_offset %ebp, -8
   1365 ; X32-NEXT:    movl %esp, %ebp
   1366 ; X32-NEXT:    .cfi_def_cfa_register %ebp
   1367 ; X32-NEXT:    andl $-32, %esp
   1368 ; X32-NEXT:    subl $128, %esp
   1369 ; X32-NEXT:    movl 8(%ebp), %eax
   1370 ; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
   1371 ; X32-NEXT:    vmovaps %ymm0, (%esp)
   1372 ; X32-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
   1373 ; X32-NEXT:    vbroadcastsd %xmm1, %ymm1
   1374 ; X32-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%esp)
   1375 ; X32-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%esp)
   1376 ; X32-NEXT:    movl %ebp, %esp
   1377 ; X32-NEXT:    popl %ebp
   1378 ; X32-NEXT:    vzeroupper
   1379 ; X32-NEXT:    retl
   1380 ;
   1381 ; X64-LABEL: isel_crash_4q:
   1382 ; X64:       ## %bb.0: ## %eintry
   1383 ; X64-NEXT:    pushq %rbp
   1384 ; X64-NEXT:    .cfi_def_cfa_offset 16
   1385 ; X64-NEXT:    .cfi_offset %rbp, -16
   1386 ; X64-NEXT:    movq %rsp, %rbp
   1387 ; X64-NEXT:    .cfi_def_cfa_register %rbp
   1388 ; X64-NEXT:    andq $-32, %rsp
   1389 ; X64-NEXT:    subq $128, %rsp
   1390 ; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
   1391 ; X64-NEXT:    vmovaps %ymm0, (%rsp)
   1392 ; X64-NEXT:    vbroadcastsd (%rdi), %ymm1
   1393 ; X64-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
   1394 ; X64-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
   1395 ; X64-NEXT:    movq %rbp, %rsp
   1396 ; X64-NEXT:    popq %rbp
   1397 ; X64-NEXT:    vzeroupper
   1398 ; X64-NEXT:    retq
   1399 eintry:
   1400   %__a.addr.i = alloca <4 x i64>, align 16
   1401   %__b.addr.i = alloca <4 x i64>, align 16
   1402   %vCr = alloca <4 x i64>, align 16
   1403   store <4 x i64> zeroinitializer, <4 x i64>* %vCr, align 16
   1404   %tmp = load <4 x i64>, <4 x i64>* %vCr, align 16
   1405   %tmp2 = load i64, i64* %cV_R.addr, align 4
   1406   %splat.splatinsert = insertelement <4 x i64> undef, i64 %tmp2, i32 0
   1407   %splat.splat = shufflevector <4 x i64> %splat.splatinsert, <4 x i64> undef, <4 x i32> zeroinitializer
   1408   store <4 x i64> %tmp, <4 x i64>* %__a.addr.i, align 16
   1409   store <4 x i64> %splat.splat, <4 x i64>* %__b.addr.i, align 16
   1410   ret void
   1411 }
   1412