Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx512f,+avx512dq,+avx512bw,+avx512vl | FileCheck %s --check-prefix=GENERIC
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=SKX
      4 
      5 ; This test is a collection of AVX-512 instruction patterns, used to check the scheduling information printed for each instruction.
      6 
      ; addpd512: fadd of two <8 x double> register arguments. The checks expect a
      ; single reg-reg vaddpd on zmm registers for both the GENERIC and SKX runs.
      7 define <8 x double> @addpd512(<8 x double> %y, <8 x double> %x) {
      8 ; GENERIC-LABEL: addpd512:
      9 ; GENERIC:       # %bb.0: # %entry
     10 ; GENERIC-NEXT:    vaddpd %zmm0, %zmm1, %zmm0 # sched: [3:1.00]
     11 ; GENERIC-NEXT:    retq # sched: [1:1.00]
     12 ;
     13 ; SKX-LABEL: addpd512:
     14 ; SKX:       # %bb.0: # %entry
     15 ; SKX-NEXT:    vaddpd %zmm0, %zmm1, %zmm0 # sched: [4:0.50]
     16 ; SKX-NEXT:    retq # sched: [7:1.00]
     17 entry:
     18   %add.i = fadd <8 x double> %x, %y
     19   ret <8 x double> %add.i
     20 }
     21 
     ; addpd512fold: fadd of %y with a non-splat <8 x double> constant. The checks
     ; expect the constant to be folded into vaddpd as a RIP-relative memory operand.
     22 define <8 x double> @addpd512fold(<8 x double> %y) {
     23 ; GENERIC-LABEL: addpd512fold:
     24 ; GENERIC:       # %bb.0: # %entry
     25 ; GENERIC-NEXT:    vaddpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [10:1.00]
     26 ; GENERIC-NEXT:    retq # sched: [1:1.00]
     27 ;
     28 ; SKX-LABEL: addpd512fold:
     29 ; SKX:       # %bb.0: # %entry
     30 ; SKX-NEXT:    vaddpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50]
     31 ; SKX-NEXT:    retq # sched: [7:1.00]
     32 entry:
     33   %add.i = fadd <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.800000e+00, double 2.300000e+00, double 1.200000e+00>
     34   ret <8 x double> %add.i
     35 }
     36 
     ; addps512: fadd of two <16 x float> register arguments. The checks expect a
     ; single reg-reg vaddps on zmm registers.
     37 define <16 x float> @addps512(<16 x float> %y, <16 x float> %x) {
     38 ; GENERIC-LABEL: addps512:
     39 ; GENERIC:       # %bb.0: # %entry
     40 ; GENERIC-NEXT:    vaddps %zmm0, %zmm1, %zmm0 # sched: [3:1.00]
     41 ; GENERIC-NEXT:    retq # sched: [1:1.00]
     42 ;
     43 ; SKX-LABEL: addps512:
     44 ; SKX:       # %bb.0: # %entry
     45 ; SKX-NEXT:    vaddps %zmm0, %zmm1, %zmm0 # sched: [4:0.50]
     46 ; SKX-NEXT:    retq # sched: [7:1.00]
     47 entry:
     48   %add.i = fadd <16 x float> %x, %y
     49   ret <16 x float> %add.i
     50 }
     51 
     ; addps512fold: fadd of %y with a non-splat <16 x float> constant. The checks
     ; expect the constant to be folded into vaddps as a RIP-relative memory operand
     ; (no broadcast decoration is expected).
     52 define <16 x float> @addps512fold(<16 x float> %y) {
     53 ; GENERIC-LABEL: addps512fold:
     54 ; GENERIC:       # %bb.0: # %entry
     55 ; GENERIC-NEXT:    vaddps {{.*}}(%rip), %zmm0, %zmm0 # sched: [10:1.00]
     56 ; GENERIC-NEXT:    retq # sched: [1:1.00]
     57 ;
     58 ; SKX-LABEL: addps512fold:
     59 ; SKX:       # %bb.0: # %entry
     60 ; SKX-NEXT:    vaddps {{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50]
     61 ; SKX-NEXT:    retq # sched: [7:1.00]
     62 entry:
     63   %add.i = fadd <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 4.500000e+00, float 4.500000e+00, float 0x400B333340000000,  float 0x4002666660000000, float 0x3FF3333340000000>
     64   ret <16 x float> %add.i
     65 }
     66 
     ; subpd512: fsub %x - %y on <8 x double> register arguments. The checks expect
     ; a single reg-reg vsubpd on zmm registers.
     67 define <8 x double> @subpd512(<8 x double> %y, <8 x double> %x) {
     68 ; GENERIC-LABEL: subpd512:
     69 ; GENERIC:       # %bb.0: # %entry
     70 ; GENERIC-NEXT:    vsubpd %zmm0, %zmm1, %zmm0 # sched: [3:1.00]
     71 ; GENERIC-NEXT:    retq # sched: [1:1.00]
     72 ;
     73 ; SKX-LABEL: subpd512:
     74 ; SKX:       # %bb.0: # %entry
     75 ; SKX-NEXT:    vsubpd %zmm0, %zmm1, %zmm0 # sched: [4:0.50]
     76 ; SKX-NEXT:    retq # sched: [7:1.00]
     77 entry:
     78   %sub.i = fsub <8 x double> %x, %y
     79   ret <8 x double> %sub.i
     80 }
     81 
     ; subpd512fold: fsub of %y and a <8 x double> loaded from %x (align 8). The
     ; checks expect the load to be folded into vsubpd as the (%rdi) memory operand.
     82 define <8 x double> @subpd512fold(<8 x double> %y, <8 x double>* %x) {
     83 ; GENERIC-LABEL: subpd512fold:
     84 ; GENERIC:       # %bb.0: # %entry
     85 ; GENERIC-NEXT:    vsubpd (%rdi), %zmm0, %zmm0 # sched: [10:1.00]
     86 ; GENERIC-NEXT:    retq # sched: [1:1.00]
     87 ;
     88 ; SKX-LABEL: subpd512fold:
     89 ; SKX:       # %bb.0: # %entry
     90 ; SKX-NEXT:    vsubpd (%rdi), %zmm0, %zmm0 # sched: [11:0.50]
     91 ; SKX-NEXT:    retq # sched: [7:1.00]
     92 entry:
     93   %tmp2 = load <8 x double>, <8 x double>* %x, align 8
     94   %sub.i = fsub <8 x double> %y, %tmp2
     95   ret <8 x double> %sub.i
     96 }
     97 
     ; subps512: fsub %x - %y on <16 x float> register arguments. The checks expect
     ; a single reg-reg vsubps on zmm registers.
     98 define <16 x float> @subps512(<16 x float> %y, <16 x float> %x) {
     99 ; GENERIC-LABEL: subps512:
    100 ; GENERIC:       # %bb.0: # %entry
    101 ; GENERIC-NEXT:    vsubps %zmm0, %zmm1, %zmm0 # sched: [3:1.00]
    102 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    103 ;
    104 ; SKX-LABEL: subps512:
    105 ; SKX:       # %bb.0: # %entry
    106 ; SKX-NEXT:    vsubps %zmm0, %zmm1, %zmm0 # sched: [4:0.50]
    107 ; SKX-NEXT:    retq # sched: [7:1.00]
    108 entry:
    109   %sub.i = fsub <16 x float> %x, %y
    110   ret <16 x float> %sub.i
    111 }
    112 
    ; subps512fold: fsub of %y and a <16 x float> loaded from %x (align 4). The
    ; checks expect the load to be folded into vsubps as the (%rdi) memory operand.
    113 define <16 x float> @subps512fold(<16 x float> %y, <16 x float>* %x) {
    114 ; GENERIC-LABEL: subps512fold:
    115 ; GENERIC:       # %bb.0: # %entry
    116 ; GENERIC-NEXT:    vsubps (%rdi), %zmm0, %zmm0 # sched: [10:1.00]
    117 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    118 ;
    119 ; SKX-LABEL: subps512fold:
    120 ; SKX:       # %bb.0: # %entry
    121 ; SKX-NEXT:    vsubps (%rdi), %zmm0, %zmm0 # sched: [11:0.50]
    122 ; SKX-NEXT:    retq # sched: [7:1.00]
    123 entry:
    124   %tmp2 = load <16 x float>, <16 x float>* %x, align 4
    125   %sub.i = fsub <16 x float> %y, %tmp2
    126   ret <16 x float> %sub.i
    127 }
    128 
    ; imulq512: integer mul of two <8 x i64> register arguments. The checks expect
    ; a single vpmullq on zmm registers.
    129 define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) {
    130 ; GENERIC-LABEL: imulq512:
    131 ; GENERIC:       # %bb.0:
    132 ; GENERIC-NEXT:    vpmullq %zmm0, %zmm1, %zmm0 # sched: [5:1.00]
    133 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    134 ;
    135 ; SKX-LABEL: imulq512:
    136 ; SKX:       # %bb.0:
    137 ; SKX-NEXT:    vpmullq %zmm0, %zmm1, %zmm0 # sched: [12:1.50]
    138 ; SKX-NEXT:    retq # sched: [7:1.00]
    139   %z = mul <8 x i64>%x, %y
    140   ret <8 x i64>%z
    141 }
    142 
    ; imulq256: integer mul of two <4 x i64> register arguments. The checks expect
    ; a single vpmullq on ymm registers.
    143 define <4 x i64> @imulq256(<4 x i64> %y, <4 x i64> %x) {
    144 ; GENERIC-LABEL: imulq256:
    145 ; GENERIC:       # %bb.0:
    146 ; GENERIC-NEXT:    vpmullq %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
    147 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    148 ;
    149 ; SKX-LABEL: imulq256:
    150 ; SKX:       # %bb.0:
    151 ; SKX-NEXT:    vpmullq %ymm0, %ymm1, %ymm0 # sched: [12:1.50]
    152 ; SKX-NEXT:    retq # sched: [7:1.00]
    153   %z = mul <4 x i64>%x, %y
    154   ret <4 x i64>%z
    155 }
    156 
    ; imulq128: integer mul of two <2 x i64> register arguments. The checks expect
    ; a single vpmullq on xmm registers.
    157 define <2 x i64> @imulq128(<2 x i64> %y, <2 x i64> %x) {
    158 ; GENERIC-LABEL: imulq128:
    159 ; GENERIC:       # %bb.0:
    160 ; GENERIC-NEXT:    vpmullq %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
    161 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    162 ;
    163 ; SKX-LABEL: imulq128:
    164 ; SKX:       # %bb.0:
    165 ; SKX-NEXT:    vpmullq %xmm0, %xmm1, %xmm0 # sched: [12:1.50]
    166 ; SKX-NEXT:    retq # sched: [7:1.00]
    167   %z = mul <2 x i64>%x, %y
    168   ret <2 x i64>%z
    169 }
    170 
    ; mulpd512: fmul of two <8 x double> register arguments. The checks expect a
    ; single reg-reg vmulpd on zmm registers.
    171 define <8 x double> @mulpd512(<8 x double> %y, <8 x double> %x) {
    172 ; GENERIC-LABEL: mulpd512:
    173 ; GENERIC:       # %bb.0: # %entry
    174 ; GENERIC-NEXT:    vmulpd %zmm0, %zmm1, %zmm0 # sched: [5:1.00]
    175 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    176 ;
    177 ; SKX-LABEL: mulpd512:
    178 ; SKX:       # %bb.0: # %entry
    179 ; SKX-NEXT:    vmulpd %zmm0, %zmm1, %zmm0 # sched: [4:0.50]
    180 ; SKX-NEXT:    retq # sched: [7:1.00]
    181 entry:
    182   %mul.i = fmul <8 x double> %x, %y
    183   ret <8 x double> %mul.i
    184 }
    185 
    ; mulpd512fold: fmul of %y with a non-splat <8 x double> constant. The checks
    ; expect the constant to be folded into vmulpd as a RIP-relative memory operand.
    186 define <8 x double> @mulpd512fold(<8 x double> %y) {
    187 ; GENERIC-LABEL: mulpd512fold:
    188 ; GENERIC:       # %bb.0: # %entry
    189 ; GENERIC-NEXT:    vmulpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [12:1.00]
    190 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    191 ;
    192 ; SKX-LABEL: mulpd512fold:
    193 ; SKX:       # %bb.0: # %entry
    194 ; SKX-NEXT:    vmulpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50]
    195 ; SKX-NEXT:    retq # sched: [7:1.00]
    196 entry:
    197   %mul.i = fmul <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
    198   ret <8 x double> %mul.i
    199 }
    200 
    ; mulps512: fmul of two <16 x float> register arguments. The checks expect a
    ; single reg-reg vmulps on zmm registers.
    201 define <16 x float> @mulps512(<16 x float> %y, <16 x float> %x) {
    202 ; GENERIC-LABEL: mulps512:
    203 ; GENERIC:       # %bb.0: # %entry
    204 ; GENERIC-NEXT:    vmulps %zmm0, %zmm1, %zmm0 # sched: [5:1.00]
    205 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    206 ;
    207 ; SKX-LABEL: mulps512:
    208 ; SKX:       # %bb.0: # %entry
    209 ; SKX-NEXT:    vmulps %zmm0, %zmm1, %zmm0 # sched: [4:0.50]
    210 ; SKX-NEXT:    retq # sched: [7:1.00]
    211 entry:
    212   %mul.i = fmul <16 x float> %x, %y
    213   ret <16 x float> %mul.i
    214 }
    215 
    ; mulps512fold: fmul of %y with a non-splat <16 x float> constant. The checks
    ; expect the constant to be folded into vmulps as a RIP-relative memory operand.
    216 define <16 x float> @mulps512fold(<16 x float> %y) {
    217 ; GENERIC-LABEL: mulps512fold:
    218 ; GENERIC:       # %bb.0: # %entry
    219 ; GENERIC-NEXT:    vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [12:1.00]
    220 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    221 ;
    222 ; SKX-LABEL: mulps512fold:
    223 ; SKX:       # %bb.0: # %entry
    224 ; SKX-NEXT:    vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50]
    225 ; SKX-NEXT:    retq # sched: [7:1.00]
    226 entry:
    227   %mul.i = fmul <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
    228   ret <16 x float> %mul.i
    229 }
    230 
    ; divpd512: fdiv %x / %y on <8 x double> register arguments. The checks expect
    ; a single reg-reg vdivpd on zmm registers.
    231 define <8 x double> @divpd512(<8 x double> %y, <8 x double> %x) {
    232 ; GENERIC-LABEL: divpd512:
    233 ; GENERIC:       # %bb.0: # %entry
    234 ; GENERIC-NEXT:    vdivpd %zmm0, %zmm1, %zmm0 # sched: [45:44.00]
    235 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    236 ;
    237 ; SKX-LABEL: divpd512:
    238 ; SKX:       # %bb.0: # %entry
    239 ; SKX-NEXT:    vdivpd %zmm0, %zmm1, %zmm0 # sched: [23:16.00]
    240 ; SKX-NEXT:    retq # sched: [7:1.00]
    241 entry:
    242   %div.i = fdiv <8 x double> %x, %y
    243   ret <8 x double> %div.i
    244 }
    245 
    ; divpd512fold: fdiv of %y by a non-splat <8 x double> constant. The checks
    ; expect the constant to be folded into vdivpd as a RIP-relative memory operand.
    246 define <8 x double> @divpd512fold(<8 x double> %y) {
    247 ; GENERIC-LABEL: divpd512fold:
    248 ; GENERIC:       # %bb.0: # %entry
    249 ; GENERIC-NEXT:    vdivpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [52:44.00]
    250 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    251 ;
    252 ; SKX-LABEL: divpd512fold:
    253 ; SKX:       # %bb.0: # %entry
    254 ; SKX-NEXT:    vdivpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [30:16.00]
    255 ; SKX-NEXT:    retq # sched: [7:1.00]
    256 entry:
    257   %div.i = fdiv <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
    258   ret <8 x double> %div.i
    259 }
    260 
    ; divps512: fdiv %x / %y on <16 x float> register arguments. The checks expect
    ; a single reg-reg vdivps on zmm registers.
    261 define <16 x float> @divps512(<16 x float> %y, <16 x float> %x) {
    262 ; GENERIC-LABEL: divps512:
    263 ; GENERIC:       # %bb.0: # %entry
    264 ; GENERIC-NEXT:    vdivps %zmm0, %zmm1, %zmm0 # sched: [29:28.00]
    265 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    266 ;
    267 ; SKX-LABEL: divps512:
    268 ; SKX:       # %bb.0: # %entry
    269 ; SKX-NEXT:    vdivps %zmm0, %zmm1, %zmm0 # sched: [18:10.00]
    270 ; SKX-NEXT:    retq # sched: [7:1.00]
    271 entry:
    272   %div.i = fdiv <16 x float> %x, %y
    273   ret <16 x float> %div.i
    274 }
    275 
    ; divps512fold: fdiv of %y by a non-splat <16 x float> constant. The checks
    ; expect the constant to be folded into vdivps as a RIP-relative memory operand.
    276 define <16 x float> @divps512fold(<16 x float> %y) {
    277 ; GENERIC-LABEL: divps512fold:
    278 ; GENERIC:       # %bb.0: # %entry
    279 ; GENERIC-NEXT:    vdivps {{.*}}(%rip), %zmm0, %zmm0 # sched: [36:28.00]
    280 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    281 ;
    282 ; SKX-LABEL: divps512fold:
    283 ; SKX:       # %bb.0: # %entry
    284 ; SKX-NEXT:    vdivps {{.*}}(%rip), %zmm0, %zmm0 # sched: [25:10.00]
    285 ; SKX-NEXT:    retq # sched: [7:1.00]
    286 entry:
    287   %div.i = fdiv <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000>
    288   ret <16 x float> %div.i
    289 }
    290 
    ; vpaddq_test: integer add of two <8 x i64> register arguments. The checks
    ; expect a single reg-reg vpaddq on zmm registers.
    291 define <8 x i64> @vpaddq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
    292 ; GENERIC-LABEL: vpaddq_test:
    293 ; GENERIC:       # %bb.0:
    294 ; GENERIC-NEXT:    vpaddq %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
    295 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    296 ;
    297 ; SKX-LABEL: vpaddq_test:
    298 ; SKX:       # %bb.0:
    299 ; SKX-NEXT:    vpaddq %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
    300 ; SKX-NEXT:    retq # sched: [7:1.00]
    301   %x = add <8 x i64> %i, %j
    302   ret <8 x i64> %x
    303 }
    304 
    ; vpaddq_fold_test: add of %i and a <8 x i64> loaded from %j (align 4). The
    ; checks expect the load to be folded into vpaddq as the (%rdi) memory operand.
    305 define <8 x i64> @vpaddq_fold_test(<8 x i64> %i, <8 x i64>* %j) nounwind {
    306 ; GENERIC-LABEL: vpaddq_fold_test:
    307 ; GENERIC:       # %bb.0:
    308 ; GENERIC-NEXT:    vpaddq (%rdi), %zmm0, %zmm0 # sched: [8:0.50]
    309 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    310 ;
    311 ; SKX-LABEL: vpaddq_fold_test:
    312 ; SKX:       # %bb.0:
    313 ; SKX-NEXT:    vpaddq (%rdi), %zmm0, %zmm0 # sched: [8:0.50]
    314 ; SKX-NEXT:    retq # sched: [7:1.00]
    315   %tmp = load <8 x i64>, <8 x i64>* %j, align 4
    316   %x = add <8 x i64> %i, %tmp
    317   ret <8 x i64> %x
    318 }
    319 
    ; vpaddq_broadcast_test: add of %i with a splat constant (every lane is 2).
    ; The checks expect the constant to be folded as a {1to8} broadcast memory
    ; operand of vpaddq.
    320 define <8 x i64> @vpaddq_broadcast_test(<8 x i64> %i) nounwind {
    321 ; GENERIC-LABEL: vpaddq_broadcast_test:
    322 ; GENERIC:       # %bb.0:
    323 ; GENERIC-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
    324 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    325 ;
    326 ; SKX-LABEL: vpaddq_broadcast_test:
    327 ; SKX:       # %bb.0:
    328 ; SKX-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
    329 ; SKX-NEXT:    retq # sched: [7:1.00]
    330   %x = add <8 x i64> %i, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
    331   ret <8 x i64> %x
    332 }
    333 
    ; vpaddq_broadcast2_test: add of %i with a vector whose eight lanes all hold
    ; the i64 loaded from %j. The checks expect the scalar load to be folded as a
    ; (%rdi){1to8} broadcast memory operand of vpaddq.
    334 define <8 x i64> @vpaddq_broadcast2_test(<8 x i64> %i, i64* %j) nounwind {
    335 ; GENERIC-LABEL: vpaddq_broadcast2_test:
    336 ; GENERIC:       # %bb.0:
    337 ; GENERIC-NEXT:    vpaddq (%rdi){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
    338 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    339 ;
    340 ; SKX-LABEL: vpaddq_broadcast2_test:
    341 ; SKX:       # %bb.0:
    342 ; SKX-NEXT:    vpaddq (%rdi){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
    343 ; SKX-NEXT:    retq # sched: [7:1.00]
    344   %tmp = load i64, i64* %j
          ; Build the splat one lane at a time: %tmp is inserted into lanes 0..7.
    345   %j.0 = insertelement <8 x i64> undef, i64 %tmp, i32 0
    346   %j.1 = insertelement <8 x i64> %j.0, i64 %tmp, i32 1
    347   %j.2 = insertelement <8 x i64> %j.1, i64 %tmp, i32 2
    348   %j.3 = insertelement <8 x i64> %j.2, i64 %tmp, i32 3
    349   %j.4 = insertelement <8 x i64> %j.3, i64 %tmp, i32 4
    350   %j.5 = insertelement <8 x i64> %j.4, i64 %tmp, i32 5
    351   %j.6 = insertelement <8 x i64> %j.5, i64 %tmp, i32 6
    352   %j.7 = insertelement <8 x i64> %j.6, i64 %tmp, i32 7
    353   %x = add <8 x i64> %i, %j.7
    354   ret <8 x i64> %x
    355 }
    356 
    ; vpaddd_test: integer add of two <16 x i32> register arguments. The checks
    ; expect a single reg-reg vpaddd on zmm registers.
    357 define <16 x i32> @vpaddd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
    358 ; GENERIC-LABEL: vpaddd_test:
    359 ; GENERIC:       # %bb.0:
    360 ; GENERIC-NEXT:    vpaddd %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
    361 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    362 ;
    363 ; SKX-LABEL: vpaddd_test:
    364 ; SKX:       # %bb.0:
    365 ; SKX-NEXT:    vpaddd %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
    366 ; SKX-NEXT:    retq # sched: [7:1.00]
    367   %x = add <16 x i32> %i, %j
    368   ret <16 x i32> %x
    369 }
    370 
    ; vpaddd_fold_test: add of %i and a <16 x i32> loaded from %j (align 4). The
    ; checks expect the load to be folded into vpaddd as the (%rdi) memory operand.
    371 define <16 x i32> @vpaddd_fold_test(<16 x i32> %i, <16 x i32>* %j) nounwind {
    372 ; GENERIC-LABEL: vpaddd_fold_test:
    373 ; GENERIC:       # %bb.0:
    374 ; GENERIC-NEXT:    vpaddd (%rdi), %zmm0, %zmm0 # sched: [8:0.50]
    375 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    376 ;
    377 ; SKX-LABEL: vpaddd_fold_test:
    378 ; SKX:       # %bb.0:
    379 ; SKX-NEXT:    vpaddd (%rdi), %zmm0, %zmm0 # sched: [8:0.50]
    380 ; SKX-NEXT:    retq # sched: [7:1.00]
    381   %tmp = load <16 x i32>, <16 x i32>* %j, align 4
    382   %x = add <16 x i32> %i, %tmp
    383   ret <16 x i32> %x
    384 }
    385 
    ; vpaddd_broadcast_test: add of %i with a splat constant (every lane is 3).
    ; The checks expect the constant to be folded as a {1to16} broadcast memory
    ; operand of vpaddd.
    386 define <16 x i32> @vpaddd_broadcast_test(<16 x i32> %i) nounwind {
    387 ; GENERIC-LABEL: vpaddd_broadcast_test:
    388 ; GENERIC:       # %bb.0:
    389 ; GENERIC-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
    390 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    391 ;
    392 ; SKX-LABEL: vpaddd_broadcast_test:
    393 ; SKX:       # %bb.0:
    394 ; SKX-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
    395 ; SKX-NEXT:    retq # sched: [7:1.00]
    396   %x = add <16 x i32> %i, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
    397   ret <16 x i32> %x
    398 }
    399 
    ; vpaddd_mask_test: add of %i and %j, selected per lane against %i using the
    ; mask (%mask1 != 0). The checks expect vptestmd to produce %k1 and a
    ; merge-masked vpaddd {%k1} whose destination is the register holding %i.
    400 define <16 x i32> @vpaddd_mask_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
    401 ; GENERIC-LABEL: vpaddd_mask_test:
    402 ; GENERIC:       # %bb.0:
    403 ; GENERIC-NEXT:    vptestmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
    404 ; GENERIC-NEXT:    vpaddd %zmm1, %zmm0, %zmm0 {%k1} # sched: [1:0.50]
    405 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    406 ;
    407 ; SKX-LABEL: vpaddd_mask_test:
    408 ; SKX:       # %bb.0:
    409 ; SKX-NEXT:    vptestmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
    410 ; SKX-NEXT:    vpaddd %zmm1, %zmm0, %zmm0 {%k1} # sched: [1:0.33]
    411 ; SKX-NEXT:    retq # sched: [7:1.00]
    412   %mask = icmp ne <16 x i32> %mask1, zeroinitializer
    413   %x = add <16 x i32> %i, %j
    414   %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
    415   ret <16 x i32> %r
    416 }
    417 
    ; vpaddd_maskz_test: add of %i and %j, selected per lane against zero using
    ; the mask (%mask1 != 0). The checks expect vptestmd to produce %k1 and a
    ; zero-masked vpaddd {%k1} {z}.
    418 define <16 x i32> @vpaddd_maskz_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
    419 ; GENERIC-LABEL: vpaddd_maskz_test:
    420 ; GENERIC:       # %bb.0:
    421 ; GENERIC-NEXT:    vptestmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
    422 ; GENERIC-NEXT:    vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50]
    423 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    424 ;
    425 ; SKX-LABEL: vpaddd_maskz_test:
    426 ; SKX:       # %bb.0:
    427 ; SKX-NEXT:    vptestmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
    428 ; SKX-NEXT:    vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33]
    429 ; SKX-NEXT:    retq # sched: [7:1.00]
    430   %mask = icmp ne <16 x i32> %mask1, zeroinitializer
    431   %x = add <16 x i32> %i, %j
    432   %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
    433   ret <16 x i32> %r
    434 }
    435 
    ; vpaddd_mask_fold_test: merge-masked add whose second operand is loaded from
    ; %j.ptr. The checks expect vptestmd to produce %k1 and the load to be folded
    ; into vpaddd {%k1} as the (%rdi) memory operand.
    ; The function reads memory through %j.ptr, so it carries only `nounwind`
    ; (not `readnone`), like vpaddd_fold_test and vpaddq_fold_test in this file.
    436 define <16 x i32> @vpaddd_mask_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind {
    437 ; GENERIC-LABEL: vpaddd_mask_fold_test:
    438 ; GENERIC:       # %bb.0:
    439 ; GENERIC-NEXT:    vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
    440 ; GENERIC-NEXT:    vpaddd (%rdi), %zmm0, %zmm0 {%k1} # sched: [8:0.50]
    441 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    442 ;
    443 ; SKX-LABEL: vpaddd_mask_fold_test:
    444 ; SKX:       # %bb.0:
    445 ; SKX-NEXT:    vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
    446 ; SKX-NEXT:    vpaddd (%rdi), %zmm0, %zmm0 {%k1} # sched: [8:0.50]
    447 ; SKX-NEXT:    retq # sched: [7:1.00]
    448   %mask = icmp ne <16 x i32> %mask1, zeroinitializer
    449   %j = load <16 x i32>, <16 x i32>* %j.ptr
    450   %x = add <16 x i32> %i, %j
    451   %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
    452   ret <16 x i32> %r
    453 }
    454 
    ; vpaddd_mask_broadcast_test: merge-masked add of %i with a splat constant
    ; (every lane is 4). The checks expect vptestmd to produce %k1 and the constant
    ; to be folded as a {1to16} broadcast memory operand of vpaddd {%k1}.
    455 define <16 x i32> @vpaddd_mask_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
    456 ; GENERIC-LABEL: vpaddd_mask_broadcast_test:
    457 ; GENERIC:       # %bb.0:
    458 ; GENERIC-NEXT:    vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
    459 ; GENERIC-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} # sched: [8:0.50]
    460 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    461 ;
    462 ; SKX-LABEL: vpaddd_mask_broadcast_test:
    463 ; SKX:       # %bb.0:
    464 ; SKX-NEXT:    vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
    465 ; SKX-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} # sched: [8:0.50]
    466 ; SKX-NEXT:    retq # sched: [7:1.00]
    467   %mask = icmp ne <16 x i32> %mask1, zeroinitializer
    468   %x = add <16 x i32> %i, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
    469   %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
    470   ret <16 x i32> %r
    471 }
    472 
    ; vpaddd_maskz_fold_test: zero-masked add whose second operand is loaded from
    ; %j.ptr. The checks expect vptestmd to produce %k1 and the load to be folded
    ; into vpaddd {%k1} {z} as the (%rdi) memory operand.
    ; The function reads memory through %j.ptr, so it carries only `nounwind`
    ; (not `readnone`), like vpaddd_fold_test and vpaddq_fold_test in this file.
    473 define <16 x i32> @vpaddd_maskz_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind {
    474 ; GENERIC-LABEL: vpaddd_maskz_fold_test:
    475 ; GENERIC:       # %bb.0:
    476 ; GENERIC-NEXT:    vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
    477 ; GENERIC-NEXT:    vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [8:0.50]
    478 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    479 ;
    480 ; SKX-LABEL: vpaddd_maskz_fold_test:
    481 ; SKX:       # %bb.0:
    482 ; SKX-NEXT:    vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
    483 ; SKX-NEXT:    vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [8:0.50]
    484 ; SKX-NEXT:    retq # sched: [7:1.00]
    485   %mask = icmp ne <16 x i32> %mask1, zeroinitializer
    486   %j = load <16 x i32>, <16 x i32>* %j.ptr
    487   %x = add <16 x i32> %i, %j
    488   %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
    489   ret <16 x i32> %r
    490 }
    491 
    ; vpaddd_maskz_broadcast_test: zero-masked add of %i with a splat constant
    ; (every lane is 5). The checks expect vptestmd to produce %k1 and the constant
    ; to be folded as a {1to16} broadcast memory operand of vpaddd {%k1} {z}.
    492 define <16 x i32> @vpaddd_maskz_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
    493 ; GENERIC-LABEL: vpaddd_maskz_broadcast_test:
    494 ; GENERIC:       # %bb.0:
    495 ; GENERIC-NEXT:    vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
    496 ; GENERIC-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z} # sched: [8:0.50]
    497 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    498 ;
    499 ; SKX-LABEL: vpaddd_maskz_broadcast_test:
    500 ; SKX:       # %bb.0:
    501 ; SKX-NEXT:    vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
    502 ; SKX-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z} # sched: [8:0.50]
    503 ; SKX-NEXT:    retq # sched: [7:1.00]
    504   %mask = icmp ne <16 x i32> %mask1, zeroinitializer
    505   %x = add <16 x i32> %i, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
    506   %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
    507   ret <16 x i32> %r
    508 }
    509 
    ; vpsubq_test: integer sub %i - %j on <8 x i64> register arguments. The checks
    ; expect a single reg-reg vpsubq on zmm registers.
    510 define <8 x i64> @vpsubq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
    511 ; GENERIC-LABEL: vpsubq_test:
    512 ; GENERIC:       # %bb.0:
    513 ; GENERIC-NEXT:    vpsubq %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
    514 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    515 ;
    516 ; SKX-LABEL: vpsubq_test:
    517 ; SKX:       # %bb.0:
    518 ; SKX-NEXT:    vpsubq %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
    519 ; SKX-NEXT:    retq # sched: [7:1.00]
    520   %x = sub <8 x i64> %i, %j
    521   ret <8 x i64> %x
    522 }
    523 
    ; vpsubd_test: integer sub %i - %j on <16 x i32> register arguments. The checks
    ; expect a single reg-reg vpsubd on zmm registers.
    524 define <16 x i32> @vpsubd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
    525 ; GENERIC-LABEL: vpsubd_test:
    526 ; GENERIC:       # %bb.0:
    527 ; GENERIC-NEXT:    vpsubd %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
    528 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    529 ;
    530 ; SKX-LABEL: vpsubd_test:
    531 ; SKX:       # %bb.0:
    532 ; SKX-NEXT:    vpsubd %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
    533 ; SKX-NEXT:    retq # sched: [7:1.00]
    534   %x = sub <16 x i32> %i, %j
    535   ret <16 x i32> %x
    536 }
    537 
    ; vpmulld_test: integer mul of two <16 x i32> register arguments. The checks
    ; expect a single reg-reg vpmulld on zmm registers.
    538 define <16 x i32> @vpmulld_test(<16 x i32> %i, <16 x i32> %j) {
    539 ; GENERIC-LABEL: vpmulld_test:
    540 ; GENERIC:       # %bb.0:
    541 ; GENERIC-NEXT:    vpmulld %zmm1, %zmm0, %zmm0 # sched: [5:1.00]
    542 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    543 ;
    544 ; SKX-LABEL: vpmulld_test:
    545 ; SKX:       # %bb.0:
    546 ; SKX-NEXT:    vpmulld %zmm1, %zmm0, %zmm0 # sched: [10:1.00]
    547 ; SKX-NEXT:    retq # sched: [7:1.00]
    548   %x = mul <16 x i32> %i, %j
    549   ret <16 x i32> %x
    550 }
    551 
    ; sqrtA: tail call to the external @sqrtf (declared readnone) on a float. The
    ; checks expect no call in the output, only an inline vsqrtss.
    552 declare float @sqrtf(float) readnone
    553 define float @sqrtA(float %a) nounwind uwtable readnone ssp {
    554 ; GENERIC-LABEL: sqrtA:
    555 ; GENERIC:       # %bb.0: # %entry
    556 ; GENERIC-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0 # sched: [14:14.00]
    557 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    558 ;
    559 ; SKX-LABEL: sqrtA:
    560 ; SKX:       # %bb.0: # %entry
    561 ; SKX-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0 # sched: [12:3.00]
    562 ; SKX-NEXT:    retq # sched: [7:1.00]
    563 entry:
    564   %conv1 = tail call float @sqrtf(float %a) nounwind readnone
    565   ret float %conv1
    566 }
    567 
    ; sqrtB: tail call to the external @sqrt (declared readnone) on a double. The
    ; checks expect no call in the output, only an inline vsqrtsd.
    568 declare double @sqrt(double) readnone
    569 define double @sqrtB(double %a) nounwind uwtable readnone ssp {
    570 ; GENERIC-LABEL: sqrtB:
    571 ; GENERIC:       # %bb.0: # %entry
    572 ; GENERIC-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [21:21.00]
    573 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    574 ;
    575 ; SKX-LABEL: sqrtB:
    576 ; SKX:       # %bb.0: # %entry
    577 ; SKX-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [18:6.00]
    578 ; SKX-NEXT:    retq # sched: [7:1.00]
    579 entry:
    580   %call = tail call double @sqrt(double %a) nounwind readnone
    581   ret double %call
    582 }
    583 
    ; sqrtC: scalar float square root through the @llvm.sqrt.f32 intrinsic. The
    ; checks expect a single vsqrtss.
    584 declare float @llvm.sqrt.f32(float)
    585 define float @sqrtC(float %a) nounwind {
    586 ; GENERIC-LABEL: sqrtC:
    587 ; GENERIC:       # %bb.0:
    588 ; GENERIC-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0 # sched: [14:14.00]
    589 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    590 ;
    591 ; SKX-LABEL: sqrtC:
    592 ; SKX:       # %bb.0:
    593 ; SKX-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0 # sched: [12:3.00]
    594 ; SKX-NEXT:    retq # sched: [7:1.00]
    595   %b = call float @llvm.sqrt.f32(float %a)
    596   ret float %b
    597 }
    598 
    ; sqrtD: <16 x float> square root through the @llvm.sqrt.v16f32 intrinsic. The
    ; checks expect a single vsqrtps on a zmm register.
    599 declare <16 x float> @llvm.sqrt.v16f32(<16 x float>)
    600 define <16 x float> @sqrtD(<16 x float> %a) nounwind {
    601 ; GENERIC-LABEL: sqrtD:
    602 ; GENERIC:       # %bb.0:
    603 ; GENERIC-NEXT:    vsqrtps %zmm0, %zmm0 # sched: [29:28.00]
    604 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    605 ;
    606 ; SKX-LABEL: sqrtD:
    607 ; SKX:       # %bb.0:
    608 ; SKX-NEXT:    vsqrtps %zmm0, %zmm0 # sched: [20:12.00]
    609 ; SKX-NEXT:    retq # sched: [7:1.00]
    610   %b = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a)
    611   ret <16 x float> %b
    612 }
    613 
    ; sqrtE: <8 x double> square root through the @llvm.sqrt.v8f64 intrinsic. The
    ; checks expect a single vsqrtpd on a zmm register.
    614 declare <8 x double> @llvm.sqrt.v8f64(<8 x double>)
    615 define <8 x double> @sqrtE(<8 x double> %a) nounwind {
    616 ; GENERIC-LABEL: sqrtE:
    617 ; GENERIC:       # %bb.0:
    618 ; GENERIC-NEXT:    vsqrtpd %zmm0, %zmm0 # sched: [45:44.00]
    619 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    620 ;
    621 ; SKX-LABEL: sqrtE:
    622 ; SKX:       # %bb.0:
    623 ; SKX-NEXT:    vsqrtpd %zmm0, %zmm0 # sched: [32:24.00]
    624 ; SKX-NEXT:    retq # sched: [7:1.00]
    625   %b = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a)
    626   ret <8 x double> %b
    627 }
    628 
    ; fadd_broadcast: fadd of %a with a splat <16 x float> constant (all lanes
    ; 0x3FB99999A0000000). The checks expect the constant to be folded as a
    ; {1to16} broadcast memory operand of vaddps.
    629 define <16 x float> @fadd_broadcast(<16 x float> %a) nounwind {
    630 ; GENERIC-LABEL: fadd_broadcast:
    631 ; GENERIC:       # %bb.0:
    632 ; GENERIC-NEXT:    vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [10:1.00]
    633 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    634 ;
    635 ; SKX-LABEL: fadd_broadcast:
    636 ; SKX:       # %bb.0:
    637 ; SKX-NEXT:    vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [11:0.50]
    638 ; SKX-NEXT:    retq # sched: [7:1.00]
    639   %b = fadd <16 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
    640   ret <16 x float> %b
    641 }
    642 
    ; addq_broadcast: add of %a with a splat constant (every lane is 2). The
    ; checks expect the constant to be folded as a {1to8} broadcast memory operand
    ; of vpaddq.
    643 define <8 x i64> @addq_broadcast(<8 x i64> %a) nounwind {
    644 ; GENERIC-LABEL: addq_broadcast:
    645 ; GENERIC:       # %bb.0:
    646 ; GENERIC-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
    647 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    648 ;
    649 ; SKX-LABEL: addq_broadcast:
    650 ; SKX:       # %bb.0:
    651 ; SKX-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
    652 ; SKX-NEXT:    retq # sched: [7:1.00]
    653   %b = add <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
    654   ret <8 x i64> %b
    655 }
    656 
    ; orq_broadcast: integer or of %a with a splat constant (every lane is 2). The
    ; checks expect the vorpd mnemonic with the constant folded as a {1to8}
    ; broadcast memory operand.
    657 define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
    658 ; GENERIC-LABEL: orq_broadcast:
    659 ; GENERIC:       # %bb.0:
    660 ; GENERIC-NEXT:    vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00]
    661 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    662 ;
    663 ; SKX-LABEL: orq_broadcast:
    664 ; SKX:       # %bb.0:
    665 ; SKX-NEXT:    vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
    666 ; SKX-NEXT:    retq # sched: [7:1.00]
    667   %b = or <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
    668   ret <8 x i64> %b
    669 }
    670 
    ; andd512fold: integer and of %y with a <16 x i32> loaded from %x (align 4).
    ; The checks expect the vandps mnemonic with the load folded as the (%rdi)
    ; memory operand.
    671 define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
    672 ; GENERIC-LABEL: andd512fold:
    673 ; GENERIC:       # %bb.0: # %entry
    674 ; GENERIC-NEXT:    vandps (%rdi), %zmm0, %zmm0 # sched: [8:1.00]
    675 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    676 ;
    677 ; SKX-LABEL: andd512fold:
    678 ; SKX:       # %bb.0: # %entry
    679 ; SKX-NEXT:    vandps (%rdi), %zmm0, %zmm0 # sched: [8:0.50]
    680 ; SKX-NEXT:    retq # sched: [7:1.00]
    681 entry:
    682   %a = load <16 x i32>, <16 x i32>* %x, align 4
    683   %b = and <16 x i32> %y, %a
    684   ret <16 x i32> %b
    685 }
    686 
    ; andqbrst: integer and of %p1 with a splat of the i64 loaded from %ap. The
    ; checks expect the vandpd mnemonic with the scalar load folded as a
    ; (%rdi){1to8} broadcast memory operand.
    687 define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
    688 ; GENERIC-LABEL: andqbrst:
    689 ; GENERIC:       # %bb.0: # %entry
    690 ; GENERIC-NEXT:    vandpd (%rdi){1to8}, %zmm0, %zmm0 # sched: [8:1.00]
    691 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    692 ;
    693 ; SKX-LABEL: andqbrst:
    694 ; SKX:       # %bb.0: # %entry
    695 ; SKX-NEXT:    vandpd (%rdi){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
    696 ; SKX-NEXT:    retq # sched: [7:1.00]
    697 entry:
    698   %a = load i64, i64* %ap, align 8
          ; Splat: insert %a into lane 0, then shuffle with an all-zero index mask.
    699   %b = insertelement <8 x i64> undef, i64 %a, i32 0
    700   %c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
    701   %d = and <8 x i64> %p1, %c
    702   ret <8 x i64>%d
    703 }
    704 
    705 define <16 x float> @test_mask_vaddps(<16 x float> %dst, <16 x float> %i,
    706 ; GENERIC-LABEL: test_mask_vaddps:
    707 ; GENERIC:       # %bb.0:
    708 ; GENERIC-NEXT:    vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
    709 ; GENERIC-NEXT:    vaddps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
    710 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    711 ;
    712 ; SKX-LABEL: test_mask_vaddps:
    713 ; SKX:       # %bb.0:
    714 ; SKX-NEXT:    vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00]
    715 ; SKX-NEXT:    vaddps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50]
    716 ; SKX-NEXT:    retq # sched: [7:1.00]
    717                                      <16 x float> %j, <16 x i32> %mask1)
    718                                      nounwind readnone {
        ; Per lane: %i + %j where %mask1 is non-zero, otherwise the %dst lane.
        ; The checks expect vptestmd to build %k1 and a vaddps masked by {%k1}.
    719   %mask = icmp ne <16 x i32> %mask1, zeroinitializer
    720   %x = fadd <16 x float> %i, %j
    721   %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
    722   ret <16 x float> %r
    723 }
    724 
    725 define <16 x float> @test_mask_vmulps(<16 x float> %dst, <16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone {
    726 ; GENERIC-LABEL: test_mask_vmulps:
    727 ; GENERIC:       # %bb.0:
    728 ; GENERIC-NEXT:    vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
    729 ; GENERIC-NEXT:    vmulps %zmm2, %zmm1, %zmm0 {%k1} # sched: [5:1.00]
    730 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    731 ;
    732 ; SKX-LABEL: test_mask_vmulps:
    733 ; SKX:       # %bb.0:
    734 ; SKX-NEXT:    vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00]
    735 ; SKX-NEXT:    vmulps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50]
    736 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; Per lane: %i * %j where %mask1 is non-zero, otherwise the %dst lane.
        ; The checks expect vptestmd to build %k1 and a vmulps masked by {%k1}.
    737   %mask = icmp ne <16 x i32> %mask1, zeroinitializer
    738   %x = fmul <16 x float> %i, %j
    739   %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
    740   ret <16 x float> %r
    741 }
    742 
    743 define <16 x float> @test_mask_vminps(<16 x float> %dst, <16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone {
    744 ; GENERIC-LABEL: test_mask_vminps:
    745 ; GENERIC:       # %bb.0:
    746 ; GENERIC-NEXT:    vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
    747 ; GENERIC-NEXT:    vminps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
    748 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    749 ;
    750 ; SKX-LABEL: test_mask_vminps:
    751 ; SKX:       # %bb.0:
    752 ; SKX-NEXT:    vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00]
    753 ; SKX-NEXT:    vminps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50]
    754 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; Minimum written as fcmp olt + select, then blended with %dst under
        ; %mask1 != 0. The checks expect the compare/select pair to become one
        ; vminps masked by {%k1}.
    755   %mask = icmp ne <16 x i32> %mask1, zeroinitializer
    756   %cmp_res = fcmp olt <16 x float> %i, %j
    757   %min = select <16 x i1> %cmp_res, <16 x float> %i, <16 x float> %j
    758   %r = select <16 x i1> %mask, <16 x float> %min, <16 x float> %dst
    759   ret <16 x float> %r
    760 }
    761 
    762 define <8 x double> @test_mask_vminpd(<8 x double> %dst, <8 x double> %i, <8 x double> %j, <8 x i32> %mask1) nounwind readnone {
    763 ; GENERIC-LABEL: test_mask_vminpd:
    764 ; GENERIC:       # %bb.0:
    765 ; GENERIC-NEXT:    vptestmd %ymm3, %ymm3, %k1 # sched: [1:0.33]
    766 ; GENERIC-NEXT:    vminpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
    767 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    768 ;
    769 ; SKX-LABEL: test_mask_vminpd:
    770 ; SKX:       # %bb.0:
    771 ; SKX-NEXT:    vptestmd %ymm3, %ymm3, %k1 # sched: [3:1.00]
    772 ; SKX-NEXT:    vminpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50]
    773 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; Double-precision minimum (fcmp olt + select) blended with %dst under
        ; %mask1 != 0. The mask source is <8 x i32>, so the checks expect the
        ; vptestmd to operate on %ymm3 while vminpd works on zmm registers.
    774   %mask = icmp ne <8 x i32> %mask1, zeroinitializer
    775   %cmp_res = fcmp olt <8 x double> %i, %j
    776   %min = select <8 x i1> %cmp_res, <8 x double> %i, <8 x double> %j
    777   %r = select <8 x i1> %mask, <8 x double> %min, <8 x double> %dst
    778   ret <8 x double> %r
    779 }
    780 
    781 define <16 x float> @test_mask_vmaxps(<16 x float> %dst, <16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone {
    782 ; GENERIC-LABEL: test_mask_vmaxps:
    783 ; GENERIC:       # %bb.0:
    784 ; GENERIC-NEXT:    vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
    785 ; GENERIC-NEXT:    vmaxps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
    786 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    787 ;
    788 ; SKX-LABEL: test_mask_vmaxps:
    789 ; SKX:       # %bb.0:
    790 ; SKX-NEXT:    vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00]
    791 ; SKX-NEXT:    vmaxps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50]
    792 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; Maximum written as fcmp ogt + select, then blended with %dst under
        ; %mask1 != 0. The checks expect the compare/select pair to become one
        ; vmaxps masked by {%k1}.
    793   %mask = icmp ne <16 x i32> %mask1, zeroinitializer
    794   %cmp_res = fcmp ogt <16 x float> %i, %j
    795   %max = select <16 x i1> %cmp_res, <16 x float> %i, <16 x float> %j
    796   %r = select <16 x i1> %mask, <16 x float> %max, <16 x float> %dst
    797   ret <16 x float> %r
    798 }
    799 
    800 define <8 x double> @test_mask_vmaxpd(<8 x double> %dst, <8 x double> %i, <8 x double> %j, <8 x i32> %mask1) nounwind readnone {
    801 ; GENERIC-LABEL: test_mask_vmaxpd:
    802 ; GENERIC:       # %bb.0:
    803 ; GENERIC-NEXT:    vptestmd %ymm3, %ymm3, %k1 # sched: [1:0.33]
    804 ; GENERIC-NEXT:    vmaxpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
    805 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    806 ;
    807 ; SKX-LABEL: test_mask_vmaxpd:
    808 ; SKX:       # %bb.0:
    809 ; SKX-NEXT:    vptestmd %ymm3, %ymm3, %k1 # sched: [3:1.00]
    810 ; SKX-NEXT:    vmaxpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50]
    811 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; Double-precision maximum (fcmp ogt + select) blended with %dst under
        ; %mask1 != 0. The mask source is <8 x i32>, so the checks expect the
        ; vptestmd to operate on %ymm3 while vmaxpd works on zmm registers.
    812   %mask = icmp ne <8 x i32> %mask1, zeroinitializer
    813   %cmp_res = fcmp ogt <8 x double> %i, %j
    814   %max = select <8 x i1> %cmp_res, <8 x double> %i, <8 x double> %j
    815   %r = select <8 x i1> %mask, <8 x double> %max, <8 x double> %dst
    816   ret <8 x double> %r
    817 }
    818 
    819 define <16 x float> @test_mask_vsubps(<16 x float> %dst, <16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone {
    820 ; GENERIC-LABEL: test_mask_vsubps:
    821 ; GENERIC:       # %bb.0:
    822 ; GENERIC-NEXT:    vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
    823 ; GENERIC-NEXT:    vsubps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
    824 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    825 ;
    826 ; SKX-LABEL: test_mask_vsubps:
    827 ; SKX:       # %bb.0:
    828 ; SKX-NEXT:    vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00]
    829 ; SKX-NEXT:    vsubps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50]
    830 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; Per lane: %i - %j where %mask1 is non-zero, otherwise the %dst lane.
        ; The checks expect vptestmd to build %k1 and a vsubps masked by {%k1}.
    831   %mask = icmp ne <16 x i32> %mask1, zeroinitializer
    832   %x = fsub <16 x float> %i, %j
    833   %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
    834   ret <16 x float> %r
    835 }
    836 
    837 define <16 x float> @test_mask_vdivps(<16 x float> %dst, <16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone {
    838 ; GENERIC-LABEL: test_mask_vdivps:
    839 ; GENERIC:       # %bb.0:
    840 ; GENERIC-NEXT:    vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
    841 ; GENERIC-NEXT:    vdivps %zmm2, %zmm1, %zmm0 {%k1} # sched: [29:28.00]
    842 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    843 ;
    844 ; SKX-LABEL: test_mask_vdivps:
    845 ; SKX:       # %bb.0:
    846 ; SKX-NEXT:    vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00]
    847 ; SKX-NEXT:    vdivps %zmm2, %zmm1, %zmm0 {%k1} # sched: [18:10.00]
    848 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; Per lane: %i / %j where %mask1 is non-zero, otherwise the %dst lane.
        ; The checks expect vptestmd to build %k1 and a vdivps masked by {%k1}.
    849   %mask = icmp ne <16 x i32> %mask1, zeroinitializer
    850   %x = fdiv <16 x float> %i, %j
    851   %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
    852   ret <16 x float> %r
    853 }
    854 
    855 define <8 x double> @test_mask_vaddpd(<8 x double> %dst, <8 x double> %i, <8 x double> %j, <8 x i64> %mask1) nounwind readnone {
    856 ; GENERIC-LABEL: test_mask_vaddpd:
    857 ; GENERIC:       # %bb.0:
    858 ; GENERIC-NEXT:    vptestmq %zmm3, %zmm3, %k1 # sched: [1:0.33]
    859 ; GENERIC-NEXT:    vaddpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
    860 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    861 ;
    862 ; SKX-LABEL: test_mask_vaddpd:
    863 ; SKX:       # %bb.0:
    864 ; SKX-NEXT:    vptestmq %zmm3, %zmm3, %k1 # sched: [3:1.00]
    865 ; SKX-NEXT:    vaddpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50]
    866 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; Per lane: %i + %j where %mask1 is non-zero, otherwise the %dst lane.
        ; The mask source is <8 x i64>, so the checks expect vptestmq (not
        ; vptestmd) to build %k1 for the masked vaddpd.
    867   %mask = icmp ne <8 x i64> %mask1, zeroinitializer
    868   %x = fadd <8 x double> %i, %j
    869   %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst
    870   ret <8 x double> %r
    871 }
    872 
    873 define <8 x double> @test_maskz_vaddpd(<8 x double> %i, <8 x double> %j, <8 x i64> %mask1) nounwind readnone {
    874 ; GENERIC-LABEL: test_maskz_vaddpd:
    875 ; GENERIC:       # %bb.0:
    876 ; GENERIC-NEXT:    vptestmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
    877 ; GENERIC-NEXT:    vaddpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [3:1.00]
    878 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    879 ;
    880 ; SKX-LABEL: test_maskz_vaddpd:
    881 ; SKX:       # %bb.0:
    882 ; SKX-NEXT:    vptestmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
    883 ; SKX-NEXT:    vaddpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [4:0.50]
    884 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; Per lane: %i + %j where %mask1 is non-zero, otherwise 0.0. Because the
        ; select's false arm is zeroinitializer, the checks expect the {%k1} {z}
        ; form of vaddpd.
    885   %mask = icmp ne <8 x i64> %mask1, zeroinitializer
    886   %x = fadd <8 x double> %i, %j
    887   %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
    888   ret <8 x double> %r
    889 }
    890 
    891 define <8 x double> @test_mask_fold_vaddpd(<8 x double> %dst, <8 x double> %i, <8 x double>* %j,  <8 x i64> %mask1) nounwind {
    892 ; GENERIC-LABEL: test_mask_fold_vaddpd:
    893 ; GENERIC:       # %bb.0:
    894 ; GENERIC-NEXT:    vptestmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
    895 ; GENERIC-NEXT:    vaddpd (%rdi), %zmm1, %zmm0 {%k1} # sched: [10:1.00]
    896 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    897 ;
    898 ; SKX-LABEL: test_mask_fold_vaddpd:
    899 ; SKX:       # %bb.0:
    900 ; SKX-NEXT:    vptestmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
    901 ; SKX-NEXT:    vaddpd (%rdi), %zmm1, %zmm0 {%k1} # sched: [11:0.50]
    902 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; Masked add whose second operand is loaded from %j (align 8); lanes with
        ; %mask1 == 0 keep %dst. The checks expect the load to be folded into the
        ; masked vaddpd as the (%rdi) memory operand.
    903   %mask = icmp ne <8 x i64> %mask1, zeroinitializer
    904   %tmp = load <8 x double>, <8 x double>* %j, align 8
    905   %x = fadd <8 x double> %i, %tmp
    906   %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst
    907   ret <8 x double> %r
    908 }
    909 
    910 define <8 x double> @test_maskz_fold_vaddpd(<8 x double> %i, <8 x double>* %j, <8 x i64> %mask1) nounwind {
    911 ; GENERIC-LABEL: test_maskz_fold_vaddpd:
    912 ; GENERIC:       # %bb.0:
    913 ; GENERIC-NEXT:    vptestmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
    914 ; GENERIC-NEXT:    vaddpd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [10:1.00]
    915 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    916 ;
    917 ; SKX-LABEL: test_maskz_fold_vaddpd:
    918 ; SKX:       # %bb.0:
    919 ; SKX-NEXT:    vptestmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
    920 ; SKX-NEXT:    vaddpd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [11:0.50]
    921 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; Same as the masked folded-load add, but lanes with %mask1 == 0 become
        ; 0.0 (zeroinitializer false arm). The checks expect vaddpd with a folded
        ; (%rdi) operand in {%k1} {z} form.
    922   %mask = icmp ne <8 x i64> %mask1, zeroinitializer
    923   %tmp = load <8 x double>, <8 x double>* %j, align 8
    924   %x = fadd <8 x double> %i, %tmp
    925   %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
    926   ret <8 x double> %r
    927 }
    928 
    929 define <8 x double> @test_broadcast_vaddpd(<8 x double> %i, double* %j) nounwind {
    930 ; GENERIC-LABEL: test_broadcast_vaddpd:
    931 ; GENERIC:       # %bb.0:
    932 ; GENERIC-NEXT:    vaddpd (%rdi){1to8}, %zmm0, %zmm0 # sched: [10:1.00]
    933 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    934 ;
    935 ; SKX-LABEL: test_broadcast_vaddpd:
    936 ; SKX:       # %bb.0:
    937 ; SKX-NEXT:    vaddpd (%rdi){1to8}, %zmm0, %zmm0 # sched: [11:0.50]
    938 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; Loads one double from %j, splats it to 8 lanes and adds it to %i. The
        ; checks expect load + splat to fold into vaddpd as a (%rdi){1to8}
        ; broadcast operand.
    939   %tmp = load double, double* %j
          ; insertelement into lane 0 + all-zero shuffle mask = splat of %tmp.
    940   %b = insertelement <8 x double> undef, double %tmp, i32 0
    941   %c = shufflevector <8 x double> %b, <8 x double> undef,
    942                      <8 x i32> zeroinitializer
    943   %x = fadd <8 x double> %c, %i
    944   ret <8 x double> %x
    945 }
    946 
    947 define <8 x double> @test_mask_broadcast_vaddpd(<8 x double> %dst, <8 x double> %i, double* %j, <8 x i64> %mask1) nounwind {
    948 ; GENERIC-LABEL: test_mask_broadcast_vaddpd:
    949 ; GENERIC:       # %bb.0:
    950 ; GENERIC-NEXT:    vptestmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
    951 ; GENERIC-NEXT:    vaddpd (%rdi){1to8}, %zmm1, %zmm1 {%k1} # sched: [10:1.00]
    952 ; GENERIC-NEXT:    vmovapd %zmm1, %zmm0 # sched: [1:1.00]
    953 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    954 ;
    955 ; SKX-LABEL: test_mask_broadcast_vaddpd:
    956 ; SKX:       # %bb.0:
    957 ; SKX-NEXT:    vptestmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
    958 ; SKX-NEXT:    vaddpd (%rdi){1to8}, %zmm1, %zmm1 {%k1} # sched: [11:0.50]
    959 ; SKX-NEXT:    vmovapd %zmm1, %zmm0 # sched: [1:0.33]
    960 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; Masked add of %i with a splat of the double loaded from %j. Lanes with
        ; %mask1 == 0 keep the %i lane, so the checks expect the masked vaddpd to
        ; write %zmm1 in place, followed by a vmovapd into %zmm0.
        ; NOTE(review): %dst is never used in the body; the other test_mask_*
        ; functions in this file select %dst as the fallback. Confirm that using
        ; %i here is intentional before changing it, since the CHECK lines above
        ; depend on it.
    961   %mask = icmp ne <8 x i64> %mask1, zeroinitializer
    962   %tmp = load double, double* %j
    963   %b = insertelement <8 x double> undef, double %tmp, i32 0
    964   %c = shufflevector <8 x double> %b, <8 x double> undef,
    965                      <8 x i32> zeroinitializer
    966   %x = fadd <8 x double> %c, %i
    967   %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %i
    968   ret <8 x double> %r
    969 }
    970 
    971 define <8 x double> @test_maskz_broadcast_vaddpd(<8 x double> %i, double* %j,
    972 ; GENERIC-LABEL: test_maskz_broadcast_vaddpd:
    973 ; GENERIC:       # %bb.0:
    974 ; GENERIC-NEXT:    vptestmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
    975 ; GENERIC-NEXT:    vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} # sched: [10:1.00]
    976 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    977 ;
    978 ; SKX-LABEL: test_maskz_broadcast_vaddpd:
    979 ; SKX:       # %bb.0:
    980 ; SKX-NEXT:    vptestmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
    981 ; SKX-NEXT:    vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} # sched: [11:0.50]
    982 ; SKX-NEXT:    retq # sched: [7:1.00]
    983                                        <8 x i64> %mask1) nounwind {
        ; Add of %i with a splat of the double loaded from %j; lanes with
        ; %mask1 == 0 become 0.0. The checks expect one vaddpd combining the
        ; (%rdi){1to8} broadcast operand with {%k1} {z}.
    984   %mask = icmp ne <8 x i64> %mask1, zeroinitializer
    985   %tmp = load double, double* %j
    986   %b = insertelement <8 x double> undef, double %tmp, i32 0
    987   %c = shufflevector <8 x double> %b, <8 x double> undef,
    988                      <8 x i32> zeroinitializer
    989   %x = fadd <8 x double> %c, %i
    990   %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
    991   ret <8 x double> %r
    992 }
    993 
    994 define <16 x float>  @test_fxor(<16 x float> %a) {
    995 ; GENERIC-LABEL: test_fxor:
    996 ; GENERIC:       # %bb.0:
    997 ; GENERIC-NEXT:    vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00]
    998 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    999 ;
   1000 ; SKX-LABEL: test_fxor:
   1001 ; SKX:       # %bb.0:
   1002 ; SKX-NEXT:    vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
   1003 ; SKX-NEXT:    retq # sched: [7:1.00]
   1004
        ; Subtracts %a from a splat of -0.0. The checks expect this to be emitted
        ; as a vxorps against a RIP-relative {1to16} broadcast constant rather
        ; than as a subtraction.
   1005   %res = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
   1006   ret <16 x float>%res
   1007 }
   1008 
   1009 define <8 x float>  @test_fxor_8f32(<8 x float> %a) {
   1010 ; GENERIC-LABEL: test_fxor_8f32:
   1011 ; GENERIC:       # %bb.0:
   1012 ; GENERIC-NEXT:    vxorps {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [8:1.00]
   1013 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1014 ;
   1015 ; SKX-LABEL: test_fxor_8f32:
   1016 ; SKX:       # %bb.0:
   1017 ; SKX-NEXT:    vxorps {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [8:0.50]
   1018 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; 256-bit variant: subtracts %a from a splat of -0.0. The checks expect a
        ; ymm vxorps against a RIP-relative {1to8} broadcast constant.
   1019   %res = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
   1020   ret <8 x float>%res
   1021 }
   1022 
   1023 define <8 x double> @fabs_v8f64(<8 x double> %p)
   1024 ; GENERIC-LABEL: fabs_v8f64:
   1025 ; GENERIC:       # %bb.0:
   1026 ; GENERIC-NEXT:    vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00]
   1027 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1028 ;
   1029 ; SKX-LABEL: fabs_v8f64:
   1030 ; SKX:       # %bb.0:
   1031 ; SKX-NEXT:    vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
   1032 ; SKX-NEXT:    retq # sched: [7:1.00]
   1033 {
        ; Calls the llvm.fabs.v8f64 intrinsic on %p. The checks expect it to be
        ; emitted as a vandpd against a RIP-relative {1to8} broadcast constant.
   1034   %t = call <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
   1035   ret <8 x double> %t
   1036 }
        ; Declaration of the intrinsic called by fabs_v8f64.
   1037 declare <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
   1038 
   1039 define <16 x float> @fabs_v16f32(<16 x float> %p)
   1040 ; GENERIC-LABEL: fabs_v16f32:
   1041 ; GENERIC:       # %bb.0:
   1042 ; GENERIC-NEXT:    vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00]
   1043 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1044 ;
   1045 ; SKX-LABEL: fabs_v16f32:
   1046 ; SKX:       # %bb.0:
   1047 ; SKX-NEXT:    vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
   1048 ; SKX-NEXT:    retq # sched: [7:1.00]
   1049 {
        ; Calls the llvm.fabs.v16f32 intrinsic on %p. The checks expect it to be
        ; emitted as a vandps against a RIP-relative {1to16} broadcast constant.
   1050   %t = call <16 x float> @llvm.fabs.v16f32(<16 x float> %p)
   1051   ret <16 x float> %t
   1052 }
        ; Declaration of the intrinsic called by fabs_v16f32.
   1053 declare <16 x float> @llvm.fabs.v16f32(<16 x float> %p)
   1054 
   1055 define double @test1(double %a, double %b) nounwind {
   1056 ; GENERIC-LABEL: test1:
   1057 ; GENERIC:       # %bb.0:
   1058 ; GENERIC-NEXT:    vucomisd %xmm1, %xmm0 # sched: [2:1.00]
   1059 ; GENERIC-NEXT:    jne .LBB64_1 # sched: [1:1.00]
   1060 ; GENERIC-NEXT:    jnp .LBB64_2 # sched: [1:1.00]
   1061 ; GENERIC-NEXT:  .LBB64_1: # %l1
   1062 ; GENERIC-NEXT:    vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   1063 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1064 ; GENERIC-NEXT:  .LBB64_2: # %l2
   1065 ; GENERIC-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   1066 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1067 ;
   1068 ; SKX-LABEL: test1:
   1069 ; SKX:       # %bb.0:
   1070 ; SKX-NEXT:    vucomisd %xmm1, %xmm0 # sched: [2:1.00]
   1071 ; SKX-NEXT:    jne .LBB64_1 # sched: [1:0.50]
   1072 ; SKX-NEXT:    jnp .LBB64_2 # sched: [1:0.50]
   1073 ; SKX-NEXT:  .LBB64_1: # %l1
   1074 ; SKX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   1075 ; SKX-NEXT:    retq # sched: [7:1.00]
   1076 ; SKX-NEXT:  .LBB64_2: # %l2
   1077 ; SKX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   1078 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; Branches on fcmp une: returns %a - %b (block l1) when the compare is
        ; true, otherwise %a + %b (block l2). The checks expect a vucomisd
        ; followed by a jne/jnp pair to select between the two blocks.
   1079   %tobool = fcmp une double %a, %b
   1080   br i1 %tobool, label %l1, label %l2
   1081
   1082 l1:
   1083   %c = fsub double %a, %b
   1084   ret double %c
   1085 l2:
   1086   %c1 = fadd double %a, %b
   1087   ret double %c1
   1088 }
   1089 
   1090 define float @test2(float %a, float %b) nounwind {
   1091 ; GENERIC-LABEL: test2:
   1092 ; GENERIC:       # %bb.0:
   1093 ; GENERIC-NEXT:    vucomiss %xmm0, %xmm1 # sched: [2:1.00]
   1094 ; GENERIC-NEXT:    jbe .LBB65_2 # sched: [1:1.00]
   1095 ; GENERIC-NEXT:  # %bb.1: # %l1
   1096 ; GENERIC-NEXT:    vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   1097 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1098 ; GENERIC-NEXT:  .LBB65_2: # %l2
   1099 ; GENERIC-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   1100 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1101 ;
   1102 ; SKX-LABEL: test2:
   1103 ; SKX:       # %bb.0:
   1104 ; SKX-NEXT:    vucomiss %xmm0, %xmm1 # sched: [2:1.00]
   1105 ; SKX-NEXT:    jbe .LBB65_2 # sched: [1:0.50]
   1106 ; SKX-NEXT:  # %bb.1: # %l1
   1107 ; SKX-NEXT:    vsubss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   1108 ; SKX-NEXT:    retq # sched: [7:1.00]
   1109 ; SKX-NEXT:  .LBB65_2: # %l2
   1110 ; SKX-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   1111 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; Branches on fcmp olt: returns %a - %b (block l1) when %a < %b (ordered),
        ; otherwise %a + %b (block l2). The checks expect a single vucomiss and
        ; one jbe that skips to the l2 block.
   1112   %tobool = fcmp olt float %a, %b
   1113   br i1 %tobool, label %l1, label %l2
   1114
   1115 l1:
   1116   %c = fsub float %a, %b
   1117   ret float %c
   1118 l2:
   1119   %c1 = fadd float %a, %b
   1120   ret float %c1
   1121 }
   1122 
   1123 define i32 @test3(float %a, float %b) {
   1124 ; GENERIC-LABEL: test3:
   1125 ; GENERIC:       # %bb.0:
   1126 ; GENERIC-NEXT:    vcmpeqss %xmm1, %xmm0, %k0 # sched: [3:1.00]
   1127 ; GENERIC-NEXT:    kmovw %k0, %eax # sched: [1:0.33]
   1128 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1129 ;
   1130 ; SKX-LABEL: test3:
   1131 ; SKX:       # %bb.0:
   1132 ; SKX-NEXT:    vcmpeqss %xmm1, %xmm0, %k0 # sched: [3:1.00]
   1133 ; SKX-NEXT:    kmovw %k0, %eax # sched: [3:1.00]
   1134 ; SKX-NEXT:    retq # sched: [7:1.00]
   1135
        ; Returns the fcmp oeq result of %a and %b zero-extended to i32. The
        ; checks expect the compare to write mask register %k0 (vcmpeqss) and a
        ; kmovw to move it into %eax.
   1136   %cmp10.i = fcmp oeq float %a, %b
   1137   %conv11.i = zext i1 %cmp10.i to i32
   1138   ret i32 %conv11.i
   1139 }
   1140 
   1141 define float @test5(float %p) #0 {
   1142 ; GENERIC-LABEL: test5:
   1143 ; GENERIC:       # %bb.0: # %entry
   1144 ; GENERIC-NEXT:    vxorps %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
   1145 ; GENERIC-NEXT:    vucomiss %xmm1, %xmm0 # sched: [2:1.00]
   1146 ; GENERIC-NEXT:    jne .LBB67_1 # sched: [1:1.00]
   1147 ; GENERIC-NEXT:    jp .LBB67_1 # sched: [1:1.00]
   1148 ; GENERIC-NEXT:  # %bb.2: # %return
   1149 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1150 ; GENERIC-NEXT:  .LBB67_1: # %if.end
   1151 ; GENERIC-NEXT:    seta %al # sched: [2:1.00]
   1152 ; GENERIC-NEXT:    movzbl %al, %eax # sched: [1:0.33]
   1153 ; GENERIC-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
   1154 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1155 ;
   1156 ; SKX-LABEL: test5:
   1157 ; SKX:       # %bb.0: # %entry
   1158 ; SKX-NEXT:    vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
   1159 ; SKX-NEXT:    vucomiss %xmm1, %xmm0 # sched: [2:1.00]
   1160 ; SKX-NEXT:    jne .LBB67_1 # sched: [1:0.50]
   1161 ; SKX-NEXT:    jp .LBB67_1 # sched: [1:0.50]
   1162 ; SKX-NEXT:  # %bb.2: # %return
   1163 ; SKX-NEXT:    retq # sched: [7:1.00]
   1164 ; SKX-NEXT:  .LBB67_1: # %if.end
   1165 ; SKX-NEXT:    seta %al # sched: [2:1.00]
   1166 ; SKX-NEXT:    movzbl %al, %eax # sched: [1:0.25]
   1167 ; SKX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
   1168 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; Returns %p unchanged when %p == 0.0 (fcmp oeq); otherwise returns 1.0
        ; if %p > 0.0 (fcmp ogt) and -1.0 if not. The checks expect one vucomiss
        ; against a zeroed register with jne/jp for the first compare, and the
        ; second result to be produced via seta/movzbl and a vmovss load.
        ; Attribute group #0 is defined outside this excerpt.
   1169 entry:
   1170   %cmp = fcmp oeq float %p, 0.000000e+00
   1171   br i1 %cmp, label %return, label %if.end
   1172
   1173 if.end:                                           ; preds = %entry
   1174   %cmp1 = fcmp ogt float %p, 0.000000e+00
   1175   %cond = select i1 %cmp1, float 1.000000e+00, float -1.000000e+00
   1176   br label %return
   1177
   1178 return:                                           ; preds = %if.end, %entry
   1179   %retval.0 = phi float [ %cond, %if.end ], [ %p, %entry ]
   1180   ret float %retval.0
   1181 }
   1182 
   1183 define i32 @test6(i32 %a, i32 %b) {
   1184 ; GENERIC-LABEL: test6:
   1185 ; GENERIC:       # %bb.0:
   1186 ; GENERIC-NEXT:    xorl %eax, %eax # sched: [1:0.33]
   1187 ; GENERIC-NEXT:    cmpl %esi, %edi # sched: [1:0.33]
   1188 ; GENERIC-NEXT:    sete %al # sched: [1:0.50]
   1189 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1190 ;
   1191 ; SKX-LABEL: test6:
   1192 ; SKX:       # %bb.0:
   1193 ; SKX-NEXT:    xorl %eax, %eax # sched: [1:0.25]
   1194 ; SKX-NEXT:    cmpl %esi, %edi # sched: [1:0.25]
   1195 ; SKX-NEXT:    sete %al # sched: [1:0.50]
   1196 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; Returns (%a == %b) zero-extended to i32. The checks expect the scalar
        ; xorl/cmpl/sete sequence, with no mask-register instructions.
   1197   %cmp = icmp eq i32 %a, %b
   1198   %res = zext i1 %cmp to i32
   1199   ret i32 %res
   1200 }
   1201 
   1202 define i32 @test7(double %x, double %y) #2 {
   1203 ; GENERIC-LABEL: test7:
   1204 ; GENERIC:       # %bb.0: # %entry
   1205 ; GENERIC-NEXT:    xorl %eax, %eax # sched: [1:0.33]
   1206 ; GENERIC-NEXT:    vucomisd %xmm1, %xmm0 # sched: [2:1.00]
   1207 ; GENERIC-NEXT:    setne %al # sched: [1:0.50]
   1208 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1209 ;
   1210 ; SKX-LABEL: test7:
   1211 ; SKX:       # %bb.0: # %entry
   1212 ; SKX-NEXT:    xorl %eax, %eax # sched: [1:0.25]
   1213 ; SKX-NEXT:    vucomisd %xmm1, %xmm0 # sched: [2:1.00]
   1214 ; SKX-NEXT:    setne %al # sched: [1:0.50]
   1215 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; Returns the fcmp one result of %x and %y zero-extended to i32. The
        ; checks expect xorl + vucomisd + setne. Attribute group #2 is defined
        ; outside this excerpt.
   1216 entry:
   1217   %0 = fcmp one double %x, %y
   1218   %or = zext i1 %0 to i32
   1219   ret i32 %or
   1220 }
   1221 
   1222 define i32 @test8(i32 %a1, i32 %a2, i32 %a3) {
   1223 ; GENERIC-LABEL: test8:
   1224 ; GENERIC:       # %bb.0:
   1225 ; GENERIC-NEXT:    xorl $-2147483648, %esi # imm = 0x80000000
   1226 ; GENERIC-NEXT:    # sched: [1:0.33]
   1227 ; GENERIC-NEXT:    testl %edx, %edx # sched: [1:0.33]
   1228 ; GENERIC-NEXT:    movl $1, %eax # sched: [1:0.33]
   1229 ; GENERIC-NEXT:    cmovel %eax, %edx # sched: [2:0.67]
   1230 ; GENERIC-NEXT:    notl %edi # sched: [1:0.33]
   1231 ; GENERIC-NEXT:    orl %edi, %esi # sched: [1:0.33]
   1232 ; GENERIC-NEXT:    cmovnel %edx, %eax # sched: [2:0.67]
   1233 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1234 ;
   1235 ; SKX-LABEL: test8:
   1236 ; SKX:       # %bb.0:
   1237 ; SKX-NEXT:    notl %edi # sched: [1:0.25]
   1238 ; SKX-NEXT:    xorl $-2147483648, %esi # imm = 0x80000000
   1239 ; SKX-NEXT:    # sched: [1:0.25]
   1240 ; SKX-NEXT:    testl %edx, %edx # sched: [1:0.25]
   1241 ; SKX-NEXT:    movl $1, %eax # sched: [1:0.25]
   1242 ; SKX-NEXT:    cmovel %eax, %edx # sched: [1:0.50]
   1243 ; SKX-NEXT:    orl %edi, %esi # sched: [1:0.25]
   1244 ; SKX-NEXT:    cmovnel %edx, %eax # sched: [1:0.50]
   1245 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; Returns 1 when (%a1 == -1 and %a2 == -2147483648) or %a3 == 0, and %a3
        ; otherwise. The checks expect a branch-free notl/xorl/orl + cmov
        ; sequence. The two prefixes differ only in where notl is scheduled and
        ; in the sched annotations.
   1246   %tmp1 = icmp eq i32 %a1, -1
   1247   %tmp2 = icmp eq i32 %a2, -2147483648
   1248   %tmp3 = and i1 %tmp1, %tmp2
   1249   %tmp4 = icmp eq i32 %a3, 0
   1250   %tmp5 = or i1 %tmp3, %tmp4
   1251   %res = select i1 %tmp5, i32 1, i32 %a3
   1252   ret i32 %res
   1253 }
   1254 
   1255 define i32 @test9(i64 %a) {
   1256 ; GENERIC-LABEL: test9:
   1257 ; GENERIC:       # %bb.0:
   1258 ; GENERIC-NEXT:    testb $1, %dil # sched: [1:0.33]
   1259 ; GENERIC-NEXT:    jne .LBB71_2 # sched: [1:1.00]
   1260 ; GENERIC-NEXT:  # %bb.1: # %A
   1261 ; GENERIC-NEXT:    movl $6, %eax # sched: [1:0.33]
   1262 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1263 ; GENERIC-NEXT:  .LBB71_2: # %B
   1264 ; GENERIC-NEXT:    movl $7, %eax # sched: [1:0.33]
   1265 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1266 ;
   1267 ; SKX-LABEL: test9:
   1268 ; SKX:       # %bb.0:
   1269 ; SKX-NEXT:    testb $1, %dil # sched: [1:0.25]
   1270 ; SKX-NEXT:    jne .LBB71_2 # sched: [1:0.50]
   1271 ; SKX-NEXT:  # %bb.1: # %A
   1272 ; SKX-NEXT:    movl $6, %eax # sched: [1:0.25]
   1273 ; SKX-NEXT:    retq # sched: [7:1.00]
   1274 ; SKX-NEXT:  .LBB71_2: # %B
   1275 ; SKX-NEXT:    movl $7, %eax # sched: [1:0.25]
   1276 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; Tests bit 0 of %a: returns 6 (block A) when it is clear and 7 (block B)
        ; when it is set. The checks expect the and + icmp pair to become a
        ; single testb $1 on the low byte followed by jne.
   1277  %b = and i64 %a, 1
   1278  %cmp10.i = icmp eq i64 %b, 0
   1279  br i1 %cmp10.i, label %A, label %B
   1280 A:
   1281  ret i32 6
   1282 B:
   1283  ret i32 7
   1284 }
   1285 
   1286 define i32 @test10(i64 %b, i64 %c, i1 %d) {
   1287 ; GENERIC-LABEL: test10:
   1288 ; GENERIC:       # %bb.0:
   1289 ; GENERIC-NEXT:    movl %edx, %eax # sched: [1:0.33]
   1290 ; GENERIC-NEXT:    andb $1, %al # sched: [1:0.33]
   1291 ; GENERIC-NEXT:    cmpq %rsi, %rdi # sched: [1:0.33]
   1292 ; GENERIC-NEXT:    sete %cl # sched: [1:0.50]
   1293 ; GENERIC-NEXT:    orb %dl, %cl # sched: [1:0.33]
   1294 ; GENERIC-NEXT:    andb $1, %cl # sched: [1:0.33]
   1295 ; GENERIC-NEXT:    cmpb %cl, %al # sched: [1:0.33]
   1296 ; GENERIC-NEXT:    je .LBB72_1 # sched: [1:1.00]
   1297 ; GENERIC-NEXT:  # %bb.2: # %if.end.i
   1298 ; GENERIC-NEXT:    movl $6, %eax # sched: [1:0.33]
   1299 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1300 ; GENERIC-NEXT:  .LBB72_1: # %if.then.i
   1301 ; GENERIC-NEXT:    movl $5, %eax # sched: [1:0.33]
   1302 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1303 ;
   1304 ; SKX-LABEL: test10:
   1305 ; SKX:       # %bb.0:
   1306 ; SKX-NEXT:    movl %edx, %eax # sched: [1:0.25]
   1307 ; SKX-NEXT:    andb $1, %al # sched: [1:0.25]
   1308 ; SKX-NEXT:    cmpq %rsi, %rdi # sched: [1:0.25]
   1309 ; SKX-NEXT:    sete %cl # sched: [1:0.50]
   1310 ; SKX-NEXT:    orb %dl, %cl # sched: [1:0.25]
   1311 ; SKX-NEXT:    andb $1, %cl # sched: [1:0.25]
   1312 ; SKX-NEXT:    cmpb %cl, %al # sched: [1:0.25]
   1313 ; SKX-NEXT:    je .LBB72_1 # sched: [1:0.50]
   1314 ; SKX-NEXT:  # %bb.2: # %if.end.i
   1315 ; SKX-NEXT:    movl $6, %eax # sched: [1:0.25]
   1316 ; SKX-NEXT:    retq # sched: [7:1.00]
   1317 ; SKX-NEXT:  .LBB72_1: # %if.then.i
   1318 ; SKX-NEXT:    movl $5, %eax # sched: [1:0.25]
   1319 ; SKX-NEXT:    retq # sched: [7:1.00]
   1320
        ; Computes %d xor (%d or (%b == %c)) on i1 values and branches on it:
        ; returns 6 (if.end.i) when the result is true and 5 (if.then.i) when it
        ; is false. The checks expect the i1 logic to be done in byte registers
        ; (andb/orb/cmpb) with a single je.
   1321   %cmp8.i = icmp eq i64 %b, %c
   1322   %or1 = or i1 %d, %cmp8.i
   1323   %xor1 = xor i1 %d, %or1
   1324   br i1 %xor1, label %if.end.i, label %if.then.i
   1325
   1326 if.then.i:
   1327  ret i32 5
   1328
   1329 if.end.i:
   1330   ret i32 6
   1331 }
   1332 
   1333 define <16 x float> @sitof32(<16 x i32> %a) nounwind {
   1334 ; GENERIC-LABEL: sitof32:
   1335 ; GENERIC:       # %bb.0:
   1336 ; GENERIC-NEXT:    vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00]
   1337 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1338 ;
   1339 ; SKX-LABEL: sitof32:
   1340 ; SKX:       # %bb.0:
   1341 ; SKX-NEXT:    vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50]
   1342 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; Signed <16 x i32> to <16 x float> conversion; the checks expect a
        ; single zmm vcvtdq2ps.
   1343   %b = sitofp <16 x i32> %a to <16 x float>
   1344   ret <16 x float> %b
   1345 }
   1346 
   1347 define <8 x double> @sltof864(<8 x i64> %a) {
   1348 ; GENERIC-LABEL: sltof864:
   1349 ; GENERIC:       # %bb.0:
   1350 ; GENERIC-NEXT:    vcvtqq2pd %zmm0, %zmm0 # sched: [4:1.00]
   1351 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1352 ;
   1353 ; SKX-LABEL: sltof864:
   1354 ; SKX:       # %bb.0:
   1355 ; SKX-NEXT:    vcvtqq2pd %zmm0, %zmm0 # sched: [4:0.50]
   1356 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; Signed <8 x i64> to <8 x double> conversion; the checks expect a single
        ; zmm vcvtqq2pd.
   1357   %b = sitofp <8 x i64> %a to <8 x double>
   1358   ret <8 x double> %b
   1359 }
   1360 
   1361 define <4 x double> @slto4f64(<4 x i64> %a) {
   1362 ; GENERIC-LABEL: slto4f64:
   1363 ; GENERIC:       # %bb.0:
   1364 ; GENERIC-NEXT:    vcvtqq2pd %ymm0, %ymm0 # sched: [4:1.00]
   1365 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1366 ;
   1367 ; SKX-LABEL: slto4f64:
   1368 ; SKX:       # %bb.0:
   1369 ; SKX-NEXT:    vcvtqq2pd %ymm0, %ymm0 # sched: [4:0.50]
   1370 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; Signed <4 x i64> to <4 x double> conversion; the checks expect the
        ; 256-bit (ymm) form of vcvtqq2pd.
   1371   %b = sitofp <4 x i64> %a to <4 x double>
   1372   ret <4 x double> %b
   1373 }
   1374 
   1375 define <2 x double> @slto2f64(<2 x i64> %a) {
   1376 ; GENERIC-LABEL: slto2f64:
   1377 ; GENERIC:       # %bb.0:
   1378 ; GENERIC-NEXT:    vcvtqq2pd %xmm0, %xmm0 # sched: [4:1.00]
   1379 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1380 ;
   1381 ; SKX-LABEL: slto2f64:
   1382 ; SKX:       # %bb.0:
   1383 ; SKX-NEXT:    vcvtqq2pd %xmm0, %xmm0 # sched: [4:0.50]
   1384 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; Signed <2 x i64> to <2 x double> conversion; the checks expect the
        ; 128-bit (xmm) form of vcvtqq2pd.
   1385   %b = sitofp <2 x i64> %a to <2 x double>
   1386   ret <2 x double> %b
   1387 }
   1388 
   1389 define <2 x float> @sltof2f32(<2 x i64> %a) {
   1390 ; GENERIC-LABEL: sltof2f32:
   1391 ; GENERIC:       # %bb.0:
   1392 ; GENERIC-NEXT:    vcvtqq2ps %xmm0, %xmm0 # sched: [3:1.00]
   1393 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1394 ;
   1395 ; SKX-LABEL: sltof2f32:
   1396 ; SKX:       # %bb.0:
   1397 ; SKX-NEXT:    vcvtqq2ps %xmm0, %xmm0 # sched: [5:1.00]
   1398 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; Signed <2 x i64> to <2 x float> conversion; the checks expect a single
        ; xmm-to-xmm vcvtqq2ps.
   1399   %b = sitofp <2 x i64> %a to <2 x float>
   1400   ret <2 x float>%b
   1401 }
   1402 
   1403 define <4 x float> @slto4f32_mem(<4 x i64>* %a) {
   1404 ; GENERIC-LABEL: slto4f32_mem:
   1405 ; GENERIC:       # %bb.0:
   1406 ; GENERIC-NEXT:    vcvtqq2psy (%rdi), %xmm0 # sched: [10:1.00]
   1407 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1408 ;
   1409 ; SKX-LABEL: slto4f32_mem:
   1410 ; SKX:       # %bb.0:
   1411 ; SKX-NEXT:    vcvtqq2psy (%rdi), %xmm0 # sched: [11:0.50]
   1412 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; Memory-operand variant: the <4 x i64> load (align 8) is expected to be
        ; folded into the conversion, giving vcvtqq2psy with a (%rdi) source and
        ; no separate load instruction.
   1413   %a1 = load <4 x i64>, <4 x i64>* %a, align 8
   1414   %b = sitofp <4 x i64> %a1 to <4 x float>
   1415   ret <4 x float>%b
   1416 }
   1417 
   1418 define <4 x i64> @f64to4sl(<4 x double> %a) {
   1419 ; GENERIC-LABEL: f64to4sl:
   1420 ; GENERIC:       # %bb.0:
   1421 ; GENERIC-NEXT:    vcvttpd2qq %ymm0, %ymm0 # sched: [4:1.00]
   1422 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1423 ;
   1424 ; SKX-LABEL: f64to4sl:
   1425 ; SKX:       # %bb.0:
   1426 ; SKX-NEXT:    vcvttpd2qq %ymm0, %ymm0 # sched: [4:0.50]
   1427 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; <4 x double> -> signed <4 x i64>; expected as the truncating
        ; vcvttpd2qq on ymm.
   1428   %b = fptosi <4 x double> %a to <4 x i64>
   1429   ret <4 x i64> %b
   1430 }
   1431 
   1432 define <4 x i64> @f32to4sl(<4 x float> %a) {
   1433 ; GENERIC-LABEL: f32to4sl:
   1434 ; GENERIC:       # %bb.0:
   1435 ; GENERIC-NEXT:    vcvttps2qq %xmm0, %ymm0 # sched: [3:1.00]
   1436 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1437 ;
   1438 ; SKX-LABEL: f32to4sl:
   1439 ; SKX:       # %bb.0:
   1440 ; SKX-NEXT:    vcvttps2qq %xmm0, %ymm0 # sched: [7:1.00]
   1441 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; <4 x float> -> signed <4 x i64>; a widening conversion, so the expected
        ; vcvttps2qq reads xmm and writes ymm.
   1442   %b = fptosi <4 x float> %a to <4 x i64>
   1443   ret <4 x i64> %b
   1444 }
   1445 
   1446 define <4 x float> @slto4f32(<4 x i64> %a) {
   1447 ; GENERIC-LABEL: slto4f32:
   1448 ; GENERIC:       # %bb.0:
   1449 ; GENERIC-NEXT:    vcvtqq2ps %ymm0, %xmm0 # sched: [3:1.00]
   1450 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
   1451 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1452 ;
   1453 ; SKX-LABEL: slto4f32:
   1454 ; SKX:       # %bb.0:
   1455 ; SKX-NEXT:    vcvtqq2ps %ymm0, %xmm0 # sched: [7:1.00]
   1456 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
   1457 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; Signed <4 x i64> -> <4 x float>; a narrowing conversion (ymm source,
        ; xmm result), so the checks also expect a vzeroupper before the return.
   1458   %b = sitofp <4 x i64> %a to <4 x float>
   1459   ret <4 x float> %b
   1460 }
   1461 
   1462 define <4 x float> @ulto4f32(<4 x i64> %a) {
   1463 ; GENERIC-LABEL: ulto4f32:
   1464 ; GENERIC:       # %bb.0:
   1465 ; GENERIC-NEXT:    vcvtuqq2ps %ymm0, %xmm0 # sched: [3:1.00]
   1466 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
   1467 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1468 ;
   1469 ; SKX-LABEL: ulto4f32:
   1470 ; SKX:       # %bb.0:
   1471 ; SKX-NEXT:    vcvtuqq2ps %ymm0, %xmm0 # sched: [7:1.00]
   1472 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
   1473 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; Unsigned <4 x i64> -> <4 x float>; expected as the unsigned-source
        ; vcvtuqq2ps (ymm in, xmm out) followed by vzeroupper.
   1474   %b = uitofp <4 x i64> %a to <4 x float>
   1475   ret <4 x float> %b
   1476 }
   1477 
   1478 define <8 x double> @ulto8f64(<8 x i64> %a) {
   1479 ; GENERIC-LABEL: ulto8f64:
   1480 ; GENERIC:       # %bb.0:
   1481 ; GENERIC-NEXT:    vcvtuqq2pd %zmm0, %zmm0 # sched: [4:1.00]
   1482 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1483 ;
   1484 ; SKX-LABEL: ulto8f64:
   1485 ; SKX:       # %bb.0:
   1486 ; SKX-NEXT:    vcvtuqq2pd %zmm0, %zmm0 # sched: [4:0.50]
   1487 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; Unsigned <8 x i64> -> <8 x double>; expected as one zmm vcvtuqq2pd.
   1488   %b = uitofp <8 x i64> %a to <8 x double>
   1489   ret <8 x double> %b
   1490 }
   1491 
   1492 define <16 x double> @ulto16f64(<16 x i64> %a) {
   1493 ; GENERIC-LABEL: ulto16f64:
   1494 ; GENERIC:       # %bb.0:
   1495 ; GENERIC-NEXT:    vcvtuqq2pd %zmm0, %zmm0 # sched: [4:1.00]
   1496 ; GENERIC-NEXT:    vcvtuqq2pd %zmm1, %zmm1 # sched: [4:1.00]
   1497 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1498 ;
   1499 ; SKX-LABEL: ulto16f64:
   1500 ; SKX:       # %bb.0:
   1501 ; SKX-NEXT:    vcvtuqq2pd %zmm0, %zmm0 # sched: [4:0.50]
   1502 ; SKX-NEXT:    vcvtuqq2pd %zmm1, %zmm1 # sched: [4:0.50]
   1503 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; Unsigned <16 x i64> -> <16 x double>; the vector spans two zmm
        ; registers, so the checks expect one vcvtuqq2pd per half (zmm0, zmm1).
   1504   %b = uitofp <16 x i64> %a to <16 x double>
   1505   ret <16 x double> %b
   1506 }
   1507 
   1508 define <16 x i32> @f64to16si(<16 x float> %a) nounwind {
   1509 ; GENERIC-LABEL: f64to16si:
   1510 ; GENERIC:       # %bb.0:
   1511 ; GENERIC-NEXT:    vcvttps2dq %zmm0, %zmm0 # sched: [3:1.00]
   1512 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1513 ;
   1514 ; SKX-LABEL: f64to16si:
   1515 ; SKX:       # %bb.0:
   1516 ; SKX-NEXT:    vcvttps2dq %zmm0, %zmm0 # sched: [4:0.50]
   1517 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; NOTE: despite the "f64" in the name, the operand is <16 x float>, so
        ; this covers float -> signed i32 and the expected instruction is the
        ; single-precision vcvttps2dq on zmm.
   1518   %b = fptosi <16 x float> %a to <16 x i32>
   1519   ret <16 x i32> %b
   1520 }
   1521 
   1522 define <16 x i32> @f32to16ui(<16 x float> %a) nounwind {
   1523 ; GENERIC-LABEL: f32to16ui:
   1524 ; GENERIC:       # %bb.0:
   1525 ; GENERIC-NEXT:    vcvttps2udq %zmm0, %zmm0 # sched: [3:1.00]
   1526 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1527 ;
   1528 ; SKX-LABEL: f32to16ui:
   1529 ; SKX:       # %bb.0:
   1530 ; SKX-NEXT:    vcvttps2udq %zmm0, %zmm0 # sched: [4:0.50]
   1531 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; <16 x float> -> unsigned <16 x i32>; expected as the unsigned
        ; truncating vcvttps2udq on zmm.
   1532   %b = fptoui <16 x float> %a to <16 x i32>
   1533   ret <16 x i32> %b
   1534 }
   1535 
   1536 define <16 x i8> @f32to16uc(<16 x float> %f) {
   1537 ; GENERIC-LABEL: f32to16uc:
   1538 ; GENERIC:       # %bb.0:
   1539 ; GENERIC-NEXT:    vcvttps2dq %zmm0, %zmm0 # sched: [3:1.00]
   1540 ; GENERIC-NEXT:    vpmovdb %zmm0, %xmm0 # sched: [1:1.00]
   1541 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
   1542 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1543 ;
   1544 ; SKX-LABEL: f32to16uc:
   1545 ; SKX:       # %bb.0:
   1546 ; SKX-NEXT:    vcvttps2dq %zmm0, %zmm0 # sched: [4:0.50]
   1547 ; SKX-NEXT:    vpmovdb %zmm0, %xmm0 # sched: [4:2.00]
   1548 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
   1549 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; <16 x float> -> unsigned <16 x i8>; the expected sequence converts to
        ; 32-bit lanes with the signed vcvttps2dq, narrows dword -> byte with
        ; vpmovdb, then issues vzeroupper because the result is only xmm wide.
   1550   %res = fptoui <16 x float> %f to <16 x i8>
   1551   ret <16 x i8> %res
   1552 }
   1553 
   1554 define <16 x i16> @f32to16us(<16 x float> %f) {
   1555 ; GENERIC-LABEL: f32to16us:
   1556 ; GENERIC:       # %bb.0:
   1557 ; GENERIC-NEXT:    vcvttps2dq %zmm0, %zmm0 # sched: [3:1.00]
   1558 ; GENERIC-NEXT:    vpmovdw %zmm0, %ymm0 # sched: [1:1.00]
   1559 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1560 ;
   1561 ; SKX-LABEL: f32to16us:
   1562 ; SKX:       # %bb.0:
   1563 ; SKX-NEXT:    vcvttps2dq %zmm0, %zmm0 # sched: [4:0.50]
   1564 ; SKX-NEXT:    vpmovdw %zmm0, %ymm0 # sched: [4:2.00]
   1565 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; <16 x float> -> unsigned <16 x i16>; expected as vcvttps2dq to 32-bit
        ; lanes followed by a dword -> word narrowing (vpmovdw, zmm to ymm).
   1566   %res = fptoui <16 x float> %f to <16 x i16>
   1567   ret <16 x i16> %res
   1568 }
   1569 
   1570 define <8 x i32> @f32to8ui(<8 x float> %a) nounwind {
   1571 ; GENERIC-LABEL: f32to8ui:
   1572 ; GENERIC:       # %bb.0:
   1573 ; GENERIC-NEXT:    vcvttps2udq %ymm0, %ymm0 # sched: [3:1.00]
   1574 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1575 ;
   1576 ; SKX-LABEL: f32to8ui:
   1577 ; SKX:       # %bb.0:
   1578 ; SKX-NEXT:    vcvttps2udq %ymm0, %ymm0 # sched: [4:0.50]
   1579 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; <8 x float> -> unsigned <8 x i32>; 256-bit form of vcvttps2udq.
   1580   %b = fptoui <8 x float> %a to <8 x i32>
   1581   ret <8 x i32> %b
   1582 }
   1583 
   1584 define <4 x i32> @f32to4ui(<4 x float> %a) nounwind {
   1585 ; GENERIC-LABEL: f32to4ui:
   1586 ; GENERIC:       # %bb.0:
   1587 ; GENERIC-NEXT:    vcvttps2udq %xmm0, %xmm0 # sched: [3:1.00]
   1588 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1589 ;
   1590 ; SKX-LABEL: f32to4ui:
   1591 ; SKX:       # %bb.0:
   1592 ; SKX-NEXT:    vcvttps2udq %xmm0, %xmm0 # sched: [4:0.50]
   1593 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; <4 x float> -> unsigned <4 x i32>; 128-bit form of vcvttps2udq.
   1594   %b = fptoui <4 x float> %a to <4 x i32>
   1595   ret <4 x i32> %b
   1596 }
   1597 
   1598 define <8 x i32> @f64to8ui(<8 x double> %a) nounwind {
   1599 ; GENERIC-LABEL: f64to8ui:
   1600 ; GENERIC:       # %bb.0:
   1601 ; GENERIC-NEXT:    vcvttpd2udq %zmm0, %ymm0 # sched: [4:1.00]
   1602 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1603 ;
   1604 ; SKX-LABEL: f64to8ui:
   1605 ; SKX:       # %bb.0:
   1606 ; SKX-NEXT:    vcvttpd2udq %zmm0, %ymm0 # sched: [7:1.00]
   1607 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; <8 x double> -> unsigned <8 x i32>; narrowing vcvttpd2udq (zmm source,
        ; ymm result).
   1608   %b = fptoui <8 x double> %a to <8 x i32>
   1609   ret <8 x i32> %b
   1610 }
   1611 
   1612 define <8 x i16> @f64to8us(<8 x double> %f) {
   1613 ; GENERIC-LABEL: f64to8us:
   1614 ; GENERIC:       # %bb.0:
   1615 ; GENERIC-NEXT:    vcvttpd2dq %zmm0, %ymm0 # sched: [4:1.00]
   1616 ; GENERIC-NEXT:    vpmovdw %ymm0, %xmm0 # sched: [1:1.00]
   1617 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
   1618 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1619 ;
   1620 ; SKX-LABEL: f64to8us:
   1621 ; SKX:       # %bb.0:
   1622 ; SKX-NEXT:    vcvttpd2dq %zmm0, %ymm0 # sched: [7:1.00]
   1623 ; SKX-NEXT:    vpmovdw %ymm0, %xmm0 # sched: [4:2.00]
   1624 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
   1625 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; <8 x double> -> unsigned <8 x i16>; expected as vcvttpd2dq to 32-bit
        ; lanes, a dword -> word narrowing (vpmovdw, ymm to xmm), then vzeroupper.
   1626   %res = fptoui <8 x double> %f to <8 x i16>
   1627   ret <8 x i16> %res
   1628 }
   1629 
   1630 define <8 x i8> @f64to8uc(<8 x double> %f) {
   1631 ; GENERIC-LABEL: f64to8uc:
   1632 ; GENERIC:       # %bb.0:
   1633 ; GENERIC-NEXT:    vcvttpd2dq %zmm0, %ymm0 # sched: [4:1.00]
   1634 ; GENERIC-NEXT:    vpmovdw %ymm0, %xmm0 # sched: [1:1.00]
   1635 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
   1636 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1637 ;
   1638 ; SKX-LABEL: f64to8uc:
   1639 ; SKX:       # %bb.0:
   1640 ; SKX-NEXT:    vcvttpd2dq %zmm0, %ymm0 # sched: [7:1.00]
   1641 ; SKX-NEXT:    vpmovdw %ymm0, %xmm0 # sched: [4:2.00]
   1642 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
   1643 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; <8 x double> -> unsigned <8 x i8>; note the expected sequence is the
        ; same as for the <8 x i16> result: vcvttpd2dq, then vpmovdw (a word, not
        ; byte, narrowing), then vzeroupper.
   1644   %res = fptoui <8 x double> %f to <8 x i8>
   1645   ret <8 x i8> %res
   1646 }
   1647 
   1648 define <4 x i32> @f64to4ui(<4 x double> %a) nounwind {
   1649 ; GENERIC-LABEL: f64to4ui:
   1650 ; GENERIC:       # %bb.0:
   1651 ; GENERIC-NEXT:    vcvttpd2udq %ymm0, %xmm0 # sched: [4:1.00]
   1652 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
   1653 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1654 ;
   1655 ; SKX-LABEL: f64to4ui:
   1656 ; SKX:       # %bb.0:
   1657 ; SKX-NEXT:    vcvttpd2udq %ymm0, %xmm0 # sched: [7:1.00]
   1658 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
   1659 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; <4 x double> -> unsigned <4 x i32>; narrowing vcvttpd2udq (ymm source,
        ; xmm result) followed by vzeroupper.
   1660   %b = fptoui <4 x double> %a to <4 x i32>
   1661   ret <4 x i32> %b
   1662 }
   1663 
   1664 define <8 x double> @sito8f64(<8 x i32> %a) {
   1665 ; GENERIC-LABEL: sito8f64:
   1666 ; GENERIC:       # %bb.0:
   1667 ; GENERIC-NEXT:    vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
   1668 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1669 ;
   1670 ; SKX-LABEL: sito8f64:
   1671 ; SKX:       # %bb.0:
   1672 ; SKX-NEXT:    vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
   1673 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; Signed <8 x i32> -> <8 x double>; widening vcvtdq2pd (ymm source, zmm
        ; result), unmasked.
   1674   %b = sitofp <8 x i32> %a to <8 x double>
   1675   ret <8 x double> %b
   1676 }
   1677 define <8 x double> @i32to8f64_mask(<8 x double> %a, <8 x i32> %b, i8 %c) nounwind {
   1678 ; GENERIC-LABEL: i32to8f64_mask:
   1679 ; GENERIC:       # %bb.0:
   1680 ; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
   1681 ; GENERIC-NEXT:    vcvtdq2pd %ymm1, %zmm0 {%k1} # sched: [4:1.00]
   1682 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1683 ;
   1684 ; SKX-LABEL: i32to8f64_mask:
   1685 ; SKX:       # %bb.0:
   1686 ; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
   1687 ; SKX-NEXT:    vcvtdq2pd %ymm1, %zmm0 {%k1} # sched: [4:0.50]
   1688 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; Merge-masked signed <8 x i32> -> <8 x double>: the i8 argument is
        ; reinterpreted as an <8 x i1> lane mask, and lanes whose bit is clear
        ; keep the value from %a. Expected as kmovd of %edi into k1 followed by
        ; vcvtdq2pd writing zmm0 under {%k1}.
   1694   %1 = bitcast i8 %c to <8 x i1>
   1695   %2 = sitofp <8 x i32> %b to <8 x double>
   1696   %3 = select <8 x i1> %1, <8 x double> %2, <8 x double> %a
   1697   ret <8 x double> %3
   1698 }
   1699 define <8 x double> @sito8f64_maskz(<8 x i32> %a, i8 %b) nounwind {
   1700 ; GENERIC-LABEL: sito8f64_maskz:
   1701 ; GENERIC:       # %bb.0:
   1702 ; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
   1703 ; GENERIC-NEXT:    vcvtdq2pd %ymm0, %zmm0 {%k1} {z} # sched: [4:1.00]
   1704 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1705 ;
   1706 ; SKX-LABEL: sito8f64_maskz:
   1707 ; SKX:       # %bb.0:
   1708 ; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
   1709 ; SKX-NEXT:    vcvtdq2pd %ymm0, %zmm0 {%k1} {z} # sched: [4:0.50]
   1710 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; Zero-masked signed <8 x i32> -> <8 x double>: the i8 argument is
        ; reinterpreted as an <8 x i1> lane mask, and lanes whose bit is clear
        ; become zero. Expected as kmovd of %edi into k1 followed by vcvtdq2pd
        ; under {%k1} {z}.
   1716   %1 = bitcast i8 %b to <8 x i1>
   1717   %2 = sitofp <8 x i32> %a to <8 x double>
   1718   %3 = select <8 x i1> %1, <8 x double> %2, <8 x double> zeroinitializer
   1719   ret <8 x double> %3
   1720 }
   1721 
   1722 define <8 x i32> @f64to8si(<8 x double> %a) {
   1723 ; GENERIC-LABEL: f64to8si:
   1724 ; GENERIC:       # %bb.0:
   1725 ; GENERIC-NEXT:    vcvttpd2dq %zmm0, %ymm0 # sched: [4:1.00]
   1726 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1727 ;
   1728 ; SKX-LABEL: f64to8si:
   1729 ; SKX:       # %bb.0:
   1730 ; SKX-NEXT:    vcvttpd2dq %zmm0, %ymm0 # sched: [7:1.00]
   1731 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; <8 x double> -> signed <8 x i32>; narrowing vcvttpd2dq (zmm source, ymm
        ; result).
   1732   %b = fptosi <8 x double> %a to <8 x i32>
   1733   ret <8 x i32> %b
   1734 }
   1735 
   1736 define <4 x i32> @f64to4si(<4 x double> %a) {
   1737 ; GENERIC-LABEL: f64to4si:
   1738 ; GENERIC:       # %bb.0:
   1739 ; GENERIC-NEXT:    vcvttpd2dq %ymm0, %xmm0 # sched: [4:1.00]
   1740 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
   1741 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1742 ;
   1743 ; SKX-LABEL: f64to4si:
   1744 ; SKX:       # %bb.0:
   1745 ; SKX-NEXT:    vcvttpd2dq %ymm0, %xmm0 # sched: [7:1.00]
   1746 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
   1747 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; <4 x double> -> signed <4 x i32>; narrowing vcvttpd2dq (ymm source, xmm
        ; result) followed by vzeroupper.
   1748   %b = fptosi <4 x double> %a to <4 x i32>
   1749   ret <4 x i32> %b
   1750 }
   1751 
   1752 define <16 x float> @f64to16f32(<16 x double> %b) nounwind {
   1753 ; GENERIC-LABEL: f64to16f32:
   1754 ; GENERIC:       # %bb.0:
   1755 ; GENERIC-NEXT:    vcvtpd2ps %zmm0, %ymm0 # sched: [4:1.00]
   1756 ; GENERIC-NEXT:    vcvtpd2ps %zmm1, %ymm1 # sched: [4:1.00]
   1757 ; GENERIC-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 # sched: [1:1.00]
   1758 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1759 ;
   1760 ; SKX-LABEL: f64to16f32:
   1761 ; SKX:       # %bb.0:
   1762 ; SKX-NEXT:    vcvtpd2ps %zmm0, %ymm0 # sched: [7:1.00]
   1763 ; SKX-NEXT:    vcvtpd2ps %zmm1, %ymm1 # sched: [7:1.00]
   1764 ; SKX-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 # sched: [3:1.00]
   1765 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; <16 x double> -> <16 x float>; the input spans two zmm registers, so
        ; each half is narrowed to ymm with vcvtpd2ps and the two results are
        ; joined into zmm0 with vinsertf64x4 $1.
   1766   %a = fptrunc <16 x double> %b to <16 x float>
   1767   ret <16 x float> %a
   1768 }
   1769 
   1770 define <4 x float> @f64to4f32(<4 x double> %b) {
   1771 ; GENERIC-LABEL: f64to4f32:
   1772 ; GENERIC:       # %bb.0:
   1773 ; GENERIC-NEXT:    vcvtpd2ps %ymm0, %xmm0 # sched: [4:1.00]
   1774 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
   1775 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1776 ;
   1777 ; SKX-LABEL: f64to4f32:
   1778 ; SKX:       # %bb.0:
   1779 ; SKX-NEXT:    vcvtpd2ps %ymm0, %xmm0 # sched: [7:1.00]
   1780 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
   1781 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; <4 x double> -> <4 x float>; narrowing vcvtpd2ps (ymm source, xmm
        ; result) followed by vzeroupper.
   1782   %a = fptrunc <4 x double> %b to <4 x float>
   1783   ret <4 x float> %a
   1784 }
   1785 
   1786 define <4 x float> @f64to4f32_mask(<4 x double> %b, <4 x i1> %mask) {
   1787 ; GENERIC-LABEL: f64to4f32_mask:
   1788 ; GENERIC:       # %bb.0:
   1789 ; GENERIC-NEXT:    vpslld $31, %xmm1, %xmm1 # sched: [1:1.00]
   1790 ; GENERIC-NEXT:    vpmovd2m %xmm1, %k1 # sched: [1:0.33]
   1791 ; GENERIC-NEXT:    vcvtpd2ps %ymm0, %xmm0 {%k1} {z} # sched: [4:1.00]
   1792 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
   1793 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1794 ;
   1795 ; SKX-LABEL: f64to4f32_mask:
   1796 ; SKX:       # %bb.0:
   1797 ; SKX-NEXT:    vpslld $31, %xmm1, %xmm1 # sched: [1:0.50]
   1798 ; SKX-NEXT:    vpmovd2m %xmm1, %k1 # sched: [1:1.00]
   1799 ; SKX-NEXT:    vcvtpd2ps %ymm0, %xmm0 {%k1} {z} # sched: [7:1.00]
   1800 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
   1801 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; Zero-masked <4 x double> -> <4 x float>. The <4 x i1> mask arrives in a
        ; vector register (xmm1); the checks expect it to be moved into k1 by
        ; shifting each dword left by 31 and applying vpmovd2m, after which
        ; vcvtpd2ps runs under {%k1} {z}.
   1802   %a = fptrunc <4 x double> %b to <4 x float>
   1803   %c = select <4 x i1>%mask, <4 x float>%a, <4 x float> zeroinitializer
   1804   ret <4 x float> %c
   1805 }
   1806 
   1807 define <4 x float> @f64tof32_inreg(<2 x double> %a0, <4 x float> %a1) nounwind {
   1808 ; GENERIC-LABEL: f64tof32_inreg:
   1809 ; GENERIC:       # %bb.0:
   1810 ; GENERIC-NEXT:    vcvtsd2ss %xmm0, %xmm1, %xmm0 # sched: [4:1.00]
   1811 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1812 ;
   1813 ; SKX-LABEL: f64tof32_inreg:
   1814 ; SKX:       # %bb.0:
   1815 ; SKX-NEXT:    vcvtsd2ss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
   1816 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; Scalar double -> float kept in vector registers: lane 0 of %a0 is
        ; truncated and written into lane 0 of %a1. The extract/convert/insert
        ; chain is expected to collapse into one three-operand vcvtsd2ss.
   1817   %ext = extractelement <2 x double> %a0, i32 0
   1818   %cvt = fptrunc double %ext to float
   1819   %res = insertelement <4 x float> %a1, float %cvt, i32 0
   1820   ret <4 x float> %res
   1821 }
   1822 
   1823 define <8 x double> @f32to8f64(<8 x float> %b) nounwind {
   1824 ; GENERIC-LABEL: f32to8f64:
   1825 ; GENERIC:       # %bb.0:
   1826 ; GENERIC-NEXT:    vcvtps2pd %ymm0, %zmm0 # sched: [2:1.00]
   1827 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1828 ;
   1829 ; SKX-LABEL: f32to8f64:
   1830 ; SKX:       # %bb.0:
   1831 ; SKX-NEXT:    vcvtps2pd %ymm0, %zmm0 # sched: [7:1.00]
   1832 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; <8 x float> -> <8 x double>; widening vcvtps2pd (ymm source, zmm result).
   1833   %a = fpext <8 x float> %b to <8 x double>
   1834   ret <8 x double> %a
   1835 }
   1836 
   1837 define <4 x double> @f32to4f64_mask(<4 x float> %b, <4 x double> %b1, <4 x double> %a1) {
   1838 ; GENERIC-LABEL: f32to4f64_mask:
   1839 ; GENERIC:       # %bb.0:
   1840 ; GENERIC-NEXT:    vcmpltpd %ymm2, %ymm1, %k1 # sched: [3:1.00]
   1841 ; GENERIC-NEXT:    vcvtps2pd %xmm0, %ymm0 {%k1} {z} # sched: [2:1.00]
   1842 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1843 ;
   1844 ; SKX-LABEL: f32to4f64_mask:
   1845 ; SKX:       # %bb.0:
   1846 ; SKX-NEXT:    vcmpltpd %ymm2, %ymm1, %k1 # sched: [3:1.00]
   1847 ; SKX-NEXT:    vcvtps2pd %xmm0, %ymm0 {%k1} {z} # sched: [7:1.00]
   1848 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; Zero-masked <4 x float> -> <4 x double> where the mask comes from a
        ; compare. The IR predicate is `ogt %a1, %b1`; the checks expect it as a
        ; less-than compare (vcmpltpd) on ymm2/ymm1 writing k1 directly, followed
        ; by vcvtps2pd under {%k1} {z}.
   1849   %a = fpext <4 x float> %b to <4 x double>
   1850   %mask = fcmp ogt <4 x double> %a1, %b1
   1851   %c = select <4 x i1> %mask, <4 x double> %a, <4 x double> zeroinitializer
   1852   ret <4 x double> %c
   1853 }
   1854 
   1855 define <2 x double> @f32tof64_inreg(<2 x double> %a0, <4 x float> %a1) nounwind {
   1856 ; GENERIC-LABEL: f32tof64_inreg:
   1857 ; GENERIC:       # %bb.0:
   1858 ; GENERIC-NEXT:    vcvtss2sd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
   1859 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1860 ;
   1861 ; SKX-LABEL: f32tof64_inreg:
   1862 ; SKX:       # %bb.0:
   1863 ; SKX-NEXT:    vcvtss2sd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
   1864 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; Scalar float -> double kept in vector registers: lane 0 of %a1 is
        ; extended and written into lane 0 of %a0. The extract/convert/insert
        ; chain is expected to collapse into one three-operand vcvtss2sd.
   1865   %ext = extractelement <4 x float> %a1, i32 0
   1866   %cvt = fpext float %ext to double
   1867   %res = insertelement <2 x double> %a0, double %cvt, i32 0
   1868   ret <2 x double> %res
   1869 }
   1870 
   1871 define double @sltof64_load(i64* nocapture %e) {
   1872 ; GENERIC-LABEL: sltof64_load:
   1873 ; GENERIC:       # %bb.0: # %entry
   1874 ; GENERIC-NEXT:    vcvtsi2sdq (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
   1875 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1876 ;
   1877 ; SKX-LABEL: sltof64_load:
   1878 ; SKX:       # %bb.0: # %entry
   1879 ; SKX-NEXT:    vcvtsi2sdq (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
   1880 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; Scalar signed i64 (loaded from memory) -> double; the load is expected
        ; to be folded into vcvtsi2sdq as a (%rdi) operand.
   1881 entry:
   1882   %tmp1 = load i64, i64* %e, align 8
   1883   %conv = sitofp i64 %tmp1 to double
   1884   ret double %conv
   1885 }
   1886 
   1887 define double @sitof64_load(i32* %e) {
   1888 ; GENERIC-LABEL: sitof64_load:
   1889 ; GENERIC:       # %bb.0: # %entry
   1890 ; GENERIC-NEXT:    vcvtsi2sdl (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
   1891 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1892 ;
   1893 ; SKX-LABEL: sitof64_load:
   1894 ; SKX:       # %bb.0: # %entry
   1895 ; SKX-NEXT:    vcvtsi2sdl (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
   1896 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; Scalar signed i32 (loaded from memory) -> double; the load is expected
        ; to be folded into vcvtsi2sdl as a (%rdi) operand.
   1897 entry:
   1898   %tmp1 = load i32, i32* %e, align 4
   1899   %conv = sitofp i32 %tmp1 to double
   1900   ret double %conv
   1901 }
   1902 
   1903 define float @sitof32_load(i32* %e) {
   1904 ; GENERIC-LABEL: sitof32_load:
   1905 ; GENERIC:       # %bb.0: # %entry
   1906 ; GENERIC-NEXT:    vcvtsi2ssl (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
   1907 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1908 ;
   1909 ; SKX-LABEL: sitof32_load:
   1910 ; SKX:       # %bb.0: # %entry
   1911 ; SKX-NEXT:    vcvtsi2ssl (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
   1912 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; Scalar signed i32 (loaded from memory) -> float; the load is expected
        ; to be folded into vcvtsi2ssl as a (%rdi) operand.
   1913 entry:
   1914   %tmp1 = load i32, i32* %e, align 4
   1915   %conv = sitofp i32 %tmp1 to float
   1916   ret float %conv
   1917 }
   1918 
   1919 define float @sltof32_load(i64* %e) {
   1920 ; GENERIC-LABEL: sltof32_load:
   1921 ; GENERIC:       # %bb.0: # %entry
   1922 ; GENERIC-NEXT:    vcvtsi2ssq (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
   1923 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1924 ;
   1925 ; SKX-LABEL: sltof32_load:
   1926 ; SKX:       # %bb.0: # %entry
   1927 ; SKX-NEXT:    vcvtsi2ssq (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
   1928 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; Scalar signed i64 (loaded from memory) -> float; the load is expected
        ; to be folded into vcvtsi2ssq as a (%rdi) operand.
   1929 entry:
   1930   %tmp1 = load i64, i64* %e, align 8
   1931   %conv = sitofp i64 %tmp1 to float
   1932   ret float %conv
   1933 }
   1934 
   1935 define void @f32tof64_loadstore() {
   1936 ; GENERIC-LABEL: f32tof64_loadstore:
   1937 ; GENERIC:       # %bb.0: # %entry
   1938 ; GENERIC-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
   1939 ; GENERIC-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [1:1.00]
   1940 ; GENERIC-NEXT:    vmovsd %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
   1941 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1942 ;
   1943 ; SKX-LABEL: f32tof64_loadstore:
   1944 ; SKX:       # %bb.0: # %entry
   1945 ; SKX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
   1946 ; SKX-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
   1947 ; SKX-NEXT:    vmovsd %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
   1948 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; Stack-to-stack float -> double: reads the float slot %f, extends it,
        ; and stores the result to the double slot %d. %f is never written in
        ; this function, so only the instruction sequence matters (vmovss load,
        ; vcvtss2sd, vmovsd store at a negative %rsp offset), not the value.
   1949 entry:
   1950   %f = alloca float, align 4
   1951   %d = alloca double, align 8
   1952   %tmp = load float, float* %f, align 4
   1953   %conv = fpext float %tmp to double
   1954   store double %conv, double* %d, align 8
   1955   ret void
   1956 }
   1957 
   1958 define void @f64tof32_loadstore() nounwind uwtable {
   1959 ; GENERIC-LABEL: f64tof32_loadstore:
   1960 ; GENERIC:       # %bb.0: # %entry
   1961 ; GENERIC-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50]
   1962 ; GENERIC-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
   1963 ; GENERIC-NEXT:    vmovss %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
   1964 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1965 ;
   1966 ; SKX-LABEL: f64tof32_loadstore:
   1967 ; SKX:       # %bb.0: # %entry
   1968 ; SKX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
   1969 ; SKX-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
   1970 ; SKX-NEXT:    vmovss %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
   1971 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; Stack-to-stack double -> float: reads the double slot %d, truncates
        ; it, and stores the result to the float slot %f. %d is never written in
        ; this function, so only the instruction sequence matters (vmovsd load,
        ; vcvtsd2ss, vmovss store at a negative %rsp offset), not the value.
   1972 entry:
   1973   %f = alloca float, align 4
   1974   %d = alloca double, align 8
   1975   %tmp = load double, double* %d, align 8
   1976   %conv = fptrunc double %tmp to float
   1977   store float %conv, float* %f, align 4
   1978   ret void
   1979 }
   1980 
   1981 define double @long_to_double(i64 %x) {
   1982 ; GENERIC-LABEL: long_to_double:
   1983 ; GENERIC:       # %bb.0:
   1984 ; GENERIC-NEXT:    vmovq %rdi, %xmm0 # sched: [1:1.00]
   1985 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1986 ;
   1987 ; SKX-LABEL: long_to_double:
   1988 ; SKX:       # %bb.0:
   1989 ; SKX-NEXT:    vmovq %rdi, %xmm0 # sched: [1:1.00]
   1990 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; Bit-pattern reinterpretation (bitcast), not a numeric conversion: the
        ; checks expect only a GPR -> xmm move (vmovq), no vcvt* instruction.
   1991    %res = bitcast i64 %x to double
   1992    ret double %res
   1993 }
   1994 
   1995 define i64 @double_to_long(double %x) {
   1996 ; GENERIC-LABEL: double_to_long:
   1997 ; GENERIC:       # %bb.0:
   1998 ; GENERIC-NEXT:    vmovq %xmm0, %rax # sched: [2:1.00]
   1999 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2000 ;
   2001 ; SKX-LABEL: double_to_long:
   2002 ; SKX:       # %bb.0:
   2003 ; SKX-NEXT:    vmovq %xmm0, %rax # sched: [2:1.00]
   2004 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; Bit-pattern reinterpretation (bitcast), not a numeric conversion: the
        ; checks expect only an xmm -> GPR move (vmovq into %rax).
   2005    %res = bitcast double %x to i64
   2006    ret i64 %res
   2007 }
   2008 
   2009 define float @int_to_float(i32 %x) {
   2010 ; GENERIC-LABEL: int_to_float:
   2011 ; GENERIC:       # %bb.0:
   2012 ; GENERIC-NEXT:    vmovd %edi, %xmm0 # sched: [1:1.00]
   2013 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2014 ;
   2015 ; SKX-LABEL: int_to_float:
   2016 ; SKX:       # %bb.0:
   2017 ; SKX-NEXT:    vmovd %edi, %xmm0 # sched: [1:1.00]
   2018 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; Bit-pattern reinterpretation (bitcast), not a numeric conversion: the
        ; checks expect only a 32-bit GPR -> xmm move (vmovd).
   2019    %res = bitcast i32 %x to float
   2020    ret float %res
   2021 }
   2022 
   2023 define i32 @float_to_int(float %x) {
   2024 ; GENERIC-LABEL: float_to_int:
   2025 ; GENERIC:       # %bb.0:
   2026 ; GENERIC-NEXT:    vmovd %xmm0, %eax # sched: [2:1.00]
   2027 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2028 ;
   2029 ; SKX-LABEL: float_to_int:
   2030 ; SKX:       # %bb.0:
   2031 ; SKX-NEXT:    vmovd %xmm0, %eax # sched: [2:1.00]
   2032 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; Bit-pattern reinterpretation (bitcast), not a numeric conversion: the
        ; checks expect only an xmm -> 32-bit GPR move (vmovd into %eax).
   2033    %res = bitcast float %x to i32
   2034    ret i32 %res
   2035 }
   2036 
   2037 define <16 x double> @uito16f64(<16 x i32> %a) nounwind {
   2038 ; GENERIC-LABEL: uito16f64:
   2039 ; GENERIC:       # %bb.0:
   2040 ; GENERIC-NEXT:    vcvtudq2pd %ymm0, %zmm2 # sched: [4:1.00]
   2041 ; GENERIC-NEXT:    vextractf64x4 $1, %zmm0, %ymm0 # sched: [1:1.00]
   2042 ; GENERIC-NEXT:    vcvtudq2pd %ymm0, %zmm1 # sched: [4:1.00]
   2043 ; GENERIC-NEXT:    vmovaps %zmm2, %zmm0 # sched: [1:1.00]
   2044 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2045 ;
   2046 ; SKX-LABEL: uito16f64:
   2047 ; SKX:       # %bb.0:
   2048 ; SKX-NEXT:    vcvtudq2pd %ymm0, %zmm2 # sched: [7:1.00]
   2049 ; SKX-NEXT:    vextractf64x4 $1, %zmm0, %ymm0 # sched: [3:1.00]
   2050 ; SKX-NEXT:    vcvtudq2pd %ymm0, %zmm1 # sched: [7:1.00]
   2051 ; SKX-NEXT:    vmovaps %zmm2, %zmm0 # sched: [1:0.33]
   2052 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; Unsigned <16 x i32> -> <16 x double>; the result needs two zmm
        ; registers. Expected sequence: convert the low ymm half into zmm2,
        ; extract the high half with vextractf64x4 $1 and convert it into zmm1,
        ; then copy zmm2 into zmm0 so the result sits in zmm0/zmm1.
   2053   %b = uitofp <16 x i32> %a to <16 x double>
   2054   ret <16 x double> %b
   2055 }
   2056 
   2057 define <8 x float> @slto8f32(<8 x i64> %a) {
   2058 ; GENERIC-LABEL: slto8f32:
   2059 ; GENERIC:       # %bb.0:
   2060 ; GENERIC-NEXT:    vcvtqq2ps %zmm0, %ymm0 # sched: [3:1.00]
   2061 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2062 ;
   2063 ; SKX-LABEL: slto8f32:
   2064 ; SKX:       # %bb.0:
   2065 ; SKX-NEXT:    vcvtqq2ps %zmm0, %ymm0 # sched: [7:1.00]
   2066 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; Signed <8 x i64> -> <8 x float>; narrowing vcvtqq2ps (zmm source, ymm
        ; result).
   2067   %b = sitofp <8 x i64> %a to <8 x float>
   2068   ret <8 x float> %b
   2069 }
   2070 
   2071 define <16 x float> @slto16f32(<16 x i64> %a) {
   2072 ; GENERIC-LABEL: slto16f32:
   2073 ; GENERIC:       # %bb.0:
   2074 ; GENERIC-NEXT:    vcvtqq2ps %zmm0, %ymm0 # sched: [3:1.00]
   2075 ; GENERIC-NEXT:    vcvtqq2ps %zmm1, %ymm1 # sched: [3:1.00]
   2076 ; GENERIC-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 # sched: [1:1.00]
   2077 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2078 ;
   2079 ; SKX-LABEL: slto16f32:
   2080 ; SKX:       # %bb.0:
   2081 ; SKX-NEXT:    vcvtqq2ps %zmm0, %ymm0 # sched: [7:1.00]
   2082 ; SKX-NEXT:    vcvtqq2ps %zmm1, %ymm1 # sched: [7:1.00]
   2083 ; SKX-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 # sched: [3:1.00]
   2084 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; Signed <16 x i64> -> <16 x float>; each zmm half is narrowed to ymm
        ; with vcvtqq2ps and the halves are joined into zmm0 with
        ; vinsertf64x4 $1.
   2085   %b = sitofp <16 x i64> %a to <16 x float>
   2086   ret <16 x float> %b
   2087 }
   2088 
   2089 define <8 x double> @slto8f64(<8 x i64> %a) {
   2090 ; GENERIC-LABEL: slto8f64:
   2091 ; GENERIC:       # %bb.0:
   2092 ; GENERIC-NEXT:    vcvtqq2pd %zmm0, %zmm0 # sched: [4:1.00]
   2093 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2094 ;
   2095 ; SKX-LABEL: slto8f64:
   2096 ; SKX:       # %bb.0:
   2097 ; SKX-NEXT:    vcvtqq2pd %zmm0, %zmm0 # sched: [4:0.50]
   2098 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; Signed <8 x i64> -> <8 x double>; expected as one zmm vcvtqq2pd.
   2099   %b = sitofp <8 x i64> %a to <8 x double>
   2100   ret <8 x double> %b
   2101 }
   2102 
   2103 define <16 x double> @slto16f64(<16 x i64> %a) {
   2104 ; GENERIC-LABEL: slto16f64:
   2105 ; GENERIC:       # %bb.0:
   2106 ; GENERIC-NEXT:    vcvtqq2pd %zmm0, %zmm0 # sched: [4:1.00]
   2107 ; GENERIC-NEXT:    vcvtqq2pd %zmm1, %zmm1 # sched: [4:1.00]
   2108 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2109 ;
   2110 ; SKX-LABEL: slto16f64:
   2111 ; SKX:       # %bb.0:
   2112 ; SKX-NEXT:    vcvtqq2pd %zmm0, %zmm0 # sched: [4:0.50]
   2113 ; SKX-NEXT:    vcvtqq2pd %zmm1, %zmm1 # sched: [4:0.50]
   2114 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; Signed <16 x i64> -> <16 x double>; the vector spans two zmm
        ; registers, so the checks expect one vcvtqq2pd per half (zmm0, zmm1).
   2115   %b = sitofp <16 x i64> %a to <16 x double>
   2116   ret <16 x double> %b
   2117 }
   2118 
   2119 define <8 x float> @ulto8f32(<8 x i64> %a) {
   2120 ; GENERIC-LABEL: ulto8f32:
   2121 ; GENERIC:       # %bb.0:
   2122 ; GENERIC-NEXT:    vcvtuqq2ps %zmm0, %ymm0 # sched: [3:1.00]
   2123 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2124 ;
   2125 ; SKX-LABEL: ulto8f32:
   2126 ; SKX:       # %bb.0:
   2127 ; SKX-NEXT:    vcvtuqq2ps %zmm0, %ymm0 # sched: [7:1.00]
   2128 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; Unsigned <8 x i64> -> <8 x float>; narrowing vcvtuqq2ps (zmm source,
        ; ymm result).
   2129   %b = uitofp <8 x i64> %a to <8 x float>
   2130   ret <8 x float> %b
   2131 }
   2132 
   2133 define <16 x float> @ulto16f32(<16 x i64> %a) {
   2134 ; GENERIC-LABEL: ulto16f32:
   2135 ; GENERIC:       # %bb.0:
   2136 ; GENERIC-NEXT:    vcvtuqq2ps %zmm0, %ymm0 # sched: [3:1.00]
   2137 ; GENERIC-NEXT:    vcvtuqq2ps %zmm1, %ymm1 # sched: [3:1.00]
   2138 ; GENERIC-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 # sched: [1:1.00]
   2139 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2140 ;
   2141 ; SKX-LABEL: ulto16f32:
   2142 ; SKX:       # %bb.0:
   2143 ; SKX-NEXT:    vcvtuqq2ps %zmm0, %ymm0 # sched: [7:1.00]
   2144 ; SKX-NEXT:    vcvtuqq2ps %zmm1, %ymm1 # sched: [7:1.00]
   2145 ; SKX-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 # sched: [3:1.00]
   2146 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; Unsigned <16 x i64> -> <16 x float>; each zmm half is narrowed to ymm
        ; with vcvtuqq2ps and the halves are joined into zmm0 with
        ; vinsertf64x4 $1.
   2147   %b = uitofp <16 x i64> %a to <16 x float>
   2148   ret <16 x float> %b
   2149 }
   2150 
   2151 define <8 x double> @uito8f64_mask(<8 x double> %a, <8 x i32> %b, i8 %c) nounwind {
   2152 ; GENERIC-LABEL: uito8f64_mask:
   2153 ; GENERIC:       # %bb.0:
   2154 ; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
   2155 ; GENERIC-NEXT:    vcvtudq2pd %ymm1, %zmm0 {%k1} # sched: [4:1.00]
   2156 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2157 ;
   2158 ; SKX-LABEL: uito8f64_mask:
   2159 ; SKX:       # %bb.0:
   2160 ; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
   2161 ; SKX-NEXT:    vcvtudq2pd %ymm1, %zmm0 {%k1} # sched: [4:0.50]
   2162 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; Merge-masked unsigned <8 x i32> -> <8 x double>: the i8 argument is
        ; reinterpreted as an <8 x i1> lane mask, and lanes whose bit is clear
        ; keep the value from %a. Expected as kmovd of %edi into k1 followed by
        ; vcvtudq2pd writing zmm0 under {%k1}.
   2168   %1 = bitcast i8 %c to <8 x i1>
   2169   %2 = uitofp <8 x i32> %b to <8 x double>
   2170   %3 = select <8 x i1> %1, <8 x double> %2, <8 x double> %a
   2171   ret <8 x double> %3
   2172 }
   2173 define <8 x double> @uito8f64_maskz(<8 x i32> %a, i8 %b) nounwind {
   2174 ; GENERIC-LABEL: uito8f64_maskz:
   2175 ; GENERIC:       # %bb.0:
   2176 ; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
   2177 ; GENERIC-NEXT:    vcvtudq2pd %ymm0, %zmm0 {%k1} {z} # sched: [4:1.00]
   2178 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2179 ;
   2180 ; SKX-LABEL: uito8f64_maskz:
   2181 ; SKX:       # %bb.0:
   2182 ; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
   2183 ; SKX-NEXT:    vcvtudq2pd %ymm0, %zmm0 {%k1} {z} # sched: [4:0.50]
   2184 ; SKX-NEXT:    retq # sched: [7:1.00]
        ; Zero-masked unsigned <8 x i32> -> <8 x double>: the i8 argument is
        ; reinterpreted as an <8 x i1> lane mask, and lanes whose bit is clear
        ; become zero. Expected as kmovd of %edi into k1 followed by vcvtudq2pd
        ; under {%k1} {z}.
   2185   %1 = bitcast i8 %b to <8 x i1>
   2186   %2 = uitofp <8 x i32> %a to <8 x double>
   2187   %3 = select <8 x i1> %1, <8 x double> %2, <8 x double> zeroinitializer
   2188   ret <8 x double> %3
   2189 }
   2190 
   2191 define <4 x double> @uito4f64(<4 x i32> %a) nounwind { ; Unsigned i32 -> f64 convert, 4 lanes; expected to select vcvtudq2pd (xmm source, ymm result).
   2192 ; GENERIC-LABEL: uito4f64:
   2193 ; GENERIC:       # %bb.0:
   2194 ; GENERIC-NEXT:    vcvtudq2pd %xmm0, %ymm0 # sched: [4:1.00]
   2195 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2196 ;
   2197 ; SKX-LABEL: uito4f64:
   2198 ; SKX:       # %bb.0:
   2199 ; SKX-NEXT:    vcvtudq2pd %xmm0, %ymm0 # sched: [7:1.00]
   2200 ; SKX-NEXT:    retq # sched: [7:1.00]
   2201   %b = uitofp <4 x i32> %a to <4 x double> ; the conversion under test
   2202   ret <4 x double> %b
   2203 }
   2204 
   2205 define <16 x float> @uito16f32(<16 x i32> %a) nounwind { ; Unsigned i32 -> f32 convert, 16 lanes; expected to select a single zmm vcvtudq2ps.
   2206 ; GENERIC-LABEL: uito16f32:
   2207 ; GENERIC:       # %bb.0:
   2208 ; GENERIC-NEXT:    vcvtudq2ps %zmm0, %zmm0 # sched: [3:1.00]
   2209 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2210 ;
   2211 ; SKX-LABEL: uito16f32:
   2212 ; SKX:       # %bb.0:
   2213 ; SKX-NEXT:    vcvtudq2ps %zmm0, %zmm0 # sched: [4:0.50]
   2214 ; SKX-NEXT:    retq # sched: [7:1.00]
   2215   %b = uitofp <16 x i32> %a to <16 x float> ; the conversion under test
   2216   ret <16 x float> %b
   2217 }
   2218 
   2219 define <8 x double> @uito8f64(<8 x i32> %a) { ; Unmasked unsigned i32 -> f64 convert, 8 lanes; expected to select vcvtudq2pd (ymm source, zmm result).
   2220 ; GENERIC-LABEL: uito8f64:
   2221 ; GENERIC:       # %bb.0:
   2222 ; GENERIC-NEXT:    vcvtudq2pd %ymm0, %zmm0 # sched: [4:1.00]
   2223 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2224 ;
   2225 ; SKX-LABEL: uito8f64:
   2226 ; SKX:       # %bb.0:
   2227 ; SKX-NEXT:    vcvtudq2pd %ymm0, %zmm0 # sched: [7:1.00]
   2228 ; SKX-NEXT:    retq # sched: [7:1.00]
   2229   %b = uitofp <8 x i32> %a to <8 x double> ; the conversion under test
   2230   ret <8 x double> %b
   2231 }
   2232 
   2233 define <8 x float> @uito8f32(<8 x i32> %a) nounwind { ; Unsigned i32 -> f32 convert, 8 lanes; expected to select the ymm form of vcvtudq2ps.
   2234 ; GENERIC-LABEL: uito8f32:
   2235 ; GENERIC:       # %bb.0:
   2236 ; GENERIC-NEXT:    vcvtudq2ps %ymm0, %ymm0 # sched: [3:1.00]
   2237 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2238 ;
   2239 ; SKX-LABEL: uito8f32:
   2240 ; SKX:       # %bb.0:
   2241 ; SKX-NEXT:    vcvtudq2ps %ymm0, %ymm0 # sched: [4:0.50]
   2242 ; SKX-NEXT:    retq # sched: [7:1.00]
   2243   %b = uitofp <8 x i32> %a to <8 x float> ; the conversion under test
   2244   ret <8 x float> %b
   2245 }
   2246 
   2247 define <4 x float> @uito4f32(<4 x i32> %a) nounwind { ; Unsigned i32 -> f32 convert, 4 lanes; expected to select the xmm form of vcvtudq2ps.
   2248 ; GENERIC-LABEL: uito4f32:
   2249 ; GENERIC:       # %bb.0:
   2250 ; GENERIC-NEXT:    vcvtudq2ps %xmm0, %xmm0 # sched: [3:1.00]
   2251 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2252 ;
   2253 ; SKX-LABEL: uito4f32:
   2254 ; SKX:       # %bb.0:
   2255 ; SKX-NEXT:    vcvtudq2ps %xmm0, %xmm0 # sched: [4:0.50]
   2256 ; SKX-NEXT:    retq # sched: [7:1.00]
   2257   %b = uitofp <4 x i32> %a to <4 x float> ; the conversion under test
   2258   ret <4 x float> %b
   2259 }
   2260 
   2261 define i32 @fptosi(float %a) nounwind { ; Scalar float -> signed i32 convert; expected to select vcvttss2si into eax.
   2262 ; GENERIC-LABEL: fptosi:
   2263 ; GENERIC:       # %bb.0:
   2264 ; GENERIC-NEXT:    vcvttss2si %xmm0, %eax # sched: [5:1.00]
   2265 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2266 ;
   2267 ; SKX-LABEL: fptosi:
   2268 ; SKX:       # %bb.0:
   2269 ; SKX-NEXT:    vcvttss2si %xmm0, %eax # sched: [6:1.00]
   2270 ; SKX-NEXT:    retq # sched: [7:1.00]
   2271   %b = fptosi float %a to i32 ; the conversion under test
   2272   ret i32 %b
   2273 }
   2274 
   2275 define i32 @fptoui(float %a) nounwind { ; Scalar float -> unsigned i32 convert; expected to select vcvttss2usi into eax.
   2276 ; GENERIC-LABEL: fptoui:
   2277 ; GENERIC:       # %bb.0:
   2278 ; GENERIC-NEXT:    vcvttss2usi %xmm0, %eax # sched: [5:1.00]
   2279 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2280 ;
   2281 ; SKX-LABEL: fptoui:
   2282 ; SKX:       # %bb.0:
   2283 ; SKX-NEXT:    vcvttss2usi %xmm0, %eax # sched: [6:1.00]
   2284 ; SKX-NEXT:    retq # sched: [7:1.00]
   2285   %b = fptoui float %a to i32 ; the conversion under test
   2286   ret i32 %b
   2287 }
   2288 
   2289 define float @uitof32(i32 %a) nounwind { ; Scalar unsigned i32 -> float convert; expected to select vcvtusi2ssl from edi.
   2290 ; GENERIC-LABEL: uitof32:
   2291 ; GENERIC:       # %bb.0:
   2292 ; GENERIC-NEXT:    vcvtusi2ssl %edi, %xmm0, %xmm0 # sched: [5:2.00]
   2293 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2294 ;
   2295 ; SKX-LABEL: uitof32:
   2296 ; SKX:       # %bb.0:
   2297 ; SKX-NEXT:    vcvtusi2ssl %edi, %xmm0, %xmm0 # sched: [5:1.00]
   2298 ; SKX-NEXT:    retq # sched: [7:1.00]
   2299   %b = uitofp i32 %a to float ; the conversion under test
   2300   ret float %b
   2301 }
   2302 
   2303 define double @uitof64(i32 %a) nounwind { ; Scalar unsigned i32 -> double convert; expected to select vcvtusi2sdl from edi.
   2304 ; GENERIC-LABEL: uitof64:
   2305 ; GENERIC:       # %bb.0:
   2306 ; GENERIC-NEXT:    vcvtusi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00]
   2307 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2308 ;
   2309 ; SKX-LABEL: uitof64:
   2310 ; SKX:       # %bb.0:
   2311 ; SKX-NEXT:    vcvtusi2sdl %edi, %xmm0, %xmm0 # sched: [5:1.00]
   2312 ; SKX-NEXT:    retq # sched: [7:1.00]
   2313   %b = uitofp i32 %a to double ; the conversion under test
   2314   ret double %b
   2315 }
   2316 
   2317 define <16 x float> @sbto16f32(<16 x i32> %a) { ; Signed convert of a 16-lane i1 compare result to float; expected code goes through a k-register (vpmovd2m / vpmovm2d) before vcvtdq2ps.
   2318 ; GENERIC-LABEL: sbto16f32:
   2319 ; GENERIC:       # %bb.0:
   2320 ; GENERIC-NEXT:    vpmovd2m %zmm0, %k0 # sched: [1:0.33]
   2321 ; GENERIC-NEXT:    vpmovm2d %k0, %zmm0 # sched: [1:0.33]
   2322 ; GENERIC-NEXT:    vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00]
   2323 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2324 ;
   2325 ; SKX-LABEL: sbto16f32:
   2326 ; SKX:       # %bb.0:
   2327 ; SKX-NEXT:    vpmovd2m %zmm0, %k0 # sched: [1:1.00]
   2328 ; SKX-NEXT:    vpmovm2d %k0, %zmm0 # sched: [1:0.25]
   2329 ; SKX-NEXT:    vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50]
   2330 ; SKX-NEXT:    retq # sched: [7:1.00]
   2331   %mask = icmp slt <16 x i32> %a, zeroinitializer ; per-lane "is negative" predicate
   2332   %1 = sitofp <16 x i1> %mask to <16 x float> ; the conversion under test (signed, from i1)
   2333   ret <16 x float> %1
   2334 }
   2335 
   2336 define <16 x float> @scto16f32(<16 x i8> %a) { ; Signed i8 -> f32 convert, 16 lanes; expected as sign-extend to dwords (vpmovsxbd) followed by vcvtdq2ps.
   2337 ; GENERIC-LABEL: scto16f32:
   2338 ; GENERIC:       # %bb.0:
   2339 ; GENERIC-NEXT:    vpmovsxbd %xmm0, %zmm0 # sched: [1:1.00]
   2340 ; GENERIC-NEXT:    vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00]
   2341 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2342 ;
   2343 ; SKX-LABEL: scto16f32:
   2344 ; SKX:       # %bb.0:
   2345 ; SKX-NEXT:    vpmovsxbd %xmm0, %zmm0 # sched: [3:1.00]
   2346 ; SKX-NEXT:    vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50]
   2347 ; SKX-NEXT:    retq # sched: [7:1.00]
   2348   %1 = sitofp <16 x i8> %a to <16 x float> ; the conversion under test
   2349   ret <16 x float> %1
   2350 }
   2351 
   2352 define <16 x float> @ssto16f32(<16 x i16> %a) { ; Signed i16 -> f32 convert, 16 lanes; expected as sign-extend to dwords (vpmovsxwd) followed by vcvtdq2ps.
   2353 ; GENERIC-LABEL: ssto16f32:
   2354 ; GENERIC:       # %bb.0:
   2355 ; GENERIC-NEXT:    vpmovsxwd %ymm0, %zmm0 # sched: [1:1.00]
   2356 ; GENERIC-NEXT:    vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00]
   2357 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2358 ;
   2359 ; SKX-LABEL: ssto16f32:
   2360 ; SKX:       # %bb.0:
   2361 ; SKX-NEXT:    vpmovsxwd %ymm0, %zmm0 # sched: [3:1.00]
   2362 ; SKX-NEXT:    vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50]
   2363 ; SKX-NEXT:    retq # sched: [7:1.00]
   2364   %1 = sitofp <16 x i16> %a to <16 x float> ; the conversion under test
   2365   ret <16 x float> %1
   2366 }
   2367 
   2368 define <8 x double> @ssto16f64(<8 x i16> %a) { ; Signed i16 -> f64 convert; note the types are 8 lanes wide despite the "16" in the function name.
   2369 ; GENERIC-LABEL: ssto16f64:
   2370 ; GENERIC:       # %bb.0:
   2371 ; GENERIC-NEXT:    vpmovsxwd %xmm0, %ymm0 # sched: [1:1.00]
   2372 ; GENERIC-NEXT:    vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
   2373 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2374 ;
   2375 ; SKX-LABEL: ssto16f64:
   2376 ; SKX:       # %bb.0:
   2377 ; SKX-NEXT:    vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00]
   2378 ; SKX-NEXT:    vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
   2379 ; SKX-NEXT:    retq # sched: [7:1.00]
   2380   %1 = sitofp <8 x i16> %a to <8 x double> ; the conversion under test; expected as vpmovsxwd + vcvtdq2pd
   2381   ret <8 x double> %1
   2382 }
   2383 
   2384 define <8 x double> @scto8f64(<8 x i8> %a) { ; Signed i8 -> f64 convert, 8 lanes; expected code widens with vpmovzxwd, then uses a vpslld/vpsrad $24 pair before vcvtdq2pd.
   2385 ; GENERIC-LABEL: scto8f64:
   2386 ; GENERIC:       # %bb.0:
   2387 ; GENERIC-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
   2388 ; GENERIC-NEXT:    vpslld $24, %ymm0, %ymm0 # sched: [1:1.00]
   2389 ; GENERIC-NEXT:    vpsrad $24, %ymm0, %ymm0 # sched: [1:1.00]
   2390 ; GENERIC-NEXT:    vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
   2391 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2392 ;
   2393 ; SKX-LABEL: scto8f64:
   2394 ; SKX:       # %bb.0:
   2395 ; SKX-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
   2396 ; SKX-NEXT:    vpslld $24, %ymm0, %ymm0 # sched: [1:0.50]
   2397 ; SKX-NEXT:    vpsrad $24, %ymm0, %ymm0 # sched: [1:0.50]
   2398 ; SKX-NEXT:    vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
   2399 ; SKX-NEXT:    retq # sched: [7:1.00]
   2400   %1 = sitofp <8 x i8> %a to <8 x double> ; the conversion under test
   2401   ret <8 x double> %1
   2402 }
   2403 
   2404 define <16 x double> @scto16f64(<16 x i8> %a) { ; Signed i8 -> f64 convert, 16 lanes; the result is expected as two zmm conversions (low half to zmm0, extracted high half to zmm1).
   2405 ; GENERIC-LABEL: scto16f64:
   2406 ; GENERIC:       # %bb.0:
   2407 ; GENERIC-NEXT:    vpmovsxbd %xmm0, %zmm1 # sched: [1:1.00]
   2408 ; GENERIC-NEXT:    vcvtdq2pd %ymm1, %zmm0 # sched: [4:1.00]
   2409 ; GENERIC-NEXT:    vextracti64x4 $1, %zmm1, %ymm1 # sched: [1:1.00]
   2410 ; GENERIC-NEXT:    vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00]
   2411 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2412 ;
   2413 ; SKX-LABEL: scto16f64:
   2414 ; SKX:       # %bb.0:
   2415 ; SKX-NEXT:    vpmovsxbd %xmm0, %zmm1 # sched: [3:1.00]
   2416 ; SKX-NEXT:    vcvtdq2pd %ymm1, %zmm0 # sched: [7:1.00]
   2417 ; SKX-NEXT:    vextracti64x4 $1, %zmm1, %ymm1 # sched: [3:1.00]
   2418 ; SKX-NEXT:    vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00]
   2419 ; SKX-NEXT:    retq # sched: [7:1.00]
   2420   %b = sitofp <16 x i8> %a to <16 x double> ; the conversion under test
   2421   ret <16 x double> %b
   2422 }
   2423 
   2424 define <16 x double> @sbto16f64(<16 x double> %a) { ; Signed convert of a 16-lane i1 compare result to f64; expected code builds two 8-lane masks, joins them with kunpckbw, and converts each half.
   2425 ; GENERIC-LABEL: sbto16f64:
   2426 ; GENERIC:       # %bb.0:
   2427 ; GENERIC-NEXT:    vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:1.00]
   2428 ; GENERIC-NEXT:    vcmpltpd %zmm0, %zmm2, %k0 # sched: [3:1.00]
   2429 ; GENERIC-NEXT:    vcmpltpd %zmm1, %zmm2, %k1 # sched: [3:1.00]
   2430 ; GENERIC-NEXT:    kunpckbw %k0, %k1, %k0 # sched: [1:1.00]
   2431 ; GENERIC-NEXT:    vpmovm2d %k0, %zmm1 # sched: [1:0.33]
   2432 ; GENERIC-NEXT:    vcvtdq2pd %ymm1, %zmm0 # sched: [4:1.00]
   2433 ; GENERIC-NEXT:    vextracti64x4 $1, %zmm1, %ymm1 # sched: [1:1.00]
   2434 ; GENERIC-NEXT:    vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00]
   2435 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2436 ;
   2437 ; SKX-LABEL: sbto16f64:
   2438 ; SKX:       # %bb.0:
   2439 ; SKX-NEXT:    vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
   2440 ; SKX-NEXT:    vcmpltpd %zmm0, %zmm2, %k0 # sched: [3:1.00]
   2441 ; SKX-NEXT:    vcmpltpd %zmm1, %zmm2, %k1 # sched: [3:1.00]
   2442 ; SKX-NEXT:    kunpckbw %k0, %k1, %k0 # sched: [3:1.00]
   2443 ; SKX-NEXT:    vpmovm2d %k0, %zmm1 # sched: [1:0.25]
   2444 ; SKX-NEXT:    vcvtdq2pd %ymm1, %zmm0 # sched: [7:1.00]
   2445 ; SKX-NEXT:    vextracti64x4 $1, %zmm1, %ymm1 # sched: [3:1.00]
   2446 ; SKX-NEXT:    vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00]
   2447 ; SKX-NEXT:    retq # sched: [7:1.00]
   2448   %cmpres = fcmp ogt <16 x double> %a, zeroinitializer ; a > 0; expected as vcmpltpd against a zeroed register
   2449   %1 = sitofp <16 x i1> %cmpres to <16 x double> ; the conversion under test (signed, from i1)
   2450   ret <16 x double> %1
   2451 }
   2452 
   2453 define <8 x double> @sbto8f64(<8 x double> %a) { ; Signed convert of an 8-lane i1 compare result to f64; expected code compares into k0, expands with vpmovm2d, then vcvtdq2pd.
   2454 ; GENERIC-LABEL: sbto8f64:
   2455 ; GENERIC:       # %bb.0:
   2456 ; GENERIC-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
   2457 ; GENERIC-NEXT:    vcmpltpd %zmm0, %zmm1, %k0 # sched: [3:1.00]
   2458 ; GENERIC-NEXT:    vpmovm2d %k0, %ymm0 # sched: [1:0.33]
   2459 ; GENERIC-NEXT:    vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
   2460 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2461 ;
   2462 ; SKX-LABEL: sbto8f64:
   2463 ; SKX:       # %bb.0:
   2464 ; SKX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
   2465 ; SKX-NEXT:    vcmpltpd %zmm0, %zmm1, %k0 # sched: [3:1.00]
   2466 ; SKX-NEXT:    vpmovm2d %k0, %ymm0 # sched: [1:0.25]
   2467 ; SKX-NEXT:    vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
   2468 ; SKX-NEXT:    retq # sched: [7:1.00]
   2469   %cmpres = fcmp ogt <8 x double> %a, zeroinitializer ; a > 0; expected as vcmpltpd against a zeroed register
   2470   %1 = sitofp <8 x i1> %cmpres to <8 x double> ; the conversion under test (signed, from i1)
   2471   ret <8 x double> %1
   2472 }
   2473 
   2474 define <8 x float> @sbto8f32(<8 x float> %a) { ; Signed convert of an 8-lane i1 compare result to f32; the expected compare writes a ymm vector directly, with no k-register involved.
   2475 ; GENERIC-LABEL: sbto8f32:
   2476 ; GENERIC:       # %bb.0:
   2477 ; GENERIC-NEXT:    vxorps %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
   2478 ; GENERIC-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
   2479 ; GENERIC-NEXT:    vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00]
   2480 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2481 ;
   2482 ; SKX-LABEL: sbto8f32:
   2483 ; SKX:       # %bb.0:
   2484 ; SKX-NEXT:    vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
   2485 ; SKX-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
   2486 ; SKX-NEXT:    vcvtdq2ps %ymm0, %ymm0 # sched: [4:0.50]
   2487 ; SKX-NEXT:    retq # sched: [7:1.00]
   2488   %cmpres = fcmp ogt <8 x float> %a, zeroinitializer ; a > 0; expected as vcmpltps against a zeroed register
   2489   %1 = sitofp <8 x i1> %cmpres to <8 x float> ; the conversion under test (signed, from i1)
   2490   ret <8 x float> %1
   2491 }
   2492 
   2493 define <4 x float> @sbto4f32(<4 x float> %a) { ; Signed convert of a 4-lane i1 compare result to f32; the expected compare writes an xmm vector directly, then vcvtdq2ps.
   2494 ; GENERIC-LABEL: sbto4f32:
   2495 ; GENERIC:       # %bb.0:
   2496 ; GENERIC-NEXT:    vxorps %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
   2497 ; GENERIC-NEXT:    vcmpltps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
   2498 ; GENERIC-NEXT:    vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
   2499 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2500 ;
   2501 ; SKX-LABEL: sbto4f32:
   2502 ; SKX:       # %bb.0:
   2503 ; SKX-NEXT:    vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
   2504 ; SKX-NEXT:    vcmpltps %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
   2505 ; SKX-NEXT:    vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.50]
   2506 ; SKX-NEXT:    retq # sched: [7:1.00]
   2507   %cmpres = fcmp ogt <4 x float> %a, zeroinitializer ; a > 0; expected as vcmpltps against a zeroed register
   2508   %1 = sitofp <4 x i1> %cmpres to <4 x float> ; the conversion under test (signed, from i1)
   2509   ret <4 x float> %1
   2510 }
   2511 
   2512 define <4 x double> @sbto4f64(<4 x double> %a) { ; Signed convert of a 4-lane i1 compare result to f64; expected code compares into k0, expands with vpmovm2d, then vcvtdq2pd.
   2513 ; GENERIC-LABEL: sbto4f64:
   2514 ; GENERIC:       # %bb.0:
   2515 ; GENERIC-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
   2516 ; GENERIC-NEXT:    vcmpltpd %ymm0, %ymm1, %k0 # sched: [3:1.00]
   2517 ; GENERIC-NEXT:    vpmovm2d %k0, %xmm0 # sched: [1:0.33]
   2518 ; GENERIC-NEXT:    vcvtdq2pd %xmm0, %ymm0 # sched: [4:1.00]
   2519 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2520 ;
   2521 ; SKX-LABEL: sbto4f64:
   2522 ; SKX:       # %bb.0:
   2523 ; SKX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
   2524 ; SKX-NEXT:    vcmpltpd %ymm0, %ymm1, %k0 # sched: [3:1.00]
   2525 ; SKX-NEXT:    vpmovm2d %k0, %xmm0 # sched: [1:0.25]
   2526 ; SKX-NEXT:    vcvtdq2pd %xmm0, %ymm0 # sched: [7:1.00]
   2527 ; SKX-NEXT:    retq # sched: [7:1.00]
   2528   %cmpres = fcmp ogt <4 x double> %a, zeroinitializer ; a > 0; expected as vcmpltpd against a zeroed register
   2529   %1 = sitofp <4 x i1> %cmpres to <4 x double> ; the conversion under test (signed, from i1)
   2530   ret <4 x double> %1
   2531 }
   2532 
   2533 define <2 x float> @sbto2f32(<2 x float> %a) { ; Signed convert of a 2-lane i1 compare result to f32; the expected code operates on full xmm registers (vcmpltps + vcvtdq2ps).
   2534 ; GENERIC-LABEL: sbto2f32:
   2535 ; GENERIC:       # %bb.0:
   2536 ; GENERIC-NEXT:    vxorps %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
   2537 ; GENERIC-NEXT:    vcmpltps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
   2538 ; GENERIC-NEXT:    vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
   2539 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2540 ;
   2541 ; SKX-LABEL: sbto2f32:
   2542 ; SKX:       # %bb.0:
   2543 ; SKX-NEXT:    vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
   2544 ; SKX-NEXT:    vcmpltps %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
   2545 ; SKX-NEXT:    vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.50]
   2546 ; SKX-NEXT:    retq # sched: [7:1.00]
   2547   %cmpres = fcmp ogt <2 x float> %a, zeroinitializer ; a > 0; expected as vcmpltps against a zeroed register
   2548   %1 = sitofp <2 x i1> %cmpres to <2 x float> ; the conversion under test (signed, from i1)
   2549   ret <2 x float> %1
   2550 }
   2551 
   2552 define <2 x double> @sbto2f64(<2 x double> %a) { ; Signed convert of a 2-lane i1 compare result to f64; expected code shuffles dwords 0 and 2 of the compare result into the low lanes (vpermilps) before vcvtdq2pd.
   2553 ; GENERIC-LABEL: sbto2f64:
   2554 ; GENERIC:       # %bb.0:
   2555 ; GENERIC-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
   2556 ; GENERIC-NEXT:    vcmpltpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
   2557 ; GENERIC-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] sched: [1:1.00]
   2558 ; GENERIC-NEXT:    vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00]
   2559 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2560 ;
   2561 ; SKX-LABEL: sbto2f64:
   2562 ; SKX:       # %bb.0:
   2563 ; SKX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
   2564 ; SKX-NEXT:    vcmpltpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
   2565 ; SKX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] sched: [1:1.00]
   2566 ; SKX-NEXT:    vcvtdq2pd %xmm0, %xmm0 # sched: [4:0.50]
   2567 ; SKX-NEXT:    retq # sched: [7:1.00]
   2568   %cmpres = fcmp ogt <2 x double> %a, zeroinitializer ; a > 0; expected as vcmpltpd against a zeroed register
   2569   %1 = sitofp <2 x i1> %cmpres to <2 x double> ; the conversion under test (signed, from i1)
   2570   ret <2 x double> %1
   2571 }
   2572 
   2573 define <16 x float> @ucto16f32(<16 x i8> %a) { ; Unsigned i8 -> f32 convert, 16 lanes; expected as zero-extend to dwords (vpmovzxbd) followed by vcvtdq2ps.
   2574 ; GENERIC-LABEL: ucto16f32:
   2575 ; GENERIC:       # %bb.0:
   2576 ; GENERIC-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [1:1.00]
   2577 ; GENERIC-NEXT:    vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00]
   2578 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2579 ;
   2580 ; SKX-LABEL: ucto16f32:
   2581 ; SKX:       # %bb.0:
   2582 ; SKX-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [3:1.00]
   2583 ; SKX-NEXT:    vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50]
   2584 ; SKX-NEXT:    retq # sched: [7:1.00]
   2585   %b = uitofp <16 x i8> %a to <16 x float> ; the conversion under test; the signed convert is expected after zero-extension
   2586   ret <16 x float>%b
   2587 }
   2588 
   2589 define <8 x double> @ucto8f64(<8 x i8> %a) { ; Unsigned i8 -> f64 convert, 8 lanes; expected code ANDs with a constant loaded RIP-relative, widens with vpmovzxwd, then vcvtdq2pd.
   2590 ; GENERIC-LABEL: ucto8f64:
   2591 ; GENERIC:       # %bb.0:
   2592 ; GENERIC-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:0.50]
   2593 ; GENERIC-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
   2594 ; GENERIC-NEXT:    vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
   2595 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2596 ;
   2597 ; SKX-LABEL: ucto8f64:
   2598 ; SKX:       # %bb.0:
   2599 ; SKX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:0.50]
   2600 ; SKX-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
   2601 ; SKX-NEXT:    vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
   2602 ; SKX-NEXT:    retq # sched: [7:1.00]
   2603   %b = uitofp <8 x i8> %a to <8 x double> ; the conversion under test
   2604   ret <8 x double> %b
   2605 }
   2606 
   2607 define <16 x float> @swto16f32(<16 x i16> %a) { ; Signed i16 -> f32 convert, 16 lanes; expected as vpmovsxwd followed by vcvtdq2ps.
   2608 ; GENERIC-LABEL: swto16f32:
   2609 ; GENERIC:       # %bb.0:
   2610 ; GENERIC-NEXT:    vpmovsxwd %ymm0, %zmm0 # sched: [1:1.00]
   2611 ; GENERIC-NEXT:    vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00]
   2612 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2613 ;
   2614 ; SKX-LABEL: swto16f32:
   2615 ; SKX:       # %bb.0:
   2616 ; SKX-NEXT:    vpmovsxwd %ymm0, %zmm0 # sched: [3:1.00]
   2617 ; SKX-NEXT:    vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50]
   2618 ; SKX-NEXT:    retq # sched: [7:1.00]
   2619   %b = sitofp <16 x i16> %a to <16 x float> ; the conversion under test
   2620   ret <16 x float> %b
   2621 }
   2622 
   2623 define <8 x double> @swto8f64(<8 x i16> %a) { ; Signed i16 -> f64 convert, 8 lanes; expected as vpmovsxwd (xmm to ymm) followed by vcvtdq2pd.
   2624 ; GENERIC-LABEL: swto8f64:
   2625 ; GENERIC:       # %bb.0:
   2626 ; GENERIC-NEXT:    vpmovsxwd %xmm0, %ymm0 # sched: [1:1.00]
   2627 ; GENERIC-NEXT:    vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
   2628 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2629 ;
   2630 ; SKX-LABEL: swto8f64:
   2631 ; SKX:       # %bb.0:
   2632 ; SKX-NEXT:    vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00]
   2633 ; SKX-NEXT:    vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
   2634 ; SKX-NEXT:    retq # sched: [7:1.00]
   2635   %b = sitofp <8 x i16> %a to <8 x double> ; the conversion under test
   2636   ret <8 x double> %b
   2637 }
   2638 
   2639 define <16 x double> @swto16f64(<16 x i16> %a) { ; Signed i16 -> f64 convert, 16 lanes; the result is expected as two zmm conversions (low half to zmm0, extracted high half to zmm1).
   2640 ; GENERIC-LABEL: swto16f64:
   2641 ; GENERIC:       # %bb.0:
   2642 ; GENERIC-NEXT:    vpmovsxwd %ymm0, %zmm1 # sched: [1:1.00]
   2643 ; GENERIC-NEXT:    vcvtdq2pd %ymm1, %zmm0 # sched: [4:1.00]
   2644 ; GENERIC-NEXT:    vextracti64x4 $1, %zmm1, %ymm1 # sched: [1:1.00]
   2645 ; GENERIC-NEXT:    vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00]
   2646 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2647 ;
   2648 ; SKX-LABEL: swto16f64:
   2649 ; SKX:       # %bb.0:
   2650 ; SKX-NEXT:    vpmovsxwd %ymm0, %zmm1 # sched: [3:1.00]
   2651 ; SKX-NEXT:    vcvtdq2pd %ymm1, %zmm0 # sched: [7:1.00]
   2652 ; SKX-NEXT:    vextracti64x4 $1, %zmm1, %ymm1 # sched: [3:1.00]
   2653 ; SKX-NEXT:    vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00]
   2654 ; SKX-NEXT:    retq # sched: [7:1.00]
   2655   %b = sitofp <16 x i16> %a to <16 x double> ; the conversion under test
   2656   ret <16 x double> %b
   2657 }
   2658 
   2659 define <16 x double> @ucto16f64(<16 x i8> %a) { ; Unsigned i8 -> f64 convert, 16 lanes; expected as vpmovzxbd into zmm1, then one vcvtdq2pd per 8-lane half.
   2660 ; GENERIC-LABEL: ucto16f64:
   2661 ; GENERIC:       # %bb.0:
   2662 ; GENERIC-NEXT:    vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [1:1.00]
   2663 ; GENERIC-NEXT:    vcvtdq2pd %ymm1, %zmm0 # sched: [4:1.00]
   2664 ; GENERIC-NEXT:    vextracti64x4 $1, %zmm1, %ymm1 # sched: [1:1.00]
   2665 ; GENERIC-NEXT:    vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00]
   2666 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2667 ;
   2668 ; SKX-LABEL: ucto16f64:
   2669 ; SKX:       # %bb.0:
   2670 ; SKX-NEXT:    vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [3:1.00]
   2671 ; SKX-NEXT:    vcvtdq2pd %ymm1, %zmm0 # sched: [7:1.00]
   2672 ; SKX-NEXT:    vextracti64x4 $1, %zmm1, %ymm1 # sched: [3:1.00]
   2673 ; SKX-NEXT:    vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00]
   2674 ; SKX-NEXT:    retq # sched: [7:1.00]
   2675   %b = uitofp <16 x i8> %a to <16 x double> ; the conversion under test
   2676   ret <16 x double> %b
   2677 }
   2678 
   2679 define <16 x float> @uwto16f32(<16 x i16> %a) { ; Unsigned i16 -> f32 convert, 16 lanes; expected as zero-extend to dwords (vpmovzxwd) followed by vcvtdq2ps.
   2680 ; GENERIC-LABEL: uwto16f32:
   2681 ; GENERIC:       # %bb.0:
   2682 ; GENERIC-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [1:1.00]
   2683 ; GENERIC-NEXT:    vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00]
   2684 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2685 ;
   2686 ; SKX-LABEL: uwto16f32:
   2687 ; SKX:       # %bb.0:
   2688 ; SKX-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [3:1.00]
   2689 ; SKX-NEXT:    vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50]
   2690 ; SKX-NEXT:    retq # sched: [7:1.00]
   2691   %b = uitofp <16 x i16> %a to <16 x float> ; the conversion under test
   2692   ret <16 x float> %b
   2693 }
   2694 
   2695 define <8 x double> @uwto8f64(<8 x i16> %a) { ; Unsigned i16 -> f64 convert, 8 lanes; expected as vpmovzxwd (xmm to ymm) followed by vcvtdq2pd.
   2696 ; GENERIC-LABEL: uwto8f64:
   2697 ; GENERIC:       # %bb.0:
   2698 ; GENERIC-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
   2699 ; GENERIC-NEXT:    vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
   2700 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2701 ;
   2702 ; SKX-LABEL: uwto8f64:
   2703 ; SKX:       # %bb.0:
   2704 ; SKX-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
   2705 ; SKX-NEXT:    vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
   2706 ; SKX-NEXT:    retq # sched: [7:1.00]
   2707   %b = uitofp <8 x i16> %a to <8 x double> ; the conversion under test
   2708   ret <8 x double> %b
   2709 }
   2710 
   2711 define <16 x double> @uwto16f64(<16 x i16> %a) { ; Unsigned i16 -> f64 convert, 16 lanes; expected as vpmovzxwd into zmm1, then one vcvtdq2pd per 8-lane half.
   2712 ; GENERIC-LABEL: uwto16f64:
   2713 ; GENERIC:       # %bb.0:
   2714 ; GENERIC-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [1:1.00]
   2715 ; GENERIC-NEXT:    vcvtdq2pd %ymm1, %zmm0 # sched: [4:1.00]
   2716 ; GENERIC-NEXT:    vextracti64x4 $1, %zmm1, %ymm1 # sched: [1:1.00]
   2717 ; GENERIC-NEXT:    vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00]
   2718 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2719 ;
   2720 ; SKX-LABEL: uwto16f64:
   2721 ; SKX:       # %bb.0:
   2722 ; SKX-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [3:1.00]
   2723 ; SKX-NEXT:    vcvtdq2pd %ymm1, %zmm0 # sched: [7:1.00]
   2724 ; SKX-NEXT:    vextracti64x4 $1, %zmm1, %ymm1 # sched: [3:1.00]
   2725 ; SKX-NEXT:    vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00]
   2726 ; SKX-NEXT:    retq # sched: [7:1.00]
   2727   %b = uitofp <16 x i16> %a to <16 x double> ; the conversion under test
   2728   ret <16 x double> %b
   2729 }
   2730 
   2731 define <16 x float> @sito16f32(<16 x i32> %a) { ; Signed i32 -> f32 convert, 16 lanes; expected to select a single zmm vcvtdq2ps.
   2732 ; GENERIC-LABEL: sito16f32:
   2733 ; GENERIC:       # %bb.0:
   2734 ; GENERIC-NEXT:    vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00]
   2735 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2736 ;
   2737 ; SKX-LABEL: sito16f32:
   2738 ; SKX:       # %bb.0:
   2739 ; SKX-NEXT:    vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50]
   2740 ; SKX-NEXT:    retq # sched: [7:1.00]
   2741   %b = sitofp <16 x i32> %a to <16 x float> ; the conversion under test
   2742   ret <16 x float> %b
   2743 }
   2744 
   2745 define <16 x double> @sito16f64(<16 x i32> %a) { ; Signed i32 -> f64 convert, 16 lanes; the low half is expected in zmm2 first and copied to zmm0 last, after the upper half of zmm0 has been extracted.
   2746 ; GENERIC-LABEL: sito16f64:
   2747 ; GENERIC:       # %bb.0:
   2748 ; GENERIC-NEXT:    vcvtdq2pd %ymm0, %zmm2 # sched: [4:1.00]
   2749 ; GENERIC-NEXT:    vextractf64x4 $1, %zmm0, %ymm0 # sched: [1:1.00]
   2750 ; GENERIC-NEXT:    vcvtdq2pd %ymm0, %zmm1 # sched: [4:1.00]
   2751 ; GENERIC-NEXT:    vmovaps %zmm2, %zmm0 # sched: [1:1.00]
   2752 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2753 ;
   2754 ; SKX-LABEL: sito16f64:
   2755 ; SKX:       # %bb.0:
   2756 ; SKX-NEXT:    vcvtdq2pd %ymm0, %zmm2 # sched: [7:1.00]
   2757 ; SKX-NEXT:    vextractf64x4 $1, %zmm0, %ymm0 # sched: [3:1.00]
   2758 ; SKX-NEXT:    vcvtdq2pd %ymm0, %zmm1 # sched: [7:1.00]
   2759 ; SKX-NEXT:    vmovaps %zmm2, %zmm0 # sched: [1:0.33]
   2760 ; SKX-NEXT:    retq # sched: [7:1.00]
   2761   %b = sitofp <16 x i32> %a to <16 x double> ; the conversion under test
   2762   ret <16 x double> %b
   2763 }
   2764 
   2765 define <16 x float> @usto16f32(<16 x i16> %a) { ; Unsigned i16 -> f32 convert, 16 lanes; expected as vpmovzxwd followed by vcvtdq2ps.
   2766 ; GENERIC-LABEL: usto16f32:
   2767 ; GENERIC:       # %bb.0:
   2768 ; GENERIC-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [1:1.00]
   2769 ; GENERIC-NEXT:    vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00]
   2770 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2771 ;
   2772 ; SKX-LABEL: usto16f32:
   2773 ; SKX:       # %bb.0:
   2774 ; SKX-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [3:1.00]
   2775 ; SKX-NEXT:    vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50]
   2776 ; SKX-NEXT:    retq # sched: [7:1.00]
   2777   %b = uitofp <16 x i16> %a to <16 x float> ; the conversion under test
   2778   ret <16 x float> %b
   2779 }
   2780 
   2781 define <16 x float> @ubto16f32(<16 x i32> %a) { ; Unsigned convert of a 16-lane i1 compare result to f32; expected code adds a vpsrld $31 between the mask expansion (vpmovm2d) and vcvtdq2ps.
   2782 ; GENERIC-LABEL: ubto16f32:
   2783 ; GENERIC:       # %bb.0:
   2784 ; GENERIC-NEXT:    vpmovd2m %zmm0, %k0 # sched: [1:0.33]
   2785 ; GENERIC-NEXT:    vpmovm2d %k0, %zmm0 # sched: [1:0.33]
   2786 ; GENERIC-NEXT:    vpsrld $31, %zmm0, %zmm0 # sched: [1:1.00]
   2787 ; GENERIC-NEXT:    vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00]
   2788 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2789 ;
   2790 ; SKX-LABEL: ubto16f32:
   2791 ; SKX:       # %bb.0:
   2792 ; SKX-NEXT:    vpmovd2m %zmm0, %k0 # sched: [1:1.00]
   2793 ; SKX-NEXT:    vpmovm2d %k0, %zmm0 # sched: [1:0.25]
   2794 ; SKX-NEXT:    vpsrld $31, %zmm0, %zmm0 # sched: [1:1.00]
   2795 ; SKX-NEXT:    vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50]
   2796 ; SKX-NEXT:    retq # sched: [7:1.00]
   2797   %mask = icmp slt <16 x i32> %a, zeroinitializer ; per-lane "is negative" predicate
   2798   %1 = uitofp <16 x i1> %mask to <16 x float> ; the conversion under test (unsigned, from i1)
   2799   ret <16 x float> %1
   2800 }
   2801 
   2802 define <16 x double> @ubto16f64(<16 x i32> %a) { ; Unsigned convert of a 16-lane i1 compare result to f64; expected as mask expansion, vpsrld $31, then one vcvtdq2pd per 8-lane half.
   2803 ; GENERIC-LABEL: ubto16f64:
   2804 ; GENERIC:       # %bb.0:
   2805 ; GENERIC-NEXT:    vpmovd2m %zmm0, %k0 # sched: [1:0.33]
   2806 ; GENERIC-NEXT:    vpmovm2d %k0, %zmm0 # sched: [1:0.33]
   2807 ; GENERIC-NEXT:    vpsrld $31, %zmm0, %zmm1 # sched: [1:1.00]
   2808 ; GENERIC-NEXT:    vcvtdq2pd %ymm1, %zmm0 # sched: [4:1.00]
   2809 ; GENERIC-NEXT:    vextracti64x4 $1, %zmm1, %ymm1 # sched: [1:1.00]
   2810 ; GENERIC-NEXT:    vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00]
   2811 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2812 ;
   2813 ; SKX-LABEL: ubto16f64:
   2814 ; SKX:       # %bb.0:
   2815 ; SKX-NEXT:    vpmovd2m %zmm0, %k0 # sched: [1:1.00]
   2816 ; SKX-NEXT:    vpmovm2d %k0, %zmm0 # sched: [1:0.25]
   2817 ; SKX-NEXT:    vpsrld $31, %zmm0, %zmm1 # sched: [1:1.00]
   2818 ; SKX-NEXT:    vcvtdq2pd %ymm1, %zmm0 # sched: [7:1.00]
   2819 ; SKX-NEXT:    vextracti64x4 $1, %zmm1, %ymm1 # sched: [3:1.00]
   2820 ; SKX-NEXT:    vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00]
   2821 ; SKX-NEXT:    retq # sched: [7:1.00]
   2822   %mask = icmp slt <16 x i32> %a, zeroinitializer ; per-lane "is negative" predicate
   2823   %1 = uitofp <16 x i1> %mask to <16 x double> ; the conversion under test (unsigned, from i1)
   2824   ret <16 x double> %1
   2825 }
   2826 
   2827 define <8 x float> @ubto8f32(<8 x i32> %a) { ; Unsigned convert of an 8-lane i1 compare result to f32; no convert instruction is expected, only a compare and a vpandd with a broadcast constant.
   2828 ; GENERIC-LABEL: ubto8f32:
   2829 ; GENERIC:       # %bb.0:
   2830 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
   2831 ; GENERIC-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
   2832 ; GENERIC-NEXT:    vpandd {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [8:0.50]
   2833 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2834 ;
   2835 ; SKX-LABEL: ubto8f32:
   2836 ; SKX:       # %bb.0:
   2837 ; SKX-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
   2838 ; SKX-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
   2839 ; SKX-NEXT:    vpandd {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [8:0.50]
   2840 ; SKX-NEXT:    retq # sched: [7:1.00]
   2841   %mask = icmp slt <8 x i32> %a, zeroinitializer ; per-lane "is negative" predicate; expected as vpcmpgtd against a zeroed register
   2842   %1 = uitofp <8 x i1> %mask to <8 x float> ; the conversion under test (unsigned, from i1)
   2843   ret <8 x float> %1
   2844 }
   2845 
   2846 define <8 x double> @ubto8f64(<8 x i32> %a) {
        ; uitofp of the <8 x i1> mask (icmp slt %a, zeroinitializer) to <8 x double>.
        ; The expected code shifts the compare result down to bit 0 (vpsrld $31)
        ; and then widens it with a single vcvtdq2pd.
   2847 ; GENERIC-LABEL: ubto8f64:
   2848 ; GENERIC:       # %bb.0:
   2849 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
   2850 ; GENERIC-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
   2851 ; GENERIC-NEXT:    vpsrld $31, %ymm0, %ymm0 # sched: [1:1.00]
   2852 ; GENERIC-NEXT:    vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
   2853 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2854 ;
   2855 ; SKX-LABEL: ubto8f64:
   2856 ; SKX:       # %bb.0:
   2857 ; SKX-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
   2858 ; SKX-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
   2859 ; SKX-NEXT:    vpsrld $31, %ymm0, %ymm0 # sched: [1:0.50]
   2860 ; SKX-NEXT:    vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
   2861 ; SKX-NEXT:    retq # sched: [7:1.00]
   2862   %mask = icmp slt <8 x i32> %a, zeroinitializer
   2863   %1 = uitofp <8 x i1> %mask to <8 x double>
   2864   ret <8 x double> %1
   2865 }
   2866 
   2867 define <4 x float> @ubto4f32(<4 x i32> %a) {
        ; uitofp of the <4 x i1> mask (icmp slt %a, zeroinitializer) to <4 x float>.
        ; 128-bit counterpart of ubto8f32: compare plus vpandd with a {1to4}
        ; broadcast memory operand, no convert instruction expected.
   2868 ; GENERIC-LABEL: ubto4f32:
   2869 ; GENERIC:       # %bb.0:
   2870 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
   2871 ; GENERIC-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
   2872 ; GENERIC-NEXT:    vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0 # sched: [7:0.50]
   2873 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2874 ;
   2875 ; SKX-LABEL: ubto4f32:
   2876 ; SKX:       # %bb.0:
   2877 ; SKX-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
   2878 ; SKX-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
   2879 ; SKX-NEXT:    vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0 # sched: [7:0.50]
   2880 ; SKX-NEXT:    retq # sched: [7:1.00]
   2881   %mask = icmp slt <4 x i32> %a, zeroinitializer
   2882   %1 = uitofp <4 x i1> %mask to <4 x float>
   2883   ret <4 x float> %1
   2884 }
   2885 
   2886 define <4 x double> @ubto4f64(<4 x i32> %a) {
        ; uitofp of the <4 x i1> mask (icmp slt %a, zeroinitializer) to <4 x double>.
        ; The expected code is compare, vpsrld $31, then vcvtdq2pd xmm -> ymm.
   2887 ; GENERIC-LABEL: ubto4f64:
   2888 ; GENERIC:       # %bb.0:
   2889 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
   2890 ; GENERIC-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
   2891 ; GENERIC-NEXT:    vpsrld $31, %xmm0, %xmm0 # sched: [1:1.00]
   2892 ; GENERIC-NEXT:    vcvtdq2pd %xmm0, %ymm0 # sched: [4:1.00]
   2893 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2894 ;
   2895 ; SKX-LABEL: ubto4f64:
   2896 ; SKX:       # %bb.0:
   2897 ; SKX-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
   2898 ; SKX-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
   2899 ; SKX-NEXT:    vpsrld $31, %xmm0, %xmm0 # sched: [1:0.50]
   2900 ; SKX-NEXT:    vcvtdq2pd %xmm0, %ymm0 # sched: [7:1.00]
   2901 ; SKX-NEXT:    retq # sched: [7:1.00]
   2902   %mask = icmp slt <4 x i32> %a, zeroinitializer
   2903   %1 = uitofp <4 x i1> %mask to <4 x double>
   2904   ret <4 x double> %1
   2905 }
   2906 
   2907 define <2 x float> @ubto2f32(<2 x i32> %a) {
        ; uitofp of the <2 x i1> mask (icmp ne %a, zeroinitializer) to <2 x float>.
        ; Unlike the wider ubto* tests the predicate here is `ne`, not `slt`; the
        ; expected code compares 64-bit lanes (vpcmpeqq) and inverts via vpandn.
   2908 ; GENERIC-LABEL: ubto2f32:
   2909 ; GENERIC:       # %bb.0:
   2910 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
   2911 ; GENERIC-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.50]
   2912 ; GENERIC-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   2913 ; GENERIC-NEXT:    vpandn {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:0.50]
   2914 ; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] sched: [1:0.50]
   2915 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2916 ;
   2917 ; SKX-LABEL: ubto2f32:
   2918 ; SKX:       # %bb.0:
   2919 ; SKX-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
   2920 ; SKX-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.33]
   2921 ; SKX-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   2922 ; SKX-NEXT:    vpandn {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:0.50]
   2923 ; SKX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] sched: [1:1.00]
   2924 ; SKX-NEXT:    retq # sched: [7:1.00]
   2925   %mask = icmp ne <2 x i32> %a, zeroinitializer
   2926   %1 = uitofp <2 x i1> %mask to <2 x float>
   2927   ret <2 x float> %1
   2928 }
   2929 
   2930 define <2 x double> @ubto2f64(<2 x i32> %a) {
        ; uitofp of the <2 x i1> mask (icmp ne %a, zeroinitializer) to <2 x double>.
        ; Same compare/invert/shuffle prefix as ubto2f32, followed by an explicit
        ; vcvtdq2pd on the low two dwords.
   2931 ; GENERIC-LABEL: ubto2f64:
   2932 ; GENERIC:       # %bb.0:
   2933 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
   2934 ; GENERIC-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.50]
   2935 ; GENERIC-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   2936 ; GENERIC-NEXT:    vpandn {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:0.50]
   2937 ; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] sched: [1:0.50]
   2938 ; GENERIC-NEXT:    vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00]
   2939 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2940 ;
   2941 ; SKX-LABEL: ubto2f64:
   2942 ; SKX:       # %bb.0:
   2943 ; SKX-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
   2944 ; SKX-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.33]
   2945 ; SKX-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   2946 ; SKX-NEXT:    vpandn {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:0.50]
   2947 ; SKX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] sched: [1:1.00]
   2948 ; SKX-NEXT:    vcvtdq2pd %xmm0, %xmm0 # sched: [4:0.50]
   2949 ; SKX-NEXT:    retq # sched: [7:1.00]
   2950   %mask = icmp ne <2 x i32> %a, zeroinitializer
   2951   %1 = uitofp <2 x i1> %mask to <2 x double>
   2952   ret <2 x double> %1
   2953 }
   2954 
   2955 define <8 x i16> @zext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
        ; Loads <8 x i8> from %i, zero-extends to <8 x i16>, and zeroes the lanes
        ; where %mask is false. Expected: one zero-masked vpmovzxbw from memory.
        ; NOTE(review): declared readnone yet it loads through %i -- confirm intended.
   2956 ; GENERIC-LABEL: zext_8x8mem_to_8x16:
   2957 ; GENERIC:       # %bb.0:
   2958 ; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
   2959 ; GENERIC-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:0.33]
   2960 ; GENERIC-NEXT:    vpmovzxbw {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:1.00]
   2961 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2962 ;
   2963 ; SKX-LABEL: zext_8x8mem_to_8x16:
   2964 ; SKX:       # %bb.0:
   2965 ; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
   2966 ; SKX-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:1.00]
   2967 ; SKX-NEXT:    vpmovzxbw {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [9:1.00]
   2968 ; SKX-NEXT:    retq # sched: [7:1.00]
   2969   %a   = load <8 x i8>,<8 x i8> *%i,align 1
   2970   %x   = zext <8 x i8> %a to <8 x i16>
   2971   %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer
   2972   ret <8 x i16> %ret
   2973 }
   2974 
   2975 define <8 x i16> @sext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
        ; Loads <8 x i8> from %i, sign-extends to <8 x i16>, and zeroes the lanes
        ; where %mask is false. Expected: one zero-masked vpmovsxbw from (%rdi).
        ; NOTE(review): declared readnone yet it loads through %i -- confirm intended.
   2976 ; GENERIC-LABEL: sext_8x8mem_to_8x16:
   2977 ; GENERIC:       # %bb.0:
   2978 ; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
   2979 ; GENERIC-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:0.33]
   2980 ; GENERIC-NEXT:    vpmovsxbw (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00]
   2981 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2982 ;
   2983 ; SKX-LABEL: sext_8x8mem_to_8x16:
   2984 ; SKX:       # %bb.0:
   2985 ; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
   2986 ; SKX-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:1.00]
   2987 ; SKX-NEXT:    vpmovsxbw (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00]
   2988 ; SKX-NEXT:    retq # sched: [7:1.00]
   2989   %a   = load <8 x i8>,<8 x i8> *%i,align 1
   2990   %x   = sext <8 x i8> %a to <8 x i16>
   2991   %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer
   2992   ret <8 x i16> %ret
   2993 }
   2994 
   2995 
   2996 define <16 x i16> @zext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
        ; Loads <16 x i8> from %i, zero-extends to <16 x i16>, and zeroes the lanes
        ; where %mask is false. Expected: zero-masked vpmovzxbw from memory into ymm0.
        ; NOTE(review): declared readnone yet it loads through %i -- confirm intended.
   2997 ; GENERIC-LABEL: zext_16x8mem_to_16x16:
   2998 ; GENERIC:       # %bb.0:
   2999 ; GENERIC-NEXT:    vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
   3000 ; GENERIC-NEXT:    vpmovb2m %xmm0, %k1 # sched: [1:0.33]
   3001 ; GENERIC-NEXT:    vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [8:1.00]
   3002 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3003 ;
   3004 ; SKX-LABEL: zext_16x8mem_to_16x16:
   3005 ; SKX:       # %bb.0:
   3006 ; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50]
   3007 ; SKX-NEXT:    vpmovb2m %xmm0, %k1 # sched: [1:1.00]
   3008 ; SKX-NEXT:    vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [10:1.00]
   3009 ; SKX-NEXT:    retq # sched: [7:1.00]
   3010   %a   = load <16 x i8>,<16 x i8> *%i,align 1
   3011   %x   = zext <16 x i8> %a to <16 x i16>
   3012   %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
   3013   ret <16 x i16> %ret
   3014 }
   3015 
   3016 define <16 x i16> @sext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
        ; Loads <16 x i8> from %i, sign-extends to <16 x i16>, and zeroes the lanes
        ; where %mask is false. Expected: zero-masked vpmovsxbw (%rdi) into ymm0.
        ; NOTE(review): declared readnone yet it loads through %i -- confirm intended.
   3017 ; GENERIC-LABEL: sext_16x8mem_to_16x16:
   3018 ; GENERIC:       # %bb.0:
   3019 ; GENERIC-NEXT:    vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
   3020 ; GENERIC-NEXT:    vpmovb2m %xmm0, %k1 # sched: [1:0.33]
   3021 ; GENERIC-NEXT:    vpmovsxbw (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00]
   3022 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3023 ;
   3024 ; SKX-LABEL: sext_16x8mem_to_16x16:
   3025 ; SKX:       # %bb.0:
   3026 ; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50]
   3027 ; SKX-NEXT:    vpmovb2m %xmm0, %k1 # sched: [1:1.00]
   3028 ; SKX-NEXT:    vpmovsxbw (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00]
   3029 ; SKX-NEXT:    retq # sched: [7:1.00]
   3030   %a   = load <16 x i8>,<16 x i8> *%i,align 1
   3031   %x   = sext <16 x i8> %a to <16 x i16>
   3032   %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
   3033   ret <16 x i16> %ret
   3034 }
   3035 
   3036 define <16 x i16> @zext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone {
        ; Unmasked register-to-register zext of <16 x i8> to <16 x i16>; the
        ; expected code is a single vpmovzxbw xmm0 -> ymm0.
   3037 ; GENERIC-LABEL: zext_16x8_to_16x16:
   3038 ; GENERIC:       # %bb.0:
   3039 ; GENERIC-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00]
   3040 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3041 ;
   3042 ; SKX-LABEL: zext_16x8_to_16x16:
   3043 ; SKX:       # %bb.0:
   3044 ; SKX-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00]
   3045 ; SKX-NEXT:    retq # sched: [7:1.00]
   3046   %x   = zext <16 x i8> %a to <16 x i16>
   3047   ret <16 x i16> %x
   3048 }
   3049 
   3050 define <16 x i16> @zext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone {
        ; Register zext of <16 x i8> to <16 x i16> with lanes zeroed where %mask is
        ; false. The mask arrives in xmm1 and is moved to %k1 (vpsllw + vpmovb2m)
        ; before the zero-masked vpmovzxbw.
   3051 ; GENERIC-LABEL: zext_16x8_to_16x16_mask:
   3052 ; GENERIC:       # %bb.0:
   3053 ; GENERIC-NEXT:    vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00]
   3054 ; GENERIC-NEXT:    vpmovb2m %xmm1, %k1 # sched: [1:0.33]
   3055 ; GENERIC-NEXT:    vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00]
   3056 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3057 ;
   3058 ; SKX-LABEL: zext_16x8_to_16x16_mask:
   3059 ; SKX:       # %bb.0:
   3060 ; SKX-NEXT:    vpsllw $7, %xmm1, %xmm1 # sched: [1:0.50]
   3061 ; SKX-NEXT:    vpmovb2m %xmm1, %k1 # sched: [1:1.00]
   3062 ; SKX-NEXT:    vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00]
   3063 ; SKX-NEXT:    retq # sched: [7:1.00]
   3064   %x   = zext <16 x i8> %a to <16 x i16>
   3065   %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
   3066   ret <16 x i16> %ret
   3067 }
   3068 
   3069 define <16 x i16> @sext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone {
        ; Unmasked register-to-register sext of <16 x i8> to <16 x i16>; the
        ; expected code is a single vpmovsxbw xmm0 -> ymm0.
   3070 ; GENERIC-LABEL: sext_16x8_to_16x16:
   3071 ; GENERIC:       # %bb.0:
   3072 ; GENERIC-NEXT:    vpmovsxbw %xmm0, %ymm0 # sched: [1:1.00]
   3073 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3074 ;
   3075 ; SKX-LABEL: sext_16x8_to_16x16:
   3076 ; SKX:       # %bb.0:
   3077 ; SKX-NEXT:    vpmovsxbw %xmm0, %ymm0 # sched: [3:1.00]
   3078 ; SKX-NEXT:    retq # sched: [7:1.00]
   3079   %x   = sext <16 x i8> %a to <16 x i16>
   3080   ret <16 x i16> %x
   3081 }
   3082 
   3083 define <16 x i16> @sext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone {
        ; Register sext of <16 x i8> to <16 x i16> with lanes zeroed where %mask is
        ; false. The mask in xmm1 is moved to %k1 before the zero-masked vpmovsxbw.
   3084 ; GENERIC-LABEL: sext_16x8_to_16x16_mask:
   3085 ; GENERIC:       # %bb.0:
   3086 ; GENERIC-NEXT:    vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00]
   3087 ; GENERIC-NEXT:    vpmovb2m %xmm1, %k1 # sched: [1:0.33]
   3088 ; GENERIC-NEXT:    vpmovsxbw %xmm0, %ymm0 {%k1} {z} # sched: [1:1.00]
   3089 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3090 ;
   3091 ; SKX-LABEL: sext_16x8_to_16x16_mask:
   3092 ; SKX:       # %bb.0:
   3093 ; SKX-NEXT:    vpsllw $7, %xmm1, %xmm1 # sched: [1:0.50]
   3094 ; SKX-NEXT:    vpmovb2m %xmm1, %k1 # sched: [1:1.00]
   3095 ; SKX-NEXT:    vpmovsxbw %xmm0, %ymm0 {%k1} {z} # sched: [3:1.00]
   3096 ; SKX-NEXT:    retq # sched: [7:1.00]
   3097   %x   = sext <16 x i8> %a to <16 x i16>
   3098   %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
   3099   ret <16 x i16> %ret
   3100 }
   3101 
   3102 define <32 x i16> @zext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readnone {
        ; Loads <32 x i8> from %i, zero-extends to <32 x i16>, and zeroes the lanes
        ; where %mask is false. Expected: zero-masked vpmovzxbw from memory into zmm0.
        ; NOTE(review): declared readnone yet it loads through %i -- confirm intended.
   3103 ; GENERIC-LABEL: zext_32x8mem_to_32x16:
   3104 ; GENERIC:       # %bb.0:
   3105 ; GENERIC-NEXT:    vpsllw $7, %ymm0, %ymm0 # sched: [1:1.00]
   3106 ; GENERIC-NEXT:    vpmovb2m %ymm0, %k1 # sched: [1:0.33]
   3107 ; GENERIC-NEXT:    vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero,mem[16],zero,mem[17],zero,mem[18],zero,mem[19],zero,mem[20],zero,mem[21],zero,mem[22],zero,mem[23],zero,mem[24],zero,mem[25],zero,mem[26],zero,mem[27],zero,mem[28],zero,mem[29],zero,mem[30],zero,mem[31],zero sched: [8:1.00]
   3108 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3109 ;
   3110 ; SKX-LABEL: zext_32x8mem_to_32x16:
   3111 ; SKX:       # %bb.0:
   3112 ; SKX-NEXT:    vpsllw $7, %ymm0, %ymm0 # sched: [1:0.50]
   3113 ; SKX-NEXT:    vpmovb2m %ymm0, %k1 # sched: [1:1.00]
   3114 ; SKX-NEXT:    vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero,mem[16],zero,mem[17],zero,mem[18],zero,mem[19],zero,mem[20],zero,mem[21],zero,mem[22],zero,mem[23],zero,mem[24],zero,mem[25],zero,mem[26],zero,mem[27],zero,mem[28],zero,mem[29],zero,mem[30],zero,mem[31],zero sched: [10:1.00]
   3115 ; SKX-NEXT:    retq # sched: [7:1.00]
   3116   %a   = load <32 x i8>,<32 x i8> *%i,align 1
   3117   %x   = zext <32 x i8> %a to <32 x i16>
   3118   %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
   3119   ret <32 x i16> %ret
   3120 }
   3121 
   3122 define <32 x i16> @sext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readnone {
        ; Loads <32 x i8> from %i, sign-extends to <32 x i16>, and zeroes the lanes
        ; where %mask is false. Expected: zero-masked vpmovsxbw (%rdi) into zmm0.
        ; NOTE(review): declared readnone yet it loads through %i -- confirm intended.
   3123 ; GENERIC-LABEL: sext_32x8mem_to_32x16:
   3124 ; GENERIC:       # %bb.0:
   3125 ; GENERIC-NEXT:    vpsllw $7, %ymm0, %ymm0 # sched: [1:1.00]
   3126 ; GENERIC-NEXT:    vpmovb2m %ymm0, %k1 # sched: [1:0.33]
   3127 ; GENERIC-NEXT:    vpmovsxbw (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
   3128 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3129 ;
   3130 ; SKX-LABEL: sext_32x8mem_to_32x16:
   3131 ; SKX:       # %bb.0:
   3132 ; SKX-NEXT:    vpsllw $7, %ymm0, %ymm0 # sched: [1:0.50]
   3133 ; SKX-NEXT:    vpmovb2m %ymm0, %k1 # sched: [1:1.00]
   3134 ; SKX-NEXT:    vpmovsxbw (%rdi), %zmm0 {%k1} {z} # sched: [10:1.00]
   3135 ; SKX-NEXT:    retq # sched: [7:1.00]
   3136   %a   = load <32 x i8>,<32 x i8> *%i,align 1
   3137   %x   = sext <32 x i8> %a to <32 x i16>
   3138   %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
   3139   ret <32 x i16> %ret
   3140 }
   3141 
   3142 define <32 x i16> @zext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone {
        ; Unmasked register-to-register zext of <32 x i8> to <32 x i16>; the
        ; expected code is a single vpmovzxbw ymm0 -> zmm0.
   3143 ; GENERIC-LABEL: zext_32x8_to_32x16:
   3144 ; GENERIC:       # %bb.0:
   3145 ; GENERIC-NEXT:    vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero sched: [1:1.00]
   3146 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3147 ;
   3148 ; SKX-LABEL: zext_32x8_to_32x16:
   3149 ; SKX:       # %bb.0:
   3150 ; SKX-NEXT:    vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero sched: [3:1.00]
   3151 ; SKX-NEXT:    retq # sched: [7:1.00]
   3152   %x   = zext <32 x i8> %a to <32 x i16>
   3153   ret <32 x i16> %x
   3154 }
   3155 
   3156 define <32 x i16> @zext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone {
        ; Register zext of <32 x i8> to <32 x i16> with lanes zeroed where %mask is
        ; false. The mask in ymm1 is moved to %k1 (vpsllw + vpmovb2m) before the
        ; zero-masked vpmovzxbw.
   3157 ; GENERIC-LABEL: zext_32x8_to_32x16_mask:
   3158 ; GENERIC:       # %bb.0:
   3159 ; GENERIC-NEXT:    vpsllw $7, %ymm1, %ymm1 # sched: [1:1.00]
   3160 ; GENERIC-NEXT:    vpmovb2m %ymm1, %k1 # sched: [1:0.33]
   3161 ; GENERIC-NEXT:    vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero sched: [1:1.00]
   3162 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3163 ;
   3164 ; SKX-LABEL: zext_32x8_to_32x16_mask:
   3165 ; SKX:       # %bb.0:
   3166 ; SKX-NEXT:    vpsllw $7, %ymm1, %ymm1 # sched: [1:0.50]
   3167 ; SKX-NEXT:    vpmovb2m %ymm1, %k1 # sched: [1:1.00]
   3168 ; SKX-NEXT:    vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero sched: [3:1.00]
   3169 ; SKX-NEXT:    retq # sched: [7:1.00]
   3170   %x   = zext <32 x i8> %a to <32 x i16>
   3171   %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
   3172   ret <32 x i16> %ret
   3173 }
   3174 
   3175 define <32 x i16> @sext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone {
        ; Unmasked register-to-register sext of <32 x i8> to <32 x i16>; the
        ; expected code is a single vpmovsxbw ymm0 -> zmm0.
   3176 ; GENERIC-LABEL: sext_32x8_to_32x16:
   3177 ; GENERIC:       # %bb.0:
   3178 ; GENERIC-NEXT:    vpmovsxbw %ymm0, %zmm0 # sched: [1:1.00]
   3179 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3180 ;
   3181 ; SKX-LABEL: sext_32x8_to_32x16:
   3182 ; SKX:       # %bb.0:
   3183 ; SKX-NEXT:    vpmovsxbw %ymm0, %zmm0 # sched: [3:1.00]
   3184 ; SKX-NEXT:    retq # sched: [7:1.00]
   3185   %x   = sext <32 x i8> %a to <32 x i16>
   3186   ret <32 x i16> %x
   3187 }
   3188 
   3189 define <32 x i16> @sext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone {
        ; Register sext of <32 x i8> to <32 x i16> with lanes zeroed where %mask is
        ; false. The mask in ymm1 is moved to %k1 before the zero-masked vpmovsxbw.
   3190 ; GENERIC-LABEL: sext_32x8_to_32x16_mask:
   3191 ; GENERIC:       # %bb.0:
   3192 ; GENERIC-NEXT:    vpsllw $7, %ymm1, %ymm1 # sched: [1:1.00]
   3193 ; GENERIC-NEXT:    vpmovb2m %ymm1, %k1 # sched: [1:0.33]
   3194 ; GENERIC-NEXT:    vpmovsxbw %ymm0, %zmm0 {%k1} {z} # sched: [1:1.00]
   3195 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3196 ;
   3197 ; SKX-LABEL: sext_32x8_to_32x16_mask:
   3198 ; SKX:       # %bb.0:
   3199 ; SKX-NEXT:    vpsllw $7, %ymm1, %ymm1 # sched: [1:0.50]
   3200 ; SKX-NEXT:    vpmovb2m %ymm1, %k1 # sched: [1:1.00]
   3201 ; SKX-NEXT:    vpmovsxbw %ymm0, %zmm0 {%k1} {z} # sched: [3:1.00]
   3202 ; SKX-NEXT:    retq # sched: [7:1.00]
   3203   %x   = sext <32 x i8> %a to <32 x i16>
   3204   %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
   3205   ret <32 x i16> %ret
   3206 }
   3207 
   3208 define <4 x i32> @zext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
        ; Loads <4 x i8> from %i, zero-extends to <4 x i32>, and zeroes the lanes
        ; where %mask is false. Expected: zero-masked vpmovzxbd from memory.
        ; NOTE(review): declared readnone yet it loads through %i -- confirm intended.
   3209 ; GENERIC-LABEL: zext_4x8mem_to_4x32:
   3210 ; GENERIC:       # %bb.0:
   3211 ; GENERIC-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
   3212 ; GENERIC-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:0.33]
   3213 ; GENERIC-NEXT:    vpmovzxbd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:1.00]
   3214 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3215 ;
   3216 ; SKX-LABEL: zext_4x8mem_to_4x32:
   3217 ; SKX:       # %bb.0:
   3218 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
   3219 ; SKX-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:1.00]
   3220 ; SKX-NEXT:    vpmovzxbd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [9:1.00]
   3221 ; SKX-NEXT:    retq # sched: [7:1.00]
   3222   %a   = load <4 x i8>,<4 x i8> *%i,align 1
   3223   %x   = zext <4 x i8> %a to <4 x i32>
   3224   %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
   3225   ret <4 x i32> %ret
   3226 }
   3227 
   3228 define <4 x i32> @sext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
        ; Loads <4 x i8> from %i, sign-extends to <4 x i32>, and zeroes the lanes
        ; where %mask is false. Expected: zero-masked vpmovsxbd (%rdi) into xmm0.
        ; NOTE(review): declared readnone yet it loads through %i -- confirm intended.
   3229 ; GENERIC-LABEL: sext_4x8mem_to_4x32:
   3230 ; GENERIC:       # %bb.0:
   3231 ; GENERIC-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
   3232 ; GENERIC-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:0.33]
   3233 ; GENERIC-NEXT:    vpmovsxbd (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00]
   3234 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3235 ;
   3236 ; SKX-LABEL: sext_4x8mem_to_4x32:
   3237 ; SKX:       # %bb.0:
   3238 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
   3239 ; SKX-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:1.00]
   3240 ; SKX-NEXT:    vpmovsxbd (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00]
   3241 ; SKX-NEXT:    retq # sched: [7:1.00]
   3242   %a   = load <4 x i8>,<4 x i8> *%i,align 1
   3243   %x   = sext <4 x i8> %a to <4 x i32>
   3244   %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
   3245   ret <4 x i32> %ret
   3246 }
   3247 
   3248 define <8 x i32> @zext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
        ; Loads <8 x i8> from %i, zero-extends to <8 x i32>, and zeroes the lanes
        ; where %mask is false. Expected: zero-masked vpmovzxbd from memory into ymm0.
        ; NOTE(review): declared readnone yet it loads through %i -- confirm intended.
   3249 ; GENERIC-LABEL: zext_8x8mem_to_8x32:
   3250 ; GENERIC:       # %bb.0:
   3251 ; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
   3252 ; GENERIC-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:0.33]
   3253 ; GENERIC-NEXT:    vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [8:1.00]
   3254 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3255 ;
   3256 ; SKX-LABEL: zext_8x8mem_to_8x32:
   3257 ; SKX:       # %bb.0:
   3258 ; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
   3259 ; SKX-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:1.00]
   3260 ; SKX-NEXT:    vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [10:1.00]
   3261 ; SKX-NEXT:    retq # sched: [7:1.00]
   3262   %a   = load <8 x i8>,<8 x i8> *%i,align 1
   3263   %x   = zext <8 x i8> %a to <8 x i32>
   3264   %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
   3265   ret <8 x i32> %ret
   3266 }
   3267 
   3268 define <8 x i32> @sext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
        ; Loads <8 x i8> from %i, sign-extends to <8 x i32>, and zeroes the lanes
        ; where %mask is false. Expected: zero-masked vpmovsxbd (%rdi) into ymm0.
        ; NOTE(review): declared readnone yet it loads through %i -- confirm intended.
   3269 ; GENERIC-LABEL: sext_8x8mem_to_8x32:
   3270 ; GENERIC:       # %bb.0:
   3271 ; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
   3272 ; GENERIC-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:0.33]
   3273 ; GENERIC-NEXT:    vpmovsxbd (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00]
   3274 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3275 ;
   3276 ; SKX-LABEL: sext_8x8mem_to_8x32:
   3277 ; SKX:       # %bb.0:
   3278 ; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
   3279 ; SKX-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:1.00]
   3280 ; SKX-NEXT:    vpmovsxbd (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00]
   3281 ; SKX-NEXT:    retq # sched: [7:1.00]
   3282   %a   = load <8 x i8>,<8 x i8> *%i,align 1
   3283   %x   = sext <8 x i8> %a to <8 x i32>
   3284   %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
   3285   ret <8 x i32> %ret
   3286 }
   3287 
   3288 define <16 x i32> @zext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
        ; Loads <16 x i8> from %i, zero-extends to <16 x i32>, and zeroes the lanes
        ; where %mask is false. Expected: zero-masked vpmovzxbd from memory into zmm0.
        ; NOTE(review): declared readnone yet it loads through %i -- confirm intended.
   3289 ; GENERIC-LABEL: zext_16x8mem_to_16x32:
   3290 ; GENERIC:       # %bb.0:
   3291 ; GENERIC-NEXT:    vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
   3292 ; GENERIC-NEXT:    vpmovb2m %xmm0, %k1 # sched: [1:0.33]
   3293 ; GENERIC-NEXT:    vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero sched: [8:1.00]
   3294 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3295 ;
   3296 ; SKX-LABEL: zext_16x8mem_to_16x32:
   3297 ; SKX:       # %bb.0:
   3298 ; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50]
   3299 ; SKX-NEXT:    vpmovb2m %xmm0, %k1 # sched: [1:1.00]
   3300 ; SKX-NEXT:    vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero sched: [10:1.00]
   3301 ; SKX-NEXT:    retq # sched: [7:1.00]
   3302   %a   = load <16 x i8>,<16 x i8> *%i,align 1
   3303   %x   = zext <16 x i8> %a to <16 x i32>
   3304   %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
   3305   ret <16 x i32> %ret
   3306 }
   3307 
   3308 define <16 x i32> @sext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
        ; Loads <16 x i8> from %i, sign-extends to <16 x i32>, and zeroes the lanes
        ; where %mask is false. Expected: zero-masked vpmovsxbd (%rdi) into zmm0.
        ; NOTE(review): declared readnone yet it loads through %i -- confirm intended.
   3309 ; GENERIC-LABEL: sext_16x8mem_to_16x32:
   3310 ; GENERIC:       # %bb.0:
   3311 ; GENERIC-NEXT:    vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
   3312 ; GENERIC-NEXT:    vpmovb2m %xmm0, %k1 # sched: [1:0.33]
   3313 ; GENERIC-NEXT:    vpmovsxbd (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
   3314 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3315 ;
   3316 ; SKX-LABEL: sext_16x8mem_to_16x32:
   3317 ; SKX:       # %bb.0:
   3318 ; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50]
   3319 ; SKX-NEXT:    vpmovb2m %xmm0, %k1 # sched: [1:1.00]
   3320 ; SKX-NEXT:    vpmovsxbd (%rdi), %zmm0 {%k1} {z} # sched: [10:1.00]
   3321 ; SKX-NEXT:    retq # sched: [7:1.00]
   3322   %a   = load <16 x i8>,<16 x i8> *%i,align 1
   3323   %x   = sext <16 x i8> %a to <16 x i32>
   3324   %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
   3325   ret <16 x i32> %ret
   3326 }
   3327 
   3328 define <16 x i32> @zext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone {
        ; Register zext of <16 x i8> to <16 x i32> with lanes zeroed where %mask is
        ; false. The mask in xmm1 is moved to %k1 (vpsllw + vpmovb2m) before the
        ; zero-masked vpmovzxbd xmm0 -> zmm0.
   3329 ; GENERIC-LABEL: zext_16x8_to_16x32_mask:
   3330 ; GENERIC:       # %bb.0:
   3331 ; GENERIC-NEXT:    vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00]
   3332 ; GENERIC-NEXT:    vpmovb2m %xmm1, %k1 # sched: [1:0.33]
   3333 ; GENERIC-NEXT:    vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [1:1.00]
   3334 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3335 ;
   3336 ; SKX-LABEL: zext_16x8_to_16x32_mask:
   3337 ; SKX:       # %bb.0:
   3338 ; SKX-NEXT:    vpsllw $7, %xmm1, %xmm1 # sched: [1:0.50]
   3339 ; SKX-NEXT:    vpmovb2m %xmm1, %k1 # sched: [1:1.00]
   3340 ; SKX-NEXT:    vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [3:1.00]
   3341 ; SKX-NEXT:    retq # sched: [7:1.00]
   3342   %x   = zext <16 x i8> %a to <16 x i32>
   3343   %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
   3344   ret <16 x i32> %ret
   3345 }
   3346 
   3347 define <16 x i32> @sext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone {
        ; Register sext of <16 x i8> to <16 x i32> with lanes zeroed where %mask is
        ; false. The mask in xmm1 is moved to %k1 before the zero-masked vpmovsxbd.
   3348 ; GENERIC-LABEL: sext_16x8_to_16x32_mask:
   3349 ; GENERIC:       # %bb.0:
   3350 ; GENERIC-NEXT:    vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00]
   3351 ; GENERIC-NEXT:    vpmovb2m %xmm1, %k1 # sched: [1:0.33]
   3352 ; GENERIC-NEXT:    vpmovsxbd %xmm0, %zmm0 {%k1} {z} # sched: [1:1.00]
   3353 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3354 ;
   3355 ; SKX-LABEL: sext_16x8_to_16x32_mask:
   3356 ; SKX:       # %bb.0:
   3357 ; SKX-NEXT:    vpsllw $7, %xmm1, %xmm1 # sched: [1:0.50]
   3358 ; SKX-NEXT:    vpmovb2m %xmm1, %k1 # sched: [1:1.00]
   3359 ; SKX-NEXT:    vpmovsxbd %xmm0, %zmm0 {%k1} {z} # sched: [3:1.00]
   3360 ; SKX-NEXT:    retq # sched: [7:1.00]
   3361   %x   = sext <16 x i8> %a to <16 x i32>
   3362   %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
   3363   ret <16 x i32> %ret
   3364 }
   3365 
   3366 define <16 x i32> @zext_16x8_to_16x32(<16 x i8> %i) nounwind readnone {
   3367 ; GENERIC-LABEL: zext_16x8_to_16x32:
   3368 ; GENERIC:       # %bb.0:
   3369 ; GENERIC-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [1:1.00]
   3370 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3371 ;
   3372 ; SKX-LABEL: zext_16x8_to_16x32:
   3373 ; SKX:       # %bb.0:
   3374 ; SKX-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [3:1.00]
   3375 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; Unmasked zero-extension of the 16 bytes of %i to i32; both run lines
          ; expect a single register-to-register vpmovzxbd.
   3376   %wide = zext <16 x i8> %i to <16 x i32>
   3377   ret <16 x i32> %wide
   3378 }
   3379 
   3380 define <16 x i32> @sext_16x8_to_16x32(<16 x i8> %i) nounwind readnone {
   3381 ; GENERIC-LABEL: sext_16x8_to_16x32:
   3382 ; GENERIC:       # %bb.0:
   3383 ; GENERIC-NEXT:    vpmovsxbd %xmm0, %zmm0 # sched: [1:1.00]
   3384 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3385 ;
   3386 ; SKX-LABEL: sext_16x8_to_16x32:
   3387 ; SKX:       # %bb.0:
   3388 ; SKX-NEXT:    vpmovsxbd %xmm0, %zmm0 # sched: [3:1.00]
   3389 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; Unmasked sign-extension of the 16 bytes of %i to i32; both run lines
          ; expect a single register-to-register vpmovsxbd.
   3390   %wide = sext <16 x i8> %i to <16 x i32>
   3391   ret <16 x i32> %wide
   3392 }
   3393 
   3394 define <2 x i64> @zext_2x8mem_to_2x64(<2 x i8> *%i , <2 x i1> %mask) nounwind readonly {
   3395 ; GENERIC-LABEL: zext_2x8mem_to_2x64:
   3396 ; GENERIC:       # %bb.0:
   3397 ; GENERIC-NEXT:    vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
   3398 ; GENERIC-NEXT:    vpmovq2m %xmm0, %k1 # sched: [1:0.33]
   3399 ; GENERIC-NEXT:    vpmovzxbq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [8:1.00]
   3400 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3401 ;
   3402 ; SKX-LABEL: zext_2x8mem_to_2x64:
   3403 ; SKX:       # %bb.0:
   3404 ; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50]
   3405 ; SKX-NEXT:    vpmovq2m %xmm0, %k1 # sched: [1:1.00]
   3406 ; SKX-NEXT:    vpmovzxbq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [9:1.00]
   3407 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; Masked variant despite the unsuffixed name: load <2 x i8> from %i,
          ; zero-extend to i64, and zero every lane whose %mask bit is clear.
          ; Both run lines expect the load folded into a zero-masked vpmovzxbq.
          ; Marked readonly, not readnone, because the body dereferences %i.
   3408   %a   = load <2 x i8>,<2 x i8> *%i,align 1
   3409   %x   = zext <2 x i8> %a to <2 x i64>
   3410   %ret = select <2 x  i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
   3411   ret <2 x i64> %ret
   3412 }
   3413 define <2 x i64> @sext_2x8mem_to_2x64mask(<2 x i8> *%i , <2 x i1> %mask) nounwind readonly {
   3414 ; GENERIC-LABEL: sext_2x8mem_to_2x64mask:
   3415 ; GENERIC:       # %bb.0:
   3416 ; GENERIC-NEXT:    vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
   3417 ; GENERIC-NEXT:    vpmovq2m %xmm0, %k1 # sched: [1:0.33]
   3418 ; GENERIC-NEXT:    vpmovsxbq (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00]
   3419 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3420 ;
   3421 ; SKX-LABEL: sext_2x8mem_to_2x64mask:
   3422 ; SKX:       # %bb.0:
   3423 ; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50]
   3424 ; SKX-NEXT:    vpmovq2m %xmm0, %k1 # sched: [1:1.00]
   3425 ; SKX-NEXT:    vpmovsxbq (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00]
   3426 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; Load <2 x i8> from %i, sign-extend to i64, and zero every lane whose
          ; %mask bit is clear. Both run lines expect the load folded into a
          ; zero-masked vpmovsxbq.
          ; Marked readonly, not readnone, because the body dereferences %i.
   3427   %a   = load <2 x i8>,<2 x i8> *%i,align 1
   3428   %x   = sext <2 x i8> %a to <2 x i64>
   3429   %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
   3430   ret <2 x i64> %ret
   3431 }
   3432 define <2 x i64> @sext_2x8mem_to_2x64(<2 x i8> *%i) nounwind readonly {
   3433 ; GENERIC-LABEL: sext_2x8mem_to_2x64:
   3434 ; GENERIC:       # %bb.0:
   3435 ; GENERIC-NEXT:    vpmovsxbq (%rdi), %xmm0 # sched: [7:0.50]
   3436 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3437 ;
   3438 ; SKX-LABEL: sext_2x8mem_to_2x64:
   3439 ; SKX:       # %bb.0:
   3440 ; SKX-NEXT:    vpmovsxbq (%rdi), %xmm0 # sched: [6:1.00]
   3441 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; Load <2 x i8> from %i and sign-extend to i64; both run lines expect a
          ; single vpmovsxbq with the load folded as its memory operand.
          ; Marked readonly, not readnone, because the body dereferences %i.
   3442   %a   = load <2 x i8>,<2 x i8> *%i,align 1
   3443   %x   = sext <2 x i8> %a to <2 x i64>
   3444   ret <2 x i64> %x
   3445 }
   3446 
   3447 define <4 x i64> @zext_4x8mem_to_4x64(<4 x i8> *%i , <4 x i1> %mask) nounwind readonly {
   3448 ; GENERIC-LABEL: zext_4x8mem_to_4x64:
   3449 ; GENERIC:       # %bb.0:
   3450 ; GENERIC-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
   3451 ; GENERIC-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:0.33]
   3452 ; GENERIC-NEXT:    vpmovzxbq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [8:1.00]
   3453 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3454 ;
   3455 ; SKX-LABEL: zext_4x8mem_to_4x64:
   3456 ; SKX:       # %bb.0:
   3457 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
   3458 ; SKX-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:1.00]
   3459 ; SKX-NEXT:    vpmovzxbq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [10:1.00]
   3460 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; Masked variant despite the unsuffixed name: load <4 x i8> from %i,
          ; zero-extend to i64, and zero every lane whose %mask bit is clear.
          ; Both run lines expect the load folded into a zero-masked vpmovzxbq.
          ; Marked readonly, not readnone, because the body dereferences %i.
   3461   %a   = load <4 x i8>,<4 x i8> *%i,align 1
   3462   %x   = zext <4 x i8> %a to <4 x i64>
   3463   %ret = select <4 x  i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
   3464   ret <4 x i64> %ret
   3465 }
   3466 
   3467 define <4 x i64> @sext_4x8mem_to_4x64mask(<4 x i8> *%i , <4 x i1> %mask) nounwind readonly {
   3468 ; GENERIC-LABEL: sext_4x8mem_to_4x64mask:
   3469 ; GENERIC:       # %bb.0:
   3470 ; GENERIC-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
   3471 ; GENERIC-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:0.33]
   3472 ; GENERIC-NEXT:    vpmovsxbq (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00]
   3473 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3474 ;
   3475 ; SKX-LABEL: sext_4x8mem_to_4x64mask:
   3476 ; SKX:       # %bb.0:
   3477 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
   3478 ; SKX-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:1.00]
   3479 ; SKX-NEXT:    vpmovsxbq (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00]
   3480 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; Load <4 x i8> from %i, sign-extend to i64, and zero every lane whose
          ; %mask bit is clear. Both run lines expect the load folded into a
          ; zero-masked vpmovsxbq.
          ; Marked readonly, not readnone, because the body dereferences %i.
   3481   %a   = load <4 x i8>,<4 x i8> *%i,align 1
   3482   %x   = sext <4 x i8> %a to <4 x i64>
   3483   %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
   3484   ret <4 x i64> %ret
   3485 }
   3486 
   3487 define <4 x i64> @sext_4x8mem_to_4x64(<4 x i8> *%i) nounwind readonly {
   3488 ; GENERIC-LABEL: sext_4x8mem_to_4x64:
   3489 ; GENERIC:       # %bb.0:
   3490 ; GENERIC-NEXT:    vpmovsxbq (%rdi), %ymm0 # sched: [8:1.00]
   3491 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3492 ;
   3493 ; SKX-LABEL: sext_4x8mem_to_4x64:
   3494 ; SKX:       # %bb.0:
   3495 ; SKX-NEXT:    vpmovsxbq (%rdi), %ymm0 # sched: [8:1.00]
   3496 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; Load <4 x i8> from %i and sign-extend to i64; both run lines expect a
          ; single vpmovsxbq with the load folded as its memory operand.
          ; Marked readonly, not readnone, because the body dereferences %i.
   3497   %a   = load <4 x i8>,<4 x i8> *%i,align 1
   3498   %x   = sext <4 x i8> %a to <4 x i64>
   3499   ret <4 x i64> %x
   3500 }
   3501 
   3502 define <8 x i64> @zext_8x8mem_to_8x64(<8 x i8> *%i , <8 x i1> %mask) nounwind readonly {
   3503 ; GENERIC-LABEL: zext_8x8mem_to_8x64:
   3504 ; GENERIC:       # %bb.0:
   3505 ; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
   3506 ; GENERIC-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:0.33]
   3507 ; GENERIC-NEXT:    vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero sched: [8:1.00]
   3508 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3509 ;
   3510 ; SKX-LABEL: zext_8x8mem_to_8x64:
   3511 ; SKX:       # %bb.0:
   3512 ; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
   3513 ; SKX-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:1.00]
   3514 ; SKX-NEXT:    vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero sched: [10:1.00]
   3515 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; Masked variant despite the unsuffixed name: load <8 x i8> from %i,
          ; zero-extend to i64, and zero every lane whose %mask bit is clear.
          ; Both run lines expect the load folded into a zero-masked vpmovzxbq.
          ; Marked readonly, not readnone, because the body dereferences %i.
   3516   %a   = load <8 x i8>,<8 x i8> *%i,align 1
   3517   %x   = zext <8 x i8> %a to <8 x i64>
   3518   %ret = select <8 x  i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
   3519   ret <8 x i64> %ret
   3520 }
   3521 
   3522 define <8 x i64> @sext_8x8mem_to_8x64mask(<8 x i8> *%i , <8 x i1> %mask) nounwind readonly {
   3523 ; GENERIC-LABEL: sext_8x8mem_to_8x64mask:
   3524 ; GENERIC:       # %bb.0:
   3525 ; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
   3526 ; GENERIC-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:0.33]
   3527 ; GENERIC-NEXT:    vpmovsxbq (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
   3528 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3529 ;
   3530 ; SKX-LABEL: sext_8x8mem_to_8x64mask:
   3531 ; SKX:       # %bb.0:
   3532 ; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
   3533 ; SKX-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:1.00]
   3534 ; SKX-NEXT:    vpmovsxbq (%rdi), %zmm0 {%k1} {z} # sched: [10:1.00]
   3535 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; Load <8 x i8> from %i, sign-extend to i64, and zero every lane whose
          ; %mask bit is clear. Both run lines expect the load folded into a
          ; zero-masked vpmovsxbq.
          ; Marked readonly, not readnone, because the body dereferences %i.
   3536   %a   = load <8 x i8>,<8 x i8> *%i,align 1
   3537   %x   = sext <8 x i8> %a to <8 x i64>
   3538   %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
   3539   ret <8 x i64> %ret
   3540 }
   3541 
   3542 define <8 x i64> @sext_8x8mem_to_8x64(<8 x i8> *%i) nounwind readonly {
   3543 ; GENERIC-LABEL: sext_8x8mem_to_8x64:
   3544 ; GENERIC:       # %bb.0:
   3545 ; GENERIC-NEXT:    vpmovsxbq (%rdi), %zmm0 # sched: [8:1.00]
   3546 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3547 ;
   3548 ; SKX-LABEL: sext_8x8mem_to_8x64:
   3549 ; SKX:       # %bb.0:
   3550 ; SKX-NEXT:    vpmovsxbq (%rdi), %zmm0 # sched: [10:1.00]
   3551 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; Load <8 x i8> from %i and sign-extend to i64; both run lines expect a
          ; single vpmovsxbq with the load folded as its memory operand.
          ; Marked readonly, not readnone, because the body dereferences %i.
   3552   %a   = load <8 x i8>,<8 x i8> *%i,align 1
   3553   %x   = sext <8 x i8> %a to <8 x i64>
   3554   ret <8 x i64> %x
   3555 }
   3556 
   3557 define <4 x i32> @zext_4x16mem_to_4x32(<4 x i16> *%i , <4 x i1> %mask) nounwind readonly {
   3558 ; GENERIC-LABEL: zext_4x16mem_to_4x32:
   3559 ; GENERIC:       # %bb.0:
   3560 ; GENERIC-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
   3561 ; GENERIC-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:0.33]
   3562 ; GENERIC-NEXT:    vpmovzxwd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [8:1.00]
   3563 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3564 ;
   3565 ; SKX-LABEL: zext_4x16mem_to_4x32:
   3566 ; SKX:       # %bb.0:
   3567 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
   3568 ; SKX-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:1.00]
   3569 ; SKX-NEXT:    vpmovzxwd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [9:1.00]
   3570 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; Masked variant despite the unsuffixed name: load <4 x i16> from %i,
          ; zero-extend to i32, and zero every lane whose %mask bit is clear.
          ; Both run lines expect the load folded into a zero-masked vpmovzxwd.
          ; Marked readonly, not readnone, because the body dereferences %i.
   3571   %a   = load <4 x i16>,<4 x i16> *%i,align 1
   3572   %x   = zext <4 x i16> %a to <4 x i32>
   3573   %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
   3574   ret <4 x i32> %ret
   3575 }
   3576 
   3577 define <4 x i32> @sext_4x16mem_to_4x32mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readonly {
   3578 ; GENERIC-LABEL: sext_4x16mem_to_4x32mask:
   3579 ; GENERIC:       # %bb.0:
   3580 ; GENERIC-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
   3581 ; GENERIC-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:0.33]
   3582 ; GENERIC-NEXT:    vpmovsxwd (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00]
   3583 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3584 ;
   3585 ; SKX-LABEL: sext_4x16mem_to_4x32mask:
   3586 ; SKX:       # %bb.0:
   3587 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
   3588 ; SKX-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:1.00]
   3589 ; SKX-NEXT:    vpmovsxwd (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00]
   3590 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; Load <4 x i16> from %i, sign-extend to i32, and zero every lane whose
          ; %mask bit is clear. Both run lines expect the load folded into a
          ; zero-masked vpmovsxwd.
          ; Marked readonly, not readnone, because the body dereferences %i.
   3591   %a   = load <4 x i16>,<4 x i16> *%i,align 1
   3592   %x   = sext <4 x i16> %a to <4 x i32>
   3593   %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
   3594   ret <4 x i32> %ret
   3595 }
   3596 
   3597 define <4 x i32> @sext_4x16mem_to_4x32(<4 x i16> *%i) nounwind readonly {
   3598 ; GENERIC-LABEL: sext_4x16mem_to_4x32:
   3599 ; GENERIC:       # %bb.0:
   3600 ; GENERIC-NEXT:    vpmovsxwd (%rdi), %xmm0 # sched: [7:0.50]
   3601 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3602 ;
   3603 ; SKX-LABEL: sext_4x16mem_to_4x32:
   3604 ; SKX:       # %bb.0:
   3605 ; SKX-NEXT:    vpmovsxwd (%rdi), %xmm0 # sched: [6:1.00]
   3606 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; Load <4 x i16> from %i and sign-extend to i32; both run lines expect a
          ; single vpmovsxwd with the load folded as its memory operand.
          ; Marked readonly, not readnone, because the body dereferences %i.
   3607   %a   = load <4 x i16>,<4 x i16> *%i,align 1
   3608   %x   = sext <4 x i16> %a to <4 x i32>
   3609   ret <4 x i32> %x
   3610 }
   3611 
   3612 
   3613 define <8 x i32> @zext_8x16mem_to_8x32(<8 x i16> *%i , <8 x i1> %mask) nounwind readonly {
   3614 ; GENERIC-LABEL: zext_8x16mem_to_8x32:
   3615 ; GENERIC:       # %bb.0:
   3616 ; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
   3617 ; GENERIC-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:0.33]
   3618 ; GENERIC-NEXT:    vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:1.00]
   3619 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3620 ;
   3621 ; SKX-LABEL: zext_8x16mem_to_8x32:
   3622 ; SKX:       # %bb.0:
   3623 ; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
   3624 ; SKX-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:1.00]
   3625 ; SKX-NEXT:    vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [10:1.00]
   3626 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; Masked variant despite the unsuffixed name: load <8 x i16> from %i,
          ; zero-extend to i32, and zero every lane whose %mask bit is clear.
          ; Both run lines expect the load folded into a zero-masked vpmovzxwd.
          ; Marked readonly, not readnone, because the body dereferences %i.
   3627   %a   = load <8 x i16>,<8 x i16> *%i,align 1
   3628   %x   = zext <8 x i16> %a to <8 x i32>
   3629   %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
   3630   ret <8 x i32> %ret
   3631 }
   3632 
   3633 define <8 x i32> @sext_8x16mem_to_8x32mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readonly {
   3634 ; GENERIC-LABEL: sext_8x16mem_to_8x32mask:
   3635 ; GENERIC:       # %bb.0:
   3636 ; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
   3637 ; GENERIC-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:0.33]
   3638 ; GENERIC-NEXT:    vpmovsxwd (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00]
   3639 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3640 ;
   3641 ; SKX-LABEL: sext_8x16mem_to_8x32mask:
   3642 ; SKX:       # %bb.0:
   3643 ; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
   3644 ; SKX-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:1.00]
   3645 ; SKX-NEXT:    vpmovsxwd (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00]
   3646 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; Load <8 x i16> from %i, sign-extend to i32, and zero every lane whose
          ; %mask bit is clear. Both run lines expect the load folded into a
          ; zero-masked vpmovsxwd.
          ; Marked readonly, not readnone, because the body dereferences %i.
   3647   %a   = load <8 x i16>,<8 x i16> *%i,align 1
   3648   %x   = sext <8 x i16> %a to <8 x i32>
   3649   %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
   3650   ret <8 x i32> %ret
   3651 }
   3652 
   3653 define <8 x i32> @sext_8x16mem_to_8x32(<8 x i16> *%i) nounwind readonly {
   3654 ; GENERIC-LABEL: sext_8x16mem_to_8x32:
   3655 ; GENERIC:       # %bb.0:
   3656 ; GENERIC-NEXT:    vpmovsxwd (%rdi), %ymm0 # sched: [8:1.00]
   3657 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3658 ;
   3659 ; SKX-LABEL: sext_8x16mem_to_8x32:
   3660 ; SKX:       # %bb.0:
   3661 ; SKX-NEXT:    vpmovsxwd (%rdi), %ymm0 # sched: [9:1.00]
   3662 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; Load <8 x i16> from %i and sign-extend to i32; both run lines expect a
          ; single vpmovsxwd with the load folded as its memory operand.
          ; Marked readonly, not readnone, because the body dereferences %i.
   3663   %a   = load <8 x i16>,<8 x i16> *%i,align 1
   3664   %x   = sext <8 x i16> %a to <8 x i32>
   3665   ret <8 x i32> %x
   3666 }
   3667 
   3668 define <8 x i32> @zext_8x16_to_8x32mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone {
   3669 ; GENERIC-LABEL: zext_8x16_to_8x32mask:
   3670 ; GENERIC:       # %bb.0:
   3671 ; GENERIC-NEXT:    vpsllw $15, %xmm1, %xmm1 # sched: [1:1.00]
   3672 ; GENERIC-NEXT:    vpmovw2m %xmm1, %k1 # sched: [1:0.33]
   3673 ; GENERIC-NEXT:    vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
   3674 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3675 ;
   3676 ; SKX-LABEL: zext_8x16_to_8x32mask:
   3677 ; SKX:       # %bb.0:
   3678 ; SKX-NEXT:    vpsllw $15, %xmm1, %xmm1 # sched: [1:0.50]
   3679 ; SKX-NEXT:    vpmovw2m %xmm1, %k1 # sched: [1:1.00]
   3680 ; SKX-NEXT:    vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
   3681 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; Zero-extend the 8 words of %a to i32 and zero every lane whose %mask
          ; bit is clear; both run lines expect one zero-masked vpmovzxwd.
   3682   %wide = zext <8 x i16> %a to <8 x i32>
   3683   %masked = select <8 x i1> %mask, <8 x i32> %wide, <8 x i32> zeroinitializer
   3684   ret <8 x i32> %masked
   3685 }
   3686 
   3687 define <8 x i32> @zext_8x16_to_8x32(<8 x i16> %a ) nounwind readnone {
   3688 ; GENERIC-LABEL: zext_8x16_to_8x32:
   3689 ; GENERIC:       # %bb.0:
   3690 ; GENERIC-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
   3691 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3692 ;
   3693 ; SKX-LABEL: zext_8x16_to_8x32:
   3694 ; SKX:       # %bb.0:
   3695 ; SKX-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
   3696 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; Unmasked zero-extension of the 8 words of %a to i32; both run lines
          ; expect a single register-to-register vpmovzxwd.
   3697   %wide = zext <8 x i16> %a to <8 x i32>
   3698   ret <8 x i32> %wide
   3699 }
   3700 
   3701 define <16 x i32> @zext_16x16mem_to_16x32(<16 x i16> *%i , <16 x i1> %mask) nounwind readonly {
   3702 ; GENERIC-LABEL: zext_16x16mem_to_16x32:
   3703 ; GENERIC:       # %bb.0:
   3704 ; GENERIC-NEXT:    vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
   3705 ; GENERIC-NEXT:    vpmovb2m %xmm0, %k1 # sched: [1:0.33]
   3706 ; GENERIC-NEXT:    vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [8:1.00]
   3707 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3708 ;
   3709 ; SKX-LABEL: zext_16x16mem_to_16x32:
   3710 ; SKX:       # %bb.0:
   3711 ; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50]
   3712 ; SKX-NEXT:    vpmovb2m %xmm0, %k1 # sched: [1:1.00]
   3713 ; SKX-NEXT:    vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [10:1.00]
   3714 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; Masked variant despite the unsuffixed name: load <16 x i16> from %i,
          ; zero-extend to i32, and zero every lane whose %mask bit is clear.
          ; Both run lines expect the load folded into a zero-masked vpmovzxwd.
          ; Marked readonly, not readnone, because the body dereferences %i.
   3715   %a   = load <16 x i16>,<16 x i16> *%i,align 1
   3716   %x   = zext <16 x i16> %a to <16 x i32>
   3717   %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
   3718   ret <16 x i32> %ret
   3719 }
   3720 
   3721 define <16 x i32> @sext_16x16mem_to_16x32mask(<16 x i16> *%i , <16 x i1> %mask) nounwind readonly {
   3722 ; GENERIC-LABEL: sext_16x16mem_to_16x32mask:
   3723 ; GENERIC:       # %bb.0:
   3724 ; GENERIC-NEXT:    vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
   3725 ; GENERIC-NEXT:    vpmovb2m %xmm0, %k1 # sched: [1:0.33]
   3726 ; GENERIC-NEXT:    vpmovsxwd (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
   3727 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3728 ;
   3729 ; SKX-LABEL: sext_16x16mem_to_16x32mask:
   3730 ; SKX:       # %bb.0:
   3731 ; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50]
   3732 ; SKX-NEXT:    vpmovb2m %xmm0, %k1 # sched: [1:1.00]
   3733 ; SKX-NEXT:    vpmovsxwd (%rdi), %zmm0 {%k1} {z} # sched: [10:1.00]
   3734 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; Load <16 x i16> from %i, sign-extend to i32, and zero every lane whose
          ; %mask bit is clear. Both run lines expect the load folded into a
          ; zero-masked vpmovsxwd.
          ; Marked readonly, not readnone, because the body dereferences %i.
   3735   %a   = load <16 x i16>,<16 x i16> *%i,align 1
   3736   %x   = sext <16 x i16> %a to <16 x i32>
   3737   %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
   3738   ret <16 x i32> %ret
   3739 }
   3740 
   3741 define <16 x i32> @sext_16x16mem_to_16x32(<16 x i16> *%i) nounwind readonly {
   3742 ; GENERIC-LABEL: sext_16x16mem_to_16x32:
   3743 ; GENERIC:       # %bb.0:
   3744 ; GENERIC-NEXT:    vpmovsxwd (%rdi), %zmm0 # sched: [8:1.00]
   3745 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3746 ;
   3747 ; SKX-LABEL: sext_16x16mem_to_16x32:
   3748 ; SKX:       # %bb.0:
   3749 ; SKX-NEXT:    vpmovsxwd (%rdi), %zmm0 # sched: [10:1.00]
   3750 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; Load <16 x i16> from %i and sign-extend to i32; both run lines expect a
          ; single vpmovsxwd with the load folded as its memory operand.
          ; Marked readonly, not readnone, because the body dereferences %i.
   3751   %a   = load <16 x i16>,<16 x i16> *%i,align 1
   3752   %x   = sext <16 x i16> %a to <16 x i32>
   3753   ret <16 x i32> %x
   3754 }
   3755 define <16 x i32> @zext_16x16_to_16x32mask(<16 x i16> %a , <16 x i1> %mask) nounwind readnone {
   3756 ; GENERIC-LABEL: zext_16x16_to_16x32mask:
   3757 ; GENERIC:       # %bb.0:
   3758 ; GENERIC-NEXT:    vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00]
   3759 ; GENERIC-NEXT:    vpmovb2m %xmm1, %k1 # sched: [1:0.33]
   3760 ; GENERIC-NEXT:    vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [1:1.00]
   3761 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3762 ;
   3763 ; SKX-LABEL: zext_16x16_to_16x32mask:
   3764 ; SKX:       # %bb.0:
   3765 ; SKX-NEXT:    vpsllw $7, %xmm1, %xmm1 # sched: [1:0.50]
   3766 ; SKX-NEXT:    vpmovb2m %xmm1, %k1 # sched: [1:1.00]
   3767 ; SKX-NEXT:    vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [3:1.00]
   3768 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; Zero-extend the 16 words of %a to i32 and zero every lane whose %mask
          ; bit is clear; both run lines expect one zero-masked vpmovzxwd.
   3769   %wide = zext <16 x i16> %a to <16 x i32>
   3770   %masked = select <16 x i1> %mask, <16 x i32> %wide, <16 x i32> zeroinitializer
   3771   ret <16 x i32> %masked
   3772 }
   3773 
   3774 define <16 x i32> @zext_16x16_to_16x32(<16 x i16> %a ) nounwind readnone {
   3775 ; GENERIC-LABEL: zext_16x16_to_16x32:
   3776 ; GENERIC:       # %bb.0:
   3777 ; GENERIC-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [1:1.00]
   3778 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3779 ;
   3780 ; SKX-LABEL: zext_16x16_to_16x32:
   3781 ; SKX:       # %bb.0:
   3782 ; SKX-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [3:1.00]
   3783 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; Unmasked zero-extension of the 16 words of %a to i32; both run lines
          ; expect a single register-to-register vpmovzxwd.
   3784   %wide = zext <16 x i16> %a to <16 x i32>
   3785   ret <16 x i32> %wide
   3786 }
   3787 
   3788 define <2 x i64> @zext_2x16mem_to_2x64(<2 x i16> *%i , <2 x i1> %mask) nounwind readonly {
   3789 ; GENERIC-LABEL: zext_2x16mem_to_2x64:
   3790 ; GENERIC:       # %bb.0:
   3791 ; GENERIC-NEXT:    vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
   3792 ; GENERIC-NEXT:    vpmovq2m %xmm0, %k1 # sched: [1:0.33]
   3793 ; GENERIC-NEXT:    vpmovzxwq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [8:1.00]
   3794 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3795 ;
   3796 ; SKX-LABEL: zext_2x16mem_to_2x64:
   3797 ; SKX:       # %bb.0:
   3798 ; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50]
   3799 ; SKX-NEXT:    vpmovq2m %xmm0, %k1 # sched: [1:1.00]
   3800 ; SKX-NEXT:    vpmovzxwq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [9:1.00]
   3801 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; Masked variant despite the unsuffixed name: load <2 x i16> from %i,
          ; zero-extend to i64, and zero every lane whose %mask bit is clear.
          ; Both run lines expect the load folded into a zero-masked vpmovzxwq.
          ; Marked readonly, not readnone, because the body dereferences %i.
   3802   %a   = load <2 x i16>,<2 x i16> *%i,align 1
   3803   %x   = zext <2 x i16> %a to <2 x i64>
   3804   %ret = select <2 x  i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
   3805   ret <2 x i64> %ret
   3806 }
   3807 
   3808 define <2 x i64> @sext_2x16mem_to_2x64mask(<2 x i16> *%i , <2 x i1> %mask) nounwind readonly {
   3809 ; GENERIC-LABEL: sext_2x16mem_to_2x64mask:
   3810 ; GENERIC:       # %bb.0:
   3811 ; GENERIC-NEXT:    vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
   3812 ; GENERIC-NEXT:    vpmovq2m %xmm0, %k1 # sched: [1:0.33]
   3813 ; GENERIC-NEXT:    vpmovsxwq (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00]
   3814 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3815 ;
   3816 ; SKX-LABEL: sext_2x16mem_to_2x64mask:
   3817 ; SKX:       # %bb.0:
   3818 ; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50]
   3819 ; SKX-NEXT:    vpmovq2m %xmm0, %k1 # sched: [1:1.00]
   3820 ; SKX-NEXT:    vpmovsxwq (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00]
   3821 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; Load <2 x i16> from %i, sign-extend to i64, and zero every lane whose
          ; %mask bit is clear. Both run lines expect the load folded into a
          ; zero-masked vpmovsxwq.
          ; Marked readonly, not readnone, because the body dereferences %i.
   3822   %a   = load <2 x i16>,<2 x i16> *%i,align 1
   3823   %x   = sext <2 x i16> %a to <2 x i64>
   3824   %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
   3825   ret <2 x i64> %ret
   3826 }
   3827 
   3828 define <2 x i64> @sext_2x16mem_to_2x64(<2 x i16> *%i) nounwind readonly {
   3829 ; GENERIC-LABEL: sext_2x16mem_to_2x64:
   3830 ; GENERIC:       # %bb.0:
   3831 ; GENERIC-NEXT:    vpmovsxwq (%rdi), %xmm0 # sched: [7:0.50]
   3832 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3833 ;
   3834 ; SKX-LABEL: sext_2x16mem_to_2x64:
   3835 ; SKX:       # %bb.0:
   3836 ; SKX-NEXT:    vpmovsxwq (%rdi), %xmm0 # sched: [6:1.00]
   3837 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; Load <2 x i16> from %i and sign-extend to i64; both run lines expect a
          ; single vpmovsxwq with the load folded as its memory operand.
          ; Marked readonly, not readnone, because the body dereferences %i.
   3838   %a   = load <2 x i16>,<2 x i16> *%i,align 1
   3839   %x   = sext <2 x i16> %a to <2 x i64>
   3840   ret <2 x i64> %x
   3841 }
   3842 
   3843 define <4 x i64> @zext_4x16mem_to_4x64(<4 x i16> *%i , <4 x i1> %mask) nounwind readonly {
   3844 ; GENERIC-LABEL: zext_4x16mem_to_4x64:
   3845 ; GENERIC:       # %bb.0:
   3846 ; GENERIC-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
   3847 ; GENERIC-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:0.33]
   3848 ; GENERIC-NEXT:    vpmovzxwq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:1.00]
   3849 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3850 ;
   3851 ; SKX-LABEL: zext_4x16mem_to_4x64:
   3852 ; SKX:       # %bb.0:
   3853 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
   3854 ; SKX-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:1.00]
   3855 ; SKX-NEXT:    vpmovzxwq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [10:1.00]
   3856 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; Masked variant despite the unsuffixed name: load <4 x i16> from %i,
          ; zero-extend to i64, and zero every lane whose %mask bit is clear.
          ; Both run lines expect the load folded into a zero-masked vpmovzxwq.
          ; Marked readonly, not readnone, because the body dereferences %i.
   3857   %a   = load <4 x i16>,<4 x i16> *%i,align 1
   3858   %x   = zext <4 x i16> %a to <4 x i64>
   3859   %ret = select <4 x  i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
   3860   ret <4 x i64> %ret
   3861 }
   3862 
   3863 define <4 x i64> @sext_4x16mem_to_4x64mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readonly {
   3864 ; GENERIC-LABEL: sext_4x16mem_to_4x64mask:
   3865 ; GENERIC:       # %bb.0:
   3866 ; GENERIC-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
   3867 ; GENERIC-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:0.33]
   3868 ; GENERIC-NEXT:    vpmovsxwq (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00]
   3869 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3870 ;
   3871 ; SKX-LABEL: sext_4x16mem_to_4x64mask:
   3872 ; SKX:       # %bb.0:
   3873 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
   3874 ; SKX-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:1.00]
   3875 ; SKX-NEXT:    vpmovsxwq (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00]
   3876 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; Load <4 x i16> from %i, sign-extend to i64, and zero every lane whose
          ; %mask bit is clear. Both run lines expect the load folded into a
          ; zero-masked vpmovsxwq.
          ; Marked readonly, not readnone, because the body dereferences %i.
   3877   %a   = load <4 x i16>,<4 x i16> *%i,align 1
   3878   %x   = sext <4 x i16> %a to <4 x i64>
   3879   %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
   3880   ret <4 x i64> %ret
   3881 }
   3882 
   3883 define <4 x i64> @sext_4x16mem_to_4x64(<4 x i16> *%i) nounwind readonly {
   3884 ; GENERIC-LABEL: sext_4x16mem_to_4x64:
   3885 ; GENERIC:       # %bb.0:
   3886 ; GENERIC-NEXT:    vpmovsxwq (%rdi), %ymm0 # sched: [8:1.00]
   3887 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3888 ;
   3889 ; SKX-LABEL: sext_4x16mem_to_4x64:
   3890 ; SKX:       # %bb.0:
   3891 ; SKX-NEXT:    vpmovsxwq (%rdi), %ymm0 # sched: [8:1.00]
   3892 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; Load <4 x i16> from %i and sign-extend to i64; both run lines expect a
          ; single vpmovsxwq with the load folded as its memory operand.
          ; Marked readonly, not readnone, because the body dereferences %i.
   3893   %a   = load <4 x i16>,<4 x i16> *%i,align 1
   3894   %x   = sext <4 x i16> %a to <4 x i64>
   3895   ret <4 x i64> %x
   3896 }
   3897 
   3898 define <8 x i64> @zext_8x16mem_to_8x64(<8 x i16> *%i , <8 x i1> %mask) nounwind readonly {
   3899 ; GENERIC-LABEL: zext_8x16mem_to_8x64:
   3900 ; GENERIC:       # %bb.0:
   3901 ; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
   3902 ; GENERIC-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:0.33]
   3903 ; GENERIC-NEXT:    vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [8:1.00]
   3904 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3905 ;
   3906 ; SKX-LABEL: zext_8x16mem_to_8x64:
   3907 ; SKX:       # %bb.0:
   3908 ; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
   3909 ; SKX-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:1.00]
   3910 ; SKX-NEXT:    vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [10:1.00]
   3911 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; Masked variant despite the unsuffixed name: load <8 x i16> from %i,
          ; zero-extend to i64, and zero every lane whose %mask bit is clear.
          ; Both run lines expect the load folded into a zero-masked vpmovzxwq.
          ; Marked readonly, not readnone, because the body dereferences %i.
   3912   %a   = load <8 x i16>,<8 x i16> *%i,align 1
   3913   %x   = zext <8 x i16> %a to <8 x i64>
   3914   %ret = select <8 x  i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
   3915   ret <8 x i64> %ret
   3916 }
   3917 
   3918 define <8 x i64> @sext_8x16mem_to_8x64mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readonly {
   3919 ; GENERIC-LABEL: sext_8x16mem_to_8x64mask:
   3920 ; GENERIC:       # %bb.0:
   3921 ; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
   3922 ; GENERIC-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:0.33]
   3923 ; GENERIC-NEXT:    vpmovsxwq (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
   3924 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3925 ;
   3926 ; SKX-LABEL: sext_8x16mem_to_8x64mask:
   3927 ; SKX:       # %bb.0:
   3928 ; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
   3929 ; SKX-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:1.00]
   3930 ; SKX-NEXT:    vpmovsxwq (%rdi), %zmm0 {%k1} {z} # sched: [10:1.00]
   3931 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; Load <8 x i16> from %i, sign-extend to i64, and zero every lane whose
          ; %mask bit is clear. Both run lines expect the load folded into a
          ; zero-masked vpmovsxwq.
          ; Marked readonly, not readnone, because the body dereferences %i.
   3932   %a   = load <8 x i16>,<8 x i16> *%i,align 1
   3933   %x   = sext <8 x i16> %a to <8 x i64>
   3934   %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
   3935   ret <8 x i64> %ret
   3936 }
   3937 
   3938 define <8 x i64> @sext_8x16mem_to_8x64(<8 x i16> *%i) nounwind readonly { ; unmasked sign-extending load, 8 x i16 -> 8 x i64; readonly rather than readnone because the body loads through %i
   3939 ; GENERIC-LABEL: sext_8x16mem_to_8x64:
   3940 ; GENERIC:       # %bb.0:
   3941 ; GENERIC-NEXT:    vpmovsxwq (%rdi), %zmm0 # sched: [8:1.00]
   3942 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3943 ;
   3944 ; SKX-LABEL: sext_8x16mem_to_8x64:
   3945 ; SKX:       # %bb.0:
   3946 ; SKX-NEXT:    vpmovsxwq (%rdi), %zmm0 # sched: [10:1.00]
   3947 ; SKX-NEXT:    retq # sched: [7:1.00]
   3948   %a   = load <8 x i16>, <8 x i16>* %i, align 1 ; align 1 = no alignment guarantee on the source
   3949   %x   = sext <8 x i16> %a to <8 x i64>
   3950   ret <8 x i64> %x
   3951 }
   3952 
   3953 define <8 x i64> @zext_8x16_to_8x64mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone { ; register-to-register zero extension, 8 x i16 -> 8 x i64, with zero-masking
   3954 ; GENERIC-LABEL: zext_8x16_to_8x64mask:
   3955 ; GENERIC:       # %bb.0:
   3956 ; GENERIC-NEXT:    vpsllw $15, %xmm1, %xmm1 # sched: [1:1.00]
   3957 ; GENERIC-NEXT:    vpmovw2m %xmm1, %k1 # sched: [1:0.33]
   3958 ; GENERIC-NEXT:    vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [1:1.00]
   3959 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3960 ;
   3961 ; SKX-LABEL: zext_8x16_to_8x64mask:
   3962 ; SKX:       # %bb.0:
   3963 ; SKX-NEXT:    vpsllw $15, %xmm1, %xmm1 # sched: [1:0.50]
   3964 ; SKX-NEXT:    vpmovw2m %xmm1, %k1 # sched: [1:1.00]
   3965 ; SKX-NEXT:    vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00]
   3966 ; SKX-NEXT:    retq # sched: [7:1.00]
   3967   %wide = zext <8 x i16> %a to <8 x i64>
   3968   %masked = select <8 x i1> %mask, <8 x i64> %wide, <8 x i64> zeroinitializer ; lanes with a clear mask bit become zero
   3969   ret <8 x i64> %masked
   3970 }
   3971 
   3972 define <8 x i64> @zext_8x16_to_8x64(<8 x i16> %a) nounwind readnone { ; register-to-register zero extension, 8 x i16 -> 8 x i64, no mask
   3973 ; GENERIC-LABEL: zext_8x16_to_8x64:
   3974 ; GENERIC:       # %bb.0:
   3975 ; GENERIC-NEXT:    vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [1:1.00]
   3976 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3977 ;
   3978 ; SKX-LABEL: zext_8x16_to_8x64:
   3979 ; SKX:       # %bb.0:
   3980 ; SKX-NEXT:    vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00]
   3981 ; SKX-NEXT:    retq # sched: [7:1.00]
   3982   %wide = zext <8 x i16> %a to <8 x i64>
   3983   ret <8 x i64> %wide
   3984 }
   3985 
   3986 define <2 x i64> @zext_2x32mem_to_2x64(<2 x i32> *%i , <2 x i1> %mask) nounwind readonly { ; zero-masked zero-extending load, 2 x i32 -> 2 x i64; readonly rather than readnone because the body loads through %i
   3987 ; GENERIC-LABEL: zext_2x32mem_to_2x64:
   3988 ; GENERIC:       # %bb.0:
   3989 ; GENERIC-NEXT:    vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
   3990 ; GENERIC-NEXT:    vpmovq2m %xmm0, %k1 # sched: [1:0.33]
   3991 ; GENERIC-NEXT:    vpmovzxdq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero sched: [8:1.00]
   3992 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3993 ;
   3994 ; SKX-LABEL: zext_2x32mem_to_2x64:
   3995 ; SKX:       # %bb.0:
   3996 ; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50]
   3997 ; SKX-NEXT:    vpmovq2m %xmm0, %k1 # sched: [1:1.00]
   3998 ; SKX-NEXT:    vpmovzxdq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero sched: [9:1.00]
   3999 ; SKX-NEXT:    retq # sched: [7:1.00]
   4000   %a   = load <2 x i32>, <2 x i32>* %i, align 1 ; align 1 = no alignment guarantee on the source
   4001   %x   = zext <2 x i32> %a to <2 x i64>
   4002   %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer ; lanes with a clear mask bit become zero
   4003   ret <2 x i64> %ret
   4004 }
   4005 
   4006 define <2 x i64> @sext_2x32mem_to_2x64mask(<2 x i32> *%i , <2 x i1> %mask) nounwind readonly { ; zero-masked sign-extending load, 2 x i32 -> 2 x i64; readonly rather than readnone because the body loads through %i
   4007 ; GENERIC-LABEL: sext_2x32mem_to_2x64mask:
   4008 ; GENERIC:       # %bb.0:
   4009 ; GENERIC-NEXT:    vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
   4010 ; GENERIC-NEXT:    vpmovq2m %xmm0, %k1 # sched: [1:0.33]
   4011 ; GENERIC-NEXT:    vpmovsxdq (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00]
   4012 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4013 ;
   4014 ; SKX-LABEL: sext_2x32mem_to_2x64mask:
   4015 ; SKX:       # %bb.0:
   4016 ; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50]
   4017 ; SKX-NEXT:    vpmovq2m %xmm0, %k1 # sched: [1:1.00]
   4018 ; SKX-NEXT:    vpmovsxdq (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00]
   4019 ; SKX-NEXT:    retq # sched: [7:1.00]
   4020   %a   = load <2 x i32>, <2 x i32>* %i, align 1 ; align 1 = no alignment guarantee on the source
   4021   %x   = sext <2 x i32> %a to <2 x i64>
   4022   %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer ; lanes with a clear mask bit become zero
   4023   ret <2 x i64> %ret
   4024 }
   4025 
   4026 define <2 x i64> @sext_2x32mem_to_2x64(<2 x i32> *%i) nounwind readonly { ; unmasked sign-extending load, 2 x i32 -> 2 x i64; readonly rather than readnone because the body loads through %i
   4027 ; GENERIC-LABEL: sext_2x32mem_to_2x64:
   4028 ; GENERIC:       # %bb.0:
   4029 ; GENERIC-NEXT:    vpmovsxdq (%rdi), %xmm0 # sched: [7:0.50]
   4030 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4031 ;
   4032 ; SKX-LABEL: sext_2x32mem_to_2x64:
   4033 ; SKX:       # %bb.0:
   4034 ; SKX-NEXT:    vpmovsxdq (%rdi), %xmm0 # sched: [6:1.00]
   4035 ; SKX-NEXT:    retq # sched: [7:1.00]
   4036   %a   = load <2 x i32>, <2 x i32>* %i, align 1 ; align 1 = no alignment guarantee on the source
   4037   %x   = sext <2 x i32> %a to <2 x i64>
   4038   ret <2 x i64> %x
   4039 }
   4040 
   4041 define <4 x i64> @zext_4x32mem_to_4x64(<4 x i32> *%i , <4 x i1> %mask) nounwind readonly { ; zero-masked zero-extending load, 4 x i32 -> 4 x i64; readonly rather than readnone because the body loads through %i
   4042 ; GENERIC-LABEL: zext_4x32mem_to_4x64:
   4043 ; GENERIC:       # %bb.0:
   4044 ; GENERIC-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
   4045 ; GENERIC-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:0.33]
   4046 ; GENERIC-NEXT:    vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [8:1.00]
   4047 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4048 ;
   4049 ; SKX-LABEL: zext_4x32mem_to_4x64:
   4050 ; SKX:       # %bb.0:
   4051 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
   4052 ; SKX-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:1.00]
   4053 ; SKX-NEXT:    vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [10:1.00]
   4054 ; SKX-NEXT:    retq # sched: [7:1.00]
   4055   %a   = load <4 x i32>, <4 x i32>* %i, align 1 ; align 1 = no alignment guarantee on the source
   4056   %x   = zext <4 x i32> %a to <4 x i64>
   4057   %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer ; lanes with a clear mask bit become zero
   4058   ret <4 x i64> %ret
   4059 }
   4060 
   4061 define <4 x i64> @sext_4x32mem_to_4x64mask(<4 x i32> *%i , <4 x i1> %mask) nounwind readonly { ; zero-masked sign-extending load, 4 x i32 -> 4 x i64; readonly rather than readnone because the body loads through %i
   4062 ; GENERIC-LABEL: sext_4x32mem_to_4x64mask:
   4063 ; GENERIC:       # %bb.0:
   4064 ; GENERIC-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
   4065 ; GENERIC-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:0.33]
   4066 ; GENERIC-NEXT:    vpmovsxdq (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00]
   4067 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4068 ;
   4069 ; SKX-LABEL: sext_4x32mem_to_4x64mask:
   4070 ; SKX:       # %bb.0:
   4071 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
   4072 ; SKX-NEXT:    vpmovd2m %xmm0, %k1 # sched: [1:1.00]
   4073 ; SKX-NEXT:    vpmovsxdq (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00]
   4074 ; SKX-NEXT:    retq # sched: [7:1.00]
   4075   %a   = load <4 x i32>, <4 x i32>* %i, align 1 ; align 1 = no alignment guarantee on the source
   4076   %x   = sext <4 x i32> %a to <4 x i64>
   4077   %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer ; lanes with a clear mask bit become zero
   4078   ret <4 x i64> %ret
   4079 }
   4080 
   4081 define <4 x i64> @sext_4x32mem_to_4x64(<4 x i32> *%i) nounwind readonly { ; unmasked sign-extending load, 4 x i32 -> 4 x i64; readonly rather than readnone because the body loads through %i
   4082 ; GENERIC-LABEL: sext_4x32mem_to_4x64:
   4083 ; GENERIC:       # %bb.0:
   4084 ; GENERIC-NEXT:    vpmovsxdq (%rdi), %ymm0 # sched: [8:1.00]
   4085 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4086 ;
   4087 ; SKX-LABEL: sext_4x32mem_to_4x64:
   4088 ; SKX:       # %bb.0:
   4089 ; SKX-NEXT:    vpmovsxdq (%rdi), %ymm0 # sched: [9:1.00]
   4090 ; SKX-NEXT:    retq # sched: [7:1.00]
   4091   %a   = load <4 x i32>, <4 x i32>* %i, align 1 ; align 1 = no alignment guarantee on the source
   4092   %x   = sext <4 x i32> %a to <4 x i64>
   4093   ret <4 x i64> %x
   4094 }
   4095 
   4096 define <4 x i64> @sext_4x32_to_4x64(<4 x i32> %a) nounwind readnone { ; register-to-register sign extension, 4 x i32 -> 4 x i64
   4097 ; GENERIC-LABEL: sext_4x32_to_4x64:
   4098 ; GENERIC:       # %bb.0:
   4099 ; GENERIC-NEXT:    vpmovsxdq %xmm0, %ymm0 # sched: [1:1.00]
   4100 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4101 ;
   4102 ; SKX-LABEL: sext_4x32_to_4x64:
   4103 ; SKX:       # %bb.0:
   4104 ; SKX-NEXT:    vpmovsxdq %xmm0, %ymm0 # sched: [3:1.00]
   4105 ; SKX-NEXT:    retq # sched: [7:1.00]
   4106   %wide = sext <4 x i32> %a to <4 x i64>
   4107   ret <4 x i64> %wide
   4108 }
   4109 
   4110 define <4 x i64> @zext_4x32_to_4x64mask(<4 x i32> %a , <4 x i1> %mask) nounwind readnone { ; register-to-register zero extension, 4 x i32 -> 4 x i64, with zero-masking
   4111 ; GENERIC-LABEL: zext_4x32_to_4x64mask:
   4112 ; GENERIC:       # %bb.0:
   4113 ; GENERIC-NEXT:    vpslld $31, %xmm1, %xmm1 # sched: [1:1.00]
   4114 ; GENERIC-NEXT:    vpmovd2m %xmm1, %k1 # sched: [1:0.33]
   4115 ; GENERIC-NEXT:    vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
   4116 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4117 ;
   4118 ; SKX-LABEL: zext_4x32_to_4x64mask:
   4119 ; SKX:       # %bb.0:
   4120 ; SKX-NEXT:    vpslld $31, %xmm1, %xmm1 # sched: [1:0.50]
   4121 ; SKX-NEXT:    vpmovd2m %xmm1, %k1 # sched: [1:1.00]
   4122 ; SKX-NEXT:    vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00]
   4123 ; SKX-NEXT:    retq # sched: [7:1.00]
   4124   %wide = zext <4 x i32> %a to <4 x i64>
   4125   %masked = select <4 x i1> %mask, <4 x i64> %wide, <4 x i64> zeroinitializer ; lanes with a clear mask bit become zero
   4126   ret <4 x i64> %masked
   4127 }
   4128 
   4129 define <8 x i64> @zext_8x32mem_to_8x64(<8 x i32> *%i , <8 x i1> %mask) nounwind readonly { ; zero-masked zero-extending load, 8 x i32 -> 8 x i64; readonly rather than readnone because the body loads through %i
   4130 ; GENERIC-LABEL: zext_8x32mem_to_8x64:
   4131 ; GENERIC:       # %bb.0:
   4132 ; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
   4133 ; GENERIC-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:0.33]
   4134 ; GENERIC-NEXT:    vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:1.00]
   4135 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4136 ;
   4137 ; SKX-LABEL: zext_8x32mem_to_8x64:
   4138 ; SKX:       # %bb.0:
   4139 ; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
   4140 ; SKX-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:1.00]
   4141 ; SKX-NEXT:    vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [10:1.00]
   4142 ; SKX-NEXT:    retq # sched: [7:1.00]
   4143   %a   = load <8 x i32>, <8 x i32>* %i, align 1 ; align 1 = no alignment guarantee on the source
   4144   %x   = zext <8 x i32> %a to <8 x i64>
   4145   %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer ; lanes with a clear mask bit become zero
   4146   ret <8 x i64> %ret
   4147 }
   4148 
   4149 define <8 x i64> @sext_8x32mem_to_8x64mask(<8 x i32> *%i , <8 x i1> %mask) nounwind readonly { ; zero-masked sign-extending load, 8 x i32 -> 8 x i64; readonly rather than readnone because the body loads through %i
   4150 ; GENERIC-LABEL: sext_8x32mem_to_8x64mask:
   4151 ; GENERIC:       # %bb.0:
   4152 ; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
   4153 ; GENERIC-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:0.33]
   4154 ; GENERIC-NEXT:    vpmovsxdq (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
   4155 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4156 ;
   4157 ; SKX-LABEL: sext_8x32mem_to_8x64mask:
   4158 ; SKX:       # %bb.0:
   4159 ; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
   4160 ; SKX-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:1.00]
   4161 ; SKX-NEXT:    vpmovsxdq (%rdi), %zmm0 {%k1} {z} # sched: [10:1.00]
   4162 ; SKX-NEXT:    retq # sched: [7:1.00]
   4163   %a   = load <8 x i32>, <8 x i32>* %i, align 1 ; align 1 = no alignment guarantee on the source
   4164   %x   = sext <8 x i32> %a to <8 x i64>
   4165   %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer ; lanes with a clear mask bit become zero
   4166   ret <8 x i64> %ret
   4167 }
   4168 
   4169 define <8 x i64> @sext_8x32mem_to_8x64(<8 x i32> *%i) nounwind readonly { ; unmasked sign-extending load, 8 x i32 -> 8 x i64; readonly rather than readnone because the body loads through %i
   4170 ; GENERIC-LABEL: sext_8x32mem_to_8x64:
   4171 ; GENERIC:       # %bb.0:
   4172 ; GENERIC-NEXT:    vpmovsxdq (%rdi), %zmm0 # sched: [8:1.00]
   4173 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4174 ;
   4175 ; SKX-LABEL: sext_8x32mem_to_8x64:
   4176 ; SKX:       # %bb.0:
   4177 ; SKX-NEXT:    vpmovsxdq (%rdi), %zmm0 # sched: [10:1.00]
   4178 ; SKX-NEXT:    retq # sched: [7:1.00]
   4179   %a   = load <8 x i32>, <8 x i32>* %i, align 1 ; align 1 = no alignment guarantee on the source
   4180   %x   = sext <8 x i32> %a to <8 x i64>
   4181   ret <8 x i64> %x
   4182 }
   4183 
   4184 define <8 x i64> @sext_8x32_to_8x64(<8 x i32> %a) nounwind readnone { ; register-to-register sign extension, 8 x i32 -> 8 x i64
   4185 ; GENERIC-LABEL: sext_8x32_to_8x64:
   4186 ; GENERIC:       # %bb.0:
   4187 ; GENERIC-NEXT:    vpmovsxdq %ymm0, %zmm0 # sched: [1:1.00]
   4188 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4189 ;
   4190 ; SKX-LABEL: sext_8x32_to_8x64:
   4191 ; SKX:       # %bb.0:
   4192 ; SKX-NEXT:    vpmovsxdq %ymm0, %zmm0 # sched: [3:1.00]
   4193 ; SKX-NEXT:    retq # sched: [7:1.00]
   4194   %wide = sext <8 x i32> %a to <8 x i64>
   4195   ret <8 x i64> %wide
   4196 }
   4197 
   4198 define <8 x i64> @zext_8x32_to_8x64mask(<8 x i32> %a , <8 x i1> %mask) nounwind readnone { ; register-to-register zero extension, 8 x i32 -> 8 x i64, with zero-masking
   4199 ; GENERIC-LABEL: zext_8x32_to_8x64mask:
   4200 ; GENERIC:       # %bb.0:
   4201 ; GENERIC-NEXT:    vpsllw $15, %xmm1, %xmm1 # sched: [1:1.00]
   4202 ; GENERIC-NEXT:    vpmovw2m %xmm1, %k1 # sched: [1:0.33]
   4203 ; GENERIC-NEXT:    vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero sched: [1:1.00]
   4204 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4205 ;
   4206 ; SKX-LABEL: zext_8x32_to_8x64mask:
   4207 ; SKX:       # %bb.0:
   4208 ; SKX-NEXT:    vpsllw $15, %xmm1, %xmm1 # sched: [1:0.50]
   4209 ; SKX-NEXT:    vpmovw2m %xmm1, %k1 # sched: [1:1.00]
   4210 ; SKX-NEXT:    vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero sched: [3:1.00]
   4211 ; SKX-NEXT:    retq # sched: [7:1.00]
   4212   %wide = zext <8 x i32> %a to <8 x i64>
   4213   %masked = select <8 x i1> %mask, <8 x i64> %wide, <8 x i64> zeroinitializer ; lanes with a clear mask bit become zero
   4214   ret <8 x i64> %masked
   4215 }
   4215 }
   4216 define <8 x float> @fptrunc_test(<8 x double> %a) nounwind readnone { ; narrows 8 x double to 8 x float (vcvtpd2ps zmm -> ymm)
   4217 ; GENERIC-LABEL: fptrunc_test:
   4218 ; GENERIC:       # %bb.0:
   4219 ; GENERIC-NEXT:    vcvtpd2ps %zmm0, %ymm0 # sched: [4:1.00]
   4220 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4221 ;
   4222 ; SKX-LABEL: fptrunc_test:
   4223 ; SKX:       # %bb.0:
   4224 ; SKX-NEXT:    vcvtpd2ps %zmm0, %ymm0 # sched: [7:1.00]
   4225 ; SKX-NEXT:    retq # sched: [7:1.00]
   4226   %narrow = fptrunc <8 x double> %a to <8 x float>
   4227   ret <8 x float> %narrow
   4228 }
   4229 
   4230 define <8 x double> @fpext_test(<8 x float> %a) nounwind readnone { ; widens 8 x float to 8 x double (vcvtps2pd ymm -> zmm)
   4231 ; GENERIC-LABEL: fpext_test:
   4232 ; GENERIC:       # %bb.0:
   4233 ; GENERIC-NEXT:    vcvtps2pd %ymm0, %zmm0 # sched: [2:1.00]
   4234 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4235 ;
   4236 ; SKX-LABEL: fpext_test:
   4237 ; SKX:       # %bb.0:
   4238 ; SKX-NEXT:    vcvtps2pd %ymm0, %zmm0 # sched: [7:1.00]
   4239 ; SKX-NEXT:    retq # sched: [7:1.00]
   4240   %wide = fpext <8 x float> %a to <8 x double>
   4241   ret <8 x double> %wide
   4242 }
   4243 
   4244 define   <16 x i32> @zext_16i1_to_16xi32(i16 %b) { ; expands each bit of a 16-bit scalar into a 0/1 value in its own i32 lane
   4245 ; GENERIC-LABEL: zext_16i1_to_16xi32:
   4246 ; GENERIC:       # %bb.0:
   4247 ; GENERIC-NEXT:    kmovd %edi, %k0 # sched: [1:0.33]
   4248 ; GENERIC-NEXT:    vpmovm2d %k0, %zmm0 # sched: [1:0.33]
   4249 ; GENERIC-NEXT:    vpsrld $31, %zmm0, %zmm0 # sched: [1:1.00]
   4250 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4251 ;
   4252 ; SKX-LABEL: zext_16i1_to_16xi32:
   4253 ; SKX:       # %bb.0:
   4254 ; SKX-NEXT:    kmovd %edi, %k0 # sched: [1:1.00]
   4255 ; SKX-NEXT:    vpmovm2d %k0, %zmm0 # sched: [1:0.25]
   4256 ; SKX-NEXT:    vpsrld $31, %zmm0, %zmm0 # sched: [1:1.00]
   4257 ; SKX-NEXT:    retq # sched: [7:1.00]
   4258   %bits = bitcast i16 %b to <16 x i1> ; reinterpret the scalar as 16 one-bit lanes
   4259   %wide = zext <16 x i1> %bits to <16 x i32>
   4260   ret <16 x i32> %wide
   4261 }
   4262 
   4263 define   <8 x i64> @zext_8i1_to_8xi64(i8 %b) { ; expands each bit of an 8-bit scalar into a 0/1 value in its own i64 lane
   4264 ; GENERIC-LABEL: zext_8i1_to_8xi64:
   4265 ; GENERIC:       # %bb.0:
   4266 ; GENERIC-NEXT:    kmovd %edi, %k0 # sched: [1:0.33]
   4267 ; GENERIC-NEXT:    vpmovm2q %k0, %zmm0 # sched: [1:0.33]
   4268 ; GENERIC-NEXT:    vpsrlq $63, %zmm0, %zmm0 # sched: [1:1.00]
   4269 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4270 ;
   4271 ; SKX-LABEL: zext_8i1_to_8xi64:
   4272 ; SKX:       # %bb.0:
   4273 ; SKX-NEXT:    kmovd %edi, %k0 # sched: [1:1.00]
   4274 ; SKX-NEXT:    vpmovm2q %k0, %zmm0 # sched: [1:0.25]
   4275 ; SKX-NEXT:    vpsrlq $63, %zmm0, %zmm0 # sched: [1:1.00]
   4276 ; SKX-NEXT:    retq # sched: [7:1.00]
   4277   %bits = bitcast i8 %b to <8 x i1> ; reinterpret the scalar as 8 one-bit lanes
   4278   %wide = zext <8 x i1> %bits to <8 x i64>
   4279   ret <8 x i64> %wide
   4280 }
   4281 
   4282 define i16 @trunc_16i8_to_16i1(<16 x i8> %a) { ; packs the low bit of each of 16 bytes into an i16 bitmask
   4283 ; GENERIC-LABEL: trunc_16i8_to_16i1:
   4284 ; GENERIC:       # %bb.0:
   4285 ; GENERIC-NEXT:    vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
   4286 ; GENERIC-NEXT:    vpmovb2m %xmm0, %k0 # sched: [1:0.33]
   4287 ; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
   4288 ; GENERIC-NEXT:    # kill: def $ax killed $ax killed $eax
   4289 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4290 ;
   4291 ; SKX-LABEL: trunc_16i8_to_16i1:
   4292 ; SKX:       # %bb.0:
   4293 ; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50]
   4294 ; SKX-NEXT:    vpmovb2m %xmm0, %k0 # sched: [1:1.00]
   4295 ; SKX-NEXT:    kmovd %k0, %eax # sched: [3:1.00]
   4296 ; SKX-NEXT:    # kill: def $ax killed $ax killed $eax
   4297 ; SKX-NEXT:    retq # sched: [7:1.00]
   4298   %lowbits = trunc <16 x i8> %a to <16 x i1> ; keep only bit 0 of every byte
   4299   %packed = bitcast <16 x i1> %lowbits to i16
   4300   ret i16 %packed
   4301 }
   4302 
   4303 define i16 @trunc_16i32_to_16i1(<16 x i32> %a) { ; packs the low bit of each of 16 dwords into an i16 bitmask
   4304 ; GENERIC-LABEL: trunc_16i32_to_16i1:
   4305 ; GENERIC:       # %bb.0:
   4306 ; GENERIC-NEXT:    vpslld $31, %zmm0, %zmm0 # sched: [1:1.00]
   4307 ; GENERIC-NEXT:    vpmovd2m %zmm0, %k0 # sched: [1:0.33]
   4308 ; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
   4309 ; GENERIC-NEXT:    # kill: def $ax killed $ax killed $eax
   4310 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
   4311 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4312 ;
   4313 ; SKX-LABEL: trunc_16i32_to_16i1:
   4314 ; SKX:       # %bb.0:
   4315 ; SKX-NEXT:    vpslld $31, %zmm0, %zmm0 # sched: [1:1.00]
   4316 ; SKX-NEXT:    vpmovd2m %zmm0, %k0 # sched: [1:1.00]
   4317 ; SKX-NEXT:    kmovd %k0, %eax # sched: [3:1.00]
   4318 ; SKX-NEXT:    # kill: def $ax killed $ax killed $eax
   4319 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
   4320 ; SKX-NEXT:    retq # sched: [7:1.00]
   4321   %lowbits = trunc <16 x i32> %a to <16 x i1> ; keep only bit 0 of every dword
   4322   %packed = bitcast <16 x i1> %lowbits to i16
   4323   ret i16 %packed
   4324 }
   4325 
   4326 define <4 x i32> @trunc_4i32_to_4i1(<4 x i32> %a, <4 x i32> %b) { ; per lane: all-ones when bit 0 is set in both %a and %b, otherwise zero
   4327 ; GENERIC-LABEL: trunc_4i32_to_4i1:
   4328 ; GENERIC:       # %bb.0:
   4329 ; GENERIC-NEXT:    vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   4330 ; GENERIC-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
   4331 ; GENERIC-NEXT:    vpsrad $31, %xmm0, %xmm0 # sched: [1:1.00]
   4332 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4333 ;
   4334 ; SKX-LABEL: trunc_4i32_to_4i1:
   4335 ; SKX:       # %bb.0:
   4336 ; SKX-NEXT:    vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   4337 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
   4338 ; SKX-NEXT:    vpsrad $31, %xmm0, %xmm0 # sched: [1:0.50]
   4339 ; SKX-NEXT:    retq # sched: [7:1.00]
   4340   %lo_a = trunc <4 x i32> %a to <4 x i1>
   4341   %lo_b = trunc <4 x i32> %b to <4 x i1>
   4342   %both = and <4 x i1> %lo_a, %lo_b
   4343   %splat = sext <4 x i1> %both to <4 x i32> ; replicate each i1 across its 32-bit lane
   4344   ret <4 x i32> %splat
   4345 }
   4346 
   4347 
   4348 define i8 @trunc_8i16_to_8i1(<8 x i16> %a) { ; packs the low bit of each of 8 words into an i8 bitmask
   4349 ; GENERIC-LABEL: trunc_8i16_to_8i1:
   4350 ; GENERIC:       # %bb.0:
   4351 ; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
   4352 ; GENERIC-NEXT:    vpmovw2m %xmm0, %k0 # sched: [1:0.33]
   4353 ; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
   4354 ; GENERIC-NEXT:    # kill: def $al killed $al killed $eax
   4355 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4356 ;
   4357 ; SKX-LABEL: trunc_8i16_to_8i1:
   4358 ; SKX:       # %bb.0:
   4359 ; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
   4360 ; SKX-NEXT:    vpmovw2m %xmm0, %k0 # sched: [1:1.00]
   4361 ; SKX-NEXT:    kmovd %k0, %eax # sched: [3:1.00]
   4362 ; SKX-NEXT:    # kill: def $al killed $al killed $eax
   4363 ; SKX-NEXT:    retq # sched: [7:1.00]
   4364   %lowbits = trunc <8 x i16> %a to <8 x i1> ; keep only bit 0 of every word
   4365   %packed = bitcast <8 x i1> %lowbits to i8
   4366   ret i8 %packed
   4367 }
   4368 
   4369 define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind { ; per lane: all-ones when NOT (%a1 < %a2, signed), otherwise zero
   4370 ; GENERIC-LABEL: sext_8i1_8i32:
   4371 ; GENERIC:       # %bb.0:
   4372 ; GENERIC-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
   4373 ; GENERIC-NEXT:    vpternlogq $15, %ymm0, %ymm0, %ymm0 # sched: [1:0.50]
   4374 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4375 ;
   4376 ; SKX-LABEL: sext_8i1_8i32:
   4377 ; SKX:       # %bb.0:
   4378 ; SKX-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
   4379 ; SKX-NEXT:    vpternlogq $15, %ymm0, %ymm0, %ymm0 # sched: [1:0.33]
   4380 ; SKX-NEXT:    retq # sched: [7:1.00]
   4381   %lt = icmp slt <8 x i32> %a1, %a2
   4382   %not_lt = xor <8 x i1> %lt, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true> ; xor with all-true inverts the comparison
   4383   %splat = sext <8 x i1> %not_lt to <8 x i32>
   4384   ret <8 x i32> %splat
   4385 }
   4386 
   4387 
   4388 define i16 @trunc_i32_to_i1(i32 %a) { ; overwrites element 0 of a constant 16 x i1 vector with the low bit of %a and returns the vector as i16
   4389 ; GENERIC-LABEL: trunc_i32_to_i1:
   4390 ; GENERIC:       # %bb.0:
   4391 ; GENERIC-NEXT:    movw $-4, %ax # sched: [1:0.33]
   4392 ; GENERIC-NEXT:    kmovd %eax, %k0 # sched: [1:0.33]
   4393 ; GENERIC-NEXT:    kshiftrw $1, %k0, %k0 # sched: [1:1.00]
   4394 ; GENERIC-NEXT:    kshiftlw $1, %k0, %k0 # sched: [1:1.00]
   4395 ; GENERIC-NEXT:    andl $1, %edi # sched: [1:0.33]
   4396 ; GENERIC-NEXT:    kmovw %edi, %k1 # sched: [1:0.33]
   4397 ; GENERIC-NEXT:    korw %k1, %k0, %k0 # sched: [1:0.33]
   4398 ; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
   4399 ; GENERIC-NEXT:    # kill: def $ax killed $ax killed $eax
   4400 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4401 ;
   4402 ; SKX-LABEL: trunc_i32_to_i1:
   4403 ; SKX:       # %bb.0:
   4404 ; SKX-NEXT:    movw $-4, %ax # sched: [1:0.25]
   4405 ; SKX-NEXT:    kmovd %eax, %k0 # sched: [1:1.00]
   4406 ; SKX-NEXT:    kshiftrw $1, %k0, %k0 # sched: [3:1.00]
   4407 ; SKX-NEXT:    kshiftlw $1, %k0, %k0 # sched: [3:1.00]
   4408 ; SKX-NEXT:    andl $1, %edi # sched: [1:0.25]
   4409 ; SKX-NEXT:    kmovw %edi, %k1 # sched: [1:1.00]
   4410 ; SKX-NEXT:    korw %k1, %k0, %k0 # sched: [1:1.00]
   4411 ; SKX-NEXT:    kmovd %k0, %eax # sched: [3:1.00]
   4412 ; SKX-NEXT:    # kill: def $ax killed $ax killed $eax
   4413 ; SKX-NEXT:    retq # sched: [7:1.00]
   4414   %bit0 = trunc i32 %a to i1
   4415   %vec = insertelement <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %bit0, i32 0 ; only element 1 of the constant is false; element 0 is replaced
   4416   %packed = bitcast <16 x i1> %vec to i16
   4417   ret i16 %packed
   4418 }
   4419 
   4420 define <8 x i16> @sext_8i1_8i16(<8 x i32> %a1, <8 x i32> %a2) nounwind { ; signed %a1 < %a2 per lane, result replicated into 16-bit lanes
   4421 ; GENERIC-LABEL: sext_8i1_8i16:
   4422 ; GENERIC:       # %bb.0:
   4423 ; GENERIC-NEXT:    vpcmpgtd %ymm0, %ymm1, %k0 # sched: [1:0.50]
   4424 ; GENERIC-NEXT:    vpmovm2w %k0, %xmm0 # sched: [1:0.33]
   4425 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
   4426 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4427 ;
   4428 ; SKX-LABEL: sext_8i1_8i16:
   4429 ; SKX:       # %bb.0:
   4430 ; SKX-NEXT:    vpcmpgtd %ymm0, %ymm1, %k0 # sched: [3:1.00]
   4431 ; SKX-NEXT:    vpmovm2w %k0, %xmm0 # sched: [1:0.25]
   4432 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
   4433 ; SKX-NEXT:    retq # sched: [7:1.00]
   4434   %lt = icmp slt <8 x i32> %a1, %a2
   4435   %splat = sext <8 x i1> %lt to <8 x i16> ; all-ones where the compare is true, zero elsewhere
   4436   ret <8 x i16> %splat
   4437 }
   4438 
   4439 define <16 x i32> @sext_16i1_16i32(<16 x i32> %a1, <16 x i32> %a2) nounwind { ; signed %a1 < %a2 per lane, result replicated into 32-bit lanes
   4440 ; GENERIC-LABEL: sext_16i1_16i32:
   4441 ; GENERIC:       # %bb.0:
   4442 ; GENERIC-NEXT:    vpcmpgtd %zmm0, %zmm1, %k0 # sched: [1:0.50]
   4443 ; GENERIC-NEXT:    vpmovm2d %k0, %zmm0 # sched: [1:0.33]
   4444 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4445 ;
   4446 ; SKX-LABEL: sext_16i1_16i32:
   4447 ; SKX:       # %bb.0:
   4448 ; SKX-NEXT:    vpcmpgtd %zmm0, %zmm1, %k0 # sched: [3:1.00]
   4449 ; SKX-NEXT:    vpmovm2d %k0, %zmm0 # sched: [1:0.25]
   4450 ; SKX-NEXT:    retq # sched: [7:1.00]
   4451   %lt = icmp slt <16 x i32> %a1, %a2
   4452   %splat = sext <16 x i1> %lt to <16 x i32> ; all-ones where the compare is true, zero elsewhere
   4453   ret <16 x i32> %splat
   4454 }
   4455 
   4456 define <8 x i64> @sext_8i1_8i64(<8 x i32> %a1, <8 x i32> %a2) nounwind { ; signed %a1 < %a2 per lane, result replicated into 64-bit lanes
   4457 ; GENERIC-LABEL: sext_8i1_8i64:
   4458 ; GENERIC:       # %bb.0:
   4459 ; GENERIC-NEXT:    vpcmpgtd %ymm0, %ymm1, %k0 # sched: [1:0.50]
   4460 ; GENERIC-NEXT:    vpmovm2q %k0, %zmm0 # sched: [1:0.33]
   4461 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4462 ;
   4463 ; SKX-LABEL: sext_8i1_8i64:
   4464 ; SKX:       # %bb.0:
   4465 ; SKX-NEXT:    vpcmpgtd %ymm0, %ymm1, %k0 # sched: [3:1.00]
   4466 ; SKX-NEXT:    vpmovm2q %k0, %zmm0 # sched: [1:0.25]
   4467 ; SKX-NEXT:    retq # sched: [7:1.00]
   4468   %lt = icmp slt <8 x i32> %a1, %a2
   4469   %splat = sext <8 x i1> %lt to <8 x i64> ; all-ones where the compare is true, zero elsewhere
   4470   ret <8 x i64> %splat
   4471 }
   4472 
   4473 define void @extload_v8i64(<8 x i8>* %a, <8 x i64>* %res) { ; loads 8 bytes from %a, sign-extends each to i64 and stores the 8 x i64 result to %res
   4474 ; GENERIC-LABEL: extload_v8i64:
   4475 ; GENERIC:       # %bb.0:
   4476 ; GENERIC-NEXT:    vpmovsxbq (%rdi), %zmm0 # sched: [8:1.00]
   4477 ; GENERIC-NEXT:    vmovdqa64 %zmm0, (%rsi) # sched: [1:1.00]
   4478 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
   4479 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4480 ;
   4481 ; SKX-LABEL: extload_v8i64:
   4482 ; SKX:       # %bb.0:
   4483 ; SKX-NEXT:    vpmovsxbq (%rdi), %zmm0 # sched: [10:1.00]
   4484 ; SKX-NEXT:    vmovdqa64 %zmm0, (%rsi) # sched: [1:1.00]
   4485 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
   4486 ; SKX-NEXT:    retq # sched: [7:1.00]
   4487   %bytes = load <8 x i8>, <8 x i8>* %a
   4488   %wide = sext <8 x i8> %bytes to <8 x i64>
   4489   store <8 x i64> %wide, <8 x i64>* %res
   4490   ret void
   4491 }
   4492 
   4493 define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone { ; zero-masks a 64 x i16 value; the expected code handles it as two zmm halves, shifting the mask right by 32 for the second
   4494 ; GENERIC-LABEL: test21:
   4495 ; GENERIC:       # %bb.0:
   4496 ; GENERIC-NEXT:    vpsllw $7, %zmm2, %zmm2 # sched: [1:1.00]
   4497 ; GENERIC-NEXT:    vpmovb2m %zmm2, %k1 # sched: [1:0.33]
   4498 ; GENERIC-NEXT:    vmovdqu16 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50]
   4499 ; GENERIC-NEXT:    kshiftrq $32, %k1, %k1 # sched: [1:1.00]
   4500 ; GENERIC-NEXT:    vmovdqu16 %zmm1, %zmm1 {%k1} {z} # sched: [1:0.50]
   4501 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4502 ;
   4503 ; SKX-LABEL: test21:
   4504 ; SKX:       # %bb.0:
   4505 ; SKX-NEXT:    vpsllw $7, %zmm2, %zmm2 # sched: [1:1.00]
   4506 ; SKX-NEXT:    vpmovb2m %zmm2, %k1 # sched: [1:1.00]
   4507 ; SKX-NEXT:    vmovdqu16 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33]
   4508 ; SKX-NEXT:    kshiftrq $32, %k1, %k1 # sched: [3:1.00]
   4509 ; SKX-NEXT:    vmovdqu16 %zmm1, %zmm1 {%k1} {z} # sched: [1:0.33]
   4510 ; SKX-NEXT:    retq # sched: [7:1.00]
   4511   %masked = select <64 x i1> %mask, <64 x i16> %x, <64 x i16> zeroinitializer ; lanes with a clear mask bit become zero
   4512   ret <64 x i16> %masked
   4513 }
   4514 
   4515 define <16 x i16> @shuffle_zext_16x8_to_16x16(<16 x i8> %a) nounwind readnone { ; zero extension 16 x i8 -> 16 x i16 written as a byte shuffle; expected to be matched as vpmovzxbw
   4516 ; GENERIC-LABEL: shuffle_zext_16x8_to_16x16:
   4517 ; GENERIC:       # %bb.0:
   4518 ; GENERIC-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00]
   4519 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4520 ;
   4521 ; SKX-LABEL: shuffle_zext_16x8_to_16x16:
   4522 ; SKX:       # %bb.0:
   4523 ; SKX-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00]
   4524 ; SKX-NEXT:    retq # sched: [7:1.00]
   4525   %interleaved = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <32 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16, i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 13, i32 16, i32 14, i32 16, i32 15, i32 16> ; index 16 selects element 0 of the all-zero second operand
   4526   %words = bitcast <32 x i8> %interleaved to <16 x i16>
   4527   ret <16 x i16> %words
   4528 }
   4529 
   4530 define <16 x i16> @shuffle_zext_16x8_to_16x16_mask(<16 x i8> %a, <16 x i1> %mask) nounwind readnone { ; shuffle-form zero extension 16 x i8 -> 16 x i16 followed by zero-masking
   4531 ; GENERIC-LABEL: shuffle_zext_16x8_to_16x16_mask:
   4532 ; GENERIC:       # %bb.0:
   4533 ; GENERIC-NEXT:    vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00]
   4534 ; GENERIC-NEXT:    vpmovb2m %xmm1, %k1 # sched: [1:0.33]
   4535 ; GENERIC-NEXT:    vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00]
   4536 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4537 ;
   4538 ; SKX-LABEL: shuffle_zext_16x8_to_16x16_mask:
   4539 ; SKX:       # %bb.0:
   4540 ; SKX-NEXT:    vpsllw $7, %xmm1, %xmm1 # sched: [1:0.50]
   4541 ; SKX-NEXT:    vpmovb2m %xmm1, %k1 # sched: [1:1.00]
   4542 ; SKX-NEXT:    vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00]
   4543 ; SKX-NEXT:    retq # sched: [7:1.00]
   4544   %interleaved = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <32 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16, i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 13, i32 16, i32 14, i32 16, i32 15, i32 16> ; index 16 selects element 0 of the all-zero second operand
   4545   %words = bitcast <32 x i8> %interleaved to <16 x i16>
   4546   %masked = select <16 x i1> %mask, <16 x i16> %words, <16 x i16> zeroinitializer ; lanes with a clear mask bit become zero
   4547   ret <16 x i16> %masked
   4548 }
   4549 
   4550 define <16 x i16> @zext_32x8_to_16x16(<32 x i8> %a) { ; zero-extends bytes 0..15 of a 32-byte input to 16 x i16 via a byte shuffle; bytes 16..31 of %a are not referenced
   4551 ; GENERIC-LABEL: zext_32x8_to_16x16:
   4552 ; GENERIC:       # %bb.0:
   4553 ; GENERIC-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00]
   4554 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4555 ;
   4556 ; SKX-LABEL: zext_32x8_to_16x16:
   4557 ; SKX:       # %bb.0:
   4558 ; SKX-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00]
   4559 ; SKX-NEXT:    retq # sched: [7:1.00]
   4560   %interleaved = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 1, i32 32, i32 2, i32 32, i32 3, i32 32, i32 4, i32 32, i32 5, i32 32, i32 6, i32 32, i32 7, i32 32, i32 8, i32 32, i32 9, i32 32, i32 10, i32 32, i32 11, i32 32, i32 12, i32 32, i32 13, i32 32, i32 14, i32 32, i32 15, i32 32> ; index 32 selects element 0 of the all-zero second operand
   4561   %words = bitcast <32 x i8> %interleaved to <16 x i16>
   4562   ret <16 x i16> %words
   4563 }
   4564 
   4565 define <8 x i32> @zext_32x8_to_8x32(<32 x i8> %a) {
   4566 ; GENERIC-LABEL: zext_32x8_to_8x32:
   4567 ; GENERIC:       # %bb.0:
   4568 ; GENERIC-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [1:1.00]
   4569 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4570 ;
   4571 ; SKX-LABEL: zext_32x8_to_8x32:
   4572 ; SKX:       # %bb.0:
   4573 ; SKX-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00]
   4574 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; Bytes 0-7 of %a are each followed by three zero bytes (mask index 32 is
          ; element 0 of the zeroinitializer operand); the bitcast groups every four
          ; bytes into one i32 lane. Expected lowering is a single vpmovzxbd.
   4575   %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 1, i32 32, i32 32, i32 32, i32 2, i32 32, i32 32, i32 32, i32 3, i32 32, i32 32, i32 32, i32 4, i32 32, i32 32, i32 32, i32 5, i32 32, i32 32, i32 32, i32 6, i32 32, i32 32, i32 32, i32 7, i32 32, i32 32, i32 32>
   4576   %2 = bitcast <32 x i8> %1 to <8 x i32>
   4577   ret <8 x i32> %2
   4578 }
   4579 
   4580 define <4 x i64> @zext_32x8_to_4x64(<32 x i8> %a) {
   4581 ; GENERIC-LABEL: zext_32x8_to_4x64:
   4582 ; GENERIC:       # %bb.0:
   4583 ; GENERIC-NEXT:    vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
   4584 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4585 ;
   4586 ; SKX-LABEL: zext_32x8_to_4x64:
   4587 ; SKX:       # %bb.0:
   4588 ; SKX-NEXT:    vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00]
   4589 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; Bytes 0-3 of %a are each followed by seven zero bytes (mask index 32 is
          ; element 0 of the zeroinitializer operand); the bitcast groups every eight
          ; bytes into one i64 lane. Expected lowering is a single vpmovzxbq.
   4590   %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 1, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 2, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 3, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>
   4591   %2 = bitcast <32 x i8> %1 to <4 x i64>
   4592   ret <4 x i64> %2
   4593 }
   4594 
   4595 define <8 x i32> @zext_16x16_to_8x32(<16 x i16> %a) {
   4596 ; GENERIC-LABEL: zext_16x16_to_8x32:
   4597 ; GENERIC:       # %bb.0:
   4598 ; GENERIC-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
   4599 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4600 ;
   4601 ; SKX-LABEL: zext_16x16_to_8x32:
   4602 ; SKX:       # %bb.0:
   4603 ; SKX-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
   4604 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; Words 0-7 of %a are each followed by a zero word (mask index 16 is element
          ; 0 of the zeroinitializer operand); the bitcast pairs them into i32 lanes.
          ; Expected lowering is a single vpmovzxwd.
   4605   %1 = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16>
   4606   %2 = bitcast <16 x i16> %1 to <8 x i32>
   4607   ret <8 x i32> %2
   4608 }
   4609 
   4610 define <4 x i64> @zext_16x16_to_4x64(<16 x i16> %a) {
   4611 ; GENERIC-LABEL: zext_16x16_to_4x64:
   4612 ; GENERIC:       # %bb.0:
   4613 ; GENERIC-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
   4614 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4615 ;
   4616 ; SKX-LABEL: zext_16x16_to_4x64:
   4617 ; SKX:       # %bb.0:
   4618 ; SKX-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [3:1.00]
   4619 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; Words 0-3 of %a are each followed by three zero words (mask index 16 is
          ; element 0 of the zeroinitializer operand); the bitcast groups every four
          ; words into one i64 lane. Expected lowering is a single vpmovzxwq.
   4620   %1 = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, i32 2, i32 16, i32 16, i32 16, i32 3, i32 16, i32 16, i32 16>
   4621   %2 = bitcast <16 x i16> %1 to <4 x i64>
   4622   ret <4 x i64> %2
   4623 }
   4624 
   4625 define <4 x i64> @zext_8x32_to_4x64(<8 x i32> %a) {
   4626 ; GENERIC-LABEL: zext_8x32_to_4x64:
   4627 ; GENERIC:       # %bb.0:
   4628 ; GENERIC-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
   4629 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4630 ;
   4631 ; SKX-LABEL: zext_8x32_to_4x64:
   4632 ; SKX:       # %bb.0:
   4633 ; SKX-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00]
   4634 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; Dwords 0-3 of %a are each followed by a zero dword (mask index 8 is element
          ; 0 of the zeroinitializer operand); the bitcast pairs them into i64 lanes.
          ; Expected lowering is a single vpmovzxdq.
   4635   %1 = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8>
   4636   %2 = bitcast <8 x i32> %1 to <4 x i64>
   4637   ret <4 x i64> %2
   4638 }
   4639 
   4640 define <64 x i8> @zext_64xi1_to_64xi8(<64 x i8> %x, <64 x i8> %y) #0 {
   4641 ; GENERIC-LABEL: zext_64xi1_to_64xi8:
   4642 ; GENERIC:       # %bb.0:
   4643 ; GENERIC-NEXT:    vpcmpeqb %zmm1, %zmm0, %k1 # sched: [1:0.50]
   4644 ; GENERIC-NEXT:    vmovdqu8 {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [7:0.50]
   4645 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4646 ;
   4647 ; SKX-LABEL: zext_64xi1_to_64xi8:
   4648 ; SKX:       # %bb.0:
   4649 ; SKX-NEXT:    vpcmpeqb %zmm1, %zmm0, %k1 # sched: [3:1.00]
   4650 ; SKX-NEXT:    vmovdqu8 {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [8:0.50]
   4651 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; Per-byte equality compare whose i1 result is zero-extended back to i8.
          ; Expected lowering: compare into mask register %k1, then a zero-masked
          ; vmovdqu8 load from a RIP-relative constant.
   4652   %mask = icmp eq <64 x i8> %x, %y
   4653   %1 = zext <64 x i1> %mask to <64 x i8>
   4654   ret <64 x i8> %1
   4655 }
   4656 
   4657 define <32 x i16> @zext_32xi1_to_32xi16(<32 x i16> %x, <32 x i16> %y) #0 {
   4658 ; GENERIC-LABEL: zext_32xi1_to_32xi16:
   4659 ; GENERIC:       # %bb.0:
   4660 ; GENERIC-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0 # sched: [1:0.50]
   4661 ; GENERIC-NEXT:    vpmovm2w %k0, %zmm0 # sched: [1:0.33]
   4662 ; GENERIC-NEXT:    vpsrlw $15, %zmm0, %zmm0 # sched: [1:1.00]
   4663 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4664 ;
   4665 ; SKX-LABEL: zext_32xi1_to_32xi16:
   4666 ; SKX:       # %bb.0:
   4667 ; SKX-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0 # sched: [3:1.00]
   4668 ; SKX-NEXT:    vpmovm2w %k0, %zmm0 # sched: [1:0.25]
   4669 ; SKX-NEXT:    vpsrlw $15, %zmm0, %zmm0 # sched: [1:1.00]
   4670 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; Per-word equality compare whose i1 result is zero-extended to i16.
          ; Expected lowering: compare into %k0, expand the mask to word lanes with
          ; vpmovm2w, then shift each lane right by 15.
   4671   %mask = icmp eq <32 x i16> %x, %y
   4672   %1 = zext <32 x i1> %mask to <32 x i16>
   4673   ret <32 x i16> %1
   4674 }
   4675 
   4676 define <16 x i16> @zext_16xi1_to_16xi16(<16 x i16> %x, <16 x i16> %y) #0 {
   4677 ; GENERIC-LABEL: zext_16xi1_to_16xi16:
   4678 ; GENERIC:       # %bb.0:
   4679 ; GENERIC-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
   4680 ; GENERIC-NEXT:    vpsrlw $15, %ymm0, %ymm0 # sched: [1:1.00]
   4681 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4682 ;
   4683 ; SKX-LABEL: zext_16xi1_to_16xi16:
   4684 ; SKX:       # %bb.0:
   4685 ; SKX-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
   4686 ; SKX-NEXT:    vpsrlw $15, %ymm0, %ymm0 # sched: [1:0.50]
   4687 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; 256-bit variant of the compare-then-zext pattern. The expected lowering
          ; keeps the compare result in %ymm0 (no mask register) and shifts each word
          ; lane right by 15.
   4688   %mask = icmp eq <16 x i16> %x, %y
   4689   %1 = zext <16 x i1> %mask to <16 x i16>
   4690   ret <16 x i16> %1
   4691 }
   4692 
   4693 
   4694 define <32 x i8> @zext_32xi1_to_32xi8(<32 x i16> %x, <32 x i16> %y) #0 {
   4695 ; GENERIC-LABEL: zext_32xi1_to_32xi8:
   4696 ; GENERIC:       # %bb.0:
   4697 ; GENERIC-NEXT:    vpcmpeqw %zmm1, %zmm0, %k1 # sched: [1:0.50]
   4698 ; GENERIC-NEXT:    vmovdqu8 {{.*}}(%rip), %ymm0 {%k1} {z} # sched: [7:0.50]
   4699 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4700 ;
   4701 ; SKX-LABEL: zext_32xi1_to_32xi8:
   4702 ; SKX:       # %bb.0:
   4703 ; SKX-NEXT:    vpcmpeqw %zmm1, %zmm0, %k1 # sched: [3:1.00]
   4704 ; SKX-NEXT:    vmovdqu8 {{.*}}(%rip), %ymm0 {%k1} {z} # sched: [8:0.50]
   4705 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; The compare is on i16 lanes but its i1 result is zero-extended to i8 only,
          ; so the result is half the width of the inputs. Expected lowering: 512-bit
          ; compare into %k1, then a zero-masked vmovdqu8 constant load into %ymm0.
   4706   %mask = icmp eq <32 x i16> %x, %y
   4707   %1 = zext <32 x i1> %mask to <32 x i8>
   4708   ret <32 x i8> %1
   4709 }
   4710 
   4711 define <4 x i32> @zext_4xi1_to_4x32(<4 x i8> %x, <4 x i8> %y) #0 {
   4712 ; GENERIC-LABEL: zext_4xi1_to_4x32:
   4713 ; GENERIC:       # %bb.0:
   4714 ; GENERIC-NEXT:    vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] sched: [6:0.50]
   4715 ; GENERIC-NEXT:    vpand %xmm2, %xmm1, %xmm1 # sched: [1:0.33]
   4716 ; GENERIC-NEXT:    vpand %xmm2, %xmm0, %xmm0 # sched: [1:0.33]
   4717 ; GENERIC-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   4718 ; GENERIC-NEXT:    vpsrld $31, %xmm0, %xmm0 # sched: [1:1.00]
   4719 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4720 ;
   4721 ; SKX-LABEL: zext_4xi1_to_4x32:
   4722 ; SKX:       # %bb.0:
   4723 ; SKX-NEXT:    vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] sched: [6:0.50]
   4724 ; SKX-NEXT:    vpand %xmm2, %xmm1, %xmm1 # sched: [1:0.33]
   4725 ; SKX-NEXT:    vpand %xmm2, %xmm0, %xmm0 # sched: [1:0.33]
   4726 ; SKX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   4727 ; SKX-NEXT:    vpsrld $31, %xmm0, %xmm0 # sched: [1:0.50]
   4728 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; Operands are <4 x i8>. The expected lowering first ANDs both inputs with
          ; 255 in the low byte of each dword, then compares as dwords and shifts the
          ; result right by 31.
   4729   %mask = icmp eq <4 x i8> %x, %y
   4730   %1 = zext <4 x i1> %mask to <4 x i32>
   4731   ret <4 x i32> %1
   4732 }
   4733 
   4734 define <2 x i64> @zext_2xi1_to_2xi64(<2 x i8> %x, <2 x i8> %y) #0 {
   4735 ; GENERIC-LABEL: zext_2xi1_to_2xi64:
   4736 ; GENERIC:       # %bb.0:
   4737 ; GENERIC-NEXT:    vmovdqa {{.*#+}} xmm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] sched: [6:0.50]
   4738 ; GENERIC-NEXT:    vpand %xmm2, %xmm1, %xmm1 # sched: [1:0.33]
   4739 ; GENERIC-NEXT:    vpand %xmm2, %xmm0, %xmm0 # sched: [1:0.33]
   4740 ; GENERIC-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   4741 ; GENERIC-NEXT:    vpsrlq $63, %xmm0, %xmm0 # sched: [1:1.00]
   4742 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4743 ;
   4744 ; SKX-LABEL: zext_2xi1_to_2xi64:
   4745 ; SKX:       # %bb.0:
   4746 ; SKX-NEXT:    vmovdqa {{.*#+}} xmm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] sched: [6:0.50]
   4747 ; SKX-NEXT:    vpand %xmm2, %xmm1, %xmm1 # sched: [1:0.33]
   4748 ; SKX-NEXT:    vpand %xmm2, %xmm0, %xmm0 # sched: [1:0.33]
   4749 ; SKX-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   4750 ; SKX-NEXT:    vpsrlq $63, %xmm0, %xmm0 # sched: [1:0.50]
   4751 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; Operands are <2 x i8>. The expected lowering first ANDs both inputs with
          ; 255 in the low byte of each qword, then compares as qwords and shifts the
          ; result right by 63.
   4752   %mask = icmp eq <2 x i8> %x, %y
   4753   %1 = zext <2 x i1> %mask to <2 x i64>
   4754   ret <2 x i64> %1
   4755 }
   4756 
   4757 define <16 x float> @test_x86_fmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
   4758 ; GENERIC-LABEL: test_x86_fmadd_ps_z:
   4759 ; GENERIC:       # %bb.0:
   4760 ; GENERIC-NEXT:    vmulps %zmm1, %zmm0, %zmm0 # sched: [5:1.00]
   4761 ; GENERIC-NEXT:    vaddps %zmm2, %zmm0, %zmm0 # sched: [3:1.00]
   4762 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4763 ;
   4764 ; SKX-LABEL: test_x86_fmadd_ps_z:
   4765 ; SKX:       # %bb.0:
   4766 ; SKX-NEXT:    vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.50]
   4767 ; SKX-NEXT:    vaddps %zmm2, %zmm0, %zmm0 # sched: [4:0.50]
   4768 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; Computes %a0 * %a1 + %a2 as a separate fmul and fadd. The expected output
          ; keeps them as two instructions (vmulps, vaddps), not a fused one.
   4769   %x = fmul <16 x float> %a0, %a1
   4770   %res = fadd <16 x float> %x, %a2
   4771   ret <16 x float> %res
   4772 }
   4773 
   4774 define <16 x float> @test_x86_fmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
   4775 ; GENERIC-LABEL: test_x86_fmsub_ps_z:
   4776 ; GENERIC:       # %bb.0:
   4777 ; GENERIC-NEXT:    vmulps %zmm1, %zmm0, %zmm0 # sched: [5:1.00]
   4778 ; GENERIC-NEXT:    vsubps %zmm2, %zmm0, %zmm0 # sched: [3:1.00]
   4779 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4780 ;
   4781 ; SKX-LABEL: test_x86_fmsub_ps_z:
   4782 ; SKX:       # %bb.0:
   4783 ; SKX-NEXT:    vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.50]
   4784 ; SKX-NEXT:    vsubps %zmm2, %zmm0, %zmm0 # sched: [4:0.50]
   4785 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; Computes %a0 * %a1 - %a2 as a separate fmul and fsub. The expected output
          ; keeps them as two instructions (vmulps, vsubps), not a fused one.
   4786   %x = fmul <16 x float> %a0, %a1
   4787   %res = fsub <16 x float> %x, %a2
   4788   ret <16 x float> %res
   4789 }
   4790 
   4791 define <16 x float> @test_x86_fnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
   4792 ; GENERIC-LABEL: test_x86_fnmadd_ps_z:
   4793 ; GENERIC:       # %bb.0:
   4794 ; GENERIC-NEXT:    vmulps %zmm1, %zmm0, %zmm0 # sched: [5:1.00]
   4795 ; GENERIC-NEXT:    vsubps %zmm0, %zmm2, %zmm0 # sched: [3:1.00]
   4796 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4797 ;
   4798 ; SKX-LABEL: test_x86_fnmadd_ps_z:
   4799 ; SKX:       # %bb.0:
   4800 ; SKX-NEXT:    vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.50]
   4801 ; SKX-NEXT:    vsubps %zmm0, %zmm2, %zmm0 # sched: [4:0.50]
   4802 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; Computes %a2 - (%a0 * %a1): the product is the subtrahend here, unlike the
          ; fmsub test. The expected output is vmulps followed by vsubps.
   4803   %x = fmul <16 x float> %a0, %a1
   4804   %res = fsub <16 x float> %a2, %x
   4805   ret <16 x float> %res
   4806 }
   4807 
   4808 define <16 x float> @test_x86_fnmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
   4809 ; GENERIC-LABEL: test_x86_fnmsub_ps_z:
   4810 ; GENERIC:       # %bb.0:
   4811 ; GENERIC-NEXT:    vmulps %zmm1, %zmm0, %zmm0 # sched: [5:1.00]
   4812 ; GENERIC-NEXT:    vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00]
   4813 ; GENERIC-NEXT:    vsubps %zmm2, %zmm0, %zmm0 # sched: [3:1.00]
   4814 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4815 ;
   4816 ; SKX-LABEL: test_x86_fnmsub_ps_z:
   4817 ; SKX:       # %bb.0:
   4818 ; SKX-NEXT:    vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.50]
   4819 ; SKX-NEXT:    vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
   4820 ; SKX-NEXT:    vsubps %zmm2, %zmm0, %zmm0 # sched: [4:0.50]
   4821 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; Computes -(%a0 * %a1) - %a2. The negation is written as an fsub from a
          ; splat of -0.0; the expected output performs it as a vxorps with a
          ; broadcast ({1to16}) memory operand between the vmulps and the vsubps.
   4822   %x = fmul <16 x float> %a0, %a1
   4823   %y = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
   4824                           float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
   4825                           float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
   4826                           float -0.000000e+00>, %x
   4827   %res = fsub <16 x float> %y, %a2
   4828   ret <16 x float> %res
   4829 }
   4830 
   4831 define <8 x double> @test_x86_fmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
   4832 ; GENERIC-LABEL: test_x86_fmadd_pd_z:
   4833 ; GENERIC:       # %bb.0:
   4834 ; GENERIC-NEXT:    vmulpd %zmm1, %zmm0, %zmm0 # sched: [5:1.00]
   4835 ; GENERIC-NEXT:    vaddpd %zmm2, %zmm0, %zmm0 # sched: [3:1.00]
   4836 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4837 ;
   4838 ; SKX-LABEL: test_x86_fmadd_pd_z:
   4839 ; SKX:       # %bb.0:
   4840 ; SKX-NEXT:    vmulpd %zmm1, %zmm0, %zmm0 # sched: [4:0.50]
   4841 ; SKX-NEXT:    vaddpd %zmm2, %zmm0, %zmm0 # sched: [4:0.50]
   4842 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; Double-precision %a0 * %a1 + %a2 as a separate fmul and fadd. The expected
          ; output keeps them as vmulpd followed by vaddpd.
   4843   %x = fmul <8 x double> %a0, %a1
   4844   %res = fadd <8 x double> %x, %a2
   4845   ret <8 x double> %res
   4846 }
   4847 
   4848 define <8 x double> @test_x86_fmsub_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
   4849 ; GENERIC-LABEL: test_x86_fmsub_pd_z:
   4850 ; GENERIC:       # %bb.0:
   4851 ; GENERIC-NEXT:    vmulpd %zmm1, %zmm0, %zmm0 # sched: [5:1.00]
   4852 ; GENERIC-NEXT:    vsubpd %zmm2, %zmm0, %zmm0 # sched: [3:1.00]
   4853 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4854 ;
   4855 ; SKX-LABEL: test_x86_fmsub_pd_z:
   4856 ; SKX:       # %bb.0:
   4857 ; SKX-NEXT:    vmulpd %zmm1, %zmm0, %zmm0 # sched: [4:0.50]
   4858 ; SKX-NEXT:    vsubpd %zmm2, %zmm0, %zmm0 # sched: [4:0.50]
   4859 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; Double-precision %a0 * %a1 - %a2 as a separate fmul and fsub. The expected
          ; output keeps them as vmulpd followed by vsubpd.
   4860   %x = fmul <8 x double> %a0, %a1
   4861   %res = fsub <8 x double> %x, %a2
   4862   ret <8 x double> %res
   4863 }
   4864 
   4865 define double @test_x86_fmsub_213(double %a0, double %a1, double %a2) {
   4866 ; GENERIC-LABEL: test_x86_fmsub_213:
   4867 ; GENERIC:       # %bb.0:
   4868 ; GENERIC-NEXT:    vmulsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
   4869 ; GENERIC-NEXT:    vsubsd %xmm2, %xmm0, %xmm0 # sched: [3:1.00]
   4870 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4871 ;
   4872 ; SKX-LABEL: test_x86_fmsub_213:
   4873 ; SKX:       # %bb.0:
   4874 ; SKX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   4875 ; SKX-NEXT:    vsubsd %xmm2, %xmm0, %xmm0 # sched: [4:0.50]
   4876 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; Scalar double %a0 * %a1 - %a2 with all operands in registers. The expected
          ; output is vmulsd followed by vsubsd.
   4877   %x = fmul double %a0, %a1
   4878   %res = fsub double %x, %a2
   4879   ret double %res
   4880 }
   4881 
   4882 define double @test_x86_fmsub_213_m(double %a0, double %a1, double * %a2_ptr) {
   4883 ; GENERIC-LABEL: test_x86_fmsub_213_m:
   4884 ; GENERIC:       # %bb.0:
   4885 ; GENERIC-NEXT:    vmulsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
   4886 ; GENERIC-NEXT:    vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
   4887 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4888 ;
   4889 ; SKX-LABEL: test_x86_fmsub_213_m:
   4890 ; SKX:       # %bb.0:
   4891 ; SKX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   4892 ; SKX-NEXT:    vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
   4893 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; Scalar double %a0 * %a1 - *%a2_ptr. The loaded value is the subtrahend, so
          ; the expected output folds the load into vsubsd as a (%rdi) memory operand.
   4894   %a2 = load double , double *%a2_ptr
   4895   %x = fmul double %a0, %a1
   4896   %res = fsub double %x, %a2
   4897   ret double %res
   4898 }
   4899 
   4900 define double @test_x86_fmsub_231_m(double %a0, double %a1, double * %a2_ptr) {
   4901 ; GENERIC-LABEL: test_x86_fmsub_231_m:
   4902 ; GENERIC:       # %bb.0:
   4903 ; GENERIC-NEXT:    vmulsd (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
   4904 ; GENERIC-NEXT:    vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   4905 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4906 ;
   4907 ; SKX-LABEL: test_x86_fmsub_231_m:
   4908 ; SKX:       # %bb.0:
   4909 ; SKX-NEXT:    vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
   4910 ; SKX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   4911 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; Scalar double %a0 * *%a2_ptr - %a1. The loaded value is a multiplicand, so
          ; the expected output folds the load into vmulsd as a (%rdi) memory operand.
   4912   %a2 = load double , double *%a2_ptr
   4913   %x = fmul double %a0, %a2
   4914   %res = fsub double %x, %a1
   4915   ret double %res
   4916 }
   4917 
   4918 define <16 x float> @test231_br(<16 x float> %a1, <16 x float> %a2) nounwind {
   4919 ; GENERIC-LABEL: test231_br:
   4920 ; GENERIC:       # %bb.0:
   4921 ; GENERIC-NEXT:    vmulps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [12:1.00]
   4922 ; GENERIC-NEXT:    vaddps %zmm1, %zmm0, %zmm0 # sched: [3:1.00]
   4923 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4924 ;
   4925 ; SKX-LABEL: test231_br:
   4926 ; SKX:       # %bb.0:
   4927 ; SKX-NEXT:    vmulps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [11:0.50]
   4928 ; SKX-NEXT:    vaddps %zmm1, %zmm0, %zmm0 # sched: [4:0.50]
   4929 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; Multiplies %a1 by a splat constant (all 16 lanes identical), then adds %a2.
          ; The expected output folds the splat into vmulps as a {1to16} broadcast
          ; memory operand.
   4930   %b1 = fmul <16 x float> %a1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
   4931   %b2 = fadd <16 x float> %b1, %a2
   4932   ret <16 x float> %b2
   4933 }
   4934 
   4935 define <16 x float> @test213_br(<16 x float> %a1, <16 x float> %a2) nounwind {
   4936 ; GENERIC-LABEL: test213_br:
   4937 ; GENERIC:       # %bb.0:
   4938 ; GENERIC-NEXT:    vmulps %zmm1, %zmm0, %zmm0 # sched: [5:1.00]
   4939 ; GENERIC-NEXT:    vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [10:1.00]
   4940 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4941 ;
   4942 ; SKX-LABEL: test213_br:
   4943 ; SKX:       # %bb.0:
   4944 ; SKX-NEXT:    vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.50]
   4945 ; SKX-NEXT:    vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [11:0.50]
   4946 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; Multiplies %a1 by %a2, then adds a splat constant (all 16 lanes identical).
          ; The expected output folds the splat into vaddps as a {1to16} broadcast
          ; memory operand.
   4947   %b1 = fmul <16 x float> %a1, %a2
   4948   %b2 = fadd <16 x float> %b1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
   4949   ret <16 x float> %b2
   4950 }
   4951 
   4952 ; mask (a*c+b , a): a = %a0, b = %a1, c = the vector loaded from %a2_ptrt
   4953 define <16 x float> @test_x86_fmadd132_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
   4954 ; GENERIC-LABEL: test_x86_fmadd132_ps:
   4955 ; GENERIC:       # %bb.0:
   4956 ; GENERIC-NEXT:    vpsllw $7, %xmm2, %xmm2 # sched: [1:1.00]
   4957 ; GENERIC-NEXT:    vpmovb2m %xmm2, %k1 # sched: [1:0.33]
   4958 ; GENERIC-NEXT:    vmulps (%rdi), %zmm0, %zmm2 # sched: [12:1.00]
   4959 ; GENERIC-NEXT:    vaddps %zmm1, %zmm2, %zmm0 {%k1} # sched: [3:1.00]
   4960 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4961 ;
   4962 ; SKX-LABEL: test_x86_fmadd132_ps:
   4963 ; SKX:       # %bb.0:
   4964 ; SKX-NEXT:    vpsllw $7, %xmm2, %xmm2 # sched: [1:0.50]
   4965 ; SKX-NEXT:    vpmovb2m %xmm2, %k1 # sched: [1:1.00]
   4966 ; SKX-NEXT:    vmulps (%rdi), %zmm0, %zmm2 # sched: [11:0.50]
   4967 ; SKX-NEXT:    vaddps %zmm1, %zmm2, %zmm0 {%k1} # sched: [4:0.50]
   4968 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; Lanes where %mask is set get %a0 * %a2 + %a1; the others keep %a0. The
          ; expected output turns the <16 x i1> argument into %k1 (vpsllw + vpmovb2m)
          ; and merge-masks the vaddps into %zmm0, which still holds %a0.
   4969   %a2   = load <16 x float>,<16 x float> *%a2_ptrt,align 1
   4970   %x = fmul <16 x float> %a0, %a2
   4971   %y = fadd <16 x float> %x, %a1
   4972   %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a0
   4973   ret <16 x float> %res
   4974 }
   4975 
   4976 ; mask (a*c+b , b): a = %a0, b = %a1, c = the vector loaded from %a2_ptrt
   4977 define <16 x float> @test_x86_fmadd231_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
   4978 ; GENERIC-LABEL: test_x86_fmadd231_ps:
   4979 ; GENERIC:       # %bb.0:
   4980 ; GENERIC-NEXT:    vpsllw $7, %xmm2, %xmm2 # sched: [1:1.00]
   4981 ; GENERIC-NEXT:    vpmovb2m %xmm2, %k1 # sched: [1:0.33]
   4982 ; GENERIC-NEXT:    vmulps (%rdi), %zmm0, %zmm0 # sched: [12:1.00]
   4983 ; GENERIC-NEXT:    vaddps %zmm1, %zmm0, %zmm1 {%k1} # sched: [3:1.00]
   4984 ; GENERIC-NEXT:    vmovaps %zmm1, %zmm0 # sched: [1:1.00]
   4985 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4986 ;
   4987 ; SKX-LABEL: test_x86_fmadd231_ps:
   4988 ; SKX:       # %bb.0:
   4989 ; SKX-NEXT:    vpsllw $7, %xmm2, %xmm2 # sched: [1:0.50]
   4990 ; SKX-NEXT:    vpmovb2m %xmm2, %k1 # sched: [1:1.00]
   4991 ; SKX-NEXT:    vmulps (%rdi), %zmm0, %zmm0 # sched: [11:0.50]
   4992 ; SKX-NEXT:    vaddps %zmm1, %zmm0, %zmm1 {%k1} # sched: [4:0.50]
   4993 ; SKX-NEXT:    vmovaps %zmm1, %zmm0 # sched: [1:0.33]
   4994 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; Lanes where %mask is set get %a0 * %a2 + %a1; the others keep %a1. The
          ; expected output merge-masks the vaddps into %zmm1 (which holds %a1), so a
          ; final vmovaps copies the result into the return register %zmm0.
   4995   %a2   = load <16 x float>,<16 x float> *%a2_ptrt,align 1
   4996   %x = fmul <16 x float> %a0, %a2
   4997   %y = fadd <16 x float> %x, %a1
   4998   %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a1
   4999   ret <16 x float> %res
   5000 }
   5001 
   5002 ; mask (b*a+c , b): a = %a0, b = %a1, c = the vector loaded from %a2_ptrt
   5003 define <16 x float> @test_x86_fmadd213_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
   5004 ; GENERIC-LABEL: test_x86_fmadd213_ps:
   5005 ; GENERIC:       # %bb.0:
   5006 ; GENERIC-NEXT:    vpsllw $7, %xmm2, %xmm2 # sched: [1:1.00]
   5007 ; GENERIC-NEXT:    vpmovb2m %xmm2, %k1 # sched: [1:0.33]
   5008 ; GENERIC-NEXT:    vmulps %zmm0, %zmm1, %zmm0 # sched: [5:1.00]
   5009 ; GENERIC-NEXT:    vaddps (%rdi), %zmm0, %zmm1 {%k1} # sched: [10:1.00]
   5010 ; GENERIC-NEXT:    vmovaps %zmm1, %zmm0 # sched: [1:1.00]
   5011 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5012 ;
   5013 ; SKX-LABEL: test_x86_fmadd213_ps:
   5014 ; SKX:       # %bb.0:
   5015 ; SKX-NEXT:    vpsllw $7, %xmm2, %xmm2 # sched: [1:0.50]
   5016 ; SKX-NEXT:    vpmovb2m %xmm2, %k1 # sched: [1:1.00]
   5017 ; SKX-NEXT:    vmulps %zmm0, %zmm1, %zmm0 # sched: [4:0.50]
   5018 ; SKX-NEXT:    vaddps (%rdi), %zmm0, %zmm1 {%k1} # sched: [11:0.50]
   5019 ; SKX-NEXT:    vmovaps %zmm1, %zmm0 # sched: [1:0.33]
   5020 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; Lanes where %mask is set get %a1 * %a0 + %a2; the others keep %a1. Here the
          ; loaded vector is the addend, so the expected output folds the load into
          ; the merge-masked vaddps and then copies %zmm1 into %zmm0.
   5021   %a2   = load <16 x float>,<16 x float> *%a2_ptrt,align 1
   5022   %x = fmul <16 x float> %a1, %a0
   5023   %y = fadd <16 x float> %x, %a2
   5024   %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a1
   5025   ret <16 x float> %res
   5026 }
   5027 
   5028 define <16 x i32> @vpandd(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
   5029 ; GENERIC-LABEL: vpandd:
   5030 ; GENERIC:       # %bb.0: # %entry
   5031 ; GENERIC-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
   5032 ; GENERIC-NEXT:    vpandq %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
   5033 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5034 ;
   5035 ; SKX-LABEL: vpandd:
   5036 ; SKX:       # %bb.0: # %entry
   5037 ; SKX-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
   5038 ; SKX-NEXT:    vpandq %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
   5039 ; SKX-NEXT:    retq # sched: [7:1.00]
   5040 entry:
   5041   ; Force the execution domain with an add.
   5042   %a2 = add <16 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2,
   5043                             i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
          ; Bitwise AND of <16 x i32> vectors. Note the expected output uses the
          ; 64-bit-element mnemonic vpandq even though the IR type has i32 lanes.
   5044   %x = and <16 x i32> %a2, %b
   5045   ret <16 x i32> %x
   5046 }
   5047 
   5048 define <16 x i32> @vpandnd(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
   5049 ; GENERIC-LABEL: vpandnd:
   5050 ; GENERIC:       # %bb.0: # %entry
   5051 ; GENERIC-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
   5052 ; GENERIC-NEXT:    vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:0.33]
   5053 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5054 ;
   5055 ; SKX-LABEL: vpandnd:
   5056 ; SKX:       # %bb.0: # %entry
   5057 ; SKX-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
   5058 ; SKX-NEXT:    vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:0.50]
   5059 ; SKX-NEXT:    retq # sched: [7:1.00]
   5060 entry:
   5061   ; Force the execution domain with an add.
   5062   %a2 = add <16 x i32> %a, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3,
   5063                             i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
          ; The xor with all-ones is a bitwise NOT of %b; combined with the following
          ; 'and' it is expected to select as a single and-not (vpandnq) instruction.
   5064   %b2 = xor <16 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1,
   5065                             i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
   5066   %x = and <16 x i32> %a2, %b2
   5067   ret <16 x i32> %x
   5068 }
   5069 
   5070 define <16 x i32> @vpord(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
   5071 ; GENERIC-LABEL: vpord:
   5072 ; GENERIC:       # %bb.0: # %entry
   5073 ; GENERIC-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
   5074 ; GENERIC-NEXT:    vporq %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
   5075 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5076 ;
   5077 ; SKX-LABEL: vpord:
   5078 ; SKX:       # %bb.0: # %entry
   5079 ; SKX-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
   5080 ; SKX-NEXT:    vporq %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
   5081 ; SKX-NEXT:    retq # sched: [7:1.00]
   5082 entry:
   5083   ; Force the execution domain with an add.
   5084   %a2 = add <16 x i32> %a, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4,
   5085                             i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
          ; Bitwise OR of <16 x i32> vectors. Note the expected output uses the
          ; 64-bit-element mnemonic vporq even though the IR type has i32 lanes.
   5086   %x = or <16 x i32> %a2, %b
   5087   ret <16 x i32> %x
   5088 }
   5089 
   5090 define <16 x i32> @vpxord(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
   5091 ; GENERIC-LABEL: vpxord:
   5092 ; GENERIC:       # %bb.0: # %entry
   5093 ; GENERIC-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
   5094 ; GENERIC-NEXT:    vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
   5095 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5096 ;
   5097 ; SKX-LABEL: vpxord:
   5098 ; SKX:       # %bb.0: # %entry
   5099 ; SKX-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
   5100 ; SKX-NEXT:    vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
   5101 ; SKX-NEXT:    retq # sched: [7:1.00]
   5102 entry:
   5103   ; Force the execution domain with an add.
   5104   %a2 = add <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5,
   5105                             i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
          ; Bitwise XOR of <16 x i32> vectors. Note the expected output uses the
          ; 64-bit-element mnemonic vpxorq even though the IR type has i32 lanes.
   5106   %x = xor <16 x i32> %a2, %b
   5107   ret <16 x i32> %x
   5108 }
   5109 
   5110 define <8 x i64> @vpandq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
   5111 ; GENERIC-LABEL: vpandq:
   5112 ; GENERIC:       # %bb.0: # %entry
   5113 ; GENERIC-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
   5114 ; GENERIC-NEXT:    vpandq %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
   5115 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5116 ;
   5117 ; SKX-LABEL: vpandq:
   5118 ; SKX:       # %bb.0: # %entry
   5119 ; SKX-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
   5120 ; SKX-NEXT:    vpandq %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
   5121 ; SKX-NEXT:    retq # sched: [7:1.00]
   5122 entry:
   5123   ; Force the execution domain with an add.
   5124   %a2 = add <8 x i64> %a, <i64 6, i64 6, i64 6, i64 6, i64 6, i64 6, i64 6, i64 6>
          ; Bitwise AND of <8 x i64> vectors; the expected output is vpandq. The splat
          ; add above is expected to use a {1to8} broadcast memory operand.
   5125   %x = and <8 x i64> %a2, %b
   5126   ret <8 x i64> %x
   5127 }
   5128 
   5129 define <8 x i64> @vpandnq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
   5130 ; GENERIC-LABEL: vpandnq:
   5131 ; GENERIC:       # %bb.0: # %entry
   5132 ; GENERIC-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
   5133 ; GENERIC-NEXT:    vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:0.33]
   5134 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5135 ;
   5136 ; SKX-LABEL: vpandnq:
   5137 ; SKX:       # %bb.0: # %entry
   5138 ; SKX-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
   5139 ; SKX-NEXT:    vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:0.50]
   5140 ; SKX-NEXT:    retq # sched: [7:1.00]
   5141 entry:
   5142   ; Force the execution domain with an add.
   5143   %a2 = add <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
          ; The xor with all-ones is a bitwise NOT of %b; combined with the following
          ; 'and' it is expected to select as a single and-not (vpandnq) instruction.
   5144   %b2 = xor <8 x i64> %b, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
   5145   %x = and <8 x i64> %a2, %b2
   5146   ret <8 x i64> %x
   5147 }
   5148 
   5149 define <8 x i64> @vporq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
   5150 ; GENERIC-LABEL: vporq:
   5151 ; GENERIC:       # %bb.0: # %entry
   5152 ; GENERIC-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
   5153 ; GENERIC-NEXT:    vporq %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
   5154 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5155 ;
   5156 ; SKX-LABEL: vporq:
   5157 ; SKX:       # %bb.0: # %entry
   5158 ; SKX-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
   5159 ; SKX-NEXT:    vporq %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
   5160 ; SKX-NEXT:    retq # sched: [7:1.00]
   5161 entry:
   5162   ; Force the execution domain with an add.
   5163   %a2 = add <8 x i64> %a, <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>
          ; Bitwise OR of <8 x i64> vectors; the expected output is vporq. The splat
          ; add above is expected to use a {1to8} broadcast memory operand.
   5164   %x = or <8 x i64> %a2, %b
   5165   ret <8 x i64> %x
   5166 }
   5167 
   5168 define <8 x i64> @vpxorq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
   5169 ; GENERIC-LABEL: vpxorq:
   5170 ; GENERIC:       # %bb.0: # %entry
   5171 ; GENERIC-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
   5172 ; GENERIC-NEXT:    vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
   5173 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5174 ;
   5175 ; SKX-LABEL: vpxorq:
   5176 ; SKX:       # %bb.0: # %entry
   5177 ; SKX-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
   5178 ; SKX-NEXT:    vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
   5179 ; SKX-NEXT:    retq # sched: [7:1.00]
   5180 entry:
   5181   ; The integer add pins the execution domain, so the checks expect vpxorq.
   5182   %a.biased = add <8 x i64> %a, <i64 9, i64 9, i64 9, i64 9, i64 9, i64 9, i64 9, i64 9>
   5183   %xored = xor <8 x i64> %a.biased, %b
   5184   ret <8 x i64> %xored
   5185 }
   5186 
   5187 define <64 x i8> @and_v64i8(<64 x i8> %a, <64 x i8> %b) {
   5188 ; GENERIC-LABEL: and_v64i8:
   5189 ; GENERIC:       # %bb.0:
   5190 ; GENERIC-NEXT:    vandps %zmm1, %zmm0, %zmm0 # sched: [1:1.00]
   5191 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5192 ;
   5193 ; SKX-LABEL: and_v64i8:
   5194 ; SKX:       # %bb.0:
   5195 ; SKX-NEXT:    vandps %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
   5196 ; SKX-NEXT:    retq # sched: [7:1.00]
   5197   %anded = and <64 x i8> %a, %b ; byte-vector AND with no preceding add; the checks expect vandps
   5198   ret <64 x i8> %anded
   5199 }
   5200 
   5201 define <64 x i8> @andn_v64i8(<64 x i8> %a, <64 x i8> %b) {
   5202 ; GENERIC-LABEL: andn_v64i8:
   5203 ; GENERIC:       # %bb.0:
   5204 ; GENERIC-NEXT:    vandnps %zmm0, %zmm1, %zmm0 # sched: [1:1.00]
   5205 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5206 ;
   5207 ; SKX-LABEL: andn_v64i8:
   5208 ; SKX:       # %bb.0:
   5209 ; SKX-NEXT:    vandnps %zmm0, %zmm1, %zmm0 # sched: [1:0.50]
   5210 ; SKX-NEXT:    retq # sched: [7:1.00]
   5211   %b.inv = xor <64 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
   5212                            i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
   5213                            i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
   5214                            i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
   5215   %andn = and <64 x i8> %a, %b.inv ; %a & ~%b; the checks expect a single vandnps
   5216   ret <64 x i8> %andn
   5217 }
   5218 
   5219 define <64 x i8> @or_v64i8(<64 x i8> %a, <64 x i8> %b) {
   5220 ; GENERIC-LABEL: or_v64i8:
   5221 ; GENERIC:       # %bb.0:
   5222 ; GENERIC-NEXT:    vorps %zmm1, %zmm0, %zmm0 # sched: [1:1.00]
   5223 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5224 ;
   5225 ; SKX-LABEL: or_v64i8:
   5226 ; SKX:       # %bb.0:
   5227 ; SKX-NEXT:    vorps %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
   5228 ; SKX-NEXT:    retq # sched: [7:1.00]
   5229   %ored = or <64 x i8> %a, %b ; byte-vector OR with no preceding add; the checks expect vorps
   5230   ret <64 x i8> %ored
   5231 }
   5232 
   5233 define <64 x i8> @xor_v64i8(<64 x i8> %a, <64 x i8> %b) {
   5234 ; GENERIC-LABEL: xor_v64i8:
   5235 ; GENERIC:       # %bb.0:
   5236 ; GENERIC-NEXT:    vxorps %zmm1, %zmm0, %zmm0 # sched: [1:1.00]
   5237 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5238 ;
   5239 ; SKX-LABEL: xor_v64i8:
   5240 ; SKX:       # %bb.0:
   5241 ; SKX-NEXT:    vxorps %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
   5242 ; SKX-NEXT:    retq # sched: [7:1.00]
   5243   %xored = xor <64 x i8> %a, %b ; byte-vector XOR with no preceding add; the checks expect vxorps
   5244   ret <64 x i8> %xored
   5245 }
   5246 
   5247 define <32 x i16> @and_v32i16(<32 x i16> %a, <32 x i16> %b) {
   5248 ; GENERIC-LABEL: and_v32i16:
   5249 ; GENERIC:       # %bb.0:
   5250 ; GENERIC-NEXT:    vandps %zmm1, %zmm0, %zmm0 # sched: [1:1.00]
   5251 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5252 ;
   5253 ; SKX-LABEL: and_v32i16:
   5254 ; SKX:       # %bb.0:
   5255 ; SKX-NEXT:    vandps %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
   5256 ; SKX-NEXT:    retq # sched: [7:1.00]
   5257   %anded = and <32 x i16> %a, %b ; word-vector AND with no preceding add; the checks expect vandps
   5258   ret <32 x i16> %anded
   5259 }
   5260 
   5261 define <32 x i16> @andn_v32i16(<32 x i16> %a, <32 x i16> %b) {
   5262 ; GENERIC-LABEL: andn_v32i16:
   5263 ; GENERIC:       # %bb.0:
   5264 ; GENERIC-NEXT:    vandnps %zmm0, %zmm1, %zmm0 # sched: [1:1.00]
   5265 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5266 ;
   5267 ; SKX-LABEL: andn_v32i16:
   5268 ; SKX:       # %bb.0:
   5269 ; SKX-NEXT:    vandnps %zmm0, %zmm1, %zmm0 # sched: [1:0.50]
   5270 ; SKX-NEXT:    retq # sched: [7:1.00]
   5271   %b.inv = xor <32 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1,
   5272                             i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
   5273   %andn = and <32 x i16> %a, %b.inv ; %a & ~%b; the checks expect a single vandnps
   5274   ret <32 x i16> %andn
   5275 }
   5276 
   5277 define <32 x i16> @or_v32i16(<32 x i16> %a, <32 x i16> %b) {
   5278 ; GENERIC-LABEL: or_v32i16:
   5279 ; GENERIC:       # %bb.0:
   5280 ; GENERIC-NEXT:    vorps %zmm1, %zmm0, %zmm0 # sched: [1:1.00]
   5281 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5282 ;
   5283 ; SKX-LABEL: or_v32i16:
   5284 ; SKX:       # %bb.0:
   5285 ; SKX-NEXT:    vorps %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
   5286 ; SKX-NEXT:    retq # sched: [7:1.00]
   5287   %ored = or <32 x i16> %a, %b ; word-vector OR with no preceding add; the checks expect vorps
   5288   ret <32 x i16> %ored
   5289 }
   5290 
   5291 define <32 x i16> @xor_v32i16(<32 x i16> %a, <32 x i16> %b) {
   5292 ; GENERIC-LABEL: xor_v32i16:
   5293 ; GENERIC:       # %bb.0:
   5294 ; GENERIC-NEXT:    vxorps %zmm1, %zmm0, %zmm0 # sched: [1:1.00]
   5295 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5296 ;
   5297 ; SKX-LABEL: xor_v32i16:
   5298 ; SKX:       # %bb.0:
   5299 ; SKX-NEXT:    vxorps %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
   5300 ; SKX-NEXT:    retq # sched: [7:1.00]
   5301   %xored = xor <32 x i16> %a, %b ; word-vector XOR with no preceding add; the checks expect vxorps
   5302   ret <32 x i16> %xored
   5303 }
   5304 
   5305 define <16 x float> @masked_and_v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask, <16 x float> %c) {
   5306 ; GENERIC-LABEL: masked_and_v16f32:
   5307 ; GENERIC:       # %bb.0:
   5308 ; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
   5309 ; GENERIC-NEXT:    vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:1.00]
   5310 ; GENERIC-NEXT:    vaddps %zmm2, %zmm3, %zmm0 # sched: [3:1.00]
   5311 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5312 ;
   5313 ; SKX-LABEL: masked_and_v16f32:
   5314 ; SKX:       # %bb.0:
   5315 ; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
   5316 ; SKX-NEXT:    vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.50]
   5317 ; SKX-NEXT:    vaddps %zmm2, %zmm3, %zmm0 # sched: [4:0.50]
   5318 ; SKX-NEXT:    retq # sched: [7:1.00]
   5319   %a.bits = bitcast <16 x float> %a to <16 x i32>
   5320   %b.bits = bitcast <16 x float> %b to <16 x i32>
   5321   %pass.bits = bitcast <16 x float> %passThru to <16 x i32>
   5322   %lanes = bitcast i16 %mask to <16 x i1> ; one select bit per 32-bit lane
   5323   %anded = and <16 x i32> %a.bits, %b.bits
   5324   %blend = select <16 x i1> %lanes, <16 x i32> %anded, <16 x i32> %pass.bits ; lanes with a clear bit keep %passThru
   5325   %blend.fp = bitcast <16 x i32> %blend to <16 x float>
   5326   %sum = fadd <16 x float> %c, %blend.fp ; the masked result is consumed by a float add
   5327   ret <16 x float> %sum
   5328 }
   5329 
   5330 define <16 x float> @masked_or_v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask, <16 x float> %c) {
   5331 ; GENERIC-LABEL: masked_or_v16f32:
   5332 ; GENERIC:       # %bb.0:
   5333 ; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
   5334 ; GENERIC-NEXT:    vorps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:1.00]
   5335 ; GENERIC-NEXT:    vaddps %zmm2, %zmm3, %zmm0 # sched: [3:1.00]
   5336 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5337 ;
   5338 ; SKX-LABEL: masked_or_v16f32:
   5339 ; SKX:       # %bb.0:
   5340 ; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
   5341 ; SKX-NEXT:    vorps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.50]
   5342 ; SKX-NEXT:    vaddps %zmm2, %zmm3, %zmm0 # sched: [4:0.50]
   5343 ; SKX-NEXT:    retq # sched: [7:1.00]
   5344   %a1 = bitcast <16 x float> %a to <16 x i32>
   5345   %b1 = bitcast <16 x float> %b to <16 x i32>
   5346   %passThru1 = bitcast <16 x float> %passThru to <16 x i32>
   5347   %mask1 = bitcast i16 %mask to <16 x i1> ; one select bit per 32-bit lane
   5348   %op = or <16 x i32> %a1, %b1 ; bitwise OR is the operation under test here
   5349   %select = select <16 x i1> %mask1, <16 x i32> %op, <16 x i32> %passThru1 ; lanes with a clear bit keep %passThru
   5350   %cast = bitcast <16 x i32> %select to <16 x float>
   5351   %add = fadd <16 x float> %c, %cast ; the masked result is consumed by a float add
   5352   ret <16 x float> %add
   5353 }
   5354 
   5355 define <16 x float> @masked_xor_v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask, <16 x float> %c) {
   5356 ; GENERIC-LABEL: masked_xor_v16f32:
   5357 ; GENERIC:       # %bb.0:
   5358 ; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
   5359 ; GENERIC-NEXT:    vxorps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:1.00]
   5360 ; GENERIC-NEXT:    vaddps %zmm2, %zmm3, %zmm0 # sched: [3:1.00]
   5361 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5362 ;
   5363 ; SKX-LABEL: masked_xor_v16f32:
   5364 ; SKX:       # %bb.0:
   5365 ; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
   5366 ; SKX-NEXT:    vxorps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.50]
   5367 ; SKX-NEXT:    vaddps %zmm2, %zmm3, %zmm0 # sched: [4:0.50]
   5368 ; SKX-NEXT:    retq # sched: [7:1.00]
   5369   %a1 = bitcast <16 x float> %a to <16 x i32>
   5370   %b1 = bitcast <16 x float> %b to <16 x i32>
   5371   %passThru1 = bitcast <16 x float> %passThru to <16 x i32>
   5372   %mask1 = bitcast i16 %mask to <16 x i1> ; one select bit per 32-bit lane
   5373   %op = xor <16 x i32> %a1, %b1 ; bitwise XOR is the operation under test here
   5374   %select = select <16 x i1> %mask1, <16 x i32> %op, <16 x i32> %passThru1 ; lanes with a clear bit keep %passThru
   5375   %cast = bitcast <16 x i32> %select to <16 x float>
   5376   %add = fadd <16 x float> %c, %cast ; the masked result is consumed by a float add
   5377   ret <16 x float> %add
   5378 }
   5379 
   5380 define <8 x double> @masked_and_v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %passThru, i8 %mask, <8 x double> %c) {
   5381 ; GENERIC-LABEL: masked_and_v8f64:
   5382 ; GENERIC:       # %bb.0:
   5383 ; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
   5384 ; GENERIC-NEXT:    vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:1.00]
   5385 ; GENERIC-NEXT:    vaddpd %zmm2, %zmm3, %zmm0 # sched: [3:1.00]
   5386 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5387 ;
   5388 ; SKX-LABEL: masked_and_v8f64:
   5389 ; SKX:       # %bb.0:
   5390 ; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
   5391 ; SKX-NEXT:    vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.50]
   5392 ; SKX-NEXT:    vaddpd %zmm2, %zmm3, %zmm0 # sched: [4:0.50]
   5393 ; SKX-NEXT:    retq # sched: [7:1.00]
   5394   %a.bits = bitcast <8 x double> %a to <8 x i64>
   5395   %b.bits = bitcast <8 x double> %b to <8 x i64>
   5396   %pass.bits = bitcast <8 x double> %passThru to <8 x i64>
   5397   %lanes = bitcast i8 %mask to <8 x i1> ; one select bit per 64-bit lane
   5398   %anded = and <8 x i64> %a.bits, %b.bits
   5399   %blend = select <8 x i1> %lanes, <8 x i64> %anded, <8 x i64> %pass.bits ; lanes with a clear bit keep %passThru
   5400   %blend.fp = bitcast <8 x i64> %blend to <8 x double>
   5401   %sum = fadd <8 x double> %c, %blend.fp ; the masked result is consumed by a double add
   5402   ret <8 x double> %sum
   5403 }
   5404 
   5405 define <8 x double> @masked_or_v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %passThru, i8 %mask, <8 x double> %c) {
   5406 ; GENERIC-LABEL: masked_or_v8f64:
   5407 ; GENERIC:       # %bb.0:
   5408 ; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
   5409 ; GENERIC-NEXT:    vorpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:1.00]
   5410 ; GENERIC-NEXT:    vaddpd %zmm2, %zmm3, %zmm0 # sched: [3:1.00]
   5411 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5412 ;
   5413 ; SKX-LABEL: masked_or_v8f64:
   5414 ; SKX:       # %bb.0:
   5415 ; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
   5416 ; SKX-NEXT:    vorpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.50]
   5417 ; SKX-NEXT:    vaddpd %zmm2, %zmm3, %zmm0 # sched: [4:0.50]
   5418 ; SKX-NEXT:    retq # sched: [7:1.00]
   5419   %a1 = bitcast <8 x double> %a to <8 x i64>
   5420   %b1 = bitcast <8 x double> %b to <8 x i64>
   5421   %passThru1 = bitcast <8 x double> %passThru to <8 x i64>
   5422   %mask1 = bitcast i8 %mask to <8 x i1> ; one select bit per 64-bit lane
   5423   %op = or <8 x i64> %a1, %b1 ; bitwise OR is the operation under test here
   5424   %select = select <8 x i1> %mask1, <8 x i64> %op, <8 x i64> %passThru1 ; lanes with a clear bit keep %passThru
   5425   %cast = bitcast <8 x i64> %select to <8 x double>
   5426   %add = fadd <8 x double> %c, %cast ; the masked result is consumed by a double add
   5427   ret <8 x double> %add
   5428 }
   5429 
   5430 define <8 x double> @masked_xor_v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %passThru, i8 %mask, <8 x double> %c) {
   5431 ; GENERIC-LABEL: masked_xor_v8f64:
   5432 ; GENERIC:       # %bb.0:
   5433 ; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
   5434 ; GENERIC-NEXT:    vxorpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:1.00]
   5435 ; GENERIC-NEXT:    vaddpd %zmm2, %zmm3, %zmm0 # sched: [3:1.00]
   5436 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5437 ;
   5438 ; SKX-LABEL: masked_xor_v8f64:
   5439 ; SKX:       # %bb.0:
   5440 ; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
   5441 ; SKX-NEXT:    vxorpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.50]
   5442 ; SKX-NEXT:    vaddpd %zmm2, %zmm3, %zmm0 # sched: [4:0.50]
   5443 ; SKX-NEXT:    retq # sched: [7:1.00]
   5444   %a1 = bitcast <8 x double> %a to <8 x i64>
   5445   %b1 = bitcast <8 x double> %b to <8 x i64>
   5446   %passThru1 = bitcast <8 x double> %passThru to <8 x i64>
   5447   %mask1 = bitcast i8 %mask to <8 x i1> ; one select bit per 64-bit lane
   5448   %op = xor <8 x i64> %a1, %b1 ; bitwise XOR is the operation under test here
   5449   %select = select <8 x i1> %mask1, <8 x i64> %op, <8 x i64> %passThru1 ; lanes with a clear bit keep %passThru
   5450   %cast = bitcast <8 x i64> %select to <8 x double>
   5451   %add = fadd <8 x double> %c, %cast ; the masked result is consumed by a double add
   5452   ret <8 x double> %add
   5453 }
   5454 
   5455 define <8 x i64> @test_mm512_mask_and_epi32(<8 x i64> %__src, i16 zeroext %__k, <8 x i64> %__a, <8 x i64> %__b) {
   5456 ; GENERIC-LABEL: test_mm512_mask_and_epi32:
   5457 ; GENERIC:       # %bb.0: # %entry
   5458 ; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
   5459 ; GENERIC-NEXT:    vandps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00]
   5460 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5461 ;
   5462 ; SKX-LABEL: test_mm512_mask_and_epi32:
   5463 ; SKX:       # %bb.0: # %entry
   5464 ; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
   5465 ; SKX-NEXT:    vandps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50]
   5466 ; SKX-NEXT:    retq # sched: [7:1.00]
   5467 entry:
   5468   %anded = and <8 x i64> %__a, %__b
   5469   %anded.v16 = bitcast <8 x i64> %anded to <16 x i32> ; reinterpret as 16 dword lanes to match the i16 mask
   5470   %src.v16 = bitcast <8 x i64> %__src to <16 x i32>
   5471   %lanes = bitcast i16 %__k to <16 x i1>
   5472   %merged = select <16 x i1> %lanes, <16 x i32> %anded.v16, <16 x i32> %src.v16 ; clear mask bits keep %__src
   5473   %out = bitcast <16 x i32> %merged to <8 x i64>
   5474   ret <8 x i64> %out
   5475 }
   5476 
   5477 define <8 x i64> @test_mm512_mask_or_epi32(<8 x i64> %__src, i16 zeroext %__k, <8 x i64> %__a, <8 x i64> %__b) {
   5478 ; GENERIC-LABEL: test_mm512_mask_or_epi32:
   5479 ; GENERIC:       # %bb.0: # %entry
   5480 ; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
   5481 ; GENERIC-NEXT:    vorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00]
   5482 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5483 ;
   5484 ; SKX-LABEL: test_mm512_mask_or_epi32:
   5485 ; SKX:       # %bb.0: # %entry
   5486 ; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
   5487 ; SKX-NEXT:    vorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50]
   5488 ; SKX-NEXT:    retq # sched: [7:1.00]
   5489 entry:
   5490   %ored = or <8 x i64> %__a, %__b
   5491   %ored.v16 = bitcast <8 x i64> %ored to <16 x i32> ; reinterpret as 16 dword lanes to match the i16 mask
   5492   %src.v16 = bitcast <8 x i64> %__src to <16 x i32>
   5493   %lanes = bitcast i16 %__k to <16 x i1>
   5494   %merged = select <16 x i1> %lanes, <16 x i32> %ored.v16, <16 x i32> %src.v16 ; clear mask bits keep %__src
   5495   %out = bitcast <16 x i32> %merged to <8 x i64>
   5496   ret <8 x i64> %out
   5497 }
   5498 
   5499 define <8 x i64> @test_mm512_mask_xor_epi32(<8 x i64> %__src, i16 zeroext %__k, <8 x i64> %__a, <8 x i64> %__b) {
   5500 ; GENERIC-LABEL: test_mm512_mask_xor_epi32:
   5501 ; GENERIC:       # %bb.0: # %entry
   5502 ; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
   5503 ; GENERIC-NEXT:    vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00]
   5504 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5505 ;
   5506 ; SKX-LABEL: test_mm512_mask_xor_epi32:
   5507 ; SKX:       # %bb.0: # %entry
   5508 ; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
   5509 ; SKX-NEXT:    vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50]
   5510 ; SKX-NEXT:    retq # sched: [7:1.00]
   5511 entry:
   5512   %xored = xor <8 x i64> %__a, %__b
   5513   %xored.v16 = bitcast <8 x i64> %xored to <16 x i32> ; reinterpret as 16 dword lanes to match the i16 mask
   5514   %src.v16 = bitcast <8 x i64> %__src to <16 x i32>
   5515   %lanes = bitcast i16 %__k to <16 x i1>
   5516   %merged = select <16 x i1> %lanes, <16 x i32> %xored.v16, <16 x i32> %src.v16 ; clear mask bits keep %__src
   5517   %out = bitcast <16 x i32> %merged to <8 x i64>
   5518   ret <8 x i64> %out
   5519 }
   5520 
   5521 define <8 x double> @test_mm512_mask_xor_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
   5522 ; GENERIC-LABEL: test_mm512_mask_xor_pd:
   5523 ; GENERIC:       # %bb.0: # %entry
   5524 ; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
   5525 ; GENERIC-NEXT:    vxorpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00]
   5526 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5527 ;
   5528 ; SKX-LABEL: test_mm512_mask_xor_pd:
   5529 ; SKX:       # %bb.0: # %entry
   5530 ; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
   5531 ; SKX-NEXT:    vxorpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50]
   5532 ; SKX-NEXT:    retq # sched: [7:1.00]
   5533 entry:
   5534   %a.bits = bitcast <8 x double> %__A to <8 x i64>
   5535   %b.bits = bitcast <8 x double> %__B to <8 x i64>
   5536   %xored = xor <8 x i64> %a.bits, %b.bits ; the logic op is done on the integer bit patterns
   5537   %xored.fp = bitcast <8 x i64> %xored to <8 x double>
   5538   %lanes = bitcast i8 %__U to <8 x i1>
   5539   %out = select <8 x i1> %lanes, <8 x double> %xored.fp, <8 x double> %__W ; clear mask bits keep %__W
   5540   ret <8 x double> %out
   5541 }
   5542 
   5543 define <8 x double> @test_mm512_maskz_xor_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
   5544 ; GENERIC-LABEL: test_mm512_maskz_xor_pd:
   5545 ; GENERIC:       # %bb.0: # %entry
   5546 ; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
   5547 ; GENERIC-NEXT:    vxorpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:1.00]
   5548 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5549 ;
   5550 ; SKX-LABEL: test_mm512_maskz_xor_pd:
   5551 ; SKX:       # %bb.0: # %entry
   5552 ; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
   5553 ; SKX-NEXT:    vxorpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50]
   5554 ; SKX-NEXT:    retq # sched: [7:1.00]
   5555 entry:
   5556   %a.bits = bitcast <8 x double> %__A to <8 x i64>
   5557   %b.bits = bitcast <8 x double> %__B to <8 x i64>
   5558   %xored = xor <8 x i64> %a.bits, %b.bits ; the logic op is done on the integer bit patterns
   5559   %xored.fp = bitcast <8 x i64> %xored to <8 x double>
   5560   %lanes = bitcast i8 %__U to <8 x i1>
   5561   %out = select <8 x i1> %lanes, <8 x double> %xored.fp, <8 x double> zeroinitializer ; clear mask bits yield zero
   5562   ret <8 x double> %out
   5563 }
   5564 
   5565 define <16 x float> @test_mm512_mask_xor_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
   5566 ; GENERIC-LABEL: test_mm512_mask_xor_ps:
   5567 ; GENERIC:       # %bb.0: # %entry
   5568 ; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
   5569 ; GENERIC-NEXT:    vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00]
   5570 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5571 ;
   5572 ; SKX-LABEL: test_mm512_mask_xor_ps:
   5573 ; SKX:       # %bb.0: # %entry
   5574 ; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
   5575 ; SKX-NEXT:    vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50]
   5576 ; SKX-NEXT:    retq # sched: [7:1.00]
   5577 entry:
   5578   %a.bits = bitcast <16 x float> %__A to <16 x i32>
   5579   %b.bits = bitcast <16 x float> %__B to <16 x i32>
   5580   %xored = xor <16 x i32> %a.bits, %b.bits ; the logic op is done on the integer bit patterns
   5581   %xored.fp = bitcast <16 x i32> %xored to <16 x float>
   5582   %lanes = bitcast i16 %__U to <16 x i1>
   5583   %out = select <16 x i1> %lanes, <16 x float> %xored.fp, <16 x float> %__W ; clear mask bits keep %__W
   5584   ret <16 x float> %out
   5585 }
   5586 
   5587 define <16 x float> @test_mm512_maskz_xor_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
   5588 ; GENERIC-LABEL: test_mm512_maskz_xor_ps:
   5589 ; GENERIC:       # %bb.0: # %entry
   5590 ; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
   5591 ; GENERIC-NEXT:    vxorps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:1.00]
   5592 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5593 ;
   5594 ; SKX-LABEL: test_mm512_maskz_xor_ps:
   5595 ; SKX:       # %bb.0: # %entry
   5596 ; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
   5597 ; SKX-NEXT:    vxorps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50]
   5598 ; SKX-NEXT:    retq # sched: [7:1.00]
   5599 entry:
   5600   %a.bits = bitcast <16 x float> %__A to <16 x i32>
   5601   %b.bits = bitcast <16 x float> %__B to <16 x i32>
   5602   %xored = xor <16 x i32> %a.bits, %b.bits ; the logic op is done on the integer bit patterns
   5603   %xored.fp = bitcast <16 x i32> %xored to <16 x float>
   5604   %lanes = bitcast i16 %__U to <16 x i1>
   5605   %out = select <16 x i1> %lanes, <16 x float> %xored.fp, <16 x float> zeroinitializer ; clear mask bits yield zero
   5606   ret <16 x float> %out
   5607 }
   5608 
   5609 define <8 x double> @test_mm512_mask_or_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
   5610 ; GENERIC-LABEL: test_mm512_mask_or_pd:
   5611 ; GENERIC:       # %bb.0: # %entry
   5612 ; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
   5613 ; GENERIC-NEXT:    vorpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:1.00]
   5614 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5615 ;
   5616 ; SKX-LABEL: test_mm512_mask_or_pd:
   5617 ; SKX:       # %bb.0: # %entry
   5618 ; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
   5619 ; SKX-NEXT:    vorpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:0.50]
   5620 ; SKX-NEXT:    retq # sched: [7:1.00]
   5621 entry:
   5622   %a.bits = bitcast <8 x double> %__A to <8 x i64>
   5623   %b.bits = bitcast <8 x double> %__B to <8 x i64>
   5624   %ored = or <8 x i64> %b.bits, %a.bits ; %__B is the first operand here
   5625   %ored.fp = bitcast <8 x i64> %ored to <8 x double>
   5626   %lanes = bitcast i8 %__U to <8 x i1>
   5627   %out = select <8 x i1> %lanes, <8 x double> %ored.fp, <8 x double> %__W ; clear mask bits keep %__W
   5628   ret <8 x double> %out
   5629 }
   5630 
   5631 define <8 x double> @test_mm512_maskz_or_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
   5632 ; GENERIC-LABEL: test_mm512_maskz_or_pd:
   5633 ; GENERIC:       # %bb.0: # %entry
   5634 ; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
   5635 ; GENERIC-NEXT:    vorpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:1.00]
   5636 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5637 ;
   5638 ; SKX-LABEL: test_mm512_maskz_or_pd:
   5639 ; SKX:       # %bb.0: # %entry
   5640 ; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
   5641 ; SKX-NEXT:    vorpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50]
   5642 ; SKX-NEXT:    retq # sched: [7:1.00]
   5643 entry:
   5644   %a.bits = bitcast <8 x double> %__A to <8 x i64>
   5645   %b.bits = bitcast <8 x double> %__B to <8 x i64>
   5646   %ored = or <8 x i64> %b.bits, %a.bits ; %__B is the first operand here
   5647   %ored.fp = bitcast <8 x i64> %ored to <8 x double>
   5648   %lanes = bitcast i8 %__U to <8 x i1>
   5649   %out = select <8 x i1> %lanes, <8 x double> %ored.fp, <8 x double> zeroinitializer ; clear mask bits yield zero
   5650   ret <8 x double> %out
   5651 }
   5652 
   5653 define <16 x float> @test_mm512_mask_or_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
   5654 ; GENERIC-LABEL: test_mm512_mask_or_ps:
   5655 ; GENERIC:       # %bb.0: # %entry
   5656 ; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
   5657 ; GENERIC-NEXT:    vorps %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:1.00]
   5658 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5659 ;
   5660 ; SKX-LABEL: test_mm512_mask_or_ps:
   5661 ; SKX:       # %bb.0: # %entry
   5662 ; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
   5663 ; SKX-NEXT:    vorps %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:0.50]
   5664 ; SKX-NEXT:    retq # sched: [7:1.00]
   5665 entry:
   5666   %a.bits = bitcast <16 x float> %__A to <16 x i32>
   5667   %b.bits = bitcast <16 x float> %__B to <16 x i32>
   5668   %ored = or <16 x i32> %b.bits, %a.bits ; %__B is the first operand here
   5669   %ored.fp = bitcast <16 x i32> %ored to <16 x float>
   5670   %lanes = bitcast i16 %__U to <16 x i1>
   5671   %out = select <16 x i1> %lanes, <16 x float> %ored.fp, <16 x float> %__W ; clear mask bits keep %__W
   5672   ret <16 x float> %out
   5673 }
   5674 
   5675 define <16 x float> @test_mm512_maskz_or_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
   5676 ; GENERIC-LABEL: test_mm512_maskz_or_ps:
   5677 ; GENERIC:       # %bb.0: # %entry
   5678 ; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
   5679 ; GENERIC-NEXT:    vorps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:1.00]
   5680 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5681 ;
   5682 ; SKX-LABEL: test_mm512_maskz_or_ps:
   5683 ; SKX:       # %bb.0: # %entry
   5684 ; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
   5685 ; SKX-NEXT:    vorps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50]
   5686 ; SKX-NEXT:    retq # sched: [7:1.00]
   5687 entry:
   5688   %a.bits = bitcast <16 x float> %__A to <16 x i32>
   5689   %b.bits = bitcast <16 x float> %__B to <16 x i32>
   5690   %ored = or <16 x i32> %b.bits, %a.bits ; %__B is the first operand here
   5691   %ored.fp = bitcast <16 x i32> %ored to <16 x float>
   5692   %lanes = bitcast i16 %__U to <16 x i1>
   5693   %out = select <16 x i1> %lanes, <16 x float> %ored.fp, <16 x float> zeroinitializer ; clear mask bits yield zero
   5694   ret <16 x float> %out
   5695 }
   5696 
   5697 define <8 x double> @test_mm512_mask_and_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
   5698 ; GENERIC-LABEL: test_mm512_mask_and_pd:
   5699 ; GENERIC:       # %bb.0: # %entry
   5700 ; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
   5701 ; GENERIC-NEXT:    vandpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:1.00]
   5702 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5703 ;
   5704 ; SKX-LABEL: test_mm512_mask_and_pd:
   5705 ; SKX:       # %bb.0: # %entry
   5706 ; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
   5707 ; SKX-NEXT:    vandpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:0.50]
   5708 ; SKX-NEXT:    retq # sched: [7:1.00]
   5709 entry:
   5710   %a.bits = bitcast <8 x double> %__A to <8 x i64>
   5711   %b.bits = bitcast <8 x double> %__B to <8 x i64>
   5712   %anded = and <8 x i64> %b.bits, %a.bits ; %__B is the first operand here
   5713   %anded.fp = bitcast <8 x i64> %anded to <8 x double>
   5714   %lanes = bitcast i8 %__U to <8 x i1>
   5715   %out = select <8 x i1> %lanes, <8 x double> %anded.fp, <8 x double> %__W ; clear mask bits keep %__W
   5716   ret <8 x double> %out
   5717 }
   5718 
   5719 define <8 x double> @test_mm512_maskz_and_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
   5720 ; GENERIC-LABEL: test_mm512_maskz_and_pd:
   5721 ; GENERIC:       # %bb.0: # %entry
   5722 ; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
   5723 ; GENERIC-NEXT:    vandpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:1.00]
   5724 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5725 ;
   5726 ; SKX-LABEL: test_mm512_maskz_and_pd:
   5727 ; SKX:       # %bb.0: # %entry
   5728 ; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
   5729 ; SKX-NEXT:    vandpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50]
   5730 ; SKX-NEXT:    retq # sched: [7:1.00]
   5731 entry:
   5732   %a.bits = bitcast <8 x double> %__A to <8 x i64>
   5733   %b.bits = bitcast <8 x double> %__B to <8 x i64>
   5734   %anded = and <8 x i64> %b.bits, %a.bits ; %__B is the first operand here
   5735   %anded.fp = bitcast <8 x i64> %anded to <8 x double>
   5736   %lanes = bitcast i8 %__U to <8 x i1>
   5737   %out = select <8 x i1> %lanes, <8 x double> %anded.fp, <8 x double> zeroinitializer ; clear mask bits yield zero
   5738   ret <8 x double> %out
   5739 }
   5740 
   5741 define <16 x float> @test_mm512_mask_and_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
   5742 ; GENERIC-LABEL: test_mm512_mask_and_ps:
   5743 ; GENERIC:       # %bb.0: # %entry
   5744 ; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
   5745 ; GENERIC-NEXT:    vandps %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:1.00]
   5746 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5747 ;
   5748 ; SKX-LABEL: test_mm512_mask_and_ps:
   5749 ; SKX:       # %bb.0: # %entry
   5750 ; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
   5751 ; SKX-NEXT:    vandps %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:0.50]
   5752 ; SKX-NEXT:    retq # sched: [7:1.00]
   5753 entry:
   5754   %a.bits = bitcast <16 x float> %__A to <16 x i32>
   5755   %b.bits = bitcast <16 x float> %__B to <16 x i32>
   5756   %anded = and <16 x i32> %b.bits, %a.bits ; %__B is the first operand here
   5757   %anded.fp = bitcast <16 x i32> %anded to <16 x float>
   5758   %lanes = bitcast i16 %__U to <16 x i1>
   5759   %out = select <16 x i1> %lanes, <16 x float> %anded.fp, <16 x float> %__W ; clear mask bits keep %__W
   5760   ret <16 x float> %out
   5761 }
   5762 
   5763 define <16 x float> @test_mm512_maskz_and_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
   5764 ; GENERIC-LABEL: test_mm512_maskz_and_ps:
   5765 ; GENERIC:       # %bb.0: # %entry
   5766 ; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
   5767 ; GENERIC-NEXT:    vandps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:1.00]
   5768 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5769 ;
   5770 ; SKX-LABEL: test_mm512_maskz_and_ps:
   5771 ; SKX:       # %bb.0: # %entry
   5772 ; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
   5773 ; SKX-NEXT:    vandps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50]
   5774 ; SKX-NEXT:    retq # sched: [7:1.00]
   5775 entry:
   5776   %a.bits = bitcast <16 x float> %__A to <16 x i32>
   5777   %b.bits = bitcast <16 x float> %__B to <16 x i32>
   5778   %anded = and <16 x i32> %b.bits, %a.bits ; %__B is the first operand here
   5779   %anded.fp = bitcast <16 x i32> %anded to <16 x float>
   5780   %lanes = bitcast i16 %__U to <16 x i1>
   5781   %out = select <16 x i1> %lanes, <16 x float> %anded.fp, <16 x float> zeroinitializer ; clear mask bits yield zero
   5782   ret <16 x float> %out
   5783 }
   5784 
   5785 define <8 x double> @test_mm512_mask_andnot_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
   5786 ; GENERIC-LABEL: test_mm512_mask_andnot_pd:
   5787 ; GENERIC:       # %bb.0: # %entry
   5788 ; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
   5789 ; GENERIC-NEXT:    vandnpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00]
   5790 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5791 ;
   5792 ; SKX-LABEL: test_mm512_mask_andnot_pd:
   5793 ; SKX:       # %bb.0: # %entry
   5794 ; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
   5795 ; SKX-NEXT:    vandnpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50]
   5796 ; SKX-NEXT:    retq # sched: [7:1.00]
   5797 entry:
   5798   %a.bits = bitcast <8 x double> %__A to <8 x i64>
   5799   %a.inv = xor <8 x i64> %a.bits, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
   5800   %b.bits = bitcast <8 x double> %__B to <8 x i64>
   5801   %andn = and <8 x i64> %b.bits, %a.inv ; %__B & ~%__A; the checks expect a single vandnpd
   5802   %andn.fp = bitcast <8 x i64> %andn to <8 x double>
   5803   %lanes = bitcast i8 %__U to <8 x i1>
   5804   %out = select <8 x i1> %lanes, <8 x double> %andn.fp, <8 x double> %__W ; clear mask bits keep %__W
   5805   ret <8 x double> %out
   5806 }
   5807 
   5808 define <8 x double> @test_mm512_maskz_andnot_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
   5809 ; GENERIC-LABEL: test_mm512_maskz_andnot_pd:
   5810 ; GENERIC:       # %bb.0: # %entry
   5811 ; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
   5812 ; GENERIC-NEXT:    vandnpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:1.00]
   5813 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5814 ;
   5815 ; SKX-LABEL: test_mm512_maskz_andnot_pd:
   5816 ; SKX:       # %bb.0: # %entry
   5817 ; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
   5818 ; SKX-NEXT:    vandnpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50]
   5819 ; SKX-NEXT:    retq # sched: [7:1.00]
   5820 entry:
   5821   %a.bits = bitcast <8 x double> %__A to <8 x i64>
   5822   %a.inv = xor <8 x i64> %a.bits, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
   5823   %b.bits = bitcast <8 x double> %__B to <8 x i64>
   5824   %andn = and <8 x i64> %b.bits, %a.inv ; %__B & ~%__A; the checks expect a single vandnpd
   5825   %andn.fp = bitcast <8 x i64> %andn to <8 x double>
   5826   %lanes = bitcast i8 %__U to <8 x i1>
   5827   %out = select <8 x i1> %lanes, <8 x double> %andn.fp, <8 x double> zeroinitializer ; clear mask bits yield zero
   5828   ret <8 x double> %out
   5829 }
   5830 
   5831 define <16 x float> @test_mm512_mask_andnot_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
        ; Merge-masked 512-bit float ANDNOT: computes (~%__A) & %__B on the i32 bit
        ; patterns and keeps the %__W lane wherever the matching bit of %__U is clear.
        ; Both check prefixes expect a kmovd of the mask followed by one masked vandnps.
   5832 ; GENERIC-LABEL: test_mm512_mask_andnot_ps:
   5833 ; GENERIC:       # %bb.0: # %entry
   5834 ; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
   5835 ; GENERIC-NEXT:    vandnps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00]
   5836 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5837 ;
   5838 ; SKX-LABEL: test_mm512_mask_andnot_ps:
   5839 ; SKX:       # %bb.0: # %entry
   5840 ; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
   5841 ; SKX-NEXT:    vandnps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50]
   5842 ; SKX-NEXT:    retq # sched: [7:1.00]
   5843 entry:
        ; xor with all-ones is the bitwise NOT of %__A; the and with %__B completes ANDNOT.
   5844   %0 = bitcast <16 x float> %__A to <16 x i32>
   5845   %neg.i.i = xor <16 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
   5846   %1 = bitcast <16 x float> %__B to <16 x i32>
   5847   %and.i.i = and <16 x i32> %1, %neg.i.i
   5848   %2 = bitcast <16 x i32> %and.i.i to <16 x float>
        ; Reinterpret the i16 mask as sixteen i1 lane selectors for the merge with %__W.
   5849   %3 = bitcast i16 %__U to <16 x i1>
   5850   %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %__W
   5851   ret <16 x float> %4
   5852 }
   5853 
   5854 define <16 x float> @test_mm512_maskz_andnot_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
        ; Zero-masked 512-bit float ANDNOT: computes (~%__A) & %__B on the i32 bit
        ; patterns and zeroes every lane whose bit in %__U is clear.
        ; Both check prefixes expect a kmovd of the mask followed by one vandnps with {z}.
   5855 ; GENERIC-LABEL: test_mm512_maskz_andnot_ps:
   5856 ; GENERIC:       # %bb.0: # %entry
   5857 ; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
   5858 ; GENERIC-NEXT:    vandnps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:1.00]
   5859 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5860 ;
   5861 ; SKX-LABEL: test_mm512_maskz_andnot_ps:
   5862 ; SKX:       # %bb.0: # %entry
   5863 ; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
   5864 ; SKX-NEXT:    vandnps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50]
   5865 ; SKX-NEXT:    retq # sched: [7:1.00]
   5866 entry:
        ; xor with all-ones is the bitwise NOT of %__A; the and with %__B completes ANDNOT.
   5867   %0 = bitcast <16 x float> %__A to <16 x i32>
   5868   %neg.i.i = xor <16 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
   5869   %1 = bitcast <16 x float> %__B to <16 x i32>
   5870   %and.i.i = and <16 x i32> %1, %neg.i.i
   5871   %2 = bitcast <16 x i32> %and.i.i to <16 x float>
        ; Reinterpret the i16 mask as sixteen i1 lane selectors; unselected lanes become zero.
   5872   %3 = bitcast i16 %__U to <16 x i1>
   5873   %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> zeroinitializer
   5874   ret <16 x float> %4
   5875 }
   5876 
   5877 define i32 @mov_test1(float %x) {
        ; Bitcasts a scalar float argument to i32.
        ; Both check prefixes expect a single vmovd from %xmm0 to %eax.
   5878 ; GENERIC-LABEL: mov_test1:
   5879 ; GENERIC:       # %bb.0:
   5880 ; GENERIC-NEXT:    vmovd %xmm0, %eax # sched: [2:1.00]
   5881 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5882 ;
   5883 ; SKX-LABEL: mov_test1:
   5884 ; SKX:       # %bb.0:
   5885 ; SKX-NEXT:    vmovd %xmm0, %eax # sched: [2:1.00]
   5886 ; SKX-NEXT:    retq # sched: [7:1.00]
   5887    %res = bitcast float %x to i32
   5888    ret i32 %res
   5889 }
   5890 
   5891 define <4 x i32> @mov_test2(i32 %x) {
        ; Inserts an i32 argument into lane 0 of an undef <4 x i32>.
        ; Both check prefixes expect a single vmovd from %edi to %xmm0.
   5892 ; GENERIC-LABEL: mov_test2:
   5893 ; GENERIC:       # %bb.0:
   5894 ; GENERIC-NEXT:    vmovd %edi, %xmm0 # sched: [1:1.00]
   5895 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5896 ;
   5897 ; SKX-LABEL: mov_test2:
   5898 ; SKX:       # %bb.0:
   5899 ; SKX-NEXT:    vmovd %edi, %xmm0 # sched: [1:1.00]
   5900 ; SKX-NEXT:    retq # sched: [7:1.00]
   5901    %res = insertelement <4 x i32>undef, i32 %x, i32 0
   5902    ret <4 x i32>%res
   5903 }
   5904 
   5905 define <2 x i64> @mov_test3(i64 %x) {
        ; Inserts an i64 argument into lane 0 of an undef <2 x i64>.
        ; Both check prefixes expect a single vmovq from %rdi to %xmm0.
   5906 ; GENERIC-LABEL: mov_test3:
   5907 ; GENERIC:       # %bb.0:
   5908 ; GENERIC-NEXT:    vmovq %rdi, %xmm0 # sched: [1:1.00]
   5909 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5910 ;
   5911 ; SKX-LABEL: mov_test3:
   5912 ; SKX:       # %bb.0:
   5913 ; SKX-NEXT:    vmovq %rdi, %xmm0 # sched: [1:1.00]
   5914 ; SKX-NEXT:    retq # sched: [7:1.00]
   5915    %res = insertelement <2 x i64>undef, i64 %x, i32 0
   5916    ret <2 x i64>%res
   5917 }
   5918 
   5919 define <4 x i32> @mov_test4(i32* %x) {
        ; Loads an i32 and inserts it into lane 0 of an undef <4 x i32>.
        ; Both check prefixes expect the load and insert to become one vmovss from memory.
   5920 ; GENERIC-LABEL: mov_test4:
   5921 ; GENERIC:       # %bb.0:
   5922 ; GENERIC-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
   5923 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5924 ;
   5925 ; SKX-LABEL: mov_test4:
   5926 ; SKX:       # %bb.0:
   5927 ; SKX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
   5928 ; SKX-NEXT:    retq # sched: [7:1.00]
   5929    %y = load i32, i32* %x
   5930    %res = insertelement <4 x i32>undef, i32 %y, i32 0
   5931    ret <4 x i32>%res
   5932 }
   5933 
   5934 define void @mov_test5(float %x, float* %y) {
        ; Stores a scalar float argument through a pointer (align 4).
        ; Both check prefixes expect a single vmovss store to (%rdi).
   5935 ; GENERIC-LABEL: mov_test5:
   5936 ; GENERIC:       # %bb.0:
   5937 ; GENERIC-NEXT:    vmovss %xmm0, (%rdi) # sched: [1:1.00]
   5938 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5939 ;
   5940 ; SKX-LABEL: mov_test5:
   5941 ; SKX:       # %bb.0:
   5942 ; SKX-NEXT:    vmovss %xmm0, (%rdi) # sched: [1:1.00]
   5943 ; SKX-NEXT:    retq # sched: [7:1.00]
   5944    store float %x, float* %y, align 4
   5945    ret void
   5946 }
   5947 
   5948 define void @mov_test6(double %x, double* %y) {
        ; Stores a scalar double argument through a pointer (align 8).
        ; Both check prefixes expect a single vmovsd store to (%rdi).
   5949 ; GENERIC-LABEL: mov_test6:
   5950 ; GENERIC:       # %bb.0:
   5951 ; GENERIC-NEXT:    vmovsd %xmm0, (%rdi) # sched: [1:1.00]
   5952 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5953 ;
   5954 ; SKX-LABEL: mov_test6:
   5955 ; SKX:       # %bb.0:
   5956 ; SKX-NEXT:    vmovsd %xmm0, (%rdi) # sched: [1:1.00]
   5957 ; SKX-NEXT:    retq # sched: [7:1.00]
   5958    store double %x, double* %y, align 8
   5959    ret void
   5960 }
   5961 
   5962 define float @mov_test7(i32* %x) {
        ; Loads an i32 and bitcasts it to float.
        ; Both check prefixes expect one vmovss load straight into %xmm0.
   5963 ; GENERIC-LABEL: mov_test7:
   5964 ; GENERIC:       # %bb.0:
   5965 ; GENERIC-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
   5966 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5967 ;
   5968 ; SKX-LABEL: mov_test7:
   5969 ; SKX:       # %bb.0:
   5970 ; SKX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
   5971 ; SKX-NEXT:    retq # sched: [7:1.00]
   5972    %y = load i32, i32* %x
   5973    %res = bitcast i32 %y to float
   5974    ret float %res
   5975 }
   5976 
   5977 define i32 @mov_test8(<4 x i32> %x) {
        ; Extracts lane 0 of a <4 x i32> argument.
        ; Both check prefixes expect a single vmovd from %xmm0 to %eax.
   5978 ; GENERIC-LABEL: mov_test8:
   5979 ; GENERIC:       # %bb.0:
   5980 ; GENERIC-NEXT:    vmovd %xmm0, %eax # sched: [2:1.00]
   5981 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5982 ;
   5983 ; SKX-LABEL: mov_test8:
   5984 ; SKX:       # %bb.0:
   5985 ; SKX-NEXT:    vmovd %xmm0, %eax # sched: [2:1.00]
   5986 ; SKX-NEXT:    retq # sched: [7:1.00]
   5987    %res = extractelement <4 x i32> %x, i32 0
   5988    ret i32 %res
   5989 }
   5990 
   5991 define i64 @mov_test9(<2 x i64> %x) {
        ; Extracts lane 0 of a <2 x i64> argument.
        ; Both check prefixes expect a single vmovq from %xmm0 to %rax.
   5992 ; GENERIC-LABEL: mov_test9:
   5993 ; GENERIC:       # %bb.0:
   5994 ; GENERIC-NEXT:    vmovq %xmm0, %rax # sched: [2:1.00]
   5995 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5996 ;
   5997 ; SKX-LABEL: mov_test9:
   5998 ; SKX:       # %bb.0:
   5999 ; SKX-NEXT:    vmovq %xmm0, %rax # sched: [2:1.00]
   6000 ; SKX-NEXT:    retq # sched: [7:1.00]
   6001    %res = extractelement <2 x i64> %x, i32 0
   6002    ret i64 %res
   6003 }
   6004 
   6005 define <4 x i32> @mov_test10(i32* %x) {
        ; Loads an i32 (align 4) and inserts it into lane 0 of an all-zero <4 x i32>.
        ; Both check prefixes expect one vmovss load whose upper lanes are zero.
   6006 ; GENERIC-LABEL: mov_test10:
   6007 ; GENERIC:       # %bb.0:
   6008 ; GENERIC-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
   6009 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6010 ;
   6011 ; SKX-LABEL: mov_test10:
   6012 ; SKX:       # %bb.0:
   6013 ; SKX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
   6014 ; SKX-NEXT:    retq # sched: [7:1.00]
   6015    %y = load i32, i32* %x, align 4
   6016    %res = insertelement <4 x i32>zeroinitializer, i32 %y, i32 0
   6017    ret <4 x i32>%res
   6018 }
   6019 
   6020 define <4 x float> @mov_test11(float* %x) {
        ; Loads a float (align 4) and inserts it into lane 0 of an all-zero <4 x float>.
        ; Both check prefixes expect one vmovss load whose upper lanes are zero.
   6021 ; GENERIC-LABEL: mov_test11:
   6022 ; GENERIC:       # %bb.0:
   6023 ; GENERIC-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
   6024 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6025 ;
   6026 ; SKX-LABEL: mov_test11:
   6027 ; SKX:       # %bb.0:
   6028 ; SKX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
   6029 ; SKX-NEXT:    retq # sched: [7:1.00]
   6030    %y = load float, float* %x, align 4
   6031    %res = insertelement <4 x float>zeroinitializer, float %y, i32 0
   6032    ret <4 x float>%res
   6033 }
   6034 
   6035 define <2 x double> @mov_test12(double* %x) {
        ; Loads a double (align 8) and inserts it into lane 0 of an all-zero <2 x double>.
        ; Both check prefixes expect one vmovsd load whose upper lane is zero.
   6036 ; GENERIC-LABEL: mov_test12:
   6037 ; GENERIC:       # %bb.0:
   6038 ; GENERIC-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50]
   6039 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6040 ;
   6041 ; SKX-LABEL: mov_test12:
   6042 ; SKX:       # %bb.0:
   6043 ; SKX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
   6044 ; SKX-NEXT:    retq # sched: [7:1.00]
   6045    %y = load double, double* %x, align 8
   6046    %res = insertelement <2 x double>zeroinitializer, double %y, i32 0
   6047    ret <2 x double>%res
   6048 }
   6049 
   6050 define <2 x i64> @mov_test13(i64 %x) {
        ; Inserts an i64 argument into lane 0 of an all-zero <2 x i64>.
        ; Both check prefixes expect a single vmovq from %rdi to %xmm0.
   6051 ; GENERIC-LABEL: mov_test13:
   6052 ; GENERIC:       # %bb.0:
   6053 ; GENERIC-NEXT:    vmovq %rdi, %xmm0 # sched: [1:1.00]
   6054 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6055 ;
   6056 ; SKX-LABEL: mov_test13:
   6057 ; SKX:       # %bb.0:
   6058 ; SKX-NEXT:    vmovq %rdi, %xmm0 # sched: [1:1.00]
   6059 ; SKX-NEXT:    retq # sched: [7:1.00]
   6060    %res = insertelement <2 x i64>zeroinitializer, i64 %x, i32 0
   6061    ret <2 x i64>%res
   6062 }
   6063 
   6064 define <4 x i32> @mov_test14(i32 %x) {
        ; Inserts an i32 argument into lane 0 of an all-zero <4 x i32>.
        ; Both check prefixes expect a single vmovd from %edi to %xmm0.
   6065 ; GENERIC-LABEL: mov_test14:
   6066 ; GENERIC:       # %bb.0:
   6067 ; GENERIC-NEXT:    vmovd %edi, %xmm0 # sched: [1:1.00]
   6068 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6069 ;
   6070 ; SKX-LABEL: mov_test14:
   6071 ; SKX:       # %bb.0:
   6072 ; SKX-NEXT:    vmovd %edi, %xmm0 # sched: [1:1.00]
   6073 ; SKX-NEXT:    retq # sched: [7:1.00]
   6074    %res = insertelement <4 x i32>zeroinitializer, i32 %x, i32 0
   6075    ret <4 x i32>%res
   6076 }
   6077 
   6078 define <4 x i32> @mov_test15(i32* %x) {
        ; Loads an i32 (align 4) and inserts it into lane 0 of an all-zero <4 x i32>.
        ; Both check prefixes expect one vmovss load whose upper lanes are zero.
        ; NOTE(review): the IR body is identical to mov_test10 - confirm the duplicate is intended.
   6079 ; GENERIC-LABEL: mov_test15:
   6080 ; GENERIC:       # %bb.0:
   6081 ; GENERIC-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
   6082 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6083 ;
   6084 ; SKX-LABEL: mov_test15:
   6085 ; SKX:       # %bb.0:
   6086 ; SKX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
   6087 ; SKX-NEXT:    retq # sched: [7:1.00]
   6088    %y = load i32, i32* %x, align 4
   6089    %res = insertelement <4 x i32>zeroinitializer, i32 %y, i32 0
   6090    ret <4 x i32>%res
   6091 }
   6092 
   6093 define <16 x i32> @mov_test16(i8 * %addr) {
        ; Loads a <16 x i32> through a bitcast i8 pointer with align 1.
        ; Both check prefixes expect the unaligned form, vmovups, into %zmm0.
   6094 ; GENERIC-LABEL: mov_test16:
   6095 ; GENERIC:       # %bb.0:
   6096 ; GENERIC-NEXT:    vmovups (%rdi), %zmm0 # sched: [7:0.50]
   6097 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6098 ;
   6099 ; SKX-LABEL: mov_test16:
   6100 ; SKX:       # %bb.0:
   6101 ; SKX-NEXT:    vmovups (%rdi), %zmm0 # sched: [8:0.50]
   6102 ; SKX-NEXT:    retq # sched: [7:1.00]
   6103   %vaddr = bitcast i8* %addr to <16 x i32>*
   6104   %res = load <16 x i32>, <16 x i32>* %vaddr, align 1
   6105   ret <16 x i32>%res
   6106 }
   6107 
   6108 define <16 x i32> @mov_test17(i8 * %addr) {
        ; Loads a <16 x i32> through a bitcast i8 pointer with align 64.
        ; Both check prefixes expect the aligned form, vmovaps, into %zmm0.
   6109 ; GENERIC-LABEL: mov_test17:
   6110 ; GENERIC:       # %bb.0:
   6111 ; GENERIC-NEXT:    vmovaps (%rdi), %zmm0 # sched: [7:0.50]
   6112 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6113 ;
   6114 ; SKX-LABEL: mov_test17:
   6115 ; SKX:       # %bb.0:
   6116 ; SKX-NEXT:    vmovaps (%rdi), %zmm0 # sched: [8:0.50]
   6117 ; SKX-NEXT:    retq # sched: [7:1.00]
   6118   %vaddr = bitcast i8* %addr to <16 x i32>*
   6119   %res = load <16 x i32>, <16 x i32>* %vaddr, align 64
   6120   ret <16 x i32>%res
   6121 }
   6122 
   6123 define void @mov_test18(i8 * %addr, <8 x i64> %data) {
        ; Stores a <8 x i64> through a bitcast i8 pointer with align 64.
        ; Both check prefixes expect the aligned form, vmovaps, then a vzeroupper before the return.
   6124 ; GENERIC-LABEL: mov_test18:
   6125 ; GENERIC:       # %bb.0:
   6126 ; GENERIC-NEXT:    vmovaps %zmm0, (%rdi) # sched: [1:1.00]
   6127 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
   6128 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6129 ;
   6130 ; SKX-LABEL: mov_test18:
   6131 ; SKX:       # %bb.0:
   6132 ; SKX-NEXT:    vmovaps %zmm0, (%rdi) # sched: [1:1.00]
   6133 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
   6134 ; SKX-NEXT:    retq # sched: [7:1.00]
   6135   %vaddr = bitcast i8* %addr to <8 x i64>*
   6136   store <8 x i64>%data, <8 x i64>* %vaddr, align 64
   6137   ret void
   6138 }
   6139 
   6140 define void @mov_test19(i8 * %addr, <16 x i32> %data) {
        ; Stores a <16 x i32> through a bitcast i8 pointer with align 1.
        ; Both check prefixes expect the unaligned form, vmovups, then a vzeroupper before the return.
   6141 ; GENERIC-LABEL: mov_test19:
   6142 ; GENERIC:       # %bb.0:
   6143 ; GENERIC-NEXT:    vmovups %zmm0, (%rdi) # sched: [1:1.00]
   6144 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
   6145 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6146 ;
   6147 ; SKX-LABEL: mov_test19:
   6148 ; SKX:       # %bb.0:
   6149 ; SKX-NEXT:    vmovups %zmm0, (%rdi) # sched: [1:1.00]
   6150 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
   6151 ; SKX-NEXT:    retq # sched: [7:1.00]
   6152   %vaddr = bitcast i8* %addr to <16 x i32>*
   6153   store <16 x i32>%data, <16 x i32>* %vaddr, align 1
   6154   ret void
   6155 }
   6156 
   6157 define void @mov_test20(i8 * %addr, <16 x i32> %data) {
        ; Stores a <16 x i32> through a bitcast i8 pointer with align 64.
        ; Both check prefixes expect the aligned form, vmovaps, then a vzeroupper before the return.
   6158 ; GENERIC-LABEL: mov_test20:
   6159 ; GENERIC:       # %bb.0:
   6160 ; GENERIC-NEXT:    vmovaps %zmm0, (%rdi) # sched: [1:1.00]
   6161 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
   6162 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6163 ;
   6164 ; SKX-LABEL: mov_test20:
   6165 ; SKX:       # %bb.0:
   6166 ; SKX-NEXT:    vmovaps %zmm0, (%rdi) # sched: [1:1.00]
   6167 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
   6168 ; SKX-NEXT:    retq # sched: [7:1.00]
   6169   %vaddr = bitcast i8* %addr to <16 x i32>*
   6170   store <16 x i32>%data, <16 x i32>* %vaddr, align 64
   6171   ret void
   6172 }
   6173 
   6174 define  <8 x i64> @mov_test21(i8 * %addr) {
        ; Loads a <8 x i64> through a bitcast i8 pointer with align 64.
        ; Both check prefixes expect the aligned form, vmovaps, into %zmm0.
   6175 ; GENERIC-LABEL: mov_test21:
   6176 ; GENERIC:       # %bb.0:
   6177 ; GENERIC-NEXT:    vmovaps (%rdi), %zmm0 # sched: [7:0.50]
   6178 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6179 ;
   6180 ; SKX-LABEL: mov_test21:
   6181 ; SKX:       # %bb.0:
   6182 ; SKX-NEXT:    vmovaps (%rdi), %zmm0 # sched: [8:0.50]
   6183 ; SKX-NEXT:    retq # sched: [7:1.00]
   6184   %vaddr = bitcast i8* %addr to <8 x i64>*
   6185   %res = load <8 x i64>, <8 x i64>* %vaddr, align 64
   6186   ret <8 x i64>%res
   6187 }
   6188 
   6189 define void @mov_test22(i8 * %addr, <8 x i64> %data) {
        ; Stores a <8 x i64> through a bitcast i8 pointer with align 1.
        ; Both check prefixes expect the unaligned form, vmovups, then a vzeroupper before the return.
   6190 ; GENERIC-LABEL: mov_test22:
   6191 ; GENERIC:       # %bb.0:
   6192 ; GENERIC-NEXT:    vmovups %zmm0, (%rdi) # sched: [1:1.00]
   6193 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
   6194 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6195 ;
   6196 ; SKX-LABEL: mov_test22:
   6197 ; SKX:       # %bb.0:
   6198 ; SKX-NEXT:    vmovups %zmm0, (%rdi) # sched: [1:1.00]
   6199 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
   6200 ; SKX-NEXT:    retq # sched: [7:1.00]
   6201   %vaddr = bitcast i8* %addr to <8 x i64>*
   6202   store <8 x i64>%data, <8 x i64>* %vaddr, align 1
   6203   ret void
   6204 }
   6205 
   6206 define <8 x i64> @mov_test23(i8 * %addr) {
        ; Loads a <8 x i64> through a bitcast i8 pointer with align 1.
        ; Both check prefixes expect the unaligned form, vmovups, into %zmm0.
   6207 ; GENERIC-LABEL: mov_test23:
   6208 ; GENERIC:       # %bb.0:
   6209 ; GENERIC-NEXT:    vmovups (%rdi), %zmm0 # sched: [7:0.50]
   6210 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6211 ;
   6212 ; SKX-LABEL: mov_test23:
   6213 ; SKX:       # %bb.0:
   6214 ; SKX-NEXT:    vmovups (%rdi), %zmm0 # sched: [8:0.50]
   6215 ; SKX-NEXT:    retq # sched: [7:1.00]
   6216   %vaddr = bitcast i8* %addr to <8 x i64>*
   6217   %res = load <8 x i64>, <8 x i64>* %vaddr, align 1
   6218   ret <8 x i64>%res
   6219 }
   6220 
   6221 define void @mov_test24(i8 * %addr, <8 x double> %data) {
        ; Stores a <8 x double> through a bitcast i8 pointer with align 64.
        ; Both check prefixes expect the aligned form, vmovaps, then a vzeroupper before the return.
   6222 ; GENERIC-LABEL: mov_test24:
   6223 ; GENERIC:       # %bb.0:
   6224 ; GENERIC-NEXT:    vmovaps %zmm0, (%rdi) # sched: [1:1.00]
   6225 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
   6226 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6227 ;
   6228 ; SKX-LABEL: mov_test24:
   6229 ; SKX:       # %bb.0:
   6230 ; SKX-NEXT:    vmovaps %zmm0, (%rdi) # sched: [1:1.00]
   6231 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
   6232 ; SKX-NEXT:    retq # sched: [7:1.00]
   6233   %vaddr = bitcast i8* %addr to <8 x double>*
   6234   store <8 x double>%data, <8 x double>* %vaddr, align 64
   6235   ret void
   6236 }
   6237 
   6238 define <8 x double> @mov_test25(i8 * %addr) {
        ; Loads a <8 x double> through a bitcast i8 pointer with align 64.
        ; Both check prefixes expect the aligned form, vmovaps, into %zmm0.
   6239 ; GENERIC-LABEL: mov_test25:
   6240 ; GENERIC:       # %bb.0:
   6241 ; GENERIC-NEXT:    vmovaps (%rdi), %zmm0 # sched: [7:0.50]
   6242 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6243 ;
   6244 ; SKX-LABEL: mov_test25:
   6245 ; SKX:       # %bb.0:
   6246 ; SKX-NEXT:    vmovaps (%rdi), %zmm0 # sched: [8:0.50]
   6247 ; SKX-NEXT:    retq # sched: [7:1.00]
   6248   %vaddr = bitcast i8* %addr to <8 x double>*
   6249   %res = load <8 x double>, <8 x double>* %vaddr, align 64
   6250   ret <8 x double>%res
   6251 }
   6252 
   6253 define void @mov_test26(i8 * %addr, <16 x float> %data) {
        ; Stores a <16 x float> through a bitcast i8 pointer with align 64.
        ; Both check prefixes expect the aligned form, vmovaps, then a vzeroupper before the return.
   6254 ; GENERIC-LABEL: mov_test26:
   6255 ; GENERIC:       # %bb.0:
   6256 ; GENERIC-NEXT:    vmovaps %zmm0, (%rdi) # sched: [1:1.00]
   6257 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
   6258 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6259 ;
   6260 ; SKX-LABEL: mov_test26:
   6261 ; SKX:       # %bb.0:
   6262 ; SKX-NEXT:    vmovaps %zmm0, (%rdi) # sched: [1:1.00]
   6263 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
   6264 ; SKX-NEXT:    retq # sched: [7:1.00]
   6265   %vaddr = bitcast i8* %addr to <16 x float>*
   6266   store <16 x float>%data, <16 x float>* %vaddr, align 64
   6267   ret void
   6268 }
   6269 
   6270 define <16 x float> @mov_test27(i8 * %addr) {
        ; Loads a <16 x float> through a bitcast i8 pointer with align 64.
        ; Both check prefixes expect the aligned form, vmovaps, into %zmm0.
   6271 ; GENERIC-LABEL: mov_test27:
   6272 ; GENERIC:       # %bb.0:
   6273 ; GENERIC-NEXT:    vmovaps (%rdi), %zmm0 # sched: [7:0.50]
   6274 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6275 ;
   6276 ; SKX-LABEL: mov_test27:
   6277 ; SKX:       # %bb.0:
   6278 ; SKX-NEXT:    vmovaps (%rdi), %zmm0 # sched: [8:0.50]
   6279 ; SKX-NEXT:    retq # sched: [7:1.00]
   6280   %vaddr = bitcast i8* %addr to <16 x float>*
   6281   %res = load <16 x float>, <16 x float>* %vaddr, align 64
   6282   ret <16 x float>%res
   6283 }
   6284 
   6285 define void @mov_test28(i8 * %addr, <8 x double> %data) {
        ; Stores a <8 x double> through a bitcast i8 pointer with align 1.
        ; Both check prefixes expect the unaligned form, vmovups, then a vzeroupper before the return.
   6286 ; GENERIC-LABEL: mov_test28:
   6287 ; GENERIC:       # %bb.0:
   6288 ; GENERIC-NEXT:    vmovups %zmm0, (%rdi) # sched: [1:1.00]
   6289 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
   6290 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6291 ;
   6292 ; SKX-LABEL: mov_test28:
   6293 ; SKX:       # %bb.0:
   6294 ; SKX-NEXT:    vmovups %zmm0, (%rdi) # sched: [1:1.00]
   6295 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
   6296 ; SKX-NEXT:    retq # sched: [7:1.00]
   6297   %vaddr = bitcast i8* %addr to <8 x double>*
   6298   store <8 x double>%data, <8 x double>* %vaddr, align 1
   6299   ret void
   6300 }
   6301 
   6302 define <8 x double> @mov_test29(i8 * %addr) {
        ; Loads a <8 x double> through a bitcast i8 pointer with align 1.
        ; Both check prefixes expect the unaligned form, vmovups, into %zmm0.
   6303 ; GENERIC-LABEL: mov_test29:
   6304 ; GENERIC:       # %bb.0:
   6305 ; GENERIC-NEXT:    vmovups (%rdi), %zmm0 # sched: [7:0.50]
   6306 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6307 ;
   6308 ; SKX-LABEL: mov_test29:
   6309 ; SKX:       # %bb.0:
   6310 ; SKX-NEXT:    vmovups (%rdi), %zmm0 # sched: [8:0.50]
   6311 ; SKX-NEXT:    retq # sched: [7:1.00]
   6312   %vaddr = bitcast i8* %addr to <8 x double>*
   6313   %res = load <8 x double>, <8 x double>* %vaddr, align 1
   6314   ret <8 x double>%res
   6315 }
   6316 
   6317 define void @mov_test30(i8 * %addr, <16 x float> %data) {
        ; Stores a <16 x float> through a bitcast i8 pointer with align 1.
        ; Both check prefixes expect the unaligned form, vmovups, then a vzeroupper before the return.
   6318 ; GENERIC-LABEL: mov_test30:
   6319 ; GENERIC:       # %bb.0:
   6320 ; GENERIC-NEXT:    vmovups %zmm0, (%rdi) # sched: [1:1.00]
   6321 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
   6322 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6323 ;
   6324 ; SKX-LABEL: mov_test30:
   6325 ; SKX:       # %bb.0:
   6326 ; SKX-NEXT:    vmovups %zmm0, (%rdi) # sched: [1:1.00]
   6327 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
   6328 ; SKX-NEXT:    retq # sched: [7:1.00]
   6329   %vaddr = bitcast i8* %addr to <16 x float>*
   6330   store <16 x float>%data, <16 x float>* %vaddr, align 1
   6331   ret void
   6332 }
   6333 
   6334 define <16 x float> @mov_test31(i8 * %addr) {
        ; Loads a <16 x float> through a bitcast i8 pointer with align 1.
        ; Both check prefixes expect the unaligned form, vmovups, into %zmm0.
   6335 ; GENERIC-LABEL: mov_test31:
   6336 ; GENERIC:       # %bb.0:
   6337 ; GENERIC-NEXT:    vmovups (%rdi), %zmm0 # sched: [7:0.50]
   6338 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6339 ;
   6340 ; SKX-LABEL: mov_test31:
   6341 ; SKX:       # %bb.0:
   6342 ; SKX-NEXT:    vmovups (%rdi), %zmm0 # sched: [8:0.50]
   6343 ; SKX-NEXT:    retq # sched: [7:1.00]
   6344   %vaddr = bitcast i8* %addr to <16 x float>*
   6345   %res = load <16 x float>, <16 x float>* %vaddr, align 1
   6346   ret <16 x float>%res
   6347 }
   6348 
   6349 define <16 x i32> @mov_test32(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) {
        ; Merge-masked aligned load of <16 x i32>: lanes where %mask1 is non-zero take
        ; the loaded value (align 64); the other lanes keep %old.
        ; Both check prefixes expect vptestmd to build %k1, then a masked vmovdqa32 load.
   6350 ; GENERIC-LABEL: mov_test32:
   6351 ; GENERIC:       # %bb.0:
   6352 ; GENERIC-NEXT:    vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
   6353 ; GENERIC-NEXT:    vmovdqa32 (%rdi), %zmm0 {%k1} # sched: [7:0.50]
   6354 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6355 ;
   6356 ; SKX-LABEL: mov_test32:
   6357 ; SKX:       # %bb.0:
   6358 ; SKX-NEXT:    vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
   6359 ; SKX-NEXT:    vmovdqa32 (%rdi), %zmm0 {%k1} # sched: [8:0.50]
   6360 ; SKX-NEXT:    retq # sched: [7:1.00]
   6361   %mask = icmp ne <16 x i32> %mask1, zeroinitializer
   6362   %vaddr = bitcast i8* %addr to <16 x i32>*
   6363   %r = load <16 x i32>, <16 x i32>* %vaddr, align 64
   6364   %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> %old
   6365   ret <16 x i32>%res
   6366 }
   6367 
   6368 define <16 x i32> @mov_test33(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) {
        ; Merge-masked unaligned load of <16 x i32>: lanes where %mask1 is non-zero take
        ; the loaded value (align 1); the other lanes keep %old.
        ; Both check prefixes expect vptestmd to build %k1, then a masked vmovdqu32 load.
   6369 ; GENERIC-LABEL: mov_test33:
   6370 ; GENERIC:       # %bb.0:
   6371 ; GENERIC-NEXT:    vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
   6372 ; GENERIC-NEXT:    vmovdqu32 (%rdi), %zmm0 {%k1} # sched: [7:0.50]
   6373 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6374 ;
   6375 ; SKX-LABEL: mov_test33:
   6376 ; SKX:       # %bb.0:
   6377 ; SKX-NEXT:    vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
   6378 ; SKX-NEXT:    vmovdqu32 (%rdi), %zmm0 {%k1} # sched: [8:0.50]
   6379 ; SKX-NEXT:    retq # sched: [7:1.00]
   6380   %mask = icmp ne <16 x i32> %mask1, zeroinitializer
   6381   %vaddr = bitcast i8* %addr to <16 x i32>*
   6382   %r = load <16 x i32>, <16 x i32>* %vaddr, align 1
   6383   %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> %old
   6384   ret <16 x i32>%res
   6385 }
   6386 
   6387 define <16 x i32> @mov_test34(i8 * %addr, <16 x i32> %mask1) {
        ; Zero-masked aligned load of <16 x i32>: lanes where %mask1 is non-zero take
        ; the loaded value (align 64); the other lanes are zero.
        ; Both check prefixes expect vptestmd to build %k1, then vmovdqa32 with {z}.
   6388 ; GENERIC-LABEL: mov_test34:
   6389 ; GENERIC:       # %bb.0:
   6390 ; GENERIC-NEXT:    vptestmd %zmm0, %zmm0, %k1 # sched: [1:0.33]
   6391 ; GENERIC-NEXT:    vmovdqa32 (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50]
   6392 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6393 ;
   6394 ; SKX-LABEL: mov_test34:
   6395 ; SKX:       # %bb.0:
   6396 ; SKX-NEXT:    vptestmd %zmm0, %zmm0, %k1 # sched: [3:1.00]
   6397 ; SKX-NEXT:    vmovdqa32 (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50]
   6398 ; SKX-NEXT:    retq # sched: [7:1.00]
   6399   %mask = icmp ne <16 x i32> %mask1, zeroinitializer
   6400   %vaddr = bitcast i8* %addr to <16 x i32>*
   6401   %r = load <16 x i32>, <16 x i32>* %vaddr, align 64
   6402   %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> zeroinitializer
   6403   ret <16 x i32>%res
   6404 }
   6405 
   6406 define <16 x i32> @mov_test35(i8 * %addr, <16 x i32> %mask1) {
        ; Zero-masked unaligned load of <16 x i32>: lanes where %mask1 is non-zero take
        ; the loaded value (align 1); the other lanes are zero.
        ; Both check prefixes expect vptestmd to build %k1, then vmovdqu32 with {z}.
   6407 ; GENERIC-LABEL: mov_test35:
   6408 ; GENERIC:       # %bb.0:
   6409 ; GENERIC-NEXT:    vptestmd %zmm0, %zmm0, %k1 # sched: [1:0.33]
   6410 ; GENERIC-NEXT:    vmovdqu32 (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50]
   6411 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6412 ;
   6413 ; SKX-LABEL: mov_test35:
   6414 ; SKX:       # %bb.0:
   6415 ; SKX-NEXT:    vptestmd %zmm0, %zmm0, %k1 # sched: [3:1.00]
   6416 ; SKX-NEXT:    vmovdqu32 (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50]
   6417 ; SKX-NEXT:    retq # sched: [7:1.00]
   6418   %mask = icmp ne <16 x i32> %mask1, zeroinitializer
   6419   %vaddr = bitcast i8* %addr to <16 x i32>*
   6420   %r = load <16 x i32>, <16 x i32>* %vaddr, align 1
   6421   %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> zeroinitializer
   6422   ret <16 x i32>%res
   6423 }
   6424 
   6425 define <8 x i64> @mov_test36(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) {
        ; Merge-masked aligned load of <8 x i64>: lanes where %mask1 is non-zero take
        ; the loaded value (align 64); the other lanes keep %old.
        ; Both check prefixes expect vptestmq to build %k1, then a masked vmovdqa64 load.
   6426 ; GENERIC-LABEL: mov_test36:
   6427 ; GENERIC:       # %bb.0:
   6428 ; GENERIC-NEXT:    vptestmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
   6429 ; GENERIC-NEXT:    vmovdqa64 (%rdi), %zmm0 {%k1} # sched: [7:0.50]
   6430 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6431 ;
   6432 ; SKX-LABEL: mov_test36:
   6433 ; SKX:       # %bb.0:
   6434 ; SKX-NEXT:    vptestmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
   6435 ; SKX-NEXT:    vmovdqa64 (%rdi), %zmm0 {%k1} # sched: [8:0.50]
   6436 ; SKX-NEXT:    retq # sched: [7:1.00]
   6437   %mask = icmp ne <8 x i64> %mask1, zeroinitializer
   6438   %vaddr = bitcast i8* %addr to <8 x i64>*
   6439   %r = load <8 x i64>, <8 x i64>* %vaddr, align 64
   6440   %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> %old
   6441   ret <8 x i64>%res
   6442 }
   6443 
   6444 define <8 x i64> @mov_test37(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) {
        ; Merge-masked unaligned load of <8 x i64>: lanes where %mask1 is non-zero take
        ; the loaded value (align 1); the other lanes keep %old.
        ; Both check prefixes expect vptestmq to build %k1, then a masked vmovdqu64 load.
   6445 ; GENERIC-LABEL: mov_test37:
   6446 ; GENERIC:       # %bb.0:
   6447 ; GENERIC-NEXT:    vptestmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
   6448 ; GENERIC-NEXT:    vmovdqu64 (%rdi), %zmm0 {%k1} # sched: [7:0.50]
   6449 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6450 ;
   6451 ; SKX-LABEL: mov_test37:
   6452 ; SKX:       # %bb.0:
   6453 ; SKX-NEXT:    vptestmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
   6454 ; SKX-NEXT:    vmovdqu64 (%rdi), %zmm0 {%k1} # sched: [8:0.50]
   6455 ; SKX-NEXT:    retq # sched: [7:1.00]
   6456   %mask = icmp ne <8 x i64> %mask1, zeroinitializer
   6457   %vaddr = bitcast i8* %addr to <8 x i64>*
   6458   %r = load <8 x i64>, <8 x i64>* %vaddr, align 1
   6459   %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> %old
   6460   ret <8 x i64>%res
   6461 }
   6462 
   6463 define <8 x i64> @mov_test38(i8 * %addr, <8 x i64> %mask1) {
        ; Zero-masked aligned load of <8 x i64>: lanes where %mask1 is non-zero take
        ; the loaded value (align 64); the other lanes are zero.
        ; Both check prefixes expect vptestmq to build %k1, then vmovdqa64 with {z}.
   6464 ; GENERIC-LABEL: mov_test38:
   6465 ; GENERIC:       # %bb.0:
   6466 ; GENERIC-NEXT:    vptestmq %zmm0, %zmm0, %k1 # sched: [1:0.33]
   6467 ; GENERIC-NEXT:    vmovdqa64 (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50]
   6468 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6469 ;
   6470 ; SKX-LABEL: mov_test38:
   6471 ; SKX:       # %bb.0:
   6472 ; SKX-NEXT:    vptestmq %zmm0, %zmm0, %k1 # sched: [3:1.00]
   6473 ; SKX-NEXT:    vmovdqa64 (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50]
   6474 ; SKX-NEXT:    retq # sched: [7:1.00]
   6475   %mask = icmp ne <8 x i64> %mask1, zeroinitializer
   6476   %vaddr = bitcast i8* %addr to <8 x i64>*
   6477   %r = load <8 x i64>, <8 x i64>* %vaddr, align 64
   6478   %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> zeroinitializer
   6479   ret <8 x i64>%res
   6480 }
   6481 
   6482 define <8 x i64> @mov_test39(i8 * %addr, <8 x i64> %mask1) {
        ; Zero-masked unaligned load of <8 x i64>: lanes where %mask1 is non-zero take
        ; the loaded value (align 1); the other lanes are zero.
        ; Both check prefixes expect vptestmq to build %k1, then vmovdqu64 with {z}.
   6483 ; GENERIC-LABEL: mov_test39:
   6484 ; GENERIC:       # %bb.0:
   6485 ; GENERIC-NEXT:    vptestmq %zmm0, %zmm0, %k1 # sched: [1:0.33]
   6486 ; GENERIC-NEXT:    vmovdqu64 (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50]
   6487 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6488 ;
   6489 ; SKX-LABEL: mov_test39:
   6490 ; SKX:       # %bb.0:
   6491 ; SKX-NEXT:    vptestmq %zmm0, %zmm0, %k1 # sched: [3:1.00]
   6492 ; SKX-NEXT:    vmovdqu64 (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50]
   6493 ; SKX-NEXT:    retq # sched: [7:1.00]
   6494   %mask = icmp ne <8 x i64> %mask1, zeroinitializer
   6495   %vaddr = bitcast i8* %addr to <8 x i64>*
   6496   %r = load <8 x i64>, <8 x i64>* %vaddr, align 1
   6497   %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> zeroinitializer
   6498   ret <8 x i64>%res
   6499 }
   6500 
   6501 define <16 x float> @mov_test40(i8 * %addr, <16 x float> %old, <16 x float> %mask1) {
        ; Merge-masked aligned load of <16 x float>: the mask is `fcmp one %mask1, 0`;
        ; selected lanes take the loaded value (align 64), the other lanes keep %old.
        ; Both check prefixes expect vxorps (zero), vcmpneq_oqps into %k1, then a masked vmovaps load.
   6502 ; GENERIC-LABEL: mov_test40:
   6503 ; GENERIC:       # %bb.0:
   6504 ; GENERIC-NEXT:    vxorps %xmm2, %xmm2, %xmm2 # sched: [1:1.00]
   6505 ; GENERIC-NEXT:    vcmpneq_oqps %zmm2, %zmm1, %k1 # sched: [3:1.00]
   6506 ; GENERIC-NEXT:    vmovaps (%rdi), %zmm0 {%k1} # sched: [7:0.50]
   6507 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6508 ;
   6509 ; SKX-LABEL: mov_test40:
   6510 ; SKX:       # %bb.0:
   6511 ; SKX-NEXT:    vxorps %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
   6512 ; SKX-NEXT:    vcmpneq_oqps %zmm2, %zmm1, %k1 # sched: [3:1.00]
   6513 ; SKX-NEXT:    vmovaps (%rdi), %zmm0 {%k1} # sched: [8:0.50]
   6514 ; SKX-NEXT:    retq # sched: [7:1.00]
   6515   %mask = fcmp one <16 x float> %mask1, zeroinitializer
   6516   %vaddr = bitcast i8* %addr to <16 x float>*
   6517   %r = load <16 x float>, <16 x float>* %vaddr, align 64
   6518   %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> %old
   6519   ret <16 x float>%res
   6520 }
   6521 
   6522 define <16 x float> @mov_test41(i8 * %addr, <16 x float> %old, <16 x float> %mask1) {
        ; Merge-masked unaligned load of <16 x float>: the mask is `fcmp one %mask1, 0`;
        ; selected lanes take the loaded value (align 1), the other lanes keep %old.
        ; Both check prefixes expect vxorps (zero), vcmpneq_oqps into %k1, then a masked vmovups load.
   6523 ; GENERIC-LABEL: mov_test41:
   6524 ; GENERIC:       # %bb.0:
   6525 ; GENERIC-NEXT:    vxorps %xmm2, %xmm2, %xmm2 # sched: [1:1.00]
   6526 ; GENERIC-NEXT:    vcmpneq_oqps %zmm2, %zmm1, %k1 # sched: [3:1.00]
   6527 ; GENERIC-NEXT:    vmovups (%rdi), %zmm0 {%k1} # sched: [7:0.50]
   6528 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6529 ;
   6530 ; SKX-LABEL: mov_test41:
   6531 ; SKX:       # %bb.0:
   6532 ; SKX-NEXT:    vxorps %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
   6533 ; SKX-NEXT:    vcmpneq_oqps %zmm2, %zmm1, %k1 # sched: [3:1.00]
   6534 ; SKX-NEXT:    vmovups (%rdi), %zmm0 {%k1} # sched: [8:0.50]
   6535 ; SKX-NEXT:    retq # sched: [7:1.00]
   6536   %mask = fcmp one <16 x float> %mask1, zeroinitializer
   6537   %vaddr = bitcast i8* %addr to <16 x float>*
   6538   %r = load <16 x float>, <16 x float>* %vaddr, align 1
   6539   %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> %old
   6540   ret <16 x float>%res
   6541 }
   6542 
   6543 define <16 x float> @mov_test42(i8 * %addr, <16 x float> %mask1) {
        ; Zero-masked aligned load of <16 x float>: the mask is `fcmp one %mask1, 0`;
        ; selected lanes take the loaded value (align 64), the other lanes are zero.
        ; Both check prefixes expect vxorps (zero), vcmpneq_oqps into %k1, then vmovaps with {z}.
   6544 ; GENERIC-LABEL: mov_test42:
   6545 ; GENERIC:       # %bb.0:
   6546 ; GENERIC-NEXT:    vxorps %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
   6547 ; GENERIC-NEXT:    vcmpneq_oqps %zmm1, %zmm0, %k1 # sched: [3:1.00]
   6548 ; GENERIC-NEXT:    vmovaps (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50]
   6549 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6550 ;
   6551 ; SKX-LABEL: mov_test42:
   6552 ; SKX:       # %bb.0:
   6553 ; SKX-NEXT:    vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
   6554 ; SKX-NEXT:    vcmpneq_oqps %zmm1, %zmm0, %k1 # sched: [3:1.00]
   6555 ; SKX-NEXT:    vmovaps (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50]
   6556 ; SKX-NEXT:    retq # sched: [7:1.00]
   6557   %mask = fcmp one <16 x float> %mask1, zeroinitializer
   6558   %vaddr = bitcast i8* %addr to <16 x float>*
   6559   %r = load <16 x float>, <16 x float>* %vaddr, align 64
   6560   %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> zeroinitializer
   6561   ret <16 x float>%res
   6562 }
   6563 
   6564 define <16 x float> @mov_test43(i8 * %addr, <16 x float> %mask1) {
        ; Zero-masked unaligned load of <16 x float>: the mask is `fcmp one %mask1, 0`;
        ; selected lanes take the loaded value (align 1), the other lanes are zero.
        ; Both check prefixes expect vxorps (zero), vcmpneq_oqps into %k1, then vmovups with {z}.
   6565 ; GENERIC-LABEL: mov_test43:
   6566 ; GENERIC:       # %bb.0:
   6567 ; GENERIC-NEXT:    vxorps %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
   6568 ; GENERIC-NEXT:    vcmpneq_oqps %zmm1, %zmm0, %k1 # sched: [3:1.00]
   6569 ; GENERIC-NEXT:    vmovups (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50]
   6570 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6571 ;
   6572 ; SKX-LABEL: mov_test43:
   6573 ; SKX:       # %bb.0:
   6574 ; SKX-NEXT:    vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
   6575 ; SKX-NEXT:    vcmpneq_oqps %zmm1, %zmm0, %k1 # sched: [3:1.00]
   6576 ; SKX-NEXT:    vmovups (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50]
   6577 ; SKX-NEXT:    retq # sched: [7:1.00]
   6578   %mask = fcmp one <16 x float> %mask1, zeroinitializer
   6579   %vaddr = bitcast i8* %addr to <16 x float>*
   6580   %r = load <16 x float>, <16 x float>* %vaddr, align 1
   6581   %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> zeroinitializer
   6582   ret <16 x float>%res
   6583 }
   6584 
   ; mov_test44: merge-masked load of an align-64 <8 x double>. Lanes where
   ; %mask1 compares `one` against zero take the loaded value; the other lanes
   ; keep %old. Both check prefixes expect vmovapd with {%k1} and no {z}.
   6585 define <8 x double> @mov_test44(i8 * %addr, <8 x double> %old, <8 x double> %mask1) {
   6586 ; GENERIC-LABEL: mov_test44:
   6587 ; GENERIC:       # %bb.0:
   6588 ; GENERIC-NEXT:    vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:1.00]
   6589 ; GENERIC-NEXT:    vcmpneq_oqpd %zmm2, %zmm1, %k1 # sched: [3:1.00]
   6590 ; GENERIC-NEXT:    vmovapd (%rdi), %zmm0 {%k1} # sched: [7:0.50]
   6591 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6592 ;
   6593 ; SKX-LABEL: mov_test44:
   6594 ; SKX:       # %bb.0:
   6595 ; SKX-NEXT:    vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
   6596 ; SKX-NEXT:    vcmpneq_oqpd %zmm2, %zmm1, %k1 # sched: [3:1.00]
   6597 ; SKX-NEXT:    vmovapd (%rdi), %zmm0 {%k1} # sched: [8:0.50]
   6598 ; SKX-NEXT:    retq # sched: [7:1.00]
   6599   %mask = fcmp one <8 x double> %mask1, zeroinitializer
   6600   %vaddr = bitcast i8* %addr to <8 x double>*
   6601   %r = load <8 x double>, <8 x double>* %vaddr, align 64
   6602   %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> %old
   6603   ret <8 x double>%res
   6604 }
   6605 
   ; mov_test45: merge-masked <8 x double> load into %old, with the load only
   ; `align 1`; both check prefixes expect the unaligned form vmovupd {%k1}.
   6606 define <8 x double> @mov_test45(i8 * %addr, <8 x double> %old, <8 x double> %mask1) {
   6607 ; GENERIC-LABEL: mov_test45:
   6608 ; GENERIC:       # %bb.0:
   6609 ; GENERIC-NEXT:    vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:1.00]
   6610 ; GENERIC-NEXT:    vcmpneq_oqpd %zmm2, %zmm1, %k1 # sched: [3:1.00]
   6611 ; GENERIC-NEXT:    vmovupd (%rdi), %zmm0 {%k1} # sched: [7:0.50]
   6612 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6613 ;
   6614 ; SKX-LABEL: mov_test45:
   6615 ; SKX:       # %bb.0:
   6616 ; SKX-NEXT:    vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
   6617 ; SKX-NEXT:    vcmpneq_oqpd %zmm2, %zmm1, %k1 # sched: [3:1.00]
   6618 ; SKX-NEXT:    vmovupd (%rdi), %zmm0 {%k1} # sched: [8:0.50]
   6619 ; SKX-NEXT:    retq # sched: [7:1.00]
   6620   %mask = fcmp one <8 x double> %mask1, zeroinitializer
   6621   %vaddr = bitcast i8* %addr to <8 x double>*
   6622   %r = load <8 x double>, <8 x double>* %vaddr, align 1
   6623   %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> %old
   6624   ret <8 x double>%res
   6625 }
   6626 
   ; mov_test46: zero-masked load of an align-64 <8 x double>; unselected lanes
   ; become zero. Both check prefixes expect vmovapd with {%k1} {z}.
   6627 define <8 x double> @mov_test46(i8 * %addr, <8 x double> %mask1) {
   6628 ; GENERIC-LABEL: mov_test46:
   6629 ; GENERIC:       # %bb.0:
   6630 ; GENERIC-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
   6631 ; GENERIC-NEXT:    vcmpneq_oqpd %zmm1, %zmm0, %k1 # sched: [3:1.00]
   6632 ; GENERIC-NEXT:    vmovapd (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50]
   6633 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6634 ;
   6635 ; SKX-LABEL: mov_test46:
   6636 ; SKX:       # %bb.0:
   6637 ; SKX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
   6638 ; SKX-NEXT:    vcmpneq_oqpd %zmm1, %zmm0, %k1 # sched: [3:1.00]
   6639 ; SKX-NEXT:    vmovapd (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50]
   6640 ; SKX-NEXT:    retq # sched: [7:1.00]
   6641   %mask = fcmp one <8 x double> %mask1, zeroinitializer
   6642   %vaddr = bitcast i8* %addr to <8 x double>*
   6643   %r = load <8 x double>, <8 x double>* %vaddr, align 64
   6644   %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> zeroinitializer
   6645   ret <8 x double>%res
   6646 }
   6647 
   ; mov_test47: zero-masked <8 x double> load with only `align 1`; both check
   ; prefixes expect the unaligned form vmovupd with {%k1} {z}.
   6648 define <8 x double> @mov_test47(i8 * %addr, <8 x double> %mask1) {
   6649 ; GENERIC-LABEL: mov_test47:
   6650 ; GENERIC:       # %bb.0:
   6651 ; GENERIC-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
   6652 ; GENERIC-NEXT:    vcmpneq_oqpd %zmm1, %zmm0, %k1 # sched: [3:1.00]
   6653 ; GENERIC-NEXT:    vmovupd (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50]
   6654 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6655 ;
   6656 ; SKX-LABEL: mov_test47:
   6657 ; SKX:       # %bb.0:
   6658 ; SKX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
   6659 ; SKX-NEXT:    vcmpneq_oqpd %zmm1, %zmm0, %k1 # sched: [3:1.00]
   6660 ; SKX-NEXT:    vmovupd (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50]
   6661 ; SKX-NEXT:    retq # sched: [7:1.00]
   6662   %mask = fcmp one <8 x double> %mask1, zeroinitializer
   6663   %vaddr = bitcast i8* %addr to <8 x double>*
   6664   %r = load <8 x double>, <8 x double>* %vaddr, align 1
   6665   %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> zeroinitializer
   6666   ret <8 x double>%res
   6667 }
   6668 
   ; mask16: inverts an i16 by viewing it as <16 x i1> and xor-ing with all-ones.
   ; Both check prefixes expect a plain scalar notl/movl with no k-register use.
   6669 define i16 @mask16(i16 %x) {
   6670 ; GENERIC-LABEL: mask16:
   6671 ; GENERIC:       # %bb.0:
   6672 ; GENERIC-NEXT:    notl %edi # sched: [1:0.33]
   6673 ; GENERIC-NEXT:    movl %edi, %eax # sched: [1:0.33]
   6674 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6675 ;
   6676 ; SKX-LABEL: mask16:
   6677 ; SKX:       # %bb.0:
   6678 ; SKX-NEXT:    notl %edi # sched: [1:0.25]
   6679 ; SKX-NEXT:    movl %edi, %eax # sched: [1:0.25]
   6680 ; SKX-NEXT:    retq # sched: [7:1.00]
   6681   %m0 = bitcast i16 %x to <16 x i1>
   6682   %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
   6683   %ret = bitcast <16 x i1> %m1 to i16
   6684   ret i16 %ret
   6685 }
   6686 
   ; mask16_zext: inverts an i16 through a <16 x i1> view and zero-extends the
   ; result to i32. Both check prefixes expect scalar notl followed by movzwl.
   6687 define i32 @mask16_zext(i16 %x) {
   6688 ; GENERIC-LABEL: mask16_zext:
   6689 ; GENERIC:       # %bb.0:
   6690 ; GENERIC-NEXT:    notl %edi # sched: [1:0.33]
   6691 ; GENERIC-NEXT:    movzwl %di, %eax # sched: [1:0.33]
   6692 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6693 ;
   6694 ; SKX-LABEL: mask16_zext:
   6695 ; SKX:       # %bb.0:
   6696 ; SKX-NEXT:    notl %edi # sched: [1:0.25]
   6697 ; SKX-NEXT:    movzwl %di, %eax # sched: [1:0.25]
   6698 ; SKX-NEXT:    retq # sched: [7:1.00]
   6699   %m0 = bitcast i16 %x to <16 x i1>
   6700   %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
   6701   %m2 = bitcast <16 x i1> %m1 to i16
   6702   %ret = zext i16 %m2 to i32
   6703   ret i32 %ret
   6704 }
   6705 
   ; mask8: inverts an i8 by viewing it as <8 x i1> and xor-ing with all-ones.
   ; Both check prefixes expect a scalar notb on %dil followed by movl.
   6706 define i8 @mask8(i8 %x) {
   6707 ; GENERIC-LABEL: mask8:
   6708 ; GENERIC:       # %bb.0:
   6709 ; GENERIC-NEXT:    notb %dil # sched: [1:0.33]
   6710 ; GENERIC-NEXT:    movl %edi, %eax # sched: [1:0.33]
   6711 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6712 ;
   6713 ; SKX-LABEL: mask8:
   6714 ; SKX:       # %bb.0:
   6715 ; SKX-NEXT:    notb %dil # sched: [1:0.25]
   6716 ; SKX-NEXT:    movl %edi, %eax # sched: [1:0.25]
   6717 ; SKX-NEXT:    retq # sched: [7:1.00]
   6718   %m0 = bitcast i8 %x to <8 x i1>
   6719   %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
   6720   %ret = bitcast <8 x i1> %m1 to i8
   6721   ret i8 %ret
   6722 }
   6723 
   ; mask8_zext: inverts an i8 through an <8 x i1> view and zero-extends the
   ; result to i32. Both check prefixes expect scalar notb followed by movzbl.
   6724 define i32 @mask8_zext(i8 %x) {
   6725 ; GENERIC-LABEL: mask8_zext:
   6726 ; GENERIC:       # %bb.0:
   6727 ; GENERIC-NEXT:    notb %dil # sched: [1:0.33]
   6728 ; GENERIC-NEXT:    movzbl %dil, %eax # sched: [1:0.33]
   6729 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6730 ;
   6731 ; SKX-LABEL: mask8_zext:
   6732 ; SKX:       # %bb.0:
   6733 ; SKX-NEXT:    notb %dil # sched: [1:0.25]
   6734 ; SKX-NEXT:    movzbl %dil, %eax # sched: [1:0.25]
   6735 ; SKX-NEXT:    retq # sched: [7:1.00]
   6736   %m0 = bitcast i8 %x to <8 x i1>
   6737   %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
   6738   %m2 = bitcast <8 x i1> %m1 to i8
   6739   %ret = zext i8 %m2 to i32
   6740   ret i32 %ret
   6741 }
   6742 
   ; mask16_mem: loads an i16 from %ptr, inverts it through a <16 x i1> view and
   ; stores it back to the same address. Unlike the register-only variant, both
   ; check prefixes expect the k-register sequence kmovw / knotw / kmovw.
   6743 define void @mask16_mem(i16* %ptr) {
   6744 ; GENERIC-LABEL: mask16_mem:
   6745 ; GENERIC:       # %bb.0:
   6746 ; GENERIC-NEXT:    kmovw (%rdi), %k0 # sched: [5:0.50]
   6747 ; GENERIC-NEXT:    knotw %k0, %k0 # sched: [1:0.33]
   6748 ; GENERIC-NEXT:    kmovw %k0, (%rdi) # sched: [1:1.00]
   6749 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6750 ;
   6751 ; SKX-LABEL: mask16_mem:
   6752 ; SKX:       # %bb.0:
   6753 ; SKX-NEXT:    kmovw (%rdi), %k0 # sched: [7:1.00]
   6754 ; SKX-NEXT:    knotw %k0, %k0 # sched: [1:1.00]
   6755 ; SKX-NEXT:    kmovw %k0, (%rdi) # sched: [1:1.00]
   6756 ; SKX-NEXT:    retq # sched: [7:1.00]
   6757   %x = load i16, i16* %ptr, align 4
   6758   %m0 = bitcast i16 %x to <16 x i1>
   6759   %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
   6760   %ret = bitcast <16 x i1> %m1 to i16
   6761   store i16 %ret, i16* %ptr, align 4
   6762   ret void
   6763 }
   6764 
   ; mask8_mem: loads an i8 from %ptr, inverts it through an <8 x i1> view and
   ; stores it back to the same address. Both check prefixes expect the
   ; k-register sequence kmovb / knotb / kmovb.
   6765 define void @mask8_mem(i8* %ptr) {
   6766 ; GENERIC-LABEL: mask8_mem:
   6767 ; GENERIC:       # %bb.0:
   6768 ; GENERIC-NEXT:    kmovb (%rdi), %k0 # sched: [5:0.50]
   6769 ; GENERIC-NEXT:    knotb %k0, %k0 # sched: [1:0.33]
   6770 ; GENERIC-NEXT:    kmovb %k0, (%rdi) # sched: [1:1.00]
   6771 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6772 ;
   6773 ; SKX-LABEL: mask8_mem:
   6774 ; SKX:       # %bb.0:
   6775 ; SKX-NEXT:    kmovb (%rdi), %k0 # sched: [7:1.00]
   6776 ; SKX-NEXT:    knotb %k0, %k0 # sched: [1:1.00]
   6777 ; SKX-NEXT:    kmovb %k0, (%rdi) # sched: [1:1.00]
   6778 ; SKX-NEXT:    retq # sched: [7:1.00]
   6779   %x = load i8, i8* %ptr, align 4
   6780   %m0 = bitcast i8 %x to <8 x i1>
   6781   %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
   6782   %ret = bitcast <8 x i1> %m1 to i8
   6783   store i8 %ret, i8* %ptr, align 4
   6784   ret void
   6785 }
   6786 
   ; mand16: computes (x & y) | (x ^ y) on two i16 values viewed as <16 x i1>.
   ; With register inputs, both check prefixes expect only general-purpose
   ; xorl / andl / orl instructions and no k-register traffic.
   6787 define i16 @mand16(i16 %x, i16 %y) {
   6788 ; GENERIC-LABEL: mand16:
   6789 ; GENERIC:       # %bb.0:
   6790 ; GENERIC-NEXT:    movl %edi, %eax # sched: [1:0.33]
   6791 ; GENERIC-NEXT:    xorl %esi, %eax # sched: [1:0.33]
   6792 ; GENERIC-NEXT:    andl %esi, %edi # sched: [1:0.33]
   6793 ; GENERIC-NEXT:    orl %eax, %edi # sched: [1:0.33]
   6794 ; GENERIC-NEXT:    movl %edi, %eax # sched: [1:0.33]
   6795 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6796 ;
   6797 ; SKX-LABEL: mand16:
   6798 ; SKX:       # %bb.0:
   6799 ; SKX-NEXT:    movl %edi, %eax # sched: [1:0.25]
   6800 ; SKX-NEXT:    xorl %esi, %eax # sched: [1:0.25]
   6801 ; SKX-NEXT:    andl %esi, %edi # sched: [1:0.25]
   6802 ; SKX-NEXT:    orl %eax, %edi # sched: [1:0.25]
   6803 ; SKX-NEXT:    movl %edi, %eax # sched: [1:0.25]
   6804 ; SKX-NEXT:    retq # sched: [7:1.00]
   6805   %ma = bitcast i16 %x to <16 x i1>
   6806   %mb = bitcast i16 %y to <16 x i1>
   6807   %mc = and <16 x i1> %ma, %mb
   6808   %md = xor <16 x i1> %ma, %mb
   6809   %me = or <16 x i1> %mc, %md
   6810   %ret = bitcast <16 x i1> %me to i16
   6811   ret i16 %ret
   6812 }
   6813 
   ; mand16_mem: computes (a & b) | (a ^ b) on two <16 x i1> masks loaded from
   ; memory and returns the result as i16. Both check prefixes expect the masks
   ; to be loaded with kmovw, combined with kandw / kxorw / korw, and moved to
   ; %eax with kmovd.
   6814 define i16 @mand16_mem(<16 x i1>* %x, <16 x i1>* %y) {
   6815 ; GENERIC-LABEL: mand16_mem:
   6816 ; GENERIC:       # %bb.0:
   6817 ; GENERIC-NEXT:    kmovw (%rdi), %k0 # sched: [5:0.50]
   6818 ; GENERIC-NEXT:    kmovw (%rsi), %k1 # sched: [5:0.50]
   6819 ; GENERIC-NEXT:    kandw %k1, %k0, %k2 # sched: [1:0.33]
   6820 ; GENERIC-NEXT:    kxorw %k1, %k0, %k0 # sched: [1:0.33]
   6821 ; GENERIC-NEXT:    korw %k0, %k2, %k0 # sched: [1:0.33]
   6822 ; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
   6823 ; GENERIC-NEXT:    # kill: def $ax killed $ax killed $eax
   6824 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6825 ;
   6826 ; SKX-LABEL: mand16_mem:
   6827 ; SKX:       # %bb.0:
   6828 ; SKX-NEXT:    kmovw (%rdi), %k0 # sched: [7:1.00]
   6829 ; SKX-NEXT:    kmovw (%rsi), %k1 # sched: [7:1.00]
   6830 ; SKX-NEXT:    kandw %k1, %k0, %k2 # sched: [1:1.00]
   6831 ; SKX-NEXT:    kxorw %k1, %k0, %k0 # sched: [1:1.00]
   6832 ; SKX-NEXT:    korw %k0, %k2, %k0 # sched: [1:1.00]
   6833 ; SKX-NEXT:    kmovd %k0, %eax # sched: [3:1.00]
   6834 ; SKX-NEXT:    # kill: def $ax killed $ax killed $eax
   6835 ; SKX-NEXT:    retq # sched: [7:1.00]
   6836   %ma = load <16 x i1>, <16 x i1>* %x
   6837   %mb = load <16 x i1>, <16 x i1>* %y
   6838   %mc = and <16 x i1> %ma, %mb
   6839   %md = xor <16 x i1> %ma, %mb
   6840   %me = or <16 x i1> %mc, %md
   6841   %ret = bitcast <16 x i1> %me to i16
   6842   ret i16 %ret
   6843 }
   6844 
   ; shuf_test1: views an i16 as <16 x i1>, shuffles out elements 8..15 and
   ; returns them as an i8. Both check prefixes expect the extraction to be a
   ; single kshiftrw by 8 between two kmovd transfers.
   6845 define i8 @shuf_test1(i16 %v) nounwind {
   6846 ; GENERIC-LABEL: shuf_test1:
   6847 ; GENERIC:       # %bb.0:
   6848 ; GENERIC-NEXT:    kmovd %edi, %k0 # sched: [1:0.33]
   6849 ; GENERIC-NEXT:    kshiftrw $8, %k0, %k0 # sched: [1:1.00]
   6850 ; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
   6851 ; GENERIC-NEXT:    # kill: def $al killed $al killed $eax
   6852 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6853 ;
   6854 ; SKX-LABEL: shuf_test1:
   6855 ; SKX:       # %bb.0:
   6856 ; SKX-NEXT:    kmovd %edi, %k0 # sched: [1:1.00]
   6857 ; SKX-NEXT:    kshiftrw $8, %k0, %k0 # sched: [3:1.00]
   6858 ; SKX-NEXT:    kmovd %k0, %eax # sched: [3:1.00]
   6859 ; SKX-NEXT:    # kill: def $al killed $al killed $eax
   6860 ; SKX-NEXT:    retq # sched: [7:1.00]
   6861    %v1 = bitcast i16 %v to <16 x i1>
   6862    %mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   6863    %mask1 = bitcast <8 x i1> %mask to i8
   6864    ret i8 %mask1
   6865 }
   6866 
   ; zext_test1: extracts element 5 of an unsigned-greater-than compare of two
   ; <16 x i32> vectors and zero-extends that i1 to i32. Both check prefixes
   ; expect vpcmpnleud, a kshiftrw by 5, kmovd and a final `andl $1`.
   6867 define i32 @zext_test1(<16 x i32> %a, <16 x i32> %b) {
   6868 ; GENERIC-LABEL: zext_test1:
   6869 ; GENERIC:       # %bb.0:
   6870 ; GENERIC-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0 # sched: [1:0.50]
   6871 ; GENERIC-NEXT:    kshiftrw $5, %k0, %k0 # sched: [1:1.00]
   6872 ; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
   6873 ; GENERIC-NEXT:    andl $1, %eax # sched: [1:0.33]
   6874 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
   6875 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6876 ;
   6877 ; SKX-LABEL: zext_test1:
   6878 ; SKX:       # %bb.0:
   6879 ; SKX-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0 # sched: [3:1.00]
   6880 ; SKX-NEXT:    kshiftrw $5, %k0, %k0 # sched: [3:1.00]
   6881 ; SKX-NEXT:    kmovd %k0, %eax # sched: [3:1.00]
   6882 ; SKX-NEXT:    andl $1, %eax # sched: [1:0.25]
   6883 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
   6884 ; SKX-NEXT:    retq # sched: [7:1.00]
   6885   %cmp_res = icmp ugt <16 x i32> %a, %b
   6886   %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
   6887   %res = zext i1 %cmp_res.i1 to i32
   6888   ret i32 %res
   6889 }
   6890 
   ; zext_test2: same element-5 extraction from an unsigned-greater-than compare
   ; of two <16 x i32> vectors, but zero-extended to i16. Both check prefixes
   ; expect the 32-bit `andl $1` plus a kill annotation narrowing %eax to %ax.
   6891 define i16 @zext_test2(<16 x i32> %a, <16 x i32> %b) {
   6892 ; GENERIC-LABEL: zext_test2:
   6893 ; GENERIC:       # %bb.0:
   6894 ; GENERIC-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0 # sched: [1:0.50]
   6895 ; GENERIC-NEXT:    kshiftrw $5, %k0, %k0 # sched: [1:1.00]
   6896 ; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
   6897 ; GENERIC-NEXT:    andl $1, %eax # sched: [1:0.33]
   6898 ; GENERIC-NEXT:    # kill: def $ax killed $ax killed $eax
   6899 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
   6900 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6901 ;
   6902 ; SKX-LABEL: zext_test2:
   6903 ; SKX:       # %bb.0:
   6904 ; SKX-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0 # sched: [3:1.00]
   6905 ; SKX-NEXT:    kshiftrw $5, %k0, %k0 # sched: [3:1.00]
   6906 ; SKX-NEXT:    kmovd %k0, %eax # sched: [3:1.00]
   6907 ; SKX-NEXT:    andl $1, %eax # sched: [1:0.25]
   6908 ; SKX-NEXT:    # kill: def $ax killed $ax killed $eax
   6909 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
   6910 ; SKX-NEXT:    retq # sched: [7:1.00]
   6911   %cmp_res = icmp ugt <16 x i32> %a, %b
   6912   %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
   6913   %res = zext i1 %cmp_res.i1 to i16
   6914   ret i16 %res
   6915 }
   6916 
   ; zext_test3: same element-5 extraction from an unsigned-greater-than compare
   ; of two <16 x i32> vectors, zero-extended to i8. Both check prefixes expect
   ; the byte-sized `andb $1, %al` here rather than the 32-bit andl.
   6917 define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) {
   6918 ; GENERIC-LABEL: zext_test3:
   6919 ; GENERIC:       # %bb.0:
   6920 ; GENERIC-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0 # sched: [1:0.50]
   6921 ; GENERIC-NEXT:    kshiftrw $5, %k0, %k0 # sched: [1:1.00]
   6922 ; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
   6923 ; GENERIC-NEXT:    andb $1, %al # sched: [1:0.33]
   6924 ; GENERIC-NEXT:    # kill: def $al killed $al killed $eax
   6925 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
   6926 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6927 ;
   6928 ; SKX-LABEL: zext_test3:
   6929 ; SKX:       # %bb.0:
   6930 ; SKX-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0 # sched: [3:1.00]
   6931 ; SKX-NEXT:    kshiftrw $5, %k0, %k0 # sched: [3:1.00]
   6932 ; SKX-NEXT:    kmovd %k0, %eax # sched: [3:1.00]
   6933 ; SKX-NEXT:    andb $1, %al # sched: [1:0.25]
   6934 ; SKX-NEXT:    # kill: def $al killed $al killed $eax
   6935 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
   6936 ; SKX-NEXT:    retq # sched: [7:1.00]
   6937   %cmp_res = icmp ugt <16 x i32> %a, %b
   6938   %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
   6939   %res = zext i1 %cmp_res.i1 to i8
   6940   ret i8 %res
   6941 }
   6942 
   ; conv1: stores an all-true <8 x i1> to %R, then round-trips the constant
   ; mask <0,1,1,1,1,1,1,1> through a stack slot and returns it as i8. Both check
   ; prefixes expect everything folded to byte immediates: -1 stored to (%rdi),
   ; -2 stored to the stack slot, and -2 returned in %al.
   6943 define i8 @conv1(<8 x i1>* %R) {
   6944 ; GENERIC-LABEL: conv1:
   6945 ; GENERIC:       # %bb.0: # %entry
   6946 ; GENERIC-NEXT:    movb $-1, (%rdi) # sched: [1:1.00]
   6947 ; GENERIC-NEXT:    movb $-2, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
   6948 ; GENERIC-NEXT:    movb $-2, %al # sched: [1:0.33]
   6949 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6950 ;
   6951 ; SKX-LABEL: conv1:
   6952 ; SKX:       # %bb.0: # %entry
   6953 ; SKX-NEXT:    movb $-1, (%rdi) # sched: [1:1.00]
   6954 ; SKX-NEXT:    movb $-2, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
   6955 ; SKX-NEXT:    movb $-2, %al # sched: [1:0.25]
   6956 ; SKX-NEXT:    retq # sched: [7:1.00]
   6957 entry:
   6958   store <8 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i1>* %R
   6959 
   6960   %maskPtr = alloca <8 x i1>
   6961   store <8 x i1> <i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i1>* %maskPtr
   6962   %mask = load <8 x i1>, <8 x i1>* %maskPtr
   6963   %mask_convert = bitcast <8 x i1> %mask to i8
   6964   ret i8 %mask_convert
   6965 }
   6966 
   ; test4: compares two <4 x i1> compare results with a signed `icmp sgt` and
   ; sign-extends the outcome to <4 x i32>. Both check prefixes expect this to
   ; become vpcmpleq feeding %k1, then a vpcmpgtq masked by {%k1}, then vpmovm2d.
   6967 define <4 x i32> @test4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1) {
   6968 ; GENERIC-LABEL: test4:
   6969 ; GENERIC:       # %bb.0:
   6970 ; GENERIC-NEXT:    vpcmpleq %ymm1, %ymm0, %k1 # sched: [1:0.50]
   6971 ; GENERIC-NEXT:    vpcmpgtq %ymm3, %ymm2, %k0 {%k1} # sched: [1:0.50]
   6972 ; GENERIC-NEXT:    vpmovm2d %k0, %xmm0 # sched: [1:0.33]
   6973 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
   6974 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6975 ;
   6976 ; SKX-LABEL: test4:
   6977 ; SKX:       # %bb.0:
   6978 ; SKX-NEXT:    vpcmpleq %ymm1, %ymm0, %k1 # sched: [3:1.00]
   6979 ; SKX-NEXT:    vpcmpgtq %ymm3, %ymm2, %k0 {%k1} # sched: [3:1.00]
   6980 ; SKX-NEXT:    vpmovm2d %k0, %xmm0 # sched: [1:0.25]
   6981 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
   6982 ; SKX-NEXT:    retq # sched: [7:1.00]
   6983   %x_gt_y = icmp sgt <4 x i64> %x, %y
   6984   %x1_gt_y1 = icmp sgt <4 x i64> %x1, %y1
   6985   %res = icmp sgt <4 x i1>%x_gt_y, %x1_gt_y1
   6986   %resse = sext <4 x i1>%res to <4 x i32>
   6987   ret <4 x i32> %resse
   6988 }
   6989 
   ; vcmp_test5: compares two <2 x i1> compare results with a signed `icmp slt`
   ; and sign-extends the outcome to <2 x i64>. Note that %x_gt_y is actually
   ; produced by `icmp slt` despite its name. Both check prefixes expect
   ; vpcmpleq feeding %k1, then a vpcmpgtq masked by {%k1}, then vpmovm2q.
   6990 define <2 x i64> @vcmp_test5(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1) {
   6991 ; GENERIC-LABEL: vcmp_test5:
   6992 ; GENERIC:       # %bb.0:
   6993 ; GENERIC-NEXT:    vpcmpleq %xmm3, %xmm2, %k1 # sched: [1:0.50]
   6994 ; GENERIC-NEXT:    vpcmpgtq %xmm0, %xmm1, %k0 {%k1} # sched: [1:0.50]
   6995 ; GENERIC-NEXT:    vpmovm2q %k0, %xmm0 # sched: [1:0.33]
   6996 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6997 ;
   6998 ; SKX-LABEL: vcmp_test5:
   6999 ; SKX:       # %bb.0:
   7000 ; SKX-NEXT:    vpcmpleq %xmm3, %xmm2, %k1 # sched: [3:1.00]
   7001 ; SKX-NEXT:    vpcmpgtq %xmm0, %xmm1, %k0 {%k1} # sched: [3:1.00]
   7002 ; SKX-NEXT:    vpmovm2q %k0, %xmm0 # sched: [1:0.25]
   7003 ; SKX-NEXT:    retq # sched: [7:1.00]
   7004   %x_gt_y = icmp slt <2 x i64> %x, %y
   7005   %x1_gt_y1 = icmp sgt <2 x i64> %x1, %y1
   7006   %res = icmp slt <2 x i1>%x_gt_y, %x1_gt_y1
   7007   %resse = sext <2 x i1>%res to <2 x i64>
   7008   ret <2 x i64> %resse
   7009 }

        ; vcmp_test6: ands a <16 x i1> mask with an alternating true/false constant,
        ; compares the i16 view against zero and branches to two blocks that both
        ; just return.
        ; TODO: this function has no autogenerated assertions for either check
        ; prefix. `define` must start its own line for the update script to see the
        ; function; regenerate with utils/update_llc_test_checks.py.
        define void @vcmp_test6(<16 x i1> %mask)  {
   7010 allocas:
   7011   %a= and <16 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
   7012   %b = bitcast <16 x i1> %a to i16
   7013   %c = icmp eq i16 %b, 0
   7014   br i1 %c, label %true, label %false
   7015 
   7016 true:
   7017   ret void
   7018 
   7019 false:
   7020   ret void
   7021 }
   ; vcmp_test7: ors an <8 x i1> mask with an alternating true/false constant,
   ; compares the i8 view against zero and branches to two blocks that both just
   ; return. Both check prefixes still expect the mask to be materialised
   ; (vpsllw / vpmovw2m / kmovd) and an `orb $85` before a single retq.
   7022 define void @vcmp_test7(<8 x i1> %mask)  {
   7023 ; GENERIC-LABEL: vcmp_test7:
   7024 ; GENERIC:       # %bb.0: # %allocas
   7025 ; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
   7026 ; GENERIC-NEXT:    vpmovw2m %xmm0, %k0 # sched: [1:0.33]
   7027 ; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
   7028 ; GENERIC-NEXT:    orb $85, %al # sched: [1:0.33]
   7029 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   7030 ;
   7031 ; SKX-LABEL: vcmp_test7:
   7032 ; SKX:       # %bb.0: # %allocas
   7033 ; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
   7034 ; SKX-NEXT:    vpmovw2m %xmm0, %k0 # sched: [1:1.00]
   7035 ; SKX-NEXT:    kmovd %k0, %eax # sched: [3:1.00]
   7036 ; SKX-NEXT:    orb $85, %al # sched: [1:0.25]
   7037 ; SKX-NEXT:    retq # sched: [7:1.00]
   7038 allocas:
   7039   %a= or <8 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
   7040   %b = bitcast <8 x i1> %a to i8
   7041   %c = icmp eq i8 %b, 0
   7042   br i1 %c, label %true, label %false
   7043 
   7044 true:
   7045   ret void
   7046 
   7047 false:
   7048   ret void
   7049 }
   ; vcmp_test8: a scalar `%a1 sgt %b1` chooses between two <16 x i1> compare
   ; results, which is then sign-extended to <16 x i8>. %cmp2 is an unsigned
   ; less-than against zero, which can never be true; both check prefixes expect
   ; that side to be an all-zero mask from kxorw. The other side is a real
   ; vpcmpgtd against a zeroed register, reached through a conditional branch.
   7050 define <16 x i8> @vcmp_test8(<16 x i32>%a, <16 x i32>%b, i32 %a1, i32 %b1) {
   7051 ; GENERIC-LABEL: vcmp_test8:
   7052 ; GENERIC:       # %bb.0:
   7053 ; GENERIC-NEXT:    cmpl %esi, %edi # sched: [1:0.33]
   7054 ; GENERIC-NEXT:    jg .LBB386_1 # sched: [1:1.00]
   7055 ; GENERIC-NEXT:  # %bb.2:
   7056 ; GENERIC-NEXT:    kxorw %k0, %k0, %k0 # sched: [1:0.33]
   7057 ; GENERIC-NEXT:    vpmovm2b %k0, %xmm0 # sched: [1:0.33]
   7058 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
   7059 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   7060 ; GENERIC-NEXT:  .LBB386_1:
   7061 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
   7062 ; GENERIC-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0 # sched: [1:0.50]
   7063 ; GENERIC-NEXT:    vpmovm2b %k0, %xmm0 # sched: [1:0.33]
   7064 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
   7065 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   7066 ;
   7067 ; SKX-LABEL: vcmp_test8:
   7068 ; SKX:       # %bb.0:
   7069 ; SKX-NEXT:    cmpl %esi, %edi # sched: [1:0.25]
   7070 ; SKX-NEXT:    jg .LBB386_1 # sched: [1:0.50]
   7071 ; SKX-NEXT:  # %bb.2:
   7072 ; SKX-NEXT:    kxorw %k0, %k0, %k0 # sched: [1:1.00]
   7073 ; SKX-NEXT:    vpmovm2b %k0, %xmm0 # sched: [1:0.25]
   7074 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
   7075 ; SKX-NEXT:    retq # sched: [7:1.00]
   7076 ; SKX-NEXT:  .LBB386_1:
   7077 ; SKX-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
   7078 ; SKX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0 # sched: [3:1.00]
   7079 ; SKX-NEXT:    vpmovm2b %k0, %xmm0 # sched: [1:0.25]
   7080 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
   7081 ; SKX-NEXT:    retq # sched: [7:1.00]
   7082   %cond = icmp sgt i32 %a1, %b1
   7083   %cmp1 = icmp sgt <16 x i32> %a, zeroinitializer
   7084   %cmp2 = icmp ult <16 x i32> %b, zeroinitializer
   7085   %mix = select i1 %cond, <16 x i1> %cmp1, <16 x i1> %cmp2
   7086   %res = sext <16 x i1> %mix to <16 x i8>
   7087   ret <16 x i8> %res
   7088 }
   ; vpmov_test9: a scalar `%a1 sgt %b1` selects between two <16 x i1> arguments.
   ; Both check prefixes expect a branch that picks which input gets
   ; `vpsllw $7`, followed by a shared vpmovb2m / vpmovm2b round trip.
   7089 define <16 x i1> @vpmov_test9(<16 x i1>%a, <16 x i1>%b, i32 %a1, i32 %b1) {
   7090 ; GENERIC-LABEL: vpmov_test9:
   7091 ; GENERIC:       # %bb.0:
   7092 ; GENERIC-NEXT:    cmpl %esi, %edi # sched: [1:0.33]
   7093 ; GENERIC-NEXT:    jg .LBB387_1 # sched: [1:1.00]
   7094 ; GENERIC-NEXT:  # %bb.2:
   7095 ; GENERIC-NEXT:    vpsllw $7, %xmm1, %xmm0 # sched: [1:1.00]
   7096 ; GENERIC-NEXT:    jmp .LBB387_3 # sched: [1:1.00]
   7097 ; GENERIC-NEXT:  .LBB387_1:
   7098 ; GENERIC-NEXT:    vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
   7099 ; GENERIC-NEXT:  .LBB387_3:
   7100 ; GENERIC-NEXT:    vpmovb2m %xmm0, %k0 # sched: [1:0.33]
   7101 ; GENERIC-NEXT:    vpmovm2b %k0, %xmm0 # sched: [1:0.33]
   7102 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   7103 ;
   7104 ; SKX-LABEL: vpmov_test9:
   7105 ; SKX:       # %bb.0:
   7106 ; SKX-NEXT:    cmpl %esi, %edi # sched: [1:0.25]
   7107 ; SKX-NEXT:    jg .LBB387_1 # sched: [1:0.50]
   7108 ; SKX-NEXT:  # %bb.2:
   7109 ; SKX-NEXT:    vpsllw $7, %xmm1, %xmm0 # sched: [1:0.50]
   7110 ; SKX-NEXT:    jmp .LBB387_3 # sched: [1:0.50]
   7111 ; SKX-NEXT:  .LBB387_1:
   7112 ; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50]
   7113 ; SKX-NEXT:  .LBB387_3:
   7114 ; SKX-NEXT:    vpmovb2m %xmm0, %k0 # sched: [1:1.00]
   7115 ; SKX-NEXT:    vpmovm2b %k0, %xmm0 # sched: [1:0.25]
   7116 ; SKX-NEXT:    retq # sched: [7:1.00]
   7117   %mask = icmp sgt i32 %a1, %b1
   7118   %c = select i1 %mask, <16 x i1>%a, <16 x i1>%b
   7119   ret <16 x i1>%c
   7120 }

        ; vpmov_test10: a scalar `%a1 sgt %b1` selects between two <8 x i1>
        ; arguments.
        ; TODO: this function has no autogenerated assertions for either check
        ; prefix. `define` must start its own line for the update script to see the
        ; function; regenerate with utils/update_llc_test_checks.py.
        define <8 x i1> @vpmov_test10(<8 x i1>%a, <8 x i1>%b, i32 %a1, i32 %b1) {
   7121   %mask = icmp sgt i32 %a1, %b1
   7122   %c = select i1 %mask, <8 x i1>%a, <8 x i1>%b
   7123   ret <8 x i1>%c
   7124 }
   7125 
   ; vmov_test11: a scalar `%a1 sgt %b1` selects between two <4 x i1> arguments.
   ; Both check prefixes expect a branch that picks which input gets
   ; `vpslld $31`, followed by a shared vpmovd2m / vpmovm2d round trip.
   7126 define <4 x i1> @vmov_test11(<4 x i1>%a, <4 x i1>%b, i32 %a1, i32 %b1) {
   7127 ; GENERIC-LABEL: vmov_test11:
   7128 ; GENERIC:       # %bb.0:
   7129 ; GENERIC-NEXT:    cmpl %esi, %edi # sched: [1:0.33]
   7130 ; GENERIC-NEXT:    jg .LBB389_1 # sched: [1:1.00]
   7131 ; GENERIC-NEXT:  # %bb.2:
   7132 ; GENERIC-NEXT:    vpslld $31, %xmm1, %xmm0 # sched: [1:1.00]
   7133 ; GENERIC-NEXT:    jmp .LBB389_3 # sched: [1:1.00]
   7134 ; GENERIC-NEXT:  .LBB389_1:
   7135 ; GENERIC-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
   7136 ; GENERIC-NEXT:  .LBB389_3:
   7137 ; GENERIC-NEXT:    vpmovd2m %xmm0, %k0 # sched: [1:0.33]
   7138 ; GENERIC-NEXT:    vpmovm2d %k0, %xmm0 # sched: [1:0.33]
   7139 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   7140 ;
   7141 ; SKX-LABEL: vmov_test11:
   7142 ; SKX:       # %bb.0:
   7143 ; SKX-NEXT:    cmpl %esi, %edi # sched: [1:0.25]
   7144 ; SKX-NEXT:    jg .LBB389_1 # sched: [1:0.50]
   7145 ; SKX-NEXT:  # %bb.2:
   7146 ; SKX-NEXT:    vpslld $31, %xmm1, %xmm0 # sched: [1:0.50]
   7147 ; SKX-NEXT:    jmp .LBB389_3 # sched: [1:0.50]
   7148 ; SKX-NEXT:  .LBB389_1:
   7149 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
   7150 ; SKX-NEXT:  .LBB389_3:
   7151 ; SKX-NEXT:    vpmovd2m %xmm0, %k0 # sched: [1:1.00]
   7152 ; SKX-NEXT:    vpmovm2d %k0, %xmm0 # sched: [1:0.25]
   7153 ; SKX-NEXT:    retq # sched: [7:1.00]
   7154   %mask = icmp sgt i32 %a1, %b1
   7155   %c = select i1 %mask, <4 x i1>%a, <4 x i1>%b
   7156   ret <4 x i1>%c
   7157 }
   7158 
   ; vmov_test12: selects between %x and %y using element 0 of the constant
   ; i16 21845 viewed as <16 x i1>. Both check prefixes expect the select to be
   ; folded away, leaving only a move of %edi (the first argument) into %eax.
   7159 define i32 @vmov_test12(i32 %x, i32 %y)  {
   7160 ; GENERIC-LABEL: vmov_test12:
   7161 ; GENERIC:       # %bb.0:
   7162 ; GENERIC-NEXT:    movl %edi, %eax # sched: [1:0.33]
   7163 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   7164 ;
   7165 ; SKX-LABEL: vmov_test12:
   7166 ; SKX:       # %bb.0:
   7167 ; SKX-NEXT:    movl %edi, %eax # sched: [1:0.25]
   7168 ; SKX-NEXT:    retq # sched: [7:1.00]
   7169   %a = bitcast i16 21845 to <16 x i1>
   7170   %b = extractelement <16 x i1> %a, i32 0
   7171   %c = select i1 %b, i32 %x, i32 %y
   7172   ret i32 %c
   7173 }
   7174 
   ; vmov_test13: selects between %x and %y using element 3 of the constant
   ; i16 21845 viewed as <16 x i1>. Both check prefixes expect the select to be
   ; folded away, leaving only a move of %esi (the second argument) into %eax.
   7175 define i32 @vmov_test13(i32 %x, i32 %y)  {
   7176 ; GENERIC-LABEL: vmov_test13:
   7177 ; GENERIC:       # %bb.0:
   7178 ; GENERIC-NEXT:    movl %esi, %eax # sched: [1:0.33]
   7179 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   7180 ;
   7181 ; SKX-LABEL: vmov_test13:
   7182 ; SKX:       # %bb.0:
   7183 ; SKX-NEXT:    movl %esi, %eax # sched: [1:0.25]
   7184 ; SKX-NEXT:    retq # sched: [7:1.00]
   7185   %a = bitcast i16 21845 to <16 x i1>
   7186   %b = extractelement <16 x i1> %a, i32 3
   7187   %c = select i1 %b, i32 %x, i32 %y
   7188   ret i32 %c
   7189 }

        ; vmov_test14: takes element 2 of the constant i16 21845 viewed as
        ; <16 x i1> and inserts it at index 1 of <true, false, false, true>.
        ; TODO: this function has no autogenerated assertions for either check
        ; prefix. `define` must start its own line for the update script to see the
        ; function; regenerate with utils/update_llc_test_checks.py.
        define <4 x i1> @vmov_test14()  {
   7190   %a = bitcast i16 21845 to <16 x i1>
   7191   %b = extractelement <16 x i1> %a, i32 2
   7192   %c = insertelement <4 x i1> <i1 true, i1 false, i1 false, i1 true>, i1 %b, i32 1
   7193   ret <4 x i1> %c
   7194 }
   7195 
   ; vmov_test15: a scalar `%x sgt %y` selects between two constant <16 x i1>
   ; masks, the i16 values 21845 and 1. Both check prefixes expect the choice to
   ; be made on the integer constants with cmovgl, then moved into a k-register
   ; and expanded with vpmovm2b.
   7196 define <16 x i1> @vmov_test15(i32 %x, i32 %y)  {
   7197 ; GENERIC-LABEL: vmov_test15:
   7198 ; GENERIC:       # %bb.0:
   7199 ; GENERIC-NEXT:    cmpl %esi, %edi # sched: [1:0.33]
   7200 ; GENERIC-NEXT:    movl $21845, %eax # imm = 0x5555
   7201 ; GENERIC-NEXT:    # sched: [1:0.33]
   7202 ; GENERIC-NEXT:    movl $1, %ecx # sched: [1:0.33]
   7203 ; GENERIC-NEXT:    cmovgl %eax, %ecx # sched: [2:0.67]
   7204 ; GENERIC-NEXT:    kmovd %ecx, %k0 # sched: [1:0.33]
   7205 ; GENERIC-NEXT:    vpmovm2b %k0, %xmm0 # sched: [1:0.33]
   7206 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   7207 ;
   7208 ; SKX-LABEL: vmov_test15:
   7209 ; SKX:       # %bb.0:
   7210 ; SKX-NEXT:    cmpl %esi, %edi # sched: [1:0.25]
   7211 ; SKX-NEXT:    movl $21845, %eax # imm = 0x5555
   7212 ; SKX-NEXT:    # sched: [1:0.25]
   7213 ; SKX-NEXT:    movl $1, %ecx # sched: [1:0.25]
   7214 ; SKX-NEXT:    cmovgl %eax, %ecx # sched: [1:0.50]
   7215 ; SKX-NEXT:    kmovd %ecx, %k0 # sched: [1:1.00]
   7216 ; SKX-NEXT:    vpmovm2b %k0, %xmm0 # sched: [1:0.25]
   7217 ; SKX-NEXT:    retq # sched: [7:1.00]
   7218   %a = bitcast i16 21845 to <16 x i1>
   7219   %b = bitcast i16 1 to <16 x i1>
   7220   %mask = icmp sgt i32 %x, %y
   7221   %c = select i1 %mask, <16 x i1> %a, <16 x i1> %b
   7222   ret <16 x i1> %c
   7223 }
   7224 
   ; vmov_test16: views an i64 as <64 x i1>, forces element 5 to true and
   ; sign-extends the mask to <64 x i8>. Both check prefixes expect the insert to
   ; be done entirely in k-registers with a kshiftrq / kxorq / kshiftlq /
   ; kshiftrq / kxorq sequence, then expanded with vpmovm2b.
   7225 define <64 x i8> @vmov_test16(i64 %x) {
   7226 ;
   7227 ; GENERIC-LABEL: vmov_test16:
   7228 ; GENERIC:       # %bb.0:
   7229 ; GENERIC-NEXT:    kmovq %rdi, %k0 # sched: [1:0.33]
   7230 ; GENERIC-NEXT:    movb $1, %al # sched: [1:0.33]
   7231 ; GENERIC-NEXT:    kmovd %eax, %k1 # sched: [1:0.33]
   7232 ; GENERIC-NEXT:    kshiftrq $5, %k0, %k2 # sched: [1:1.00]
   7233 ; GENERIC-NEXT:    kxorq %k1, %k2, %k1 # sched: [1:0.33]
   7234 ; GENERIC-NEXT:    kshiftlq $63, %k1, %k1 # sched: [1:1.00]
   7235 ; GENERIC-NEXT:    kshiftrq $58, %k1, %k1 # sched: [1:1.00]
   7236 ; GENERIC-NEXT:    kxorq %k1, %k0, %k0 # sched: [1:0.33]
   7237 ; GENERIC-NEXT:    vpmovm2b %k0, %zmm0 # sched: [1:0.33]
   7238 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   7239 ;
   7240 ; SKX-LABEL: vmov_test16:
   7241 ; SKX:       # %bb.0:
   7242 ; SKX-NEXT:    kmovq %rdi, %k0 # sched: [1:1.00]
   7243 ; SKX-NEXT:    movb $1, %al # sched: [1:0.25]
   7244 ; SKX-NEXT:    kmovd %eax, %k1 # sched: [1:1.00]
   7245 ; SKX-NEXT:    kshiftrq $5, %k0, %k2 # sched: [3:1.00]
   7246 ; SKX-NEXT:    kxorq %k1, %k2, %k1 # sched: [1:1.00]
   7247 ; SKX-NEXT:    kshiftlq $63, %k1, %k1 # sched: [3:1.00]
   7248 ; SKX-NEXT:    kshiftrq $58, %k1, %k1 # sched: [3:1.00]
   7249 ; SKX-NEXT:    kxorq %k1, %k0, %k0 # sched: [1:1.00]
   7250 ; SKX-NEXT:    vpmovm2b %k0, %zmm0 # sched: [1:0.25]
   7251 ; SKX-NEXT:    retq # sched: [7:1.00]
   7252   %a = bitcast i64 %x to <64 x i1>
   7253   %b = insertelement <64 x i1>%a, i1 true, i32 5
   7254   %c = sext <64 x i1>%b to <64 x i8>
   7255   ret <64 x i8>%c
   7256 }
   7257 
   ; vmov_test17: views an i64 as <64 x i1>, replaces element 5 with the result
   ; of the scalar compare `%y sgt %z` and sign-extends the mask to <64 x i8>.
   ; Both check prefixes expect the inserted bit to come from cmpl / setg,
   ; followed by the k-register kshift / kxor insert sequence and vpmovm2b.
   7258 define <64 x i8> @vmov_test17(i64 %x, i32 %y, i32 %z) {
   7259 ;
   7260 ; GENERIC-LABEL: vmov_test17:
   7261 ; GENERIC:       # %bb.0:
   7262 ; GENERIC-NEXT:    kmovq %rdi, %k0 # sched: [1:0.33]
   7263 ; GENERIC-NEXT:    cmpl %edx, %esi # sched: [1:0.33]
   7264 ; GENERIC-NEXT:    setg %al # sched: [1:0.50]
   7265 ; GENERIC-NEXT:    kmovd %eax, %k1 # sched: [1:0.33]
   7266 ; GENERIC-NEXT:    kshiftrq $5, %k0, %k2 # sched: [1:1.00]
   7267 ; GENERIC-NEXT:    kxorq %k1, %k2, %k1 # sched: [1:0.33]
   7268 ; GENERIC-NEXT:    kshiftlq $63, %k1, %k1 # sched: [1:1.00]
   7269 ; GENERIC-NEXT:    kshiftrq $58, %k1, %k1 # sched: [1:1.00]
   7270 ; GENERIC-NEXT:    kxorq %k1, %k0, %k0 # sched: [1:0.33]
   7271 ; GENERIC-NEXT:    vpmovm2b %k0, %zmm0 # sched: [1:0.33]
   7272 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   7273 ;
   7274 ; SKX-LABEL: vmov_test17:
   7275 ; SKX:       # %bb.0:
   7276 ; SKX-NEXT:    kmovq %rdi, %k0 # sched: [1:1.00]
   7277 ; SKX-NEXT:    cmpl %edx, %esi # sched: [1:0.25]
   7278 ; SKX-NEXT:    setg %al # sched: [1:0.50]
   7279 ; SKX-NEXT:    kmovd %eax, %k1 # sched: [1:1.00]
   7280 ; SKX-NEXT:    kshiftrq $5, %k0, %k2 # sched: [3:1.00]
   7281 ; SKX-NEXT:    kxorq %k1, %k2, %k1 # sched: [1:1.00]
   7282 ; SKX-NEXT:    kshiftlq $63, %k1, %k1 # sched: [3:1.00]
   7283 ; SKX-NEXT:    kshiftrq $58, %k1, %k1 # sched: [3:1.00]
   7284 ; SKX-NEXT:    kxorq %k1, %k0, %k0 # sched: [1:1.00]
   7285 ; SKX-NEXT:    vpmovm2b %k0, %zmm0 # sched: [1:0.25]
   7286 ; SKX-NEXT:    retq # sched: [7:1.00]
   7287   %a = bitcast i64 %x to <64 x i1>
   7288   %b = icmp sgt i32 %y, %z
   7289   %c = insertelement <64 x i1>%a, i1 %b, i32 5
   7290   %d = sext <64 x i1>%c to <64 x i8>
   7291   ret <64 x i8>%d
   7292 }
   7293 
   7294 define <8 x i1> @vmov_test18(i8 %a, i16 %y) {
        ; Views %a as an <8 x i1> and %y as a <16 x i1>, then overwrites lane 7 of
        ; the 8-lane mask with lane 8 of %y and lane 6 with lane 9 of %y. The
        ; expected sequences do both inserts with k-register shifts, xor and or.
   7295 ; GENERIC-LABEL: vmov_test18:
   7296 ; GENERIC:       # %bb.0:
   7297 ; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
   7298 ; GENERIC-NEXT:    kmovd %esi, %k2 # sched: [1:0.33]
   7299 ; GENERIC-NEXT:    kshiftrw $8, %k2, %k0 # sched: [1:1.00]
   7300 ; GENERIC-NEXT:    kshiftrw $9, %k2, %k2 # sched: [1:1.00]
   7301 ; GENERIC-NEXT:    kshiftrb $6, %k1, %k3 # sched: [1:1.00]
   7302 ; GENERIC-NEXT:    kxorb %k2, %k3, %k2 # sched: [1:0.33]
   7303 ; GENERIC-NEXT:    kshiftlb $7, %k2, %k2 # sched: [1:1.00]
   7304 ; GENERIC-NEXT:    kshiftrb $1, %k2, %k2 # sched: [1:1.00]
   7305 ; GENERIC-NEXT:    kxorb %k2, %k1, %k1 # sched: [1:0.33]
   7306 ; GENERIC-NEXT:    kshiftlb $1, %k1, %k1 # sched: [1:1.00]
   7307 ; GENERIC-NEXT:    kshiftrb $1, %k1, %k1 # sched: [1:1.00]
   7308 ; GENERIC-NEXT:    kshiftlb $7, %k0, %k0 # sched: [1:1.00]
   7309 ; GENERIC-NEXT:    korb %k0, %k1, %k0 # sched: [1:0.33]
   7310 ; GENERIC-NEXT:    vpmovm2w %k0, %xmm0 # sched: [1:0.33]
   7311 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   7312 ;
   7313 ; SKX-LABEL: vmov_test18:
   7314 ; SKX:       # %bb.0:
   7315 ; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
   7316 ; SKX-NEXT:    kmovd %esi, %k2 # sched: [1:1.00]
   7317 ; SKX-NEXT:    kshiftrw $8, %k2, %k0 # sched: [3:1.00]
   7318 ; SKX-NEXT:    kshiftrw $9, %k2, %k2 # sched: [3:1.00]
   7319 ; SKX-NEXT:    kshiftrb $6, %k1, %k3 # sched: [3:1.00]
   7320 ; SKX-NEXT:    kxorb %k2, %k3, %k2 # sched: [1:1.00]
   7321 ; SKX-NEXT:    kshiftlb $7, %k2, %k2 # sched: [3:1.00]
   7322 ; SKX-NEXT:    kshiftrb $1, %k2, %k2 # sched: [3:1.00]
   7323 ; SKX-NEXT:    kxorb %k2, %k1, %k1 # sched: [1:1.00]
   7324 ; SKX-NEXT:    kshiftlb $1, %k1, %k1 # sched: [3:1.00]
   7325 ; SKX-NEXT:    kshiftrb $1, %k1, %k1 # sched: [3:1.00]
   7326 ; SKX-NEXT:    kshiftlb $7, %k0, %k0 # sched: [3:1.00]
   7327 ; SKX-NEXT:    korb %k0, %k1, %k0 # sched: [1:1.00]
   7328 ; SKX-NEXT:    vpmovm2w %k0, %xmm0 # sched: [1:0.25]
   7329 ; SKX-NEXT:    retq # sched: [7:1.00]
   7330   %b = bitcast i8 %a to <8 x i1>
   7331   %b1 = bitcast i16 %y to <16 x i1>
   7332   %el1 = extractelement <16 x i1>%b1, i32 8
   7333   %el2 = extractelement <16 x i1>%b1, i32 9
   7334   %c = insertelement <8 x i1>%b, i1 %el1, i32 7
   7335   %d = insertelement <8 x i1>%c, i1 %el2, i32 6
   7336   ret <8 x i1>%d
   7337 }
   7338 define <32 x i16> @vmov_test21(<32 x i16> %x , <32 x i1> %mask) nounwind readnone {
        ; Zero-masking select: keeps the lanes of %x where %mask is set and zeroes
        ; the rest. The expected sequences turn the mask operand (ymm1) into k1
        ; with vpsllw + vpmovb2m and apply it with a {z}-masked vmovdqu16.
   7339 ; GENERIC-LABEL: vmov_test21:
   7340 ; GENERIC:       # %bb.0:
   7341 ; GENERIC-NEXT:    vpsllw $7, %ymm1, %ymm1 # sched: [1:1.00]
   7342 ; GENERIC-NEXT:    vpmovb2m %ymm1, %k1 # sched: [1:0.33]
   7343 ; GENERIC-NEXT:    vmovdqu16 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50]
   7344 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   7345 ;
   7346 ; SKX-LABEL: vmov_test21:
   7347 ; SKX:       # %bb.0:
   7348 ; SKX-NEXT:    vpsllw $7, %ymm1, %ymm1 # sched: [1:0.50]
   7349 ; SKX-NEXT:    vpmovb2m %ymm1, %k1 # sched: [1:1.00]
   7350 ; SKX-NEXT:    vmovdqu16 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33]
   7351 ; SKX-NEXT:    retq # sched: [7:1.00]
   7352   %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
   7353   ret <32 x i16> %ret
   7354 }
   7355 
   7356 define void @vmov_test22(<4 x i1> %a, <4 x i1>* %addr) {
        ; Stores a <4 x i1> argument straight to memory. The expected sequences
        ; convert it to a mask register (vpslld + vpmovd2m) and store with kmovb.
   7357 ; GENERIC-LABEL: vmov_test22:
   7358 ; GENERIC:       # %bb.0:
   7359 ; GENERIC-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
   7360 ; GENERIC-NEXT:    vpmovd2m %xmm0, %k0 # sched: [1:0.33]
   7361 ; GENERIC-NEXT:    kmovb %k0, (%rdi) # sched: [1:1.00]
   7362 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   7363 ;
   7364 ; SKX-LABEL: vmov_test22:
   7365 ; SKX:       # %bb.0:
   7366 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
   7367 ; SKX-NEXT:    vpmovd2m %xmm0, %k0 # sched: [1:1.00]
   7368 ; SKX-NEXT:    kmovb %k0, (%rdi) # sched: [1:1.00]
   7369 ; SKX-NEXT:    retq # sched: [7:1.00]
   7370   store <4 x i1> %a, <4 x i1>* %addr
   7371   ret void
   7372 }
   7373 
   7374 define void @vmov_test23(<2 x i1> %a, <2 x i1>* %addr) {
        ; Stores a <2 x i1> argument straight to memory. The expected sequences
        ; convert it to a mask register (vpsllq + vpmovq2m) and store with kmovb.
   7375 ; GENERIC-LABEL: vmov_test23:
   7376 ; GENERIC:       # %bb.0:
   7377 ; GENERIC-NEXT:    vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
   7378 ; GENERIC-NEXT:    vpmovq2m %xmm0, %k0 # sched: [1:0.33]
   7379 ; GENERIC-NEXT:    kmovb %k0, (%rdi) # sched: [1:1.00]
   7380 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   7381 ;
   7382 ; SKX-LABEL: vmov_test23:
   7383 ; SKX:       # %bb.0:
   7384 ; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50]
   7385 ; SKX-NEXT:    vpmovq2m %xmm0, %k0 # sched: [1:1.00]
   7386 ; SKX-NEXT:    kmovb %k0, (%rdi) # sched: [1:1.00]
   7387 ; SKX-NEXT:    retq # sched: [7:1.00]
   7388   store <2 x i1> %a, <2 x i1>* %addr
   7389   ret void
   7390 }
   7391 
   7392 define void @store_v1i1(<1 x i1> %c , <1 x i1>* %ptr) {
        ; Inverts a <1 x i1> (xor with true) and stores it through %ptr. The
        ; expected sequences build the all-ones operand with kxnorw, apply kxorw,
        ; and store the result with kmovb.
   7393 ; GENERIC-LABEL: store_v1i1:
   7394 ; GENERIC:       # %bb.0:
   7395 ; GENERIC-NEXT:    kmovd %edi, %k0 # sched: [1:0.33]
   7396 ; GENERIC-NEXT:    kxnorw %k0, %k0, %k1 # sched: [1:0.33]
   7397 ; GENERIC-NEXT:    kxorw %k1, %k0, %k0 # sched: [1:0.33]
   7398 ; GENERIC-NEXT:    kmovb %k0, (%rsi) # sched: [1:1.00]
   7399 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   7400 ;
   7401 ; SKX-LABEL: store_v1i1:
   7402 ; SKX:       # %bb.0:
   7403 ; SKX-NEXT:    kmovd %edi, %k0 # sched: [1:1.00]
   7404 ; SKX-NEXT:    kxnorw %k0, %k0, %k1 # sched: [1:1.00]
   7405 ; SKX-NEXT:    kxorw %k1, %k0, %k0 # sched: [1:1.00]
   7406 ; SKX-NEXT:    kmovb %k0, (%rsi) # sched: [1:1.00]
   7407 ; SKX-NEXT:    retq # sched: [7:1.00]
   7408   %x = xor <1 x i1> %c, <i1 1>
   7409   store <1 x i1> %x, <1 x i1>*  %ptr, align 4
   7410   ret void
   7411 }
   7412 
   7413 define void @store_v2i1(<2 x i1> %c , <2 x i1>* %ptr) {
        ; Inverts every lane of a <2 x i1> (xor with all-true) and stores it
        ; through %ptr. The expected sequences convert to a mask register, invert
        ; with knotw, and store with kmovb.
   7414 ; GENERIC-LABEL: store_v2i1:
   7415 ; GENERIC:       # %bb.0:
   7416 ; GENERIC-NEXT:    vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
   7417 ; GENERIC-NEXT:    vpmovq2m %xmm0, %k0 # sched: [1:0.33]
   7418 ; GENERIC-NEXT:    knotw %k0, %k0 # sched: [1:0.33]
   7419 ; GENERIC-NEXT:    kmovb %k0, (%rdi) # sched: [1:1.00]
   7420 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   7421 ;
   7422 ; SKX-LABEL: store_v2i1:
   7423 ; SKX:       # %bb.0:
   7424 ; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50]
   7425 ; SKX-NEXT:    vpmovq2m %xmm0, %k0 # sched: [1:1.00]
   7426 ; SKX-NEXT:    knotw %k0, %k0 # sched: [1:1.00]
   7427 ; SKX-NEXT:    kmovb %k0, (%rdi) # sched: [1:1.00]
   7428 ; SKX-NEXT:    retq # sched: [7:1.00]
   7429   %x = xor <2 x i1> %c, <i1 1, i1 1>
   7430   store <2 x i1> %x, <2 x i1>*  %ptr, align 4
   7431   ret void
   7432 }
   7433 
   7434 define void @store_v4i1(<4 x i1> %c , <4 x i1>* %ptr) {
        ; Inverts every lane of a <4 x i1> (xor with all-true) and stores it
        ; through %ptr. The expected sequences convert to a mask register, invert
        ; with knotw, and store with kmovb.
   7435 ; GENERIC-LABEL: store_v4i1:
   7436 ; GENERIC:       # %bb.0:
   7437 ; GENERIC-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
   7438 ; GENERIC-NEXT:    vpmovd2m %xmm0, %k0 # sched: [1:0.33]
   7439 ; GENERIC-NEXT:    knotw %k0, %k0 # sched: [1:0.33]
   7440 ; GENERIC-NEXT:    kmovb %k0, (%rdi) # sched: [1:1.00]
   7441 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   7442 ;
   7443 ; SKX-LABEL: store_v4i1:
   7444 ; SKX:       # %bb.0:
   7445 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
   7446 ; SKX-NEXT:    vpmovd2m %xmm0, %k0 # sched: [1:1.00]
   7447 ; SKX-NEXT:    knotw %k0, %k0 # sched: [1:1.00]
   7448 ; SKX-NEXT:    kmovb %k0, (%rdi) # sched: [1:1.00]
   7449 ; SKX-NEXT:    retq # sched: [7:1.00]
   7450   %x = xor <4 x i1> %c, <i1 1, i1 1, i1 1, i1 1>
   7451   store <4 x i1> %x, <4 x i1>*  %ptr, align 4
   7452   ret void
   7453 }
   7454 
   7455 define void @store_v8i1(<8 x i1> %c , <8 x i1>* %ptr) {
        ; Inverts every lane of an <8 x i1> (xor with all-true) and stores it
        ; through %ptr. The expected sequences use the byte-width knotb here,
        ; followed by a kmovb store.
   7456 ; GENERIC-LABEL: store_v8i1:
   7457 ; GENERIC:       # %bb.0:
   7458 ; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
   7459 ; GENERIC-NEXT:    vpmovw2m %xmm0, %k0 # sched: [1:0.33]
   7460 ; GENERIC-NEXT:    knotb %k0, %k0 # sched: [1:0.33]
   7461 ; GENERIC-NEXT:    kmovb %k0, (%rdi) # sched: [1:1.00]
   7462 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   7463 ;
   7464 ; SKX-LABEL: store_v8i1:
   7465 ; SKX:       # %bb.0:
   7466 ; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
   7467 ; SKX-NEXT:    vpmovw2m %xmm0, %k0 # sched: [1:1.00]
   7468 ; SKX-NEXT:    knotb %k0, %k0 # sched: [1:1.00]
   7469 ; SKX-NEXT:    kmovb %k0, (%rdi) # sched: [1:1.00]
   7470 ; SKX-NEXT:    retq # sched: [7:1.00]
   7471   %x = xor <8 x i1> %c, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
   7472   store <8 x i1> %x, <8 x i1>*  %ptr, align 4
   7473   ret void
   7474 }
   7475 
   7476 define void @store_v16i1(<16 x i1> %c , <16 x i1>* %ptr) {
        ; Inverts every lane of a <16 x i1> (xor with all-true) and stores it
        ; through %ptr. The expected sequences convert with vpmovb2m, invert with
        ; knotw, and store the 16-bit mask with kmovw.
   7477 ; GENERIC-LABEL: store_v16i1:
   7478 ; GENERIC:       # %bb.0:
   7479 ; GENERIC-NEXT:    vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
   7480 ; GENERIC-NEXT:    vpmovb2m %xmm0, %k0 # sched: [1:0.33]
   7481 ; GENERIC-NEXT:    knotw %k0, %k0 # sched: [1:0.33]
   7482 ; GENERIC-NEXT:    kmovw %k0, (%rdi) # sched: [1:1.00]
   7483 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   7484 ;
   7485 ; SKX-LABEL: store_v16i1:
   7486 ; SKX:       # %bb.0:
   7487 ; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50]
   7488 ; SKX-NEXT:    vpmovb2m %xmm0, %k0 # sched: [1:1.00]
   7489 ; SKX-NEXT:    knotw %k0, %k0 # sched: [1:1.00]
   7490 ; SKX-NEXT:    kmovw %k0, (%rdi) # sched: [1:1.00]
   7491 ; SKX-NEXT:    retq # sched: [7:1.00]
   7492   %x = xor <16 x i1> %c, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
   7493   store <16 x i1> %x, <16 x i1>*  %ptr, align 4
   7494   ret void
   7495 }
   7496 
   7497 ;void f2(int);
   7498 ;void f1(int c)
   7499 ;{
   7500 ;  static int v = 0;
   7501 ;  if (v == 0)
   7502 ;    v = 1;
   7503 ;  else
   7504 ;    v = 0;
   7505 ;  f2(v);
   7506 ;}
   7507 
   7508 @f1.v = internal unnamed_addr global i1 false, align 4
        ; ^ Storage toggled by @f1; the C sketch above declares it as
        ;   `static int v`, while this IR holds it as a single i1.
   7509 
   7510 define void @f1(i32 %c) {
        ; IR for the C function sketched in the comment above: loads the i1 global
        ; @f1.v, flips it, stores it back, and tail-calls @f2 with the new value
        ; zero-extended to i32. %c is unused. The expected sequences use plain
        ; scalar movzbl/xorl/movb and end in a `jmp f2` tail call.
   7511 ; GENERIC-LABEL: f1:
   7512 ; GENERIC:       # %bb.0: # %entry
   7513 ; GENERIC-NEXT:    movzbl {{.*}}(%rip), %edi # sched: [5:0.50]
   7514 ; GENERIC-NEXT:    xorl $1, %edi # sched: [1:0.33]
   7515 ; GENERIC-NEXT:    movb %dil, {{.*}}(%rip) # sched: [1:1.00]
   7516 ; GENERIC-NEXT:    jmp f2 # TAILCALL
   7517 ;
   7518 ; SKX-LABEL: f1:
   7519 ; SKX:       # %bb.0: # %entry
   7520 ; SKX-NEXT:    movzbl {{.*}}(%rip), %edi # sched: [5:0.50]
   7521 ; SKX-NEXT:    xorl $1, %edi # sched: [1:0.25]
   7522 ; SKX-NEXT:    movb %dil, {{.*}}(%rip) # sched: [1:1.00]
   7523 ; SKX-NEXT:    jmp f2 # TAILCALL
   7524 entry:
   7525   %.b1 = load i1, i1* @f1.v, align 4
   7526   %not..b1 = xor i1 %.b1, true
   7527   store i1 %not..b1, i1* @f1.v, align 4
   7528   %0 = zext i1 %not..b1 to i32
   7529   tail call void @f2(i32 %0) #2
   7530   ret void
   7531 }
   7532 
   7533 declare void @f2(i32) #1
        ; ^ External callee of the tail call in @f1. NOTE(review): no definition of
        ;   attribute group #1 is visible in this part of the file; confirm.
   7534 
   7535 define void @store_i16_i1(i16 %x, i1 *%y) {
        ; Truncates %x to i1 and stores it through %y. The expected sequences mask
        ; the low bit with `andl $1` and store a byte; no mask register is used.
   7536 ; GENERIC-LABEL: store_i16_i1:
   7537 ; GENERIC:       # %bb.0:
   7538 ; GENERIC-NEXT:    andl $1, %edi # sched: [1:0.33]
   7539 ; GENERIC-NEXT:    movb %dil, (%rsi) # sched: [1:1.00]
   7540 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   7541 ;
   7542 ; SKX-LABEL: store_i16_i1:
   7543 ; SKX:       # %bb.0:
   7544 ; SKX-NEXT:    andl $1, %edi # sched: [1:0.25]
   7545 ; SKX-NEXT:    movb %dil, (%rsi) # sched: [1:1.00]
   7546 ; SKX-NEXT:    retq # sched: [7:1.00]
   7547   %c = trunc i16 %x to i1
   7548   store i1 %c, i1* %y
   7549   ret void
   7550 }
   7551 
   7552 define void @store_i8_i1(i8 %x, i1 *%y) {
        ; Truncates %x to i1 and stores it through %y. The expected sequences mask
        ; the low bit with `andl $1` and store a byte; no mask register is used.
   7553 ; GENERIC-LABEL: store_i8_i1:
   7554 ; GENERIC:       # %bb.0:
   7555 ; GENERIC-NEXT:    andl $1, %edi # sched: [1:0.33]
   7556 ; GENERIC-NEXT:    movb %dil, (%rsi) # sched: [1:1.00]
   7557 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   7558 ;
   7559 ; SKX-LABEL: store_i8_i1:
   7560 ; SKX:       # %bb.0:
   7561 ; SKX-NEXT:    andl $1, %edi # sched: [1:0.25]
   7562 ; SKX-NEXT:    movb %dil, (%rsi) # sched: [1:1.00]
   7563 ; SKX-NEXT:    retq # sched: [7:1.00]
   7564   %c = trunc i8 %x to i1
   7565   store i1 %c, i1* %y
   7566   ret void
   7567 }
   7568 
   7569 define <32 x i16> @test_build_vec_v32i1(<32 x i16> %x) {
        ; Zero-masking select of %x under a constant <32 x i1> mask. The expected
        ; sequences materialize the mask as the 32-bit immediate 0x59455495, move
        ; it into k1, and apply it with a {z}-masked vmovdqu16. The scheduling
        ; note for the movl lands on its own line after the `imm =` comment.
   7570 ; GENERIC-LABEL: test_build_vec_v32i1:
   7571 ; GENERIC:       # %bb.0:
   7572 ; GENERIC-NEXT:    movl $1497715861, %eax # imm = 0x59455495
   7573 ; GENERIC-NEXT:    # sched: [1:0.33]
   7574 ; GENERIC-NEXT:    kmovd %eax, %k1 # sched: [1:0.33]
   7575 ; GENERIC-NEXT:    vmovdqu16 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50]
   7576 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   7577 ;
   7578 ; SKX-LABEL: test_build_vec_v32i1:
   7579 ; SKX:       # %bb.0:
   7580 ; SKX-NEXT:    movl $1497715861, %eax # imm = 0x59455495
   7581 ; SKX-NEXT:    # sched: [1:0.25]
   7582 ; SKX-NEXT:    kmovd %eax, %k1 # sched: [1:1.00]
   7583 ; SKX-NEXT:    vmovdqu16 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33]
   7584 ; SKX-NEXT:    retq # sched: [7:1.00]
   7585   %ret = select <32 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false>, <32 x i16> %x, <32 x i16> zeroinitializer
   7586   ret <32 x i16> %ret
   7587 }
   7588 
   7589 define <64 x i8> @test_build_vec_v64i1(<64 x i8> %x) {
        ; Zero-masking select of %x under a constant <64 x i1> mask. The expected
        ; sequences contain no mask register at all: the select is emitted as a
        ; single vpshufb whose shuffle comment shows the zeroed lanes.
   7590 ; GENERIC-LABEL: test_build_vec_v64i1:
   7591 ; GENERIC:       # %bb.0:
   7592 ; GENERIC-NEXT:    vpshufb {{.*#+}} zmm0 = zero,zero,zmm0[2],zero,zero,zero,zmm0[6],zero,zmm0[8],zero,zmm0[10],zero,zmm0[12],zero,zero,zmm0[15],zero,zero,zmm0[18],zero,zmm0[20],zero,zmm0[22],zero,zmm0[24],zero,zero,zmm0[27],zero,zero,zmm0[30],zero,zmm0[32],zero,zmm0[34],zero,zero,zero,zmm0[38],zero,zmm0[40],zero,zero,zmm0[43,44],zero,zmm0[46],zero,zmm0[48],zero,zmm0[50],zero,zero,zero,zmm0[54],zero,zmm0[56],zero,zero,zmm0[59,60],zero,zmm0[62],zero sched: [8:0.50]
   7593 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   7594 ;
   7595 ; SKX-LABEL: test_build_vec_v64i1:
   7596 ; SKX:       # %bb.0:
   7597 ; SKX-NEXT:    vpshufb {{.*#+}} zmm0 = zero,zero,zmm0[2],zero,zero,zero,zmm0[6],zero,zmm0[8],zero,zmm0[10],zero,zmm0[12],zero,zero,zmm0[15],zero,zero,zmm0[18],zero,zmm0[20],zero,zmm0[22],zero,zmm0[24],zero,zero,zmm0[27],zero,zero,zmm0[30],zero,zmm0[32],zero,zmm0[34],zero,zero,zero,zmm0[38],zero,zmm0[40],zero,zero,zmm0[43,44],zero,zmm0[46],zero,zmm0[48],zero,zmm0[50],zero,zero,zero,zmm0[54],zero,zmm0[56],zero,zero,zmm0[59,60],zero,zmm0[62],zero sched: [8:1.00]
   7598 ; SKX-NEXT:    retq # sched: [7:1.00]
   7599   %ret = select <64 x i1> <i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false>, <64 x i8> %x, <64 x i8> zeroinitializer
   7600   ret <64 x i8> %ret
   7601 }
   7602 
   7603 define void @ktest_1(<8 x double> %in, double * %base) {
        ; Branches on whether an <8 x i1> compare mask is all-zero.
        ; Two overlapping <8 x double> loads are taken at %base and %base + 1
        ; element (both align 1). sel1 = (%in > val1) selects between val2 and
        ; zero; sel2 = (%in < that result); the branch tests (sel1 & sel2) == 0
        ; after bitcasting the mask to i8. A zero mask stores %in at the second
        ; address (L2), otherwise at the first (L1). The expected sequences fold
        ; the `and` into a {%k1}-masked compare and test the mask with kortestb.
   7604 ; GENERIC-LABEL: ktest_1:
   7605 ; GENERIC:       # %bb.0:
   7606 ; GENERIC-NEXT:    vmovupd (%rdi), %zmm1 # sched: [7:0.50]
   7607 ; GENERIC-NEXT:    vcmpltpd %zmm0, %zmm1, %k1 # sched: [3:1.00]
   7608 ; GENERIC-NEXT:    vmovupd 8(%rdi), %zmm1 {%k1} {z} # sched: [7:0.50]
   7609 ; GENERIC-NEXT:    vcmpltpd %zmm1, %zmm0, %k0 {%k1} # sched: [3:1.00]
   7610 ; GENERIC-NEXT:    kortestb %k0, %k0 # sched: [1:0.33]
   7611 ; GENERIC-NEXT:    je .LBB410_2 # sched: [1:1.00]
   7612 ; GENERIC-NEXT:  # %bb.1: # %L1
   7613 ; GENERIC-NEXT:    vmovapd %zmm0, (%rdi) # sched: [1:1.00]
   7614 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
   7615 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   7616 ; GENERIC-NEXT:  .LBB410_2: # %L2
   7617 ; GENERIC-NEXT:    vmovapd %zmm0, 8(%rdi) # sched: [1:1.00]
   7618 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
   7619 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   7620 ;
   7621 ; SKX-LABEL: ktest_1:
   7622 ; SKX:       # %bb.0:
   7623 ; SKX-NEXT:    vmovupd (%rdi), %zmm1 # sched: [8:0.50]
   7624 ; SKX-NEXT:    vcmpltpd %zmm0, %zmm1, %k1 # sched: [3:1.00]
   7625 ; SKX-NEXT:    vmovupd 8(%rdi), %zmm1 {%k1} {z} # sched: [8:0.50]
   7626 ; SKX-NEXT:    vcmpltpd %zmm1, %zmm0, %k0 {%k1} # sched: [3:1.00]
   7627 ; SKX-NEXT:    kortestb %k0, %k0 # sched: [3:1.00]
   7628 ; SKX-NEXT:    je .LBB410_2 # sched: [1:0.50]
   7629 ; SKX-NEXT:  # %bb.1: # %L1
   7630 ; SKX-NEXT:    vmovapd %zmm0, (%rdi) # sched: [1:1.00]
   7631 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
   7632 ; SKX-NEXT:    retq # sched: [7:1.00]
   7633 ; SKX-NEXT:  .LBB410_2: # %L2
   7634 ; SKX-NEXT:    vmovapd %zmm0, 8(%rdi) # sched: [1:1.00]
   7635 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
   7636 ; SKX-NEXT:    retq # sched: [7:1.00]
   7637   %addr1 = getelementptr double, double * %base, i64 0
   7638   %addr2 = getelementptr double, double * %base, i64 1
   7639 
   7640   %vaddr1 = bitcast double* %addr1 to <8 x double>*
   7641   %vaddr2 = bitcast double* %addr2 to <8 x double>*
   7642 
   7643   %val1 = load <8 x double>, <8 x double> *%vaddr1, align 1
   7644   %val2 = load <8 x double>, <8 x double> *%vaddr2, align 1
   7645 
   7646   %sel1 = fcmp ogt <8 x double>%in, %val1
   7647   %val3 = select <8 x i1> %sel1, <8 x double> %val2, <8 x double> zeroinitializer
   7648   %sel2 = fcmp olt <8 x double> %in, %val3
   7649   %sel3 = and <8 x i1> %sel1, %sel2
   7650 
   7651   %int_sel3 = bitcast <8 x i1> %sel3 to i8
   7652   %res = icmp eq i8 %int_sel3, zeroinitializer
   7653   br i1 %res, label %L2, label %L1
   7654 L1:
   7655   store <8 x double> %in, <8 x double>* %vaddr1
   7656   br label %End
   7657 L2:
   7658   store <8 x double> %in, <8 x double>* %vaddr2
   7659   br label %End
   7660 End:
   7661   ret void
   7662 }
   7663 
   7664 define void @ktest_2(<32 x float> %in, float * %base) {
        ; Branches on whether a <32 x i1> compare mask is all-zero.
        ; Two overlapping <32 x float> loads are taken at %base and %base + 1
        ; element (both align 1). sel1 = (%in > val1) selects between val2 and
        ; zero; sel2 = (%in < that result); the branch tests (sel1 | sel2) == 0
        ; after bitcasting the mask to i32. A zero mask stores %in at the second
        ; address (L2), otherwise at the first (L1). The expected sequences split
        ; the work into two zmm halves, join the 16-bit masks with kunpckwd, and
        ; test with kortestd.
   7665 ;
   7666 ; GENERIC-LABEL: ktest_2:
   7667 ; GENERIC:       # %bb.0:
   7668 ; GENERIC-NEXT:    vmovups (%rdi), %zmm2 # sched: [7:0.50]
   7669 ; GENERIC-NEXT:    vmovups 64(%rdi), %zmm3 # sched: [7:0.50]
   7670 ; GENERIC-NEXT:    vcmpltps %zmm0, %zmm2, %k1 # sched: [3:1.00]
   7671 ; GENERIC-NEXT:    vcmpltps %zmm1, %zmm3, %k2 # sched: [3:1.00]
   7672 ; GENERIC-NEXT:    kunpckwd %k1, %k2, %k0 # sched: [1:1.00]
   7673 ; GENERIC-NEXT:    vmovups 68(%rdi), %zmm2 {%k2} {z} # sched: [7:0.50]
   7674 ; GENERIC-NEXT:    vmovups 4(%rdi), %zmm3 {%k1} {z} # sched: [7:0.50]
   7675 ; GENERIC-NEXT:    vcmpltps %zmm3, %zmm0, %k1 # sched: [3:1.00]
   7676 ; GENERIC-NEXT:    vcmpltps %zmm2, %zmm1, %k2 # sched: [3:1.00]
   7677 ; GENERIC-NEXT:    kunpckwd %k1, %k2, %k1 # sched: [1:1.00]
   7678 ; GENERIC-NEXT:    kortestd %k1, %k0 # sched: [1:0.33]
   7679 ; GENERIC-NEXT:    je .LBB411_2 # sched: [1:1.00]
   7680 ; GENERIC-NEXT:  # %bb.1: # %L1
   7681 ; GENERIC-NEXT:    vmovaps %zmm0, (%rdi) # sched: [1:1.00]
   7682 ; GENERIC-NEXT:    vmovaps %zmm1, 64(%rdi) # sched: [1:1.00]
   7683 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
   7684 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   7685 ; GENERIC-NEXT:  .LBB411_2: # %L2
   7686 ; GENERIC-NEXT:    vmovaps %zmm0, 4(%rdi) # sched: [1:1.00]
   7687 ; GENERIC-NEXT:    vmovaps %zmm1, 68(%rdi) # sched: [1:1.00]
   7688 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
   7689 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   7690 ;
   7691 ; SKX-LABEL: ktest_2:
   7692 ; SKX:       # %bb.0:
   7693 ; SKX-NEXT:    vmovups (%rdi), %zmm2 # sched: [8:0.50]
   7694 ; SKX-NEXT:    vmovups 64(%rdi), %zmm3 # sched: [8:0.50]
   7695 ; SKX-NEXT:    vcmpltps %zmm0, %zmm2, %k1 # sched: [3:1.00]
   7696 ; SKX-NEXT:    vcmpltps %zmm1, %zmm3, %k2 # sched: [3:1.00]
   7697 ; SKX-NEXT:    kunpckwd %k1, %k2, %k0 # sched: [3:1.00]
   7698 ; SKX-NEXT:    vmovups 68(%rdi), %zmm2 {%k2} {z} # sched: [8:0.50]
   7699 ; SKX-NEXT:    vmovups 4(%rdi), %zmm3 {%k1} {z} # sched: [8:0.50]
   7700 ; SKX-NEXT:    vcmpltps %zmm3, %zmm0, %k1 # sched: [3:1.00]
   7701 ; SKX-NEXT:    vcmpltps %zmm2, %zmm1, %k2 # sched: [3:1.00]
   7702 ; SKX-NEXT:    kunpckwd %k1, %k2, %k1 # sched: [3:1.00]
   7703 ; SKX-NEXT:    kortestd %k1, %k0 # sched: [3:1.00]
   7704 ; SKX-NEXT:    je .LBB411_2 # sched: [1:0.50]
   7705 ; SKX-NEXT:  # %bb.1: # %L1
   7706 ; SKX-NEXT:    vmovaps %zmm0, (%rdi) # sched: [1:1.00]
   7707 ; SKX-NEXT:    vmovaps %zmm1, 64(%rdi) # sched: [1:1.00]
   7708 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
   7709 ; SKX-NEXT:    retq # sched: [7:1.00]
   7710 ; SKX-NEXT:  .LBB411_2: # %L2
   7711 ; SKX-NEXT:    vmovaps %zmm0, 4(%rdi) # sched: [1:1.00]
   7712 ; SKX-NEXT:    vmovaps %zmm1, 68(%rdi) # sched: [1:1.00]
   7713 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
   7714 ; SKX-NEXT:    retq # sched: [7:1.00]
   7715   %addr1 = getelementptr float, float * %base, i64 0
   7716   %addr2 = getelementptr float, float * %base, i64 1
   7717 
   7718   %vaddr1 = bitcast float* %addr1 to <32 x float>*
   7719   %vaddr2 = bitcast float* %addr2 to <32 x float>*
   7720 
   7721   %val1 = load <32 x float>, <32 x float> *%vaddr1, align 1
   7722   %val2 = load <32 x float>, <32 x float> *%vaddr2, align 1
   7723 
   7724   %sel1 = fcmp ogt <32 x float>%in, %val1
   7725   %val3 = select <32 x i1> %sel1, <32 x float> %val2, <32 x float> zeroinitializer
   7726   %sel2 = fcmp olt <32 x float> %in, %val3
   7727   %sel3 = or <32 x i1> %sel1, %sel2
   7728 
   7729   %int_sel3 = bitcast <32 x i1> %sel3 to i32
   7730   %res = icmp eq i32 %int_sel3, zeroinitializer
   7731   br i1 %res, label %L2, label %L1
   7732 L1:
   7733   store <32 x float> %in, <32 x float>* %vaddr1
   7734   br label %End
   7735 L2:
   7736   store <32 x float> %in, <32 x float>* %vaddr2
   7737   br label %End
   7738 End:
   7739   ret void
   7740 }
   7741 
   7742 define <8 x i64> @load_8i1(<8 x i1>* %a) {
        ; Loads an <8 x i1> from memory and sign-extends each lane to i64. The
        ; expected sequences load the mask with kmovb and expand with vpmovm2q.
   7743 ; GENERIC-LABEL: load_8i1:
   7744 ; GENERIC:       # %bb.0:
   7745 ; GENERIC-NEXT:    kmovb (%rdi), %k0 # sched: [5:0.50]
   7746 ; GENERIC-NEXT:    vpmovm2q %k0, %zmm0 # sched: [1:0.33]
   7747 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   7748 ;
   7749 ; SKX-LABEL: load_8i1:
   7750 ; SKX:       # %bb.0:
   7751 ; SKX-NEXT:    kmovb (%rdi), %k0 # sched: [7:1.00]
   7752 ; SKX-NEXT:    vpmovm2q %k0, %zmm0 # sched: [1:0.25]
   7753 ; SKX-NEXT:    retq # sched: [7:1.00]
   7754   %b = load <8 x i1>, <8 x i1>* %a
   7755   %c = sext <8 x i1> %b to <8 x i64>
   7756   ret <8 x i64> %c
   7757 }
   7758 
   7759 define <16 x i32> @load_16i1(<16 x i1>* %a) {
        ; Loads a <16 x i1> from memory and sign-extends each lane to i32. The
        ; expected sequences load the mask with kmovw and expand with vpmovm2d.
   7760 ; GENERIC-LABEL: load_16i1:
   7761 ; GENERIC:       # %bb.0:
   7762 ; GENERIC-NEXT:    kmovw (%rdi), %k0 # sched: [5:0.50]
   7763 ; GENERIC-NEXT:    vpmovm2d %k0, %zmm0 # sched: [1:0.33]
   7764 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   7765 ;
   7766 ; SKX-LABEL: load_16i1:
   7767 ; SKX:       # %bb.0:
   7768 ; SKX-NEXT:    kmovw (%rdi), %k0 # sched: [7:1.00]
   7769 ; SKX-NEXT:    vpmovm2d %k0, %zmm0 # sched: [1:0.25]
   7770 ; SKX-NEXT:    retq # sched: [7:1.00]
   7771   %b = load <16 x i1>, <16 x i1>* %a
   7772   %c = sext <16 x i1> %b to <16 x i32>
   7773   ret <16 x i32> %c
   7774 }
   7775 
   7776 define <2 x i16> @load_2i1(<2 x i1>* %a) {
        ; Loads a <2 x i1> from memory and sign-extends each lane to i16. The
        ; expected sequences return the result in 64-bit lanes (vpmovm2q into
        ; xmm0) even though the IR return type is <2 x i16>.
   7777 ; GENERIC-LABEL: load_2i1:
   7778 ; GENERIC:       # %bb.0:
   7779 ; GENERIC-NEXT:    kmovb (%rdi), %k0 # sched: [5:0.50]
   7780 ; GENERIC-NEXT:    vpmovm2q %k0, %xmm0 # sched: [1:0.33]
   7781 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   7782 ;
   7783 ; SKX-LABEL: load_2i1:
   7784 ; SKX:       # %bb.0:
   7785 ; SKX-NEXT:    kmovb (%rdi), %k0 # sched: [7:1.00]
   7786 ; SKX-NEXT:    vpmovm2q %k0, %xmm0 # sched: [1:0.25]
   7787 ; SKX-NEXT:    retq # sched: [7:1.00]
   7788   %b = load <2 x i1>, <2 x i1>* %a
   7789   %c = sext <2 x i1> %b to <2 x i16>
   7790   ret <2 x i16> %c
   7791 }
   7792 
   7793 define <4 x i16> @load_4i1(<4 x i1>* %a) {
        ; Loads a <4 x i1> from memory and sign-extends each lane to i16. The
        ; expected sequences return the result in 32-bit lanes (vpmovm2d into
        ; xmm0) even though the IR return type is <4 x i16>.
   7794 ; GENERIC-LABEL: load_4i1:
   7795 ; GENERIC:       # %bb.0:
   7796 ; GENERIC-NEXT:    kmovb (%rdi), %k0 # sched: [5:0.50]
   7797 ; GENERIC-NEXT:    vpmovm2d %k0, %xmm0 # sched: [1:0.33]
   7798 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   7799 ;
   7800 ; SKX-LABEL: load_4i1:
   7801 ; SKX:       # %bb.0:
   7802 ; SKX-NEXT:    kmovb (%rdi), %k0 # sched: [7:1.00]
   7803 ; SKX-NEXT:    vpmovm2d %k0, %xmm0 # sched: [1:0.25]
   7804 ; SKX-NEXT:    retq # sched: [7:1.00]
   7805   %b = load <4 x i1>, <4 x i1>* %a
   7806   %c = sext <4 x i1> %b to <4 x i16>
   7807   ret <4 x i16> %c
   7808 }
   7809 
   7810 define <32 x i16> @load_32i1(<32 x i1>* %a) {
        ; Loads a <32 x i1> from memory and sign-extends each lane to i16. The
        ; expected sequences load the mask with kmovd and expand with vpmovm2w.
   7811 ; GENERIC-LABEL: load_32i1:
   7812 ; GENERIC:       # %bb.0:
   7813 ; GENERIC-NEXT:    kmovd (%rdi), %k0 # sched: [5:0.50]
   7814 ; GENERIC-NEXT:    vpmovm2w %k0, %zmm0 # sched: [1:0.33]
   7815 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   7816 ;
   7817 ; SKX-LABEL: load_32i1:
   7818 ; SKX:       # %bb.0:
   7819 ; SKX-NEXT:    kmovd (%rdi), %k0 # sched: [7:1.00]
   7820 ; SKX-NEXT:    vpmovm2w %k0, %zmm0 # sched: [1:0.25]
   7821 ; SKX-NEXT:    retq # sched: [7:1.00]
   7822   %b = load <32 x i1>, <32 x i1>* %a
   7823   %c = sext <32 x i1> %b to <32 x i16>
   7824   ret <32 x i16> %c
   7825 }
   7826 
   7827 define <64 x i8> @load_64i1(<64 x i1>* %a) {
        ; Loads a <64 x i1> from memory and sign-extends each lane to i8. The
        ; expected sequences load the mask with kmovq and expand with vpmovm2b.
   7828 ; GENERIC-LABEL: load_64i1:
   7829 ; GENERIC:       # %bb.0:
   7830 ; GENERIC-NEXT:    kmovq (%rdi), %k0 # sched: [5:0.50]
   7831 ; GENERIC-NEXT:    vpmovm2b %k0, %zmm0 # sched: [1:0.33]
   7832 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   7833 ;
   7834 ; SKX-LABEL: load_64i1:
   7835 ; SKX:       # %bb.0:
   7836 ; SKX-NEXT:    kmovq (%rdi), %k0 # sched: [7:1.00]
   7837 ; SKX-NEXT:    vpmovm2b %k0, %zmm0 # sched: [1:0.25]
   7838 ; SKX-NEXT:    retq # sched: [7:1.00]
   7839   %b = load <64 x i1>, <64 x i1>* %a
   7840   %c = sext <64 x i1> %b to <64 x i8>
   7841   ret <64 x i8> %c
   7842 }
   7843 
   7844 define void @store_8i1(<8 x i1>* %a, <8 x i1> %v) {
        ; Stores an <8 x i1> argument to memory unchanged. The expected sequences
        ; convert it to a mask register (vpsllw + vpmovw2m) and store with kmovb.
   7845 ; GENERIC-LABEL: store_8i1:
   7846 ; GENERIC:       # %bb.0:
   7847 ; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
   7848 ; GENERIC-NEXT:    vpmovw2m %xmm0, %k0 # sched: [1:0.33]
   7849 ; GENERIC-NEXT:    kmovb %k0, (%rdi) # sched: [1:1.00]
   7850 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   7851 ;
   7852 ; SKX-LABEL: store_8i1:
   7853 ; SKX:       # %bb.0:
   7854 ; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
   7855 ; SKX-NEXT:    vpmovw2m %xmm0, %k0 # sched: [1:1.00]
   7856 ; SKX-NEXT:    kmovb %k0, (%rdi) # sched: [1:1.00]
   7857 ; SKX-NEXT:    retq # sched: [7:1.00]
   7858   store <8 x i1> %v, <8 x i1>* %a
   7859   ret void
   7860 }
   7861 
   7862 define void @store_8i1_1(<8 x i1>* %a, <8 x i16> %v) {
        ; Truncates each i16 lane of %v to i1 and stores the resulting <8 x i1>.
        ; The expected sequences move bit 0 of each lane into the sign position
        ; (vpsllw $15), extract the mask with vpmovw2m, and store with kmovb.
   7863 ; GENERIC-LABEL: store_8i1_1:
   7864 ; GENERIC:       # %bb.0:
   7865 ; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
   7866 ; GENERIC-NEXT:    vpmovw2m %xmm0, %k0 # sched: [1:0.33]
   7867 ; GENERIC-NEXT:    kmovb %k0, (%rdi) # sched: [1:1.00]
   7868 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   7869 ;
   7870 ; SKX-LABEL: store_8i1_1:
   7871 ; SKX:       # %bb.0:
   7872 ; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
   7873 ; SKX-NEXT:    vpmovw2m %xmm0, %k0 # sched: [1:1.00]
   7874 ; SKX-NEXT:    kmovb %k0, (%rdi) # sched: [1:1.00]
   7875 ; SKX-NEXT:    retq # sched: [7:1.00]
   7876   %v1 = trunc <8 x i16> %v to <8 x i1>
   7877   store <8 x i1> %v1, <8 x i1>* %a
   7878   ret void
   7879 }
   7880 
   7881 define void @store_16i1(<16 x i1>* %a, <16 x i1> %v) {
        ; Stores a <16 x i1> argument to memory unchanged. The expected sequences
        ; convert it to a mask register (vpsllw + vpmovb2m) and store with kmovw.
   7882 ; GENERIC-LABEL: store_16i1:
   7883 ; GENERIC:       # %bb.0:
   7884 ; GENERIC-NEXT:    vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
   7885 ; GENERIC-NEXT:    vpmovb2m %xmm0, %k0 # sched: [1:0.33]
   7886 ; GENERIC-NEXT:    kmovw %k0, (%rdi) # sched: [1:1.00]
   7887 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   7888 ;
   7889 ; SKX-LABEL: store_16i1:
   7890 ; SKX:       # %bb.0:
   7891 ; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50]
   7892 ; SKX-NEXT:    vpmovb2m %xmm0, %k0 # sched: [1:1.00]
   7893 ; SKX-NEXT:    kmovw %k0, (%rdi) # sched: [1:1.00]
   7894 ; SKX-NEXT:    retq # sched: [7:1.00]
   7895   store <16 x i1> %v, <16 x i1>* %a
   7896   ret void
   7897 }
   7898 
   7899 define void @store_32i1(<32 x i1>* %a, <32 x i1> %v) {
        ; Stores a <32 x i1> argument to memory unchanged. The expected sequences
        ; work on ymm0, store the mask with kmovd, and emit vzeroupper before
        ; returning.
   7900 ; GENERIC-LABEL: store_32i1:
   7901 ; GENERIC:       # %bb.0:
   7902 ; GENERIC-NEXT:    vpsllw $7, %ymm0, %ymm0 # sched: [1:1.00]
   7903 ; GENERIC-NEXT:    vpmovb2m %ymm0, %k0 # sched: [1:0.33]
   7904 ; GENERIC-NEXT:    kmovd %k0, (%rdi) # sched: [1:1.00]
   7905 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
   7906 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   7907 ;
   7908 ; SKX-LABEL: store_32i1:
   7909 ; SKX:       # %bb.0:
   7910 ; SKX-NEXT:    vpsllw $7, %ymm0, %ymm0 # sched: [1:0.50]
   7911 ; SKX-NEXT:    vpmovb2m %ymm0, %k0 # sched: [1:1.00]
   7912 ; SKX-NEXT:    kmovd %k0, (%rdi) # sched: [1:1.00]
   7913 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
   7914 ; SKX-NEXT:    retq # sched: [7:1.00]
   7915   store <32 x i1> %v, <32 x i1>* %a
   7916   ret void
   7917 }
   7918 
   7919 define void @store_32i1_1(<32 x i1>* %a, <32 x i16> %v) {
        ; Truncates each i16 lane of %v to i1 and stores the resulting <32 x i1>.
        ; The expected sequences shift bit 0 of each lane into the sign position
        ; (vpsllw $15 on zmm0), extract the mask with vpmovw2m, and store it with
        ; kmovd.
   7920 ; GENERIC-LABEL: store_32i1_1:
   7921 ; GENERIC:       # %bb.0:
   7922 ; GENERIC-NEXT:    vpsllw $15, %zmm0, %zmm0 # sched: [1:1.00]
   7923 ; GENERIC-NEXT:    vpmovw2m %zmm0, %k0 # sched: [1:0.33]
   7924 ; GENERIC-NEXT:    kmovd %k0, (%rdi) # sched: [1:1.00]
   7925 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
   7926 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   7927 ;
   7928 ; SKX-LABEL: store_32i1_1:
   7929 ; SKX:       # %bb.0:
   7930 ; SKX-NEXT:    vpsllw $15, %zmm0, %zmm0 # sched: [1:1.00]
   7931 ; SKX-NEXT:    vpmovw2m %zmm0, %k0 # sched: [1:1.00]
   7932 ; SKX-NEXT:    kmovd %k0, (%rdi) # sched: [1:1.00]
   7933 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
   7934 ; SKX-NEXT:    retq # sched: [7:1.00]
   7935   %v1 = trunc <32 x i16> %v to <32 x i1>
   7936   store <32 x i1> %v1, <32 x i1>* %a
   7937   ret void
   7938 }
   7939 
   7940 
   7941 define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
        ; Stores the <64 x i1> argument %v through the pointer %a.
        ; Same shape as the narrower i1-vector store tests, but on %zmm0: the
        ; checks expect vpsllw $7, vpmovb2m into %k0, and a 64-bit kmovq store.
   7942 ;
   7943 ; GENERIC-LABEL: store_64i1:
   7944 ; GENERIC:       # %bb.0:
   7945 ; GENERIC-NEXT:    vpsllw $7, %zmm0, %zmm0 # sched: [1:1.00]
   7946 ; GENERIC-NEXT:    vpmovb2m %zmm0, %k0 # sched: [1:0.33]
   7947 ; GENERIC-NEXT:    kmovq %k0, (%rdi) # sched: [1:1.00]
   7948 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
   7949 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   7950 ;
   7951 ; SKX-LABEL: store_64i1:
   7952 ; SKX:       # %bb.0:
   7953 ; SKX-NEXT:    vpsllw $7, %zmm0, %zmm0 # sched: [1:1.00]
   7954 ; SKX-NEXT:    vpmovb2m %zmm0, %k0 # sched: [1:1.00]
   7955 ; SKX-NEXT:    kmovq %k0, (%rdi) # sched: [1:1.00]
   7956 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
   7957 ; SKX-NEXT:    retq # sched: [7:1.00]
   7958   store <64 x i1> %v, <64 x i1>* %a
   7959   ret void
   7960 }
   7961 
   7962 define i32 @test_bitcast_v8i1_zext(<16 x i32> %a) {
        ; Compares every lane of %a against zero, keeps the low 8 lanes of the
        ; resulting mask, reinterprets them as an i8, zero-extends to i32 and
        ; returns that value added to itself.
        ; The checks expect vptestnmd to build the mask, kmovb to move it to
        ; %eax, and addl %eax, %eax for the doubling.
   7963 ; GENERIC-LABEL: test_bitcast_v8i1_zext:
   7964 ; GENERIC:       # %bb.0:
   7965 ; GENERIC-NEXT:    vptestnmd %zmm0, %zmm0, %k0 # sched: [1:0.33]
   7966 ; GENERIC-NEXT:    kmovb %k0, %eax # sched: [1:0.33]
   7967 ; GENERIC-NEXT:    addl %eax, %eax # sched: [1:0.33]
   7968 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
   7969 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   7970 ;
   7971 ; SKX-LABEL: test_bitcast_v8i1_zext:
   7972 ; SKX:       # %bb.0:
   7973 ; SKX-NEXT:    vptestnmd %zmm0, %zmm0, %k0 # sched: [3:1.00]
   7974 ; SKX-NEXT:    kmovb %k0, %eax # sched: [3:1.00]
   7975 ; SKX-NEXT:    addl %eax, %eax # sched: [1:0.25]
   7976 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
   7977 ; SKX-NEXT:    retq # sched: [7:1.00]
   7978    %v1 = icmp eq <16 x i32> %a, zeroinitializer
        ; Shuffle indices 0..7 select the low half of the 16-lane compare result.
   7979    %mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   7980    %mask1 = bitcast <8 x i1> %mask to i8
   7981    %val = zext i8 %mask1 to i32
   7982    %val1 = add i32 %val, %val
   7983    ret i32 %val1
   7984 }
   7985 
   7986 define i32 @test_bitcast_v16i1_zext(<16 x i32> %a) {
        ; Compares every lane of %a against zero, reinterprets the full 16-lane
        ; mask as an i16, zero-extends to i32 and returns that value added to
        ; itself.
        ; The checks expect vptestnmd, a 16-bit kmovw into %eax, and
        ; addl %eax, %eax.
   7987 ; GENERIC-LABEL: test_bitcast_v16i1_zext:
   7988 ; GENERIC:       # %bb.0:
   7989 ; GENERIC-NEXT:    vptestnmd %zmm0, %zmm0, %k0 # sched: [1:0.33]
   7990 ; GENERIC-NEXT:    kmovw %k0, %eax # sched: [1:0.33]
   7991 ; GENERIC-NEXT:    addl %eax, %eax # sched: [1:0.33]
   7992 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
   7993 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   7994 ;
   7995 ; SKX-LABEL: test_bitcast_v16i1_zext:
   7996 ; SKX:       # %bb.0:
   7997 ; SKX-NEXT:    vptestnmd %zmm0, %zmm0, %k0 # sched: [3:1.00]
   7998 ; SKX-NEXT:    kmovw %k0, %eax # sched: [3:1.00]
   7999 ; SKX-NEXT:    addl %eax, %eax # sched: [1:0.25]
   8000 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
   8001 ; SKX-NEXT:    retq # sched: [7:1.00]
   8002    %v1 = icmp eq <16 x i32> %a, zeroinitializer
   8003    %mask1 = bitcast <16 x i1> %v1 to i16
   8004    %val = zext i16 %mask1 to i32
   8005    %val1 = add i32 %val, %val
   8006    ret i32 %val1
   8007 }
   8008 
   8009 define i16 @test_v16i1_add(i16 %x, i16 %y) {
        ; Reinterprets %x and %y as <16 x i1> vectors, adds them lane-wise and
        ; returns the result reinterpreted as an i16.
        ; The checks expect both inputs to be moved into mask registers and the
        ; i1 addition to be emitted as a single kxorw.
   8010 ; GENERIC-LABEL: test_v16i1_add:
   8011 ; GENERIC:       # %bb.0:
   8012 ; GENERIC-NEXT:    kmovd %edi, %k0 # sched: [1:0.33]
   8013 ; GENERIC-NEXT:    kmovd %esi, %k1 # sched: [1:0.33]
   8014 ; GENERIC-NEXT:    kxorw %k1, %k0, %k0 # sched: [1:0.33]
   8015 ; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
   8016 ; GENERIC-NEXT:    # kill: def $ax killed $ax killed $eax
   8017 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   8018 ;
   8019 ; SKX-LABEL: test_v16i1_add:
   8020 ; SKX:       # %bb.0:
   8021 ; SKX-NEXT:    kmovd %edi, %k0 # sched: [1:1.00]
   8022 ; SKX-NEXT:    kmovd %esi, %k1 # sched: [1:1.00]
   8023 ; SKX-NEXT:    kxorw %k1, %k0, %k0 # sched: [1:1.00]
   8024 ; SKX-NEXT:    kmovd %k0, %eax # sched: [3:1.00]
   8025 ; SKX-NEXT:    # kill: def $ax killed $ax killed $eax
   8026 ; SKX-NEXT:    retq # sched: [7:1.00]
   8027   %m0 = bitcast i16 %x to <16 x i1>
   8028   %m1 = bitcast i16 %y to <16 x i1>
   8029   %m2 = add <16 x i1> %m0,  %m1
   8030   %ret = bitcast <16 x i1> %m2 to i16
   8031   ret i16 %ret
   8032 }
   8033 
   8034 define i16 @test_v16i1_sub(i16 %x, i16 %y) {
        ; Reinterprets %x and %y as <16 x i1> vectors, subtracts them lane-wise
        ; and returns the result reinterpreted as an i16.
        ; The expected sequence is the same as for the i1 addition test: the
        ; subtraction is emitted as a single kxorw on mask registers.
   8035 ; GENERIC-LABEL: test_v16i1_sub:
   8036 ; GENERIC:       # %bb.0:
   8037 ; GENERIC-NEXT:    kmovd %edi, %k0 # sched: [1:0.33]
   8038 ; GENERIC-NEXT:    kmovd %esi, %k1 # sched: [1:0.33]
   8039 ; GENERIC-NEXT:    kxorw %k1, %k0, %k0 # sched: [1:0.33]
   8040 ; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
   8041 ; GENERIC-NEXT:    # kill: def $ax killed $ax killed $eax
   8042 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   8043 ;
   8044 ; SKX-LABEL: test_v16i1_sub:
   8045 ; SKX:       # %bb.0:
   8046 ; SKX-NEXT:    kmovd %edi, %k0 # sched: [1:1.00]
   8047 ; SKX-NEXT:    kmovd %esi, %k1 # sched: [1:1.00]
   8048 ; SKX-NEXT:    kxorw %k1, %k0, %k0 # sched: [1:1.00]
   8049 ; SKX-NEXT:    kmovd %k0, %eax # sched: [3:1.00]
   8050 ; SKX-NEXT:    # kill: def $ax killed $ax killed $eax
   8051 ; SKX-NEXT:    retq # sched: [7:1.00]
   8052   %m0 = bitcast i16 %x to <16 x i1>
   8053   %m1 = bitcast i16 %y to <16 x i1>
   8054   %m2 = sub <16 x i1> %m0,  %m1
   8055   %ret = bitcast <16 x i1> %m2 to i16
   8056   ret i16 %ret
   8057 }
   8058 
   8059 define i16 @test_v16i1_mul(i16 %x, i16 %y) {
        ; Reinterprets %x and %y as <16 x i1> vectors, multiplies them lane-wise
        ; and returns the result reinterpreted as an i16.
        ; The checks expect the i1 multiplication to be emitted as a single
        ; kandw on mask registers.
   8060 ; GENERIC-LABEL: test_v16i1_mul:
   8061 ; GENERIC:       # %bb.0:
   8062 ; GENERIC-NEXT:    kmovd %edi, %k0 # sched: [1:0.33]
   8063 ; GENERIC-NEXT:    kmovd %esi, %k1 # sched: [1:0.33]
   8064 ; GENERIC-NEXT:    kandw %k1, %k0, %k0 # sched: [1:0.33]
   8065 ; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
   8066 ; GENERIC-NEXT:    # kill: def $ax killed $ax killed $eax
   8067 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   8068 ;
   8069 ; SKX-LABEL: test_v16i1_mul:
   8070 ; SKX:       # %bb.0:
   8071 ; SKX-NEXT:    kmovd %edi, %k0 # sched: [1:1.00]
   8072 ; SKX-NEXT:    kmovd %esi, %k1 # sched: [1:1.00]
   8073 ; SKX-NEXT:    kandw %k1, %k0, %k0 # sched: [1:1.00]
   8074 ; SKX-NEXT:    kmovd %k0, %eax # sched: [3:1.00]
   8075 ; SKX-NEXT:    # kill: def $ax killed $ax killed $eax
   8076 ; SKX-NEXT:    retq # sched: [7:1.00]
   8077   %m0 = bitcast i16 %x to <16 x i1>
   8078   %m1 = bitcast i16 %y to <16 x i1>
   8079   %m2 = mul <16 x i1> %m0,  %m1
   8080   %ret = bitcast <16 x i1> %m2 to i16
   8081   ret i16 %ret
   8082 }
   8083 
   8084 define i8 @test_v8i1_add(i8 %x, i8 %y) {
        ; 8-lane variant of the i1 addition test: reinterprets %x and %y as
        ; <8 x i1>, adds them lane-wise and returns the result as an i8.
        ; The checks expect the byte-sized mask form, kxorb.
   8085 ; GENERIC-LABEL: test_v8i1_add:
   8086 ; GENERIC:       # %bb.0:
   8087 ; GENERIC-NEXT:    kmovd %edi, %k0 # sched: [1:0.33]
   8088 ; GENERIC-NEXT:    kmovd %esi, %k1 # sched: [1:0.33]
   8089 ; GENERIC-NEXT:    kxorb %k1, %k0, %k0 # sched: [1:0.33]
   8090 ; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
   8091 ; GENERIC-NEXT:    # kill: def $al killed $al killed $eax
   8092 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   8093 ;
   8094 ; SKX-LABEL: test_v8i1_add:
   8095 ; SKX:       # %bb.0:
   8096 ; SKX-NEXT:    kmovd %edi, %k0 # sched: [1:1.00]
   8097 ; SKX-NEXT:    kmovd %esi, %k1 # sched: [1:1.00]
   8098 ; SKX-NEXT:    kxorb %k1, %k0, %k0 # sched: [1:1.00]
   8099 ; SKX-NEXT:    kmovd %k0, %eax # sched: [3:1.00]
   8100 ; SKX-NEXT:    # kill: def $al killed $al killed $eax
   8101 ; SKX-NEXT:    retq # sched: [7:1.00]
   8102   %m0 = bitcast i8 %x to <8 x i1>
   8103   %m1 = bitcast i8 %y to <8 x i1>
   8104   %m2 = add <8 x i1> %m0,  %m1
   8105   %ret = bitcast <8 x i1> %m2 to i8
   8106   ret i8 %ret
   8107 }
   8108 
   8109 define i8 @test_v8i1_sub(i8 %x, i8 %y) {
        ; 8-lane variant of the i1 subtraction test: reinterprets %x and %y as
        ; <8 x i1>, subtracts them lane-wise and returns the result as an i8.
        ; As with the addition test, the checks expect a single kxorb.
   8110 ; GENERIC-LABEL: test_v8i1_sub:
   8111 ; GENERIC:       # %bb.0:
   8112 ; GENERIC-NEXT:    kmovd %edi, %k0 # sched: [1:0.33]
   8113 ; GENERIC-NEXT:    kmovd %esi, %k1 # sched: [1:0.33]
   8114 ; GENERIC-NEXT:    kxorb %k1, %k0, %k0 # sched: [1:0.33]
   8115 ; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
   8116 ; GENERIC-NEXT:    # kill: def $al killed $al killed $eax
   8117 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   8118 ;
   8119 ; SKX-LABEL: test_v8i1_sub:
   8120 ; SKX:       # %bb.0:
   8121 ; SKX-NEXT:    kmovd %edi, %k0 # sched: [1:1.00]
   8122 ; SKX-NEXT:    kmovd %esi, %k1 # sched: [1:1.00]
   8123 ; SKX-NEXT:    kxorb %k1, %k0, %k0 # sched: [1:1.00]
   8124 ; SKX-NEXT:    kmovd %k0, %eax # sched: [3:1.00]
   8125 ; SKX-NEXT:    # kill: def $al killed $al killed $eax
   8126 ; SKX-NEXT:    retq # sched: [7:1.00]
   8127   %m0 = bitcast i8 %x to <8 x i1>
   8128   %m1 = bitcast i8 %y to <8 x i1>
   8129   %m2 = sub <8 x i1> %m0,  %m1
   8130   %ret = bitcast <8 x i1> %m2 to i8
   8131   ret i8 %ret
   8132 }
   8133 
   8134 define i8 @test_v8i1_mul(i8 %x, i8 %y) {
        ; 8-lane variant of the i1 multiplication test: reinterprets %x and %y as
        ; <8 x i1>, multiplies them lane-wise and returns the result as an i8.
        ; The checks expect the byte-sized mask AND, kandb.
   8135 ; GENERIC-LABEL: test_v8i1_mul:
   8136 ; GENERIC:       # %bb.0:
   8137 ; GENERIC-NEXT:    kmovd %edi, %k0 # sched: [1:0.33]
   8138 ; GENERIC-NEXT:    kmovd %esi, %k1 # sched: [1:0.33]
   8139 ; GENERIC-NEXT:    kandb %k1, %k0, %k0 # sched: [1:0.33]
   8140 ; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
   8141 ; GENERIC-NEXT:    # kill: def $al killed $al killed $eax
   8142 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   8143 ;
   8144 ; SKX-LABEL: test_v8i1_mul:
   8145 ; SKX:       # %bb.0:
   8146 ; SKX-NEXT:    kmovd %edi, %k0 # sched: [1:1.00]
   8147 ; SKX-NEXT:    kmovd %esi, %k1 # sched: [1:1.00]
   8148 ; SKX-NEXT:    kandb %k1, %k0, %k0 # sched: [1:1.00]
   8149 ; SKX-NEXT:    kmovd %k0, %eax # sched: [3:1.00]
   8150 ; SKX-NEXT:    # kill: def $al killed $al killed $eax
   8151 ; SKX-NEXT:    retq # sched: [7:1.00]
   8152   %m0 = bitcast i8 %x to <8 x i1>
   8153   %m1 = bitcast i8 %y to <8 x i1>
   8154   %m2 = mul <8 x i1> %m0,  %m1
   8155   %ret = bitcast <8 x i1> %m2 to i8
   8156   ret i8 %ret
   8157 }
   8158 
   8159 define   <16 x i32> @_inreg16xi32(i32 %a) {
        ; Splats the scalar %a across all 16 lanes: it is inserted into lane 0
        ; and then replicated by a shufflevector with an all-zero mask.
        ; The checks expect a single vpbroadcastd straight from %edi to %zmm0.
   8160 ; GENERIC-LABEL: _inreg16xi32:
   8161 ; GENERIC:       # %bb.0:
   8162 ; GENERIC-NEXT:    vpbroadcastd %edi, %zmm0 # sched: [1:1.00]
   8163 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   8164 ;
   8165 ; SKX-LABEL: _inreg16xi32:
   8166 ; SKX:       # %bb.0:
   8167 ; SKX-NEXT:    vpbroadcastd %edi, %zmm0 # sched: [3:1.00]
   8168 ; SKX-NEXT:    retq # sched: [7:1.00]
   8169   %b = insertelement <16 x i32> undef, i32 %a, i32 0
   8170   %c = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
   8171   ret <16 x i32> %c
   8172 }
   8173 
   8174 define   <8 x i64> @_inreg8xi64(i64 %a) {
        ; Splats the scalar %a across all 8 lanes (insert into lane 0, then a
        ; shufflevector with an all-zero mask).
        ; The checks expect a single vpbroadcastq straight from %rdi to %zmm0.
   8175 ; GENERIC-LABEL: _inreg8xi64:
   8176 ; GENERIC:       # %bb.0:
   8177 ; GENERIC-NEXT:    vpbroadcastq %rdi, %zmm0 # sched: [1:1.00]
   8178 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   8179 ;
   8180 ; SKX-LABEL: _inreg8xi64:
   8181 ; SKX:       # %bb.0:
   8182 ; SKX-NEXT:    vpbroadcastq %rdi, %zmm0 # sched: [3:1.00]
   8183 ; SKX-NEXT:    retq # sched: [7:1.00]
   8184   %b = insertelement <8 x i64> undef, i64 %a, i32 0
   8185   %c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
   8186   ret <8 x i64> %c
   8187 }
   8188 
   8189 define   <16 x float> @_ss16xfloat_v4(<4 x float> %a) {
        ; Replicates lane 0 of the 4-lane input %a into all 16 result lanes via
        ; a widening shufflevector with an all-zero mask.
        ; The checks expect a single vbroadcastss from %xmm0 to %zmm0.
   8190 ; GENERIC-LABEL: _ss16xfloat_v4:
   8191 ; GENERIC:       # %bb.0:
   8192 ; GENERIC-NEXT:    vbroadcastss %xmm0, %zmm0 # sched: [1:1.00]
   8193 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   8194 ;
   8195 ; SKX-LABEL: _ss16xfloat_v4:
   8196 ; SKX:       # %bb.0:
   8197 ; SKX-NEXT:    vbroadcastss %xmm0, %zmm0 # sched: [3:1.00]
   8198 ; SKX-NEXT:    retq # sched: [7:1.00]
   8199   %b = shufflevector <4 x float> %a, <4 x float> undef, <16 x i32> zeroinitializer
   8200   ret <16 x float> %b
   8201 }
   8202 
   8203 define   <16 x float> @_inreg16xfloat(float %a) {
        ; Splats the scalar float %a across all 16 lanes (insert into lane 0,
        ; then a shufflevector with an all-zero mask).
        ; The checks expect a single register-to-register vbroadcastss.
   8204 ; GENERIC-LABEL: _inreg16xfloat:
   8205 ; GENERIC:       # %bb.0:
   8206 ; GENERIC-NEXT:    vbroadcastss %xmm0, %zmm0 # sched: [1:1.00]
   8207 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   8208 ;
   8209 ; SKX-LABEL: _inreg16xfloat:
   8210 ; SKX:       # %bb.0:
   8211 ; SKX-NEXT:    vbroadcastss %xmm0, %zmm0 # sched: [3:1.00]
   8212 ; SKX-NEXT:    retq # sched: [7:1.00]
   8213   %b = insertelement <16 x float> undef, float %a, i32 0
   8214   %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
   8215   ret <16 x float> %c
   8216 }
   8217 
   8218 define   <16 x float> @_ss16xfloat_mask(float %a, <16 x float> %i, <16 x i32> %mask1) {
        ; Merge-masked splat: lanes where %mask1 is non-zero receive the
        ; broadcast of %a, all other lanes keep the corresponding lane of %i.
        ; The checks expect vptestmd to build %k1, a vbroadcastss into %zmm1
        ; under {%k1}, and a vmovaps copy of %zmm1 into %zmm0.
   8219 ; GENERIC-LABEL: _ss16xfloat_mask:
   8220 ; GENERIC:       # %bb.0:
   8221 ; GENERIC-NEXT:    vptestmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
   8222 ; GENERIC-NEXT:    vbroadcastss %xmm0, %zmm1 {%k1} # sched: [1:1.00]
   8223 ; GENERIC-NEXT:    vmovaps %zmm1, %zmm0 # sched: [1:1.00]
   8224 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   8225 ;
   8226 ; SKX-LABEL: _ss16xfloat_mask:
   8227 ; SKX:       # %bb.0:
   8228 ; SKX-NEXT:    vptestmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
   8229 ; SKX-NEXT:    vbroadcastss %xmm0, %zmm1 {%k1} # sched: [3:1.00]
   8230 ; SKX-NEXT:    vmovaps %zmm1, %zmm0 # sched: [1:0.33]
   8231 ; SKX-NEXT:    retq # sched: [7:1.00]
   8232   %mask = icmp ne <16 x i32> %mask1, zeroinitializer
   8233   %b = insertelement <16 x float> undef, float %a, i32 0
   8234   %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
   8235   %r = select <16 x i1> %mask, <16 x float> %c, <16 x float> %i
   8236   ret <16 x float> %r
   8237 }
   8238 
   8239 define   <16 x float> @_ss16xfloat_maskz(float %a, <16 x i32> %mask1) {
        ; Zero-masked splat: lanes where %mask1 is non-zero receive the
        ; broadcast of %a, all other lanes are zero.
        ; The checks expect vptestmd to build %k1 and a vbroadcastss using the
        ; {%k1} {z} form.
   8240 ; GENERIC-LABEL: _ss16xfloat_maskz:
   8241 ; GENERIC:       # %bb.0:
   8242 ; GENERIC-NEXT:    vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
   8243 ; GENERIC-NEXT:    vbroadcastss %xmm0, %zmm0 {%k1} {z} # sched: [1:1.00]
   8244 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   8245 ;
   8246 ; SKX-LABEL: _ss16xfloat_maskz:
   8247 ; SKX:       # %bb.0:
   8248 ; SKX-NEXT:    vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
   8249 ; SKX-NEXT:    vbroadcastss %xmm0, %zmm0 {%k1} {z} # sched: [3:1.00]
   8250 ; SKX-NEXT:    retq # sched: [7:1.00]
   8251   %mask = icmp ne <16 x i32> %mask1, zeroinitializer
   8252   %b = insertelement <16 x float> undef, float %a, i32 0
   8253   %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
   8254   %r = select <16 x i1> %mask, <16 x float> %c, <16 x float> zeroinitializer
   8255   ret <16 x float> %r
   8256 }
   8257 
   8258 define   <16 x float> @_ss16xfloat_load(float* %a.ptr) {
        ; Loads one float from %a.ptr and splats it across all 16 lanes.
        ; The checks expect the load to be folded into the broadcast:
        ; vbroadcastss (%rdi), %zmm0.
   8259 ; GENERIC-LABEL: _ss16xfloat_load:
   8260 ; GENERIC:       # %bb.0:
   8261 ; GENERIC-NEXT:    vbroadcastss (%rdi), %zmm0 # sched: [8:1.00]
   8262 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   8263 ;
   8264 ; SKX-LABEL: _ss16xfloat_load:
   8265 ; SKX:       # %bb.0:
   8266 ; SKX-NEXT:    vbroadcastss (%rdi), %zmm0 # sched: [8:0.50]
   8267 ; SKX-NEXT:    retq # sched: [7:1.00]
   8268   %a = load float, float* %a.ptr
   8269   %b = insertelement <16 x float> undef, float %a, i32 0
   8270   %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
   8271   ret <16 x float> %c
   8272 }
   8273 
   8274 define   <16 x float> @_ss16xfloat_mask_load(float* %a.ptr, <16 x float> %i, <16 x i32> %mask1) {
        ; Merge-masked splat of a loaded float: lanes where %mask1 is non-zero
        ; receive the value loaded from %a.ptr, all other lanes keep %i.
        ; The checks expect vptestmd to build %k1 and a memory-operand
        ; vbroadcastss into %zmm0 under {%k1}, with no extra register copy.
   8275 ; GENERIC-LABEL: _ss16xfloat_mask_load:
   8276 ; GENERIC:       # %bb.0:
   8277 ; GENERIC-NEXT:    vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
   8278 ; GENERIC-NEXT:    vbroadcastss (%rdi), %zmm0 {%k1} # sched: [8:1.00]
   8279 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   8280 ;
   8281 ; SKX-LABEL: _ss16xfloat_mask_load:
   8282 ; SKX:       # %bb.0:
   8283 ; SKX-NEXT:    vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
   8284 ; SKX-NEXT:    vbroadcastss (%rdi), %zmm0 {%k1} # sched: [8:0.50]
   8285 ; SKX-NEXT:    retq # sched: [7:1.00]
   8286   %a = load float, float* %a.ptr
   8287   %mask = icmp ne <16 x i32> %mask1, zeroinitializer
   8288   %b = insertelement <16 x float> undef, float %a, i32 0
   8289   %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
   8290   %r = select <16 x i1> %mask, <16 x float> %c, <16 x float> %i
   8291   ret <16 x float> %r
   8292 }
   8293 
   8294 define   <16 x float> @_ss16xfloat_maskz_load(float* %a.ptr, <16 x i32> %mask1) {
        ; Zero-masked splat of a loaded float: lanes where %mask1 is non-zero
        ; receive the value loaded from %a.ptr, all other lanes are zero.
        ; The checks expect vptestmd to build %k1 and a memory-operand
        ; vbroadcastss using the {%k1} {z} form.
   8295 ; GENERIC-LABEL: _ss16xfloat_maskz_load:
   8296 ; GENERIC:       # %bb.0:
   8297 ; GENERIC-NEXT:    vptestmd %zmm0, %zmm0, %k1 # sched: [1:0.33]
   8298 ; GENERIC-NEXT:    vbroadcastss (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
   8299 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   8300 ;
   8301 ; SKX-LABEL: _ss16xfloat_maskz_load:
   8302 ; SKX:       # %bb.0:
   8303 ; SKX-NEXT:    vptestmd %zmm0, %zmm0, %k1 # sched: [3:1.00]
   8304 ; SKX-NEXT:    vbroadcastss (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50]
   8305 ; SKX-NEXT:    retq # sched: [7:1.00]
   8306   %a = load float, float* %a.ptr
   8307   %mask = icmp ne <16 x i32> %mask1, zeroinitializer
   8308   %b = insertelement <16 x float> undef, float %a, i32 0
   8309   %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
   8310   %r = select <16 x i1> %mask, <16 x float> %c, <16 x float> zeroinitializer
   8311   ret <16 x float> %r
   8312 }
   8313 
   8314 define   <8 x double> @_inreg8xdouble(double %a) {
        ; Splats the scalar double %a across all 8 lanes (insert into lane 0,
        ; then a shufflevector with an all-zero mask).
        ; The checks expect a single register-to-register vbroadcastsd.
   8315 ; GENERIC-LABEL: _inreg8xdouble:
   8316 ; GENERIC:       # %bb.0:
   8317 ; GENERIC-NEXT:    vbroadcastsd %xmm0, %zmm0 # sched: [1:1.00]
   8318 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   8319 ;
   8320 ; SKX-LABEL: _inreg8xdouble:
   8321 ; SKX:       # %bb.0:
   8322 ; SKX-NEXT:    vbroadcastsd %xmm0, %zmm0 # sched: [3:1.00]
   8323 ; SKX-NEXT:    retq # sched: [7:1.00]
   8324   %b = insertelement <8 x double> undef, double %a, i32 0
   8325   %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
   8326   ret <8 x double> %c
   8327 }
   8328 
   8329 define   <8 x double> @_sd8xdouble_mask(double %a, <8 x double> %i, <8 x i32> %mask1) {
        ; Merge-masked splat: lanes where %mask1 is non-zero receive the
        ; broadcast of %a, all other lanes keep the corresponding lane of %i.
        ; The mask source is an 8 x i32 vector, so the checks expect vptestmd on
        ; %ymm2, then a vbroadcastsd into %zmm1 under {%k1} and a vmovapd copy
        ; of %zmm1 into %zmm0.
   8330 ; GENERIC-LABEL: _sd8xdouble_mask:
   8331 ; GENERIC:       # %bb.0:
   8332 ; GENERIC-NEXT:    vptestmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
   8333 ; GENERIC-NEXT:    vbroadcastsd %xmm0, %zmm1 {%k1} # sched: [1:1.00]
   8334 ; GENERIC-NEXT:    vmovapd %zmm1, %zmm0 # sched: [1:1.00]
   8335 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   8336 ;
   8337 ; SKX-LABEL: _sd8xdouble_mask:
   8338 ; SKX:       # %bb.0:
   8339 ; SKX-NEXT:    vptestmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
   8340 ; SKX-NEXT:    vbroadcastsd %xmm0, %zmm1 {%k1} # sched: [3:1.00]
   8341 ; SKX-NEXT:    vmovapd %zmm1, %zmm0 # sched: [1:0.33]
   8342 ; SKX-NEXT:    retq # sched: [7:1.00]
   8343   %mask = icmp ne <8 x i32> %mask1, zeroinitializer
   8344   %b = insertelement <8 x double> undef, double %a, i32 0
   8345   %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
   8346   %r = select <8 x i1> %mask, <8 x double> %c, <8 x double> %i
   8347   ret <8 x double> %r
   8348 }
   8349 
   8350 define   <8 x double> @_sd8xdouble_maskz(double %a, <8 x i32> %mask1) {
        ; Zero-masked splat: lanes where %mask1 is non-zero receive the
        ; broadcast of %a, all other lanes are zero.
        ; The checks expect vptestmd on %ymm1 to build %k1 and a vbroadcastsd
        ; using the {%k1} {z} form.
   8351 ; GENERIC-LABEL: _sd8xdouble_maskz:
   8352 ; GENERIC:       # %bb.0:
   8353 ; GENERIC-NEXT:    vptestmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
   8354 ; GENERIC-NEXT:    vbroadcastsd %xmm0, %zmm0 {%k1} {z} # sched: [1:1.00]
   8355 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   8356 ;
   8357 ; SKX-LABEL: _sd8xdouble_maskz:
   8358 ; SKX:       # %bb.0:
   8359 ; SKX-NEXT:    vptestmd %ymm1, %ymm1, %k1 # sched: [3:1.00]
   8360 ; SKX-NEXT:    vbroadcastsd %xmm0, %zmm0 {%k1} {z} # sched: [3:1.00]
   8361 ; SKX-NEXT:    retq # sched: [7:1.00]
   8362   %mask = icmp ne <8 x i32> %mask1, zeroinitializer
   8363   %b = insertelement <8 x double> undef, double %a, i32 0
   8364   %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
   8365   %r = select <8 x i1> %mask, <8 x double> %c, <8 x double> zeroinitializer
   8366   ret <8 x double> %r
   8367 }
   8368 
   8369 define   <8 x double> @_sd8xdouble_load(double* %a.ptr) {
        ; Loads one double from %a.ptr and splats it across all 8 lanes.
        ; The checks expect the load to be folded into the broadcast:
        ; vbroadcastsd (%rdi), %zmm0.
   8370 ; GENERIC-LABEL: _sd8xdouble_load:
   8371 ; GENERIC:       # %bb.0:
   8372 ; GENERIC-NEXT:    vbroadcastsd (%rdi), %zmm0 # sched: [8:1.00]
   8373 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   8374 ;
   8375 ; SKX-LABEL: _sd8xdouble_load:
   8376 ; SKX:       # %bb.0:
   8377 ; SKX-NEXT:    vbroadcastsd (%rdi), %zmm0 # sched: [8:0.50]
   8378 ; SKX-NEXT:    retq # sched: [7:1.00]
   8379   %a = load double, double* %a.ptr
   8380   %b = insertelement <8 x double> undef, double %a, i32 0
   8381   %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
   8382   ret <8 x double> %c
   8383 }
   8384 
   8385 define   <8 x double> @_sd8xdouble_mask_load(double* %a.ptr, <8 x double> %i, <8 x i32> %mask1) {
        ; Merge-masked splat of a loaded double: lanes where %mask1 is non-zero
        ; receive the value loaded from %a.ptr, all other lanes keep %i.
        ; The checks expect vptestmd on %ymm1 to build %k1 and a memory-operand
        ; vbroadcastsd into %zmm0 under {%k1}.
   8386 ; GENERIC-LABEL: _sd8xdouble_mask_load:
   8387 ; GENERIC:       # %bb.0:
   8388 ; GENERIC-NEXT:    vptestmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
   8389 ; GENERIC-NEXT:    vbroadcastsd (%rdi), %zmm0 {%k1} # sched: [8:1.00]
   8390 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   8391 ;
   8392 ; SKX-LABEL: _sd8xdouble_mask_load:
   8393 ; SKX:       # %bb.0:
   8394 ; SKX-NEXT:    vptestmd %ymm1, %ymm1, %k1 # sched: [3:1.00]
   8395 ; SKX-NEXT:    vbroadcastsd (%rdi), %zmm0 {%k1} # sched: [8:0.50]
   8396 ; SKX-NEXT:    retq # sched: [7:1.00]
   8397   %a = load double, double* %a.ptr
   8398   %mask = icmp ne <8 x i32> %mask1, zeroinitializer
   8399   %b = insertelement <8 x double> undef, double %a, i32 0
   8400   %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
   8401   %r = select <8 x i1> %mask, <8 x double> %c, <8 x double> %i
   8402   ret <8 x double> %r
   8403 }
   8404 
   8405 define   <8 x double> @_sd8xdouble_maskz_load(double* %a.ptr, <8 x i32> %mask1) {
        ; Zero-masked splat of a loaded double: lanes where %mask1 is non-zero
        ; receive the value loaded from %a.ptr, all other lanes are zero.
        ; The checks expect vptestmd on %ymm0 to build %k1 and a memory-operand
        ; vbroadcastsd using the {%k1} {z} form.
   8406 ; GENERIC-LABEL: _sd8xdouble_maskz_load:
   8407 ; GENERIC:       # %bb.0:
   8408 ; GENERIC-NEXT:    vptestmd %ymm0, %ymm0, %k1 # sched: [1:0.33]
   8409 ; GENERIC-NEXT:    vbroadcastsd (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
   8410 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   8411 ;
   8412 ; SKX-LABEL: _sd8xdouble_maskz_load:
   8413 ; SKX:       # %bb.0:
   8414 ; SKX-NEXT:    vptestmd %ymm0, %ymm0, %k1 # sched: [3:1.00]
   8415 ; SKX-NEXT:    vbroadcastsd (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50]
   8416 ; SKX-NEXT:    retq # sched: [7:1.00]
   8417   %a = load double, double* %a.ptr
   8418   %mask = icmp ne <8 x i32> %mask1, zeroinitializer
   8419   %b = insertelement <8 x double> undef, double %a, i32 0
   8420   %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
   8421   %r = select <8 x i1> %mask, <8 x double> %c, <8 x double> zeroinitializer
   8422   ret <8 x double> %r
   8423 }
   8424 
   8425 define   <16 x i32> @_xmm16xi32(<16 x i32> %a) {
        ; Replicates lane 0 of the 16-lane integer vector %a into every lane
        ; using a shufflevector with an all-zero mask.
        ; Note that the checks expect vbroadcastss (not vpbroadcastd) even
        ; though the element type is i32.
   8426 ; GENERIC-LABEL: _xmm16xi32:
   8427 ; GENERIC:       # %bb.0:
   8428 ; GENERIC-NEXT:    vbroadcastss %xmm0, %zmm0 # sched: [1:1.00]
   8429 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   8430 ;
   8431 ; SKX-LABEL: _xmm16xi32:
   8432 ; SKX:       # %bb.0:
   8433 ; SKX-NEXT:    vbroadcastss %xmm0, %zmm0 # sched: [3:1.00]
   8434 ; SKX-NEXT:    retq # sched: [7:1.00]
   8435   %b = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> zeroinitializer
   8436   ret <16 x i32> %b
   8437 }
   8438 
   8439 define   <16 x float> @_xmm16xfloat(<16 x float> %a) {
        ; Replicates lane 0 of the 16-lane float vector %a into every lane using
        ; a shufflevector with an all-zero mask.
        ; The checks expect a single vbroadcastss from %xmm0 to %zmm0.
   8440 ; GENERIC-LABEL: _xmm16xfloat:
   8441 ; GENERIC:       # %bb.0:
   8442 ; GENERIC-NEXT:    vbroadcastss %xmm0, %zmm0 # sched: [1:1.00]
   8443 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   8444 ;
   8445 ; SKX-LABEL: _xmm16xfloat:
   8446 ; SKX:       # %bb.0:
   8447 ; SKX-NEXT:    vbroadcastss %xmm0, %zmm0 # sched: [3:1.00]
   8448 ; SKX-NEXT:    retq # sched: [7:1.00]
   8449   %b = shufflevector <16 x float> %a, <16 x float> undef, <16 x i32> zeroinitializer
   8450   ret <16 x float> %b
   8451 }
   8452 
   8453 define <16 x i32> @test_vbroadcast() {
        ; Takes no arguments. Performs an unordered float compare between an
        ; undef vector and zero, then selects per lane between a sign-extended
        ; all-false mask (%0) and the sign-extended compare result (%2), using
        ; the compare itself as the select condition.
        ; The checks expect: vxorps to zero %xmm0, vcmpunordps into %k0,
        ; vpmovm2d to expand the mask, knotw to invert it into %k1, and a
        ; zero-masked vmovdqa32 under the inverted mask.
   8454 ; GENERIC-LABEL: test_vbroadcast:
   8455 ; GENERIC:       # %bb.0: # %entry
   8456 ; GENERIC-NEXT:    vxorps %xmm0, %xmm0, %xmm0 # sched: [1:1.00]
   8457 ; GENERIC-NEXT:    vcmpunordps %zmm0, %zmm0, %k0 # sched: [3:1.00]
   8458 ; GENERIC-NEXT:    vpmovm2d %k0, %zmm0 # sched: [1:0.33]
   8459 ; GENERIC-NEXT:    knotw %k0, %k1 # sched: [1:0.33]
   8460 ; GENERIC-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50]
   8461 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   8462 ;
   8463 ; SKX-LABEL: test_vbroadcast:
   8464 ; SKX:       # %bb.0: # %entry
   8465 ; SKX-NEXT:    vxorps %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
   8466 ; SKX-NEXT:    vcmpunordps %zmm0, %zmm0, %k0 # sched: [3:1.00]
   8467 ; SKX-NEXT:    vpmovm2d %k0, %zmm0 # sched: [1:0.25]
   8468 ; SKX-NEXT:    knotw %k0, %k1 # sched: [1:1.00]
   8469 ; SKX-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33]
   8470 ; SKX-NEXT:    retq # sched: [7:1.00]
   8471 entry:
   8472   %0 = sext <16 x i1> zeroinitializer to <16 x i32>
   8473   %1 = fcmp uno <16 x float> undef, zeroinitializer
   8474   %2 = sext <16 x i1> %1 to <16 x i32>
   8475   %3 = select <16 x i1> %1, <16 x i32> %0, <16 x i32> %2
   8476   ret <16 x i32> %3
   8477 }
   8478 
   8479 ; We implement the set1 intrinsics with vector initializers.  Verify that the
   8480 ; IR generated will produce broadcasts at the end.
   8481 define <8 x double> @test_set1_pd(double %d) #2 {
        ; Builds an <8 x double> by inserting the same scalar %d into lanes 0..7
        ; one at a time, then returns it.
        ; The checks expect the whole insert chain to collapse into a single
        ; vbroadcastsd.
        ; NOTE(review): attribute group #2 is defined outside this excerpt.
   8482 ; GENERIC-LABEL: test_set1_pd:
   8483 ; GENERIC:       # %bb.0: # %entry
   8484 ; GENERIC-NEXT:    vbroadcastsd %xmm0, %zmm0 # sched: [1:1.00]
   8485 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   8486 ;
   8487 ; SKX-LABEL: test_set1_pd:
   8488 ; SKX:       # %bb.0: # %entry
   8489 ; SKX-NEXT:    vbroadcastsd %xmm0, %zmm0 # sched: [3:1.00]
   8490 ; SKX-NEXT:    retq # sched: [7:1.00]
   8491 entry:
   8492   %vecinit.i = insertelement <8 x double> undef, double %d, i32 0
   8493   %vecinit1.i = insertelement <8 x double> %vecinit.i, double %d, i32 1
   8494   %vecinit2.i = insertelement <8 x double> %vecinit1.i, double %d, i32 2
   8495   %vecinit3.i = insertelement <8 x double> %vecinit2.i, double %d, i32 3
   8496   %vecinit4.i = insertelement <8 x double> %vecinit3.i, double %d, i32 4
   8497   %vecinit5.i = insertelement <8 x double> %vecinit4.i, double %d, i32 5
   8498   %vecinit6.i = insertelement <8 x double> %vecinit5.i, double %d, i32 6
   8499   %vecinit7.i = insertelement <8 x double> %vecinit6.i, double %d, i32 7
   8500   ret <8 x double> %vecinit7.i
   8501 }
   8502 
   8503 define <8 x i64> @test_set1_epi64(i64 %d) #2 {
        ; Builds an <8 x i64> by inserting the same scalar %d into lanes 0..7
        ; one at a time, then returns it.
        ; The checks expect the whole insert chain to collapse into a single
        ; vpbroadcastq from %rdi.
        ; NOTE(review): attribute group #2 is defined outside this excerpt.
   8504 ; GENERIC-LABEL: test_set1_epi64:
   8505 ; GENERIC:       # %bb.0: # %entry
   8506 ; GENERIC-NEXT:    vpbroadcastq %rdi, %zmm0 # sched: [1:1.00]
   8507 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   8508 ;
   8509 ; SKX-LABEL: test_set1_epi64:
   8510 ; SKX:       # %bb.0: # %entry
   8511 ; SKX-NEXT:    vpbroadcastq %rdi, %zmm0 # sched: [3:1.00]
   8512 ; SKX-NEXT:    retq # sched: [7:1.00]
   8513 entry:
   8514   %vecinit.i = insertelement <8 x i64> undef, i64 %d, i32 0
   8515   %vecinit1.i = insertelement <8 x i64> %vecinit.i, i64 %d, i32 1
   8516   %vecinit2.i = insertelement <8 x i64> %vecinit1.i, i64 %d, i32 2
   8517   %vecinit3.i = insertelement <8 x i64> %vecinit2.i, i64 %d, i32 3
   8518   %vecinit4.i = insertelement <8 x i64> %vecinit3.i, i64 %d, i32 4
   8519   %vecinit5.i = insertelement <8 x i64> %vecinit4.i, i64 %d, i32 5
   8520   %vecinit6.i = insertelement <8 x i64> %vecinit5.i, i64 %d, i32 6
   8521   %vecinit7.i = insertelement <8 x i64> %vecinit6.i, i64 %d, i32 7
   8522   ret <8 x i64> %vecinit7.i
   8523 }
   8524 
   8525 define <16 x float> @test_set1_ps(float %f) #2 {
        ; Builds a <16 x float> by inserting the same scalar %f into lanes 0..15
        ; one at a time, then returns it.
        ; The checks expect the whole insert chain to collapse into a single
        ; vbroadcastss.
        ; NOTE(review): attribute group #2 is defined outside this excerpt.
   8526 ; GENERIC-LABEL: test_set1_ps:
   8527 ; GENERIC:       # %bb.0: # %entry
   8528 ; GENERIC-NEXT:    vbroadcastss %xmm0, %zmm0 # sched: [1:1.00]
   8529 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   8530 ;
   8531 ; SKX-LABEL: test_set1_ps:
   8532 ; SKX:       # %bb.0: # %entry
   8533 ; SKX-NEXT:    vbroadcastss %xmm0, %zmm0 # sched: [3:1.00]
   8534 ; SKX-NEXT:    retq # sched: [7:1.00]
   8535 entry:
   8536   %vecinit.i = insertelement <16 x float> undef, float %f, i32 0
   8537   %vecinit1.i = insertelement <16 x float> %vecinit.i, float %f, i32 1
   8538   %vecinit2.i = insertelement <16 x float> %vecinit1.i, float %f, i32 2
   8539   %vecinit3.i = insertelement <16 x float> %vecinit2.i, float %f, i32 3
   8540   %vecinit4.i = insertelement <16 x float> %vecinit3.i, float %f, i32 4
   8541   %vecinit5.i = insertelement <16 x float> %vecinit4.i, float %f, i32 5
   8542   %vecinit6.i = insertelement <16 x float> %vecinit5.i, float %f, i32 6
   8543   %vecinit7.i = insertelement <16 x float> %vecinit6.i, float %f, i32 7
   8544   %vecinit8.i = insertelement <16 x float> %vecinit7.i, float %f, i32 8
   8545   %vecinit9.i = insertelement <16 x float> %vecinit8.i, float %f, i32 9
   8546   %vecinit10.i = insertelement <16 x float> %vecinit9.i, float %f, i32 10
   8547   %vecinit11.i = insertelement <16 x float> %vecinit10.i, float %f, i32 11
   8548   %vecinit12.i = insertelement <16 x float> %vecinit11.i, float %f, i32 12
   8549   %vecinit13.i = insertelement <16 x float> %vecinit12.i, float %f, i32 13
   8550   %vecinit14.i = insertelement <16 x float> %vecinit13.i, float %f, i32 14
   8551   %vecinit15.i = insertelement <16 x float> %vecinit14.i, float %f, i32 15
   8552   ret <16 x float> %vecinit15.i
   8553 }
   8554 
   8555 define <16 x i32> @test_set1_epi32(i32 %f) #2 {
        ; Builds a <16 x i32> by inserting the same scalar %f into lanes 0..15
        ; one at a time, then returns it.
        ; The checks expect the whole insert chain to collapse into a single
        ; vpbroadcastd from %edi.
        ; NOTE(review): attribute group #2 is defined outside this excerpt.
   8556 ; GENERIC-LABEL: test_set1_epi32:
   8557 ; GENERIC:       # %bb.0: # %entry
   8558 ; GENERIC-NEXT:    vpbroadcastd %edi, %zmm0 # sched: [1:1.00]
   8559 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   8560 ;
   8561 ; SKX-LABEL: test_set1_epi32:
   8562 ; SKX:       # %bb.0: # %entry
   8563 ; SKX-NEXT:    vpbroadcastd %edi, %zmm0 # sched: [3:1.00]
   8564 ; SKX-NEXT:    retq # sched: [7:1.00]
   8565 entry:
   8566   %vecinit.i = insertelement <16 x i32> undef, i32 %f, i32 0
   8567   %vecinit1.i = insertelement <16 x i32> %vecinit.i, i32 %f, i32 1
   8568   %vecinit2.i = insertelement <16 x i32> %vecinit1.i, i32 %f, i32 2
   8569   %vecinit3.i = insertelement <16 x i32> %vecinit2.i, i32 %f, i32 3
   8570   %vecinit4.i = insertelement <16 x i32> %vecinit3.i, i32 %f, i32 4
   8571   %vecinit5.i = insertelement <16 x i32> %vecinit4.i, i32 %f, i32 5
   8572   %vecinit6.i = insertelement <16 x i32> %vecinit5.i, i32 %f, i32 6
   8573   %vecinit7.i = insertelement <16 x i32> %vecinit6.i, i32 %f, i32 7
   8574   %vecinit8.i = insertelement <16 x i32> %vecinit7.i, i32 %f, i32 8
   8575   %vecinit9.i = insertelement <16 x i32> %vecinit8.i, i32 %f, i32 9
   8576   %vecinit10.i = insertelement <16 x i32> %vecinit9.i, i32 %f, i32 10
   8577   %vecinit11.i = insertelement <16 x i32> %vecinit10.i, i32 %f, i32 11
   8578   %vecinit12.i = insertelement <16 x i32> %vecinit11.i, i32 %f, i32 12
   8579   %vecinit13.i = insertelement <16 x i32> %vecinit12.i, i32 %f, i32 13
   8580   %vecinit14.i = insertelement <16 x i32> %vecinit13.i, i32 %f, i32 14
   8581   %vecinit15.i = insertelement <16 x i32> %vecinit14.i, i32 %f, i32 15
   8582   ret <16 x i32> %vecinit15.i
   8583 }
   8584 
   8585 ; We implement the scalar broadcast intrinsics with vector initializers.
   8586 ; Verify that the IR generated will produce the broadcast at the end.
   8587 define <8 x double> @test_mm512_broadcastsd_pd(<2 x double> %a) {
        ; Extracts lane 0 of the 2-lane input %a and inserts that scalar into
        ; lanes 0..7 of an <8 x double>, one lane at a time.
        ; The checks expect the extract plus insert chain to collapse into a
        ; single vbroadcastsd from %xmm0 to %zmm0.
   8588 ; GENERIC-LABEL: test_mm512_broadcastsd_pd:
   8589 ; GENERIC:       # %bb.0: # %entry
   8590 ; GENERIC-NEXT:    vbroadcastsd %xmm0, %zmm0 # sched: [1:1.00]
   8591 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   8592 ;
   8593 ; SKX-LABEL: test_mm512_broadcastsd_pd:
   8594 ; SKX:       # %bb.0: # %entry
   8595 ; SKX-NEXT:    vbroadcastsd %xmm0, %zmm0 # sched: [3:1.00]
   8596 ; SKX-NEXT:    retq # sched: [7:1.00]
   8597 entry:
   8598   %0 = extractelement <2 x double> %a, i32 0
   8599   %vecinit.i = insertelement <8 x double> undef, double %0, i32 0
   8600   %vecinit1.i = insertelement <8 x double> %vecinit.i, double %0, i32 1
   8601   %vecinit2.i = insertelement <8 x double> %vecinit1.i, double %0, i32 2
   8602   %vecinit3.i = insertelement <8 x double> %vecinit2.i, double %0, i32 3
   8603   %vecinit4.i = insertelement <8 x double> %vecinit3.i, double %0, i32 4
   8604   %vecinit5.i = insertelement <8 x double> %vecinit4.i, double %0, i32 5
   8605   %vecinit6.i = insertelement <8 x double> %vecinit5.i, double %0, i32 6
   8606   %vecinit7.i = insertelement <8 x double> %vecinit6.i, double %0, i32 7
   8607   ret <8 x double> %vecinit7.i
   8608 }
   8609 
   8610 define <16 x float> @suff_test1(<8 x float>%a)  {
        ; Splat of element 0 of an <8 x float> into a <16 x float>, written as a
        ; single widening shufflevector. The expected code under both check
        ; prefixes is one vbroadcastss %xmm0, %zmm0.
   8611 ; GENERIC-LABEL: suff_test1:
   8612 ; GENERIC:       # %bb.0:
   8613 ; GENERIC-NEXT:    vbroadcastss %xmm0, %zmm0 # sched: [1:1.00]
   8614 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   8615 ;
   8616 ; SKX-LABEL: suff_test1:
   8617 ; SKX:       # %bb.0:
   8618 ; SKX-NEXT:    vbroadcastss %xmm0, %zmm0 # sched: [3:1.00]
   8619 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; The all-zero mask selects element 0 of %a for each of the 16 result
          ; lanes; the undef second operand is never referenced by the mask.
   8620   %res = shufflevector <8 x float> %a, <8 x float> undef, <16 x i32> zeroinitializer
   8621   ret <16 x float>%res
   8622 }
   8623 
   8624 define <8 x double> @suff_test2(<4 x double>%a)  {
        ; Double-precision splat: element 0 of a <4 x double> is replicated into
        ; an <8 x double> by one widening shufflevector. Both check prefixes
        ; expect a single vbroadcastsd %xmm0, %zmm0.
   8625 ; GENERIC-LABEL: suff_test2:
   8626 ; GENERIC:       # %bb.0:
   8627 ; GENERIC-NEXT:    vbroadcastsd %xmm0, %zmm0 # sched: [1:1.00]
   8628 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   8629 ;
   8630 ; SKX-LABEL: suff_test2:
   8631 ; SKX:       # %bb.0:
   8632 ; SKX-NEXT:    vbroadcastsd %xmm0, %zmm0 # sched: [3:1.00]
   8633 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; All eight mask entries are 0, so every result lane reads element 0
          ; of %a; the undef operand is unused.
   8634   %res = shufflevector <4 x double> %a, <4 x double> undef, <8 x i32> zeroinitializer
   8635   ret <8 x double>%res
   8636 }
   8637 
   8638 define <64 x i8> @_invec32xi8(<32 x i8>%a)  {
        ; Byte splat: element 0 of a <32 x i8> is replicated into all 64 lanes
        ; of a <64 x i8>. Both check prefixes expect a single
        ; vpbroadcastb %xmm0, %zmm0.
   8639 ; GENERIC-LABEL: _invec32xi8:
   8640 ; GENERIC:       # %bb.0:
   8641 ; GENERIC-NEXT:    vpbroadcastb %xmm0, %zmm0 # sched: [1:1.00]
   8642 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   8643 ;
   8644 ; SKX-LABEL: _invec32xi8:
   8645 ; SKX:       # %bb.0:
   8646 ; SKX-NEXT:    vpbroadcastb %xmm0, %zmm0 # sched: [3:1.00]
   8647 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; The mask is 64 zero indices (mask elements are i32 regardless of
          ; the data element type), so each result byte is %a[0].
   8648   %res = shufflevector <32 x i8> %a, <32 x i8> undef, <64 x i32> zeroinitializer
   8649   ret <64 x i8>%res
   8650 }
   8651 
   8652 define <32 x i16> @_invec16xi16(<16 x i16>%a)  {
        ; 16-bit splat: element 0 of a <16 x i16> is replicated into all 32
        ; lanes of a <32 x i16>. Both check prefixes expect a single
        ; vpbroadcastw %xmm0, %zmm0.
   8653 ; GENERIC-LABEL: _invec16xi16:
   8654 ; GENERIC:       # %bb.0:
   8655 ; GENERIC-NEXT:    vpbroadcastw %xmm0, %zmm0 # sched: [1:1.00]
   8656 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   8657 ;
   8658 ; SKX-LABEL: _invec16xi16:
   8659 ; SKX:       # %bb.0:
   8660 ; SKX-NEXT:    vpbroadcastw %xmm0, %zmm0 # sched: [3:1.00]
   8661 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; 32 zero mask indices: each result lane takes %a[0].
   8662   %res = shufflevector <16 x i16> %a, <16 x i16> undef, <32 x i32> zeroinitializer
   8663   ret <32 x i16>%res
   8664 }
   8665 
   8666 define <16 x i32> @_invec8xi32(<8 x i32>%a)  {
        ; 32-bit integer splat: element 0 of an <8 x i32> is replicated into all
        ; 16 lanes of a <16 x i32>. Although the element type is i32, the
        ; assertions pin the vbroadcastss mnemonic (not an integer broadcast)
        ; under both check prefixes.
   8667 ; GENERIC-LABEL: _invec8xi32:
   8668 ; GENERIC:       # %bb.0:
   8669 ; GENERIC-NEXT:    vbroadcastss %xmm0, %zmm0 # sched: [1:1.00]
   8670 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   8671 ;
   8672 ; SKX-LABEL: _invec8xi32:
   8673 ; SKX:       # %bb.0:
   8674 ; SKX-NEXT:    vbroadcastss %xmm0, %zmm0 # sched: [3:1.00]
   8675 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; 16 zero mask indices: each result lane takes %a[0].
   8676   %res = shufflevector <8 x i32> %a, <8 x i32> undef, <16 x i32> zeroinitializer
   8677   ret <16 x i32>%res
   8678 }
   8679 
   8680 define <8 x i64> @_invec4xi64(<4 x i64>%a)  {
        ; 64-bit integer splat: element 0 of a <4 x i64> is replicated into all
        ; 8 lanes of an <8 x i64>. Although the element type is i64, the
        ; assertions pin the vbroadcastsd mnemonic (not an integer broadcast)
        ; under both check prefixes.
   8681 ; GENERIC-LABEL: _invec4xi64:
   8682 ; GENERIC:       # %bb.0:
   8683 ; GENERIC-NEXT:    vbroadcastsd %xmm0, %zmm0 # sched: [1:1.00]
   8684 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   8685 ;
   8686 ; SKX-LABEL: _invec4xi64:
   8687 ; SKX:       # %bb.0:
   8688 ; SKX-NEXT:    vbroadcastsd %xmm0, %zmm0 # sched: [3:1.00]
   8689 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; 8 zero mask indices: each result lane takes %a[0].
   8690   %res = shufflevector <4 x i64> %a, <4 x i64> undef, <8 x i32> zeroinitializer
   8691   ret <8 x i64>%res
   8692 }
   8693 
   8694 declare void @func_f32(float)
        ; @func_f32 is only declared here (no body in this span). Its call in the
        ; function below sits between the definition of %a and the later use of
        ; %a as the splat source.
        ;
        ; broadcast_ss_spill computes %x + %x, passes the sum to @func_f32, then
        ; splats that same sum into all 16 lanes of the result. The assertions
        ; expect the sum to be stored to the stack before the call (the line
        ; marked "16-byte Spill") and the splat to be a vbroadcastss that reads
        ; the stack slot directly (marked "16-byte Folded Reload"), with no
        ; separate reload instruction. The 24-byte stack adjustment and its CFI
        ; offsets (32, then back to 8) are pinned as well.
   8695 define <16 x float> @broadcast_ss_spill(float %x) {
   8696 ; GENERIC-LABEL: broadcast_ss_spill:
   8697 ; GENERIC:       # %bb.0:
   8698 ; GENERIC-NEXT:    subq $24, %rsp # sched: [1:0.33]
   8699 ; GENERIC-NEXT:    .cfi_def_cfa_offset 32
   8700 ; GENERIC-NEXT:    vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
   8701 ; GENERIC-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill sched: [1:1.00]
   8702 ; GENERIC-NEXT:    callq func_f32
   8703 ; GENERIC-NEXT:    vbroadcastss (%rsp), %zmm0 # 16-byte Folded Reload sched: [8:1.00]
   8704 ; GENERIC-NEXT:    addq $24, %rsp # sched: [1:0.33]
   8705 ; GENERIC-NEXT:    .cfi_def_cfa_offset 8
   8706 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   8707 ;
   8708 ; SKX-LABEL: broadcast_ss_spill:
   8709 ; SKX:       # %bb.0:
   8710 ; SKX-NEXT:    subq $24, %rsp # sched: [1:0.25]
   8711 ; SKX-NEXT:    .cfi_def_cfa_offset 32
   8712 ; SKX-NEXT:    vaddss %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
   8713 ; SKX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill sched: [1:1.00]
   8714 ; SKX-NEXT:    callq func_f32
   8715 ; SKX-NEXT:    vbroadcastss (%rsp), %zmm0 # 16-byte Folded Reload sched: [8:0.50]
   8716 ; SKX-NEXT:    addq $24, %rsp # sched: [1:0.25]
   8717 ; SKX-NEXT:    .cfi_def_cfa_offset 8
   8718 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; Value that serves as both the call argument and the splat source.
   8719   %a  = fadd float %x, %x
          ; %a is used again after this call, so it has to survive it.
   8720   call void @func_f32(float %a)
          ; Splat idiom: put %a in lane 0, then shuffle with an all-zero mask so
          ; that every result lane reads lane 0.
   8721   %b = insertelement <16 x float> undef, float %a, i32 0
   8722   %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
   8723   ret <16 x float> %c
   8724 }
   8725 
   8726 declare void @func_f64(double)
        ; @func_f64 is only declared here (no body in this span). Its call in the
        ; function below sits between the definition of %a and the later use of
        ; %a as the splat source.
        ;
        ; broadcast_sd_spill computes %x + %x, passes the sum to @func_f64, then
        ; splats that same sum into all 8 lanes of the result. The assertions
        ; expect the sum to be stored to the stack before the call (the line
        ; marked "16-byte Spill") and the splat to be a vbroadcastsd that reads
        ; the stack slot directly (marked "16-byte Folded Reload"), with no
        ; separate reload instruction. The 24-byte stack adjustment and its CFI
        ; offsets (32, then back to 8) are pinned as well.
   8727 define <8 x double> @broadcast_sd_spill(double %x) {
   8728 ; GENERIC-LABEL: broadcast_sd_spill:
   8729 ; GENERIC:       # %bb.0:
   8730 ; GENERIC-NEXT:    subq $24, %rsp # sched: [1:0.33]
   8731 ; GENERIC-NEXT:    .cfi_def_cfa_offset 32
   8732 ; GENERIC-NEXT:    vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
   8733 ; GENERIC-NEXT:    vmovapd %xmm0, (%rsp) # 16-byte Spill sched: [1:1.00]
   8734 ; GENERIC-NEXT:    callq func_f64
   8735 ; GENERIC-NEXT:    vbroadcastsd (%rsp), %zmm0 # 16-byte Folded Reload sched: [8:1.00]
   8736 ; GENERIC-NEXT:    addq $24, %rsp # sched: [1:0.33]
   8737 ; GENERIC-NEXT:    .cfi_def_cfa_offset 8
   8738 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   8739 ;
   8740 ; SKX-LABEL: broadcast_sd_spill:
   8741 ; SKX:       # %bb.0:
   8742 ; SKX-NEXT:    subq $24, %rsp # sched: [1:0.25]
   8743 ; SKX-NEXT:    .cfi_def_cfa_offset 32
   8744 ; SKX-NEXT:    vaddsd %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
   8745 ; SKX-NEXT:    vmovapd %xmm0, (%rsp) # 16-byte Spill sched: [1:1.00]
   8746 ; SKX-NEXT:    callq func_f64
   8747 ; SKX-NEXT:    vbroadcastsd (%rsp), %zmm0 # 16-byte Folded Reload sched: [8:0.50]
   8748 ; SKX-NEXT:    addq $24, %rsp # sched: [1:0.25]
   8749 ; SKX-NEXT:    .cfi_def_cfa_offset 8
   8750 ; SKX-NEXT:    retq # sched: [7:1.00]
          ; Value that serves as both the call argument and the splat source.
   8751   %a  = fadd double %x, %x
          ; %a is used again after this call, so it has to survive it.
   8752   call void @func_f64(double %a)
          ; Splat idiom: put %a in lane 0, then shuffle with an all-zero mask so
          ; that every result lane reads lane 0.
   8753   %b = insertelement <8 x double> undef, double %a, i32 0
   8754   %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
   8755   ret <8 x double> %c
   8756 }
   8757